#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from __future__ import annotations

import copy

import pypath.share.settings as settings
import pypath.share.session as session
import pypath_common._constants as _const
import pypath.inputs.uniprot_idmapping as uniprot_idmapping
import pypath.inputs.unichem as unichem_input

_logger = session.Logger(name='input_formats')


__all__ = [
    'FileMapping',
    'PickleMapping',
    'NetworkInput',
    'ReadList',
    'UniprotListMapping',
    'ProMapping',
    'ArrayMapping',
    'BiomartMapping',
]


# pypath ID type label -> field name used when querying UniProt.
AC_QUERY = {
    'genesymbol': 'gene_primary',
    'genesymbol-syn': 'gene_synonym',
    'hgnc': 'xref_hgnc',
    'embl': 'xref_embl',
    'entrez': 'xref_geneid',
    'geneid': 'xref_geneid',
    'refseqp': 'xref_refseq',
    'enst': 'xref_ensembl',
    'uniprot-entry': 'id',
    'protein-name': 'protein_name',
    'gene-name': 'gene_names',
    'gene-orf': 'gene_orf',
    'gene-oln': 'gene_oln',
    'ec': 'ec',
}

# pypath ID type label -> database name used by the UniProt ID mapping
# (upload lists) service.
AC_MAPPING = {
    'uniprot': 'UniProtKB',
    'uniprot-entry': 'UniProtKB',
    # UniProt spells these databases "GenBank"; the former value
    # "EMBL-GeneBank-DDBJ" is not a name UniProt recognises.
    'embl': 'EMBL-GenBank-DDBJ',
    'embl_id': 'EMBL-GenBank-DDBJ_CDS',
    'pir': 'PIR',
    'entrez': 'GeneID',
    'gi': 'GI_number',
    'refseqp': 'RefSeq_Protein',
    'refseqn': 'RefSeq_Nucleotide',
    'ensembl': 'Ensembl',
    'ensp': 'Ensembl_Protein',
    'enst': 'Ensembl_Transcript',
    'ensg': 'Ensembl',
    'ensgp': 'Ensembl_Genomes_Protein',
    'ensgt': 'Ensembl_Genomes_Transcript',
    'hgnc': 'HGNC',
    'ensp_string': 'STRING',
    'genesymbol': 'Gene_Name',
}

# pypath ID type label -> attribute name, presumably as used by Ensembl
# BioMart (the consuming class is not part of this section).
BIOMART_MAPPING = {
    'hgnc_symbol': 'hgnc_symbol',
    'rnacentral': 'rnacentral',
    'hgnc_trans_name': 'hgnc_trans_name',
    'wikigene_name': 'wikigene_name',
    'gene_name': 'external_gene_name',
    'genesymbol': 'external_gene_name',
    'transcript_name': 'external_transcript_name',
    'gene_description': 'description',
    'gene_synonym': 'external_synonym',
    'interpro_description': 'interpro_description',
    'interpro': 'interpro',
    'interpro_short_description': 'interpro_short_description',
    'enst_biomart': 'ensembl_transcript_id',
    'ensg_biomart': 'ensembl_gene_id',
    'ensp_biomart': 'ensembl_peptide_id',
    'ensembl_gene_id': 'ensembl_gene_id',
    'ensembl_transcript_id': 'ensembl_transcript_id',
    'ensembl_peptide_id': 'ensembl_peptide_id',
    'uniprot': 'uniprotswissprot',
    'trembl': 'uniprotsptrembl',
}

# pypath ID type label -> label used by the Protein Ontology (PRO).
PRO_MAPPING = {
    'alzforum': 'Alzforum_mut',
    'araport': 'Araport',
    'cgnc': 'CGNC',
    'dictybase': 'dictyBase',
    'dto': 'DTO',
    'ecocyc': 'EcoCyc',
    'ecogene': 'EcoGene',
    'ensembl_pro': 'Ensembl',
    'ensembl_bacteria': 'EnsemblBacteria',
    'flybase': 'FlyBase',
    'hgnc': 'HGNC',
    'iuphar_fam': 'IUPHARfam',
    'iuphar': 'IUPHARobj',
    'mgi': 'MGI',
    'mro': 'MRO',
    'ncbi_gene': 'NCBIGene',
    # NOTE(review): the key `pbd` looks like a typo of `pdb`; kept as is
    # because callers may already use it -- confirm.
    'pbd': 'PDB',
    'pombase': 'PomBase',
    # NOTE(review): `interpro` mapped to `PRO` looks suspicious (perhaps
    # the key was meant to be `pro`) -- confirm.
    'interpro': 'PRO',
    'reactome': 'Reactome',
    'rgd': 'RGD',
    'sgd': 'SGD',
    'tdr': 'TDR',
    'uniprot': 'UniProtKB',
    'uniprot-var': 'UniProtKB_VAR',
    'wormbase': 'WormBase',
    'zfin': 'ZFIN',
}

# Microarray platform labels accepted for probe mapping.
ARRAY_MAPPING = {
    'affy',
    'affymetrix',
    'illumina',
    'agilent',
    'codelink',
    'phalanx',
}

# pypath ID type label -> label presumably used by the RaMP database.
RAMP_MAPPING = {
    'cas': 'CAS',
    'cas_id': 'CAS',
    'lipidmaps': 'LIPIDMAPS',
    'en': 'EN',
    'enzymatic_nomenclature': 'EN',
    'genesymbol': 'gene_symbol',
    'pubchem_compound': 'pubchem',
    'pubchem_cid': 'pubchem',
}

# pypath ID type label -> field name presumably used in HMDB records.
HMDB_MAPPING = {
    'hmdb': 'accession',
    'pubchem_cid': 'pubchem_compound',
    'pubchem': 'pubchem_compound',
    'phenolexplorer': 'phenol_explorer_compound',
    'cas': 'cas_registry_number',
    'formula': 'chemical_formula',
    'inchi': 'inchi',
    'inchikey': 'inchikey',
    'hmdb_name': 'name',
    'hmdb_synonym': 'synonyms',
    'smiles': 'smiles',
    'iupac': 'traditional_iupac',
}
def __init__(
        self,
        id_type_a,
        id_type_b='uniprot',
        ncbi_tax_id=9606,
        swissprot='true',
    ):
    """
    Defines an ID conversion table to retrieve from UniProt.

    Args:
        id_type_a (str): Type of the accession numbers to translate from.
        id_type_b (str): Type of the accession numbers to translate to;
            `uniprot` by default.
        ncbi_tax_id (int): NCBI Taxonomy ID of the organism of interest.
            Handed to the parent constructor as received, then stored
            on the instance converted to `int`.
        swissprot (str): Stored unchanged as `self.swissprot`.
            Presumably selects SwissProt and/or TrEMBL records in the
            UniProt query -- confirm against the code that consumes it.
    """

    # Assigned before the parent constructor runs, as in the original
    # statement order.
    self.type = 'uniprot'

    MappingInput.__init__(
        self,
        type_='uniprot',
        id_type_a=id_type_a,
        id_type_b=id_type_b,
        ncbi_tax_id=ncbi_tax_id,
    )

    self.ncbi_tax_id = int(ncbi_tax_id)
    self.typ = 'protein'
    self.swissprot = swissprot
@staticmethod
def resource_id_type(id_type, override=None):
    """
    Translates a pypath ID type label to the one used by the UniProt
    web service.

    Args:
        id_type (str): An ID type label as used in pypath.
        override: Not used by this implementation.

    Returns:
        (str): The matching value from `AC_QUERY`; if the label is not
            among its keys, the label itself is returned unchanged.
    """

    return AC_QUERY.get(id_type, id_type)
class UniprotListMapping(MappingInput):
    """
    Parameters for downloading a mapping table from the UniProt
    `Upload Lists` web service.

    :arg str id_type_a:
        Custom name for one of the ID types.
    :arg str id_type_b:
        Custom name for the other ID type.
    :arg str uniprot_id_type_a:
        The symbol the UniProt web service uses for the first ID type.
        These are included in the module and set automatically; the
        argument only offers a way to override them.
    :arg str uniprot_id_type_b:
        Same as above, for the other ID type.
    :arg bool swissprot:
        Download data only for SwissProt IDs.
    """

    # pypath label -> UniProt database name for non-UniProt ID types.
    _resource_id_types = AC_MAPPING

    # All UniProt flavours share one database name in this table.
    _from_uniprot = dict.fromkeys(
        ('uniprot', 'swissprot', 'trembl'),
        'UniProtKB_AC-ID',
    )

    # Only `swissprot` has a dedicated database name in this table.
    _to_uniprot = dict(
        uniprot='UniProtKB',
        swissprot='UniProtKB-Swiss-Prot',
        trembl='UniProtKB',
    )
class ProMapping(MappingInput):
    """
    Parameters for a mapping table from the Protein Ontology
    Consortium (PRO).

    :arg str id_type_a:
        Custom name for one of the ID types.
    :arg str id_type_b:
        Custom name for the other ID type.
    :arg str pro_id_type_a:
        The symbol PRO uses to label the first ID type. These are
        included in the module and set automatically; the argument only
        offers a way to override them.
    :arg str pro_id_type_b:
        Same as above, for the other ID type.
    """

    # pypath label -> label used by PRO.
    _resource_id_types = PRO_MAPPING
def __init__(
        self,
        id_type_a,
        id_type_b,
        ncbi_tax_id=_const.NOT_ORGANISM_SPECIFIC,
    ):
    """
    Parameters for UniChem based ID translation.

    Args:
        id_type_a: Custom name for one of the ID types.
        id_type_b: Custom name for the other ID type.
        ncbi_tax_id: Accepted in the signature, but not forwarded: the
            parent constructor always receives
            `_const.NOT_ORGANISM_SPECIFIC`.
    """

    MappingInput.__init__(
        self,
        type_='unichem',
        id_type_a=id_type_a,
        id_type_b=id_type_b,
        ncbi_tax_id=_const.NOT_ORGANISM_SPECIFIC,
    )
def __init__(
        self,
        id_type_a,
        id_type_b,
        ncbi_tax_id=_const.NOT_ORGANISM_SPECIFIC,
    ):
    """
    Parameters for ID translation tables from the RaMP database.

    Args:
        id_type_a: Custom name for one of the ID types.
        id_type_b: Custom name for the other ID type.
        ncbi_tax_id: Accepted in the signature, but not forwarded: the
            parent constructor always receives
            `_const.NOT_ORGANISM_SPECIFIC`.
    """

    MappingInput.__init__(
        self,
        type_='ramp',
        id_type_a=id_type_a,
        id_type_b=id_type_b,
        ncbi_tax_id=_const.NOT_ORGANISM_SPECIFIC,
    )
def __init__(
        self,
        id_type_a,
        id_type_b,
        ncbi_tax_id=_const.NOT_ORGANISM_SPECIFIC,
    ):
    """
    Parameters for ID translation tables from the Human Metabolome
    Database.

    Args:
        id_type_a: Custom name for one of the ID types.
        id_type_b: Custom name for the other ID type.
        ncbi_tax_id: Accepted in the signature, but not forwarded: the
            parent constructor always receives
            `_const.NOT_ORGANISM_SPECIFIC`.
    """

    MappingInput.__init__(
        self,
        type_='hmdb',
        id_type_a=id_type_a,
        id_type_b=id_type_b,
        ncbi_tax_id=_const.NOT_ORGANISM_SPECIFIC,
        input_method='hmdb.metabolites_mapping',
    )
class ArrayMapping(MappingInput):
    """
    Provides parameters for microarray probe mapping tables.

    One of the ID types is a microarray platform (`affy`, `illumina`,
    `agilent`, `codelink` or `phalanx`), the other an Ensembl gene,
    transcript or peptide ID (`ensg`, `enst` or `ensp`).

    :arg str id_type_a:
        Custom name for one of the ID types.
    :arg str id_type_b:
        Custom name for the other ID type.
    """

    # Microarray platform labels accepted by this resource.
    _resource_id_types = ARRAY_MAPPING

    @classmethod
    def _process_id_type(
            cls,
            id_type: str,
            fail: bool = True,
        ) -> str | None:
        """
        Normalizes an ID type label and checks whether it can be used
        for microarray probe mapping.

        The label is lowercased, then `affymetrix` is replaced by
        `affy` and `ensembl` by `ensg`.

        Args:
            id_type: An ID type label.
            fail: Raise an error for an unknown label instead of
                returning None.

        Returns:
            The normalized label; None if it is unknown and `fail` is
            False.

        Raises:
            ValueError: If the label is unknown and `fail` is True. The
                message is also sent to the module logger.
        """

        aliases = {'affymetrix': 'affy', 'ensembl': 'ensg'}
        id_type = id_type.lower()
        id_type = aliases.get(id_type, id_type)

        if (
            id_type in cls._resource_id_types or
            id_type in {'ensg', 'enst', 'ensp'}
        ):

            return id_type

        if not fail:

            return None

        msg = (
            'Unknown ID type for microarray probe mapping: `%s`. '
            'Microarray ID types include `affy`, `illumina`, `agilent`, '
            '`codelink` and `phalanx`, all these can be translated to '
            'Ensembl gene, transcript or peptide IDs: `ensg`, `enst` '
            'or `ensp`. If you translate to some other ID type, do it '
            'in multiple steps.' % str(id_type)
        )
        _logger._log(msg)

        raise ValueError(msg)

    @classmethod
    def possible(
            cls,
            id_type_a: str,
            id_type_b: str,
            ncbi_tax_id: int | None = None,
        ) -> bool:
        """
        Tells whether both ID types are known to this resource.

        Args:
            id_type_a: An ID type label.
            id_type_b: Another ID type label.
            ncbi_tax_id: Not used by this check.

        Returns:
            True if both labels pass `_process_id_type`, False
            otherwise.
        """

        # Coerced to bool so the result matches the annotation; the
        # bare `and` used to leak a label string or None.
        return bool(
            cls._process_id_type(id_type_a, fail=False) and
            cls._process_id_type(id_type_b, fail=False)
        )
def __init__(
        self,
        name="unknown",
        separator=None,
        id_col_a=0,
        id_col_b=1,
        id_type_a="uniprot",
        id_type_b="uniprot",
        entity_type_a="protein",
        entity_type_b="protein",
        is_directed=False,
        sign=False,
        input=None,
        references=None,
        extra_edge_attrs=None,
        extra_node_attrs_a=None,
        extra_node_attrs_b=None,
        header=False,
        taxon_a=9606,
        taxon_b=9606,
        ncbi_tax_id=9606,
        interaction_type='post_translational',
        positive_filters=None,
        negative_filters=None,
        mark_source=None,
        mark_target=None,
        input_args=None,
        curl_args=None,
        must_have_references=True,
        huge=False,
        resource=None,
        unique_fields=None,
        expand_complexes=None,
        data_model=None,
        allow_loops=None,
        only_default_organism=False,
        dataset=None,
    ):
    """
    Stores the parameters describing one network input.

    Most arguments are copied to an attribute of the same name without
    modification. The exceptions:

    - ``references`` is stored as ``refs``; a falsy value becomes None.
    - ``extra_edge_attrs``, ``extra_node_attrs_a``,
      ``extra_node_attrs_b``, ``input_args`` and ``curl_args`` fall
      back to a new empty dict when falsy.
    - ``positive_filters`` and ``negative_filters`` fall back to a new
      empty list, ``unique_fields`` to a new empty set, when falsy.
    - ``must_have_references`` only stays truthy if ``references`` is
      truthy as well.
    - ``resource`` defaults to the value of ``name``.

    :param str mark_source:
        Creates a boolean vertex attribute and sets it True for the
        source vertex of directed interactions from this particular
        resource.
    :param str mark_target:
        Same as ``mark_source`` but for target vertices.
    """

    # identity of the input
    self.name = name
    self.resource = self.name if resource is None else resource
    self.dataset = dataset
    self.data_model = data_model
    self.interaction_type = interaction_type

    # layout of the input data
    self.input = input
    self.separator = separator
    self.header = header
    self.huge = huge
    self.input_args = input_args if input_args else {}
    self.curl_args = curl_args if curl_args else {}

    # the two interacting partners
    self.id_col_a = id_col_a
    self.id_col_b = id_col_b
    self.id_type_a = id_type_a
    self.id_type_b = id_type_b
    self.entity_type_a = entity_type_a
    self.entity_type_b = entity_type_b
    self.taxon_a = taxon_a
    self.taxon_b = taxon_b
    self.ncbi_tax_id = ncbi_tax_id
    self.only_default_organism = only_default_organism

    # direction and effect
    self.is_directed = is_directed
    self.sign = sign
    self.mark_source = mark_source
    self.mark_target = mark_target

    # literature references
    self.refs = references if references else None
    self.must_have_references = must_have_references and bool(references)

    # extra attributes
    self.extra_edge_attrs = extra_edge_attrs if extra_edge_attrs else {}
    self.extra_node_attrs_a = (
        extra_node_attrs_a if extra_node_attrs_a else {}
    )
    self.extra_node_attrs_b = (
        extra_node_attrs_b if extra_node_attrs_b else {}
    )

    # record filtering and processing options
    self.positive_filters = positive_filters if positive_filters else []
    self.negative_filters = negative_filters if negative_filters else []
    self.unique_fields = unique_fields if unique_fields else set()
    self.expand_complexes = expand_complexes
    self.allow_loops = allow_loops