Source code for pypath.inputs.guide2pharma

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from __future__ import annotations

import csv
import collections
import itertools

import pypath.share.curl as curl
import pypath.share.common as common
import pypath_common._constants as _const
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping
import pypath.utils.taxonomy as taxonomy
import pypath.internals.intera as intera


[docs] def guide2pharma_download( organism: str | int = 'human', endogenous: bool = True, process_interactions: bool = True, process_complexes: bool = True, ) -> tuple[list, dict]: """ Downloads and processes Guide to Pharmacology data. Returns list of dicts. Args: organism Name of the organism, e.g. `human`. endogenous Whether to include only endogenous ligands interactions. """ get_taxid = lambda x: ( _const.NOT_ORGANISM_SPECIFIC if x in {'', 'None'} else taxonomy.ensure_ncbi_tax_id(x) ) organism_ = None ncbi_tax_id = None if isinstance(organism, str): ncbi_tax_id = get_taxid(organism) try: organism_ = taxonomy.ensure_common_name(ncbi_tax_id) organism_ = organism_.capitalize() if organism_ else None except KeyError: pass # no organism specified positives = { 'agonist', 'activator', 'potentiation', 'partial agonist', 'inverse antagonist', 'full agonist', 'activation', 'irreversible agonist', 'positive', } negatives = { 'inhibitor', 'antagonist', 'inhibition', 'irreversible inhibition', 'inverse agonist', 'negative', 'weak inhibition', 'reversible inhibition', } GuideToPharmacologyInteraction = collections.namedtuple( 'GuideToPharmacologyInteraction', [ 'ligand', 'ligand_id_type', 'target', 'target_id_type', 'target_is_ligand', 'ligand_organism', 'target_organism', 'effect', 'ligand_location', 'target_type', 'ligand_endogenous', 'pubmed_ids', ] ) def is_positive(term): return term.lower().strip() in positives def is_negative(term): return term.lower().strip() in negatives interactions = [] complexes = {} url = urls.urls['gtp']['url'] c = curl.Curl(url, silent = False, large = True, encoding = 'utf-8') line0 = next(c.result) if line0[:2] != '"#': c.fileobj.seek(0) data = csv.DictReader(c.result) if organism_ is not None: data = [ d for d in data if ( get_taxid(d['Target Species']) == ncbi_tax_id and ncbi_tax_id in set( get_taxid(t) for t in d['Ligand Species'].split('|') ) ) ] if endogenous: data = [d for d in data if d['Endogenous'].strip() == 'true'] for d in data: if is_positive(d['Type']) or is_positive(d['Action']): effect = 1 elif is_negative(d['Type']) or is_negative(d['Action']): effect = -1 else: effect = 0 ligands = d['Ligand Gene Symbol'] or d['Ligand PubChem SID'] ligands = ligands.split('|') ligand_taxons = [get_taxid(l) for l in d['Ligand Species'].split('|')] for ligand_taxon in zip(ligands, ligand_taxons): targets = ( d['Target UniProt ID'] or d['Target Ligand UniProt ID'] or d['Target Ligand PubChem SID'] ) targets = targets.split('|') references = d['PubMed ID'].split('|') if d['PubMed ID'] else [] if process_interactions: for ligand, target in itertools.product(ligands, targets): interactions.append( GuideToPharmacologyInteraction( ligand = ligand, ligand_id_type = ( 'genesymbol' if d['Ligand Gene Symbol'] else 'pubchem_sid' if d['Ligand PubChem SID'] else None ), target = target, target_id_type = ( 'uniprot' if ( d['Target UniProt ID'] or d['Target Ligand UniProt ID'] ) else 'pubchem_sid' if d['Target Ligand PubChem SID'] else None ), target_is_ligand = bool(d['Target Ligand']), ligand_organism = ligand_taxon, target_organism = get_taxid(d['Target Species']), effect = effect, ligand_location = ( d['Ligand Context'].strip().lower() or None ), target_type = ( d['Receptor Site'].strip().lower() or None ), ligand_endogenous = ( d['Endogenous'].strip() == 't' ), pubmed_ids = references, ) ) if process_complexes: if ( len(targets) > 1 and ( d['Target UniProt ID'] or d['Target Ligand UniProt ID'] ) ): cplex = intera.Complex( components = targets, sources = 'Guide2Pharma', references = references, ) key = cplex.__str__() if key in complexes: complexes[key] += cplex else: complexes[key] = cplex if ( len(ligands) > 1 and d['Ligand Gene Symbol'] ): ligand_uniprots = [ mapping.map_name0(ligand, 'genesymbol', 'uniprot') for ligand in ligands ] ligand_uniprots = [u for u in ligand_uniprots if u] if len(ligand_uniprots) > 1: cplex = intera.Complex( components = ligand_uniprots, sources = 'Guide2Pharma', references = references, ) key = cplex.__str__() if key in complexes: complexes[key] += cplex else: complexes[key] = cplex return interactions, complexes
[docs] def guide2pharma_interactions(**kwargs): interactions, complexes = guide2pharma_download( process_complexes = False, **kwargs ) return interactions
[docs] def guide2pharma_complexes(**kwargs): interactions, complexes = guide2pharma_download( process_interactions = False, **kwargs ) return complexes