# Source code for pypath.inputs.guide2pharma

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from __future__ import annotations

import csv
import collections
import itertools

import pypath.share.curl as curl
import pypath.share.common as common
import pypath_common._constants as _const
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping
import pypath.utils.taxonomy as taxonomy
import pypath.internals.intera as intera



# [docs]
def guide2pharma_download(
        organism: str | int = 'human',
        endogenous: bool = True,
        process_interactions: bool = True,
        process_complexes: bool = True,
    ) -> tuple[list, dict]:
    """
    Downloads and processes Guide to Pharmacology data.

    Args:
        organism
            Name or NCBI Taxonomy ID of the organism, e.g. `human` or
            `9606`. Only records where the target species and at least
            one of the ligand species match this organism are kept. If
            no common name can be found for the organism, the records
            are not filtered by organism.
        endogenous
            Whether to include only endogenous ligands interactions.
        process_interactions
            Whether to build the list of interaction records.
        process_complexes
            Whether to build complexes from records with more than one
            target UniProt ID or more than one ligand gene symbol.

    Returns:
        A tuple of two elements: a list of `GuideToPharmacologyInteraction`
        named tuples (empty if `process_interactions` is False), and a
        dict of complexes with their string representations as keys
        (empty if `process_complexes` is False).
    """

    def get_taxid(name):

        # empty species fields are treated as not organism specific
        if name in {'', 'None'}:

            return _const.NOT_ORGANISM_SPECIFIC

        return taxonomy.ensure_ncbi_tax_id(name)

    organism_ = None
    ncbi_tax_id = None

    # the signature accepts both names and numeric taxonomy IDs
    if isinstance(organism, (str, int)):

        ncbi_tax_id = get_taxid(organism)

        try:

            organism_ = taxonomy.ensure_common_name(ncbi_tax_id)
            organism_ = organism_.capitalize() if organism_ else None

        except KeyError:

            pass  # no organism specified

    positives = {
        'agonist', 'activator', 'potentiation', 'partial agonist',
        'inverse antagonist', 'full agonist', 'activation',
        'irreversible agonist', 'positive',
    }
    negatives = {
        'inhibitor', 'antagonist', 'inhibition', 'irreversible inhibition',
        'inverse agonist', 'negative', 'weak inhibition',
        'reversible inhibition',
    }
    # both spellings are accepted, so that the row filter and the
    # `ligand_endogenous` field always agree with each other
    endogenous_values = {'t', 'true'}

    GuideToPharmacologyInteraction = collections.namedtuple(
        'GuideToPharmacologyInteraction',
        [
            'ligand',
            'ligand_id_type',
            'target',
            'target_id_type',
            'target_is_ligand',
            'ligand_organism',
            'target_organism',
            'effect',
            'ligand_location',
            'target_type',
            'ligand_endogenous',
            'pubmed_ids',
        ]
    )

    def is_positive(term):
        return term.lower().strip() in positives

    def is_negative(term):
        return term.lower().strip() in negatives

    def is_endogenous(record):
        return record['Endogenous'].strip().lower() in endogenous_values

    interactions = []
    complexes = {}

    def add_complex(components, references):
        """
        Registers a complex, merging it into an already existing one
        with the same string representation.
        """

        cplex = intera.Complex(
            components = components,
            sources = 'Guide2Pharma',
            references = references,
        )
        key = cplex.__str__()

        if key in complexes:
            complexes[key] += cplex

        else:
            complexes[key] = cplex

    url = urls.urls['gtp']['url']

    c = curl.Curl(url, silent = False, large = True, encoding = 'utf-8')

    # the first line is consumed only if it starts with `"#`;
    # otherwise we rewind, so the CSV reader sees it as the header
    line0 = next(c.result)

    if line0[:2] != '"#':

        c.fileobj.seek(0)

    data = csv.DictReader(c.result)

    if organism_ is not None:

        data = [
            d for d in data
            if (
                get_taxid(d['Target Species']) == ncbi_tax_id and
                ncbi_tax_id in set(
                    get_taxid(t)
                    for t in d['Ligand Species'].split('|')
                )
            )
        ]

    if endogenous:

        data = [d for d in data if is_endogenous(d)]

    for d in data:

        if is_positive(d['Type']) or is_positive(d['Action']):
            effect = 1

        elif is_negative(d['Type']) or is_negative(d['Action']):
            effect = -1

        else:
            effect = 0

        ligands = d['Ligand Gene Symbol'] or d['Ligand PubChem SID']
        ligands = ligands.split('|')
        ligand_taxons = [get_taxid(t) for t in d['Ligand Species'].split('|')]

        targets = (
            d['Target UniProt ID'] or
            d['Target Ligand UniProt ID'] or
            d['Target Ligand PubChem SID']
        )
        targets = targets.split('|')
        references = d['PubMed ID'].split('|') if d['PubMed ID'] else []

        target_has_uniprot = bool(
            d['Target UniProt ID'] or
            d['Target Ligand UniProt ID']
        )

        if process_interactions:

            # each ligand gets exactly one organism
            if len(ligand_taxons) == len(ligands):

                ligand_organisms = ligand_taxons

            elif len(ligand_taxons) == 1:

                # one species given for all the ligands
                ligand_organisms = ligand_taxons * len(ligands)

            else:

                # the lists can not be aligned: the organism of the
                # individual ligands is unknown
                ligand_organisms = [None] * len(ligands)

            ligand_id_type = (
                'genesymbol'
                    if d['Ligand Gene Symbol'] else
                'pubchem_sid'
                    if d['Ligand PubChem SID'] else
                None
            )
            target_id_type = (
                'uniprot'
                    if target_has_uniprot else
                'pubchem_sid'
                    if d['Target Ligand PubChem SID'] else
                None
            )
            target_is_ligand = bool(d['Target Ligand'])
            target_organism = get_taxid(d['Target Species'])
            ligand_location = d['Ligand Context'].strip().lower() or None
            target_type = d['Receptor Site'].strip().lower() or None
            ligand_endogenous = is_endogenous(d)

            # one record for each ligand-target pair
            for (ligand, ligand_organism), target in itertools.product(
                zip(ligands, ligand_organisms),
                targets,
            ):

                interactions.append(
                    GuideToPharmacologyInteraction(
                        ligand = ligand,
                        ligand_id_type = ligand_id_type,
                        target = target,
                        target_id_type = target_id_type,
                        target_is_ligand = target_is_ligand,
                        ligand_organism = ligand_organism,
                        target_organism = target_organism,
                        effect = effect,
                        ligand_location = ligand_location,
                        target_type = target_type,
                        ligand_endogenous = ligand_endogenous,
                        pubmed_ids = references,
                    )
                )

        if process_complexes:

            # more than one target UniProt ID in a record is
            # treated as one complex
            if len(targets) > 1 and target_has_uniprot:

                add_complex(targets, references)

            if len(ligands) > 1 and d['Ligand Gene Symbol']:

                ligand_uniprots = [
                    mapping.map_name0(ligand, 'genesymbol', 'uniprot')
                    for ligand in ligands
                ]
                # gene symbols without a UniProt mapping are dropped
                ligand_uniprots = [u for u in ligand_uniprots if u]

                if len(ligand_uniprots) > 1:

                    add_complex(ligand_uniprots, references)

    return interactions, complexes




# [docs]
def guide2pharma_interactions(**kwargs):
    """
    Interactions from Guide to Pharmacology.

    Calls `guide2pharma_download` with the processing of complexes
    disabled and returns only the first element of its result, the
    interactions.

    Args:
        kwargs
            Passed to `guide2pharma_download`.
    """

    result = guide2pharma_download(process_complexes = False, **kwargs)

    return result[0]




# [docs]
def guide2pharma_complexes(**kwargs):
    """
    Complexes from Guide to Pharmacology.

    Calls `guide2pharma_download` with the processing of interactions
    disabled and returns only the second element of its result, the
    complexes.

    Args:
        kwargs
            Passed to `guide2pharma_download`.
    """

    result = guide2pharma_download(process_interactions = False, **kwargs)

    return result[1]