Source code for pypath.inputs.icellnet

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from future.utils import iteritems

import re
import collections
import itertools
import csv

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.utils.mapping as mapping
import pypath.internals.intera as intera
import pypath.core.entity as entity
import pypath.inputs.pubmed as pubmed_input


# One ligand-receptor interaction, as yielded by `icellnet_interactions`.
IcellnetRecord = collections.namedtuple(
    'IcellnetRecord',
    (
        # single identifier or an `intera.Complex`
        'ligand',
        # single identifier or an `intera.Complex`
        'receptor',
        # stripped content of the "Family" column, `None` if empty
        'family',
        # stripped content of the "Subfamily" column, `None` if empty
        'subfamily',
        # list of labels from the "Classifications" column, `None` if empty
        'classification',
        # list of resource names or `None`
        'resources',
        # list of PubMed IDs as strings
        'references',
    ),
)



[docs]
def icellnet_interactions():
    """
    Downloads the ICELLNET table and yields its ligand-receptor pairs.

    The table is read as a semicolon delimited CSV with a header row.
    Each row is turned into an ``IcellnetRecord``; rows where either the
    ligand or the receptor could not be obtained are skipped.

    Yields:
        IcellnetRecord: one record for each row having both partners.
    """

    download = curl.Curl(
        urls.urls['icellnet']['url'],
        silent = False,
        large = True,
    )

    # this file starts with an UTF8 BOM: the first character is consumed,
    # and the file is rewound only if it turned out to be something else
    first_char = download.fileobj.read(1)

    if first_char != '\ufeff':

        download.fileobj.seek(0)

    rows = list(csv.DictReader(download.result, delimiter = ';'))

    for row in rows:

        references = _icellnet_get_references(row)
        resources = _icellnet_get_resources(row)

        if resources:

            # all-digit items among the resources are moved to the references
            numeric = [res for res in resources if res.isdigit()]
            references.extend(numeric)
            resources = [res for res in resources if not res.isdigit()]

        ligand_ids = _icellnet_get_components(row, 'Ligand')
        receptor_ids = _icellnet_get_components(row, 'Receptor')

        ligand = _icellnet_get_entity(ligand_ids, references)
        receptor = _icellnet_get_entity(receptor_ids, references)

        if not (ligand and receptor):

            continue

        # empty cells are represented by `None`
        family = row['Family'].strip() or None
        subfamily = row['Subfamily'].strip() or None

        if row['Classifications'].strip():

            # slash separated labels: dots removed, first letter upper case
            classification = [
                label.strip().replace('.', '').capitalize()
                for label in row['Classifications'].split('/')
            ]

        else:

            classification = None

        yield IcellnetRecord(
            ligand = ligand,
            receptor = receptor,
            family = family,
            subfamily = subfamily,
            classification = classification,
            resources = resources,
            references = references,
        )




[docs]
def icellnet_complexes():
    """
    Collects the complexes from the ICELLNET interactions.

    Returns:
        dict: string representations of the complexes as keys and the
        complex objects as values. When the same key comes up again, the
        new object is combined into the stored one by the ``+=`` operator.
    """

    by_label = {}

    for interaction in icellnet_interactions():

        for partner in (interaction.ligand, interaction.receptor):

            # partners without `components` attribute are not complexes
            if not hasattr(partner, 'components'):

                continue

            label = partner.__str__()

            if label in by_label:

                by_label[label] += partner

            else:

                by_label[label] = partner

    return by_label




[docs]
def icellnet_annotations(complexes = None):
    """
    Compiles ligand and receptor annotations from ICELLNET interactions.

    Args:
        complexes: Selects the entities to be annotated. If it equals
            ``True``, only the entities accepted by
            ``entity.Entity._is_complex``; if it equals ``False``, only
            the entities accepted by ``entity.Entity._is_protein`` and
            the keys of the ``components`` of all others; with any other
            value, both groups.

    Returns:
        dict: entities as keys, sets of ``IcellnetAnnotation`` tuples
        (role, family, subfamily, classification) as values.
    """

    IcellnetAnnotation = collections.namedtuple(
        'IcellnetAnnotation',
        ('role', 'family', 'subfamily', 'classification'),
    )


    def select_entities(interaction, role):
        """
        Entities to annotate for one side (`role`) of an interaction.
        """

        partner = getattr(interaction, role)

        if not partner:

            return ()

        if entity.Entity._is_complex(partner):

            as_complex = (partner,)

        else:

            as_complex = ()

        if entity.Entity._is_protein(partner):

            as_proteins = (partner,)

        else:

            # not a protein: annotate the members instead
            as_proteins = tuple(partner.components.keys())

        # comparison by equality (not identity) is intentional here,
        # this way 1 and 0 act the same as True and False
        if complexes == True:

            return as_complex

        if complexes == False:

            return as_proteins

        return as_complex + as_proteins


    result = collections.defaultdict(set)

    for interaction in icellnet_interactions():

        # lists are not hashable, a sorted tuple is stored instead
        if interaction.classification:

            classification = tuple(sorted(interaction.classification))

        else:

            classification = None

        for role in ('ligand', 'receptor'):

            for member in select_entities(interaction, role):

                result[member].add(
                    IcellnetAnnotation(
                        role = role,
                        family = interaction.family,
                        subfamily = interaction.subfamily,
                        classification = classification,
                    )
                )

    return dict(result)



def _icellnet_get_components(line, prefix):

    genesymbols = [
        genesymbol.strip()
        for label, genesymbol in iteritems(line)
        if label.startswith(prefix) and genesymbol.strip()
    ]

    return [
        uniprot
        for uniprot in
        (
            mapping.map_name0(genesymbol, 'genesymbol', 'uniprot')
            for genesymbol in genesymbols
        )
        if uniprot
    ]


def _icellnet_get_entity(components, references):

    if len(components) > 1:

        return intera.Complex(
            components = components,
            sources = 'ICELLNET',
            references = references,
        )

    elif len(components) == 1:

        return components[0]


def _icellnet_get_references(line):
    """
    Extracts the references from the "PubMed ID" column of a row.

    Args:
        line (dict): One row of the ICELLNET table.

    Returns:
        list: The items returned by ``pubmed_input.only_pmids``, each
        normalized to the string of an integer.
    """

    # the cell may contain several IDs, separated by commas or semicolons
    stripped = (
        item.strip()
        for item in re.split(r'[,;]', line['PubMed ID'])
    )
    candidates = (item for item in stripped if item)

    # NOTE(review): the candidates are handed over as a one-shot generator;
    # confirm that `only_pmids` does not need to iterate them repeatedly
    pmids = pubmed_input.only_pmids(candidates)

    return [str(int(float(pmid))) for pmid in pmids]


def _icellnet_get_resources(line):

    # the recent update of ICELLNET does not list the resources any more :(
    return None

    rerami = re.compile(r'(Ramilowski)\d{4}')
    resource_synonyms = {
        'Signor': 'SIGNOR',
        'guidetopharmacology.org': 'Guide2Pharma',
        'IUPHAR': 'Guide2Pharma',
        'IUPHAR-DB': 'Guide2Pharma',
        'GO_lig_rec': 'GO-lig-rec',
        'CellPhone': 'CellPhoneDB',
        'SignaLink': 'SignaLink3',
        'Innate': 'InnateDB',
        'Kegg': 'KEGG',
    }

    resources = line['Source for interaction'].replace(
        'Dinarello et al.2013 (Immunity)',
        'Dinarello2013'
    )
    resources = {
        rerami.sub(
            r'\g<1>2015',
            resource_synonyms.get(res, res)
        )
        for res in
        (_res.strip() for _res in re.split(r'[/,; ]', resources))
    }

    resources.discard('')
    resources.discard('DB')

    return sorted(resources) or None