# Source code for pypath.inputs.icellnet

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from future.utils import iteritems

import re
import collections
import itertools
import csv

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.utils.mapping as mapping
import pypath.internals.intera as intera
import pypath.core.entity as entity
import pypath.inputs.pubmed as pubmed_input


# One ligand-receptor interaction from the ICELLNET table.
#
# ligand, receptor: a single identifier or an `intera.Complex` when the
#     partner consists of more than one component
# family, subfamily: stripped column values or None if empty
# classification: list of labels or None if the column is empty
# resources: list of resource names or None
# references: list of reference identifiers (strings)
IcellnetRecord = collections.namedtuple(
    'IcellnetRecord',
    'ligand receptor family subfamily classification resources references',
)


def icellnet_interactions():
    """
    Download the ICELLNET table and iterate over its interactions.

    The table is read as a semicolon delimited CSV with a header row.
    For each row the ligand and the receptor columns are converted to
    entities by ``_icellnet_get_components`` and ``_icellnet_get_entity``.
    Rows where either the ligand or the receptor is empty after this
    conversion are skipped.

    Yields:
        IcellnetRecord: One record for each row that has both a ligand
            and a receptor.
    """

    url = urls.urls['icellnet']['url']
    download = curl.Curl(url, silent = False, large = True)

    # this file starts with an UTF8 BOM: the first character is consumed
    # if it is the BOM, otherwise the file is rewound to its beginning
    if download.fileobj.read(1) != '\ufeff':

        download.fileobj.seek(0)

    rows = list(csv.DictReader(download.result, delimiter = ';'))

    for row in rows:

        references = _icellnet_get_references(row)
        resources = _icellnet_get_resources(row)

        if resources:

            # digit-only resource labels are moved to the references
            numeric = [res for res in resources if res.isdigit()]
            resources = [res for res in resources if not res.isdigit()]
            references.extend(numeric)

        partners = [
            _icellnet_get_components(row, prefix)
            for prefix in ('Ligand', 'Receptor')
        ]

        # the same list of references is shared by both partners
        ligand, receptor = (
            _icellnet_get_entity(components, references)
            for components in partners
        )

        if not (ligand and receptor):

            continue

        family = row['Family'].strip() or None
        subfamily = row['Subfamily'].strip() or None
        classification = None

        if row['Classifications'].strip():

            # slash separated labels: dots removed, first letter upper case
            classification = [
                label.strip().replace('.', '').capitalize()
                for label in row['Classifications'].split('/')
            ]

        yield IcellnetRecord(
            ligand = ligand,
            receptor = receptor,
            family = family,
            subfamily = subfamily,
            classification = classification,
            resources = resources,
            references = references,
        )
def icellnet_complexes():
    """
    Collect the complexes taking part in ICELLNET interactions.

    Every ligand and receptor yielded by ``icellnet_interactions`` that
    has a ``components`` attribute is considered a complex. Complexes with
    the same string representation are merged by in-place addition.

    Returns:
        dict: String representations of the complexes as keys and the
            (merged) complex objects as values.
    """

    by_name = {}

    for interaction in icellnet_interactions():

        for role in ('ligand', 'receptor'):

            partner = getattr(interaction, role)

            # partners without components are not complexes
            if not hasattr(partner, 'components'):

                continue

            name = str(partner)

            if name in by_name:

                by_name[name] += partner

            else:

                by_name[name] = partner

    return by_name
def icellnet_annotations(complexes = None):
    """
    Build annotations of ligands and receptors from ICELLNET.

    Args:
        complexes: Selects the kind of entities to annotate. If equal to
            ``True`` only complexes, if equal to ``False`` only proteins
            (members of complexes included), otherwise both complexes
            and proteins are annotated.

    Returns:
        dict: Entities as keys and sets of ``IcellnetAnnotation`` named
            tuples (role, family, subfamily, classification) as values.
    """

    IcellnetAnnotation = collections.namedtuple(
        'IcellnetAnnotation',
        ('role', 'family', 'subfamily', 'classification'),
    )

    def select_entities(interaction, role, complexes):
        """
        Return the entities to annotate for one side of an interaction.
        """

        partner = getattr(interaction, role)

        if not partner:

            return ()

        as_complex = (
            (partner,)
            if entity.Entity._is_complex(partner) else
            ()
        )
        # anything that is not a protein is expanded to its components
        as_proteins = (
            (partner,)
            if entity.Entity._is_protein(partner) else
            tuple(partner.components.keys())
        )

        # equality (not identity) tests are intentional here, this way
        # values equal to True or False behave the same as the booleans
        if complexes == True:

            return as_complex

        if complexes == False:

            return as_proteins

        return as_complex + as_proteins

    result = collections.defaultdict(set)

    for interaction in icellnet_interactions():

        # lists are not hashable, hence the sorted tuple
        classification = (
            tuple(sorted(interaction.classification))
            if interaction.classification else
            None
        )

        for role in ('ligand', 'receptor'):

            for the_entity in select_entities(interaction, role, complexes):

                result[the_entity].add(
                    IcellnetAnnotation(
                        role = role,
                        family = interaction.family,
                        subfamily = interaction.subfamily,
                        classification = classification,
                    )
                )

    return dict(result)
def _icellnet_get_components(line, prefix):
    """
    Translate the gene symbols of one interaction partner to UniProt IDs.

    Args:
        line (dict): One row of the ICELLNET table, column labels as keys.
        prefix (str): Only columns with labels starting with this string
            are considered, e.g. "Ligand" or "Receptor".

    Returns:
        list: The results of the gene symbol to UniProt translation;
            symbols with empty translation result are omitted.
    """

    genesymbols = [
        genesymbol.strip()
        for label, genesymbol in line.items()
        # `csv.DictReader` stores surplus fields under the key `None` and
        # fills missing fields with `None`: both are ignored here instead
        # of failing with AttributeError
        if (
            label and
            label.startswith(prefix) and
            genesymbol and
            genesymbol.strip()
        )
    ]

    uniprots = (
        mapping.map_name0(genesymbol, 'genesymbol', 'uniprot')
        for genesymbol in genesymbols
    )

    return [uniprot for uniprot in uniprots if uniprot]


def _icellnet_get_entity(components, references):
    """
    Create one entity from the components of an interaction partner.

    Args:
        components (list): Identifiers of the components.
        references (list): References, passed to the complex if one is
            created.

    Returns:
        An ``intera.Complex`` if there are more than one components; the
        only component itself if there is exactly one; None if there is
        no component at all.
    """

    if len(components) > 1:

        return intera.Complex(
            components = components,
            sources = 'ICELLNET',
            references = references,
        )

    if len(components) == 1:

        return components[0]

    return None


def _icellnet_get_references(line):
    """
    Extract the references from the "PubMed ID" column of a row.

    Args:
        line (dict): One row of the ICELLNET table, column labels as keys.

    Returns:
        list: The identifiers accepted by ``pubmed_input.only_pmids``,
            as strings of integers (e.g. "12345.0" becomes "12345").
    """

    # the value is None if the row is shorter than the header
    field = line['PubMed ID'] or ''

    candidates = (ref.strip() for ref in re.split(r'[,;]', field))

    return [
        str(int(float(pmid)))
        for pmid in pubmed_input.only_pmids(
            ref for ref in candidates if ref
        )
    ]


# all Ramilowski labels, whatever year they carry, are set to 2015
_ICELLNET_RE_RAMILOWSKI = re.compile(r'(Ramilowski)\d{4}')

# resource labels used in the old ICELLNET table and their replacements
_ICELLNET_RESOURCE_SYNONYMS = {
    'Signor': 'SIGNOR',
    'guidetopharmacology.org': 'Guide2Pharma',
    'IUPHAR': 'Guide2Pharma',
    'IUPHAR-DB': 'Guide2Pharma',
    'GO_lig_rec': 'GO-lig-rec',
    'CellPhone': 'CellPhoneDB',
    'SignaLink': 'SignaLink3',
    'Innate': 'InnateDB',
    'Kegg': 'KEGG',
}


def _icellnet_get_resources(line):
    """
    Resources of one row of the ICELLNET table.

    Args:
        line (dict): One row of the ICELLNET table; not used at the moment.

    Returns:
        Always None.
    """

    # the recent update of ICELLNET does not list the resources any more :(
    # the parser of the old "Source for interaction" column is kept in
    # `_icellnet_parse_resources`
    return None


def _icellnet_parse_resources(field):
    """
    Parse the content of the old "Source for interaction" column.

    Currently not called by ``_icellnet_get_resources``, see the comment
    there.

    Args:
        field (str): Resource labels separated by slash, comma, semicolon
            or space.

    Returns:
        list: Sorted, unique resource names, with the synonyms replaced;
            None if no resource name is left.
    """

    field = field.replace(
        'Dinarello et al.2013 (Immunity)',
        'Dinarello2013'
    )

    labels = (label.strip() for label in re.split(r'[/,; ]', field))

    resources = {
        _ICELLNET_RE_RAMILOWSKI.sub(
            r'\g<1>2015',
            _ICELLNET_RESOURCE_SYNONYMS.get(label, label)
        )
        for label in labels
    }
    # leftovers of splitting
    resources.discard('')
    resources.discard('DB')

    return sorted(resources) or None