Source code for pypath.inputs.spike

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import itertools
import collections

from typing import Dict, List

import xml.etree.cElementTree as ET

import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping
import pypath.internals.intera as intera


[docs] def spike_interactions(min_confidence: int = 2) -> List[tuple]: """ Args min_confidence: Confidence (integrity) levels in SPIKE span from 1 to 4, 1 being the highest confidence. """ url = urls.urls['spike']['url'] c = curl.Curl( url, silent = False, large = True, files_needed = ['LatestSpikeDB.xml'], ) spikexml = c.result xml = ET.parse(spikexml['LatestSpikeDB.xml']) xmlroot = xml.getroot() # iterating genes bblock = xmlroot.find('BuildingBlock') rblock = xmlroot.find('RegulationBlock') iblock = xmlroot.find('InteractionBlock') genes = {} result = [] SpikeInteraction = collections.namedtuple( 'SpikeInteraction', ( 'entrez_a', 'genesymbol_a', 'entrez_b', 'genesymbol_b', 'directed', 'pmids', 'integrity', 'effect', 'assay', 'data_source', 'description', 'mechanism', 'regulation', ) ) Gene = collections.namedtuple('Gene', ('entrez', 'genesymbol', 'type')) for gene in bblock.findall('Gene'): sy = '' if 'name' not in gene.attrib else gene.attrib['name'] genes[gene.attrib['id']] = [ Gene( entrez = gene.find('XRef').attrib['id'], genesymbol = sy, type = 'gene', ) ] for grp in bblock.findall('Group'): if grp.attrib['type'] == 'Complex': members = [m.attrib['ref'] for m in grp.findall('Member')] if all( m in genes and genes[m][0].type != 'complex' for m in members ): uniprots = [ mapping.map_name(genes[m][0].entrez, 'entrez', 'uniprot') for m in members ] genes[grp.attrib['id']] = [ Gene( entrez = cplex, genesymbol = cplex, type = 'complex', ) for cplex in ( intera.Complex( name = grp.attrib['name'], components = ups, sources = 'SPIKE', ) for ups in itertools.product(*uniprots) ) ] for i in itertools.chain( rblock.findall('Regulation'), iblock.findall('Interaction'), ): regulation = i.tag == 'Regulation' src_tag = 'Source' if regulation else 'ProteinA' tgt_tag = 'PhysicalTarget' if regulation else 'ProteinB' ds = i.attrib['dataSource'] itg = i.attrib['integrity'] eff = i.attrib.get('effect', '') mec = i.attrib.get('mechanism', '') src = i.find(src_tag).attrib['ref'] tgt = i.find(tgt_tag).attrib['ref'] dcd = str(int(regulation)) dsc = ( '' if i.find('Description') is None else i.find('Description').text.replace('\n', ' ') ) asy = ( '' if 'biologicalAssay' not in i.attrib else i.attrib['biologicalAssay'] ) refs = i.findall('Reference') pmids = [r.attrib['pmid'] for r in refs] if src in genes and tgt in genes: if int(itg) <= min_confidence: for _src, _tgt in itertools.product(genes[src], genes[tgt]): result.append( SpikeInteraction( entrez_a = _src.entrez, genesymbol_a = _src.genesymbol, entrez_b = _tgt.entrez, genesymbol_b = _tgt.genesymbol, directed = dcd, pmids = ';'.join(pmids), integrity = itg, effect = eff, assay = asy, data_source = ds, description = dsc, mechanism = mec, regulation = regulation, ) ) return result
[docs] def spike_complexes(min_confidence: int = 2) -> Dict[str, intera.Complex]: interactions = spike_interactions(min_confidence = min_confidence) complexes = [ getattr(i, attr) for i in interactions for attr in ('entrez_a', 'entrez_b') if isinstance(getattr(i, attr), intera.Complex) ] return dict((cplx.__str__(), cplx) for cplx in complexes)