Source code for pypath.inputs.mppi
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
try:
    # The C accelerated alias exists only on Python < 3.9.
    import xml.etree.cElementTree as ET
except ImportError:
    # Python >= 3.9: `cElementTree` was removed; `ElementTree` uses the
    # C implementation automatically when it is available.
    import xml.etree.ElementTree as ET

import pypath.share.curl as curl
import pypath.resources.urls as urls
[docs]
def mppi_interactions(organism = 9606):
    """
    Downloads the MPPI PSI-MI XML file and extracts binary interactions.

    The file is fetched from ``urls.urls['mppi']['url_rescued']`` and
    parsed with ElementTree. Only interactions that contain exactly two
    ``proteinInteractor`` elements are processed.

    Args:
        organism: NCBI Taxonomy ID (int, or its string form) that both
            interactors must carry in the ``ncbiTaxId`` attribute of
            their ``organism`` element. ``None`` disables the organism
            filter.

    Returns:
        A list with one list per interaction, each having ten fields:

        0. ``;``-joined ``id`` attributes of the ``primaryRef`` elements
           under the first ``bibref`` (presumably PubMed IDs -- the
           database is not checked here);
        1. ``;``-joined ``shortLabel`` texts under the first
           ``interactionDetection`` element;
        2-5. for the first interactor: ``primaryRef`` ``id`` (empty
           string if absent), ``primaryRef`` ``db``, ``fullName`` text,
           ``ncbiTaxId``;
        6-9. the same four fields for the second interactor.

    Raises:
        IndexError: if an interactor lacks an ``organism``,
            ``primaryRef`` or ``fullName`` element.
        KeyError: if an interactor's ``primaryRef`` has no ``db``
            attribute or its ``organism`` has no ``ncbiTaxId``.
    """

    ns = '{net:sf:psidev:mi}'
    path_interactor = './/%sproteinInteractor' % ns
    path_organism = './/%sorganism' % ns
    path_primary_ref = './/%sprimaryRef' % ns
    path_bibref = './/%sbibref' % ns
    path_detection = './/%sinteractionDetection' % ns
    path_short_label = './/%sshortLabel' % ns
    path_full_name = './/%sfullName' % ns
    key_taxid = 'ncbiTaxId'

    # The XML attribute is a string, so the requested organism is
    # converted once for comparison. `None` must stay `None`: formatting
    # it with `%u` would raise and make the "no filter" mode unusable.
    if organism is None:
        taxid = None
    elif isinstance(organism, str):
        taxid = organism
    else:
        taxid = '%u' % organism

    url = urls.urls['mppi']['url_rescued']
    c = curl.Curl(url, silent = False, large = True)
    xmlroot = ET.parse(c.gzfile).getroot()
    # Second child of the first child of the root; assumed to be the
    # interaction list of the first entry -- TODO confirm against the file.
    ilist = xmlroot[0][1]

    result = []

    for interaction in ilist:

        interactors = interaction.findall(path_interactor)

        # only binary interactions are reported
        if len(interactors) != 2:
            continue

        # Lazy evaluation: stops at the first interactor from another
        # organism, like the original short-circuiting condition.
        if taxid is not None and not all(
            prot.findall(path_organism)[0].attrib[key_taxid] == taxid
            for prot in interactors
        ):
            continue

        # references: `id` of each primaryRef under the first bibref
        bib = interaction.find(path_bibref)
        pmids = [] if bib is None else [
            ref.attrib['id']
            for ref in bib.findall(path_primary_ref)
            if 'id' in ref.attrib
        ]

        # detection methods: shortLabel texts under the first
        # interactionDetection; labels without text are skipped because
        # `str.join` cannot handle `None`
        det = interaction.find(path_detection)
        meths = [] if det is None else [
            label.text
            for label in det.findall(path_short_label)
            if label.text
        ]

        proteins = []

        for prot in interactors:

            ref = prot.findall(path_primary_ref)[0]

            proteins.append({
                'u': ref.attrib.get('id', ''),
                'nt': ref.attrib['db'],
                'gn': prot.findall(path_full_name)[0].text,
                'o': prot.findall(path_organism)[0].attrib[key_taxid],
            })

        result.append([
            ';'.join(pmids),
            ';'.join(meths),
            proteins[0]['u'],
            proteins[0]['nt'],
            proteins[0]['gn'],
            proteins[0]['o'],
            proteins[1]['u'],
            proteins[1]['nt'],
            proteins[1]['gn'],
            proteins[1]['o'],
        ])

    return result