Source code for pypath.inputs.netpath
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
from future.utils import iteritems
import re
import collections
import bs4
import xml.etree.cElementTree as ET
import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping
[docs]
def netpath_interactions():
    """
    Collect pairwise interactions from the NetPath PSI-MI XML files.

    Downloads the resource at ``urls.urls['netpath_psimi']['url']``; the
    result is iterated as a mapping of file names to XML contents and only
    the members whose name ends with ``xml`` are processed. The pathway
    number is taken from the file name (``NetPath_<number>_``) and is
    translated to a pathway name by ``netpath_names()``. Files whose name
    does not contain a pathway number are reported on standard output and
    skipped.

    Returns:
        list: One list of eight items for each pair of interactors within
        an entry: name and semicolon separated Entrez Gene IDs of the
        first interactor, the same two fields for the second interactor,
        semicolon separated PubMed IDs, semicolon separated detection
        method labels, the interaction type label and the pathway name.

    Raises:
        KeyError: If a pathway number found in a file name is missing
            from the names returned by ``netpath_names()``, or if a
            participant refers to an interactor that is not listed in
            the entry.
    """

    # Local import: the module header does not import `sys`.
    import sys

    result = []
    repwnum = re.compile(r'NetPath_([0-9]+)_')
    mi = '{net:sf:psidev:mi}'
    url = urls.urls['netpath_psimi']['url']
    c = curl.Curl(url, silent = False)
    data = {
        fname: content
        for fname, content in iteritems(c.result)
        if fname.endswith('xml')
    }
    pwnames = netpath_names()

    for pwfile, rawxml in iteritems(data):

        pwnum_match = repwnum.search(pwfile)

        if pwnum_match is None:

            sys.stdout.write('Error at processing file:\n')
            sys.stdout.write(pwfile)
            sys.stdout.write('\n')
            sys.stdout.flush()
            # Without a pathway number the records can not be labeled;
            # carrying on would reuse the number of the previous file.
            continue

        pwname = pwnames[pwnum_match.group(1)]
        root = ET.fromstring(rawxml)

        for e in root.findall(mi + 'entry'):

            refs = []
            mets = []

            for ex in e.find(mi + 'experimentList').findall(
                    mi + 'experimentDescription'):

                for pm in ex.find(mi + 'bibref').iter(mi + 'primaryRef'):

                    if pm.attrib['db'] == 'pubmed':

                        refs.append(pm.attrib['id'])

                for me in ex.find(mi + 'interactionDetectionMethod').\
                        iter(mi + 'shortLabel'):

                    mets.append(me.text)

            # interactor ID -> (name, Entrez Gene IDs[, roles])
            mols = {}

            for mo in e.find(mi + 'interactorList').findall(
                    mi + 'interactor'):

                iid = mo.attrib['id']
                name = mo.find(mi + 'names').find(mi + 'shortLabel').text
                entrez = ''
                xref = mo.find(mi + 'xref')

                if xref is not None:

                    entrez = ';'.join([
                        ac.attrib['id']
                        for ac in xref.findall(mi + 'secondaryRef')
                        if ac.attrib['db'] == 'Entrez gene'
                    ])

                mols[iid] = (name, entrez)

            # Only the first interaction of each entry is read.
            theInt = e.find(mi + 'interactionList').find(mi + 'interaction')

            for p in theInt.find(mi + 'participantList').findall(
                    mi + 'participant'):

                pid = p.find(mi + 'interactorRef').text
                roles = ''
                role_list = p.find(mi + 'experimentalRoleList')

                if role_list is not None:

                    roles = ';'.join([
                        rl.find(mi + 'names').find(mi + 'shortLabel').text
                        for rl in role_list.findall(mi + 'experimentalRole')
                    ])

                # The roles are stored but not included in the output;
                # an unknown interactor reference raises KeyError here.
                mols[pid] += (roles, )

            intTyp = (
                theInt.find(
                    mi + 'interactionType'
                ).find(
                    mi + 'names'
                ).find(
                    mi + 'shortLabel'
                ).text
            )

            # Fields shared by every pair of this entry.
            common = [
                ';'.join(refs),
                ';'.join(mets),
                intTyp,
                pwname,
            ]
            molkeys = list(mols.keys())

            # NOTE(review): the inner range starts at `i`, hence every
            # interactor except the last one is also paired with itself.
            # Kept as it is for backward compatibility -- confirm whether
            # `i + 1` was intended.
            for i in range(0, len(mols) - 1):

                for j in range(i, len(mols)):

                    A = mols[molkeys[i]][0:2]
                    B = mols[molkeys[j]][0:2]
                    result.append(list(A) + list(B) + common)

    return result
[docs]
def netpath_names():
    """
    Retrieve the names of the NetPath pathways by their numeric IDs.

    Downloads the page at ``urls.urls['netpath_names']['url']`` and scans
    its anchors: for every link whose target starts with ``pathways`` and
    ends with ``_<number>``, the number is mapped to the link text.
    Anchors without an ``href`` attribute and pathway links without a
    trailing number are ignored.

    Returns:
        dict: Pathway numbers (strings of digits) as keys and pathway
        names (link texts, surrounding whitespace removed) as values.
    """

    repwnum = re.compile(r'_([0-9]+)$')
    result = {}
    url = urls.urls['netpath_names']['url']
    c = curl.Curl(url, silent = False)
    html = c.result
    soup = bs4.BeautifulSoup(html, 'html.parser')

    for a in soup.find_all('a'):

        # Named anchors (`<a name="...">`) carry no `href` at all.
        href = a.attrs.get('href')

        if not href or not href.startswith('pathways'):

            continue

        num_match = repwnum.search(href)

        if num_match is None:

            # A link into the pathways section that does not point to
            # one particular pathway.
            continue

        result[num_match.group(1)] = a.text.strip()

    return result
[docs]
def netpath_pathway_annotations():
    """
    Collect the NetPath pathway memberships of proteins.

    First requests the main page (``urls.urls['netpath_pw']['mainpage']``,
    bypassing the cache) to obtain the cookies set by the server, then
    downloads the page of each pathway listed by ``netpath_names()`` with
    those cookies. On each page the tables whose header cell (``td`` of
    class ``barhead``) starts with ``Molecules Invol`` are read; the text
    of every non-empty cell is treated as a gene symbol and translated to
    UniProt IDs by ``mapping.map_name``.

    Returns:
        dict: UniProt IDs as keys and sets of ``NetpathPathway`` named
        tuples (single field: ``pathway``, the pathway name) as values.
    """

    NetpathPathway = collections.namedtuple(
        'NetpathPathway',
        ['pathway'],
    )

    result = collections.defaultdict(set)

    url_template = urls.urls['netpath_pw']['url']
    url_main = urls.urls['netpath_pw']['mainpage']

    c = curl.Curl(url_main, cache = False)

    cookie = []

    for h in c.resp_headers:

        # HTTP field names are case-insensitive (lowercase in HTTP/2).
        if not h.lower().startswith(b'set-cookie'):

            continue

        # Separate the field name at the first colon only, so that cookie
        # values which themselves contain a colon are not truncated.
        value = h.decode().partition(':')[2]
        # Only the `name=value` pair is sent back, attributes are dropped.
        value = value.split(';')[0].strip()

        if value:

            cookie.append(value)

    cookie_hdr = ['Cookie: %s' % '; '.join(cookie)]

    pathway_ids = netpath_names()

    for _id, pathway in iteritems(pathway_ids):

        url = url_template % int(_id)

        c = curl.Curl(
            url,
            req_headers = cookie_hdr,
            silent = False,
            encoding = 'iso-8859-1',
        )

        soup = bs4.BeautifulSoup(c.result, 'html.parser')

        for tbl in soup.find_all('table'):

            hdr = tbl.find('td', {'class': 'barhead'})

            if not hdr or not hdr.text.strip().startswith('Molecules Invol'):

                continue

            for td in tbl.find_all('td'):

                genesymbol = td.text.strip()

                if not genesymbol:

                    continue

                uniprots = mapping.map_name(
                    genesymbol,
                    'genesymbol',
                    'uniprot',
                )

                for uniprot in uniprots:

                    result[uniprot].add(
                        NetpathPathway(
                            pathway = pathway
                        )
                    )

    return dict(result)