Source code for pypath.inputs.deathdomain
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
from future.utils import iteritems
import bs4
import pypath.resources.urls as urls
import pypath.share.curl as curl
[docs]
def deathdomain_interactions():
"""
Downloads HTML tables from the DeathDomain webpage and extracts
the interactions.
"""
result = []
families = ['CARD', 'DD', 'DED', 'PYD']
for fam in families:
url = urls.urls['death']['url_dead'] % fam
c = curl.Curl(url, silent = False)
html = c.result
soup = bs4.BeautifulSoup(html, 'lxml')
d = {}
for tab in soup.find_all('table', {'class': 'tab'}):
for r in tab.find_all('tr'):
cs = r.find_all('td')
if len(cs) > 0:
i = {
'family': cs[0].find('a').text,
'A': cs[1].find('a').text,
'B': cs[3].find('a').text,
'met': cs[4].text if cs[4].text is not None else '',
'ref': cs[-1].find('a').text
}
if i['A'] not in d:
d[i['A']] = {}
if i['B'] not in d[i['A']]:
d[i['A']][i['B']] = {}
d[i['A']][i['B']]['family'] = i['family']
if 'met' not in d[i['A']][i['B']]:
d[i['A']][i['B']]['met'] = []
d[i['A']][i['B']]['met'].append(i['met'])
if 'ref' not in d[i['A']][i['B']]:
d[i['A']][i['B']]['ref'] = []
d[i['A']][i['B']]['ref'].append(i['ref'])
for p1, v1 in iteritems(d):
for p2, v2 in iteritems(v1):
if p1 != p2:
result.append([
p1, p2, ';'.join(common.unique_list(v2['met'])),
';'.join(common.unique_list(v2['ref']))
])
return result
[docs]
def deathdomain_interactions_rescued():
"""
Loads the DeathDomain interactions from rescued data.
"""
url = urls.urls['death']['url_alive']
c = curl.Curl(url, silent = False, large = True)
_ = next(c.result)
return [
[i.strip() for i in line.split('\t')]
for line in c.result
]