Source code for pypath.inputs.cancersea
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import os
import collections
import bs4
import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping
[docs]
def cancersea_annotations():
"""
Retrieves genes annotated with cancer funcitonal states from the
CancerSEA database.
"""
CancerseaAnnotation = collections.namedtuple(
'CancerseaAnnotation',
[
'state',
],
)
annotations = collections.defaultdict(set)
url = urls.urls['cancersea']['rescued']
c = curl.Curl(url, silent = False, large = False)
soup = bs4.BeautifulSoup(c.result, 'html.parser')
for row in soup.find_all('tbody')[1].find_all('tr'):
state = row.find_all('td')[0].text
url_end = row.find_all('td')[-1].find('a').attrs['href']
url_end = url_end.rsplit('/', maxsplit = 1)[-1]
data_url = urls.urls['cancersea']['rescued_data'] % url_end
c = curl.Curl(data_url, silent = False, large = True)
_ = next(c.result)
for line in c.result:
line = line.strip().split('\t')
uniprots = mapping.map_name(line[1], 'genesymbol', 'uniprot')
for uniprot in uniprots:
annotations[uniprot].add(
CancerseaAnnotation(state = state)
)
return dict(annotations)