Source code for pypath.inputs.integrins

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import bs4

import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping


[docs] def get_integrins(): """ Returns a set of the UniProt IDs of the human integrins from Table 1 of Takada et al 2007 (10.1186/gb-2007-8-5-215). """ url = urls.urls['integrins']['url'] req_headers = [ 'Host: www.ncbi.nlm.nih.gov', 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:67.0) '\ 'Gecko/20100101 Firefox/67.0', 'Accept: text/html,application/xhtml+xml,' 'application/xml;q=0.9,*/*;q=0.8', 'Accept-Language: en-US,en;q=0.5', 'Connection: keep-alive', 'Upgrade-Insecure-Requests: 1', 'Pragma: no-cache', 'Cache-Control: no-cache', ] c = curl.Curl( url, silent = False, req_headers = req_headers, large = True, ) soup = bs4.BeautifulSoup(c.fileobj.read(), 'lxml') integrins = [] rows = soup.find_all('tr') for tr in rows[1:]: cells = [td for td in tr.find_all('td')] integrins.append(cells[-1].text.split('}')[-1]) return mapping.map_names(integrins, 'uniprot', 'uniprot')