# Source code for pypath.inputs.pdzbase

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import re
import collections

import bs4

import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.utils.taxonomy as taxonomy


# Record type for one PDZ domain-ligand interaction. Defined at module level
# so that every call returns instances of the same class and the records can
# be pickled (pickle looks classes up by their module-level name).
PDZbaseInteraction = collections.namedtuple(
    'PDZbaseInteraction',
    [
        'uniprot_pdz',
        'isoform_pdz',
        'uniprot_ligand',
        'isoform_ligand',
        'genesymbol_pdz',
        'genesymbol_ligand',
        'pdz_domain',
        'organism',
        'pubmed',
    ],
)

# UniProt ID with optional isoform suffix, e.g. O14754-1
_PDZBASE_REUPI = re.compile(r'([\w]{6,10})(?:-([0-9]{1,2}))?')

# Number of leading table cells read from each row (indices 0 to 6)
_PDZBASE_NCOLS = 7


def _pdzbase_uniprot_isoform(cell):
    """
    Splits the text of a UniProt ID cell into accession and isoform.

    Args
        cell (str): Cell content, e.g. ``O14754-1`` or ``O14754``.

    Returns
        Tuple of the accession (str) and the isoform number (int, 1 if
        the ID carries no isoform suffix); None if the cell does not
        start with a UniProt-like ID.
    """

    match = _PDZBASE_REUPI.match(cell)

    if match is None:

        return None

    uniprot, isoform = match.groups()

    return uniprot, int(isoform) if isoform else 1


def pdzbase_interactions():
    """
    Downloads data from PDZbase. Parses data from the HTML tables.

    The first row of the interaction table is treated as a header and
    ignored. Rows with fewer than seven ``td`` cells, or with a PDZ or
    ligand cell that does not start with a UniProt-like ID, are skipped.
    Errors from locating the table in the page, and from converting the
    domain number or the PubMed ID to integer, are not caught.

    Returns
        List of ``PDZbaseInteraction`` named tuples with interaction
        data. Isoform numbers default to 1 where the UniProt ID has no
        isoform suffix; the organism column is passed through
        ``taxonomy.ensure_ncbi_tax_id``.
    """

    url = urls.urls['pdzbase']['url_rescued']
    download = curl.Curl(url, silent = False)
    soup = bs4.BeautifulSoup(download.result, 'html.parser')

    # the interaction table is nested two levels below the 4th table
    rows = (
        soup.find_all('table')[3].find('table').find('table').find_all('tr')
    )

    result = []

    # slicing instead of `del rows[0]`: an empty table gives an empty result
    for row in rows[1:]:

        cells = [td.text.strip() for td in row.find_all('td')]

        if len(cells) < _PDZBASE_NCOLS:

            # not a data row (e.g. no `td` cells at all)
            continue

        pdz = _pdzbase_uniprot_isoform(cells[1])
        ligand = _pdzbase_uniprot_isoform(cells[4])

        if pdz is None or ligand is None:

            # without both UniProt IDs the record is unusable
            continue

        uniprot_pdz, isoform_pdz = pdz
        uniprot_ligand, isoform_ligand = ligand

        result.append(
            PDZbaseInteraction(
                uniprot_pdz = uniprot_pdz,
                isoform_pdz = isoform_pdz,
                uniprot_ligand = uniprot_ligand,
                isoform_ligand = isoform_ligand,
                genesymbol_pdz = cells[0],
                genesymbol_ligand = cells[3],
                pdz_domain = int(cells[2]),
                organism = taxonomy.ensure_ncbi_tax_id(cells[5]),
                pubmed = int(cells[6]),
            )
        )

    return result