#!/usr/bin/env python# -*- coding: utf-8 -*-## This file is part of the `pypath` python module## Copyright 2014-2023# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University## Authors: see the file `README.rst`# Contact: Dénes Türei (turei.denes@gmail.com)## Distributed under the GPLv3 License.# See accompanying file LICENSE.txt or copy at# https://www.gnu.org/licenses/gpl-3.0.html## Website: https://pypath.omnipathdb.org/#importreimportcollectionsimportbs4importpypath.share.curlascurlimportpypath.resources.urlsasurlsimportpypath.utils.taxonomyastaxonomy
def pdzbase_interactions():
    """
    Downloads data from PDZbase. Parses data from the HTML tables.

    The page address is taken from ``urls.urls['pdzbase']['url_rescued']``.
    Interaction rows are the ``<tr>`` elements of the table nested two
    levels below the fourth ``<table>`` of the document; the first row
    (presumably the column header) is discarded.

    Rows that cannot be an interaction record are skipped instead of
    aborting the whole parse: rows with fewer than seven ``<td>`` cells,
    and rows where either UniProt cell does not start with something that
    looks like an accession. When an accession carries no isoform suffix
    (e.g. ``O14754`` instead of ``O14754-1``), isoform ``1`` is assumed.

    Returns
        List of ``PDZbaseInteraction`` named tuples with the fields
        ``uniprot_pdz``, ``isoform_pdz``, ``uniprot_ligand``,
        ``isoform_ligand``, ``genesymbol_pdz``, ``genesymbol_ligand``,
        ``pdz_domain``, ``organism`` and ``pubmed``. An empty list is
        returned if the download yielded no content.

    Raises
        ValueError: if the PDZ domain number or the PubMed ID of an
            otherwise well formed row is not an integer.
        IndexError, AttributeError: if the page no longer contains the
            expected nesting of tables.
    """

    PDZbaseInteraction = collections.namedtuple(
        'PDZbaseInteraction',
        [
            'uniprot_pdz',
            'isoform_pdz',
            'uniprot_ligand',
            'isoform_ligand',
            'genesymbol_pdz',
            'genesymbol_ligand',
            'pdz_domain',
            'organism',
            'pubmed',
        ],
    )

    # UniProt ID with isoform e.g. O14754-1
    reupi = re.compile(r'([\w]{6,10})(?:-([0-9]{1,2}))?')

    # Cells used below: gene symbol, UniProt and domain number of the PDZ
    # protein, gene symbol and UniProt of the ligand, organism, PubMed ID.
    n_columns = 7

    url = urls.urls['pdzbase']['url_rescued']
    c = curl.Curl(url, silent=False)
    data = c.result

    # Nothing was retrieved, hence there is nothing to parse.
    if not data:
        return []

    soup = bs4.BeautifulSoup(data, 'html.parser')
    rows = (
        soup.find_all('table')[3]
        .find('table')
        .find('table')
        .find_all('tr')
    )

    result = []

    # Slicing (rather than `del rows[0]`) also copes with an empty table.
    for row in rows[1:]:

        cells = [td.text.strip() for td in row.find_all('td')]

        # `find_all('tr')` is recursive, so layout or spacer rows without
        # the full set of data cells may show up here.
        if len(cells) < n_columns:
            continue

        match_pdz = reupi.match(cells[1])
        match_ligand = reupi.match(cells[4])

        # Without an accession on both sides the record is unusable.
        if match_pdz is None or match_ligand is None:
            continue

        uniprot_pdz, isoform_pdz = match_pdz.groups()
        uniprot_ligand, isoform_ligand = match_ligand.groups()

        result.append(
            PDZbaseInteraction(
                uniprot_pdz=uniprot_pdz,
                isoform_pdz=int(isoform_pdz) if isoform_pdz else 1,
                uniprot_ligand=uniprot_ligand,
                isoform_ligand=int(isoform_ligand) if isoform_ligand else 1,
                genesymbol_pdz=cells[0],
                genesymbol_ligand=cells[3],
                pdz_domain=int(cells[2]),
                organism=taxonomy.ensure_ncbi_tax_id(cells[5]),
                pubmed=int(cells[6]),
            )
        )

    return result