#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from future.utils import iteritems

import re
import warnings

import bs4

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.share.session as session
import pypath.share.settings as settings

# Module level logger; `_log` is the shortcut used by the functions below.
_logger = session.Logger(name='inputs.genecards')
_log = _logger._log

# Matches any run of whitespace; used to collapse it into single spaces.
_respace = re.compile(r'\s+')

# Summary section title prefixes (as they appear on the GeneCards page)
# mapped to the short resource labels used as keys in the results.
_summary_sources = {
    'Gene Wiki': 'GeneWiki',
    'UniProtKB/Swiss-Prot': 'UniProt',
}
def genecards_datasheet(gene):
    """
    Retrieves a gene (protein) datasheet from GeneCards.

    :param str gene:
        A Gene Symbol or UniProt ID; it is interpolated into the
        GeneCards URL template.

    :return:
        The ``result`` of the download (the HTML as string), or ``None``
        if the download status is neither 0 nor 200.
    """

    page_url = urls.urls['genecards']['url'] % gene

    # Both timeouts are read from the module settings.
    download_opts = {
        'silent': True,
        'large': False,
        'connect_timeout': settings.get(
            'genecards_datasheet_connect_timeout'
        ),
        'timeout': settings.get('genecards_datasheet_timeout'),
    }

    download = curl.Curl(page_url, **download_opts)

    if download.status in {0, 200}:
        return download.result

    _log('Failed to retrieve gene card for ID `%s`.' % gene)

    return None
def genecards_soup(gene):
    """
    Retrieves a gene (protein) datasheet from GeneCards and parses it.

    :param str gene:
        A Gene Symbol or UniProt ID.

    :return:
        A ``bs4.BeautifulSoup`` object built from the datasheet HTML, or
        ``None`` if no HTML could be retrieved.
    """

    page = genecards_datasheet(gene)

    if not page:
        # Nothing has been downloaded, hence nothing to parse.
        return None

    # Every warning raised while building the soup is silenced; the
    # previous warning filters are restored upon leaving the context.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        return bs4.BeautifulSoup(page)
def genecards_summaries(gene):
    """
    Retrieves the summaries from a GeneCards datasheet.

    Each ``div.gc-subsection`` within the ``section#summaries`` element
    of the page is processed: its ``h3`` heading identifies the resource
    and the text of its remaining elements, with whitespace collapsed,
    is the summary.

    :param str gene:
        A Gene Symbol or UniProt ID.

    :return:
        A dict with the resource names as keys and the summary texts as
        values. Headings starting with a prefix listed in
        ``_summary_sources`` are replaced by the corresponding short
        name; any other heading is reduced to its first word. The dict
        is empty if the page could not be retrieved or it has no
        summaries section.
    """

    result = {}

    soup = genecards_soup(gene)

    if not soup:
        return result

    summaries = soup.select_one('section#summaries')

    if not summaries:
        return result

    for summary in summaries.select('div.gc-subsection'):

        heading = summary.select_one('h3')

        # `select_one` gives None if nothing matches: a subsection
        # without heading can not be attributed to any resource.
        if heading is None:
            continue

        title = heading.text.strip('\r\n ')

        # Placeholder and auxiliary subsections carry no summary.
        if title[:7] in {'No data', 'Additio'}:
            continue

        for gc_name, name in _summary_sources.items():

            if title.startswith(gc_name):
                title = name
                break

        # An empty or whitespace-only heading splits into no words,
        # hence the first word must not be accessed blindly.
        words = title.split(maxsplit=1)

        if not words:
            continue

        content = _respace.sub(
            ' ',
            ' '.join(
                par.text
                for par in summary.select(':not(:nth-child(1))')
            ),
        ).strip('\n\r ')

        if content:
            result[words[0]] = content

    return result