#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from future.utils import iteritems

import os
import bs4
import pickle

import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.share.cache as cache
import pypath.share.session as session
import pypath.share.common as common

# Module level logger; `_log` is the shortcut used by the functions below.
_logger = session.Logger(name = 'trip_input')
_log = _logger._log

def take_a_trip(cachefile=None):
    """
    Downloads TRIP data from webpage and preprocesses it.

    Saves the preprocessed data into `cachefile` and next time loads it
    from this file instead of downloading again.

    :arg cachefile str:
        Path to the pickle dump of the preprocessed TRIP database.
        If it does not exist, the database will be downloaded and saved
        to this file. By default the path is obtained from
        ``cache.cache_item('trip_preprocessed')``.

    :return:
        Dict with the keys `sc` (Screening), `cc` (Characterization),
        `vtc` (In vitro validation), `vvc` (In vivo validation) and
        `fc` (Functional consequence). Each value is a dict filled by
        ``trip_process_table``.
    """

    cachefile = cachefile or cache.cache_item('trip_preprocessed')

    if os.path.exists(cachefile):

        _log(
            'Loading preprocessed TRIP database '
            'content from `%s`' % cachefile
        )

        # NOTE: unpickling can execute arbitrary code. This is acceptable
        # only because the cache file is written by this very function;
        # never point `cachefile` to a file from an untrusted source.
        with open(cachefile, 'rb') as fp:

            return pickle.load(fp)

    _log('No cache found, downloading and preprocessing TRIP database.')

    result = {'sc': {}, 'cc': {}, 'vvc': {}, 'vtc': {}, 'fc': {}}
    # cache of interactor name -> UniProt lookups, shared by all tables
    intrs = {}
    # table titles on the webpage -> keys of `result`
    titles = {
        'Characterization': 'cc',
        'Screening': 'sc',
        'Validation: In vitro validation': 'vtc',
        'Validation: In vivo validation': 'vvc',
        'Functional consequence': 'fc',
    }

    base_url = urls.urls['trip']['base_rescued']
    show_url = urls.urls['trip']['show_rescued']

    c = curl.Curl(base_url)
    mainsoup = bs4.BeautifulSoup(c.result, 'html.parser')

    # links of the individual TRP channel pages from the selector menu
    selector = mainsoup.find('div', id='trp_selector').find('ul')
    trppages = common.flat_list(
        [
            [a.attrs['href'] for a in ul.find_all('a')]
            for ul in selector.find_all('ul')
        ]
    )

    for trpp in trppages:

        trp = trpp.split('/')[-1]
        c = curl.Curl(show_url % trp, silent=False)
        trpsoup = bs4.BeautifulSoup(c.result, 'html.parser')
        trp_uniprot = trip_find_uniprot(trpsoup)

        if trp_uniprot is None or len(trp_uniprot) < 6:

            _log('Could not find UniProt for %s' % trp)

        # the title cell of each data table spans 11 or 13 columns
        for title_cell in trpsoup.find_all('th', colspan=['11', '13']):

            ttl = titles[title_cell.text.strip()]
            table = title_cell.find_parent('table')
            trip_process_table(table, result[ttl], intrs, trp_uniprot)

    _log('Saving processed TRIP database content to `%s`' % cachefile)

    with open(cachefile, 'wb') as fp:

        pickle.dump(result, fp)

    return result

def trip_process_table(tab, result, intrs, trp_uniprot):
    """
    Processes one HTML table downloaded from TRIP webpage.

    Rows containing any `th` cell (headers) and rows with fewer than
    three cells are skipped. For every other row the text of the third
    cell is taken as the interactor name and the stripped texts of all
    cells are appended to `result` under the key
    `(trp_uniprot, interactor_uniprot)`.

    @tab : bs4.element.Tag()
        One table of interactions from TRIP webpage.
    @result : dict
        Dictionary the data should be filled in (modified in place).
    @intrs : dict
        Dictionary of already converted interactor IDs.
        This serves as a cache so do not need to look up the same ID
        twice. New lookups are added to it in place.
    @trp_uniprot : str
        UniProt ID of TRP domain containing protein.

    Note: if the lookup of an interactor fails, the second element of
    the key is whatever ``trip_get_uniprot`` returned (possibly `None`).
    """

    for row in tab.find_all('tr'):

        cells = row.find_all(['td', 'th'])

        # header rows carry `th` cells; rows with fewer than three cells
        # have no interactor column and would break the indexing below
        if len(cells) < 3 or any(c.name == 'th' for c in cells):

            continue

        intr = cells[2].text.strip()

        if intr in intrs:

            intr_uniprot = intrs[intr]

        else:

            intr_uniprot = trip_get_uniprot(intr)
            intrs[intr] = intr_uniprot

            if intr_uniprot is None or len(intr_uniprot) < 6:

                _log('Could not find UniProt for %s' % intr)

        result.setdefault((trp_uniprot, intr_uniprot), []).append(
            [c.text.strip() for c in cells]
        )

def trip_get_uniprot(syn):
    """
    Downloads the page of one synonym from the TRIP webpage and attempts
    to look up the UniProt ID in it.

    @syn : str
        The synonym as shown on TRIP webpage.

    Returns the value given by ``trip_find_uniprot`` for the downloaded
    page, or `None` if the download yielded an empty result.
    """

    page = curl.Curl(urls.urls['trip']['show_rescued'] % syn).result

    if not page:

        return None

    return trip_find_uniprot(bs4.BeautifulSoup(page, 'html.parser'))

def trip_find_uniprot(soup):
    """
    Looks up a UniProt name in table downloaded from TRIP webpage.

    Scans the table rows of the first `div` with id `tab2`. From the
    first row whose first `td` cell reads `Human` and which has at least
    three `td` cells, the stripped text of the third cell is returned.

    @soup : bs4.BeautifulSoup
        The `BeautifulSoup` instance of a page downloaded from the TRIP
        webpage, e.g. as created in
        ``pypath.inputs.trip.trip_get_uniprot``.

    Returns the text of the matching cell or `None` if the page has no
    `tab2` section or no suitable `Human` row.
    """

    sections = soup.find_all('div', id='tab2')

    # callers treat `None` as "not found", so a page without the
    # expected section should not raise
    if not sections:

        return None

    for tr in sections[0].find_all('tr'):

        cells = tr.find_all('td')

        if len(cells) > 2 and cells[0].text.strip() == 'Human':

            return cells[2].text.strip()

    return None

def trip_process(
        exclude_methods=None,
        predictions=False,
        species='Human',
        strict=False,
    ):
    """
    Downloads TRIP data by calling ``pypath.inputs.trip.take_a_trip`` and
    further processes it.

    Returns dict of dict with TRIP data: the keys are the protein pair
    tuples used as keys in the output of ``take_a_trip``, the values are
    dicts with the keys `refs`, `methods`, `tissues`, `effect` and
    `regions`, each holding a set. Only pairs with at least one
    reference are included.

    @exclude_methods : list
        Interaction detection methods to be discarded. By default
        `['Inference', 'Speculation']`.
    @predictions : bool
        Whether to include predicted interactions (affects only the
        records of the screening table).
    @species : str
        Organism name, e.g. `Human`.
    @strict : bool
        If `True`, only records with both species fields equal to
        `species` are accepted. Otherwise the values `Not specified`,
        `Not used as a bait` and the empty string are accepted too.

    Note: records of the functional consequence (`fc`) table are added
    without any species or method filtering.
    """

    if exclude_methods is None:

        exclude_methods = ['Inference', 'Speculation']

    nd = 'Not determined'
    spec = set() if strict else {'Not specified', 'Not used as a bait', ''}
    spec.add(species)

    result = {}
    data = take_a_trip()

    all_pairs = common.unique_list(
        common.flat_list([v.keys() for v in data.values()])
    )

    for uniprots in all_pairs:

        refs = set()
        mets = set()
        tiss = set()
        reg = set()
        eff = set()

        # screening
        for sc in data['sc'].get(uniprots, ()):

            if (
                sc[4] in spec and
                sc[6] in spec and
                (predictions or sc[9] != 'Prediction') and
                sc[3] not in exclude_methods
            ):

                refs.add(sc[10])
                mets.add(sc[3])
                tiss.add(sc[7])

        # in vitro validation
        for vtc in data['vtc'].get(uniprots, ()):

            if (
                vtc[4] in spec and
                vtc[7] in spec and
                vtc[3] not in exclude_methods
            ):

                refs.add(vtc[10])
                mets.add(vtc[3])

        # in vivo validation
        for vvc in data['vvc'].get(uniprots, ()):

            if (
                vvc[6] in spec and
                vvc[8] in spec and
                vvc[3] not in exclude_methods
            ):

                refs.add(vvc[10])
                mets.add(vvc[3])

                if vvc[4]:

                    tiss.add(vvc[4])

                if vvc[5]:

                    tiss.add(vvc[5])

        # characterization
        for cc in data['cc'].get(uniprots, ()):

            if (
                cc[4] in spec and
                cc[6] in spec and
                cc[3] not in exclude_methods
            ):

                refs.add(cc[10])
                mets.add(cc[3])

                # keep the pair of region fields if at least one of them
                # is neither empty nor `Not determined`
                if (cc[5] != nd and cc[5]) or (cc[7] != nd and cc[7]):

                    reg.add((cc[5], cc[7]))

        # functional consequence: no filtering applied here
        for fc in data['fc'].get(uniprots, ()):

            mets.add(fc[3])
            refs.add(fc[7])

            if fc[5]:

                eff.add(fc[5])

            if fc[6]:

                eff.add(fc[6])

        if refs:

            result[uniprots] = {
                'refs': refs,
                'methods': mets,
                'tissues': tiss,
                'effect': eff,
                'regions': reg,
            }

    return result

def trip_interactions(
        exclude_methods=None,
        predictions=False,
        species='Human',
        strict=False,
    ):
    """
    Obtains processed TRIP interactions by calling
    ``pypath.inputs.trip.trip_process`` and returns list of interactions.

    All arguments are passed to ``trip_process``, see their definition
    there. If `exclude_methods` is `None`, the default
    `['Inference', 'Speculation']` is used.

    Returns a list of lists, one for each protein pair: the two elements
    of the pair key, the references and the methods (each sorted and
    joined by `;`) and the effect, which is `stimulation`, `inhibition`
    or `unknown`.
    """

    if exclude_methods is None:

        exclude_methods = ['Inference', 'Speculation']

    pos = {
        'Sensitization',
        'Activation',
        'Increase in plasma membrane level',
        'Increase in lysosomal membrane level',
        'New channel creation',
    }
    neg = {
        'Desensitization',
        'Decrease in plasma membrane level',
        'Inhibition',
        'Internalization from membrane by ligand',
        'Retain in the endoplasmic reticulum',
    }

    def trip_effect(eff):
        # positive effects take precedence if both kinds are present

        if eff & pos:

            return 'stimulation'

        if eff & neg:

            return 'inhibition'

        return 'unknown'

    data = trip_process(exclude_methods, predictions, species, strict)

    return [
        [
            unipr[0],
            unipr[1],
            # sets have arbitrary order: sort for reproducible output
            ';'.join(sorted(d['refs'])),
            ';'.join(sorted(d['methods'])),
            trip_effect(d['effect']),
        ]
        for unipr, d in data.items()
    ]