#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import re
import csv
import collections

import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.share.common as common
import pypath.share.settings as settings

def get_protmapper():
    """
    Fetch the ProtMapper tables through ``curl.Curl`` and parse them.

    Data from Bachman et al. 2019 "Assembling a phosphoproteomic knowledge
    base using ProtMapper to normalize phosphosite information from
    databases and text mining",
    https://www.biorxiv.org/content/10.1101/822668v3.supplementary-material

    :return:
        A tuple of two elements. The first is the list of raw records
        from ``export.csv`` as read by ``csv.DictReader``. The second is
        a ``defaultdict`` of lists with the records of ``evidences.csv``
        grouped by the value of their ``ID`` field.
    """

    resource = urls.urls['protmapper']

    download = curl.Curl(
        resource['url'],
        large=True,
        silent=False,
        files_needed=resource['files'],
        req_headers=[settings.get('user_agent')],
        alpn=False,
    )
    extracted = download.files_multipart

    # a defaultdict is returned on purpose: looking up an ID without
    # any evidence record gives an empty list instead of a KeyError
    evidences = collections.defaultdict(list)

    for evidence in csv.DictReader(extracted['evidences.csv']):
        evidences[evidence['ID']].append(evidence)

    records = [*csv.DictReader(extracted['export.csv'])]

    return records, evidences
def protmapper_enzyme_substrate(
        only_evidences=None,
        only_literature=False,
        interactions=False,
):
    """
    Build enzyme-substrate records from the ProtMapper tables.

    Records whose ``CTRL_NS`` field is not ``UP`` are skipped.

    :arg str,set,NoneType only_evidences:
        Keep only the records having at least one evidence of these
        types, e.g. `VALID`. The values are compared against the
        ``DESCRIPTION`` field of the records in the 'evidences.csv'
        supplementary table. If empty or `None`, no filtering by
        evidence type is done.
    :arg bool only_literature:
        Skip the records without any non-empty ``PMID`` among their
        evidences.
    :arg bool interactions:
        If `True`, each element of the result is a list of
        ``[CTRL_ID, TARGET_UP_ID, sources, references]``. Otherwise each
        element is a dict with the keys ``kinase``, ``resaa``, ``resnum``,
        ``references``, ``substrate`` and ``databases``.

    :return:
        A list of lists or dicts, depending on ``interactions``. In both
        cases the sources and the references are sets of strings.
    """

    # resource labels as used in the table mapped to the names used here;
    # labels missing from this dict are passed through unchanged
    databases = {
        'signor': 'SIGNOR',
        'psp': 'PhosphoSite',
        'sparser': 'Sparser',
        'reach': 'REACH',
        'pid': 'NCI-PID',
        'reactome': 'Reactome',
        'rlimsp': 'RLIMS-P',
        'bel': 'BEL-Large-Corpus',
    }

    result = []
    only_evidences = common.to_set(only_evidences)

    records, evidences = get_protmapper()

    for rec in records:

        # NOTE(review): `UP` presumably stands for the UniProt name space
        # of the controller -- confirm against the ProtMapper export
        if rec['CTRL_NS'] != 'UP':

            continue

        # one lookup serves both the evidence type filter and the
        # collection of the references
        rec_evidences = evidences[rec['ID']]

        if only_evidences:

            ev_types = {ev['DESCRIPTION'] for ev in rec_evidences}

            if not only_evidences & ev_types:

                continue

        references = {ev['PMID'] for ev in rec_evidences if ev['PMID']}

        if only_literature and not references:

            continue

        sources = {
            databases.get(source, source)
            for source in rec['SOURCES'].strip('"').split(',')
        }

        if interactions:

            result.append([
                rec['CTRL_ID'],
                rec['TARGET_UP_ID'],
                sources,
                references,
            ])

        else:

            result.append({
                'kinase': rec['CTRL_ID'],
                'resaa': rec['TARGET_RES'],
                'resnum': int(rec['TARGET_POS']),
                'references': references,
                'substrate': rec['TARGET_UP_ID'],
                'databases': sources,
            })

    return result