Source code for pypath.inputs.protmapper
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import re
import csv
import collections
import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.share.common as common
import pypath.share.settings as settings
[docs]
def get_protmapper():
    """
    Retrieves the ProtMapper supplementary tables.

    The rows are the raw records as read by ``csv.DictReader``, without
    any further processing.

    From Bachman et al. 2019 "Assembling a phosphoproteomic knowledge base
    using ProtMapper to normalize phosphosite information from databases and
    text mining",
    https://www.biorxiv.org/content/10.1101/822668v3.supplementary-material

    :return:
        A tuple of two elements: the list of rows from ``export.csv``,
        and a ``collections.defaultdict`` which maps the values of the
        ``ID`` column of ``evidences.csv`` to the list of the rows
        carrying that ID.
    """

    resource = urls.urls['protmapper']

    download = curl.Curl(
        resource['url'],
        large = True,
        silent = False,
        files_needed = resource['files'],
        req_headers = [settings.get('user_agent')],
        alpn = False,
    )
    tables = download.files_multipart

    # group the evidence rows by the ID they refer to
    evidences = collections.defaultdict(list)
    evidence_rows = csv.DictReader(tables['evidences.csv'])

    for evidence in evidence_rows:

        evidences[evidence['ID']].append(evidence)

    records = list(csv.DictReader(tables['export.csv']))

    return records, evidences
[docs]
def protmapper_enzyme_substrate(
        only_evidences = None,
        only_literature = False,
        interactions = False,
    ):
    """
    Enzyme-substrate relationships from the ProtMapper tables.

    Processes the records returned by ``get_protmapper``. Only the records
    with ``UP`` in their ``CTRL_NS`` column are used (presumably these are
    the ones where the controller is identified by a UniProt ID -- to be
    confirmed). The ``CTRL_IS_KINASE`` column is not used, the output
    carries no modification type.

    :arg str,set,NoneType only_evidences:
        Keep only the interactions with these evidence type, e.g. `VALID`.
        See the 'descriptions' column in the 'evidences.csv' supplementary
        table.
    :arg bool only_literature:
        Keep only the records which have at least one non-empty ``PMID``
        among their evidences.
    :arg bool interactions:
        If True, each record is a list of
        ``[CTRL_ID, TARGET_UP_ID, sources, references]``; otherwise a
        dict with the keys `kinase`, `resaa`, `resnum`, `references`,
        `substrate` and `databases`.

    :return:
        A list of records, their format depends on ``interactions``.
        In both formats the sources and the references are sets.
    """

    # resource labels used in the `SOURCES` column mapped to display names;
    # labels missing from here are kept as they are
    databases = {
        'signor': 'SIGNOR',
        'psp': 'PhosphoSite',
        'sparser': 'Sparser',
        'reach': 'REACH',
        'pid': 'NCI-PID',
        'reactome': 'Reactome',
        'rlimsp': 'RLIMS-P',
        'bel': 'BEL-Large-Corpus',
    }

    result = []
    only_evidences = common.to_set(only_evidences)
    records, evidences = get_protmapper()

    for rec in records:

        if rec['CTRL_NS'] != 'UP':

            continue

        # `get_protmapper` returns a defaultdict here: by using `get`
        # instead of indexing we avoid inserting an empty list for
        # each record without evidences
        rec_evidences = evidences.get(rec['ID'], ())

        if only_evidences:

            ev_types = {ev['DESCRIPTION'] for ev in rec_evidences}

            if not only_evidences & ev_types:

                continue

        references = {ev['PMID'] for ev in rec_evidences if ev['PMID']}

        if only_literature and not references:

            continue

        sources = {
            databases.get(source, source)
            for source in rec['SOURCES'].strip('"').split(',')
        }

        if interactions:

            result.append([
                rec['CTRL_ID'],
                rec['TARGET_UP_ID'],
                sources,
                references,
            ])

        else:

            result.append({
                'kinase': rec['CTRL_ID'],
                'resaa': rec['TARGET_RES'],
                'resnum': int(rec['TARGET_POS']),
                'references': references,
                'substrate': rec['TARGET_UP_ID'],
                'databases': sources,
            })

    return result
[docs]
def protmapper_interactions(**kwargs):
    """
    ProtMapper records in the interaction format.

    Calls ``protmapper_enzyme_substrate`` with ``interactions = True``,
    hence each record is a list of
    ``[CTRL_ID, TARGET_UP_ID, sources, references]``.

    :arg **kwargs:
        Passed to ``protmapper_enzyme_substrate``. Must not contain
        ``interactions``, as that is already set here.
    """

    records = protmapper_enzyme_substrate(interactions = True, **kwargs)

    return records