Source code for pypath.inputs.protmapper

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import re
import csv
import collections

import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.share.common as common
import pypath.share.settings as settings



[docs]
def get_protmapper():
    """
    Downloads and reads the ProtMapper supplementary tables.

    From Bachman et al. 2019 "Assembling a phosphoproteomic knowledge base
    using ProtMapper to normalize phosphosite information from databases and
    text mining",
    https://www.biorxiv.org/content/10.1101/822668v3.supplementary-material

    :return:
        A tuple of two elements: the rows of ``export.csv`` as a list of
        raw records as read by ``csv.DictReader``; and a
        ``collections.defaultdict`` of lists with the rows of
        ``evidences.csv`` grouped by the value of their ``ID`` field.
    """

    resource = urls.urls['protmapper']
    download = curl.Curl(
        resource['url'],
        large = True,
        silent = False,
        files_needed = resource['files'],
        req_headers = [settings.get('user_agent')],
        alpn = False,
    )
    tables = download.files_multipart

    # group the evidence rows by the ID they refer to
    evidences = collections.defaultdict(list)

    for evidence in csv.DictReader(tables['evidences.csv']):

        evidences[evidence['ID']].append(evidence)

    records = list(csv.DictReader(tables['export.csv']))

    return records, evidences




[docs]
def protmapper_enzyme_substrate(
        only_evidences = None,
        only_literature = False,
        interactions = False,
    ):
    """
    Enzyme-substrate relationships or interactions from ProtMapper.

    Processes the tables returned by ``get_protmapper``. Records with a
    ``CTRL_NS`` value other than ``UP`` are skipped (presumably ``UP``
    marks controllers identified by UniProt IDs -- to be confirmed).

    :arg str,set,NoneType only_evidences:
        Keep only the interactions with these evidence type, e.g. `VALID`:
        a record is kept if at least one of its evidences has one of these
        values in the ``DESCRIPTION`` column of the 'evidences.csv'
        supplementary table. The value is converted by ``common.to_set``;
        if the result is empty, no filtering by evidence type happens.
    :arg bool only_literature:
        Skip the records which have no evidence with a non-empty ``PMID``.
    :arg bool interactions:
        If `True`, each item in the result is a list of
        ``[CTRL_ID, TARGET_UP_ID, sources, references]``; otherwise a
        dict with the keys ``kinase``, ``resaa``, ``resnum``,
        ``references``, ``substrate`` and ``databases``.

    :return:
        A list of lists or dicts, depending on ``interactions``. In both
        cases `sources` (`databases`) and `references` are sets of strings.
    """

    # ProtMapper source labels mapped to the names used in the output;
    # labels not listed here are passed through unchanged
    databases = {
        'signor': 'SIGNOR',
        'psp': 'PhosphoSite',
        'sparser': 'Sparser',
        'reach': 'REACH',
        'pid': 'NCI-PID',
        'reactome': 'Reactome',
        'rlimsp': 'RLIMS-P',
        'bel': 'BEL-Large-Corpus',
    }

    result = []
    only_evidences = common.to_set(only_evidences)

    records, evidences = get_protmapper()

    for rec in records:

        if rec['CTRL_NS'] != 'UP':

            continue

        # the evidences of this record, looked up only once
        rec_evidences = evidences[rec['ID']]

        if only_evidences:

            ev_types = {ev['DESCRIPTION'] for ev in rec_evidences}

            if not only_evidences & ev_types:

                continue

        references = {
            ev['PMID']
            for ev in rec_evidences
            if ev['PMID']
        }

        if only_literature and not references:

            continue

        # NOTE: the `CTRL_IS_KINASE` column is not used here, the
        # modification type is not part of the returned records
        sources = {
            databases.get(source, source)
            for source in rec['SOURCES'].strip('"').split(',')
        }

        if interactions:

            result.append([
                rec['CTRL_ID'],
                rec['TARGET_UP_ID'],
                sources,
                references,
            ])

        else:

            result.append({
                'kinase': rec['CTRL_ID'],
                'resaa': rec['TARGET_RES'],
                'resnum': int(rec['TARGET_POS']),
                'references': references,
                'substrate': rec['TARGET_UP_ID'],
                'databases': sources,
            })

    return result




[docs]
def protmapper_interactions(**kwargs):
    """
    ProtMapper records in the interactions format.

    Calls ``protmapper_enzyme_substrate`` with ``interactions = True``.

    :arg **kwargs:
        Passed to ``protmapper_enzyme_substrate``; must not contain
        ``interactions`` as that is already set here.

    :return:
        The value returned by ``protmapper_enzyme_substrate``.
    """

    as_interactions = protmapper_enzyme_substrate(
        interactions = True,
        **kwargs
    )

    return as_interactions