Source code for pypath.inputs.matrixdb

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import collections

import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping
import pypath.inputs.uniprot_db as uniprot_db



[docs]
def matrixdb_interactions(organism = 9606):
    """
    Downloads and processes the interaction table of MatrixDB.

    The input is read as a tab separated table with one header row
    (presumably PSI-MI TAB -- confirm against the MatrixDB download).
    The columns used are: 0 and 1 (interactor identifiers), 6 (detection
    methods), 8 (publication references), 9 and 10 (taxon identifiers).

    :arg int,NoneType organism:
        NCBI Taxonomy ID. Only interactions where both partners belong to
        this organism are kept. If `None`, no organism filtering is done.

    :return:
        A list of lists, each with four elements: identifier of the first
        partner, identifier of the second partner, PubMed IDs joined by
        `|` and detection method names joined by `|`. Records where either
        partner has no 6 character `uniprotkb` identifier are omitted.
    """

    url = urls.urls['matrixdb']['url']
    c = curl.Curl(url, silent = False, large = True)
    source = c.result
    lines = iter(source)
    interactions = []

    try:
        # header row; the default keeps an empty input from raising
        next(lines, None)

        for line in lines:
            fields = line.replace('\n', '').replace('\r', '').split('\t')

            # blank or truncated rows do not have the taxon columns
            if len(fields) < 11:
                continue

            # taxon IDs are parsed only if filtering is requested
            if organism is not None and not (
                _matrixdb_taxid(fields[9]) == organism and
                _matrixdb_taxid(fields[10]) == organism
            ):
                continue

            uniprot_a = _matrixdb_uniprot(fields[0])
            uniprot_b = _matrixdb_uniprot(fields[1])

            if not (uniprot_a and uniprot_b):
                continue

            pubmeds = [
                ref.replace('pubmed:', '')
                for ref in fields[8].split('|')
                if ref.startswith('pubmed:')
            ]
            # the method name is the text within the parentheses
            methods = [
                method.split('(')[1].replace(')', '').strip('"')
                for method in fields[6].split('|')
                if '(' in method
            ]

            interactions.append([
                uniprot_a,
                uniprot_b,
                '|'.join(pubmeds),
                '|'.join(methods),
            ])

    finally:
        # release the handle also if a row fails to parse
        source.close()

    return interactions


def _matrixdb_taxid(field):
    """
    Extracts the numeric taxon ID from a field of the form
    `<prefix>:<number>(<label>)`. A lone `-` is translated to 0.
    """

    if field == '-':
        return 0

    return int(field.split(':')[1].split('(')[0])


def _matrixdb_uniprot(field):
    """
    Returns the last 6 character `uniprotkb` identifier from a `|`
    separated list of `<database>:<identifier>` references, or an empty
    string if there is none.
    """

    uniprot = ''

    for ref in field.split('|'):
        parts = ref.split(':')

        # NOTE(review): 10 character UniProt accessions do not pass this
        # length check -- confirm this is intended
        if (
            len(parts) > 1 and
            parts[0] == 'uniprotkb' and
            len(parts[1]) == 6
        ):
            uniprot = parts[1]

    return uniprot



def _matrixdb_protein_list(category, organism = 9606):
    """
    Returns a set of proteins annotated by MatrixDB.

    :arg str category:
        The protein annotation category. Possible values: `ecm`, `membrane`
        or `secreted`.
    :arg int,NoneType organism:
        If it evaluates to true, the result is restricted to the SwissProt
        identifiers of this organism, as provided by
        `uniprot_db.all_uniprots`. Otherwise no organism filtering is done.

    :return:
        The identifiers from the first column of the downloaded table,
        after translation by `mapping.map_names` and the optional
        organism filtering.
    """

    url = urls.urls['matrixdb']['%s_proteins' % category]
    c = curl.Curl(url, silent = False, large = True)

    lines = iter(c.result)
    proteins = set()

    # header row; the default keeps an empty download from raising
    # StopIteration
    next(lines, None)

    for line in lines:
        identifier = line.strip().replace('"', '').split('\t')[0]

        # blank lines still carry a line break, hence emptiness can be
        # tested only after stripping
        if not identifier:
            continue

        proteins.add(identifier)

    proteins = mapping.map_names(proteins, 'uniprot', 'uniprot')

    if organism:

        uniprots = uniprot_db.all_uniprots(
            organism = organism,
            swissprot = True,
        )
        proteins = proteins & set(uniprots)

    return proteins



[docs]
def matrixdb_membrane_proteins(organism = 9606):
    """
    Retrieves the proteins annotated as membrane proteins in MatrixDB.

    :arg int organism:
        Passed on unchanged to `_matrixdb_protein_list`.

    :return:
        The result of `_matrixdb_protein_list` for the `membrane` category.
    """

    category = 'membrane'
    membrane_proteins = _matrixdb_protein_list(category, organism = organism)

    return membrane_proteins




[docs]
def matrixdb_secreted_proteins(organism = 9606):
    """
    Retrieves the proteins annotated as secreted proteins in MatrixDB.

    :arg int organism:
        Passed on unchanged to `_matrixdb_protein_list`.

    :return:
        The result of `_matrixdb_protein_list` for the `secreted` category.
    """

    category = 'secreted'
    secreted_proteins = _matrixdb_protein_list(category, organism = organism)

    return secreted_proteins




[docs]
def matrixdb_ecm_proteins(organism = 9606):
    """
    Retrieves the proteins annotated as ECM (extracellular matrix)
    proteins in MatrixDB.

    :arg int organism:
        Passed on unchanged to `_matrixdb_protein_list`.

    :return:
        The result of `_matrixdb_protein_list` for the `ecm` category.
    """

    category = 'ecm'
    ecm_proteins = _matrixdb_protein_list(category, organism = organism)

    return ecm_proteins




[docs]
def matrixdb_annotations(organism = 9606):
    """
    Collects the MatrixDB main class annotations of proteins.

    Calls the membrane, secreted and ECM protein list functions of this
    module, in this order, and labels each protein with the classes whose
    list contains it.

    :arg int organism:
        Passed on unchanged to each of the protein list functions.

    :return:
        A dict with the proteins as keys and sets of `MatrixdbAnnotation`
        named tuples as values. The only field of the tuples, `mainclass`,
        is one of `membrane`, `secreted` or `ecm`.
    """

    MatrixdbAnnotation = collections.namedtuple(
        'MatrixdbAnnotation',
        ['mainclass'],
    )

    # main class labels paired with the functions providing their members
    sources = (
        ('membrane', matrixdb_membrane_proteins),
        ('secreted', matrixdb_secreted_proteins),
        ('ecm', matrixdb_ecm_proteins),
    )

    result = {}

    for mainclass, get_proteins in sources:
        record = MatrixdbAnnotation(mainclass = mainclass)

        for uniprot in get_proteins(organism = organism):
            result.setdefault(uniprot, set()).add(record)

    return result