Source code for pypath.inputs.matrixdb

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import collections

import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping
import pypath.inputs.uniprot_db as uniprot_db


def _matrixdb_organism(field):
    """
    Extracts the numeric organism identifier from an organism column of a
    MatrixDB row.

    :arg str field:
        Either `-` or a value where the number sits between the first
        colon and the following opening parenthesis.

    :return:
        The identifier as integer; 0 if the field is `-`.
    """

    return 0 if field == '-' else int(field.split(':')[1].split('(')[0])


def _matrixdb_uniprot(field):
    """
    Extracts a UniProt ID from an interactor ID column of a MatrixDB row.

    :arg str field:
        Pipe separated identifiers, each of them in `database:identifier`
        form.

    :return:
        The identifier of the last `uniprotkb` entry which is exactly
        6 characters long; empty string if there is no such entry.
    """

    uniprot = ''

    for identifier in field.split('|'):

        nm = identifier.split(':')

        # NOTE(review): only 6 characters long IDs are accepted, anything
        # longer is ignored -- confirm that this is intended
        if nm[0] == 'uniprotkb' and len(nm[1]) == 6:

            uniprot = nm[1]

    return uniprot


def matrixdb_interactions(organism = 9606):
    """
    Downloads and processes the interactions from MatrixDB.

    The first line of the file is skipped as header, the rest is processed
    as tab separated rows. A row is included in the result only if a
    6 characters long `uniprotkb` identifier is available for both
    interacting partners.

    :arg int,NoneType organism:
        Numeric organism identifier (presumably NCBI Taxonomy ID, the
        default 9606 being human -- TODO confirm). Only the rows where
        both the 10th and 11th columns carry this identifier are kept.
        If `None`, no filtering by organism is done.

    :return:
        List of lists, each with four elements: the UniProt IDs of the two
        partners, the pipe separated `pubmed:` references from the 9th
        column (without the prefix), and the pipe separated labels found
        in parentheses in the 7th column (presumably detection methods).
    """

    url = urls.urls['matrixdb']['url']
    c = curl.Curl(url, silent = False, large = True)
    f = c.result

    # kept outside of the `try` block, this way a non-iterable result
    # raises the same `TypeError` as a plain `for` loop over it would
    rows = iter(f)
    result = []

    try:

        # header row
        next(rows, None)

        for row in rows:

            row = row.replace('\n', '').replace('\r', '').split('\t')

            # both columns are parsed even if no organism filter is
            # requested, so malformed values never pass silently
            spec_a = _matrixdb_organism(row[9])
            spec_b = _matrixdb_organism(row[10])

            if (
                organism is not None and
                (spec_a != organism or spec_b != organism)
            ):

                continue

            pubmeds = [
                ref.replace('pubmed:', '')
                for ref in row[8].split('|')
                if ref.startswith('pubmed:')
            ]
            methods = [
                method.split('(')[1].replace(')', '').strip('"')
                for method in row[6].split('|')
                if '(' in method
            ]

            uniprot_a = _matrixdb_uniprot(row[0])
            uniprot_b = _matrixdb_uniprot(row[1])

            # rows without UniProt ID for any of the partners are dropped
            if len(uniprot_a) > 5 and len(uniprot_b) > 5:

                result.append([
                    uniprot_a,
                    uniprot_b,
                    '|'.join(pubmeds),
                    '|'.join(methods),
                ])

    finally:

        # release the handle also if processing of a row failed
        f.close()

    return result
def _matrixdb_protein_list(category, organism = 9606):
    """
    Returns a set of proteins annotated by MatrixDB.

    :arg str category:
        The protein annotation category. Possible values: `ecm`, `membrane`
        or `secreted`.
    :arg int organism:
        If it evaluates to True, only the proteins also returned by
        `uniprot_db.all_uniprots` for this organism (called with
        `swissprot = True`) are kept.

    :return:
        The identifiers from the first column of the file, after passing
        them through `mapping.map_names`, and optionally restricted to
        the organism.
    """

    url = urls.urls['matrixdb']['%s_proteins' % category]
    c = curl.Curl(url, silent = False, large = True)
    rows = c.result

    # header row; the default prevents `StopIteration` if the input is empty
    next(rows, None)

    proteins = set()

    for l in rows:

        if not l:

            continue

        uniprot = l.strip().replace('"', '').split('\t')[0]

        # the test above is done on the raw line, hence lines consisting
        # only of whitespace or quotes pass it: these result an empty
        # identifier which should not be added to the set
        if uniprot:

            proteins.add(uniprot)

    proteins = mapping.map_names(proteins, 'uniprot', 'uniprot')

    if organism:

        uniprots = uniprot_db.all_uniprots(
            organism = organism,
            swissprot = True,
        )
        proteins = proteins & set(uniprots)

    return proteins
def matrixdb_membrane_proteins(organism = 9606):
    """
    Retrieves the proteins listed in the `membrane` category of MatrixDB.

    :arg int organism:
        Passed to `_matrixdb_protein_list`, which uses it to restrict
        the result to the proteins of one organism.

    :return:
        Set of UniProt IDs.
    """

    category = 'membrane'

    return _matrixdb_protein_list(category, organism = organism)
def matrixdb_secreted_proteins(organism = 9606):
    """
    Retrieves the proteins listed in the `secreted` category of MatrixDB.

    :arg int organism:
        Passed to `_matrixdb_protein_list`, which uses it to restrict
        the result to the proteins of one organism.

    :return:
        Set of UniProt IDs.
    """

    category = 'secreted'

    return _matrixdb_protein_list(category, organism = organism)
def matrixdb_ecm_proteins(organism = 9606):
    """
    Retrieves the proteins listed in the `ecm` (extracellular matrix)
    category of MatrixDB.

    :arg int organism:
        Passed to `_matrixdb_protein_list`, which uses it to restrict
        the result to the proteins of one organism.

    :return:
        Set of UniProt IDs.
    """

    category = 'ecm'

    return _matrixdb_protein_list(category, organism = organism)
def matrixdb_annotations(organism = 9606):
    """
    Compiles the MatrixDB protein categories into annotation records.

    :arg int organism:
        Passed to each of the category specific functions
        (`matrixdb_membrane_proteins`, `matrixdb_secreted_proteins` and
        `matrixdb_ecm_proteins`).

    :return:
        Dict with the proteins as keys and sets of `MatrixdbAnnotation`
        named tuples as values. The only field of these, `mainclass`, is
        either `membrane`, `secreted` or `ecm`.
    """

    record = collections.namedtuple('MatrixdbAnnotation', ('mainclass',))

    # processed in this order: membrane, secreted, ecm
    sources = (
        ('membrane', matrixdb_membrane_proteins),
        ('secreted', matrixdb_secreted_proteins),
        ('ecm', matrixdb_ecm_proteins),
    )

    result = {}

    for mainclass, get_proteins in sources:

        label = record(mainclass = mainclass)

        for protein in get_proteins(organism = organism):

            # a protein may belong to more than one category
            result.setdefault(protein, set()).add(label)

    return result