Source code for pypath.inputs.corum

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import csv

import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.internals.intera as intera
import pypath.utils.taxonomy as taxonomy


def corum_complexes(organism = 9606):
    """
    Retrieve protein complexes from CORUM for one organism.

    Downloads the archive at ``urls.urls['corum']['url_rescued']``, reads
    the ``allComplexes.txt`` member as a tab delimited table with a header
    row, and builds one ``intera.Complex`` object for each record whose
    ``Organism`` field resolves to the requested organism.

    :param organism:
        Organism identifier; it is normalised with
        ``taxonomy.ensure_ncbi_tax_id`` before being compared to the
        (equally normalised) ``Organism`` field of each record.
        Defaults to 9606.

    :return:
        A dict of ``intera.Complex`` objects keyed by their string
        representation. When several records produce the same key, the
        first complex is kept and the PubMed IDs of the later records
        are added to its ``references``.
    """

    organism = taxonomy.ensure_ncbi_tax_id(organism)

    complexes = {}

    c = curl.Curl(
        urls.urls['corum']['url_rescued'],
        silent = False,
        large = True,
        files_needed = ['allComplexes.txt'],
    )

    tab = csv.DictReader(c.result['allComplexes.txt'], delimiter = '\t')

    for rec in tab:

        # skip the complexes of other organisms
        if taxonomy.ensure_ncbi_tax_id(rec['Organism']) != organism:

            continue

        # multi-valued fields are semicolon separated in the table
        uniprots = rec['subunits(UniProt IDs)'].split(';')
        pubmeds = rec['PubMed ID'].split(';')

        cplex = intera.Complex(
            name = rec['ComplexName'],
            components = uniprots,
            sources = 'CORUM',
            references = pubmeds,
            ids = rec['ComplexID'],
            attrs = {
                'funcat': set(rec['FunCat description'].split(';')),
                'go': set(rec['GO description'].split(';')),
            },
        )

        # the string representation serves as the key for deduplication;
        # compute it once per record
        key = str(cplex)

        if key in complexes:

            # same complex seen before: only merge the literature references
            complexes[key].references.update(set(pubmeds))

        else:

            complexes[key] = cplex

    return complexes