#!/usr/bin/env python# -*- coding: utf-8 -*-## This file is part of the `pypath` python module## Copyright 2014-2023# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University## Authors: see the file `README.rst`# Contact: Dénes Türei (turei.denes@gmail.com)## Distributed under the GPLv3 License.# See accompanying file LICENSE.txt or copy at# https://www.gnu.org/licenses/gpl-3.0.html## Website: https://pypath.omnipathdb.org/#importcsvimportpypath.share.curlascurlimportpypath.resources.urlsasurlsimportpypath.internals.interaasinteraimportpypath.utils.mappingasmapping
def compleat_raw():
    """
    Raw protein complex data from the Compleat database.

    Fetches the resource registered under ``urls.urls['compleat']['rescued']``
    and parses it as a tab delimited table without a header row.

    Returns:
        A list with one dict per row; the keys are the column labels
        assigned below, in file order.
    """

    # Column labels are supplied here because the reader is not asked to
    # take them from the first line of the file.
    columns = (
        'compleat_id',
        'member_count',
        'predicted',
        'functions',
        'functions2',
        'nothing',
        'sources',
        'name',
        'method',
        'organisms',
        'pubmeds',
        'members',
    )

    download = curl.Curl(
        urls.urls['compleat']['rescued'],
        large=True,
        silent=False,
    )
    reader = csv.DictReader(
        download.result,
        delimiter='\t',
        fieldnames=columns,
    )

    return list(reader)
def compleat_complexes(predicted=True):
    """
    Retrieves and processes protein complexes from the Compleat database.

    Args:
        predicted: If false, records whose ``predicted`` field reads
            ``Predicted`` (after stripping whitespace) are left out.

    Returns:
        A dict of ``intera.Complex`` objects keyed by their string
        representation. Records yielding the same key are combined
        with ``+=`` on the already stored object.
    """

    complexes = {}

    for rec in compleat_raw():

        flag = rec['predicted']
        is_predicted = bool(flag) and flag.strip() == 'Predicted'

        if is_predicted and not predicted:
            continue

        if not rec['members']:
            continue

        # Members are whitespace separated identifiers; each one is passed
        # through the 'entrez' -> 'uniprot' mapping and identifiers with
        # no mapping result are dropped.
        mapped = (
            mapping.map_name0(member.strip(), 'entrez', 'uniprot')
            for member in rec['members'].split()
        )
        uniprots = [uniprot for uniprot in mapped if uniprot]

        if not uniprots:
            continue

        if rec['pubmeds']:
            references = rec['pubmeds'].split(',')
        else:
            references = None

        # The `sources` column is only taken over for predicted records;
        # every record is labelled with 'Compleat' itself.
        sources = {'Compleat'}

        if is_predicted:
            sources.update(rec['sources'].split(','))

        cplex = intera.Complex(
            components=uniprots,
            sources=sources,
            references=references,
            name=rec['name'],
            ids={'Compleat': rec['compleat_id']},
        )

        key = cplex.__str__()

        if key not in complexes:
            complexes[key] = cplex
        else:
            complexes[key] += cplex

    return complexes