Source code for pypath.inputs.compleat
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import csv
import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.internals.intera as intera
import pypath.utils.mapping as mapping
[docs]
def compleat_raw():
    """
    Downloads the Compleat table and returns its rows as dictionaries.

    The file behind ``urls.urls['compleat']['rescued']`` is fetched with
    ``curl.Curl`` and parsed as tab-delimited text. Column names are
    supplied here explicitly, so every line of the file is treated as
    a data row.

    Returns:
        A list with one dict per row, keyed by the column names
        listed in ``columns`` below.
    """

    # Column labels, in the order the fields appear in each row.
    columns = (
        'compleat_id',
        'member_count',
        'predicted',
        'functions',
        'functions2',
        'nothing',
        'sources',
        'name',
        'method',
        'organisms',
        'pubmeds',
        'members',
    )

    download = curl.Curl(
        urls.urls['compleat']['rescued'],
        large = True,
        silent = False,
    )

    reader = csv.DictReader(
        download.result,
        fieldnames = columns,
        delimiter = '\t',
    )

    return list(reader)
[docs]
def compleat_complexes(predicted = True):
    """
    Builds ``intera.Complex`` objects from the raw Compleat records.

    Each record's ``members`` field is split on whitespace and every
    item is translated from ``'entrez'`` to ``'uniprot'`` by
    ``mapping.map_name0``. Items without a translation are dropped, and
    records left with no members at all are skipped.

    Args:
        predicted: If false, records whose ``predicted`` column reads
            ``'Predicted'`` are left out.

    Returns:
        A dict of ``intera.Complex`` objects keyed by their string
        representation. Records yielding the same key are merged
        with ``+=``.
    """

    complexes = {}

    for row in compleat_raw():

        # The field may be missing (None) or empty; both count as
        # "not predicted".
        is_predicted = (row['predicted'] or '').strip() == 'Predicted'

        if (is_predicted and not predicted) or not row['members']:

            continue

        translated = (
            mapping.map_name0(member.strip(), 'entrez', 'uniprot')
            for member in row['members'].split()
        )
        uniprots = [uniprot for uniprot in translated if uniprot]

        if not uniprots:

            continue

        pubmeds = row['pubmeds']

        if pubmeds:

            references = pubmeds.split(',')

        else:

            references = None

        # The `sources` column is only consulted for predicted records;
        # 'Compleat' itself is always recorded as a source.
        sources = {'Compleat'}

        if is_predicted:

            sources.update(row['sources'].split(','))

        cplex = intera.Complex(
            components = uniprots,
            sources = sources,
            references = references,
            name = row['name'],
            ids = {'Compleat': row['compleat_id']},
        )

        key = str(cplex)

        if key not in complexes:

            complexes[key] = cplex

        else:

            # Same key seen before: combine with the stored complex.
            complexes[key] += cplex

    return complexes