Source code for pypath.inputs.opm
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import re
import csv
import collections
import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping
import pypath.utils.taxonomy as taxonomy
[docs]
def opm_annotations(organism = 9606):
    """
    Builds membrane protein annotations from the OPM tables.

    Downloads the ``types`` and ``proteins`` CSV tables from the URLs
    registered under ``urls.urls['opm']``, tries to translate each protein
    name to a UniProt identifier and collects the membrane, family and
    transmembrane status of every protein that could be translated.

    Name translation is attempted first with the ``protein-name`` ID type
    on a few variants of the record's name (the full name, the part before
    the first opening parenthesis, the part before the first comma and the
    text between the parentheses). If none of these succeeds, upper case
    alphanumeric tokens of at least 3 characters, surrounded by spaces,
    are tried as gene symbols.

    Args:
        organism: Key looked up in ``taxonomy.phosphoelm_taxids``
            (presumably an NCBI Taxonomy ID -- confirm against that
            table). Records whose ``species_name_cache`` field differs
            from the name found there are skipped. If the key is not in
            the table, no organism filtering is done.

    Returns:
        dict: Keys are the identifiers returned by
        ``mapping.map_name0`` for the ``uniprot`` target ID type; values
        are sets of ``OpmAnnotation`` named tuples with the fields
        ``membrane``, ``family`` and ``transmembrane`` (bool).

    Raises:
        KeyError: If a protein record refers to a ``type_id`` which is
            absent from the ``types`` table.
    """

    reparentheses = re.compile(r'\((.*)\)')
    regenesymbol = re.compile(r' ([A-Z0-9]{3,}) ')


    def get_dict(name):
        """
        Downloads the OPM table `name` and returns it as a dict of
        its ``id`` column to its ``name`` column.
        """

        url = urls.urls['opm'][name]
        c = curl.Curl(url, large = True, silent = False)

        return {
            rec['id']: rec['name']
            for rec in csv.DictReader(c.result, delimiter = ',')
        }


    OpmAnnotation = collections.namedtuple(
        'OpmAnnotation',
        ['membrane', 'family', 'transmembrane'],
    )

    result = collections.defaultdict(set)

    # `None` here disables the species filter in the loop below
    organism_name = (
        taxonomy.phosphoelm_taxids[organism]
            if organism in taxonomy.phosphoelm_taxids else
        None
    )

    types = get_dict('types')

    url = urls.urls['opm']['proteins']
    c = curl.Curl(url, silent = False, large = True)
    data = csv.DictReader(c.result, delimiter = ',')

    for rec in data:

        if organism_name and rec['species_name_cache'] != organism_name:

            continue

        name = rec['name']

        # candidate protein names, tried in this order
        names = [
            name,
            name.split('(')[0],
            name.split(',')[0],
        ]

        m = reparentheses.search(name)

        if m:

            names.append(m.group(1))

        uniprot = None

        for this_name in names:

            uniprot = mapping.map_name0(this_name, 'protein-name', 'uniprot')

            if uniprot:

                break

        if not uniprot:

            # fallback: tokens of the name that look like gene symbols;
            # each extracted symbol itself is passed to the mapping
            for gs in regenesymbol.findall(name):

                uniprot = mapping.map_name0(gs, 'genesymbol', 'uniprot')

                if uniprot:

                    break

        if not uniprot:

            continue

        result[uniprot].add(
            OpmAnnotation(
                membrane = rec['membrane_name_cache'],
                family = rec['family_name_cache'],
                transmembrane = types[rec['type_id']] == 'Transmembrane',
            )
        )

    return dict(result)