Source code for pypath.inputs.hgnc
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import collections
import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping
[docs]
def hgnc_genegroups():
    """
    Retrieves gene group annotations from HGNC, keyed by UniProt ID.

    Downloads the table at ``urls.urls['hgnc']['groups']``, discards its
    first line and processes the remaining lines as tab separated records.
    The third field of each record is read as a comma separated list of
    UniProt IDs, the fourth field as a ``|`` separated list of gene group
    names. The IDs are passed through ``mapping.map_names`` (uniprot to
    uniprot) before use. Records with fewer than four fields, without
    UniProt IDs, or without any ID left after mapping are skipped.

    Returns:
        dict: Keys are the mapped UniProt IDs, values are sets of
            ``HGNCGeneGroupAnnotation`` named tuples, each carrying one
            group name in its ``mainclass`` field. IDs without any
            non-empty group name are not included.
    """

    HGNCGeneGroupAnnotation = collections.namedtuple(
        'HGNCGeneGroupAnnotation',
        ['mainclass'],
    )

    result = collections.defaultdict(set)

    url = urls.urls['hgnc']['groups']
    c = curl.Curl(url, large = True, silent = False)

    # The first line is dropped without being parsed
    # (presumably the column header -- verify against the download).
    _ = next(c.result)

    for rec in c.result:

        rec = rec.split('\t')

        # Blank or truncated lines lack the fields read below; skip them
        # instead of failing the whole parse with an IndexError.
        if len(rec) < 4:

            continue

        uniprots = {u.strip() for u in rec[2].split(',')}
        uniprots.discard('')

        if not uniprots:

            continue

        uniprots = mapping.map_names(uniprots, 'uniprot', 'uniprot')

        if not uniprots:

            continue

        # Build the annotations once per record; stripping also removes
        # a line terminator in case this is the last field of the line.
        annotations = {
            HGNCGeneGroupAnnotation(mainclass = group)
            for group in (g.strip() for g in rec[3].split('|'))
            if group
        }

        # Without this guard the `defaultdict` lookup below would create
        # empty entries for proteins that have no group at all.
        if not annotations:

            continue

        for uniprot in uniprots:

            result[uniprot].update(annotations)

    return dict(result)