Source code for pypath.inputs.humsavar

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import re
import collections

import pypath.resources.urls as urls
import pypath.share.curl as curl

redesc = re.compile(
    r'([^\(^\[]+[^\s^\(^\[])\s?'
    r'(?:\((\w+)\))?\s?'
    r'(?:\[(MIM:\d+)\])?'
)
reempty = re.compile(r'^-?\s?$')


def _parse_desc(desc):

        return (
            (None,) * 3
                if reempty.match(desc) else
            redesc.match(desc).groups()
        )


def uniprot_variants() -> dict[str, set[tuple]]:
    """
    Retrieves all human missense variants annotated in UniProtKB/Swiss-Prot.

    Downloads the text table from the URL registered under
    ``urls.urls['humsavar']['url']``, skips the leading description up to
    and including the first line starting with an underscore, then reads
    whitespace separated rows until the first row that has only a single
    field (e.g. a blank line).

    Returns:
        A dict with the values of the second column of the table as keys
        (presumably UniProt accessions -- TODO confirm against the file
        format), and sets of ``UniprotVariant`` named tuples as values.
        The fields of the tuples are: ``genesymbol`` (first column),
        ``ftid``, ``aa_change``, ``variant_category``, ``dbsnp`` (third to
        sixth columns), and ``disease``, ``disease_symbol``,
        ``disease_omim``, the latter three parsed by ``_parse_desc`` from
        the remaining columns joined by spaces.
    """

    UniprotVariant = collections.namedtuple(
        'UniprotVariant',
        (
            'genesymbol',
            'ftid',
            'aa_change',
            'variant_category',
            'dbsnp',
            'disease',
            'disease_symbol',
            'disease_omim',
        ),
    )

    url = urls.urls['humsavar']['url']
    c = curl.Curl(url, large=True, silent=False)

    result = collections.defaultdict(set)
    respace = re.compile(r'\s+')

    # Both loops below must consume the very same iterator, so that the
    # second one continues where the first one stopped; `iter` guarantees
    # this no matter if `c.result` is a one-shot iterator or a re-iterable
    # container.
    lines = iter(c.result)

    # skipping data description information in txt file
    for header_line in lines:

        if header_line.startswith('_'):

            break

    for line in lines:

        fields = respace.split(line.strip())

        # a row with a single field (e.g. an empty line) ends the table
        if len(fields) == 1:

            break

        # the description may contain spaces, hence it is spread across
        # all fields from the seventh onwards
        disease, symbol, omim = _parse_desc(' '.join(fields[6:]))

        variant = UniprotVariant(
            fields[0],
            *fields[2:6],
            disease = disease,
            disease_symbol = symbol,
            disease_omim = omim,
        )

        result[fields[1]].add(variant)

    return dict(result)