Source code for pypath.inputs.uniprot_idmapping

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2024
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from __future__ import annotations

import json

import pandas as pd

import pypath.resources.urls as urls
import pypath.share.curl as curl


def idtypes(
        pairs: bool = True,
        raw: bool = False,
    ) -> dict[str, pd.DataFrame] | set[tuple[str, str]] | dict:
    """
    Identifier types in the UniProt ID mapping service.

    Retrieves the field configuration of the ID mapping service (the
    ``fields`` URL of ``uniprot_idmapping``) and returns it either as
    is, as data frames, or as pairs of ID type names.

    Args:
        pairs:
            Process the data into pairs of ID type names. Ignored if
            `raw` is `True`.
        raw:
            Return the data as decoded from JSON, without any further
            processing.

    Returns:
        The decoded JSON contents if `raw` is `True`. Otherwise, if
        `pairs` is `True`, a set of tuples of ID type names: for each
        ID type with a true `from` flag, the tuples `(name, target)`
        for every target listed under `tos` of its rule; for each ID
        type with a true `to` flag, the same tuples reversed. If both
        `raw` and `pairs` are `False`, a dict of two data frames:
        "groups" (one row per ID type, along with its group name)
        and "rules".
    """

    url = urls.urls['uniprot_idmapping']['fields']
    c = curl.Curl(url, large = False, silent = False)
    data = json.loads(c.result)

    if raw:

        return data

    # one row per ID type: each group carries a list of items
    groups = (
        pd.DataFrame(data['groups']).
        explode('items').
        reset_index(drop = True)
    )
    # expand the item dicts into columns next to the group name;
    # `from` is a Python keyword, hence it can not be used as a
    # field name in the named tuples created by `itertuples`
    groups = (
        pd.concat(
            [
                groups['groupName'],
                pd.DataFrame(groups['items'].tolist()),
            ],
            axis = 1,
        ).
        rename(columns = {'from': 'from_'})
    )
    rules = pd.DataFrame(data['rules'])

    if not pairs:

        return {'groups': groups, 'rules': rules}

    targets_by_rule = {int(r.ruleId): r.tos for r in rules.itertuples()}
    result = set()

    for idtype in groups.itertuples():

        # ID types without a rule have no targets, hence they yield
        # no pairs; testing this here keeps the other columns (such
        # as the `from` and `to` flags) intact, instead of filling
        # the missing values in the whole data frame
        if pd.isna(idtype.ruleId):

            continue

        from_to = {
            (idtype.name, target)
            for target in targets_by_rule.get(int(idtype.ruleId), [])
        }

        if idtype.from_:

            result.update(from_to)

        if idtype.to:

            result.update(pair[::-1] for pair in from_to)

    return result