Source code for pypath.inputs.gutmgene
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
from __future__ import annotations
from typing import Literal, NamedTuple
import collections
import pypath.share.curl as curl
import pypath.utils.taxonomy as taxonomy
import pypath.utils.mapping as mapping
import pypath.resources.urls as urls
import pypath.share.session as session
# Module-level logging callable: the `_log` method of a session `Logger`
# created under the name 'gutmgene_input'. Used below to record error
# messages before they are raised.
_log = session.Logger(name = 'gutmgene_input')._log
[docs]
class GutmgeneRaw(NamedTuple):
    """
    One record of the gutMGene download, as produced by ``gutmgene_raw``.

    The fields are filled positionally from the tab separated columns of
    one line of the downloaded table. Every value is either a string or
    ``None``: the parser replaces empty columns by ``None``, hence the
    optional annotations.
    """

    microbe_taxon: str | None
    microbe_ncbi_tax_id: str | None
    gut_microbiota_id: str | None
    classification: str | None
    genesymbol: str | None
    entrez: str | None
    effect: str | None
    throughput: str | None
[docs]
class GutmgeneAnnotation(NamedTuple):
    """
    Microbe related annotation of a gene, as built by
    ``gutmgene_annotations``.

    Carries the same fields as a raw gutMGene record, except the gene
    identifiers (``genesymbol`` and ``entrez``). The values are copied
    unchanged from the raw records, so each of them is either a string
    or ``None`` (empty column in the source table).
    """

    microbe_taxon: str | None
    microbe_ncbi_tax_id: str | None
    gut_microbiota_id: str | None
    classification: str | None
    effect: str | None
    throughput: str | None
[docs]
def gutmgene_raw(
        organism: Literal['human', 'mouse'] = 'human',
    ) -> list[GutmgeneRaw]:
    """
    Gut microbiota genes from the gut microbiota gene database (gutMGene).

    Args:
        organism:
            Organism ID or name; human and mouse are available.

    Returns:
        A list of named tuples containing information about gut microbiota
        genes in human or mouse. Duplicate records are removed; the order
        of the list is arbitrary. Empty columns are represented by `None`.

    Raises:
        ValueError: If `organism` does not translate to `human` or `mouse`.
    """

    organism_ = taxonomy.ensure_common_name(organism, lower = True)

    # The check has to be done on the normalised name: the raw argument
    # may be an organism ID or a differently capitalised name.
    if organism_ not in ('human', 'mouse'):

        err = (
            '`organism` must be either `human` or `mouse`, '
            f'not `{organism}`.'
        )
        _log(err)
        raise ValueError(err)

    url = urls.urls['gutmgene'][f'url_{organism_}']
    c = curl.Curl(url, silent = False, large = True)
    result = set()

    for line in c.result:

        # Lines starting with a double quote are not data records
        # (presumably the quoted header of the table).
        if line.startswith('"'):

            continue

        # Remove quoting, split into columns, empty columns become `None`.
        fields = [
            field or None
            for field in line.replace('"', '').strip().split('\t')
        ]

        # A blank line yields a single empty column; such lines carry no
        # record and must not reach the named tuple constructor.
        if not any(fields):

            continue

        result.add(GutmgeneRaw(*fields))

    return list(result)
[docs]
def gutmgene_annotations(
        organism: Literal['human', 'mouse'] = 'human',
    ) -> dict[str, set[GutmgeneAnnotation]]:
    """
    Microbial effectors of human or mouse genes from the gutMGene database.

    Each raw gutMGene record is translated from gene symbol to UniProt IDs,
    and its microbe related fields are attached to every resulting UniProt
    ID.

    Args:
        organism:
            Organism ID or name; human and mouse are available.

    Returns:
        A dict of sets of named tuples representing microbial relationships;
        top level keys are UniProt IDs.
    """

    records = gutmgene_raw(organism)
    taxid = taxonomy.ensure_ncbi_tax_id(organism)
    annotations = collections.defaultdict(set)

    for record in records:

        targets = mapping.map_name(
            record.genesymbol,
            'genesymbol',
            'uniprot',
            ncbi_tax_id = taxid,
        )

        for uniprot in targets:

            # The annotation keeps all fields of the raw record except
            # the gene identifiers.
            annotations[uniprot].add(
                GutmgeneAnnotation(
                    **{
                        field: getattr(record, field)
                        for field in GutmgeneAnnotation._fields
                    }
                )
            )

    return dict(annotations)