Source code for pypath.inputs.ensembl

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import collections
import warnings

import bs4

import pypath.resources.urls as urls
import pypath.share.curl as curl


"""
Note: other Ensembl-related functions can be found in ``inputs.biomart``.
"""


def _ensembl_name(latin_name):
    """
    Short Ensembl style organism name from a scientific name: the initials
    of all words except the last, followed by the last word, all in lower
    case (e.g. "Mus musculus" -> "mmusculus").
    """

    words = latin_name.lower().split()

    return ''.join([word[0] for word in words[:-1]] + [words[-1]])


def ensembl_organisms():
    """
    List of organisms in Ensembl with various taxon IDs and metadata about
    related Ensembl database contents.

    The data is read from the first table of the page downloaded from
    ``urls.urls['ensembl']['species']``. The first row of the table provides
    the field names (lower case, spaces replaced by underscores); each
    further row becomes one record. The value in the third column is
    converted to integer, and an additional ``ensembl_name`` field is
    derived from the second column (see the example above the call below).

    Returns
        List of named tuples.
    """

    url = urls.urls['ensembl']['species']
    c = curl.Curl(url)

    with warnings.catch_warnings():

        # suppress bs4's warning about XML content parsed by an HTML parser
        warnings.simplefilter('ignore', bs4.XMLParsedAsHTMLWarning)
        soup = bs4.BeautifulSoup(c.result, 'html.parser')

    record = None
    result = []

    for row in soup.find('table').find_all('tr'):

        # Take only the cell elements directly under the row. Iterating the
        # row itself would also yield the text nodes (e.g. whitespace)
        # between the cells, which would break both the field names and
        # the column positions relied on below.
        cells = row.find_all(['th', 'td'], recursive=False)

        if record is None:

            # the first row is the header: it defines the record fields
            record = collections.namedtuple(
                'EnsemblOrganism',
                [cell.text.lower().replace(' ', '_') for cell in cells] +
                ['ensembl_name']
            )
            continue

        result.append(
            record(
                *(
                    # the third column is numeric
                    # (presumably the taxon ID -- confirm against the page)
                    int(cell.text) if i == 2 else cell.text
                    for i, cell in enumerate(cells)
                ),
                # Mus musculus -> mmusculus
                _ensembl_name(cells[1].text)
            )
        )

    return result