Source code for pypath.inputs.exocarta

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from future.utils import iteritems

import collections

import pypath.share.curl as curl
import pypath.share.settings as settings
import pypath.resources.urls as urls
import pypath.utils.taxonomy as taxonomy


def get_exocarta(organism = 9606, types = None):
    """
    Retrieves records from the ExoCarta database.

    Thin wrapper around ``_get_exocarta_vesiclepedia`` with the database
    fixed to ``'exocarta'``; the other arguments are passed through
    unchanged.

    :param int organism:
        Organism identifier, forwarded as is. Defaults to 9606.
    :param set types:
        Molecule types to retrieve. Possible values: `protein`, `mrna`.

    :return:
        The generator created by ``_get_exocarta_vesiclepedia``.
    """

    return _get_exocarta_vesiclepedia(
        'exocarta',
        organism = organism,
        types = types,
    )


def get_vesiclepedia(organism = 9606, types = None):
    """
    Retrieves records from the Vesiclepedia database.

    Thin wrapper around ``_get_exocarta_vesiclepedia`` with the database
    fixed to ``'vesiclepedia'``; the other arguments are passed through
    unchanged.

    :param int organism:
        Organism identifier, forwarded as is. Defaults to 9606.
    :param set types:
        Molecule types to retrieve. Possible values: `protein`, `mrna`.

    :return:
        The generator created by ``_get_exocarta_vesiclepedia``.
    """

    return _get_exocarta_vesiclepedia(
        'vesiclepedia',
        organism = organism,
        types = types,
    )


def _get_exocarta_vesiclepedia(
        database = 'exocarta',
        organism = 9606,
        types = None
    ):
    """
    Downloads and processes the ExoCarta or the Vesiclepedia database.

    This is a generator function: the downloads start only when the first
    record is requested.

    :param str database:
        Which database to download: ExoCarta or Vesiclepedia. Case
        insensitive; used as a key in ``urls.urls``.
    :param int organism:
        A key of ``taxonomy.phosphoelm_taxids`` (the default, 9606, and
        the yielded values suggest NCBI Taxonomy IDs). Only records of
        this organism are yielded.
    :param set types:
        Molecule types to retrieve. Possible values: `protein`, `mrna`.
        Defaults to ``{'protein'}``.

    :return:
        Yields tuples of four elements: Entrez ID, gene symbol, NCBI
        Taxonomy ID and study reference. The study reference is a tuple
        of the PubMed ID (`None` if the file contains ``'0'``), a tuple
        of the organisms of the study, and the sample source (cell type,
        tissue). For Vesiclepedia it has a fourth element: a tuple of the
        ``/`` separated values from the 12th column of the study table
        (presumably vesicle types).

    :raises KeyError:
        If `organism` is not in ``taxonomy.phosphoelm_taxids``, if
        `database` is not in ``urls.urls``, or if a molecule record
        refers to a study which has not been loaded.
    """

    database = database.lower()
    types = types or {'protein'}
    # the data files refer to organisms by the labels in
    # `phosphoelm_taxids`; the reverse mapping translates them back
    organism = taxonomy.phosphoelm_taxids[organism]
    taxid_rev = {
        label: taxid
        for taxid, label in iteritems(taxonomy.phosphoelm_taxids)
    }

    # collecting the references
    url_s = urls.urls[database]['url_study']
    c = curl.Curl(url_s, large = True, silent = False)
    # skipping the header; the default value prevents an empty download
    # from raising StopIteration, which inside a generator is turned
    # into RuntimeError (PEP 479)
    next(c.result, None)
    studies = {}

    for line in c.result:

        fields = line.split('\t')

        organisms = tuple(
            taxid_rev[label.strip()]
            for label in fields[2].split('|')
            if label.strip() in taxid_rev
        )

        # studies without any known organism are not stored
        if not organisms:

            continue

        stud = (
            fields[1] if fields[1] != '0' else None, # PubMed ID
            organisms, # organism
            fields[4], # sample source (cell type, tissue)
        )

        if database == 'vesiclepedia':

            vtype = fields[11].strip()

            stud += (
                tuple(vtype.split('/')) if vtype else (),
            )

        studies[int(fields[0])] = stud

    # processing proteins
    url_p = urls.urls[database]['url_protein']
    c = curl.Curl(url_p, large = True, silent = False, slow = True)
    # skipping the header, see the note at the study table above
    next(c.result, None)

    for line in c.result:

        fields = line.split('\t')

        if fields[4] != organism or fields[1] not in types:

            continue

        yield (
            fields[2], # Entrez ID
            fields[3], # Gene Symbol
            taxid_rev[fields[4]], # NCBI Taxonomy ID
            # NOTE(review): raises KeyError if the study has been skipped
            # above or is missing from the study table; kept as is
            # because callers may rely on the fixed record shape
            studies[int(fields[5])], # study reference
        )