Source code for pypath.inputs.exocarta
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
from future.utils import iteritems
import collections
import pypath.share.curl as curl
import pypath.share.settings as settings
import pypath.resources.urls as urls
import pypath.utils.taxonomy as taxonomy
[docs]
def get_exocarta(organism = 9606, types = None):
    """
    Retrieves records from the ExoCarta database.

    Thin wrapper around ``_get_exocarta_vesiclepedia`` with the database
    fixed to ``'exocarta'``.

    :param organism:
        Organism identifier, handed over unchanged to
        ``_get_exocarta_vesiclepedia``. Default: 9606.
    :param set types:
        Molecule types to retrieve. Possible values: `protein`, `mrna`.

    :return:
        Whatever ``_get_exocarta_vesiclepedia`` returns for these
        arguments.
    """

    query = {
        'database': 'exocarta',
        'organism': organism,
        'types': types,
    }

    return _get_exocarta_vesiclepedia(**query)
[docs]
def get_vesiclepedia(organism = 9606, types = None):
    """
    Retrieves records from the Vesiclepedia database.

    Thin wrapper around ``_get_exocarta_vesiclepedia`` with the database
    fixed to ``'vesiclepedia'``.

    :param organism:
        Organism identifier, handed over unchanged to
        ``_get_exocarta_vesiclepedia``. Default: 9606.
    :param set types:
        Molecule types to retrieve. Possible values: `protein`, `mrna`.

    :return:
        Whatever ``_get_exocarta_vesiclepedia`` returns for these
        arguments.
    """

    query = {
        'database': 'vesiclepedia',
        'organism': organism,
        'types': types,
    }

    return _get_exocarta_vesiclepedia(**query)
def _get_exocarta_vesiclepedia(
        database = 'exocarta',
        organism = 9606,
        types = None
    ):
    """
    Downloads and iterates the records of ExoCarta or Vesiclepedia.

    This is a generator: nothing is downloaded until the first record is
    requested. First the table of studies is read and kept in a dict
    keyed by the integer in its first column, then the molecule table is
    streamed and the matching records are yielded one by one.

    :param str database:
        Which database to download: ExoCarta or Vesiclepedia. The value
        is lowercased and used as a key in ``urls.urls``.
    :param organism:
        Organism identifier; must be a key of
        ``taxonomy.phosphoelm_taxids``, otherwise `KeyError` is raised.
        Only records of this organism are yielded. Default: 9606.
    :param set types:
        Molecule types to retrieve. Possible values: `protein`, `mrna`.
        If `None` or empty, only `protein` is retrieved.

    :return:
        Yields tuples of four elements: Entrez ID, gene symbol, NCBI
        Taxonomy ID and study reference. The study reference is a tuple
        of the PubMed ID (`None` if the table contains `0`), a tuple of
        the organisms of the study and the sample source. For
        Vesiclepedia it has a fourth element: a tuple obtained by
        splitting the 12th column of the study table by `/`
        (presumably the vesicle types -- to be confirmed against the
        file format), or an empty tuple if that column is empty.
    """

    database = database.lower()
    wanted_types = types or {'protein'}

    # the data files refer to organisms by the labels stored as values
    # in `phosphoelm_taxids`, hence we translate in both directions
    organism_label = taxonomy.phosphoelm_taxids[organism]
    label_to_taxid = {
        label: taxid
        for taxid, label in iteritems(taxonomy.phosphoelm_taxids)
    }

    # collecting the references
    study_download = curl.Curl(
        urls.urls[database]['url_study'],
        large = True,
        silent = False,
    )
    # the first line is discarded (presumably the column header)
    next(study_download.result)

    studies = {}

    for line in study_download.result:

        cols = line.split('\t')

        labels = (label.strip() for label in cols[2].split('|'))
        study_organisms = tuple(
            label_to_taxid[label]
            for label in labels
            if label in label_to_taxid
        )

        # studies without any known organism are not recorded
        if not study_organisms:

            continue

        study = (
            None if cols[1] == '0' else cols[1], # PubMed ID
            study_organisms, # organism
            cols[4], # sample source (cell type, tissue)
        )

        if database == 'vesiclepedia':

            vesicle_type = cols[11].strip()
            study += (
                tuple(vesicle_type.split('/')) if vesicle_type else (),
            )

        studies[int(cols[0])] = study

    # processing proteins
    molecule_download = curl.Curl(
        urls.urls[database]['url_protein'],
        large = True,
        silent = False,
        slow = True,
    )
    # the first line is discarded (presumably the column header)
    next(molecule_download.result)

    for line in molecule_download.result:

        cols = line.split('\t')

        if cols[4] == organism_label and cols[1] in wanted_types:

            yield (
                cols[2], # Entrez ID
                cols[3], # Gene Symbol
                label_to_taxid[cols[4]], # NCBI Taxonomy ID
                studies[int(cols[5])], # study reference
            )