Source code for pypath.inputs.oreganno

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import re
import collections

import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.utils.taxonomy as taxonomy


[docs] def oreganno_raw(): """ Downloads TF-target data from the ORegAnnO database. Yields: Tuples of raw records. """ url = urls.urls['oreganno']['url'] c = curl.Curl(url, silent = False, large = True, slow = True) data = c.result _ = next(data) for l in data: if not l: continue yield tuple(x.strip() for x in l.split('\t'))
[docs] def oreganno_interactions(organism = 9606): """ Downloads TF-target interactions from the ORegAnnO database. Yields: Named tuples of TF, target and literature references. """ OregannoInteraction = collections.namedtuple( 'OregannoInteraction', ('tf', 'target', 'pmid'), ) taxids = taxonomy.phosphoelm_taxids if organism in taxids: organism = taxids[organism] nsep = re.compile(r'([-A-Za-z0-9]{3,})[\s/\(]*.*') nrem = re.compile(r'[-/]') for l in oreganno_raw(): if (l[1] == organism and l[3] == 'TRANSCRIPTION FACTOR BINDING SITE' and l[2] == 'POSITIVE OUTCOME' and l[4] != 'N/A' and l[7] != 'N/A' ): yield OregannoInteraction( tf = ( l[7] if len(l[7]) < 3 else nrem.sub('', nsep.findall(l[7])[0]) ), target = ( l[4] if len(l[4]) < 3 else nrem.sub('', nsep.findall(l[4])[0]) ), pmid = l[11] if l[11] != 'N/A' else '', )