Source code for pypath.inputs.oreganno
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import re
import collections
import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.utils.taxonomy as taxonomy
[docs]
def oreganno_raw():
    """
    Downloads TF-target data from the ORegAnnO database.

    The location of the file is read from ``urls.urls['oreganno']['url']``
    and it is retrieved by ``curl.Curl``. The first line of the result is
    discarded, empty lines are skipped, and every other line is split by
    tabs.

    Yields:
        Tuples of raw records: the tab separated fields of one line, each
        field stripped of its leading and trailing whitespace.
    """

    url = urls.urls['oreganno']['url']
    c = curl.Curl(url, silent = False, large = True, slow = True)
    data = c.result

    # Drop the first line (presumably the column header -- confirm against
    # the downloaded file). The default value is important here: a bare
    # `next(data)` on an exhausted stream raises StopIteration, which inside
    # a generator is converted to RuntimeError (PEP 479) instead of simply
    # ending the iteration.
    next(data, None)

    for l in data:

        if not l:

            continue

        yield tuple(x.strip() for x in l.split('\t'))
[docs]
def oreganno_interactions(organism = 9606):
    """
    Downloads TF-target interactions from the ORegAnnO database.

    Iterates the records provided by ``oreganno_raw`` and keeps only those
    where the organism field equals the requested organism, the type is
    ``TRANSCRIPTION FACTOR BINDING SITE``, the outcome is
    ``POSITIVE OUTCOME`` and neither the target nor the TF field is ``N/A``.

    Names of at least 3 characters are reduced to their first run of 3 or
    more letters, digits or hyphens, and the hyphens are removed from it.
    Shorter names, and names which contain no such run, are passed through
    unchanged.

    Args:
        organism: The organism to select. If the value is a key in
            ``taxonomy.phosphoelm_taxids``, it is translated by that
            mapping (presumably from NCBI Taxonomy ID to the organism label
            used in the data -- verify against the mapping); any other
            value is compared to the organism field as it is.

    Yields:
        Named tuples of TF, target and literature references. The ``pmid``
        field is an empty string if the reference is ``N/A`` or missing.
    """

    OregannoInteraction = collections.namedtuple(
        'OregannoInteraction',
        ('tf', 'target', 'pmid'),
    )

    taxids = taxonomy.phosphoelm_taxids

    if organism in taxids:

        organism = taxids[organism]

    nsep = re.compile(r'([-A-Za-z0-9]{3,})[\s/\(]*.*')
    nrem = re.compile(r'[-/]')


    def clean_name(name):
        # Shared processing of the TF and the target names.

        if len(name) < 3:

            return name

        match = nsep.search(name)

        # A name may be 3 or more characters long without containing 3
        # consecutive accepted characters (e.g. "IL 2"); indexing the
        # result of `findall` raised IndexError for these, so we fall
        # back to the name as it is, just like for the short names.
        if match is None:

            return name

        return nrem.sub('', match.group(1))


    for l in oreganno_raw():

        # The fields up to index 7 are inspected below; an incomplete
        # record can not be evaluated, hence we skip it instead of
        # failing with IndexError.
        if len(l) < 8:

            continue

        if (
            l[1] == organism and
            l[3] == 'TRANSCRIPTION FACTOR BINDING SITE' and
            l[2] == 'POSITIVE OUTCOME' and
            l[4] != 'N/A' and
            l[7] != 'N/A'
        ):

            yield OregannoInteraction(
                tf = clean_name(l[7]),
                target = clean_name(l[4]),
                # A record without the reference field is still a valid
                # interaction, only without literature reference.
                pmid = l[11] if len(l) > 11 and l[11] != 'N/A' else '',
            )