Source code for pypath.inputs.cpad
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import csv
import collections
import pypath.share.curl as curl
import pypath.share.common as common
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping
[docs]
def get_cpad():
    """
    Retrieves the CPAD table and exposes it as an iterator of records.

    The address is taken from the ``cpad`` entry of the URL registry
    (``urls.urls``) and handed to ``curl.Curl`` with ISO-8859-1 as the
    requested encoding.

    Returns:
        A ``csv.DictReader`` reading the tab separated content in the
        ``result`` attribute of the ``Curl`` object. As no field names
        are given, the reader takes them from the first row.
    """

    download = curl.Curl(
        urls.urls['cpad']['url'],
        silent = False,
        large = True,
        encoding = 'iso-8859-1',
    )

    return csv.DictReader(download.result, delimiter = '\t')
[docs]
def cpad_annotations(include_unknown_type = False):
    """
    Builds CPAD annotation records grouped by regulator identifier.

    Rows whose ``Regulator`` field is ``NULL`` are ignored. The field is
    split at `` and ``, and each resulting name is resolved in this
    order:

    1. as a ``genesymbol`` translated to ``uniprot`` (type ``protein``);
    2. with the ``hsa-`` prefix, as ``mir-mat-name`` translated to
       ``mirbase``, and if that gives nothing, as ``mir-name``
       translated to ``mirbase`` (type ``mirna``);
    3. if neither succeeds, the name is kept unchanged with the type
       ``unknown``, but only if ``include_unknown_type`` is true.

    Args:
        include_unknown_type: Keep the regulators which could not be
            translated to any identifier; by default these are dropped.

    Returns:
        A dict with regulator identifiers as keys and sets of
        ``CpadAnnotation`` named tuples as values.
    """

    CpadAnnotation = collections.namedtuple(
        'CpadAnnotation',
        [
            'regulator_type',
            'effect_on_pathway',
            'pathway',
            'effect_on_cancer',
            'effect_on_cancer_outcome',
            'cancer',
            'pathway_category',
        ]
    )

    annotations = collections.defaultdict(set)

    for row in get_cpad():

        regulators = row['Regulator']

        if regulators == 'NULL':

            continue

        for name in regulators.split(' and '):

            # first attempt: the name is a gene symbol of a protein
            identifiers = mapping.map_name0(name, 'genesymbol', 'uniprot')
            kind = 'protein'

            if not identifiers:

                # second attempt: the name belongs to a miRNA; the
                # second lookup runs only if the first one gave nothing
                mirna_name = 'hsa-%s' % name
                identifiers = (
                    mapping.map_name(
                        mirna_name,
                        'mir-mat-name',
                        'mirbase',
                    ) or
                    mapping.map_name(
                        mirna_name,
                        'mir-name',
                        'mirbase',
                    )
                )
                kind = 'mirna'

            if not identifiers:

                if not include_unknown_type:

                    continue

                # nothing matched: fall back to the name as it is
                identifiers = name
                kind = 'unknown'

            # a single identifier arrives as a string, wrap it, so it can
            # be handled the same way as a collection of identifiers
            if isinstance(identifiers, str):

                identifiers = (identifiers,)

            # the record is the same for all identifiers of one regulator
            annotation = CpadAnnotation(
                regulator_type = kind,
                effect_on_pathway = row['Regulator_Type'],
                pathway = row['Pathway'],
                effect_on_cancer = row['Regulation_Type'],
                effect_on_cancer_outcome = row['Outcome_Description'],
                cancer = row['Cancer'],
                pathway_category = row['Pathway_Category'],
            )

            for identifier in identifiers:

                annotations[identifier].add(annotation)

    return dict(annotations)
[docs]
def cpad_pathway_cancer():
    """
    Extracts the pathway-cancer relationships from CPAD, leaving the
    regulators out of consideration.

    Returns:
        A tuple of two dicts, both with sets of ``CpadPathwayCancer``
        named tuples as values: the first is keyed by the ``Cancer``,
        the second by the ``Pathway`` field of the records.
    """

    CpadPathwayCancer = collections.namedtuple(
        'CpadPathwayCancer',
        [
            'pathway',
            'cancer',
            'pathway_category',
            'effect_on_cancer',
            'effect_on_cancer_outcome',
        ]
    )

    cancer_index = collections.defaultdict(set)
    pathway_index = collections.defaultdict(set)

    for row in get_cpad():

        pathway = row['Pathway']
        cancer = row['Cancer']

        relation = CpadPathwayCancer(
            pathway = pathway,
            cancer = cancer,
            pathway_category = row['Pathway_Category'],
            effect_on_cancer = row['Regulation_Type'],
            effect_on_cancer_outcome = row['Outcome_Description'],
        )

        # the very same record is filed into both indices
        cancer_index[cancer].add(relation)
        pathway_index[pathway].add(relation)

    return dict(cancer_index), dict(pathway_index)