Source code for pypath.inputs.cpad

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import csv
import collections

import pypath.share.curl as curl
import pypath.share.common as common
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping



[docs]
def get_cpad():

    url = urls.urls['cpad']['url']
    c = curl.Curl(url, silent = False, large = True, encoding = 'iso-8859-1')
    reader = csv.DictReader(c.result, delimiter = '\t')

    return reader




[docs]
def cpad_annotations(include_unknown_type = False):

    CpadAnnotation = collections.namedtuple(
        'CpadAnnotation',
        [
            'regulator_type',
            'effect_on_pathway',
            'pathway',
            'effect_on_cancer',
            'effect_on_cancer_outcome',
            'cancer',
            'pathway_category',
        ]
    )

    cpad = get_cpad()

    result = collections.defaultdict(set)

    for rec in cpad:
        if rec['Regulator'] == 'NULL':
            continue

        for regulator in rec['Regulator'].split(' and '):
            uniprot = mapping.map_name0(regulator, 'genesymbol', 'uniprot')

            if uniprot:
                regulator_name = uniprot
                regulator_type = 'protein'

            else:
                mirbase = mapping.map_name(
                    'hsa-%s' % regulator,
                    'mir-mat-name',
                    'mirbase',
                )

                if not mirbase:
                    mirbase = mapping.map_name(
                        'hsa-%s' % regulator,
                        'mir-name',
                        'mirbase',
                    )

                if mirbase:
                    regulator_name = mirbase
                    regulator_type = 'mirna'

                else:
                    if include_unknown_type:
                        regulator_name = regulator
                        regulator_type = 'unknown'

                    else:
                        continue

            if isinstance(regulator_name, str):
                regulator_name = (regulator_name,)

            for regulator_name_0 in regulator_name:
                record = CpadAnnotation(
                    regulator_type = regulator_type,
                    effect_on_pathway = rec['Regulator_Type'],
                    effect_on_cancer = rec['Regulation_Type'],
                    effect_on_cancer_outcome = rec['Outcome_Description'],
                    pathway = rec['Pathway'],
                    pathway_category = rec['Pathway_Category'],
                    cancer = rec['Cancer'],
                )

                result[regulator_name_0].add(record)

    return dict(result)




[docs]
def cpad_pathway_cancer():
    """
    Collects only the pathway-cancer relationships. Returns sets of records
    grouped in dicts by cancer and by pathway.
    """

    CpadPathwayCancer = collections.namedtuple(
        'CpadPathwayCancer',
        [
            'pathway',
            'cancer',
            'pathway_category',
            'effect_on_cancer',
            'effect_on_cancer_outcome',
        ]
    )

    cpad = get_cpad()

    by_cancer = collections.defaultdict(set)
    by_pathway = collections.defaultdict(set)

    for rec in cpad:

        record = CpadPathwayCancer(
            pathway = rec['Pathway'],
            cancer = rec['Cancer'],
            pathway_category = rec['Pathway_Category'],
            effect_on_cancer = rec['Regulation_Type'],
            effect_on_cancer_outcome = rec['Outcome_Description'],
        )

        by_cancer[rec['Cancer']].add(record)
        by_pathway[rec['Pathway']].add(record)

    return dict(by_cancer), dict(by_pathway)