Source code for pypath.inputs.huri

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import re
import collections
import itertools

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.utils.mapping as mapping
import pypath.inputs.common as inputs_common
import pypath.inputs.cell as cell



[docs]
def rolland_hi_ii_14():
    """
    Loads the HI-II-14 unbiased interactome from the large scale screening
    of Rolland 2014.

    The supplementary spreadsheet is retrieved by
    ``cell.cell_supplementary`` and the sheet named ``2G`` is read from it.
    The first row of the sheet is skipped.

    Yields one list per remaining row; each cell is truncated at its first
    dot (the part before the dot is kept).
    """

    hiii14_urls = urls.urls['hiii14']

    xls_path = cell.cell_supplementary(
        supp_url = hiii14_urls['url'],
        article_url = hiii14_urls['article_url'],
    )
    sheet_rows = inputs_common.read_xls(xls_path, sheet = '2G')

    # the first row is dropped, only the rows after it are processed
    for record in sheet_rows[1:]:

        yield [field.split('.', 1)[0] for field in record]




[docs]
def vidal_hi_iii_old(fname):
    """
    Loads the HI-III unbiased interactome from preliminary data of
    the next large scale screening of Vidal Lab.

    The data is accessible here:
        http://interactome.dfci.harvard.edu/H_sapiens/dload_trk.php
    You need to register and accept the license terms.

    Args:
        fname: Path of the local file, opened by ``curl.FileOpener``.

    Returns:
        List of lists: the tab separated fields of each line, without
        the first line of the file.
    """

    opener = curl.FileOpener(fname)

    records = [
        line.strip().split('\t')
        for line in opener.result
    ]

    # the first line is discarded
    return records[1:]




[docs]
def hi_iii_old():
    """
    Loads the unbiased human interactome version III (HI-III).
    This is an unpublished data and its use is limited.
    Please check the conditions and licensing terms carefully at
    http://interactome.baderlab.org.

    The data is requested by HTTP POST from the URL stored in
    ``urls.urls['hid']['hi-iii']`` and processed line by line.

    Yields ``HiiiiInteraction`` named tuples with the fields:
        id_a, id_b: identifiers with their first 10 characters removed
            (presumably the ``uniprotkb:`` prefix -- verify against data).
        isoform_a, isoform_b: isoform numbers as int, 1 if the identifier
            carries no ``-<isoform>`` suffix.
        screens: tuple of int from the "Found in screens" annotation,
            empty tuple if the row has no such annotation.
        score: the "author score" as float, None if the row has none.
    """

    HiiiiInteraction = collections.namedtuple(
        'HiiiiInteraction',
        [
            'id_a',
            'id_b',
            'isoform_a',
            'isoform_b',
            'screens',
            'score',
        ]
    )


    rescore = re.compile(r'author score: ([\d\.]+)')
    rescreens = re.compile(r'Found in screens ([\d,]+)')

    url = urls.urls['hid']['hi-iii']
    post_data = {
        'form[request_dataset]': '2',
        'form[request_file_format]': 'psi',
    }

    c = curl.Curl(
        url,
        silent = False,
        large = True,
        post = post_data,
        slow = True,
    )

    for row in c.result:

        if not row.strip():

            continue

        id_a, id_b, rest = row.split(' ', maxsplit = 2)
        id_a, isoform_a = id_a.split('-') if '-' in id_a else (id_a, 1)
        id_b, isoform_b = id_b.split('-') if '-' in id_b else (id_b, 1)

        sc = rescore.search(rest)
        score = float(sc.groups()[0]) if sc else None

        # the screens annotation is handled the same tolerant way as the
        # score: a missing annotation gives an empty tuple instead of
        # an AttributeError on the failed regex search
        scr = rescreens.search(rest)
        screens = (
            tuple(int(i) for i in scr.groups()[0].split(',') if i)
                if scr else
            ()
        )

        yield HiiiiInteraction(
            id_a = id_a[10:],
            id_b = id_b[10:],
            isoform_a = int(isoform_a),
            isoform_b = int(isoform_b),
            screens = screens,
            score = score,
        )




[docs]
def lit_bm_13_interactions():
    """
    Downloads and processes Lit-BM-13 dataset, the 2013 version of the
    high confidence literature curated interactions from CCSB.

    The first line of the download is skipped, the remaining lines are
    split by tabs.

    Yields ``LitBm13Interaction`` named tuples: columns 0 and 2 become
    ``entrez_a`` and ``entrez_b``, columns 1 and 3 become ``genesymbol_a``
    and ``genesymbol_b``.
    """

    record = collections.namedtuple(
        'LitBm13Interaction',
        [
            'entrez_a',
            'entrez_b',
            'genesymbol_a',
            'genesymbol_b',
        ]
    )

    download = curl.Curl(
        urls.urls['hid']['lit-bm-13'],
        silent = False,
        large = True,
        slow = True,
    )

    # consume the first line, it is not processed
    next(download.result)

    for line in download.result:

        fields = line.strip().split('\t')

        # positional order follows the field order of the named tuple
        yield record(fields[0], fields[2], fields[1], fields[3])




[docs]
def lit_bm_17_interactions():
    """
    Downloads and processes Lit-BM-17 dataset, the 2017 version of the
    high confidence literature curated interactions from CCSB.

    The first line of the download is skipped, the remaining lines are
    split by tabs.

    Yields ``LitBm17Interaction`` named tuples with the fields:
        id_a, id_b: columns 0 and 1 without their first 10 characters
            (presumably the ``uniprotkb:`` prefix -- verify against data).
        pubmed: column 8 without its first 7 characters
            (presumably the ``pubmed:`` prefix -- verify against data).
        score: column 14 without its first 13 characters, as float
            (presumably the ``author score:`` prefix -- verify against
            data).
    """

    LitBm17Interaction = collections.namedtuple(
        'LitBm17Interaction',
        [
            'id_a',
            'id_b',
            'pubmed',
            'score',
        ]
    )

    url = urls.urls['hid']['lit-bm-17']

    # a single streaming download is enough: the file is consumed line
    # by line, there is no need to load the full content into memory first
    c = curl.Curl(url, silent = False, large = True, slow = True)

    # skip the first line
    _ = next(c.result)

    for row in c.result:

        row = row.strip().split('\t')

        id_a = row[0][10:]
        id_b = row[1][10:]

        pubmed = row[8][7:]
        score = float(row[14][13:])

        yield LitBm17Interaction(
            id_a = id_a,
            id_b = id_b,
            pubmed = pubmed,
            score = score,
        )




[docs]
def huri_interactions():
    """
    Interactions from the ``huri`` dataset.

    Thin wrapper around ``_huri_interactions``, which resolves the dataset
    key to a URL and yields ``HuriInteraction`` named tuples.
    """

    dataset = 'huri'

    return _huri_interactions(dataset = dataset)




[docs]
def yu2011_interactions():
    """
    Interactions from the ``yu-2011`` dataset.

    Thin wrapper around ``_huri_interactions``, which resolves the dataset
    key to a URL and yields ``HuriInteraction`` named tuples.
    """

    dataset = 'yu-2011'

    return _huri_interactions(dataset = dataset)




[docs]
def hi_union_interactions():
    """
    Interactions from the ``hi-union`` dataset.

    Thin wrapper around ``_huri_interactions``, which resolves the dataset
    key to a URL and yields ``HuriInteraction`` named tuples.
    """

    dataset = 'hi-union'

    return _huri_interactions(dataset = dataset)




[docs]
def yang2016_interactions():
    """
    Interactions from the ``yang-2016`` dataset.

    Thin wrapper around ``_huri_interactions``, which resolves the dataset
    key to a URL and yields ``HuriInteraction`` named tuples.
    """

    dataset = 'yang-2016'

    return _huri_interactions(dataset = dataset)




[docs]
def hi_ii_interactions():
    """
    Interactions from Rolland 2014 https://pubmed.ncbi.nlm.nih.gov/25416956/.

    Thin wrapper around ``_huri_interactions`` with the ``hi-ii-14-pmi``
    dataset key; yields ``HuriInteraction`` named tuples.
    """

    dataset = 'hi-ii-14-pmi'

    return _huri_interactions(dataset = dataset)




[docs]
def hi_i_interactions():
    """
    Interactions from Rual 2005 https://pubmed.ncbi.nlm.nih.gov/16189514/.

    Thin wrapper around ``_huri_interactions`` with the ``hi-i-05-pmi``
    dataset key; yields ``HuriInteraction`` named tuples.
    """

    dataset = 'hi-i-05-pmi'

    return _huri_interactions(dataset = dataset)




[docs]
def lit_bm_interactions():
    """
    Literature collected interactions from Luck 2020.

    Each line of the download is split by tabs; the first two columns are
    translated from ``ensembl`` to ``uniprot`` identifiers by
    ``mapping.map_name``.

    Yields one ``LitBmInteraction`` named tuple for every combination of
    the UniProt IDs obtained for the two partners.
    """

    record = collections.namedtuple(
        'LitBmInteraction',
        ['uniprot_a', 'uniprot_b'],
    )

    download = curl.Curl(
        urls.urls['hid']['lit-bm'],
        large = True,
        silent = False,
        slow = True,
    )

    for line in download.result:

        fields = line.strip().split('\t')

        partners_a = mapping.map_name(fields[0], 'ensembl', 'uniprot')
        partners_b = mapping.map_name(fields[1], 'ensembl', 'uniprot')

        # one record for each pair in the Cartesian product of the
        # two sets of translated identifiers
        yield from itertools.starmap(
            record,
            itertools.product(partners_a, partners_b),
        )



def _huri_interactions(dataset):
    """
    Downloads and processes one interaction dataset.

    Args:
        dataset: Either a URL (anything starting with ``http``) or a key
            in ``urls.urls['hid']``.

    Yields ``HuriInteraction`` named tuples, one for each combination of
    the UniProt IDs the two partners translate to:
        uniprot_a, uniprot_b: UniProt IDs as returned by
            ``mapping.map_name``.
        isoform_a, isoform_b: isoform numbers as int, 1 if the identifier
            carries no isoform suffix.
        score: the "author score" of the row as float, None if the row
            has none.

    Rows with fewer than two whitespace separated fields, and rows whose
    first two fields do not look like ``<database>:<identifier>``, are
    skipped.
    """

    # group 1: the identifier; group 2: the isoform number after an
    # optional dash, of any number of digits (empty if absent)
    reuniprot = re.compile(r'[a-z]+:([\w\.]+)(?:-?([0-9]*))?')
    rescore = re.compile(r'author score: ([\.0-9]+)')

    HuriInteraction = collections.namedtuple(
        'HuriInteraction',
        [
            'uniprot_a',
            'uniprot_b',
            'isoform_a',
            'isoform_b',
            'score',
        ]
    )


    def _map_ids(_id):
        """
        Translates one identifier to UniProt; identifiers starting with
        ENSP or ENST (in any letter case) are translated from the
        ``ensp`` or ``enst`` ID type, everything else is handled as UniProt.
        """

        # lowercase before the membership test, otherwise upper case
        # Ensembl IDs never match the lower case ID type names
        prefix = _id[:4].lower()
        id_type = prefix if prefix in {'ensp', 'enst'} else 'uniprot'

        return mapping.map_name(_id, id_type, 'uniprot')


    url = dataset if dataset.startswith('http') else urls.urls['hid'][dataset]
    c = curl.Curl(url, large = True, silent = False, slow = True)
    # the Curl object is used only to find out the path of the downloaded
    # file; the file is then opened again by FileOpener
    path = (
        c.fileobj.name
            if hasattr(c, 'fileobj') else
        c.cache_file_name or c.outfile
    )
    del c
    c = curl.FileOpener(path)

    for row in c.result:

        # the score is searched in the whole line, before splitting
        score = rescore.search(row)

        if score:

            score = float(score.groups()[0])

        row = row.split()

        if len(row) < 2:

            continue

        match_a = reuniprot.match(row[0])
        match_b = reuniprot.match(row[1])

        # header or malformed lines: nothing to extract
        if not (match_a and match_b):

            continue

        id_a, isoform_a = match_a.groups()
        id_b, isoform_b = match_b.groups()

        uniprots_a = _map_ids(id_a)
        uniprots_b = _map_ids(id_b)

        for uniprot_a, uniprot_b in itertools.product(uniprots_a, uniprots_b):

            yield HuriInteraction(
                uniprot_a = uniprot_a,
                uniprot_b = uniprot_b,
                isoform_a = int(isoform_a) if isoform_a else 1,
                isoform_b = int(isoform_b) if isoform_b else 1,
                score = score,
            )