#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import re
import collections
import itertools
import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.utils.mapping as mapping
import pypath.inputs.common as inputs_common
import pypath.inputs.cell as cell
[docs]
def rolland_hi_ii_14():
    """
    Loads the HI-II-14 unbiased interactome from the large scale screening
    of Rolland 2014.

    The table is read from sheet `2G` of the supplementary file obtained
    by ``cell.cell_supplementary``; the first row of the sheet is skipped.

    Yields one list per row, each cell truncated at its first dot.
    """

    supp_path = cell.cell_supplementary(
        supp_url = urls.urls['hiii14']['url'],
        article_url = urls.urls['hiii14']['article_url'],
    )
    sheet = inputs_common.read_xls(supp_path, sheet = '2G')

    # the first row is dropped (presumably the column headers)
    for record in sheet[1:]:

        fields = []

        for value in record:

            # keep only the part of the cell before the first dot
            fields.append(value.split('.')[0])

        yield fields
[docs]
def vidal_hi_iii_old(fname):
    """
    Loads the HI-III unbiased interactome from preliminary data of
    the next large scale screening of Vidal Lab.

    The data is accessible here:
    http://interactome.dfci.harvard.edu/H_sapiens/dload_trk.php
    You need to register and accept the license terms.

    The file at `fname` is opened by ``curl.FileOpener``.

    Returns a list of lists: the tab separated fields of each line,
    without the first line.
    """

    opener = curl.FileOpener(fname)
    records = []

    for line in opener.result:

        records.append(line.strip().split('\t'))

    # the first line is dropped (presumably the header)
    return records[1:]
[docs]
def hi_iii_old():
    """
    Loads the unbiased human interactome version III (HI-III).

    This is an unpublished data and its use is limited.
    Please check the conditions and licensing terms carefully at
    http://interactome.baderlab.org.

    The dataset is requested by a POST query from the URL stored at
    ``urls.urls['hid']['hi-iii']`` and processed line by line.

    Yields ``HiiiiInteraction`` named tuples with the fields `id_a`,
    `id_b`, `isoform_a`, `isoform_b` (integers, 1 if the identifier
    carries no isoform suffix), `screens` (tuple of integers, empty if
    the row does not list any screen) and `score` (float, or `None` if
    the row has no author score).
    """

    HiiiiInteraction = collections.namedtuple(
        'HiiiiInteraction',
        [
            'id_a',
            'id_b',
            'isoform_a',
            'isoform_b',
            'screens',
            'score',
        ]
    )

    rescore = re.compile(r'author score: ([\d\.]+)')
    rescreens = re.compile(r'Found in screens ([\d,]+)')

    url = urls.urls['hid']['hi-iii']
    post_data = {
        'form[request_dataset]': '2',
        'form[request_file_format]': 'psi',
    }
    c = curl.Curl(
        url,
        silent = False,
        large = True,
        post = post_data,
        slow = True,
    )

    for row in c.result:

        if not row.strip():

            continue

        # NOTE(review): the fields are split on a single space here;
        # confirm the separator against the file served by the resource
        id_a, id_b, rest = row.split(' ', maxsplit = 2)
        # identifiers without a dash get isoform 1
        id_a, isoform_a = id_a.split('-') if '-' in id_a else (id_a, 1)
        id_b, isoform_b = id_b.split('-') if '-' in id_b else (id_b, 1)

        sc = rescore.search(rest)
        score = float(sc.groups()[0]) if sc else None

        # a row without screen information gets an empty tuple instead of
        # failing on the missing match, the same way as a missing score
        # is tolerated above; empty items (e.g. from a trailing comma)
        # are ignored
        scr = rescreens.search(rest)
        screens = (
            tuple(int(i) for i in scr.groups()[0].split(',') if i)
                if scr else
            ()
        )

        yield HiiiiInteraction(
            # the first 10 characters are removed from the identifiers
            # (presumably a database prefix such as `uniprotkb:`)
            id_a = id_a[10:],
            id_b = id_b[10:],
            isoform_a = int(isoform_a),
            isoform_b = int(isoform_b),
            screens = screens,
            score = score,
        )
[docs]
def lit_bm_13_interactions():
    """
    Downloads and processes Lit-BM-13 dataset, the 2013 version of the
    high confidence literature curated interactions from CCSB.

    The file is retrieved from the URL at ``urls.urls['hid']['lit-bm-13']``;
    its first line is skipped, the rest is split by tabs.

    Yields ``LitBm13Interaction`` named tuples: the Entrez IDs come from
    the first and third columns, the gene symbols from the second and
    fourth.
    """

    LitBm13Interaction = collections.namedtuple(
        'LitBm13Interaction',
        ('entrez_a', 'entrez_b', 'genesymbol_a', 'genesymbol_b'),
    )

    download = curl.Curl(
        urls.urls['hid']['lit-bm-13'],
        silent = False,
        large = True,
        slow = True,
    )

    # consume the first line (presumably the header)
    next(download.result)

    for line in download.result:

        fields = line.strip().split('\t')
        entrez_a = fields[0]
        entrez_b = fields[2]
        genesymbol_a = fields[1]
        genesymbol_b = fields[3]

        yield LitBm13Interaction(
            entrez_a,
            entrez_b,
            genesymbol_a,
            genesymbol_b,
        )
[docs]
def lit_bm_17_interactions():
    """
    Downloads and processes Lit-BM-17 dataset, the 2017 version of the
    high confidence literature curated interactions from CCSB.

    The file is retrieved from the URL at ``urls.urls['hid']['lit-bm-17']``;
    its first line is skipped, the rest is split by tabs.

    Yields ``LitBm17Interaction`` named tuples with the fields `id_a`,
    `id_b`, `pubmed` and `score` (float).
    """

    LitBm17Interaction = collections.namedtuple(
        'LitBm17Interaction',
        [
            'id_a',
            'id_b',
            'pubmed',
            'score',
        ]
    )

    url = urls.urls['hid']['lit-bm-17']
    # the file is requested once, as a line iterator; an earlier extra
    # request that loaded the whole content without using it was removed
    c = curl.Curl(url, silent = False, large = True, slow = True)

    # consume the first line (presumably the header)
    _ = next(c.result)

    for row in c.result:

        row = row.strip().split('\t')

        # fixed length prefixes are cut off from the values; presumably
        # these are `uniprotkb:`, `pubmed:` and `author score:`
        id_a = row[0][10:]
        id_b = row[1][10:]
        pubmed = row[8][7:]
        score = float(row[14][13:])

        yield LitBm17Interaction(
            id_a = id_a,
            id_b = id_b,
            pubmed = pubmed,
            score = score,
        )
[docs]
def huri_interactions():
    """
    Interactions of the `huri` dataset.

    Returns the generator created by ``_huri_interactions`` for this
    dataset key.
    """

    return _huri_interactions('huri')
[docs]
def yu2011_interactions():
    """
    Interactions of the `yu-2011` dataset.

    Returns the generator created by ``_huri_interactions`` for this
    dataset key.
    """

    return _huri_interactions('yu-2011')
[docs]
def hi_union_interactions():
    """
    Interactions of the `hi-union` dataset.

    Returns the generator created by ``_huri_interactions`` for this
    dataset key.
    """

    return _huri_interactions('hi-union')
[docs]
def yang2016_interactions():
    """
    Interactions of the `yang-2016` dataset.

    Returns the generator created by ``_huri_interactions`` for this
    dataset key.
    """

    return _huri_interactions('yang-2016')
[docs]
def hi_ii_interactions():
    """
    Interactions from Rolland 2014 https://pubmed.ncbi.nlm.nih.gov/25416956/.

    Returns the generator created by ``_huri_interactions`` for the
    `hi-ii-14-pmi` dataset key.
    """

    return _huri_interactions('hi-ii-14-pmi')
[docs]
def hi_i_interactions():
    """
    Interactions from Rual 2005 https://pubmed.ncbi.nlm.nih.gov/16189514/.

    Returns the generator created by ``_huri_interactions`` for the
    `hi-i-05-pmi` dataset key.
    """

    return _huri_interactions('hi-i-05-pmi')
[docs]
def lit_bm_interactions():
    """
    Literature collected interactions from Luck 2020.

    The file is retrieved from the URL at ``urls.urls['hid']['lit-bm']``.
    The first two tab separated fields of each line are translated from
    `ensembl` to `uniprot` identifiers by ``mapping.map_name``.

    Yields one ``LitBmInteraction`` named tuple for every combination of
    the UniProt IDs obtained for the two partners.
    """

    LitBmInteraction = collections.namedtuple(
        'LitBmInteraction',
        ('uniprot_a', 'uniprot_b'),
    )

    download = curl.Curl(
        urls.urls['hid']['lit-bm'],
        large = True,
        silent = False,
        slow = True,
    )

    for line in download.result:

        fields = line.strip().split('\t')
        uniprots_a = mapping.map_name(fields[0], 'ensembl', 'uniprot')
        uniprots_b = mapping.map_name(fields[1], 'ensembl', 'uniprot')

        # all pairs between the translations of the two partners
        for pair in itertools.product(uniprots_a, uniprots_b):

            yield LitBmInteraction(*pair)
def _huri_interactions(dataset):
    """
    Retrieves one dataset and yields its interactions between UniProt IDs.

    :param str dataset:
        Either an URL (anything starting with `http`), or a key in
        ``urls.urls['hid']``.

    Each line is split by whitespace; the first two fields are expected in
    the form `<prefix>:<identifier>[-<isoform>]`. The identifiers are
    translated to UniProt by ``mapping.map_name``. Lines with less than
    two fields, or with partner fields not matching this form, are
    skipped.

    Yields ``HuriInteraction`` named tuples with the fields `uniprot_a`,
    `uniprot_b`, `isoform_a`, `isoform_b` (integers, 1 if not specified)
    and `score` (float, or `None` if the line has no author score); one
    for every combination of the UniProt IDs of the two partners.
    """

    # the isoform is one or more digits after a dash, hence isoforms
    # from 10 upwards are captured completely
    reuniprot = re.compile(r'[a-z]+:([\w\.]+)(?:-([0-9]+))?')
    rescore = re.compile(r'author score: ([\.0-9]+)')

    HuriInteraction = collections.namedtuple(
        'HuriInteraction',
        [
            'uniprot_a',
            'uniprot_b',
            'isoform_a',
            'isoform_b',
            'score',
        ]
    )


    def _map_ids(_id):
        # Translates one identifier to a set of UniProt IDs.

        # the prefix is compared in lower case, this way identifiers
        # starting with upper case `ENSP` or `ENST` are recognised too
        prefix = _id[:4].lower()

        return mapping.map_name(
            _id,
            prefix if prefix in {'ensp', 'enst'} else 'uniprot',
            'uniprot',
        )


    url = dataset if dataset.startswith('http') else urls.urls['hid'][dataset]
    c = curl.Curl(url, large = True, silent = False, slow = True)

    # the download is used only to find out the path of the local file,
    # which is then read again by `FileOpener`
    path = (
        c.fileobj.name
            if hasattr(c, 'fileobj') else
        c.cache_file_name or c.outfile
    )
    del c
    c = curl.FileOpener(path)

    for row in c.result:

        # the score is looked up in the whole line, before splitting it
        score_match = rescore.search(row)
        score = float(score_match.groups()[0]) if score_match else None

        row = row.split()

        if len(row) < 2:

            continue

        match_a = reuniprot.match(row[0])
        match_b = reuniprot.match(row[1])

        # lines where the partners are not in the expected form are
        # skipped instead of failing on the missing match
        if not (match_a and match_b):

            continue

        id_a, isoform_a = match_a.groups()
        id_b, isoform_b = match_b.groups()

        uniprots_a = _map_ids(id_a)
        uniprots_b = _map_ids(id_b)

        for uniprot_a, uniprot_b in itertools.product(uniprots_a, uniprots_b):

            yield HuriInteraction(
                uniprot_a = uniprot_a,
                uniprot_b = uniprot_b,
                isoform_a = int(isoform_a) if isoform_a else 1,
                isoform_b = int(isoform_b) if isoform_b else 1,
                score = score,
            )