#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import re
import collections
import itertools
import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping
import pypath.utils.orthology as orthology_mod
import pypath.internals.intera as intera
import pypath.inputs.common as inputs_common
[docs]
def baccin2019_interactions(ncbi_tax_id = 9606):
recamel = re.compile(r'(.+?)([A-Z][a-z])')
recap = re.compile(r'(^[A-Z][a-z]|_[A-Z][a-z])(.+)')
def camel_to_snake(value):
return (
recamel.sub(
lambda m: m.group(1).lower() + '_' + m.group(2),
value.strip()
).lower()
)
def id_translate(mouse_gs):
uniprots = mapping.map_name(
mouse_gs,
'genesymbol',
'uniprot',
10090,
)
if ncbi_tax_id != 10090:
uniprots = set(
itertools.chain(*(
orthology_mod.translate(
uniprot,
target = ncbi_tax_id,
source = 10090,
)
for uniprot in uniprots
))
)
return uniprots
def raw_to_uniprots(raw):
components = raw.split('&')
return set(
itertools.product(
*(id_translate(comp) for comp in components)
)
)
def get_partners(components, sources, references):
return {
(
comp[0]
if len(comp) == 1 else
intera.Complex(
components = comp,
sources = sources,
references = references,
)
)
for comp in components
}
Baccin2019Interaction = collections.namedtuple(
'Baccin2019Interaction',
[
'ligand',
'receptor',
'correct',
'ligand_location',
'ligand_category',
'resources',
'references',
]
)
source_names = {
'Baccin': 'Baccin2019',
'Ramilowski': 'Ramilowski2015',
}
url = urls.urls['baccin2019']['url']
c = curl.Curl(url, silent = False, large = True)
data = inputs_common.read_xls(c.fileobj.name, sheet = 'SuppTable3')
result = []
for rec in data[3:]:
if rec[4].strip().lower() == 'incorrect':
continue
ligand_components = raw_to_uniprots(rec[1])
if not ligand_components:
continue
receptor_components = raw_to_uniprots(rec[2])
if not receptor_components:
continue
sources = {'Baccin2019', rec[3].strip()}
sources = {
source_names[s] if s in source_names else s
for s in sources
}
references = {
_ref for _ref in
(
ref.strip().replace('.0', '')
for ref in rec[7].split(',')
)
if _ref.isdigit()
}
ligands = get_partners(ligand_components, sources, references)
receptors = get_partners(receptor_components, sources, references)
for ligand, receptor in itertools.product(ligands, receptors):
result.append(
Baccin2019Interaction(
ligand = ligand,
receptor = receptor,
correct = rec[4].strip(),
ligand_location = camel_to_snake(rec[5]),
ligand_category = camel_to_snake(rec[6]),
resources = sources,
references = references,
)
)
return result
[docs]
def baccin2019_annotations(ncbi_tax_id = 9606):
Baccin2019Annotation = collections.namedtuple(
'Baccin2019Annotation',
[
'mainclass',
'subclass',
'location',
]
)
ia_all = baccin2019_interactions(ncbi_tax_id = ncbi_tax_id)
result = collections.defaultdict(set)
for ia in ia_all:
result[ia.ligand].add(
Baccin2019Annotation(
mainclass = 'ligand',
subclass = ia.ligand_category,
location = ia.ligand_location,
)
)
result[ia.receptor].add(
Baccin2019Annotation(
mainclass = 'receptor',
subclass = (
'%s_receptor' % ia.ligand_category
if ia.ligand_category != 'other' else
None
),
location = None,
)
)
return dict(result)