#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import re
import collections
import pypath.inputs.common as inputs_common
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping
import pypath.inputs.cell as cell_input
import pypath.share.common as common
def lambert2018_s1_raw():
    """
    Load the rows of supplementary table S1 of Lambert et al. 2018.

    The file is obtained via ``cell_input.cell_supplementary`` using the
    ``s1`` and ``article`` entries under ``urls.urls['lambert2018']`` and
    is read by ``inputs_common.read_xls`` with ``sheet = 1``. The first
    two rows are consumed as headers. Field names are derived from the
    second header row: the characters ``- ?;:`` are replaced by
    underscores, the name is lowercased, leading and trailing underscores
    and spaces are removed, and names that end up empty are dropped.

    Returns:
        list: One ``Lambert2018Raw`` named tuple for each remaining row.
            Cell values are post-processed by ``process_field``; empty
            and ``#N/A`` cells are represented by ``None``.
    """

    def process_field(f):
        # Strip the cell, then pass it through `common.try_float` and
        # `common.try_bool` (presumably these convert where possible and
        # return the value unchanged otherwise -- confirm in
        # `pypath.share.common`). Empty and '#N/A' cells become None.
        f = common.try_bool(common.try_float(f.strip()))

        return None if f in {'', '#N/A'} else f


    path = cell_input.cell_supplementary(
        supp_url = urls.urls['lambert2018']['s1'],
        article_url = urls.urls['lambert2018']['article'],
    )

    content = inputs_common.read_xls(path, sheet = 1)

    # The first two rows are headers; the label of the 4th column is
    # taken from the first header row instead of the second.
    h0, h1 = content.pop(0), content.pop(0)
    h1[3] = h0[3]

    record = collections.namedtuple(
        'Lambert2018Raw',
        [
            nn for nn in (
                re.sub(r'[- ?;:]', '_', n).lower().strip('_ ')
                for n in h1
            )
            if nn
        ]
    )

    # Rows are truncated to the number of named fields.
    # NOTE(review): this assumes the columns without a usable header are
    # all trailing ones -- confirm against the spreadsheet layout.
    nfields = len(record._fields)

    return [
        record(*(process_field(f) for f in r[:nfields]))
        for r in content
    ]
def lambert2018_annotations():
    """
    Build per-protein annotation records from Lambert et al. 2018, table S1.

    Each row returned by ``lambert2018_s1_raw`` is translated from gene
    symbol (the ``name`` field) to UniProt IDs by ``mapping.map_name``;
    one ``Lambert2018Annotation`` record is stored under every UniProt ID
    obtained. Rows for which the mapping yields nothing contribute no
    records.

    Returns:
        dict: UniProt IDs as keys and sets of ``Lambert2018Annotation``
            named tuples as values.
    """

    Lambert2018Annotation = collections.namedtuple(
        'Lambert2018Annotation',
        (
            'ensg',
            'genesymbol',
            'is_tf',
            'tf_assessment',
            'binding_mode',
            'binding_domain',
            'tf_disagree',
            'binding_disagree',
            'binding1',
            'binding2',
            'assessment1',
            'assessment2',
            'vaquerizas2009',
            'cisbp',
            'tfclass',
            'tfcat_annot',
            'tfcat_pmids',
            'go',
            'pdb',
        )
    )

    # Raw strings: `\d` inside a plain string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    re_pmids = r'PMIDS:[\d;]+'
    re_pmids_groups = r'PMIDS:([\d;]+)'

    result = collections.defaultdict(set)

    for r in lambert2018_s1_raw():

        uniprots = mapping.map_name(r.name, 'genesymbol', 'uniprot')

        # A missing Vaquerizas 2009 classification is recorded as 'no'.
        vaquerizas = r.vaquerizas_2009_tf_classification or 'no'

        tfcat = r.tf_cat_classification

        if tfcat:

            # The TF-CAT field carries both class labels and a
            # `PMIDS:...` part: the latter is cut out before the labels
            # are normalized and split at `_` and `;`.
            tfcat_labels = (
                re.sub(re_pmids, '', tfcat).
                replace('tf', 'TF').
                replace('Transcription Factor', 'TF')
            )
            tfcat_annot = tuple(common.del_empty(sorted(
                a.strip()
                for a in re.split('[_;]', tfcat_labels)
            )))
            tfcat_pmids = common.re_safe_groups(
                re_pmids_groups,
                tfcat.strip()
            )[0]

        else:

            tfcat_annot = ()
            tfcat_pmids = None

        # A PMID string of '0' is treated as no reference.
        tfcat_pmids = None if tfcat_pmids == '0' else tfcat_pmids

        for uniprot in uniprots:

            result[uniprot].add(
                Lambert2018Annotation(
                    ensg = r.id,
                    genesymbol = r.name,
                    is_tf = r.is_tf,
                    tf_assessment = r.tf_assessment,
                    binding_mode = r.binding_mode,
                    binding_domain = r.dbd,
                    tf_disagree = r.disagree_on_assessment == 'Disagree',
                    binding_disagree = r.disagree_on_binding == 'Disagree',
                    binding1 = r.binding1,
                    binding2 = r.binding2,
                    # `assesment` (sic): these field names are derived
                    # from the column headers of the table.
                    assessment1 = r.assesment1,
                    assessment2 = r.assesment2,
                    vaquerizas2009 = vaquerizas,
                    cisbp = r.cisbp_considers_it_as_a_tf,
                    tfclass = r.tfclass_considers_it_as_a_tf,
                    tfcat_annot = tfcat_annot,
                    tfcat_pmids = tfcat_pmids,
                    go = r.is_a_go_tf,
                    pdb = r.pdb,
                )
            )

    return dict(result)