Source code for pypath.inputs.threedcomplex
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
from future.utils import iteritems
import itertools
import collections
import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.share.progress as progress
import pypath.share.common as common
import pypath.inputs.pdb as pdb_input
import pypath.inputs.pfam as pfam_input
import pypath.internals.intera as intera
[docs]
def threedcomplex_complexes():
    """
    Placeholder for retrieving protein complexes from 3DComplex.

    Not implemented yet. Once implemented, it should return a dictionary
    of ``pypath.internals.intera.Complex`` objects.

    Raises
        NotImplementedError: Always, because there is no implementation.
    """

    # An explicit message tells the caller what is missing, instead of a
    # bare exception with an empty description.
    raise NotImplementedError(
        'Retrieving complexes from 3DComplex is not implemented yet.'
    )
[docs]
def threedcomplex_ddi(contacts = None):
    """
    Builds domain-domain interactions from the contact data of the
    3DComplex database
    (http://shmoo.weizmann.ac.il/elevy/3dcomplexV6/Home.cgi).

    For each contact record, every combination of the Pfam domains of the
    two partners is considered; for each combination, every pair of the
    Pfam regions known for the two proteins yields one interaction.

    Args
        contacts (set): A collection of 3D structure based contacts, as
            provided by ``threedcomplex_contacts``. If None, it will be
            obtained automatically. An empty collection is used as it
            is and results an empty list.

    Returns
        A list of domain-domain interaction objects
        (``pypath.internals.intera.DomainDomain``).
    """

    # Only `None` means "not provided": an explicitly passed empty
    # collection must not trigger a download of the whole database.
    if contacts is None:

        contacts = threedcomplex_contacts()

    if not contacts:

        return []

    uniprots = (
        common.values(contacts, 'uniprot_1') |
        common.values(contacts, 'uniprot_2')
    )
    u_pfam = pfam_input.pfam_regions(uniprots, value = 'uniprot')

    # Placeholder used when no Pfam region is available for a domain of
    # a protein; it is only read, hence safe to share across iterations.
    no_details = [{
        'start': None,
        'end': None,
        'isoform': 1
    }]

    ddi = []
    prg = progress.Progress(
        len(contacts),
        'Processing contact information',
        9,
    )

    for con in contacts:

        prg.step()
        # NOTE(review): presumably `common.prefix` keeps the part before
        # the separator (PDB ID of the complex, Pfam accession without
        # version) -- confirm against `pypath.share.common`.
        pdb = common.prefix(con.pdb, '_')
        pfams1 = {common.prefix(x, '.') for x in con.domain_p1}
        pfams2 = {common.prefix(x, '.') for x in con.domain_p2}

        for pfam1, pfam2 in itertools.product(pfams1, pfams2):

            pfam1_details = no_details
            pfam2_details = no_details

            if con.uniprot_1 in u_pfam and pfam1 in u_pfam[con.uniprot_1]:

                pfam1_details = u_pfam[con.uniprot_1][pfam1]

            if con.uniprot_2 in u_pfam and pfam2 in u_pfam[con.uniprot_2]:

                pfam2_details = u_pfam[con.uniprot_2][pfam2]

            for pfam1_d, pfam2_d in itertools.product(
                pfam1_details,
                pfam2_details,
            ):

                dom1 = intera.Domain(
                    protein = con.uniprot_1,
                    domain = pfam1,
                    start = pfam1_d['start'],
                    end = pfam1_d['end'],
                    isoform = pfam1_d['isoform'],
                    chains = {pdb: con.chain_1},
                )
                dom2 = intera.Domain(
                    protein = con.uniprot_2,
                    domain = pfam2,
                    start = pfam2_d['start'],
                    end = pfam2_d['end'],
                    isoform = pfam2_d['isoform'],
                    chains = {pdb: con.chain_2},
                )
                dd = intera.DomainDomain(
                    dom1,
                    dom2,
                    pdbs = pdb,
                    sources = '3DComplex',
                    contact_residues = con.n_residues,
                )
                ddi.append(dd)

    prg.terminate()

    return ddi
[docs]
def threedcomplex_chains():
    """
    Retrieves the chain correspondence table of the 3DComplex database.

    Returns
        A dict of dicts built from the tab separated table: top level
        keys are the first column processed by
        ``common.prefix(..., '.')``, second level keys are the values of
        the second column, and the values are taken from the third
        column. Lines with fewer than three fields are ignored.
    """

    download = curl.Curl(
        urls.urls['3dcomplex_correspondancy']['url'],
        silent = False,
    )

    chain_map = {}

    for line in download.result.split('\n'):

        fields = line.strip().split('\t')

        # skip empty or incomplete records
        if len(fields) <= 2:

            continue

        structure = common.prefix(fields[0], '.')
        chain_map.setdefault(structure, {})[fields[1]] = fields[2]

    return chain_map
[docs]
def threedcomplex_contacts(chains = None, pdb_uniprot = None):
    """
    Returns the 3D structure based domain-domain contact map from the
    3D Complex database.

    Records are kept only if both chains of the contact are found in the
    chain correspondence map and both corresponding PDB chains have a
    UniProt mapping. Lines with fewer than 12 tab separated fields are
    ignored.

    Args
        chains (dict): A dict of chain correspondence map, as created by
            ``threedcomplex_chains``. If None, it will be obtained
            automatically.
        pdb_uniprot (dict): A dict of PDB-UniProt mappings, as created by
            ``pypath.inputs.pdb.pdb_chains``. If None, it will be obtained
            automatically.

    Returns
        A set of ``ThreedcomplexContact`` named tuples.
    """

    ThreedcomplexContact = collections.namedtuple(
        'ThreedcomplexContact',
        (
            'pdb',
            'uniprot_1',
            'uniprot_2',
            'chain_1',
            'chain_2',
            'n_residues',
            'length_1',
            'length_2',
            'domain_s1',
            'domain_p1',
            'domain_s2',
            'domain_p2',
            'ident',
            'homo',
        ),
    )

    # Only `None` means "not provided": an explicitly passed empty
    # mapping must be respected instead of triggering a download.
    if chains is None:

        chains = threedcomplex_chains()

    if pdb_uniprot is None:

        # the second element of the tuple returned by `pdb_chains`
        # is used as the PDB-UniProt mapping
        pdb_uniprot = pdb_input.pdb_chains()[1]

    pdb_u = pdb_uniprot

    c = curl.Curl(
        urls.urls['3dcomplex_contact']['url'],
        silent = False,
        slow = True,
    )

    result = set()

    for line in c.result.split('\n'):

        fields = line.strip().split('\t')

        if len(fields) < 12:

            continue

        compl = fields[0]

        if (
            compl not in chains or
            fields[1] not in chains[compl] or
            fields[2] not in chains[compl]
        ):

            continue

        # translate the chain IDs by the correspondence map
        ch1 = chains[compl][fields[1]]
        ch2 = chains[compl][fields[2]]
        # the PDB-UniProt mapping is looked up by the prefix of the
        # complex ID, while the record keeps the full complex ID
        pdb = common.prefix(compl, '_')

        if (
            pdb not in pdb_u or
            ch1 not in pdb_u[pdb] or
            ch2 not in pdb_u[pdb]
        ):

            continue

        result.add(
            ThreedcomplexContact(
                pdb = compl,
                uniprot_1 = pdb_u[pdb][ch1]['uniprot'],
                uniprot_2 = pdb_u[pdb][ch2]['uniprot'],
                chain_1 = ch1,
                chain_2 = ch2,
                n_residues = float(fields[3]),
                length_1 = int(fields[4]),
                length_2 = int(fields[5]),
                domain_s1 = tuple(fields[6].split(';')),
                domain_p1 = tuple(fields[7].split(';')),
                domain_s2 = tuple(fields[8].split(';')),
                domain_p2 = tuple(fields[9].split(';')),
                ident = bool(int(fields[10])),
                homo = bool(int(fields[11])),
            )
        )

    return result
[docs]
def threedcomplex_nresidues():
    """
    Collects the number of residues in contact for pairs of proteins,
    based on the contact records of the 3DComplex database
    (http://shmoo.weizmann.ac.il/elevy/3dcomplexV6/Home.cgi).

    Returns
        A dict of dicts: top level keys are the ``pdb`` fields of the
        contact records, second level keys are sorted pairs (tuples) of
        UniProt IDs, and values are the ``n_residues`` field of the
        contact. If more than one record belongs to the same structure
        and protein pair, the one processed last is kept.
    """

    by_structure = {}

    for con in threedcomplex_contacts():

        # sorting makes the key independent of the partner order
        pair = tuple(sorted((con.uniprot_1, con.uniprot_2)))
        by_structure.setdefault(con.pdb, {})[pair] = con.n_residues

    return by_structure