#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
"""
This module provides classes to represent and handle
structural details of protein interactions
i.e. residues, post-translational modifications,
short motifs, domains, domain-motifs and
domain-motif interactions, binding interfaces.
"""
from future.utils import iteritems
from past.builtins import xrange, range, reduce
import re
import sys
import importlib as imp
import collections
import itertools
# from pypath:
import pypath.share.common as common
import pypath_common._constants as _const
import pypath.utils.mapping as mapping
import pypath.core.evidence as evidence
import pypath.core.entity as entity
import pypath.utils.taxonomy as taxonomy
# Public names exported by a star-import of this module.
# NOTE(review): the `Regulation` and `Complex` classes defined further
# below are not listed here -- confirm whether that omission is intended.
__all__ = [
'Residue',
'Ptm',
'Motif',
'Domain',
'DomainDomain',
'DomainMotif',
'Interface',
]
# Python 2/3 compatibility shim: make sure the name `unicode` exists.
# The name is probed directly instead of looking it up in `__builtins__`,
# because `__builtins__` is a dict in imported modules but a module object
# in `__main__`, where a membership test on it raises TypeError.
try:
    unicode
except NameError:
    unicode = str

# Separator placed between the component identifiers in the string
# representation of a complex.
COMPLEX_SEP = '_'
class Residue(object):
    """
    A single amino acid residue at a given position of a protein.

    Two residues compare equal if their protein, position and residue
    name are the same; the isoform is not part of the comparison.
    """

    # removes everything except digits, dots and minus signs
    _non_digit = re.compile(r'[^\d.-]+')

    def __init__(
            self,
            number,
            name,
            protein,
            id_type = 'uniprot',
            ncbi_tax_id = 9606,
            isoform = 1,
            mutated = False,
            seq = None,
        ):
        """
        number : int,str
            Position of the residue. From strings (e.g. ``'Y1068'``) the
            non-numeric characters are removed before conversion to int.
        name : str
            Name of the residue (e.g. ``'S'``).
        protein : object,str
            An object with an ``identifier`` attribute is used as it is;
            anything else is passed to ``entity.Entity`` together with
            ``id_type`` and ``ncbi_tax_id``.
        id_type : str
            Identifier type of ``protein``.
        ncbi_tax_id : int
            Taxon passed to ``entity.Entity``.
        isoform : int,str
            Isoform number; strings are converted like ``number``.
        mutated : bool
            Flag shown in the string representation.
        seq : object
            Optional sequence object used by ``in_isoform``.
        """

        self.name = name
        self.number = (
            int(self._non_digit.sub('', number))
            if isinstance(number, str) else
            number
        )
        self.protein = (
            protein
            if hasattr(protein, 'identifier') else
            entity.Entity(
                identifier = protein,
                id_type = id_type,
                taxon = ncbi_tax_id,
            )
        )
        # kept because `in_isoform` needs it to create the new instance
        self.id_type = id_type
        self.mutated = mutated
        self.seq = seq
        self.isoform = (
            isoform
            if isinstance(isoform, int) else
            int(self._non_digit.sub('', isoform))
        )

    def __hash__(self):

        return hash((self.number, self.name, self.protein))

    def __eq__(self, other):

        # objects of other types are never equal to a residue
        return (
            isinstance(other, Residue) and
            self.protein == other.protein and
            self.number == other.number and
            self.name == other.name
        )

    def __ne__(self, other):

        return not self.__eq__(other)

    def __str__(self):

        return 'Residue %s-%u in protein %s-%u%s\n' % (
            self.name,
            self.number,
            self.protein.identifier,
            self.isoform,
            ' (mutated)' if self.mutated else ''
        )

    def __repr__(self):

        return '<Residue %s-%u:%s%u>' % (
            self.protein.label,
            self.isoform,
            self.name,
            self.number,
        )

    def serialize(self):
        """
        Returns the residue name followed by its position, e.g. ``Y1068``.
        """

        return '%s%u' % (self.name, self.number)

    def in_isoform(self, isoform, seq = None):
        """
        Returns a new ``Residue`` for ``isoform`` if the sequence (``seq``
        or the one given at creation) has this isoform and carries the
        same residue name at the same position; otherwise ``None``.
        """

        seq = seq or self.seq

        if (
            seq and
            seq.has_isoform(isoform) and
            seq.get(self.number, isoform = isoform) == self.name
        ):

            return Residue(
                number = self.number,
                name = self.name,
                protein = self.protein,
                id_type = self.id_type,
                isoform = isoform,
                mutated = self.mutated,
            )

        return None
class Ptm(object):
    """
    A post-translational modification on a protein. Without a residue
    and with the type ``unknown`` it stands for the motif side of a
    domain-motif interaction (see ``__str__``).
    """

    # removes everything except digits, dots and minus signs; defined on
    # the class, still reachable as `self.non_digit`
    non_digit = re.compile(r'[^\d.-]+')

    def __init__(
            self,
            protein,
            id_type = 'uniprot',
            ncbi_tax_id = 9606,
            typ = 'unknown',
            motif = None,
            residue = None,
            isoform = 1,
            evidences = None,
            seq = None,
        ):
        """
        protein : object,str
            An object with an ``identifier`` attribute is used as it is;
            anything else is passed to ``entity.Entity`` together with
            ``id_type`` and ``ncbi_tax_id``.
        typ : str
            Modification type, stored in lower case.
        motif : Motif
            The motif around the modified residue, if known.
        residue : Residue
            The modified residue, if known.
        isoform : int,str
            Isoform number; from strings the non-numeric characters
            are removed.
        evidences :
            Added to a new ``evidence.Evidences`` container by ``+=``.
        seq : object
            Optional sequence object used by the isoform methods.
        """

        self.protein = (
            protein
            if hasattr(protein, 'identifier') else
            entity.Entity(
                identifier = protein,
                id_type = id_type,
                taxon = ncbi_tax_id,
            )
        )
        self.id_type = id_type
        # `__eq__` treats `None` as a wildcard type, hence it is tolerated
        self.typ = typ.lower() if isinstance(typ, str) else typ
        self.seq = seq
        self.motif = motif
        self.residue = residue
        self.isoform = (
            isoform
            if isinstance(isoform, int) else
            int(self.non_digit.sub('', isoform))
        )
        self.isoforms = set()
        self.add_isoform(isoform)
        self.evidences = evidence.Evidences()
        self.add_evidences(evidences)

    def __hash__(self):

        return hash((self.residue, self.typ))

    def __str__(self):

        title = (
            'Domain-motif interaction'
            if self.typ == 'unknown' and self.residue is None else
            'PTM: %s' % self.typ
        )
        motif = 'unknown' if self.motif is None else self.motif.__str__()
        residue = (
            ''
            if self.residue is None else
            '\n Residue: %s' % self.residue.__str__()
        )

        return '%s in protein %s-%u\n Motif: %s\n%s' % (
            title,
            self.protein.label,
            self.isoform,
            motif,
            residue,
        )

    def __repr__(self):

        return '<PTM %s%s>' % (
            (
                self.residue.__repr__().strip('<>')
                if self.residue else
                self.protein.label
            ),
            ':%s' % self.typ if self.residue else '',
        )

    def _same_motif(self, other):
        """
        Tells if the two PTMs have equal motifs; a missing motif is only
        equal to another missing motif.
        """

        if self.motif is None or other.motif is None:

            return self.motif is None and other.motif is None

        return self.motif == other.motif

    def __eq__(self, other):

        return (
            isinstance(other, Ptm) and
            self.protein == other.protein and
            (
                self.residue == other.residue or
                (
                    (self.residue is None or other.residue is None) and
                    self._same_motif(other)
                )
            ) and (
                self.typ == other.typ or
                self.typ is None or other.typ is None
            )
        )

    def __ne__(self, other):

        return not self.__eq__(other)

    def __contains__(self, other):
        """
        Membership test for residues, motifs, the protein itself and for
        objects with an ``original`` residue attribute (mutations).
        """

        if isinstance(other, Residue):

            if self.residue is not None:

                return other == self.residue

            return self.motif is not None and other in self.motif

        if isinstance(other, Motif):

            return self.motif is not None and other == self.motif

        if other == self.protein:

            return True

        # No `Mutation` class is defined or imported in this module, so
        # mutations are recognised by their `original` attribute.
        original = getattr(other, 'original', None)

        if original is not None:

            return (
                original == self.residue or
                (self.motif is not None and original in self.motif)
            )

        return False

    def __deepcopy__(self, memo):

        # NOTE(review): evidences and `seq` are not carried over to the
        # copy -- confirm whether that is intended.
        new = type(self)(
            protein = self.protein,
            id_type = self.id_type,
            typ = self.typ,
            motif = self.motif,
            residue = self.residue,
            isoform = self.isoform,
        )
        new.add_isoform(self.isoforms)

        return new

    def add_evidences(self, evidences):

        self.evidences += evidences

    def serialize(self):
        """
        Returns a colon separated one line representation: protein and
        isoform, type, resource names, motif, residue name and number.
        """

        return '%s-%u:%s:%s:%s:%s:%u' % (
            self.protein,
            self.isoform,
            self.typ,
            ','.join(sorted(self.evidences.get_resource_names())),
            ':::0-0' if self.motif is None else self.motif.serialize(),
            '' if self.residue is None else self.residue.name,
            0 if self.residue is None else self.residue.number,
        )

    def print_residue(self):

        return '%s-%u:%s:%u' % (
            self.protein, self.isoform,
            '' if self.residue is None else self.residue.name,
            0 if self.residue is None else self.residue.number,
        )

    def merge(self, other):
        """
        Merges the evidences, motif, type and isoforms of an equal
        ``Ptm`` into this one; does nothing if the two are not equal.
        """

        if not self == other:

            return

        self.add_evidences(other.evidences)

        if self.motif is None:

            self.motif = other.motif

        elif other.motif is not None:

            # `Motif.merge` works in place and returns nothing, its
            # result must not be assigned to `self.motif`
            self.motif.merge(other.motif)

        if (
            (
                self.typ is None or
                self.typ == 'unknown' or
                len(self.typ) == 3
            ) and
            other.typ is not None and
            other.typ != 'unknown'
        ):

            self.typ = other.typ

        self.isoform = min(self.isoform, other.isoform)
        self.isoforms = other.isoforms | self.isoforms

    def add_isoform(self, isoform):
        """
        Adds one isoform number (int or str) or a collection of numbers.
        """

        if isinstance(isoform, int):

            new = {isoform}

        elif isinstance(isoform, (set, frozenset, list, tuple)):

            new = set(isoform)

        else:

            new = {int(self.non_digit.sub('', isoform))}

        self.isoforms = self.isoforms | new

    def get_isoforms(self, seq = None):
        """
        Returns a list with one ``Ptm`` for each isoform of the sequence
        where ``in_isoform`` finds this modification.
        """

        result = []
        seq = seq or self.seq

        if seq:

            for isoform in seq.get_isoforms():

                ptm = self.in_isoform(isoform, seq)

                if ptm:

                    result.append(ptm)

        return result

    def in_isoform(self, isoform, seq = None):
        """
        Returns a new ``Ptm`` for ``isoform`` if the sequence has this
        isoform and the same residue at the same position; otherwise
        ``None``. Without a residue no check is possible and ``None`` is
        returned.
        """

        seq = seq or self.seq

        if (
            not seq or
            self.residue is None or
            not seq.has_isoform(isoform)
        ):

            return None

        if (
            seq.get(self.residue.number, isoform = isoform) !=
            self.residue.name
        ):

            return None

        return Ptm(
            protein = self.protein,
            id_type = self.id_type,
            typ = self.typ,
            motif = (
                None
                if self.motif is None else
                self.motif.in_isoform(isoform, seq = seq)
            ),
            residue = self.residue.in_isoform(isoform, seq = seq),
            evidences = self.evidences,
            isoform = isoform,
            seq = seq,
        )
class Motif(object):
    """
    A short sequence motif: a region of a protein between ``start`` and
    ``end``, optionally with a name, a regex and the matching instance.

    Two motifs are equal if their protein, start and end are the same.
    """

    # removes everything except digits, dots and minus signs
    _non_digit = re.compile(r'[^\d.-]+')

    def __init__(
            self,
            protein,
            start,
            end,
            id_type = 'uniprot',
            ncbi_tax_id = 9606,
            regex = None,
            instance = None,
            isoform = 1,
            motif_name = None,
            prob = None,
            elm = None,
            description = None,
            seq = None,
            evidences = None,
        ):
        """
        protein : object,str
            An object with an ``identifier`` attribute is used as it is;
            anything else is passed to ``entity.Entity`` together with
            ``id_type`` and ``ncbi_tax_id``.
        start, end : int,str
            Boundaries of the motif; from strings the non-numeric
            characters are removed.
        regex : str
            Pattern of the motif; a string is compiled, an already
            compiled pattern is stored as it is.
        instance : str
            The sequence of this motif instance.
        isoform : int,str
            Isoform number, converted like ``start`` and ``end``.
        evidences :
            Added to a new ``evidence.Evidences`` container by ``+=``.
        """

        self.protein = (
            protein
            if hasattr(protein, 'identifier') else
            entity.Entity(
                protein,
                id_type = id_type,
                taxon = ncbi_tax_id,
            )
        )
        self.id_type = id_type
        self.seq = seq
        self.isoform = self._to_int(isoform, isinstance(isoform, int))
        self.start = self._to_int(start, not isinstance(start, str))
        self.end = self._to_int(end, not isinstance(end, str))
        self.regex = (
            regex
            if regex is None or hasattr(regex, 'pattern') else
            re.compile(regex)
        )
        self.instance = instance
        self.motif_name = motif_name
        self.prob = prob
        self.elm = elm
        self.description = description
        self.evidences = evidence.Evidences()
        self.add_evidences(evidences)

    @classmethod
    def _to_int(cls, value, keep):
        """
        Returns ``value`` unchanged if ``keep`` is true, otherwise its
        numeric characters converted to int.
        """

        return value if keep else int(cls._non_digit.sub('', value))

    def __hash__(self):

        return hash((self.protein, self.start, self.end))

    def __eq__(self, other):

        # `None` and objects of other types are never equal to a motif
        return (
            isinstance(other, Motif) and
            other.protein == self.protein and
            other.start == self.start and
            other.end == self.end
        )

    def __contains__(self, other):
        """
        True for residues of the same protein within the range of the
        motif, and for the protein, instance or name of the motif.
        """

        if other is None:

            return False

        if isinstance(other, Residue):

            return (
                self.start is not None and
                self.end is not None and
                other.protein == self.protein and
                self.start <= other.number <= self.end
            )

        return (
            other == self.protein or
            other == self.instance or
            other == self.motif_name
        )

    def add_evidences(self, evidences):

        self.evidences += evidences

    def serialize(self):

        return '%s:%s:%u-%u' % (
            self.motif_name or 'unknown',
            self.instance,
            0 if self.start is None else self.start,
            0 if self.end is None else self.end,
        )

    def print_residues(self):

        return '%s-%u:%u-%u' % (
            self.protein, self.isoform,
            0 if self.start is None else self.start,
            0 if self.end is None else self.end,
        )

    def merge(self, other):
        """
        Fills the missing attributes of this motif from an equal motif
        and adds its evidences. Works in place, returns nothing.
        """

        if self == other:

            self.instance = self.instance or other.instance
            self.regex = self.regex or other.regex
            self.elm = self.elm or other.elm
            self.prob = self.prob or other.prob
            self.motif_name = self.motif_name or other.motif_name
            self.description = self.description or other.description
            self.evidences += other.evidences

    def __str__(self):

        return (
            'Motif in protein %s-%u:\n'
            '\tName: %s\n'
            '\tELM: %s\n'
            '\tRange: %u-%u\n'
            '\tRegex: %s\n'
            '\tInstance: %s\n' % (
                self.protein.label,
                self.isoform,
                self.motif_name or 'unknown',
                self.elm or 'unknown',
                0 if self.start is None else self.start,
                0 if self.end is None else self.end,
                'unknown' if self.regex is None else self.regex.pattern,
                self.instance or 'unknown',
            )
        )

    def __repr__(self):

        rng = self.range_str()

        return '<Motif %sin %s-%u%s>' % (
            '%s ' % self.motif_name if self.motif_name else '',
            self.protein.label,
            self.isoform,
            ' [%s]' % rng if rng else '',
        )

    def range(self):
        """
        Returns the ``(start, end)`` tuple or ``None`` if any is missing.
        """

        return (self.start, self.end) if self.start and self.end else None

    def range_str(self):

        start_end = self.range()

        return '%s-%s' % start_end if start_end else ''

    def in_isoform(self, isoform, seq = None):
        """
        Returns a new ``Motif`` for ``isoform`` with the boundaries and
        instance provided by ``seq.get_region``, or ``None`` if the
        sequence is missing or does not have this isoform.
        """

        seq = seq or self.seq

        if not seq or not seq.has_isoform(isoform):

            return None

        # NOTE(review): `isoform` is not passed on to `get_region`,
        # presumably the region is taken from its default isoform --
        # verify against the sequence class.
        start, end, instance = seq.get_region(
            self.start,
            self.start,
            self.end,
        )

        # keyword arguments keep each value on the parameter it belongs
        # to: positionally, the 5th parameter is `ncbi_tax_id`
        return Motif(
            protein = self.protein,
            start = start,
            end = end,
            id_type = self.id_type,
            regex = self.regex,
            instance = instance,
            isoform = isoform,
            motif_name = self.motif_name,
            prob = self.prob,
            elm = self.elm,
            description = self.description,
            seq = seq,
        )
class Domain(object):
    """
    A domain of a protein, optionally with its position in the sequence
    and the PDB structures and chains where it is observed.
    """

    # removes everything except digits, dots and minus signs
    _non_digit = re.compile(r'[^\d.-]+')

    def __init__(
            self,
            protein,
            id_type = 'uniprot',
            ncbi_tax_id = 9606,
            domain = None,
            domain_id_type = 'pfam',
            start = None,
            end = None,
            isoform = 1,
            chains = None,
        ):
        """
        protein : object,str
            An object with an ``identifier`` attribute is used as it is;
            anything else is passed to ``entity.Entity`` together with
            ``id_type`` and ``ncbi_tax_id``.
        domain : str
            Name or identifier of the domain.
        domain_id_type : str
            Type of the domain identifier.
        start, end : int,str
            Boundaries of the domain; from strings the non-numeric
            characters are removed.
        isoform : int,str
            Isoform number, strings are converted like the boundaries.
        chains : dict
            PDB identifiers as keys and chains as values, each pair is
            passed to ``add_chains``.
        """

        self.protein = (
            protein
            if hasattr(protein, 'identifier') else
            entity.Entity(
                identifier = protein,
                id_type = id_type,
                taxon = ncbi_tax_id,
            )
        )
        self.id_type = id_type
        self.domain = domain
        self.domain_id_type = domain_id_type
        self.start = (
            int(self._non_digit.sub('', start))
            if isinstance(start, str) else
            start
        )
        self.end = (
            int(self._non_digit.sub('', end))
            if isinstance(end, str) else
            end
        )
        self.isoform = (
            isoform
            if isinstance(isoform, int) else
            int(self._non_digit.sub('', isoform))
        )
        self.pdbs = {}

        for pdb, chain in (chains or {}).items():

            self.add_chains(pdb, chain)

    def __hash__(self):

        return hash((self.protein, self.domain))

    def __eq__(self, other):
        """
        Two domains are equal if they are on the same protein and both
        of their boundaries differ by less than a tolerance: 10% of the
        length of the shorter domain, but at least 10 and at most 30.
        Domains without position are never equal to anything.
        """

        if not isinstance(other, Domain):

            return False

        if any(
            num is None
            for num in (self.start, self.end, other.start, other.end)
        ):

            return False

        shorter = min(self.end - self.start, other.end - other.start)
        flank = min(max(int(shorter * 0.1), 10), 30)

        return (
            self.protein == other.protein and
            self.id_type == other.id_type and
            abs(self.start - other.start) < flank and
            abs(self.end - other.end) < flank
        )

    def __ne__(self, other):

        return not self.__eq__(other)

    def __contains__(self, other):
        """
        True for residues of the same protein within the domain, motifs
        of the same protein overlapping with the domain, PTMs whose
        residue or motif is in the domain, and for the protein or the
        name of the domain.
        """

        if other is None:

            return False

        has_position = self.start is not None and self.end is not None

        if isinstance(other, Residue):

            return (
                has_position and
                other.protein == self.protein and
                self.start <= other.number <= self.end
            )

        if isinstance(other, Motif):

            # The two ranges overlap. The original second condition,
            # `other.end <= self.start`, matched only the motifs lying
            # before the domain.
            return (
                has_position and
                other.start is not None and
                other.end is not None and
                other.protein == self.protein and
                other.start <= self.end and
                other.end >= self.start
            )

        if isinstance(other, Ptm):

            return (
                (other.residue is not None and other.residue in self) or
                (other.motif is not None and other.motif in self)
            )

        # a domain has no `instance` or `motif_name`, its own name is
        # the `domain` attribute
        return other == self.protein or other == self.domain

    def has_position(self):

        return bool(self.start and self.end)

    def get_position(self):

        return (self.start, self.end)

    def add_chains(self, pdb, chain):

        if pdb not in self.pdbs:

            self.pdbs[pdb] = []

        self.pdbs[pdb] = common.add_to_list(self.pdbs[pdb], chain)

    def serialize(self):

        return '%s-%u:%s:%u-%u:%s' % (
            self.protein,
            self.isoform,
            'unknown' if self.domain is None else self.domain,
            0 if self.start is None else self.start,
            0 if self.end is None else self.end,
            ','.join(
                '%s.%s' % (pdb, '.'.join(chains))
                for pdb, chains in self.pdbs.items()
            )
        )

    def __str__(self):

        return (
            'Domain in protein %s-%u:\n'
            '\tName: %s\n'
            '\tRange: %u-%u\n'
            '\t3D structures: %s\n' % (
                self.protein.label,
                self.isoform,
                self.domain or 'unknown',
                0 if self.start is None else self.start,
                0 if self.end is None else self.end,
                ', '.join(
                    '%s (chains %s)' % (pdb, ', '.join(chains))
                    for pdb, chains in self.pdbs.items()
                )
            )
        )

    def __repr__(self):

        rng = self.range_str()

        return '<Domain %sin %s-%u%s>' % (
            '%s ' % self.domain if self.domain else '',
            self.protein.label,
            self.isoform,
            ' [%s]' % rng if rng else '',
        )

    def range(self):
        """
        Returns the ``(start, end)`` tuple or ``None`` if any is missing.
        """

        return (self.start, self.end) if self.start and self.end else None

    def range_str(self):

        start_end = self.range()

        return '%s-%s' % start_end if start_end else ''

    def merge(self, other):
        """
        Takes over the PDB chains and, where missing or not Pfam, the
        domain name of the other domain, if the two are equal or any
        of them has no position.
        """

        # NOTE(review): when a position is missing the proteins are not
        # compared before merging -- confirm callers guarantee this.
        if not (
            self == other or
            self.start is None or
            self.end is None or
            other.start is None or
            other.end is None
        ):

            return

        for pdb, chain in other.pdbs.items():

            self.add_chains(pdb, chain)

        self.domain_id_type = self.domain_id_type or other.domain_id_type

        if (
            other.domain is not None and
            (self.domain_id_type != 'pfam' or self.domain is None)
        ):

            self.domain = other.domain
class DomainDomain(object):
    """
    An interaction between two domains, with the data sources,
    references and PDB structures supporting it.
    """

    def __init__(
            self,
            domain_a,
            domain_b,
            pdbs = None,
            sources = None,
            refs = None,
            contact_residues = None,
        ):
        """
        domain_a, domain_b : Domain
            The two interacting domains.
        pdbs, sources, refs : str,iterable
            PDB structures, data sources and references, collected
            into sets.
        contact_residues :
            Stored as it is, see the comment in the body.
        """

        self.domains = [domain_a, domain_b]
        self.sources = set()
        self.refs = set()
        self.pdbs = set()
        self.add_sources(sources)
        self.add_refs(refs)
        self.add_pdbs(pdbs)
        # This can be found from 3DComplexes; floating point
        # numbers show the number of residues in contact. Other
        # two numbers in the tuple are the length of domain sequences.
        self.contact_residues = contact_residues

    def __hash__(self):

        # the domains are kept only in the `domains` list
        return hash(tuple(self.domains))

    def __eq__(self, other):

        return (
            isinstance(other, DomainDomain) and
            self.__dict__ == other.__dict__
        )

    def __ne__(self, other):

        return not self.__eq__(other)

    def __contains__(self, other):

        return other in self.domains[0] or other in self.domains[1]

    def add_sources(self, source):
        """
        Adds one source (a string) or each element of an iterable.
        """

        if source is None:

            return None

        if isinstance(source, tuple(_const.CHAR_TYPES)):

            self._add_source(source)

        else:

            for s in source:

                self._add_source(s)

    def _add_source(self, source):

        self.sources.add(source)

    def add_refs(self, refs):

        self.refs = common.add_to_set(self.refs, refs)

    def add_pdbs(self, pdbs):

        self.pdbs = common.add_to_set(self.pdbs, pdbs)

    def serialize(self):
        """
        Returns the ``domain1|domain2|sources|references|pdb`` record;
        the elements within the last three fields are sorted.
        """

        return '|'.join([
            self.domains[0].serialize(),
            self.domains[1].serialize(),
            ','.join(sorted(self.sources)),
            ','.join(sorted(self.refs)),
            ','.join(sorted(self.pdbs)),
        ])

    def __str__(self):

        return (
            'Domain-domain interaction:\n'
            ' %s %s\n'
            ' Data sources: %s\n'
            ' References: %s\n'
            ' 3D structures: %s\n' % (
                self.domains[0].__str__(),
                self.domains[1].__str__(),
                ', '.join(self.sources),
                ', '.join(self.refs),
                ','.join(self.pdbs)
            )
        )
class DomainMotif(object):
    """
    An interaction between a domain of one protein and a motif or PTM
    of another, e.g. an enzyme-substrate relationship.
    """

    def __init__(self, domain, ptm, evidences = None, pdbs = None):
        """
        domain : Domain
            The domain (enzyme) side of the interaction.
        ptm : Ptm
            The motif or modification (substrate) side.
        evidences :
            Evidences of the interaction; a new ``evidence.Evidences``
            is created if empty or not provided.
        pdbs : str,iterable
            PDB structures, collected into a set.
        """

        self.ptm = ptm
        self.domain = domain
        self.pdbs = set()
        self.pnetw_score = None
        self.add_pdbs(pdbs)
        self.evidences = evidences or evidence.Evidences()

    def __hash__(self):

        return hash((self.domain, self.ptm))

    def __str__(self):

        return (
            'Domain-motif interaction:\n'
            ' %s %s'
            ' Data sources: %s\n'
            ' References: %s\n'
            ' 3D structures: %s\n' % (
                self.domain.__str__(),
                self.ptm.__str__(),
                ', '.join(self.evidences.get_resource_names()),
                ', '.join(str(r) for r in self.evidences.get_references()),
                ', '.join(sorted(self.pdbs)),
            )
        )

    def __repr__(self):

        return '<%s => %s [%s]>' % (
            self.domain.protein.label,
            self.ptm.__repr__().strip('<>').replace('PTM ', ''),
            self.evidences.__repr__().strip('<>')
        )

    def __eq__(self, other):
        """
        Equal if the PTMs are equal and the domains are either equal or
        any of them has no position.
        """

        return (
            isinstance(other, DomainMotif) and
            self.ptm == other.ptm and
            (
                self.domain == other.domain or
                (self.domain.start and self.domain.end) is None or
                (other.domain.start and other.domain.end) is None
            )
        )

    def __ne__(self, other):

        return not self.__eq__(other)

    def __contains__(self, other):

        return (
            other == self.domain or
            other == self.ptm or
            other == self.domain.protein or
            other == self.ptm.protein
        )

    def key(self):
        """
        Returns a unique key which is a tuple of the proteins, the residue
        and the modification type. The residue name and number are
        ``None`` if the PTM has no residue.
        """

        residue = self.ptm.residue

        return (
            self.domain.protein,
            self.ptm.protein,
            None if residue is None else residue.name,
            None if residue is None else residue.number,
            self.ptm.typ,
        )

    def get_proteins(self):

        return [self.domain.protein, self.ptm.protein]

    def add_pdbs(self, pdbs):

        self.pdbs = common.add_to_set(self.pdbs, pdbs)

    def serialize(self):
        """
        Returns the ``domain|ptm|sources|references|pdb`` record. The
        sources and references are taken from the evidences, as this
        class has no ``sources`` and ``refs`` attributes; the elements
        within the last three fields are sorted.
        """

        return '|'.join([
            self.domain.serialize(),
            self.ptm.serialize(),
            ','.join(sorted(self.evidences.get_resource_names())),
            ','.join(sorted(
                str(ref) for ref in self.evidences.get_references()
            )),
            ','.join(sorted(self.pdbs)),
        ])

    def print_residues(self):

        motif = (
            '%s-%u:' % (self.ptm.protein, self.ptm.isoform)
            if self.ptm.motif is None else
            self.ptm.motif.print_residues()
        )

        return '%s-%u:%s:%s' % (
            self.domain.protein,
            self.domain.isoform,
            motif,
            self.ptm.print_residue(),
        )

    def merge(self, other):
        """
        Merges the domain, PTM, evidences, PDB structures and score of
        an equal ``DomainMotif`` into this one.
        """

        if self == other:

            self.domain.merge(other.domain)
            self.ptm.merge(other.ptm)
            self.add_evidences(other.evidences)
            self.add_pdbs(other.pdbs)
            self.pnetw_score = self.pnetw_score or other.pnetw_score

    def resources(self, only_primary = False):
        """
        Returns a list of resource names, secondary ones in the format
        ``resource_via``. With ``only_primary`` the latter are omitted.
        """

        return [
            '%s%s' % (res, '_%s' % via if via else '')
            for res, via in
            self.evidences.get_resource_names_via(via = None)
            if not only_primary or not via
        ]

    def references(self):

        return self.evidences.get_references()

    def references_by_resource(self, only_primary = True):
        """
        Returns a list of ``(resource name, via, reference)`` tuples.
        """

        return [
            (ev.resource.name, ev.resource.via, ref)
            for ev in self.evidences
            for ref in ev.references
            if not only_primary or not ev.resource.via
        ]

    def references_by_resource_str(self, only_primary = True):

        return ';'.join(sorted(
            '%s%s:%s' % (
                res,
                '_%s' % via if via else '',
                ref.pmid,
            )
            for res, via, ref
            in self.references_by_resource(only_primary = only_primary)
        ))

    def get_line(self, resources_only_primary = False):
        """
        Returns a list intended to be a row in a data frame of
        enzyme-substrate relationships.
        Elements of the list:
        - enzyme
        - enzyme_genesymbol
        - substrate
        - substrate_genesymbol
        - isoforms
        - residue_type
        - residue_offset
        - modification
        - sources
        - references
        - curation_effort

        The residue type and offset are ``None`` if the PTM has
        no residue.
        """

        residue = self.ptm.residue

        return [
            self.domain.protein.identifier,
            self.domain.protein.label,
            self.ptm.protein.identifier,
            self.ptm.protein.label,
            ';'.join('%u' % iso for iso in sorted(self.ptm.isoforms)),
            None if residue is None else residue.name,
            None if residue is None else '%u' % residue.number,
            self.ptm.typ,
            ';'.join(sorted(
                self.resources(only_primary = resources_only_primary)
            )),
            self.references_by_resource_str(),
            self.evidences.count_curation_effort(),
        ]

    def add_evidences(self, evidences):

        self.evidences += evidences
class Regulation(object):
    """
    The effect of one or more PTMs on the interaction between a source
    and a target.
    """

    def __init__(self, ptm, source, target, effect, sources=None, refs=None):
        """
        ptm : Ptm,list
            One PTM or a list of PTMs; always stored as a list.
        source, target :
            The two partners of the regulated interaction.
        effect : str
            The effect of the PTM on the interaction.
        sources, refs : str,iterable
            Data sources and references, collected into sets.
        """

        self.ptm = ptm if type(ptm) is list else [ptm]
        self.source = source
        self.target = target
        self.effect = effect
        self.sources = set()
        self.refs = set()
        self.add_sources(sources)
        self.add_refs(refs)

    def __hash__(self):

        # `self.ptm` is a list, which is unhashable in itself
        return hash((tuple(self.ptm), self.source, self.target, self.effect))

    def __eq__(self, other):

        return (
            isinstance(other, Regulation) and
            self.ptm == other.ptm and
            self.source == other.source and
            self.target == other.target and
            self.effect == other.effect
        )

    def __ne__(self, other):

        return not self.__eq__(other)

    def add_sources(self, source):
        """
        Adds one source (a string) or each element of an iterable.
        """

        if source is None:

            return None

        if isinstance(source, tuple(_const.CHAR_TYPES)):

            self._add_source(source)

        else:

            for s in source:

                self._add_source(s)

    def _add_source(self, source):

        self.sources.add(source)

    def add_refs(self, refs):

        self.refs = common.add_to_set(self.refs, refs)

    def serialize(self):
        """
        Returns the ``effect|ptms|target|sources|references`` record;
        the PTMs are separated by semicolons, the sources and references
        are sorted.
        """

        return '|'.join([
            self.effect,
            ';'.join(ptm.serialize() for ptm in self.ptm),
            self.target,
            ','.join(sorted(self.sources)),
            ','.join(sorted(self.refs)),
        ])

    def __str__(self):

        # the first line has three placeholders: source, effect, target
        return (
            'Regulation by PTM:\n'
            ' PTM on %s %s interaction with %s\n'
            ' %s \n'
            ' Data sources: %s\n'
            ' References: %s\n' % (
                self.source,
                self.effect,
                self.target,
                '; '.join(str(ptm) for ptm in self.ptm),
                ', '.join(self.sources),
                ', '.join(self.refs),
            )
        )

    def merge(self, other):
        """
        Merges the PTMs, sources and references of an equal
        ``Regulation`` into this one.
        """

        if self == other:

            # equality implies equal PTM lists, hence the pairing
            for own, new in zip(self.ptm, other.ptm):

                own.merge(new)

            self.add_sources(other.sources)
            self.add_refs(other.refs)
#TODO this class does not belong here, find a better place
class Complex(object):
    """
    A molecular complex: a set of components with stoichiometry,
    annotated by sources, references, identifiers and attributes.
    """

    # stoichiometry is reported only for complexes from these resources
    have_stoichiometry = {
        'PDB',
        'Compleat',
        'ComplexPortal',
        'CellPhoneDB',
    }

    def __init__(
            self,
            components,
            ncbi_tax_id = 9606,
            name = None,
            ids = None,
            sources = None,
            interactions = None,
            references = None,
            proteins = None,
            attrs = None,
        ):
        """
        Represents a molecular complex.

        components : list,dict
            Either a list of identifiers or a dict with identifiers as keys
            and stoichiometric coefficients as values. List of identifiers
            also assumed to represent stoichiometry by repetition
            of identifiers.
        ncbi_tax_id : int
            NCBI taxonomy identifier of the complex. It implies all members
            of the complex belong to the same organism. Support for multi-
            organism complexes will be implemented in the future.
        name : str
            A custom name or identifier of the complex.
        ids : dict
            Identifiers. If ``sources`` is a set, list or tuple it should be
            a dict with database names as keys and set of identifiers as
            values. If ``sources`` is a string, it can be a set of
            identifiers or a single identifier.
        sources : set,str
            Database(s) the complex has been defined in.
        interactions : list,dict
            Interactions between the components of the complex. Either
            a list of tuples of component IDs or a dict with tuples as
            keys and custom interaction properties as values.
        references : set,str
            Literature references, converted to a set.
        proteins : list,dict
            Synonym for `components`, kept for compatibility.
        attrs : dict
            Custom annotations, copied into the ``attrs`` dict.
        """

        components = components or proteins

        self.components = (
            components
            if isinstance(components, dict) else
            dict(collections.Counter(components))
        )
        self.proteins = self.components
        self.name = name
        self.ids = collections.defaultdict(set)
        self.add_ids(ids, source = sources)
        self.sources = common.to_set(sources)
        self.references = common.to_set(references)
        self.ncbi_tax_id = taxonomy.ensure_ncbi_tax_id(ncbi_tax_id)
        self.attrs = {}

        if isinstance(attrs, dict):

            self.attrs.update(attrs)

        self.interactions = interactions

    def reload(self):
        """
        Reloads the module of this class and updates the class of the
        instance to the reloaded one.
        """

        modname = self.__class__.__module__
        mod = __import__(modname, fromlist = [modname.split('.')[0]])
        import importlib as imp
        imp.reload(mod)
        new = getattr(mod, self.__class__.__name__)
        setattr(self, '__class__', new)

    def __str__(self):

        return 'COMPLEX:%s' % (
            COMPLEX_SEP.join(sorted(self.components.keys()))
        )

    def __repr__(self):

        return 'Complex%s: %s' % (
            ' %s' % self.name if self.name else '',
            self.__str__(),
        )

    def __hash__(self):

        return hash(self.__str__())

    def __contains__(self, other):

        return other in self.components

    def __eq__(self, other):
        """
        Equality by hash: a complex is equal to another complex of the
        same components and also to its own string representation.
        """

        try:

            return self.__hash__() == hash(other)

        except TypeError:

            # unhashable objects are never equal to a complex
            return False

    def __iadd__(self, other):

        self.merge(other)

        return self

    def __lt__(self, other):

        return self.__str__() < other

    def __gt__(self, other):

        return self.__str__() > other

    def __len__(self):

        return len(self.components)

    def merge(self, other):
        """
        Adds the annotations (sources, references, attrs) of the other
        ``Complex`` instance to this one. If the other ``Complex`` has
        different components it does nothing.
        """

        if self != other:

            return

        if (
            set(self.components.values()) == {1} and
            set(other.components.values()) != {1}
        ):

            # this complex has no stoichiometry information
            # but the other has; a copy is taken so the two complexes
            # do not share one dict, and the `proteins` alias follows
            self.components = dict(other.components)
            self.proteins = self.components

        self.sources.update(other.sources)
        self.references.update(other.references)
        self.add_ids(other.ids)

        for key, val in other.attrs.items():

            if key not in self.attrs:

                self.attrs[key] = val

            elif isinstance(self.attrs[key], (dict, set)):

                self.attrs[key].update(val)

    def add_ids(self, ids, source = None):
        """
        Adds identifiers: either a dict of sources and identifiers, or
        identifiers to be assigned to each of the sources in ``source``.
        """

        if not isinstance(ids, dict):

            ids = common.to_set(ids)

        if isinstance(ids, set) and source:

            source = common.to_set(source)
            ids = dict((s, ids) for s in source)

        if isinstance(ids, dict):

            for this_source, this_ids in ids.items():

                self.ids[this_source].update(common.to_set(this_ids))

    def get_interaction(self, component1, component2):
        """
        Returns the interaction stored for the pair of components or
        ``None`` if there is no such interaction.
        """

        if self.has_interaction(component1, component2):

            return self.interactions[(component1, component2)]

    def set_interaction(self, component1, component2, interaction):

        key = (component1, component2)
        self.interactions = self.interactions or {}
        self.interactions[key] = interaction

    def has_interaction(self, component1, component2):
        """
        Tells if an interaction is stored for the pair of components;
        always returns a bool.
        """

        key = (component1, component2)

        return bool(self.interactions) and key in self.interactions

    def add_source(self, source):

        self.sources.add(source)

    def iter_proteins(self):

        for protein in self.proteins.keys():

            yield protein

    __iter__ = iter_proteins

    def add_attr(self, source, attr):
        """
        Attributes can store annotations for complexes.
        """

        self.attrs[source] = attr

    def _sorted_components(self):
        """
        Returns the ``(identifier, count)`` pairs sorted by identifier.
        """

        return sorted(
            self.components.items(),
            key = lambda id_cnt: id_cnt[0],
        )

    @property
    def stoichiometry(self):
        """
        Colon separated counts of the components in the order of their
        identifiers; zeros if none of the sources is in
        ``have_stoichiometry``.
        """

        known = bool(self.sources & self.have_stoichiometry)

        return ':'.join(
            '%u' % (cnt if known else 0)
            for _id, cnt in self._sorted_components()
        )

    @property
    def stoichiometry_str(self):
        """
        Semicolon separated identifiers, each repeated by its count.
        """

        return ';'.join(
            itertools.chain(*(
                (comp,) * cnt
                for comp, cnt in self._sorted_components()
            ))
        )

    @property
    def stoichiometry_str_genesymbols(self):
        """
        Like ``stoichiometry_str`` but each identifier is translated by
        ``mapping.map_name0`` from ``uniprot`` to ``genesymbol``; the
        identifier itself is used if the translation gives nothing.
        """

        return ';'.join(
            itertools.chain(*(
                (
                    (
                        mapping.map_name0(
                            uniprot,
                            'uniprot',
                            'genesymbol',
                        ) or
                        uniprot
                    ),
                ) * cnt
                for uniprot, cnt in self._sorted_components()
            ))
        )

    @property
    def genesymbols(self):

        return sorted(
            (
                mapping.map_name0(uniprot, 'uniprot', 'genesymbol') or
                uniprot
            )
            for uniprot in self.components.keys()
        )

    @property
    def genesymbol_str(self):

        return COMPLEX_SEP.join(self.genesymbols)
class Interface(object):
    """
    Residue level information about the interface between two proteins.

    For each bond type in ``types`` the instance has an attribute of the
    same name: a dict with the two protein identifiers as keys and lists
    of residues as values. Elements at the same index of the two lists
    form one pair.
    """

    # removes everything except digits, dots and minus signs
    _non_digit = re.compile(r'[^\d.-]+')

    def __init__(self,
                 id_a,
                 id_b,
                 source,
                 id_type='uniprot',
                 pdb=None,
                 css=None,
                 stab_en=None,
                 solv_en=None,
                 area=None,
                 isoform_a=1,
                 isoform_b=1):
        '''
        This class is to store residue level information of
        protein-protein interfaces.

        id_a, id_b : str
            Identifiers of the two proteins.
        source : str
            The data source of the interface.
        id_type : str
            Type of the protein identifiers.
        pdb : str
            The PDB structure where the interface is observed.
        css, stab_en, solv_en, area :
            Complexation significance score, stable energy, solvation
            energy and surface area (labels from ``__str__``).
        isoform_a, isoform_b : int,str
            Isoform numbers; from strings the non-numeric characters
            are removed.
        '''

        self.source = source
        self.isoform_a = (
            isoform_a
            if isinstance(isoform_a, int) else
            int(self._non_digit.sub('', isoform_a))
        )
        self.isoform_b = (
            isoform_b
            if isinstance(isoform_b, int) else
            int(self._non_digit.sub('', isoform_b))
        )
        self.pdb = pdb
        self.id_a = id_a
        self.id_b = id_b
        self.id_type = id_type
        self.types = ['undefined', 'hbonds', 'sbridges', 'ssbonds', 'covbonds']

        # NOTE(review): if `id_a` equals `id_b` the dicts below have one
        # key only and both sides of each pair end up in the same list.
        for t in self.types:

            setattr(self, t, {id_a: [], id_b: []})

        self.area = area
        self.stab_en = stab_en
        self.solv_en = solv_en
        self.css = css

    @staticmethod
    def _valid_residue(res):
        """
        Tells if ``res`` is a tuple starting with an int and a string.
        """

        return (
            type(res) is tuple and
            len(res) > 1 and
            type(res[0]) is int and
            isinstance(res[1], str)
        )

    def add_residues(self, res_a, res_b, typ='undefined'):
        '''
        Adds one pair of residues of type `typ`,
        where `res_a` and `res_b` are tuples of
        residue number in sequence and residue type,
        e.g. (124, 'S') -- (means Serine #124)
        `typ` can be undefined, hbonds, sbridges, ssbonds or covbonds

        An optional third element of the tuples is used as the protein
        of the residue, by default these are `id_a` and `id_b`. On wrong
        parameters a message is written to stdout and nothing is added.
        '''

        if not (
            self._valid_residue(res_a) and
            self._valid_residue(res_b) and
            typ in self.types
        ):

            sys.stdout.write(
                '\tWrong parameters for Interface.add_residues()\n')
            return

        bonds = getattr(self, typ)

        for res, default_id in ((res_a, self.id_a), (res_b, self.id_b)):

            protein = res[2] if len(res) > 2 else default_id
            bonds[default_id].append(
                Residue(res[0], res[1], protein, self.id_type)
            )

    def numof_residues(self):
        '''
        Returns the number of residue pairs by bound type
        '''

        return dict(
            (t, len(getattr(self, t)[self.id_a]))
            for t in self.types
        )

    def bond_types(self):
        '''
        Returns the bond types present in this interface
        '''

        return [t for t in self.types if getattr(self, t)[self.id_a]]

    def get_bonds(self, typ=None, mode=None):
        '''
        Gives a generator to iterate through bonds in
        this interface. If no type given, bonds of all types
        returned.

        With `mode` set to ``dict`` the items are dicts of the two
        residues and the type, otherwise tuples of the identifiers,
        the serialized residues and the type.
        '''

        if typ is None:

            typ = self.types

        if isinstance(typ, str):

            typ = [typ]

        for t in typ:

            if t not in self.types:

                continue

            bonds = getattr(self, t)

            for res_a, res_b in zip(bonds[self.id_a], bonds[self.id_b]):

                if mode == 'dict':

                    yield {
                        self.id_a: res_a,
                        self.id_b: res_b,
                        'type': t,
                    }

                else:

                    yield (
                        self.id_a,
                        res_a.serialize(),
                        self.id_b,
                        res_b.serialize(),
                        t,
                    )

    def serialize(self):
        """
        Returns a colon separated record of the proteins, isoforms,
        source, PDB structure and the residues by bond type.
        """

        res = []

        for t in self.types:

            bonds = getattr(self, t)

            if bonds[self.id_a] and bonds[self.id_b]:

                # the values are lists of residues, each element has to
                # be serialized on its own
                res.append(
                    '%s:%s+%s' % (
                        t,
                        ','.join(r.serialize() for r in bonds[self.id_a]),
                        ','.join(r.serialize() for r in bonds[self.id_b]),
                    )
                )

        return (
            '%s-%u:%s-%u:%s:%s:%s' % (
                self.id_a,
                self.isoform_a,
                self.id_b,
                self.isoform_b,
                self.source,
                self.pdb,
                ':'.join(res),
            )
        )

    def __str__(self):

        nbonds = self.numof_residues()

        return (
            'Molecular interface between %s and %s,\n'
            'as observed in PDB structure %s\n\n'
            ' Data source: %s\n'
            ' Number of residues in contact: %u\n'
            ' Hydrogene bonds: %u\n'
            ' Covalent bonds: %u\n'
            ' Saltbridges: %u\n'
            ' S-S bonds: %u\n'
            ' Stable energy: %s\n'
            ' Solvation energy: %s\n'
            ' Surface area: %s\n'
            ' Complexation significance score: %s\n' % (
                self.id_a,
                self.id_b,
                self.pdb,
                self.source,
                sum(nbonds.values()),
                nbonds['hbonds'],
                nbonds['covbonds'],
                nbonds['sbridges'],
                nbonds['ssbonds'],
                'n/a' if self.stab_en is None else str(self.stab_en),
                'n/a' if self.solv_en is None else str(self.solv_en),
                'n/a' if self.area is None else str(self.area),
                'n/a' if self.css is None else str(self.css),
            )
        )

    def __repr__(self):

        nbonds = self.numof_residues()

        return (
            'Interface [%s-%s, %u bonds]' % (
                self.id_a,
                self.id_b,
                sum(nbonds.values()),
            )
        )