# Source code for pypath.internals.intera

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

"""
This module provides classes to represent and handle
structural details of protein interactions
i.e. residues, post-translational modifications,
short motifs, domains, domain-motifs and
domain-motif interactions, binding interfaces.
"""

from future.utils import iteritems
from past.builtins import xrange, range, reduce

import re
import sys
import importlib as imp
import collections
import itertools

# from pypath:
import pypath.share.common as common
import pypath_common._constants as _const
import pypath.utils.mapping as mapping
import pypath.core.evidence as evidence
import pypath.core.entity as entity
import pypath.utils.taxonomy as taxonomy

# Names exported by ``from pypath.internals.intera import *``.
# NOTE(review): ``Regulation`` and ``Complex`` are defined in this module
# but are not listed here -- confirm whether this is intentional.
__all__ = [
    'Residue',
    'Ptm',
    'Motif',
    'Domain',
    'DomainDomain',
    'DomainMotif',
    'Interface',
]

# Python 3 has no ``unicode`` builtin: alias it to ``str``. Probing the
# name directly (instead of ``'unicode' in __builtins__``) also works when
# ``__builtins__`` is the module object rather than a dict, which is the
# case when this file is executed as ``__main__``.
try:
    unicode
except NameError:
    unicode = str


# Separator placed between the component identifiers when the string
# representation of a complex is built.
COMPLEX_SEP = '_'


class Residue(object):
    """
    A single residue at a given position of one isoform of a protein.

    :param int,str number:
        Position of the residue. Strings are stripped from everything
        except digits, dots and dashes and converted to integer.
    :param str name:
        Name (letter code) of the residue.
    :param protein:
        Either an object with an ``identifier`` attribute (used as it is)
        or an identifier, in this case an ``entity.Entity`` is created.
    :param str id_type:
        Identifier type, used only if an ``Entity`` has to be created.
    :param int ncbi_tax_id:
        Taxon, used only if an ``Entity`` has to be created.
    :param int,str isoform:
        Isoform number; strings are converted the same way as ``number``.
    :param bool mutated:
        Marks the residue as mutated in the string representation.
    :param seq:
        Optional sequence object, used by ``in_isoform``.
    """

    def __init__(
            self,
            number,
            name,
            protein,
            id_type = 'uniprot',
            ncbi_tax_id = 9606,
            isoform = 1,
            mutated = False,
            seq = None
        ):

        non_digit = re.compile(r'[^\d.-]+')

        self.name = name
        self.number = (
            int(non_digit.sub('', number))
                if isinstance(number, str) else
            number
        )
        self.protein = (
            protein
                if hasattr(protein, 'identifier') else
            entity.Entity(
                identifier = protein,
                id_type = id_type,
                taxon = ncbi_tax_id,
            )
        )
        # kept because `in_isoform` needs it to create the new instance
        self.id_type = id_type
        self.mutated = mutated
        self.seq = seq
        self.isoform = (
            isoform
                if isinstance(isoform, int) else
            int(non_digit.sub('', isoform))
        )


    def __hash__(self):

        return hash((self.number, self.name, self.protein))


    def __eq__(self, other):

        # comparison against `None` or foreign objects is simply False
        # (e.g. `Ptm` compares residues which might be `None`)
        return (
            isinstance(other, Residue) and
            self.protein == other.protein and
            self.number == other.number and
            self.name == other.name
        )


    def __ne__(self, other):

        return not self.__eq__(other)


    def __str__(self):

        return 'Residue %s-%u in protein %s-%u%s\n' % (
            self.name,
            self.number,
            self.protein.identifier,
            self.isoform,
            ' (mutated)' if self.mutated else ''
        )


    def __repr__(self):

        return '<Residue %s-%u:%s%u>' % (
            self.protein.label,
            self.isoform,
            self.name,
            self.number,
        )


    def serialize(self):
        """
        Returns the residue name followed by its number, e.g. ``Y1092``.
        """

        return '%s%u' % (self.name, self.number)


    def in_isoform(self, isoform, seq = None):
        """
        Returns a new ``Residue`` for another isoform if the sequence
        of that isoform has the same residue at the same position,
        otherwise ``None``.

        :param isoform:
            The isoform to look up in the sequence object.
        :param seq:
            Sequence object providing ``has_isoform`` and ``get``;
            defaults to the one provided at instantiation.
        """

        seq = seq or self.seq

        if (
            seq and
            seq.has_isoform(isoform) and
            seq.get(self.number, isoform = isoform) == self.name
        ):

            return Residue(
                number = self.number,
                name = self.name,
                protein = self.protein,
                id_type = self.id_type,
                isoform = isoform,
                mutated = self.mutated,
                seq = seq,
            )

        return None
[docs] class Ptm(object):
[docs] def __init__( self, protein, id_type = 'uniprot', ncbi_tax_id = 9606, typ = 'unknown', motif = None, residue = None, isoform = 1, evidences = None, seq = None, ): self.non_digit = re.compile(r'[^\d.-]+') self.protein = ( protein if hasattr(protein, 'identifier') else entity.Entity( identifier = protein, id_type = id_type, taxon = ncbi_tax_id, ) ) self.id_type = id_type self.typ = typ.lower() self.seq = seq self.motif = motif self.residue = residue self.isoform = ( isoform if type(isoform) is int else int(self.non_digit.sub('', isoform)) ) self.isoforms = set() self.add_isoform(isoform) self.evidences = evidence.Evidences() self.add_evidences(evidences)
def __hash__(self): return hash((self.residue, self.typ)) def __str__(self): return ( '%s in protein %s-%u\n Motif: %s\n%s' % ( ( 'Domain-motif interaction' if ( self.typ == 'unknown' and self.residue is None ) else 'PTM: %s' % self.typ ), self.protein.label, self.isoform, ( 'unknown' if self.motif is None else self.motif.__str__() ), ( '' if self.residue is None else '\n Residue: %s' % self.residue.__str__() ), ) ) def __repr__(self): return '<PTM %s%s>' % ( ( self.residue.__repr__().strip('<>') if self.residue else self.protein.label ), ':%s' % self.typ if self.residue else '', ) def __eq__(self, other): return ( isinstance(other, Ptm) and self.protein == other.protein and ( self.residue == other.residue or ( (self.residue is None or other.residue is None) and self.motif == other.motif ) ) and ( self.typ == other.typ or self.typ is None or other.typ is None ) ) def __ne__(self, other): return not self.__eq__(other) def __contains__(self, other): if isinstance(other, Residue): if self.residue is not None: return other == self.residue elif self.motif is not None: return other in self.motif else: return False if isinstance(other, Motif): return other in self.motif elif other == self.protein: return True elif isinstance(other, Mutation): return ( other.original == self.residue or other.original in self.motif ) def __deepcopy__(self, memo): new = type(self)( protein = self.protein, id_type = self.id_type, typ = self.typ, motif = self.motif, residue = self.residue, isoform = self.isoform, ) new.add_isoform(self.isoforms) return new def add_evidences(self, evidences): self.evidences += evidences def serialize(self): return '%s-%u:%s:%s:%s:%s:%u' % ( self.protein, self.isoform, self.typ, ','.join(self.sources), ':::0-0' if self.motif is None else self.motif.serialize(), '' if self.residue is None else self.residue.name, 0 if self.residue is None else self.residue.number, ) def print_residue(self): return '%s-%u:%s:%u' % ( self.protein, self.isoform, '' if self.residue is 
None else self.residue.name, 0 if self.residue is None else self.residue.number, ) def merge(self, other): if self == other: self.add_evidences(other.evidences) self.motif = ( self.motif if other.motif is None else other.motif if self.motif is None else self.motif.merge(other.motif) ) if ( (self.typ == 'unknown' or len(self.typ) == 3) and other.typ != 'unknown' ): self.typ = other.typ self.isoform = min(self.isoform, other.isoform) self.isoforms = other.isoforms | self.isoforms def add_isoform(self, isoform): isoform = ( set([isoform]) if isinstance(isoform, int) else isoform if isinstance(isoform, set) else {int(self.non_digit.sub('', isoform))} ) self.isoforms = self.isoforms | isoform def get_isoforms(self, seq = None): result = [] seq = seq or self.seq if seq: for isoform in seq.get_isoforms(): ptm = self.in_isoform(isoform, seq) if ptm: result.append(ptm) return result def in_isoform(self, isoform, seq = None): seq = seq or self.seq if seq and seq.has_isoform(isoform): if ( seq.get(self.residue.number, isoform = isoform) == self.residue.name ): res = self.residue.in_isoform(isoform, seq = seq) mot = self.motif.in_isoform(isoform, seq = seq) ptm = Ptm( protein = self.protein, id_type = self.id_type, typ = self.typ, motif = mot, residue = res, evidences = self.sources, isoform = isoform, seq = seq, ) return ptm
class Motif(object):
    """
    A short sequence motif within one isoform of a protein.

    :param protein:
        Either an object with an ``identifier`` attribute (used as it is)
        or an identifier, in this case an ``entity.Entity`` is created.
    :param int,str start:
        First position of the motif; strings are stripped from
        everything except digits, dots and dashes.
    :param int,str end:
        Last position of the motif; converted like ``start``.
    :param str id_type:
        Identifier type of the protein.
    :param int ncbi_tax_id:
        Taxon, used only if an ``Entity`` has to be created.
    :param regex:
        Pattern of the motif, compiled by ``re.compile``.
    :param instance:
        The actual sequence of the motif.
    :param int,str isoform:
        Isoform number.
    :param motif_name, prob, elm, description:
        Stored as they are.
    :param seq:
        Optional sequence object, used by ``in_isoform``.
    :param evidences:
        Evidences added to the ``evidence.Evidences`` object of the
        instance.
    """

    def __init__(
            self,
            protein,
            start,
            end,
            id_type = 'uniprot',
            ncbi_tax_id = 9606,
            regex = None,
            instance = None,
            isoform = 1,
            motif_name = None,
            prob = None,
            elm = None,
            description = None,
            seq = None,
            evidences = None,
        ):

        non_digit = re.compile(r'[^\d.-]+')

        self.protein = (
            protein
                if hasattr(protein, 'identifier') else
            entity.Entity(
                protein,
                id_type = id_type,
                taxon = ncbi_tax_id,
            )
        )
        self.id_type = id_type
        self.seq = seq
        self.isoform = (
            isoform
                if isinstance(isoform, int) else
            int(non_digit.sub('', isoform))
        )
        self.start = (
            int(non_digit.sub('', start))
                if isinstance(start, str) else
            start
        )
        self.end = (
            int(non_digit.sub('', end))
                if isinstance(end, str) else
            end
        )
        self.regex = None if regex is None else re.compile(regex)
        self.instance = instance
        self.motif_name = motif_name
        self.prob = prob
        self.elm = elm
        self.description = description
        self.evidences = evidence.Evidences()
        self.add_evidences(evidences)


    def __hash__(self):

        return hash((self.protein, self.start, self.end))


    def __eq__(self, other):

        # comparison against `None` or foreign objects is simply False
        # (e.g. `Ptm` compares motifs which might be `None`)
        return (
            isinstance(other, Motif) and
            other.protein == self.protein and
            other.start == self.start and
            other.end == self.end
        )


    def __contains__(self, other):

        return (
            (
                isinstance(other, Residue) and
                other.protein == self.protein and
                other.number >= self.start and
                other.number <= self.end
            ) or
            (
                other == self.protein or
                other == self.instance or
                other == self.motif_name
            )
        )


    def add_evidences(self, evidences):
        """
        Adds evidences to the evidences of this motif.
        """

        self.evidences += evidences


    def serialize(self):
        """
        Returns a string of the motif name, instance and range.
        """

        return '%s:%s:%u-%u' % (
            self.motif_name or 'unknown',
            self.instance,
            0 if self.start is None else self.start,
            0 if self.end is None else self.end,
        )


    def print_residues(self):
        """
        Returns a string of the protein, isoform and range.
        """

        return '%s-%u:%u-%u' % (
            self.protein,
            self.isoform,
            0 if self.start is None else self.start,
            0 if self.end is None else self.end,
        )


    def merge(self, other):
        """
        Fills the missing attributes of this motif from an equal motif
        and adds its evidences. Works in place, returns nothing.
        """

        if self == other:

            self.instance = self.instance or other.instance
            self.regex = self.regex or other.regex
            self.elm = self.elm or other.elm
            self.prob = self.prob or other.prob
            self.motif_name = self.motif_name or other.motif_name
            self.description = self.description or other.description
            self.evidences += other.evidences


    def __str__(self):

        return (
            'Motif in protein %s-%u:\n'
            '\tName: %s\n'
            '\tELM: %s\n'
            '\tRange: %u-%u\n'
            '\tRegex: %s\n'
            '\tInstance: %s\n' % (
                self.protein.label,
                self.isoform,
                self.motif_name or 'unknown',
                self.elm or 'unknown',
                0 if self.start is None else self.start,
                0 if self.end is None else self.end,
                'unknown' if self.regex is None else self.regex.pattern,
                self.instance or 'unknown',
            )
        )


    def __repr__(self):

        rng = self.range_str()

        return '<Motif %sin %s-%u%s>' % (
            '%s ' % self.motif_name if self.motif_name else '',
            self.protein.label,
            self.isoform,
            ' [%s]' % rng if rng else '',
        )


    def range(self):
        """
        Returns the tuple of start and end, or ``None`` if any of them
        is missing.
        """

        return (
            (self.start, self.end)
                if self.start and self.end else
            None
        )


    def range_str(self):
        """
        Returns the range as ``start-end`` or an empty string.
        """

        start_end = self.range()

        return '%s-%s' % start_end if start_end else ''


    def in_isoform(self, isoform, seq = None):
        """
        Returns a new ``Motif`` with the region looked up by the
        ``get_region`` method of the sequence object, or ``None`` if
        the sequence is missing or does not have the isoform.
        """

        seq = seq or self.seq

        if seq and seq.has_isoform(isoform):

            start, end, reg = seq.get_region(
                self.start,
                self.start,
                self.end,
            )

            # keyword arguments: passed by position these values got
            # shifted by one against the signature of `__init__`
            return Motif(
                protein = self.protein,
                start = start,
                end = end,
                id_type = self.id_type,
                regex = self.regex,
                instance = reg,
                isoform = isoform,
                motif_name = self.motif_name,
                prob = self.prob,
                elm = self.elm,
                description = self.description,
                seq = seq,
            )

        return None
class Domain(object):
    """
    A domain of one isoform of a protein, optionally with its position
    and the PDB structures and chains where it can be found.

    :param protein:
        Either an object with an ``identifier`` attribute (used as it is)
        or an identifier, in this case an ``entity.Entity`` is created.
    :param str id_type:
        Identifier type of the protein.
    :param int ncbi_tax_id:
        Taxon, used only if an ``Entity`` has to be created.
    :param domain:
        Name or identifier of the domain.
    :param str domain_id_type:
        Type of the domain identifier.
    :param int,str start:
        First position; strings are stripped from everything except
        digits, dots and dashes.
    :param int,str end:
        Last position; converted like ``start``.
    :param int,str isoform:
        Isoform number.
    :param dict chains:
        PDB identifiers as keys and chains as values.
    """

    def __init__(
            self,
            protein,
            id_type = 'uniprot',
            ncbi_tax_id = 9606,
            domain = None,
            domain_id_type = 'pfam',
            start = None,
            end = None,
            isoform = 1,
            chains = None,
        ):

        non_digit = re.compile(r'[^\d.-]+')

        self.protein = (
            protein
                if hasattr(protein, 'identifier') else
            entity.Entity(
                identifier = protein,
                id_type = id_type,
                taxon = ncbi_tax_id,
            )
        )
        self.id_type = id_type
        self.domain = domain
        self.domain_id_type = domain_id_type
        # string checks are the same as in `Residue` and `Motif`
        self.start = (
            int(non_digit.sub('', start))
                if isinstance(start, str) else
            start
        )
        self.end = (
            int(non_digit.sub('', end))
                if isinstance(end, str) else
            end
        )
        self.isoform = (
            isoform
                if isinstance(isoform, int) else
            int(non_digit.sub('', isoform))
        )
        self.pdbs = {}

        for pdb, chain in (chains or {}).items():

            self.add_chains(pdb, chain)


    def __hash__(self):

        return hash((self.protein, self.domain))


    def __eq__(self, other):

        if (
            not isinstance(other, Domain) or
            any(
                num is None
                for num in (self.start, self.end, other.start, other.end)
            )
        ):

            return False

        # tolerance of the boundaries: 10% of the length of the shorter
        # domain, but at least 10 and at most 30 residues
        shorter = min(self.end - self.start, other.end - other.start)
        flk = min(max(int(shorter * 0.1), 10), 30)

        return (
            self.protein == other.protein and
            self.id_type == other.id_type and
            other.start - flk < self.start < other.start + flk and
            other.end - flk < self.end < other.end + flk
        )


    def __ne__(self, other):

        return not self.__eq__(other)


    def __contains__(self, other):

        positioned = self.start is not None and self.end is not None

        if isinstance(other, Residue):

            return bool(
                positioned and
                other.protein == self.protein and
                self.start <= other.number <= self.end
            )

        if isinstance(other, Motif):

            # NOTE(review): a motif is considered to be in the domain if
            # their ranges overlap; the earlier condition matched only
            # motifs ending before the domain starts -- confirm if full
            # containment is preferred
            return bool(
                positioned and
                other.start is not None and
                other.end is not None and
                other.protein == self.protein and
                other.start <= self.end and
                other.end >= self.start
            )

        if isinstance(other, Ptm):

            return (
                (other.residue is not None and other.residue in self) or
                (other.motif is not None and other.motif in self)
            )

        # this class has no `instance` or `motif_name` attribute,
        # the name of the domain is in `domain`
        return bool(other == self.protein or other == self.domain)


    def has_position(self):
        """
        Tells if both the start and the end are available.
        """

        return bool(self.start and self.end)


    def get_position(self):
        """
        Returns the tuple of start and end.
        """

        return (self.start, self.end)


    def add_chains(self, pdb, chain):
        """
        Adds chain(s) to the list of chains of a PDB structure.
        """

        if pdb not in self.pdbs:

            self.pdbs[pdb] = []

        self.pdbs[pdb] = common.add_to_list(self.pdbs[pdb], chain)


    def serialize(self):
        """
        Returns a colon separated string of the protein, isoform,
        domain name, range and PDB structures with chains.
        """

        return '%s-%u:%s:%u-%u:%s' % (
            self.protein,
            self.isoform,
            'unknown' if self.domain is None else self.domain,
            0 if self.start is None else self.start,
            0 if self.end is None else self.end,
            ','.join(
                '%s.%s' % (pdb, '.'.join(chains))
                for pdb, chains in self.pdbs.items()
            )
        )


    def __str__(self):

        return (
            'Domain in protein %s-%u:\n'
            '\tName: %s\n'
            '\tRange: %u-%u\n'
            '\t3D structures: %s\n' % (
                self.protein.label,
                self.isoform,
                self.domain or 'unknown',
                0 if self.start is None else self.start,
                0 if self.end is None else self.end,
                ', '.join(
                    '%s (chains %s)' % (pdb, ', '.join(chains))
                    for pdb, chains in self.pdbs.items()
                )
            )
        )


    def __repr__(self):

        rng = self.range_str()

        return '<Domain %sin %s-%u%s>' % (
            '%s ' % self.domain if self.domain else '',
            self.protein.label,
            self.isoform,
            ' [%s]' % rng if rng else '',
        )


    def range(self):
        """
        Returns the tuple of start and end, or ``None`` if any of them
        is missing.
        """

        return (
            (self.start, self.end)
                if self.start and self.end else
            None
        )


    def range_str(self):
        """
        Returns the range as ``start-end`` or an empty string.
        """

        start_end = self.range()

        return '%s-%s' % start_end if start_end else ''


    def merge(self, other):
        """
        Merges the PDB chains and the domain name of another domain into
        this one if the two are equal or any of them has no position.
        """

        if (
            self == other or
            (self.start and self.end) is None or
            (other.start and other.end) is None
        ):

            for pdb, chain in other.pdbs.items():

                self.add_chains(pdb, chain)

            self.domain_id_type = (
                self.domain_id_type or
                other.domain_id_type
            )

            if (
                (
                    self.domain_id_type != 'pfam' and
                    other.domain is not None
                ) or
                (
                    self.domain is None and
                    other.domain is not None
                )
            ):

                self.domain = other.domain
class DomainDomain(object):
    """
    An interaction between two domains.

    :param domain_a, domain_b:
        The two interacting domains.
    :param pdbs:
        PDB structure(s) where the interaction has been observed.
    :param sources:
        Name(s) of the data source(s): one string or an iterable.
    :param refs:
        Reference(s).
    :param contact_residues:
        Stored as it is, see the comment at the attribute.
    """

    def __init__(
            self,
            domain_a,
            domain_b,
            pdbs = None,
            sources = None,
            refs = None,
            contact_residues = None,
        ):

        self.domains = [domain_a, domain_b]
        self.sources = set()
        self.refs = set()
        self.pdbs = set()
        self.add_sources(sources)
        self.add_refs(refs)
        self.add_pdbs(pdbs)
        # This can be found from 3DComplexes; floating point numbers
        # show the number of residues in contact. Other two numbers in
        # the tuple are the length of domain sequences.
        self.contact_residues = contact_residues


    def __hash__(self):

        # the domains are kept in the `domains` list,
        # there are no separate attributes for them
        return hash(tuple(self.domains))


    def __eq__(self, other):

        return (
            isinstance(other, DomainDomain) and
            self.__dict__ == other.__dict__
        )


    def __ne__(self, other):

        return not self.__eq__(other)


    def __contains__(self, other):

        return other in self.domains[0] or other in self.domains[1]


    def add_sources(self, source):
        """
        Adds one source (string) or more sources (iterable).
        """

        if source is None:

            return None

        if type(source) in _const.CHAR_TYPES:

            self._add_source(source)

        else:

            for s in source:

                self._add_source(s)


    def _add_source(self, source):

        self.sources.add(source)


    def add_refs(self, refs):
        """
        Adds reference(s) to the set of references.
        """

        self.refs = common.add_to_set(self.refs, refs)


    def add_pdbs(self, pdbs):
        """
        Adds PDB structure(s) to the set of structures.
        """

        self.pdbs = common.add_to_set(self.pdbs, pdbs)


    def serialize(self):
        """
        Returns a pipe separated string:
        domain1|domain2|sources|references|pdb
        """

        return '|'.join([
            self.domains[0].serialize(),
            self.domains[1].serialize(),
            ','.join(self.sources),
            ','.join(self.refs),
            ','.join(self.pdbs)
        ])


    def __str__(self):

        return (
            'Domain-domain interaction:\n'
            ' %s %s\n'
            ' Data sources: %s\n'
            ' References: %s\n'
            ' 3D structures: %s\n' % (
                self.domains[0].__str__(),
                self.domains[1].__str__(),
                ', '.join(self.sources),
                ', '.join(self.refs),
                ','.join(self.pdbs)
            )
        )
class DomainMotif(object):
    """
    An interaction between a domain of one protein and a PTM or motif
    of another one, e.g. an enzyme-substrate relationship.

    :param Domain domain:
        The domain (enzyme) side.
    :param Ptm ptm:
        The PTM or motif (substrate) side.
    :param evidences:
        Evidences object; a new ``evidence.Evidences`` is created if
        nothing (or an empty one) is provided.
    :param pdbs:
        PDB structure(s) of the interaction.
    """

    def __init__(self, domain, ptm, evidences = None, pdbs = None):

        self.ptm = ptm
        self.domain = domain
        self.pdbs = set()
        self.pnetw_score = None
        self.add_pdbs(pdbs)
        self.evidences = evidences or evidence.Evidences()


    def __hash__(self):

        return hash((self.domain, self.ptm))


    def __str__(self):

        return (
            'Domain-motif interaction:\n'
            ' %s %s'
            ' Data sources: %s\n'
            ' References: %s\n'
            ' 3D structures: \n' % (
                self.domain.__str__(),
                self.ptm.__str__(),
                ', '.join(self.evidences.get_resource_names()),
                ', '.join(
                    str(r) for r in self.evidences.get_references()
                ),
            )
        )


    def __repr__(self):

        return '<%s => %s [%s]>' % (
            self.domain.protein.label,
            self.ptm.__repr__().strip('<>').replace('PTM ', ''),
            self.evidences.__repr__().strip('<>')
        )


    def __eq__(self, other):

        # the domains are considered the same also if the position
        # of any of them is unknown
        return bool(
            isinstance(other, DomainMotif) and
            self.ptm == other.ptm and
            (
                self.domain == other.domain or
                (self.domain.start and self.domain.end) is None or
                (other.domain.start and other.domain.end) is None
            )
        )


    def __ne__(self, other):

        return not self.__eq__(other)


    def __contains__(self, other):

        return bool(
            other == self.domain or
            other == self.ptm or
            other == self.domain.protein or
            other == self.ptm.protein
        )


    def key(self):
        """
        Returns a unique key which is a tuple of the proteins, the
        residue and the modification type.
        """

        return (
            self.domain.protein,
            self.ptm.protein,
            self.ptm.residue.name,
            self.ptm.residue.number,
            self.ptm.typ,
        )


    def get_proteins(self):
        """
        Returns the list of the two proteins: domain side, PTM side.
        """

        return [self.domain.protein, self.ptm.protein]


    def add_pdbs(self, pdbs):
        """
        Adds PDB structure(s) to the set of structures.
        """

        self.pdbs = common.add_to_set(self.pdbs, pdbs)


    def serialize(self):
        """
        Returns a pipe separated string of the domain, the PTM, the
        resource names, the references and the PDB structures.
        """

        # sources and references are available only from the evidences,
        # this class has no `sources` and `refs` attributes
        return '|'.join([
            self.domain.serialize(),
            self.ptm.serialize(),
            ','.join(sorted(self.evidences.get_resource_names())),
            ','.join(str(r) for r in self.evidences.get_references()),
            ','.join(self.pdbs)
        ])


    def print_residues(self):
        """
        Returns a string of the proteins, isoforms and residues.
        """

        return '%s-%u:%s:%s' % (
            self.domain.protein,
            self.domain.isoform,
            (
                '%s-%u:' % (self.ptm.protein, self.ptm.isoform)
                    if self.ptm.motif is None else
                self.ptm.motif.print_residues()
            ),
            self.ptm.print_residue(),
        )


    def merge(self, other):
        """
        Merges the domain, PTM, evidences, structures and score of an
        equal instance into this one.
        """

        if self == other:

            self.domain.merge(other.domain)
            self.ptm.merge(other.ptm)
            self.add_evidences(other.evidences)
            self.add_pdbs(other.pdbs)
            self.pnetw_score = self.pnetw_score or other.pnetw_score


    def resources(self, only_primary = False):
        """
        Returns the list of resource names, the secondary ones with the
        `_via` postfix; these are omitted if ``only_primary`` is set.
        """

        return [
            '%s%s' % (res, '_%s' % via if via else '')
            for res, via in
            self.evidences.get_resource_names_via(via = None)
            if not only_primary or not via
        ]


    def references(self):
        """
        Returns the references from the evidences.
        """

        return self.evidences.get_references()


    def references_by_resource(self, only_primary = True):
        """
        Returns a list of tuples of resource name, `via` and reference.
        """

        return [
            (ev.resource.name, ev.resource.via, ref)
            for ev in self.evidences
            for ref in ev.references
            if not only_primary or not ev.resource.via
        ]


    def references_by_resource_str(self, only_primary = True):
        """
        Returns the references by resource as a sorted, semicolon
        separated string of `resource:pmid` items.
        """

        return ';'.join(sorted(
            '%s%s:%s' % (res, '_%s' % via if via else '', ref.pmid)
            for res, via, ref in
            self.references_by_resource(only_primary = only_primary)
        ))


    def get_line(self, resources_only_primary = False):
        """
        Returns a list intended to be a row in a data frame of
        enzyme-substrate relationships.

        Elements of the list:
        - enzyme
        - enzyme_genesymbol
        - substrate
        - substrate_genesymbol
        - isoforms
        - residue_type
        - residue_offset
        - modification
        - sources
        - references
        - curation_effort
        """

        return [
            self.domain.protein.identifier,
            self.domain.protein.label,
            self.ptm.protein.identifier,
            self.ptm.protein.label,
            ';'.join('%u' % i for i in sorted(self.ptm.isoforms)),
            self.ptm.residue.name,
            '%u' % self.ptm.residue.number,
            self.ptm.typ,
            ';'.join(sorted(
                self.resources(only_primary = resources_only_primary)
            )),
            self.references_by_resource_str(),
            self.evidences.count_curation_effort(),
        ]


    def add_evidences(self, evidences):
        """
        Adds evidences to the evidences of this interaction.
        """

        self.evidences += evidences
class Regulation(object):
    """
    The effect of PTM(s) on the interaction between two partners.

    :param ptm:
        One PTM or a list of PTMs; always stored as a list.
    :param source:
        The partner carrying the PTM.
    :param target:
        The other partner of the interaction.
    :param effect:
        The effect of the PTM on the interaction.
    :param sources:
        Name(s) of the data source(s): one string or an iterable.
    :param refs:
        Reference(s).
    """

    def __init__(
            self,
            ptm,
            source,
            target,
            effect,
            sources = None,
            refs = None
        ):

        self.ptm = ptm if type(ptm) is list else [ptm]
        self.source = source
        self.target = target
        self.effect = effect
        self.sources = set()
        self.refs = set()
        self.add_sources(sources)
        self.add_refs(refs)


    def __hash__(self):

        # `ptm` is a list which is not hashable, hence the tuple
        return hash(
            (tuple(self.ptm), self.source, self.target, self.effect)
        )


    def __eq__(self, other):

        return (
            isinstance(other, Regulation) and
            self.ptm == other.ptm and
            self.source == other.source and
            self.target == other.target and
            self.effect == other.effect
        )


    def __ne__(self, other):

        return not self.__eq__(other)


    def add_sources(self, source):
        """
        Adds one source (string) or more sources (iterable).
        """

        if source is None:

            return None

        if type(source) in _const.CHAR_TYPES:

            self._add_source(source)

        else:

            for s in source:

                self._add_source(s)


    def _add_source(self, source):

        self.sources.add(source)


    def add_refs(self, refs):
        """
        Adds reference(s) to the set of references.
        """

        self.refs = common.add_to_set(self.refs, refs)


    def serialize(self):
        """
        Returns a pipe separated string of the effect, the PTMs
        (separated by semicolons), the target, the sources and the
        references.
        """

        return '|'.join([
            self.effect,
            ';'.join(ptm.serialize() for ptm in self.ptm),
            self.target,
            ','.join(self.sources),
            ','.join(self.refs)
        ])


    def __str__(self):

        # six fields in the template: source, effect and target in the
        # first line, then the PTMs, the sources and the references
        return (
            'Regulation by PTM:\n'
            ' PTM on %s %s interaction with %s\n'
            ' %s \n'
            ' Data sources: %s\n'
            ' References: %s\n' % (
                self.source,
                self.effect,
                self.target,
                ', '.join(str(ptm) for ptm in self.ptm),
                ', '.join(self.sources),
                ', '.join(self.refs)
            )
        )


    def merge(self, other):
        """
        Merges the PTMs, sources and references of an equal instance
        into this one.
        """

        if self == other:

            # equality implies the two lists contain equal PTMs
            # in the same order, hence these can be merged by pairs
            for own, others in zip(self.ptm, other.ptm):

                own.merge(others)

            self.add_sources(other.sources)
            self.add_refs(other.refs)
#TODO this class does not belong here, find a better place
class Complex(object):
    """
    Represents a molecular complex.

    Instances are hashed and compared by their string representation,
    which is built from the sorted identifiers of the components. Due
    to this any object with the same hash (e.g. the string itself) is
    considered equal.
    """

    # resources which provide stoichiometry information
    have_stoichiometry = {
        'PDB',
        'Compleat',
        'ComplexPortal',
        'CellPhoneDB',
    }


    def __init__(
            self,
            components,
            ncbi_tax_id = 9606,
            name = None,
            ids = None,
            sources = None,
            interactions = None,
            references = None,
            proteins = None,
            attrs = None,
        ):
        """
        Represents a molecular complex.

        components : list,dict
            Either a list of identifiers or a dict with identifiers as
            keys and stoichiometric coefficients as values. List of
            identifiers also assumed to represent stoichiometry by
            repetition of identifiers.
        ncbi_tax_id : int
            NCBI taxonomy identifier of the complex. It implies all
            members of the complex belong to the same organism. Support
            for multi-organism complexes will be implemented in the
            future.
        name : str
            A custom name or identifier of the complex.
        ids : dict
            Identifiers. If ``sources`` is a set, list or tuple it should
            be a dict with database names as keys and set of identifiers
            as values. If ``sources`` is a string, it can be a set of
            identifiers or a single identifier.
        sources : set,str
            Database(s) the complex has been defined in.
        interactions : list,dict
            Interactions between the components of the complex. Either
            a list of tuples of component IDs or a dict with tuples as
            keys and custom interaction properties as values.
        references : set,str
            Reference(s), converted by ``common.to_set``.
        proteins : list,dict
            Synonym for `components`, kept for compatibility.
        attrs : dict
            Custom attributes, copied into the ``attrs`` dict of the
            instance.
        """

        components = components or proteins

        self.components = (
            components
                if isinstance(components, dict) else
            dict(collections.Counter(components))
        )
        # alias, refers to the same dict as `components`
        self.proteins = self.components
        self.name = name
        self.ids = collections.defaultdict(set)
        self.add_ids(ids, source = sources)
        self.sources = common.to_set(sources)
        self.references = common.to_set(references)
        self.ncbi_tax_id = taxonomy.ensure_ncbi_tax_id(ncbi_tax_id)
        self.attrs = {}

        if isinstance(attrs, dict):

            self.attrs.update(attrs)

        self.interactions = interactions


    def reload(self):
        """
        Reloads the module and replaces the class of this instance by
        the freshly loaded one.
        """

        modname = self.__class__.__module__
        mod = __import__(modname, fromlist = [modname.split('.')[0]])
        import importlib as imp
        imp.reload(mod)
        new = getattr(mod, self.__class__.__name__)
        setattr(self, '__class__', new)


    def __str__(self):

        return 'COMPLEX:%s' % (
            COMPLEX_SEP.join(sorted(self.components.keys()))
        )


    def __repr__(self):

        return 'Complex%s: %s' % (
            ' %s' % self.name if self.name else '',
            self.__str__(),
        )


    def __hash__(self):

        return hash(self.__str__())


    def __contains__(self, other):

        return other in self.components


    def __eq__(self, other):

        return self.__hash__() == other.__hash__()


    def __iadd__(self, other):

        self.merge(other)

        return self


    def __lt__(self, other):

        return self.__str__() < other


    def __gt__(self, other):

        return self.__str__() > other


    def __len__(self):

        return len(self.components)


    def merge(self, other):
        """
        Adds the annotations (sources, references, attrs) of the other
        ``Complex`` instance to this one. If the other ``Complex`` has
        different components it does nothing.
        """

        if self != other:

            return

        if (
            set(self.components.values()) == {1} and
            set(other.components.values()) != {1}
        ):

            # this complex has no stoichiometry information
            # but the other has; a copy is taken, this way the two
            # instances do not share one dict, and the `proteins`
            # alias is kept in sync with `components`
            self.components = dict(other.components)
            self.proteins = self.components

        self.sources.update(other.sources)
        self.references.update(other.references)
        self.add_ids(other.ids)

        for k, v in other.attrs.items():

            if k not in self.attrs:

                self.attrs[k] = v

            elif isinstance(self.attrs[k], (dict, set)):

                self.attrs[k].update(v)


    def add_ids(self, ids, source = None):
        """
        Adds identifiers: either a dict with sources as keys and
        identifiers as values, or identifier(s) for the source(s)
        provided in ``source``.
        """

        if not isinstance(ids, dict):

            ids = common.to_set(ids)

        if isinstance(ids, set) and source:

            source = common.to_set(source)
            ids = dict((s, ids) for s in source)

        if isinstance(ids, dict):

            for this_source, this_ids in ids.items():

                self.ids[this_source].update(common.to_set(this_ids))


    def get_interaction(self, component1, component2):
        """
        Returns the interaction stored for the pair of components,
        ``None`` if there is no such interaction.
        """

        if self.has_interaction(component1, component2):

            return self.interactions[(component1, component2)]


    def set_interaction(self, component1, component2, interaction):
        """
        Stores an interaction for the pair of components.
        """

        self.interactions = self.interactions or {}
        self.interactions[(component1, component2)] = interaction


    def has_interaction(self, component1, component2):
        """
        Tells if an interaction is stored for the pair of components.
        """

        return bool(
            self.interactions and
            (component1, component2) in self.interactions
        )


    def add_source(self, source):
        """
        Adds one source to the set of sources.
        """

        self.sources.add(source)


    def iter_proteins(self):
        """
        Iterates the identifiers of the components.
        """

        for protein in self.proteins.keys():

            yield protein


    __iter__ = iter_proteins


    def add_attr(self, source, attr):
        """
        Attributes can store annotations for complexes.
        """

        self.attrs[source] = attr


    def _sorted_components(self):
        """
        Returns the identifier-count pairs sorted by the identifiers.
        """

        return sorted(
            self.components.items(),
            key = lambda comp_cnt: comp_cnt[0],
        )


    @property
    def stoichiometry(self):
        """
        Colon separated counts of the components, sorted by their
        identifiers. All zeros if none of the sources of the complex
        provides stoichiometry.
        """

        known = self.sources & self.have_stoichiometry

        return ':'.join(
            '%u' % (cnt if known else 0)
            for _id, cnt in self._sorted_components()
        )


    @property
    def stoichiometry_str(self):
        """
        Semicolon separated identifiers, each repeated according to
        its count.
        """

        return ';'.join(
            itertools.chain(*(
                (comp,) * cnt
                for comp, cnt in self._sorted_components()
            ))
        )


    @property
    def stoichiometry_str_genesymbols(self):
        """
        Same as ``stoichiometry_str`` but the identifiers are translated
        by ``mapping.map_name0`` from `uniprot` to `genesymbol`, the
        original identifier is kept where no translation is available.
        """

        return ';'.join(
            itertools.chain(*(
                (
                    (
                        mapping.map_name0(
                            uniprot,
                            'uniprot',
                            'genesymbol',
                        ) or
                        uniprot
                    ),
                ) * cnt
                for uniprot, cnt in self._sorted_components()
            ))
        )


    @property
    def genesymbols(self):
        """
        Sorted list of the components translated by
        ``mapping.map_name0`` from `uniprot` to `genesymbol`.
        """

        return sorted(
            (
                mapping.map_name0(uniprot, 'uniprot', 'genesymbol') or
                uniprot
            )
            for uniprot in self.components.keys()
        )


    @property
    def genesymbol_str(self):
        """
        The ``genesymbols`` joined by the complex separator.
        """

        return COMPLEX_SEP.join(self.genesymbols)
class Interface(object):
    """
    This class is to store residue level information of protein-protein
    interfaces.

    The residues are kept by bond type: for each type in ``types`` the
    instance has an attribute of the same name which is a dict with
    the two identifiers as keys and lists of residues as values.

    :param id_a, id_b:
        Identifiers of the two partners.
    :param source:
        The data source.
    :param str id_type:
        Identifier type, passed to the ``Residue`` objects.
    :param pdb:
        The PDB structure where the interface has been observed.
    :param css, stab_en, solv_en, area:
        Stored as they are, shown in the string representation.
    :param int,str isoform_a, isoform_b:
        Isoform numbers; strings are stripped from everything except
        digits, dots and dashes and converted to integer.
    """

    def __init__(
            self,
            id_a,
            id_b,
            source,
            id_type = 'uniprot',
            pdb = None,
            css = None,
            stab_en = None,
            solv_en = None,
            area = None,
            isoform_a = 1,
            isoform_b = 1
        ):

        # no pattern of this name exists at the module level,
        # it has to be created here
        non_digit = re.compile(r'[^\d.-]+')

        self.source = source
        self.isoform_a = (
            isoform_a
                if isinstance(isoform_a, int) else
            int(non_digit.sub('', isoform_a))
        )
        self.isoform_b = (
            isoform_b
                if isinstance(isoform_b, int) else
            int(non_digit.sub('', isoform_b))
        )
        self.pdb = pdb
        self.id_a = id_a
        self.id_b = id_b
        self.id_type = id_type
        self.types = [
            'undefined',
            'hbonds',
            'sbridges',
            'ssbonds',
            'covbonds',
        ]

        for t in self.types:

            self.__dict__[t] = {id_a: [], id_b: []}

        self.area = area
        self.stab_en = stab_en
        self.solv_en = solv_en
        self.css = css


    @staticmethod
    def _valid_residue(res):
        """
        Tells if `res` is a tuple starting with an int and a string.
        """

        return (
            type(res) is tuple and
            len(res) > 1 and
            type(res[0]) is int and
            isinstance(res[1], str)
        )


    def add_residues(self, res_a, res_b, typ = 'undefined'):
        """
        Adds one pair of residues of type `typ`, where `res_a` and
        `res_b` are tuples of residue number in sequence and residue
        type, e.g. (124, 'S') -- (means Serine #124). Optionally the
        protein can be provided as the third element of the tuples,
        otherwise `id_a` and `id_b` are used, respectively.
        `typ` can be undefined, hbonds, sbridges, ssbonds or covbonds.
        With wrong parameters nothing is added, only a message is
        written to the standard output.
        """

        if (
            typ not in self.types or
            not self._valid_residue(res_a) or
            not self._valid_residue(res_b)
        ):

            sys.stdout.write(
                '\tWrong parameters for Interface.add_residues()\n'
            )
            return

        for _id, res in ((self.id_a, res_a), (self.id_b, res_b)):

            protein = res[2] if len(res) > 2 else _id

            self.__dict__[typ][_id].append(
                Residue(res[0], res[1], protein, self.id_type)
            )


    def numof_residues(self):
        """
        Returns the number of residue pairs by bound type
        """

        return dict(
            (t, len(self.__dict__[t][self.id_a]))
            for t in self.types
        )


    def bond_types(self):
        """
        Returns the bond types present in this interface
        """

        return [
            t
            for t in self.types
            if len(self.__dict__[t][self.id_a]) > 0
        ]


    def get_bonds(self, typ = None, mode = None):
        """
        Gives a generator to iterate through bonds in this interface.
        If no type given, bonds of all types returned. With `mode`
        set to `dict` dicts of the residues are yielded, otherwise
        tuples of the identifiers, the serialized residues and the
        bond type.
        """

        if typ is None:

            typ = self.types

        if isinstance(typ, str):

            typ = [typ]

        for t in typ:

            if t not in self.types:

                continue

            pairs = zip(
                self.__dict__[t][self.id_a],
                self.__dict__[t][self.id_b],
            )

            for res_a, res_b in pairs:

                if mode == 'dict':

                    yield {
                        self.id_a: res_a,
                        self.id_b: res_b,
                        'type': t,
                    }

                else:

                    yield (
                        self.id_a,
                        res_a.serialize(),
                        self.id_b,
                        res_b.serialize(),
                        t,
                    )


    def serialize(self):
        """
        Returns a colon separated string of the two partners with
        their isoforms, the source, the structure and the residues
        by bond type.
        """

        res = []

        for t in self.types:

            residues_a = self.__dict__[t][self.id_a]
            residues_b = self.__dict__[t][self.id_b]

            if residues_a and residues_b:

                # the residues are serialized one by one,
                # the lists themselves can not be serialized
                res.append(
                    '%s:%s+%s' % (
                        t,
                        ','.join(r.serialize() for r in residues_a),
                        ','.join(r.serialize() for r in residues_b),
                    )
                )

        return (
            '%s-%u:%s-%u:%s:%s:%s' % (
                self.id_a,
                self.isoform_a,
                self.id_b,
                self.isoform_b,
                self.source,
                self.pdb,
                ':'.join(res),
            )
        )


    def __str__(self):

        nbonds = self.numof_residues()

        return (
            'Molecular interface between %s and %s,\n'
            'as observed in PDB structure %s\n\n'
            ' Data source: %s\n'
            ' Number of residues in contact: %u\n'
            ' Hydrogene bonds: %u\n'
            ' Covalent bonds: %u\n'
            ' Saltbridges: %u\n'
            ' S-S bonds: %u\n'
            ' Stable energy: %s\n'
            ' Solvation energy: %s\n'
            ' Surface area: %s\n'
            ' Complexation significance score: %s\n' % (
                self.id_a,
                self.id_b,
                self.pdb,
                self.source,
                sum(nbonds.values()),
                nbonds['hbonds'],
                nbonds['covbonds'],
                nbonds['sbridges'],
                nbonds['ssbonds'],
                'n/a' if self.stab_en is None else str(self.stab_en),
                'n/a' if self.solv_en is None else str(self.solv_en),
                'n/a' if self.area is None else str(self.area),
                'n/a' if self.css is None else str(self.css),
            )
        )


    def __repr__(self):

        nbonds = self.numof_residues()

        return (
            'Interface [%s-%s, %u bonds]' % (
                self.id_a,
                self.id_b,
                sum(nbonds.values()),
            )
        )