Source code for pypath.inputs.pisa

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#  This file is part of the `pypath` python module
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#  Website:

from future.utils import iteritems
from past.builtins import xrange, range

import os
import sys
import re
import collections

    import cPickle as pickle
    import pickle

import bs4

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.share.common as common
import pypath.share.cache as cache
import pypath.share.progress as progress
import pypath.utils.pdb as pdb_utils
import pypath.inputs.pdb as pdb_input
import pypath.internals.intera as intera

# One bond between two residues, as built by `pisa_bonds`: for each of the
# two partners the UniProt ID, the chain ID, the residue name and the
# sequence number.
# NOTE(review): the field names are the keyword arguments `pisa_bonds`
# passes; their positional order is assumed -- confirm against upstream.
PisaBond = collections.namedtuple(
    'PisaBond',
    [
        'uniprot_1',
        'chain_1',
        'residue_1',
        'seqnum_1',
        'uniprot_2',
        'chain_2',
        'residue_2',
        'seqnum_2',
    ],
)

def pisa_bonds(bonds, chains):
    """
    Collect the ``bond`` children of a PISA bond group as ``PisaBond``
    records.

    To be refactored in the future. If you are interested in using this
    function, please contact the authors.

    Args
        bonds: A parsed element providing ``find_all``; every ``bond``
            element found under it is processed.
        chains (dict): Chain IDs as keys, UniProt IDs as values.

    Returns
        A list of ``PisaBond`` tuples. A bond is left out if either of
        its chains has no (non-empty) UniProt ID in ``chains``.
    """

    strip_non_numeric = re.compile(r'[^\d.-]+')

    def text_of(element, tag):
        # Text content of the first `tag` child of `element`.
        return element.find(tag).text

    def seqnum_of(element, tag):
        # Keep only digits, dots and dashes before converting to int.
        return int(strip_non_numeric.sub('', text_of(element, tag)))

    def residue_of(element, tag):
        # Residue names found in `common.aaletters` are replaced by the
        # value stored there, any other name is kept as it is.
        name = text_of(element, tag)
        return common.aaletters.get(name, name)

    records = []

    for element in bonds.find_all('bond'):

        first_seqnum = seqnum_of(element, 'seqnum-1')
        second_seqnum = seqnum_of(element, 'seqnum-2')
        first_residue = residue_of(element, 'res-1')
        second_residue = residue_of(element, 'res-2')
        first_chain = text_of(element, 'chain-1')
        second_chain = text_of(element, 'chain-2')
        first_uniprot = chains.get(first_chain)
        second_uniprot = chains.get(second_chain)

        if not (first_uniprot and second_uniprot):
            # at least one chain could not be assigned to a UniProt ID
            continue

        records.append(
            PisaBond(
                uniprot_1 = first_uniprot,
                chain_1 = first_chain,
                residue_1 = first_residue,
                seqnum_1 = first_seqnum,
                uniprot_2 = second_uniprot,
                chain_2 = second_chain,
                residue_2 = second_residue,
                seqnum_2 = second_seqnum,
            )
        )

    return records
def _pisa_float(interface, tag):
    """
    Read the text of the ``tag`` child of ``interface`` as a float.

    Everything matched by ``common.non_digit`` is removed from the text
    first; ``None`` is returned if nothing is left after that.
    """

    value = common.non_digit.sub('', interface.find(tag).text)

    return float(value) if value else None


def pisa_interfaces(pdbs, return_unmapped = False):
    """
    Download protein-protein interface data from PDBe PISA.

    To be refactored in the future. If you are interested in using this
    function, please contact the authors.

    Results are kept in a pickle file (``pisa.pickle``) in the cache
    directory: structures already present there are not downloaded again,
    and the file is rewritten at the end of each call. The returned dict
    contains everything from the cache file, not only the requested
    structures.

    Args
        pdbs (set): A set of PDB IDs to query.
        return_unmapped (bool): Return also a list of unmapped residues.
            In this case, a tuple is returned, its first element is a
            dict of interfaces, while its second element is a list of
            unmapped residues (normally empty, if all residues could be
            mapped between PDB and UniProt sequences).

    Returns
        A dict of dicts with interfaces. The upper level keys are PDB
        structure IDs (lowercase), the lower level keys are tuples of
        UniProt IDs, the values are ``pypath.internals.intera.Interface``
        objects. With ``return_unmapped``, a tuple of this dict and a
        list of ``(pdb_id, seqnum_1, seqnum_2, uniprot_1, uniprot_2)``
        tuples.
    """

    # label passed to `Interface.add_residues` -> element name in the
    # PISA response
    bond_types = {
        'hbonds': 'h-bonds',
        'sbridges': 'salt-bridges',
        'covbonds': 'cov-bonds',
        'ssbonds': 'ss-bonds'
    }
    # number of PDB IDs requested in one query
    chunk_size = 5

    interfaces = collections.defaultdict(dict)
    cachefile = os.path.join(cache.get_cachedir(), 'pisa.pickle')
    # only the PDB -> chains direction is used below
    _, pdb_u = pdb_input.pdb_chains()

    if os.path.exists(cachefile):

        # NOTE: unpickling can execute arbitrary code, the cache file
        # must come from a trusted location.
        try:

            with open(cachefile, 'rb') as fp:

                interfaces = pickle.load(fp)

        except Exception:

            # Best effort: an unreadable or corrupt cache file is ignored
            # and everything is downloaded again.
            pass

    errors = []
    unmapped_residues = []

    pdbs = list(set(pdbs) - set(interfaces.keys()))
    prg = progress.Progress(
        len(pdbs) / chunk_size,
        'Downloading data from PDBe PISA',
        1,
    )
    pdbs = sorted(common.to_set(pdbs))

    for start in xrange(0, len(pdbs), chunk_size):

        this_part = pdbs[start:start + chunk_size]
        url = urls.urls['pisa_interfaces']['url'] + ','.join(this_part)
        c = curl.Curl(url, cache = False)
        data = c.result

        if not data:

            errors.append('Could not download: \n\t\t%s' % url)
            continue

        soup = bs4.BeautifulSoup(data, 'html.parser')

        for pdb in soup.find_all('pdb_entry'):

            pdb_id = pdb.find('pdb_code').text.lower()
            interfaces[pdb_id] = {}
            chains = {}
            resconv = pdb_utils.ResidueMapper()

            if pdb_id in pdb_u:

                for chain, chain_data in iteritems(pdb_u[pdb_id]):

                    chains[chain] = chain_data['uniprot']

            # Without chain -> UniProt assignments `pisa_bonds` yields
            # nothing, hence such entries stay empty.
            for interface in pdb.find_all('interface'):

                for bond_label, tag in iteritems(bond_types):

                    bond_group = interface.find(tag)

                    if not bond_group:

                        continue

                    for bond in pisa_bonds(bond_group, chains):

                        uniprots = (bond.uniprot_1, bond.uniprot_2)

                        if uniprots not in interfaces[pdb_id]:

                            # first bond of this protein pair: create the
                            # interface with its interface level values
                            css = _pisa_float(interface, 'css')
                            area = _pisa_float(interface, 'int_area')
                            solv_en = _pisa_float(
                                interface,
                                'int_solv_en',
                            )
                            stab_en = _pisa_float(interface, 'stab_en')

                            interfaces[pdb_id][uniprots] = (
                                intera.Interface(
                                    uniprots[0],
                                    uniprots[1],
                                    source = 'PISA',
                                    pdb = pdb_id,
                                    css = css,
                                    solv_en = solv_en,
                                    area = area,
                                    stab_en = stab_en,
                                )
                            )

                        res1 = resconv.get_residue(pdb_id, bond.seqnum_1)
                        res2 = resconv.get_residue(pdb_id, bond.seqnum_2)

                        if (
                            res1 and
                            res2 and
                            res1.uniprot == uniprots[0] and
                            res2.uniprot == uniprots[1]
                        ):

                            interfaces[pdb_id][uniprots].add_residues(
                                (
                                    res1.resnum,
                                    bond.residue_1,
                                    uniprots[0],
                                ),
                                (
                                    res2.resnum,
                                    bond.residue_2,
                                    uniprots[1],
                                ),
                                typ = bond_label,
                            )

                        else:

                            unmapped_residues.append(
                                (
                                    pdb_id,
                                    bond.seqnum_1,
                                    bond.seqnum_2,
                                    uniprots[0],
                                    uniprots[1],
                                )
                            )

        # NOTE(review): one step per downloaded chunk is assumed from the
        # progress total above; the original nesting level was lost.
        prg.step()

    prg.terminate()

    with open(cachefile, 'wb') as fp:

        pickle.dump(interfaces, fp, 2)

    interfaces = dict(interfaces)

    if len(errors) > 0:

        sys.stdout.write(
            '\t:: Failed to download %u files of total %u:\n\n' % (
                len(errors),
                len(pdbs),
            )
        )

        for e in errors:

            sys.stdout.write('\t' + e + '\n')

        sys.stdout.flush()

    if return_unmapped:

        return interfaces, unmapped_residues

    else:

        return interfaces