Source code for pypath.utils.pdb

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from future.utils import iteritems

import re
import json
import collections
import urllib

try:
    import urllib2
except:
    import urllib.request as urllib2

try:
    import urlparse
except:
    import urllib.parse
    urlparse = urllib.parse

import bs4

import pypath.resources.urls as urls
import pypath.share.common as common
import pypath.share.session as session


Segment = collections.namedtuple(
    'Segment',
    (
        'uniprot',
        'pdb_start',
        'pdb_end',
        'uniprot_start',
        'uniprot_end',
    ),
)


Residue = collections.namedtuple(
    'Residue',
    (
        'uniprot',
        'chain',
        'resnum',
        'offset',
    ),
)


[docs] class ResidueMapper(session.Logger): """ This class stores and serves the PDB --> UniProt residue level mapping. Attempts to download the mapping, and stores it for further use. Converts PDB residue numbers to the corresponding UniProt ones. """
[docs] def __init__(self): session.Logger.__init__(self, 'pdb_utils') self.clean()
[docs] def load_mapping(self, pdbs): """ Loads PDB-UniProt sequence mapping for one or more PDB IDs. Args pdb (str,list): One or more PDB IDs. """ non_digit = re.compile(r'[^\d.-]+') pdbs = common.to_set(pdbs) pdbs = {p.lower() for p in pdbs} for pdb in pdbs: url = urls.urls['pdb_align']['url'] + pdb for attempt in range(3): try: data = urllib2.urlopen(url) break except: self._log( 'Downloading PDB alignment for %s: ' '%u attempt failed.' % (pdb, attempt + 1) ) finally: self._log('Failed to obtain alignment for PDB %s.' % pdb) data = None mapper = collections.defaultdict(dict) if data: alignments = json.loads(data.read()) for uniprot, alignment in ( iteritems(alignments[pdb]['UniProt']) ): for segment in alignment['mappings']: chain = segment['chain_id'] pdbstart = segment['start']['residue_number'] pdbend = segment['end']['residue_number'] uniprotstart = segment['unp_start'] uniprotend = segment['unp_end'] if chain not in mapper: mapper[chain] = {} mapper[chain][pdbend] = Segment( uniprot = uniprot, pdb_start = pdbstart, pdb_end = pdbend, uniprot_start = uniprotstart, uniprot_end = uniprotend, ) self.mappers[pdb] = dict(mapper)
[docs] def get_residue(self, pdb, resnum, chain = None): """ For a residue in a PDB structure returns the UniProt ID and the position of the residue in the UniProt sequence. Args pdb (str): A PDB structure ID. resnum (int): The position of the residue. chain (str): The chain ID, optional. Returns Tuple of residue number, offset, UniProt ID and chain ID. Returns None if the residue can not be found. """ pdb = pdb.lower() if pdb not in self.mappers: self.load_mapping(pdb) if pdb in self.mappers: for _chain, data in iteritems(self.mappers[pdb]): pdbends = data.keys() if ( resnum <= max(pdbends) and ( not chain or chain == _chain ) ): pdbend = min( [x for x in [e - resnum for e in pdbends] if x >= 0]) + resnum seg = data[pdbend] if seg.pdb_start <= resnum: offset = seg.uniprot_start - seg.pdb_start residue = Residue( resnum = resnum + offset, offset = offset, uniprot = seg.uniprot, chain = chain, ) return residue return None
[docs] def clean(self): """ Removes cached mappings, freeing up memory. """ self.mappers = {}
[docs] def residue_pdb(pdb, chain, residue): url = urls.urls['pdbsws']['url'] params = urlparse.urlencode({ 'plain': 1, 'qtype': 'pdb', 'id': pdb, 'chain': chain, 'res': residue }) data = urllib2.urlopen(url + "?%s" % params) result = {} for l in data: l = l.decode('utf-8') if not l.startswith('//'): l = [x.strip() for x in l.split(':')] result[l[0]] = l[1] return result