Source code for pypath.utils.pdb

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from future.utils import iteritems

import re
import json
import collections
import urllib

try:
    import urllib2
except:
    import urllib.request as urllib2

try:
    import urlparse
except:
    import urllib.parse
    urlparse = urllib.parse

import bs4

import pypath.resources.urls as urls
import pypath.share.common as common
import pypath.share.session as session


# One aligned stretch between a PDB chain and a UniProt sequence, as
# created by `ResidueMapper.load_mapping`.
Segment = collections.namedtuple(
    'Segment',
    [
        # ID of the UniProt entry the segment is aligned to
        'uniprot',
        # first and last residue number of the segment in the PDB chain
        'pdb_start',
        'pdb_end',
        # first and last position of the segment in the UniProt sequence
        'uniprot_start',
        'uniprot_end',
    ],
)


# A residue translated to UniProt numbering, as returned by
# `ResidueMapper.get_residue`.
Residue = collections.namedtuple(
    'Residue',
    [
        # ID of the UniProt entry
        'uniprot',
        # ID of the PDB chain
        'chain',
        # position of the residue in the UniProt sequence
        'resnum',
        # value added to the PDB residue number to obtain `resnum`
        'offset',
    ],
)



[docs]
class ResidueMapper(session.Logger):
    """
    Stores and serves residue level PDB --> UniProt mappings.

    The mapping of a PDB structure is downloaded the first time it is
    needed and kept in the ``mappers`` attribute, a dict keyed by
    lowercase PDB IDs, for further use. Based on these mappings, PDB
    residue numbers are converted to the corresponding UniProt ones.
    """



[docs]
    def __init__(self):
        """
        Initializes the logger base class with the name ``pdb_utils``
        (presumably the label of the log messages) and creates the empty
        ``mappers`` cache.
        """

        session.Logger.__init__(self, 'pdb_utils')

        # `clean` creates the `mappers` attribute as an empty dict
        self.clean()




[docs]
    def load_mapping(self, pdbs):
        """
        Loads PDB-UniProt sequence mapping for one or more PDB IDs.

        For each PDB ID the alignment is downloaded, trying up to three
        times. The result is stored in ``self.mappers`` under the
        lowercase PDB ID: a dict with chain IDs as keys and dicts of
        ``Segment`` records as values, the latter keyed by the last PDB
        residue number of each segment. If the download fails, an empty
        dict is stored for the PDB ID.

        Args
            pdbs (str,list): One or more PDB IDs.
        """

        pdbs = {p.lower() for p in common.to_set(pdbs)}

        for pdb in pdbs:

            url = urls.urls['pdb_align']['url'] + pdb
            data = None

            for attempt in range(3):

                try:

                    data = urllib2.urlopen(url)
                    break

                except Exception:

                    self._log(
                        'Downloading PDB alignment for %s: '
                        '%u attempt failed.' % (pdb, attempt + 1)
                    )

            else:

                # reached only if none of the attempts ended by `break`,
                # i.e. all of them failed
                self._log('Failed to obtain alignment for PDB %s.' % pdb)

            mapper = collections.defaultdict(dict)

            if data is not None:

                try:

                    alignments = json.loads(data.read())

                finally:

                    # release the connection even if parsing fails
                    data.close()

                for uniprot, alignment in alignments[pdb]['UniProt'].items():

                    for segment in alignment['mappings']:

                        pdbend = segment['end']['residue_number']

                        # segments are keyed by their last PDB residue
                        # number, `get_residue` relies on this
                        mapper[segment['chain_id']][pdbend] = Segment(
                            uniprot = uniprot,
                            pdb_start = segment['start']['residue_number'],
                            pdb_end = pdbend,
                            uniprot_start = segment['unp_start'],
                            uniprot_end = segment['unp_end'],
                        )

            self.mappers[pdb] = dict(mapper)




[docs]
    def get_residue(self, pdb, resnum, chain = None):
        """
        For a residue in a PDB structure returns the UniProt ID and
        the position of the residue in the UniProt sequence.

        The mapping of the structure is loaded by ``load_mapping`` if it
        is not in ``self.mappers`` yet.

        Args
            pdb (str): A PDB structure ID.
            resnum (int): The position of the residue in the PDB chain.
            chain (str): The chain ID, optional. If not provided, the
                first chain with a segment covering `resnum` is used.

        Returns
            A ``Residue`` named tuple with the fields ``uniprot`` (the
            UniProt ID), ``chain`` (ID of the chain the residue has been
            found in), ``resnum`` (the position in the UniProt sequence)
            and ``offset`` (the difference between the UniProt and the
            PDB numbering). Returns None if the residue can not be found.
        """

        pdb = pdb.lower()

        if pdb not in self.mappers:

            self.load_mapping(pdb)

        for _chain, segments in self.mappers.get(pdb, {}).items():

            if chain and chain != _chain:

                continue

            # segments are keyed by their last PDB residue number: the only
            # candidate is the one with the lowest end not below `resnum`
            ends = [end for end in segments if end >= resnum]

            if not ends:

                continue

            seg = segments[min(ends)]

            # the residue might fall into the gap before the segment
            if seg.pdb_start <= resnum:

                offset = seg.uniprot_start - seg.pdb_start

                return Residue(
                    resnum = resnum + offset,
                    offset = offset,
                    uniprot = seg.uniprot,
                    # the chain actually matched, also if the caller
                    # did not specify any
                    chain = _chain,
                )

        return None




[docs]
    def clean(self):
        """
        Drops all cached mappings by replacing the ``mappers`` attribute
        with a new, empty dict; this frees up the memory they occupied.
        """

        self.mappers = dict()





[docs]
def residue_pdb(pdb, chain, residue):
    """
    Queries the `pdbsws` service about one residue of a PDB chain.

    Args
        pdb (str): A PDB structure ID.
        chain (str): The chain ID.
        residue: The residue to look up, sent as the `res` parameter
            (presumably the residue number in the PDB chain).

    Returns
        Dict of the fields of the plain text response: each line is split
        at the first colon into a key and a value, both stripped of
        surrounding whitespace. Lines starting with `//` and lines
        without a colon are ignored.
    """

    url = urls.urls['pdbsws']['url']
    params = urlparse.urlencode({
        'plain': 1,
        'qtype': 'pdb',
        'id': pdb,
        'chain': chain,
        'res': residue
    })
    data = urllib2.urlopen(url + "?%s" % params)
    result = {}

    try:

        for line in data:

            line = line.decode('utf-8')

            if line.startswith('//'):

                continue

            # split only at the first colon, so values containing
            # colons are kept intact
            key, sep, value = line.partition(':')

            if not sep:

                # blank or malformed line, carries no field
                continue

            result[key.strip()] = value.strip()

    finally:

        # release the connection even if reading or decoding fails
        data.close()

    return result