#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from future.utils import iteritems

import re
import json
import collections
import urllib

# Python 2 ships `urllib2`; on Python 3 the same API lives in
# `urllib.request`. Only a missing module should trigger the fallback.
try:
    import urllib2
except ImportError:
    import urllib.request as urllib2

# Same idea for `urlparse` (Python 2) vs. `urllib.parse` (Python 3).
try:
    import urlparse
except ImportError:
    import urllib.parse
    urlparse = urllib.parse

import bs4

import pypath.resources.urls as urls
import pypath.share.common as common
import pypath.share.session as session


# One aligned stretch between a PDB chain and a UniProt sequence:
#   uniprot        -- UniProt ID the segment maps to
#   pdb_start      -- first residue number of the segment in the PDB chain
#   pdb_end        -- last residue number of the segment in the PDB chain
#   uniprot_start  -- position in UniProt corresponding to `pdb_start`
#   uniprot_end    -- position in UniProt corresponding to `pdb_end`
Segment = collections.namedtuple(
    'Segment',
    (
        'uniprot',
        'pdb_start',
        'pdb_end',
        'uniprot_start',
        'uniprot_end',
    ),
)


# A PDB residue translated to UniProt coordinates:
#   uniprot  -- UniProt ID
#   chain    -- PDB chain ID
#   resnum   -- residue number in the UniProt sequence
#   offset   -- value added to the PDB residue number to obtain `resnum`
Residue = collections.namedtuple(
    'Residue',
    (
        'uniprot',
        'chain',
        'resnum',
        'offset',
    ),
)
class ResidueMapper(session.Logger):
    """
    This class stores and serves the PDB --> UniProt residue level mapping.

    Attempts to download the mapping, and stores it for further use.
    Converts PDB residue numbers to the corresponding UniProt ones.

    The cache is kept in ``self.mappers``, a dict of the form
    ``{pdb_id: {chain_id: {segment_end: Segment}}}`` where ``pdb_id`` is
    lower case and ``segment_end`` is the last PDB residue number of the
    segment.
    """

    def _ensure_mappers(self):
        """
        Creates the cache if it does not exist yet.

        NOTE(review): no constructor is visible in this class, and
        ``mappers`` is otherwise only created by ``clean``; this guard
        keeps the public methods usable either way.
        """

        if not hasattr(self, 'mappers'):

            self.clean()

    def load_mapping(self, pdbs):
        """
        Loads PDB-UniProt sequence mapping for one or more PDB IDs.

        For each ID the alignment is downloaded from the ``pdb_align`` URL
        (up to three attempts) and stored in ``self.mappers``. If the
        download fails, an empty mapping is stored for that ID.

        Args
            pdbs (str,list): One or more PDB IDs.
        """

        self._ensure_mappers()

        for pdb in {p.lower() for p in common.to_set(pdbs)}:

            url = urls.urls['pdb_align']['url'] + pdb
            raw = None

            for attempt in range(3):

                try:

                    response = urllib2.urlopen(url)

                    try:

                        raw = response.read()

                    finally:

                        # release the connection even if reading fails
                        response.close()

                    break

                # Broad on purpose: any kind of network failure is retried,
                # but unlike a bare `except` this lets KeyboardInterrupt
                # and SystemExit through.
                except Exception:

                    raw = None
                    self._log(
                        'Downloading PDB alignment for %s: '
                        '%u attempt failed.' % (pdb, attempt + 1)
                    )

            else:

                # Reached only if no attempt hit `break`. A `finally`
                # clause here would also run on success (it executes when
                # `break` leaves the `try`), discarding the downloaded data.
                self._log('Failed to obtain alignment for PDB %s.' % pdb)

            mapper = collections.defaultdict(dict)

            if raw:

                alignments = json.loads(raw)
                # entries without UniProt alignment yield an empty mapping
                by_uniprot = alignments.get(pdb, {}).get('UniProt', {})

                for uniprot, alignment in by_uniprot.items():

                    for segment in alignment['mappings']:

                        pdbend = segment['end']['residue_number']

                        # segments are keyed by their last PDB residue,
                        # `get_residue` relies on this for the lookup
                        mapper[segment['chain_id']][pdbend] = Segment(
                            uniprot=uniprot,
                            pdb_start=segment['start']['residue_number'],
                            pdb_end=pdbend,
                            uniprot_start=segment['unp_start'],
                            uniprot_end=segment['unp_end'],
                        )

            self.mappers[pdb] = dict(mapper)

    def get_residue(self, pdb, resnum, chain=None):
        """
        For a residue in a PDB structure returns the UniProt ID and
        the position of the residue in the UniProt sequence.

        The mapping for the structure is loaded first if it is not in
        the cache yet.

        Args
            pdb (str): A PDB structure ID.
            resnum (int): The position of the residue.
            chain (str): The chain ID, optional. If not provided, the
                first chain with a segment covering `resnum` is used.

        Returns
            A `Residue` tuple of UniProt ID, chain ID, residue number in
            UniProt and offset. The chain is the one where the residue
            has been found, also if `chain` was not provided.
            Returns None if the residue can not be found.
        """

        pdb = pdb.lower()
        self._ensure_mappers()

        if pdb not in self.mappers:

            self.load_mapping(pdb)

        for _chain, segments in self.mappers.get(pdb, {}).items():

            if chain and chain != _chain:

                continue

            # segment ends at or after the residue; the closest one belongs
            # to the only segment that may contain the residue
            ends = [end for end in segments if end >= resnum]

            if not ends:

                continue

            seg = segments[min(ends)]

            if seg.pdb_start <= resnum:

                offset = seg.uniprot_start - seg.pdb_start

                return Residue(
                    uniprot=seg.uniprot,
                    chain=_chain,
                    resnum=resnum + offset,
                    offset=offset,
                )

        return None

    def clean(self):
        """
        Removes cached mappings, freeing up memory.
        """

        self.mappers = {}