#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import gzip
import re

try:
    import urllib2
except:
    import urllib.request as urllib2

try:
    from cStringIO import StringIO
except:
    try:
        from StringIO import StringIO
    except:
        from io import StringIO

import bs4
class ResidueMapper(object):
    """
    This class stores and serves the PDB --> UniProt
    residue level mapping. Attempts to download the
    mapping, and stores it for further use. Converts
    PDB residue numbers to the corresponding UniProt ones.

    Cached state (reset by :py:meth:`clean`):

    * ``mappers``: dict keyed by lowercase PDB ID; each value is the
      mapping built by :py:meth:`load_mapping`.
    * ``uniprot_pdb`` / ``pdb_uniprot``: lookup tables, ``None`` until
      populated.

    NOTE(review): the methods below also rely on ``self.url``,
    ``self.download_errors`` and ``self.get_pdb_chains()``, none of which
    is defined in this part of the file -- confirm where they are set up.
    """

    # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3, so
    # this tuple covers every text type without referencing the name
    # ``unicode`` (which does not exist on Python 3).
    _string_types = (str, type(u''))

    # Everything except digits, dot and minus is removed from the segment
    # boundaries before they are converted to ``int``.
    _non_digit = re.compile(r'[^\d.-]+')

    @staticmethod
    def _segment_for(segments, resnum):
        """
        Picks the segment which may contain ``resnum``.

        :param dict segments: Segment records keyed by their (integer)
            end position.
        :param int resnum: Residue number in the same numbering as the
            keys of ``segments``.

        :return: The record with the smallest end position that is not
            below ``resnum``, or ``None`` if there is no such segment
            (including when ``segments`` is empty). The caller still has
            to check the start position of the segment.
        """

        ends = [end for end in segments if end >= resnum]

        return segments[min(ends)] if ends else None

    def load_mapping(self, pdb):
        """
        Downloads and stores the residue level mapping of one PDB entry.

        The document at ``self.url % pdb`` is requested up to 5 times
        with a timeout of 60 seconds. On success the parsed mapping is
        stored in ``self.mappers`` under the lowercase PDB ID. If all
        attempts fail, the ID is appended to ``self.download_errors`` and
        nothing is stored.

        The stored mapping is a dict with two kinds of top level keys:

        * chain IDs: ``{pdbend: {'uniprot', 'pdbstart', 'uniprotstart',
          'uniprotend'}}``
        * UniProt IDs: ``{chain: {uniprotend: {'pdbstart', 'pdbend',
          'uniprotstart'}}}``

        :param str pdb: PDB identifier (case insensitive).
        """

        pdb = pdb.lower()
        url = self.url % pdb
        data = None

        for _ in range(5):

            try:
                data = urllib2.urlopen(url, timeout = 60)
                break

            # any download problem just triggers the next attempt, but
            # KeyboardInterrupt and SystemExit must get through
            except Exception:
                continue

        if data is None:

            self.download_errors.append(pdb)
            # nothing to parse; callers find out about the failure by
            # checking whether the ID is in ``self.mappers``
            return

        try:
            content = data.read()

        finally:
            data.close()

        def to_int(value):

            return int(self._non_digit.sub('', value))

        mapper = {}
        soup = bs4.BeautifulSoup(content)

        for block in soup.find_all('block'):

            # the first segment is used as the PDB side, the second as
            # the UniProt side of the aligned block
            seg = block.find_all('segment')
            chain = seg[0]['intobjectid'].split('.')[1]
            uniprot = seg[1]['intobjectid']
            pdbstart = to_int(seg[0]['start'])
            pdbend = to_int(seg[0]['end'])
            uniprotstart = to_int(seg[1]['start'])
            uniprotend = to_int(seg[1]['end'])

            mapper.setdefault(chain, {})[pdbend] = {
                'uniprot': uniprot,
                'pdbstart': pdbstart,
                'uniprotstart': uniprotstart,
                'uniprotend': uniprotend,
            }

            by_chain = mapper.setdefault(uniprot, {})
            by_chain.setdefault(chain, {})[uniprotend] = {
                'pdbstart': pdbstart,
                'pdbend': pdbend,
                'uniprotstart': uniprotstart,
            }

        self.mappers[pdb] = mapper

    def chains(self, chains):
        """
        Normalizes a chain selection.

        :param chains: A single chain ID as string, a list of chain IDs,
            or anything else (e.g. ``None``).

        :return: A one element list for a string, a list of the unique
            elements (in arbitrary order) for a list; any other value is
            returned unchanged.
        """

        if isinstance(chains, self._string_types):

            chains = [chains]

        if isinstance(chains, list):

            chains = list(set(chains))

        return chains

    def pdb2uniprot(self, pdb, resnum, chains = None):
        """
        Translates a PDB residue number to UniProt residue number(s).

        Loads the mapping of the PDB entry if it is not cached yet.

        :param str pdb: PDB identifier (case insensitive).
        :param int resnum: Residue number in the PDB structure.
        :param chains: Chain ID or list of chain IDs to consider;
            ``None`` means all chains.

        :return: Dict keyed by chain ID, the values are dicts with the
            keys ``resnum`` (UniProt residue number), ``offset`` (the
            difference added to the PDB number) and ``uniprot``. Empty
            if the mapping is not available or no segment covers the
            residue.
        """

        chains = self.chains(chains)
        results = {}
        pdb = pdb.lower()

        if pdb not in self.mappers:

            self.load_mapping(pdb)

        if pdb not in self.mappers:

            return results

        for ch, data in self.mappers[pdb].items():

            # the mapping has both chain IDs and UniProt IDs at its top
            # level; only one character long keys are treated as chains
            if len(ch) != 1 or (chains is not None and ch not in chains):

                continue

            seg = self._segment_for(data, resnum)

            if seg is not None and seg['pdbstart'] <= resnum:

                offset = seg['uniprotstart'] - seg['pdbstart']
                results[ch] = {
                    'resnum': resnum + offset,
                    'offset': offset,
                    'uniprot': seg['uniprot'],
                }

        return results

    def uniprot2pdb(self, uniprot, resnum, chains = None, pdbs = None):
        """
        Translates a UniProt residue number to PDB residue number(s).

        Calls ``self.get_pdb_chains()`` first if ``self.uniprot_pdb`` is
        ``None``, and loads the mapping of each PDB entry which is not
        cached yet.

        :param str uniprot: UniProt identifier.
        :param int resnum: Residue number in the UniProt sequence.
        :param chains: Chain ID or list of chain IDs to consider;
            ``None`` means all chains.
        :param pdbs: PDB ID or iterable of PDB IDs to look at; if
            ``None``, the ``'pdb'`` values listed for this protein in
            ``self.uniprot_pdb`` are used.

        :return: Dict of dicts, keyed by PDB ID (as given) and chain ID;
            the innermost values are dicts with the keys ``resnum`` (PDB
            residue number) and ``offset`` (the difference added to the
            UniProt number). PDB entries without a matching segment are
            left out.
        """

        chains = self.chains(chains)

        if self.uniprot_pdb is None:

            self.get_pdb_chains()

        results = {}

        # one UniProt entry can occur in several PDB structures, so first
        # we need to find out which PDB entries to look at:
        if pdbs is None:

            pdbs = [
                updb['pdb']
                for updb in self.uniprot_pdb.get(uniprot, ())
            ]

        elif isinstance(pdbs, self._string_types):

            pdbs = [pdbs]

        # now find the residue number in each of the PDB entries:
        for pdb in set(pdbs):

            # ``load_mapping`` stores the mappings under lowercase IDs,
            # so the cache has to be queried the same way
            key = pdb.lower()

            if key not in self.mappers:

                self.load_mapping(key)

            if key not in self.mappers or uniprot not in self.mappers[key]:

                continue

            for ch, up in self.mappers[key][uniprot].items():

                if chains is not None and ch not in chains:

                    continue

                seg = self._segment_for(up, resnum)

                if seg is not None and seg['uniprotstart'] <= resnum:

                    offset = seg['pdbstart'] - seg['uniprotstart']
                    results.setdefault(pdb, {})[ch] = {
                        'resnum': resnum + offset,
                        'offset': offset,
                    }

        return results

    def get_residue(self, ac, resnum, chains = None, pdbs = None):
        """
        Translates a residue number in either direction.

        :param str ac: An identifier; if it is 4 characters long (without
            surrounding whitespace) it is handled as a PDB ID, otherwise
            as a UniProt ID.
        :param int resnum: Residue number in the numbering of ``ac``.
        :param chains: Passed to :py:meth:`pdb2uniprot` or
            :py:meth:`uniprot2pdb`.
        :param pdbs: Passed to :py:meth:`uniprot2pdb`; ignored for PDB
            IDs.

        :return: The result of :py:meth:`pdb2uniprot` or
            :py:meth:`uniprot2pdb`.
        """

        # use the same stripped value for the lookup that decided
        # about the type of the identifier
        ac = ac.strip()

        if len(ac) == 4:

            return self.pdb2uniprot(ac, resnum, chains)

        return self.uniprot2pdb(ac, resnum, chains, pdbs)

    def clean(self):
        """
        Removes cached mappings, freeing up memory.
        """

        self.mappers = {}
        self.uniprot_pdb = None
        self.pdb_uniprot = None