#!/usr/bin/env python# -*- coding: utf-8 -*-## This file is part of the `pypath` python module## Copyright 2014-2023# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University## Authors: see the file `README.rst`# Contact: Dénes Türei (turei.denes@gmail.com)## Distributed under the GPLv3 License.# See accompanying file LICENSE.txt or copy at# https://www.gnu.org/licenses/gpl-3.0.html## Website: https://pypath.omnipathdb.org/#fromfuture.utilsimportiteritemsfrompast.builtinsimportxrange,rangeimportosimportsysimportreimportcollectionstry:importcPickleaspickleexcept:importpickleimportbs4importpypath.resources.urlsasurlsimportpypath.share.curlascurlimportpypath.share.commonascommonimportpypath.share.cacheascacheimportpypath.share.progressasprogressimportpypath.utils.pdbaspdb_utilsimportpypath.inputs.pdbaspdb_inputimportpypath.internals.interaasinteraPisaBond=collections.namedtuple('PisaBond',('uniprot_1','chain_1','residue_1','seqnum_1','uniprot_2','chain_2','residue_2','seqnum_2',),)
def pisa_bonds(bonds, chains):
    """
    Collect the bonds listed under one element of a PISA response.

    To be refactored in the future. If you are interested in using this
    function, please contact the authors.

    Args
        bonds: An element providing ``find_all``; each of its ``bond``
            children is read through ``find(<tag>).text`` for the tags
            ``seqnum-1/2``, ``res-1/2`` and ``chain-1/2``.
        chains (dict): Chain IDs as keys, UniProt IDs as values.

    Returns
        A list of ``PisaBond`` tuples. Bonds where either of the chains
        has no (non-empty) UniProt ID in ``chains`` are left out.
    """

    non_numeric = re.compile(r'[^\d.-]+')

    def field(element, tag):
        # Text content of the first child element called `tag`.
        return element.find(tag).text

    def seqnum(element, tag):
        # Sequence number with every non-numeric character removed.
        return int(non_numeric.sub('', field(element, tag)))

    records = []

    for bond in bonds.find_all('bond'):

        seqnum_a = seqnum(bond, 'seqnum-1')
        seqnum_b = seqnum(bond, 'seqnum-2')

        # Translate residue names by `common.aaletters`; names not found
        # there are kept unchanged.
        residue_a = field(bond, 'res-1')
        residue_a = common.aaletters.get(residue_a, residue_a)
        residue_b = field(bond, 'res-2')
        residue_b = common.aaletters.get(residue_b, residue_b)

        chain_a = field(bond, 'chain-1')
        chain_b = field(bond, 'chain-2')

        uniprot_a = chains.get(chain_a)
        uniprot_b = chains.get(chain_b)

        if not (uniprot_a and uniprot_b):
            continue

        records.append(
            PisaBond(
                uniprot_a,
                chain_a,
                residue_a,
                seqnum_a,
                uniprot_b,
                chain_b,
                residue_b,
                seqnum_b,
            )
        )

    return records
def _pisa_interface_value(interface, tag):
    """
    Numeric value of the child element ``tag`` of a PISA interface.

    Characters matching ``common.non_digit`` are removed from the text of
    the element; ``None`` is returned if nothing remains, otherwise the
    remainder converted to ``float``.
    """

    cleaned = common.non_digit.sub('', interface.find(tag).text)

    return float(cleaned) if cleaned else None


def pisa_interfaces(pdbs, return_unmapped=False):
    """
    To be refactored in the future. If you are interested in using this
    function, please contact the authors.

    Interfaces are kept in the file ``pisa.pickle`` in the cache
    directory. Structures already present there are not downloaded again,
    the rest is queried in batches of 5 PDB IDs. The returned dict
    contains everything from the cache file, not only the structures
    requested in ``pdbs``.

    Args
        pdbs (set): A set of PDB IDs to query.
        return_unmapped (bool): Return also a list of unmapped residues.
            In this case, a tuple is returned, its first element is a
            dict of interfaces, while its second element is a list of
            unmapped residues (normally empty, if all residues could be
            mapped between PDB and UniProt sequences).

    Returns
        A dict of dicts with interfaces. The upper level keys are PDB
        structure IDs, the lower level keys are tuples of UniProt IDs,
        the values are ``pypath.internals.intera.Interface`` objects.
    """

    # bond type label => name of the element in the PISA response
    bond_types = {
        'hbonds': 'h-bonds',
        'sbridges': 'salt-bridges',
        'covbonds': 'cov-bonds',
        'ssbonds': 'ss-bonds',
    }

    interfaces = collections.defaultdict(dict)
    cachefile = os.path.join(cache.get_cachedir(), 'pisa.pickle')
    _, pdb_u = pdb_input.pdb_chains()

    if os.path.exists(cachefile):

        # NOTE: the cache is a pickle, it must never be replaced by a file
        # from an untrusted source.
        try:

            with open(cachefile, 'rb') as fp:

                interfaces = pickle.load(fp)

        except Exception:

            # Best effort: with an unreadable cache we start from empty
            # and download everything again.
            pass

    errors = []
    unmapped_residues = []
    p = 5  # number of PDB IDs in one query
    pdbs = list(set(pdbs) - set(interfaces.keys()))
    prg = progress.Progress(
        len(pdbs) / p,
        'Downloading data from PDBe PISA',
        1,
    )
    pdbs = sorted(common.to_set(pdbs))

    for i in xrange(0, len(pdbs), p):

        batch = pdbs[i:i + p]
        url = urls.urls['pisa_interfaces']['url'] + ','.join(batch)
        c = curl.Curl(url, cache=False)
        data = c.result

        if not data:

            errors.append('Could not download: \n\t\t%s' % url)
            continue

        soup = bs4.BeautifulSoup(data, 'html.parser')

        for entry in soup.find_all('pdb_entry'):

            pdb_id = entry.find('pdb_code').text.lower()
            interfaces[pdb_id] = {}
            chains = {}
            resconv = pdb_utils.ResidueMapper()

            if pdb_id in pdb_u:

                for chain, chain_data in iteritems(pdb_u[pdb_id]):

                    chains[chain] = chain_data['uniprot']

            for interface in entry.find_all('interface'):

                for bond_type, tag in iteritems(bond_types):

                    bonds = interface.find(tag)

                    if not bonds:

                        continue

                    for bond in pisa_bonds(bonds, chains):

                        uniprots = (bond.uniprot_1, bond.uniprot_2)

                        if uniprots not in interfaces[pdb_id]:

                            # First bond of this pair of proteins in this
                            # structure: create the interface with the
                            # attributes of the current `interface`
                            # element.
                            css = _pisa_interface_value(interface, 'css')
                            area = _pisa_interface_value(
                                interface,
                                'int_area',
                            )
                            solv_en = _pisa_interface_value(
                                interface,
                                'int_solv_en',
                            )
                            stab_en = _pisa_interface_value(
                                interface,
                                'stab_en',
                            )

                            interfaces[pdb_id][uniprots] = intera.Interface(
                                uniprots[0],
                                uniprots[1],
                                source='PISA',
                                pdb=pdb_id,
                                css=css,
                                solv_en=solv_en,
                                area=area,
                                stab_en=stab_en,
                            )

                        res1 = resconv.get_residue(pdb_id, bond.seqnum_1)
                        res2 = resconv.get_residue(pdb_id, bond.seqnum_2)

                        if (
                            res1 and
                            res2 and
                            res1.uniprot == uniprots[0] and
                            res2.uniprot == uniprots[1]
                        ):

                            interfaces[pdb_id][uniprots].add_residues(
                                (res1.resnum, bond.residue_1, uniprots[0]),
                                (res2.resnum, bond.residue_2, uniprots[1]),
                                typ=bond_type,
                            )

                        else:

                            unmapped_residues.append(
                                (
                                    pdb_id,
                                    bond.seqnum_1,
                                    bond.seqnum_2,
                                    uniprots[0],
                                    uniprots[1],
                                )
                            )

        # one step for each batch processed
        prg.step()

    prg.terminate()

    with open(cachefile, 'wb') as fp:

        pickle.dump(interfaces, fp, 2)

    interfaces = dict(interfaces)

    if errors:

        # `errors` has one item for each failed query (batch), while
        # `pdbs` is the list of the PDB IDs that had to be downloaded.
        sys.stdout.write(
            '\t:: Failed to download %u files of total %u:\n\n' % (
                len(errors),
                len(pdbs),
            )
        )

        for e in errors:

            sys.stdout.write('\t' + e + '\n')

        sys.stdout.flush()

    if return_unmapped:

        return interfaces, unmapped_residues

    return interfaces