#!/usr/bin/env python# -*- coding: utf-8 -*-## This file is part of the `pypath` python module## Copyright 2014-2023# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University## Authors: see the file `README.rst`# Contact: Dénes Türei (turei.denes@gmail.com)## Distributed under the GPLv3 License.# See accompanying file LICENSE.txt or copy at# https://www.gnu.org/licenses/gpl-3.0.html## Website: https://pypath.omnipathdb.org/#importreimportpypath.resources.urlsasurlsimportpypath.share.curlascurl
def get_instruct():
    """
    Download and parse the human Instruct interaction table.

    Instruct contains residue numbers in UniProt sequences, it means
    no further calculations of offsets in chains of PDB structures needed.
    Chains are not given, only a set of PDB structures supporting the
    domain-domain // protein-protein interaction.

    Returns:
        ``None`` if the download yielded no data. Otherwise a list with
        one dict per table row having more than 12 tab separated columns.
        Each dict has the keys:

        - the first UniProt ID (column 0) and the second UniProt ID
          (column 1), each mapping to a dict with ``'pfam'`` (columns 6
          and 7, respectively), ``'chain'`` (always ``None``) and
          ``'seq'`` (columns 10 and 11: a list of lists of strings,
          split by ``;`` then by ``,``, stripped of every character
          except digits, dots and hyphens);
        - ``'uniprots'``: the two UniProt IDs as a list;
        - ``'source'``: the constant ``'Instruct'``;
        - ``'pdb'``: column 12 split by ``;``;
        - ``'references'``: column 13 split by ``;``, or an empty list
          if the row has no 14th column.
    """

    non_digit = re.compile(r'[^\d.-]+')

    def parse_residues(field):
        # `;` separates groups, `,` separates items within a group;
        # anything but digits, dots and hyphens is removed from each item.
        return [
            [non_digit.sub('', item) for item in group.split(',')]
            for group in field.split(';')
        ]

    c = curl.Curl(urls.urls['instruct_human']['url'], silent=False)
    data = c.result

    if data is None:
        return None

    # The first line is the header, hence skipped.
    lines = data.replace('\r', '').split('\n')[1:]

    instruct = []

    for line in lines:

        fields = line.split('\t')

        if len(fields) <= 12:
            continue

        uniprot1 = fields[0]
        uniprot2 = fields[1]

        # The guard above only guarantees 13 columns (indices 0-12);
        # a row lacking the references column gets an empty list
        # instead of raising IndexError.
        references = fields[13].split(';') if len(fields) > 13 else []

        # NOTE: if both partners share the same UniProt ID, the second
        # entry overwrites the first, as they are keyed by the ID.
        instruct.append({
            uniprot1: {
                'pfam': fields[6],
                'chain': None,
                'seq': parse_residues(fields[10]),
            },
            uniprot2: {
                'pfam': fields[7],
                'chain': None,
                'seq': parse_residues(fields[11]),
            },
            'uniprots': [uniprot1, uniprot2],
            'source': 'Instruct',
            'pdb': fields[12].split(';'),
            'references': references,
        })

    return instruct
def get_instruct_offsets():
    """
    Download and parse the Instruct residue number offsets.

    These offsets should be understood as from UniProt to PDB.

    Returns:
        ``None`` if the download yielded no data. Otherwise a dict
        mapping ``(pdb, uniprot)`` tuples to integer offsets, where
        ``pdb`` is column 0 in lowercase, ``uniprot`` is column 1 and the
        offset is column 2 stripped of every character except digits,
        dots and hyphens. Rows with fewer than 3 columns are ignored;
        rows where the offset can not be converted to an integer are
        reported on the standard output and skipped.
    """

    # Imported locally because this function is the only user of `sys`
    # and no file level import of it is visible.
    import sys

    non_digit = re.compile(r'[^\d.-]+')

    c = curl.Curl(urls.urls['instruct_offsets']['url'], silent=False)
    data = c.result

    if data is None:
        return None

    # The first line is the header, hence skipped.
    lines = data.replace('\r', '').split('\n')[1:]

    offsets = {}

    for line in lines:

        fields = line.split('\t')

        if len(fields) <= 2:
            continue

        pdb = fields[0].lower()
        uniprot = fields[1]

        try:
            offset = int(non_digit.sub('', fields[2]))

        except ValueError:
            # Best effort parsing: report the unparseable value and
            # carry on with the next row.
            sys.stdout.write('Error processing line:\n')
            sys.stdout.write(fields[2])
            sys.stdout.write('\n')
            sys.stdout.flush()
            continue

        offsets[(pdb, uniprot)] = offset

    return offsets