Source code for pypath.inputs.instruct
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import re
import sys

import pypath.resources.urls as urls
import pypath.share.curl as curl
[docs]
def get_instruct():
    """
    Downloads and parses the human interaction table of Instruct.

    Instruct contains residue numbers in UniProt sequences, which means
    no further calculation of offsets in chains of PDB structures is
    needed. Chains are not given, only a set of PDB structures supporting
    the domain-domain // protein-protein interaction.

    The first line of the downloaded table is discarded and lines with
    fewer than 13 tab separated fields are skipped.

    Returns:
        ``None`` if the download yielded no data; otherwise a list with
        one dict for each parsed line. Each dict has the keys:

        - the value of field 0 and the value of field 1 (used as UniProt
          IDs), each pointing to a dict with the keys ``'pfam'``
          (fields 6 and 7, respectively), ``'chain'`` (always ``None``)
          and ``'seq'`` (fields 10 and 11, respectively, parsed into a
          list of lists of strings);
        - ``'uniprots'``: list of the two IDs;
        - ``'source'``: always ``'Instruct'``;
        - ``'pdb'``: field 12 split by ``';'``;
        - ``'references'``: field 13 split by ``';'``, or an empty list
          if the line has no 14th field.
    """

    non_digit = re.compile(r'[^\d.-]+')

    def parse_residues(field):
        # ';' separates the groups, ',' the items within one group;
        # every character except digits, '.' and '-' is removed from
        # the items, which are kept as strings
        return [
            [non_digit.sub('', n) for n in s.split(',')]
            for s in field.split(';')
        ]

    c = curl.Curl(urls.urls['instruct_human']['url'], silent = False)
    data = c.result

    if data is None:

        return None

    data = data.replace('\r', '').split('\n')
    # the first line is not data
    del data[0]

    instruct = []

    for l in data:

        l = l.split('\t')

        # fields up to index 12 are mandatory; this also drops the
        # empty line after a trailing newline
        if len(l) <= 12:

            continue

        uniprot1 = l[0]
        uniprot2 = l[1]

        # the original guard (`len(l) > 12`) did not cover index 13,
        # hence a line with exactly 13 fields raised IndexError;
        # such lines are kept, with no references
        references = l[13].split(';') if len(l) > 13 else []

        # NOTE: if the two IDs are identical, the second sub-dict
        # overwrites the first one
        instruct.append({
            uniprot1: {
                'pfam': l[6],
                'chain': None,
                'seq': parse_residues(l[10]),
            },
            uniprot2: {
                'pfam': l[7],
                'chain': None,
                'seq': parse_residues(l[11]),
            },
            'uniprots': [uniprot1, uniprot2],
            'source': 'Instruct',
            'pdb': l[12].split(';'),
            'references': references,
        })

    return instruct
[docs]
def get_instruct_offsets():
    """
    Downloads and parses the residue number offset table of Instruct.

    These offsets should be understood as from UniProt to PDB.

    The first line of the downloaded table is discarded and lines with
    fewer than 3 tab separated fields are skipped. If the third field
    of a line can not be converted to an integer, the field is reported
    on the standard output and the line is skipped.

    Returns:
        ``None`` if the download yielded no data; otherwise a dict with
        tuples of two strings as keys (the lowercased first field, used
        as PDB ID, and the second field, used as UniProt ID) and the
        offsets as integer values.
    """

    non_digit = re.compile(r'[^\d.-]+')

    c = curl.Curl(urls.urls['instruct_offsets']['url'], silent = False)
    data = c.result

    if data is None:

        return None

    data = data.replace('\r', '').split('\n')
    # the first line is not data
    del data[0]

    offsets = {}

    for l in data:

        l = l.split('\t')

        if len(l) <= 2:

            continue

        pdb = l[0].lower()
        uniprot = l[1]

        try:

            # everything except digits, '.' and '-' is removed first;
            # the remainder may still not be a valid integer
            # (e.g. empty string or '1.5')
            offset = int(non_digit.sub('', l[2]))

        except ValueError:

            # the original bare `except` called `sys` without the module
            # being imported, resulting in NameError instead of this message
            sys.stdout.write('Error processing line:\n')
            sys.stdout.write(l[2])
            sys.stdout.write('\n')
            sys.stdout.flush()

        else:

            offsets[(pdb, uniprot)] = offset

    return offsets