Source code for pypath.inputs.instruct

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import re
import sys

import pypath.resources.urls as urls
import pypath.share.curl as curl


def get_instruct():
    """
    Downloads and parses the human Instruct interaction table.

    Instruct contains residue numbers in UniProt sequences, it means
    no further calculations of offsets in chains of PDB structures needed.
    Chains are not given, only a set of PDB structures supporting the
    domain-domain // protein-protein interaction.

    Returns:
        ``None`` if the download yielded no data, otherwise a list with
        one dict per row that has more than 12 tab separated fields.
        Each dict has the keys ``'uniprots'`` (the two identifiers from
        columns 0 and 1), ``'source'``, ``'pdb'`` (column 12 split at
        ``;``), ``'references'`` (column 13 split at ``;``, empty list
        if the row has no such column), and one key for each of the two
        UniProt identifiers, pointing to a dict with ``'pfam'``,
        ``'chain'`` (always ``None``) and ``'seq'``.
    """

    non_digit = re.compile(r'[^\d.-]+')

    def parse_residues(field):
        # Split at `;` then at `,`; from each piece remove every character
        # except digits, `.` and `-`. The values are kept as strings.
        return [
            [non_digit.sub('', num) for num in group.split(',')]
            for group in field.split(';')
        ]

    c = curl.Curl(urls.urls['instruct_human']['url'], silent = False)
    data = c.result

    if data is None:

        return None

    # remove carriage returns and skip the first line
    # (presumably the column header)
    lines = data.replace('\r', '').split('\n')[1:]

    instruct = []

    for line in lines:

        fields = line.split('\t')

        if len(fields) <= 12:

            continue

        uniprot1 = fields[0]
        uniprot2 = fields[1]

        # NOTE(review): if both partners have the same UniProt ID, the two
        # entries below share one key and only the second one is kept.
        instruct.append({
            uniprot1: {
                'pfam': fields[6],
                'chain': None,
                'seq': parse_residues(fields[10]),
            },
            uniprot2: {
                'pfam': fields[7],
                'chain': None,
                'seq': parse_residues(fields[11]),
            },
            'uniprots': [uniprot1, uniprot2],
            'source': 'Instruct',
            'pdb': fields[12].split(';'),
            # the guard above only ensures 13 fields; a row without the
            # 14th column gets an empty reference list instead of
            # raising IndexError
            'references': fields[13].split(';') if len(fields) > 13 else [],
        })

    return instruct
def get_instruct_offsets():
    """
    Downloads and parses the Instruct residue offset table.

    These offsets should be understood as from UniProt to PDB.

    Returns:
        ``None`` if the download yielded no data, otherwise a dict with
        ``(pdb, uniprot)`` tuples as keys (the PDB identifier in lower
        case) and integer offsets as values. Rows where the offset can
        not be converted to integer are reported on standard output and
        left out.
    """

    non_digit = re.compile(r'[^\d.-]+')
    c = curl.Curl(urls.urls['instruct_offsets']['url'], silent = False)
    data = c.result

    if data is None:

        return None

    # remove carriage returns and skip the first line
    # (presumably the column header)
    lines = data.replace('\r', '').split('\n')[1:]

    offsets = {}

    for line in lines:

        fields = line.split('\t')

        if len(fields) <= 2:

            continue

        pdb = fields[0].lower()
        uniprot = fields[1]

        try:

            # the pattern keeps `.` and `-`, hence values like `1.5`
            # or `3-4` still fail here and end up in the error branch
            offset = int(non_digit.sub('', fields[2]))

        except ValueError:

            sys.stdout.write('Error processing line:\n')
            sys.stdout.write(fields[2])
            sys.stdout.write('\n')
            sys.stdout.flush()

        else:

            offsets[(pdb, uniprot)] = offset

    return offsets