Source code for pypath.inputs.i3d

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import re
import sys

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.inputs.pfam as pfam_input
import pypath.share.progress as progress


def get_i3d():
    """
    Download and parse the human I3D domain-domain interaction table.

    Interaction3D contains residue numbers in given chains in given PDB
    structures, so we need to add an offset to get the residue numbers
    valid for UniProt sequences. Offsets can be obtained from Instruct,
    or from the Pfam PDB-chain-UniProt mapping table.

    The table is fetched from ``urls.urls['i3d_human']['url']``. Its first
    line is discarded as a header, and only rows with more than 20
    tab-separated fields are processed.

    Returns:
        ``None`` if the download produced no data. Otherwise a list with
        one dict per processed row, having the keys:

        - the first UniProt ID (field 0) and the second UniProt ID
          (field 1), each mapped to a dict with ``'pfam'`` (value looked
          up in the Pfam name mapping by field 13 / 20, or ``None``),
          ``'chain'`` (field 7 / 14) and ``'seq'`` (a list holding one
          ``[start, end]`` pair of ints from fields 11-12 / 18-19);
        - ``'uniprots'``: list of the two UniProt IDs;
        - ``'source'``: always ``'I3D'``;
        - ``'pdb'``: one-element list with field 5;
        - ``'references'``: an empty list.

    Raises:
        ValueError: if a residue field cannot be converted to ``int``
            after stripping characters other than digits, ``.`` and ``-``.
    """

    dname_pfam, _ = pfam_input.pfam_names()

    if dname_pfam is None:

        sys.stdout.write('\n\t:: Could not get Pfam domain names\n\n')
        # Continue without Pfam annotation: with ``None`` here, the
        # lookups below would raise TypeError on the first data row.
        dname_pfam = {}

    # Strips everything except digits, dots and dashes from residue fields.
    non_digit = re.compile(r'[^\d.-]+')

    def residue_range(start, end):
        """Return ``[[start, end]]`` with both boundaries parsed as int."""

        return [[
            int(non_digit.sub('', start)),
            int(non_digit.sub('', end)),
        ]]

    c = curl.Curl(urls.urls['i3d_human']['url'], silent = False)
    data = c.result

    if data is None:

        return None

    # Normalize line endings, split to rows and drop the header row.
    data = data.replace('\r', '').split('\n')[1:]

    i3d = []

    prg = progress.Progress(
        len(data),
        'Processing domain-domain interactions',
        11,
    )

    for line in data:

        prg.step()
        fields = line.split('\t')

        # Rows without all the columns used below (e.g. the trailing
        # empty line) are skipped.
        if len(fields) <= 20:

            continue

        uniprot1 = fields[0]
        uniprot2 = fields[1]

        # NOTE(review): if both partners have the same UniProt ID, the
        # second entry overwrites the first, as both are keyed by the ID.
        i3d.append({
            uniprot1: {
                'pfam': dname_pfam.get(fields[13]),
                'chain': fields[7],
                'seq': residue_range(fields[11], fields[12]),
            },
            uniprot2: {
                'pfam': dname_pfam.get(fields[20]),
                'chain': fields[14],
                'seq': residue_range(fields[18], fields[19]),
            },
            'uniprots': [uniprot1, uniprot2],
            'source': 'I3D',
            'pdb': [fields[5]],
            'references': [],
        })

    prg.terminate()

    return i3d