# Source code for pypath.inputs.csa
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
try:
from cStringIO import StringIO
except ModuleNotFoundError:
from io import StringIO
import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.share.progress as progress
import pypath.inputs.pdb as pdb_input
import pypath.inputs.common as inputs_common
import pypath.utils.pdb as pdb_utils
import pypath.internals.intera as intera
# [docs]
def get_csa(uniprots = None):
    """
    Downloads and preprocesses catalytic sites data.

    This data tells which residues are involved in the catalytic
    activity of one protein.

    The table is retrieved from the URL registered under
    ``urls.urls['catalytic_sites']['url']`` and parsed as comma separated
    values, the columns being named, by position: ``pdb``, ``id``,
    ``resname``, ``chain``, ``resnum``, ``chem_fun`` and ``evidence``.
    Each row is translated from its PDB chain to a UniProt ID using the
    mapping returned by ``pdb_input.pdb_chains()``, and its residue number
    is converted accordingly (see ``_csa_uniprot_residue``). Rows that
    cannot be translated are dropped.

    :param uniprots:
        Optional container of UniProt IDs. If provided, only rows mapping
        to a UniProt ID contained in it are kept; if ``None``, all rows
        are kept.

    :return:
        ``None`` if the download yielded no data. Otherwise a dict of
        dicts of dicts of lists: UniProt ID -> PDB ID -> value of the
        ``id`` column -> list of ``intera.Residue`` objects.
    """

    url = urls.urls['catalytic_sites']['url']
    download = curl.Curl(url, silent = False)
    raw = download.result

    if raw is None:

        return None

    # only the PDB -> UniProt direction of the chain mapping is needed here
    _, pdb_to_uniprot = pdb_input.pdb_chains()

    buff = StringIO()
    buff.write(raw)

    columns = {
        'pdb': 0,
        'id': 1,
        'resname': 2,
        'chain': 3,
        'resnum': 4,
        'chem_fun': 5,
        'evidence': 6,
    }
    # NOTE(review): `hdr = 1` presumably skips one header line -- confirm
    # against `inputs_common.read_table`
    records = inputs_common.read_table(
        cols = columns,
        fileObject = buff,
        sep = ',',
        hdr = 1,
    )

    result = {}
    prg = progress.Progress(len(records), 'Processing catalytic sites', 11)

    for rec in records:

        mapped = _csa_uniprot_residue(rec, pdb_to_uniprot, uniprots)

        if mapped is not None:

            uniprot, resnum = mapped
            residue = intera.Residue(
                name = rec['resname'],
                number = resnum,
                protein = uniprot,
            )
            (
                result
                    .setdefault(uniprot, {})
                    .setdefault(rec['pdb'], {})
                    .setdefault(rec['id'], [])
                    .append(residue)
            )

        # the progress counter advances for every row, kept or not
        prg.step()

    prg.terminate()

    return result


def _csa_uniprot_residue(rec, pdb_to_uniprot, uniprots):
    """
    Translates one catalytic site row to a UniProt ID and residue number.

    :param rec:
        One parsed row; its ``pdb``, ``chain`` and ``resnum`` fields are
        read.
    :param pdb_to_uniprot:
        Mapping PDB ID -> chain -> dict with ``uniprot`` and ``offset``
        keys.
    :param uniprots:
        Optional container of UniProt IDs to restrict to, or ``None``.

    :return:
        A ``(uniprot, resnum)`` tuple, or ``None`` if the PDB ID or the
        chain is unknown, the UniProt ID is filtered out by ``uniprots``,
        or no residue number could be determined.
    """

    pdb_id = rec['pdb']

    if pdb_id not in pdb_to_uniprot:

        return None

    chains = pdb_to_uniprot[pdb_id]
    chain_id = rec['chain']

    if chain_id not in chains:

        return None

    chain_info = chains[chain_id]
    uniprot = chain_info['uniprot']

    if uniprots is not None and uniprot not in uniprots:

        return None

    offset = chain_info['offset']

    if offset is not None:

        # a known offset is simply added to the PDB residue number
        return uniprot, int(rec['resnum']) + offset

    # without an offset, the residue is looked up individually
    looked_up = pdb_utils.residue_pdb(pdb_id, chain_id, rec['resnum'])

    if len(looked_up) > 0:

        return uniprot, int(looked_up['UPCOUNT'])

    return None