#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
from future.utils import iteritems
import sys
import os
import re
import copy
import collections
import itertools
import hashlib
import warnings
import contextlib
from pypath.share import session as session_mod
_logger = session_mod.Logger(name = 'server')
_log = _logger._log
try:
    import twisted.web.resource
    import twisted.web.server
    import twisted.internet.reactor

    TwistedWebResource = twisted.web.resource.Resource
    TwistedWebSite = twisted.web.server.Site
    TWISTED_NOT_DONE_YET = twisted.web.server.NOT_DONE_YET
    twisted_listen_tcp = twisted.internet.reactor.listenTCP
    twisted_run = twisted.internet.reactor.run

# only a missing `twisted` should trigger the fallback; a bare `except`
# would also swallow e.g. KeyboardInterrupt or errors inside twisted
except ImportError:

    _log('No module `twisted` available. Necessary to run HTTP server.', -1)


    class TwistedWebResource:
        """Placeholder used when `twisted` is not installed."""

        pass


    class TwistedWebSite:
        """Placeholder used when `twisted` is not installed."""

        pass


    TWISTED_NOT_DONE_YET = None
    # accept any arguments so that calls mirroring the real signatures
    # (e.g. `listenTCP(port, site)`) do not raise TypeError
    twisted_listen_tcp = lambda *args, **kwargs: None
    twisted_run = lambda *args, **kwargs: None
import urllib
import json
import mimetypes
import pandas as pd
import numpy as np
import pypath.resources as resources
from pypath.omnipath.server import generate_about_page
import pypath.omnipath.server._html as _html
import pypath.resources.urls as urls
import pypath.resources as resources_mod
import pypath.share.common as common
import pypath_common._constants as _const
import pypath.core.intercell_annot as intercell_annot
import pypath.share.settings as settings
from pypath.share.common import flat_list
from pypath._metadata import __version__
# Make the name `unicode` available also under Python 3 (where it was
# removed and `str` is the text type). The previous check
# `'unicode' not in __builtins__` was fragile: `__builtins__` is a dict in
# imported modules but the `builtins` module in `__main__`, where the `in`
# test raises TypeError. Probing the name directly works in both cases.
try:
    unicode
except NameError:
    unicode = str

# license level value meaning that license restrictions are bypassed
LICENSE_IGNORE = 'ignore'
def stop_server():
    """
    Removes all connections and listeners from the Twisted reactor.

    Note: the module level import block binds only attribute aliases
    (``twisted_listen_tcp``, ``twisted_run``); the bare name ``reactor``
    was never defined, so the previous body raised NameError. The reactor
    module is imported here explicitly instead.
    """
    import twisted.internet.reactor

    twisted.internet.reactor.removeAll()
@contextlib.contextmanager
def ignore_pandas_copywarn():
    """
    Context manager that silences pandas' ``SettingWithCopyWarning``
    within its body.

    The warning filter change is confined to the ``with`` block:
    ``warnings.catch_warnings`` restores the previous filters on exit,
    so no extra ``try``/``finally`` wrapper is needed.
    """
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', pd.errors.SettingWithCopyWarning)

        yield
class BaseServer(TwistedWebResource, session_mod.Logger):
    """
    Common functionality for pypath HTTP REST services.

    Handles request routing (dispatch to query methods or static files
    under the www root), header and argument preprocessing, and license
    based access control. Subclasses implement the actual query methods
    (e.g. ``interactions``, ``annotations``) that ``render_GET``
    dispatches to by the first element of the request path.
    """

    # captures the page title from an HTML comment of the form
    # `<!-- Title: ... -->`; used by `_add_html_header`
    recomment = re.compile(r'<!--\s*Title:(.*?)-->')


    def __init__(self):
        """
        Sets up logging, the www root directories, the license unlocking
        secret and the resource controller, then initializes the Twisted
        resource.
        """
        if not hasattr(self, '_log_name'):
            # `self` must be passed explicitly to the unbound `__init__`
            # (it was missing before, raising TypeError)
            session_mod.Logger.__init__(self, name = 'server')

        self._log('Initializing BaseServer.')

        # paths served as HTML even without a file extension
        self.htmls = ['info', 'error_page.html']
        self.welcome_message = (
            'Hello, this is the REST service of pypath %s. Welcome!\n'
            'For the descriptions of pathway resources go to `/info`.\n'
            'Available query types: interactions, enz_sub, complexes, \n'
            'annotations, intercell'
        ) % __version__

        # this resource handles all paths itself, no child resources
        self.isLeaf = True

        self._set_www_root()
        self._read_license_secret()
        self._res_ctrl = resources_mod.get_controller()

        TwistedWebResource.__init__(self)
        self._log('Twisted resource initialized.')


    def render_GET(self, request):
        """
        Renders a GET request.

        Dispatches to the query method named by the first path element,
        serves a static file from the www root, or responds with a
        `Not found` message. Always writes the response itself and
        returns ``NOT_DONE_YET``.
        """
        response = []

        request.postpath = [i.decode('utf-8') for i in request.postpath if i]

        self._log(
            'Processing request: `%s` from `%s`; headers: [%s].' % (
                request.uri.decode('utf-8'),
                str(request.getClientAddress()),
                common.dict_str(request.getAllHeaders()),
            )
        )

        if not request.postpath:
            request.postpath = ['index.html']

        # translate query type synonyms (e.g. `network` -> `interactions`)
        request.postpath[0] = self._query_type(request.postpath[0])

        self._set_headers(request)

        if (
            request.postpath and
            (
                hasattr(self, request.postpath[0]) or
                request.postpath[0] == 'error_page.html'
            ) and
            # underscore prefixed attributes are private, not query methods
            request.postpath[0][0] != '_'
        ):
            if request.postpath[0] == 'error_page.html':
                toCall = self._error_page

            else:
                self._process_postpath(request)
                toCall = getattr(self, request.postpath[0])

            if hasattr(toCall, '__call__'):
                self._log(
                    'Query type: `%s`; Arguments: [%s].' % (
                        request.postpath[0],
                        common.dict_str(request.args),
                    )
                )

                try:
                    response = toCall(request)
                    response = (
                        response.encode('utf-8')
                        if hasattr(response, 'encode') else
                        response
                    )
                    response = [response]

                # log and re-raise; narrowed from a bare `except` so that
                # e.g. KeyboardInterrupt is not intercepted here
                except Exception:
                    self._log(
                        'Error while rendering `%s`:' %
                        request.uri.decode('utf-8')
                    )
                    self._log_traceback()
                    raise

        else:
            # not a query method: try to serve a static file
            local_path = self._local_path(request)

            if local_path:
                with open(local_path, 'rb') as fp:
                    response = [fp.read()]

                response = self._add_html_header(local_path, response)

        if not response:
            response = [
                (
                    "Not found: %s%s" % (
                        '/'.join(request.postpath),
                        ''
                        if len(request.args) == 0 else
                        '?%s' %
                        '&'.join([
                            '%s=%s' % (
                                k.decode('utf-8'),
                                v[0].decode('utf-8')
                            )
                            for k, v in request.args.items()
                            if v
                        ])
                    )
                ).encode('utf-8')
            ]

        request.setHeader('Content-Length', str(len(response[0])))
        request.write(response[0])
        self._log(
            'Finished serving request: `%s`.' % request.uri.decode('utf-8')
        )
        request.finish()

        return TWISTED_NOT_DONE_YET


    def render_POST(self, request):
        """
        Renders a POST request.

        If the body is JSON, its key-value pairs are converted into the
        same bytes-encoded ``request.args`` format GET requests use,
        then the request is handed over to ``render_GET``.
        """
        if (
            request.getHeader(b'content-type') and
            request.getHeader(b'content-type').startswith(b'application/json')
        ):
            post_content = request.content.getvalue()

            if post_content and post_content.strip():
                args_raw = json.loads(post_content)
                # lists are joined by comma, matching the GET URL syntax
                request.args = dict(
                    (
                        k.encode('utf-8'),
                        [v.encode('utf-8')]
                        if type(v) is not list else
                        [','.join(v).encode('utf-8')]
                    )
                    for k, v in args_raw.items()
                )

        return self.render_GET(request)


    def _set_www_root(self):
        """
        Determines the static content directory: a configured `www_root`
        if it exists, otherwise the `data/www` directory shipped with
        the module.
        """
        self.wwwbuiltin = os.path.join(
            session_mod.session().module_root,
            'data',
            'www',
        )
        self.wwwroot = settings.get('www_root')

        if not os.path.exists(self.wwwroot):
            self.wwwroot = self.wwwbuiltin


    def _local_path(self, request):
        """
        Returns the path of an existing static file matching the request
        path, or None. Hidden and private files (names starting with
        `.` or `_`) are never served.
        """
        if request.postpath and request.postpath[-1][0] in ('_', '.'):
            return

        # the configured www root takes precedence over the built-in one
        for wwwroot in (self.wwwroot, self.wwwbuiltin):
            path = os.path.join(wwwroot, *request.postpath)

            if os.path.isfile(path):
                return path


    def _set_headers(self, request):
        """
        Sets response headers (content type, cache, CORS) and normalizes
        the request arguments: multiple values are joined by comma and
        the `header`, `fields` and `license` arguments get defaults.
        """
        for k, v in request.args.items():
            request.args[k] = [b','.join(v)]

        request.setHeader('Cache-Control', 'Public')
        request.setHeader('Access-Control-Allow-Origin', '*')

        if '' in request.postpath:
            request.postpath.remove('')

        if not request.postpath:
            request.postpath = ['index.html']

        if request.postpath and request.postpath[0] == 'resources':
            request.args[b'format'] = [b'json']

        local_path = self._local_path(request)

        if local_path:
            # static file: infer the MIME type from the file name
            format_ = mimetypes.guess_type(local_path)[0]
            format_ = (
                tuple(format_.split('/'))
                if format_ else
                ('text', 'plain')
            )

        elif (
            not request.postpath or
            request.postpath[0] in self.htmls or
            request.postpath[0] == 'error_page.html'
        ):
            format_ = ('text', 'html')

        elif (
            b'format' in request.args and
            request.args[b'format'][0] == b'json'
        ):
            format_ = ('application', 'json')

        elif request.postpath[0] == 'favicon.ico':
            format_ = ('image', 'vnd.microsoft.icon')

        else:
            request.args[b'format'] = [b'text']
            format_ = ('text', 'plain')

        request.setHeader(
            'Content-Type',
            '%s/%s%s' % (
                format_ + (
                    '; charset=utf-8' if format_[0] == 'text' else '',
                )
            )
        )

        # serve the header row by default
        request.args[b'header'] = (
            [b'1']
            if b'header' not in request.args else
            request.args[b'header']
        )

        self._set_fields(request)
        self._set_license(request)


    def _set_fields(self, req):
        """
        Normalizes the `fields` argument: resolves synonyms and removes
        duplicates while keeping the original order.
        """
        synonyms = (
            self.field_synonyms
            if hasattr(self, 'field_synonyms') else
            {}
        )

        if b'fields' in req.args:
            used = set()
            fields_checked = []

            for field in req.args[b'fields'][0].decode('utf-8').split(','):
                field = synonyms[field] if field in synonyms else field

                if field not in used:
                    fields_checked.append(field)
                    used.add(field)

            req.args[b'fields'] = [','.join(fields_checked).encode('utf-8')]

        else:
            req.args[b'fields'] = []


    def _set_license(self, req):
        """
        Enforces the license level argument: unless the client
        authenticates with the unlocking secret, a requested `ignore`
        level falls back to the server default.
        """
        query_type = req.postpath[0] if req.postpath else None
        query_type = self._query_type(query_type)

        # only query types with license control are affected
        if (
            not hasattr(self, 'args_reference') or
            not query_type or
            query_type not in self.args_reference or
            'license' not in self.args_reference[query_type]
        ):
            return

        auth = False

        if b'password' in req.args:
            # NOTE: md5 is a weak hash; acceptable only because this
            # gates license level display, not sensitive data
            req_secret = hashlib.md5(req.args[b'password'][0]).hexdigest()
            auth = (
                self._license_secret is not None and
                self._license_secret == req_secret
            )

        # if someone sent a good password
        # why not to ignore the licenses
        if auth:
            req.args[b'license'] = [b'ignore']

        # if the license level is not set
        # or set to `ignore` but no successfull authentication
        # we fall back to the default license level
        if (
            b'license' not in req.args or (
                not auth and
                req.args[b'license'][0] == b'ignore'
            )
        ):
            req.args[b'license'] = self._default_license


    def _process_postpath(self, req):
        """
        Translates positional path elements into query arguments:
        `/<query>/<id1>[/<id2>][/and|or]` becomes source/target (or
        enzyme/substrate) filters with the requested AND/OR logic.
        """
        if len(req.postpath) > 1:
            ids_left = [req.postpath[1].encode('utf-8')]
            ids_right = (
                [req.postpath[2].encode('utf-8')]
                if (
                    len(req.postpath) > 2 and
                    req.postpath[2].lower() not in {'and', 'or'}
                ) else
                None
            )
            left_right = (
                [b'OR']
                if req.postpath[-1].lower() not in {'and', 'or'} else
                [req.postpath[-1].encode('utf-8')]
            )

            if ids_right:
                if req.postpath[0] == 'enzsub':
                    req.args[b'enzymes'] = ids_left
                    req.args[b'substrates'] = ids_right

                else:
                    req.args[b'sources'] = ids_left
                    req.args[b'targets'] = ids_right

            else:
                req.args[b'partners'] = ids_left

            if req.postpath[0] == 'enzsub':
                req.args[b'enzyme_substrate'] = left_right

            else:
                req.args[b'source_target'] = left_right


    def _query_type(self, query_type):
        """
        Resolves query type synonyms (e.g. `network` -> `interactions`);
        unknown values are returned unchanged.
        """
        return (
            self.query_type_synonyms[query_type]
            if (
                hasattr(self, 'query_type_synonyms') and
                query_type in self.query_type_synonyms
            ) else
            query_type
        )


    def _add_html_header(self, local_path, response):
        """
        Wraps an HTML fragment into a full document using the
        `_header.html` and `_footer.html` templates from the www root
        (or a minimal built-in skeleton), unless the file already is a
        complete document.
        """
        if (
            local_path.endswith('html') or
            local_path.endswith('htm')
        ) and not response[0].startswith(b'<!DOCTYPE html>'):

            head_foot = [
                (
                    b'<!DOCTYPE html>\n<html lang="en">\n'
                    b'<head><title>%s</title></head>\n<body>\n'
                ),
                b'</body>\n</html>',
            ]

            for wwwroot in (self.wwwroot, self.wwwbuiltin):
                for i, part in enumerate(('header', 'footer')):
                    path = os.path.join(wwwroot, '_%s.html' % part)

                    if os.path.exists(path):
                        with open(path, 'rb') as fp:
                            head_foot[i] = fp.read()

            if b'%s' in head_foot[0]:
                # take the page title from a `<!-- Title: ... -->` comment
                title = self.recomment.search(response[0])
                title = title.groups()[0] if title else b'pypath server'
                head_foot[0] = head_foot[0] % title.strip()

            response[0] = head_foot[0] + response[0] + head_foot[1]

        return response


    def about(self, req):
        """
        Returns the welcome message including the module version.
        """
        return self.welcome_message


    def info(self, req):
        """
        Returns the resource information page: JSON if requested and
        available, otherwise the generated HTML about page.
        """
        if (
            b'format' in req.args and
            req.args[b'format'][0] == b'json' and
            hasattr(self, 'resources')
        ):
            return self.resources(req)

        rc = resources.get_controller()
        rc.update()

        return generate_about_page.generate_about_html(rc.data)


    def _root(self, req):
        """
        Returns the main (index) page.
        """
        return _html.main_page()


    def _parse_arg(self, arg):
        """
        Parses an argument value (possibly a single element list of
        bytes) into a boolean, understanding numeric strings and the
        usual true/false words.
        """
        if isinstance(arg, list) and arg:
            arg = arg[0]

        if hasattr(arg, 'decode'):
            arg = arg.decode('utf-8')

        if hasattr(arg, 'lower'):
            arg = arg.lower()

        if hasattr(arg, 'isdigit') and arg.isdigit():
            arg = int(arg)

        if arg in _const.BOOLEAN_FALSE:
            arg = False

        if arg in _const.BOOLEAN_TRUE:
            arg = True

        return bool(arg)


    def _read_license_secret(self):
        """
        Reads the license unlocking secret from the configured file (if
        any) and sets the default license level.
        """
        self._license_secret = None
        path = settings.get('license_secret')

        # `settings.get` may return None if the option is not set
        if path and os.path.exists(path):
            self._log('Reading license unlocking secret from `%s`.' % path)

            with open(path, 'r') as fp:
                self._license_secret = fp.read().strip()

        self._default_license = [
            settings.get('server_default_license').encode('ascii')
        ]


    def _error_page(self, req):
        """
        Responds with HTTP 500 and the error page content.
        """
        req.setResponseCode(500)

        return _html.http_500()
[docs]
class TableServer(BaseServer):
query_types = {
'annotations',
'intercell',
'interactions',
'enz_sub',
'enzsub',
'ptms',
'complexes',
'about',
'info',
'queries',
'annotations_summary',
'intercell_summary',
}
data_query_types = {
'annotations',
'intercell',
'interactions',
'enzsub',
'complexes',
}
list_fields = {
'sources',
'references',
'isoforms',
}
int_list_fields = {
'references',
'isoforms',
}
field_synonyms = {
'organism': 'ncbi_tax_id',
'tfregulons_level': 'dorothea_level',
'tfregulons_curated': 'dorothea_curated',
'tfregulons_chipseq': 'dorothea_chipseq',
'tfregulons_tfbs': 'dorothea_tfbs',
'tfregulons_coexp': 'dorothea_coexp',
'sources': 'resources',
'databases': 'resources',
}
args_reference = {
'interactions': {
'header': None,
'format': {
'json',
'tab',
'text',
'tsv',
'table'
},
'license': {
'ignore',
'academic',
'non_profit',
'nonprofit',
'for_profit',
'forprofit',
'commercial',
},
'password': None,
'limit': None,
'datasets': {
'omnipath',
'tfregulons',
'dorothea',
'collectri',
'tf_target',
'tf_mirna',
'lncrna_mrna',
'kinaseextra',
'ligrecextra',
'pathwayextra',
'mirnatarget',
'small_molecule',
},
'types': {
'post_translational',
'transcriptional',
'post_transcriptional',
'mirna_transcriptional',
'lncrna_post_transcriptional',
'small_molecule_protein',
},
'sources': None,
'resources': None,
'databases': None,
'targets': None,
'partners': None,
'genesymbols': _const.BOOLEAN_VALUES,
'evidences': None,
'extra_attrs': None,
'fields': {
'entity_type',
'references',
'sources',
'tfregulons_level',
'tfregulons_curated',
'tfregulons_chipseq',
'tfregulons_tfbs',
'tfregulons_coexp',
'dorothea_level',
'dorothea_curated',
'dorothea_chipseq',
'dorothea_tfbs',
'dorothea_coexp',
'type',
'ncbi_tax_id',
'databases',
'resources',
'organism',
'curation_effort',
'datasets',
'extra_attrs',
'evidences',
},
'tfregulons_levels': {'A', 'B', 'C', 'D', 'E'},
'tfregulons_methods': {
'curated',
'chipseq',
'coexp',
'tfbs',
},
'dorothea_levels': {'A', 'B', 'C', 'D', 'E'},
'dorothea_methods': {
'curated',
'chipseq',
'coexp',
'tfbs',
},
'organisms': {
'9606',
'10090',
'10116',
},
'source_target': {
'AND',
'OR',
'and',
'or',
},
'directed': _const.BOOLEAN_VALUES,
'signed': _const.BOOLEAN_VALUES,
'loops': _const.BOOLEAN_VALUES,
'entity_types': {
'protein',
'complex',
'mirna',
'lncrna',
'small_molecule',
'drug',
'metabolite',
'lipid',
},
},
'enzsub': {
'header': None,
'format': {
'json',
'tab',
'text',
'tsv',
'table',
},
'license': {
'ignore',
'academic',
'non_profit',
'nonprofit',
'for_profit',
'forprofit',
'commercial',
},
'password': None,
'limit': None,
'enzymes': None,
'substrates': None,
'partners': None,
'genesymbols': _const.BOOLEAN_VALUES,
'organisms': {
'9606',
'10090',
'10116',
},
'databases': None,
'resources': None,
'residues': None,
'modification': None,
'types': None,
'fields': {
'sources',
'references',
'ncbi_tax_id',
'organism',
'databases',
'resources',
'isoforms',
'curation_effort',
},
'enzyme_substrate': {
'AND',
'OR',
'and',
'or',
}
},
'annotations': {
'header': None,
'format': {
'json',
'tab',
'text',
'tsv',
'table',
},
'license': {
'ignore',
'academic',
'non_profit',
'nonprofit',
'for_profit',
'forprofit',
'commercial',
},
'password': None,
'limit': None,
'databases': None,
'resources': None,
'proteins': None,
'fields': None,
'genesymbols': _const.BOOLEAN_VALUES,
'entity_types': {
'protein',
'complex',
'mirna',
'lncrna',
'small_molecule',
'drug',
'metabolite',
'lipid',
},
},
'annotations_summary': {
'header': None,
'format': {
'json',
'tab',
'text',
'tsv',
'table',
},
'databases': None,
'resources': None,
'fields': None,
'cytoscape': _const.BOOLEAN_VALUES,
},
'intercell': {
'header': None,
'format': {
'json',
'tab',
'text',
'tsv',
'table',
},
'license': {
'ignore',
'academic',
'non_profit',
'nonprofit',
'for_profit',
'forprofit',
'commercial',
},
'password': None,
'limit': None,
'scope': {
'specific',
'generic',
},
'aspect': {
'functional',
'locational',
},
'source': {
'resource_specific',
'composite',
},
'categories': None,
'databases': None,
'resources': None,
'parent': None,
'proteins': None,
'fields': None,
'entity_types': {
'protein',
'complex',
'mirna',
'lncrna',
'small_molecule',
'drug',
'metabolite',
'lipid',
},
'transmitter': _const.BOOLEAN_VALUES,
'receiver': _const.BOOLEAN_VALUES,
'trans': _const.BOOLEAN_VALUES,
'rec': _const.BOOLEAN_VALUES,
'secreted': _const.BOOLEAN_VALUES,
'plasma_membrane_peripheral': _const.BOOLEAN_VALUES,
'plasma_membrane_transmembrane': _const.BOOLEAN_VALUES,
'sec': _const.BOOLEAN_VALUES,
'pmp': _const.BOOLEAN_VALUES,
'pmtm': _const.BOOLEAN_VALUES,
'causality': {
'transmitter',
'trans',
'receiver',
'rec',
'both'
},
'topology': {
'secreted',
'sec',
'plasma_membrane_peripheral',
'pmp',
'plasma_membrane_transmembrane',
'pmtm',
},
},
'intercell_summary': {
'header': None,
'format': {
'json',
'tab',
'text',
'tsv',
'table',
},
'scope': {
'specific',
'generic',
},
'aspect': {
'functional',
'locational',
},
'source': {
'resource_specific',
'generic',
},
'categories': None,
'resources': None,
'databases': None,
'parent': None,
'fields': None,
'transmitter': _const.BOOLEAN_VALUES,
'receiver': _const.BOOLEAN_VALUES,
'trans': _const.BOOLEAN_VALUES,
'rec': _const.BOOLEAN_VALUES,
'secreted': _const.BOOLEAN_VALUES,
'plasma_membrane_peripheral': _const.BOOLEAN_VALUES,
'plasma_membrane_transmembrane': _const.BOOLEAN_VALUES,
'sec': _const.BOOLEAN_VALUES,
'pmp': _const.BOOLEAN_VALUES,
'pmtm': _const.BOOLEAN_VALUES,
},
'complexes': {
'header': None,
'format': {
'json',
'tab',
'text',
'tsv',
'table',
},
'license': {
'ignore',
'academic',
'non_profit',
'nonprofit',
'for_profit',
'forprofit',
'commercial',
},
'password': None,
'limit': None,
'databases': None,
'resources': None,
'proteins': None,
'fields': None,
},
'resources': {
'license': {
'ignore',
'academic',
'non_profit',
'nonprofit',
'for_profit',
'forprofit',
'commercial',
},
'format': {
'json',
},
'datasets': {
'interactions',
'interaction',
'network',
'enzsub',
'enz_sub',
'enzyme-substrate',
'annotations',
'annotation',
'annot',
'intercell',
'complex',
'complexes',
},
'subtypes': None,
},
'queries': {
'format': {
'tab',
'text',
'tsv',
'table',
'json',
},
},
}
query_type_synonyms = {
'interactions': 'interactions',
'interaction': 'interactions',
'network': 'interactions',
'enz_sub': 'enzsub',
'enz-sub': 'enzsub',
'ptms': 'enzsub',
'ptm': 'enzsub',
'enzyme-substrate': 'enzsub',
'enzyme_substrate': 'enzsub',
'annotations': 'annotations',
'annotation': 'annotations',
'annot': 'annotations',
'intercell': 'intercell',
'intercellular': 'intercell',
'inter_cell': 'intercell',
'inter-cell': 'intercell',
'complex': 'complexes',
'complexes': 'complexes',
}
datasets_ = {
'omnipath',
'tfregulons',
'dorothea',
'collectri',
'tf_target',
'kinaseextra',
'ligrecextra',
'pathwayextra',
'mirnatarget',
'tf_mirna',
'lncrna_mrna',
'small_molecule',
}
dorothea_methods = {'curated', 'coexp', 'chipseq', 'tfbs'}
dataset2type = {
'omnipath': 'post_translational',
'tfregulons': 'transcriptional',
'dorothea': 'transcriptional',
'collectri': 'transcriptional',
'tf_target': 'transcriptional',
'kinaseextra': 'post_translational',
'ligrecextra': 'post_translational',
'pathwayextra': 'post_translational',
'mirnatarget': 'post_transcriptional',
'tf_mirna': 'mirna_transcriptional',
'lncrna_mrna': 'lncrna_post_transcriptional',
'small_molecule': 'small_molecule_protein',
}
interaction_fields = {
'references', 'sources', 'dorothea_level',
'dorothea_curated', 'dorothea_chipseq',
'dorothea_tfbs', 'dorothea_coexp',
'tfregulons_level', 'tfregulons_curated',
'tfregulons_chipseq', 'tfregulons_tfbs', 'tfregulons_coexp',
'type', 'ncbi_tax_id', 'databases', 'organism',
'curation_effort', 'resources', 'entity_type',
'datasets', 'extra_attrs', 'evidences',
}
enzsub_fields = {
'references', 'sources', 'databases',
'isoforms', 'organism', 'ncbi_tax_id',
'curation_effort', 'resources',
}
default_input_files = {
'interactions': 'omnipath_webservice_interactions.tsv',
'enzsub': 'omnipath_webservice_enz_sub.tsv',
'annotations': 'omnipath_webservice_annotations.tsv',
'complexes': 'omnipath_webservice_complexes.tsv',
'intercell': 'omnipath_webservice_intercell.tsv',
}
default_dtypes = collections.defaultdict(
dict,
interactions = {
'source': 'category',
'target': 'category',
'source_genesymbol': 'category',
'target_genesymbol': 'category',
'is_directed': 'int8',
'is_stimulation': 'int8',
'is_inhibition': 'int8',
'consensus_direction': 'int8',
'consensus_stimulation': 'int8',
'consensus_inhibition': 'int8',
'sources': 'category',
'references': 'category',
'dorothea_curated': 'category',
'dorothea_chipseq': 'category',
'dorothea_tfbs': 'category',
'dorothea_coexp': 'category',
'dorothea_level': 'category',
'type': 'category',
'ncbi_tax_id_source': 'int16',
'ncbi_tax_id_target': 'int16',
'entity_type_source': 'category',
'entity_type_target': 'category',
'curation_effort': 'int16',
'extra_attrs': 'category',
'evidences': 'category',
},
annotations = {
'uniprot': 'category',
'genesymbol': 'category',
'entity_type': 'category',
'source': 'category',
'label': 'category',
'value': 'category',
'record_id': 'uint32',
},
enzsub = {
'enzyme': 'category',
'substrate': 'category',
'enzyme_genesymbol': 'category',
'substrate_genesymbol': 'category',
'isoforms': 'category',
'residue_type': 'category',
'residue_offset': 'uint16',
'modification': 'category',
'sources': 'category',
'references': 'category',
'ncbi_tax_id': 'int16',
'curation_effort': 'int32',
},
complexes = {
'name': 'category',
'stoichiometry': 'category',
'sources': 'category',
'references': 'category',
'identifiers': 'category',
},
intercell = {
'category': 'category',
'database': 'category',
'uniprot': 'category',
'genesymbol': 'category',
'parent': 'category',
'aspect': 'category',
'scope': 'category',
'source': 'category',
'entity_type': 'category',
'consensus_score': 'uint16',
'transmitter': 'bool',
'receiver': 'bool',
'secreted': 'bool',
'plasma_membrane_transmembrane': 'bool',
'plasma_membrane_peripheral': 'bool',
}
)
# the annotation attributes served for the cytoscape app
cytoscape_attributes = {
('Zhong2015', 'type'),
('MatrixDB', 'mainclass'),
('Matrisome', ('mainclass', 'subclass', 'subsubclass')),
# ('TFcensus', 'in TFcensus'),
('Locate', ('location', 'cls')),
(
'Phosphatome',
(
'family',
'subfamily',
#'has_protein_substrates',
)
),
('CancerSEA', 'state'),
('GO_Intercell', 'mainclass'),
('Adhesome', 'mainclass'),
('SignaLink3', 'pathway'),
(
'HPA_secretome',
(
'mainclass',
#'secreted',
)
),
(
'OPM',
(
'membrane',
'family',
#'transmembrane',
)
),
('KEGG', 'pathway'),
#(
#'CellPhoneDB',
#(
## 'receptor',
## 'peripheral',
## 'secreted',
## 'transmembrane',
## 'receptor_class',
## 'secreted_class',
#)
#),
('kinase.com', ('group', 'family', 'subfamily')),
('Membranome', ('membrane',)),
#('CSPA', 'in CSPA'),
#('MSigDB', 'geneset'),
#('Integrins', 'in Integrins'),
('HGNC', 'mainclass'),
('CPAD', ('pathway', 'effect_on_cancer', 'cancer', )),
('Signor', 'pathway'),
('Ramilowski2015', 'mainclass'),
('HPA_subcellular', 'location'),
#('DisGeNet', 'disease'),
('Surfaceome', ('mainclass', 'subclasses')),
('IntOGen', 'role'),
('HPMR', ('role', 'mainclass', 'subclass', 'subsubclass')),
#('CancerGeneCensus',
#(
##'hallmark',
##'somatic',
##'germline',
#'tumour_types_somatic',
#'tumour_types_germline',
#)
#),
#('DGIdb', 'category'),
('ComPPI', 'location'),
('Exocarta', 'vesicle'),
('Vesiclepedia', 'vesicle'),
('Ramilowski_location', 'location'),
('LRdb', ('role', 'cell_type')),
}
[docs]
def __init__(
self,
input_files = None,
only_tables = None,
exclude_tables = None,
):
"""
Server based on ``pandas`` data frames.
:param dict input_files:
Paths to tables exported by the ``pypath.websrvtab`` module.
"""
session_mod.Logger.__init__(self, name = 'server')
self._log('TableServer starting up.')
self.input_files = copy.deepcopy(self.default_input_files)
self.input_files.update(input_files or {})
self.data = {}
self.to_load = (
self.data_query_types - common.to_set(exclude_tables)
if only_tables is None else
common.to_set(only_tables)
)
self._log('Datasets to load: %s.' % (', '.join(sorted(self.to_load))))
self._read_tables()
self._preprocess_interactions()
self._preprocess_enzsub()
self._preprocess_annotations()
self._preprocess_complexes()
self._preprocess_intercell()
self._update_resources()
BaseServer.__init__(self)
self._log('TableServer startup ready.')
def _read_tables(self):
self._log('Loading data tables.')
for name, fname in iteritems(self.input_files):
if name not in self.to_load:
continue
fname_gz = f'{fname}.gz'
fname = fname_gz if os.path.exists(fname_gz) else fname
self._log('Loading dataset `%s` from file `%s`.' % (name, fname))
if not os.path.exists(fname):
self._log(
'Missing table: `%s`.' % fname
)
continue
dtype = self.default_dtypes[name]
self.data[name] = pd.read_csv(
fname,
sep = '\t',
index_col = False,
dtype = dtype,
)
self._log(
'Table `%s` loaded from file `%s`.' % (name, fname)
)
def _network(self, req):
hdr = ['nodes', 'edges', 'is_directed', 'sources']
tbl = self.data['network'].field
val = dict(zip(tbl.field, tbl.value))
if b'format' in req.args and req.args[b'format'] == b'json':
return json.dumps(val)
else:
return '%s\n%s' % ('\t'.join(hdr), '\t'.join(
[str(val[h]) for h in hdr]))
def _preprocess_interactions(self):
if 'interactions' not in self.data:
return
self._log('Preprocessing interactions.')
tbl = self.data['interactions']
tbl['set_sources'] = pd.Series(
[set(s.split(';')) for s in tbl.sources]
)
tbl['set_dorothea_level'] = pd.Series(
[
set(s.split(';'))
if not pd.isnull(s) else
set([])
for s in tbl.dorothea_level
]
)
def _preprocess_enzsub(self):
if 'enzsub' not in self.data:
return
self._log('Preprocessing enzyme-substrate relationships.')
tbl = self.data['enzsub']
tbl['set_sources'] = pd.Series(
[set(s.split(';')) for s in tbl.sources]
)
def _preprocess_complexes(self):
if 'complexes' not in self.data:
return
self._log('Preprocessing complexes.')
tbl = self.data['complexes']
tbl = tbl[~tbl.components.isna()]
with ignore_pandas_copywarn():
tbl['set_sources'] = [set(s.split(';')) for s in tbl.sources]
tbl['set_proteins'] = [set(c.split('_')) for c in tbl.components]
self.data['complexes'] = tbl
def _preprocess_annotations_old(self):
if 'annotations' not in self.data:
return
renum = re.compile(r'[-\d\.]+')
def _agg_values(vals):
result = (
'#'.join(sorted(set(str(ii) for ii in vals)))
if not all(
isinstance(i, (int, float)) or (
isinstance(i, str) and
i and (
i is None or
renum.match(i)
)
)
for i in vals
) else
'<numeric>'
)
return result
self._log('Preprocessing annotations.')
self.data['annotations_summary'] = self.data['annotations'].groupby(
['source', 'label'],
).agg({'value': _agg_values}).reset_index(drop = False)
def _preprocess_annotations(self):
if 'annotations' not in self.data:
return
renum = re.compile(r'[-\d\.]+')
self._log('Preprocessing annotations.')
values_by_key = collections.defaultdict(set)
# we need to do it this way as we are memory limited on the server
# and pandas groupby is very memory intensive
for row in self.data['annotations'].itertuples():
value = (
'<numeric>'
if (
(
not isinstance(row.value, bool) and
isinstance(row.value, (int, float))
) or
renum.match(row.value)
) else
str(row.value)
)
values_by_key[(row.source, row.label)].add(value)
for vals in values_by_key.values():
if len(vals) > 1:
vals.discard('<numeric>')
vals.discard('')
vals.discard('nan')
self.data['annotations_summary'] = pd.DataFrame(
list(
(source, label, '#'.join(sorted(values)))
for (source, label), values in iteritems(values_by_key)
),
columns = ['source', 'label', 'value'],
)
def _preprocess_intercell(self):
if 'intercell' not in self.data:
return
self._log('Preprocessing intercell data.')
tbl = self.data['intercell']
tbl.drop('full_name', axis = 1, inplace = True, errors = 'ignore')
self.data['intercell_summary'] = tbl.filter(
['category', 'parent', 'database'],
).drop_duplicates()
def _update_resources(self):
self._log('Updating resource information.')
self._resources_dict = collections.defaultdict(dict)
res_ctrl = resources_mod.get_controller()
for query_type in self.data_query_types:
if query_type not in self.data:
continue
tbl = self.data[query_type]
# finding out what is the name of the column with the resources
# as this is different across the tables
for colname, argname in (
('database', 'databases'),
('sources', 'databases'),
('source', 'databases'),
('category', 'categories')
):
if colname in tbl.columns:
break
# collecting all resource names
values = sorted(set(
itertools.chain(*(
val.split(';') for val in getattr(tbl, colname)
))
))
for db in values:
if 'license' not in self._resources_dict[db]:
license = res_ctrl.license(db)
if license is None:
msg = 'No license for resource `%s`.' % str(db)
self._log(msg)
raise RuntimeError(msg)
license_data = license.features
license_data['name'] = license.name
license_data['full_name'] = license.full_name
self._resources_dict[db]['license'] = license_data
if 'queries' not in self._resources_dict[db]:
self._resources_dict[db]['queries'] = {}
if query_type not in self._resources_dict[db]['queries']:
if query_type == 'interactions':
datasets = set()
for dataset in self.datasets_:
if dataset not in tbl.columns:
continue
for in_dataset, resources in zip(
getattr(tbl, dataset),
tbl.set_sources,
):
if in_dataset and db in resources:
datasets.add(dataset)
break
self._resources_dict[db]['queries'][query_type] = {
'datasets': sorted(datasets),
}
elif query_type == 'intercell':
tbl_db = tbl[
(tbl.database == db) &
(tbl.scope == 'generic')
]
self._resources_dict[db]['queries'][query_type] = {
'generic_categories': sorted(
set(tbl_db.category)
),
}
else:
self._resources_dict[db]['queries'][query_type] = {}
self.args_reference[query_type][argname] = values
self._resources_dict = dict(self._resources_dict)
self._log('Finished updating resource information.')
def _check_args(self, req):
result = []
argname = req.postpath[0]
ref = (
self.args_reference['resources']
if argname == 'databases' else
self.args_reference[argname]
)
for arg, val in iteritems(req.args):
arg = arg.decode('utf-8')
if arg in ref:
if not ref[arg] or not val:
continue
val = (
{val[0]}
if type(val[0]) is int else
set(val[0].decode('utf-8').split(','))
)
unknowns = val - set(ref[arg])
if unknowns:
result.append(
' ==> Unknown values for argument `%s`: `%s`' % (
arg,
', '.join(str(u) for u in unknowns)
)
)
else:
result.append(' ==> Unknown argument: `%s`' % arg)
req.args[b'header'] = self._parse_arg(req.args[b'header'])
if result:
return (
'Something is not entirely good:\n%s\n\n'
'Please check the examples at\n'
'https://github.com/saezlab/pypath\n'
'and\n'
'https://github.com/saezlab/DoRothEA\n'
'If you still experiencing issues contact us at\n'
'https://github.com/saezlab/pypath/issues'
'' % '\n'.join(result)
)
def queries(self, req):
query_type = (
req.postpath[1]
if len(req.postpath) > 1 else
'interactions'
)
query_type = self._query_type(query_type)
query_param = (
req.postpath[2]
if len(req.postpath) > 2 else
None
)
if query_type in self.args_reference:
result = dict(
(
k,
sorted(v) if isinstance(v, _const.LIST_LIKE) else v
)
for k, v in self.args_reference[query_type].items()
)
if query_param is not None and query_param in result:
result = {query_param: result[query_param]}
else:
result = {}
result[query_type] = (
'No possible arguments defined for'
'query `%s` or no such query available.' % query_type
)
result = self._dict_set_to_list(result)
if b'format' in req.args and req.args[b'format'][0] == b'json':
return json.dumps(result)
else:
return 'argument\tvalues\n%s' % '\n'.join(
'%s\t%s' % (
k,
';'.join(v)
if isinstance(v, (list, set, tuple)) else
str(v)
)
for k, v in iteritems(result)
)
@classmethod
def _dict_set_to_list(cls, dct):
return dict(
(
key,
(
sorted(val)
if isinstance(val, _const.LIST_LIKE) else
cls._dict_set_to_list(val)
if isinstance(val, dict) else
val
)
)
for key, val in iteritems(dct)
)
def databases(self, req):
query_type = (
req.postpath[1]
if len(req.postpath) > 1 else
'interactions'
)
query_type = self._query_type(query_type)
datasets = (
set(req.postpath[2].split(','))
if len(req.postpath) > 2 else
None
)
tbl = (
self.data[query_type]
if query_type in self.data else
self.data['interactions']
)
# filter for datasets
if query_type == 'interactions':
if datasets is not None:
tbl = tbl.loc[tbl.type.isin(datasets)]
else:
datasets = self._get_datasets()
result = {}
for dataset in datasets:
result[dataset] = sorted(set.union(
*tbl[tbl.type == dataset].set_sources)
)
else:
result = {}
result['*'] = sorted(set.union(*tbl.set_sources))
if b'format' in req.args and req.args[b'format'][0] == b'json':
return json.dumps(result)
else:
return 'dataset\tresources\n%s' % '\n'.join(
'%s\t%s' % (k, ';'.join(v)) for k, v in iteritems(result)
)
def _get_datasets(self):
return list(self.data['interactions'].type.unique())
def datasets(self, req):
query_type = (
req.postpath[1]
if len(req.postpath) > 1 else
'interactions'
)
if query_type == 'interactions':
result = self._get_datasets()
else:
result = []
if b'format' in req.args and req.args[b'format'][0] == b'json':
return json.dumps(result)
else:
return ';'.join(result)
def interactions(
        self,
        req,
        datasets = {'omnipath'},
        databases = None,
        dorothea_levels = {'A', 'B'},
        organisms = {9606},
        source_target = 'OR',
    ):
    """
    Serve the interactions (molecular network) database.

    Args
    ----
    req : twisted.web.server.Request
        The HTTP request.
    datasets : set
        Datasets served by default when the request defines neither
        datasets nor interaction types.
    databases : set
        Unused in the body; resource filters are read from the request.
    dorothea_levels : set
        Default DoRothEA confidence levels.
    organisms : set
        Default NCBI Taxonomy IDs.
    source_target : str
        'OR' or 'AND': how to combine the source and target filters.

    Returns
    -------
    TSV or JSON serialized data as str (via `_serve_dataframe`),
    or an error message from `_check_args`.

    NOTE(review): the set-typed default arguments are shared across
    calls (mutable default argument); they are only read below, which
    keeps this safe as long as it stays so.
    """
    bad_req = self._check_args(req)
    if bad_req:
        return bad_req
    # default output columns; genesymbol columns may be inserted below
    hdr = [
        'source',
        'target',
        'is_directed',
        'is_stimulation',
        'is_inhibition',
        'consensus_direction',
        'consensus_stimulation',
        'consensus_inhibition',
    ]
    if b'source_target' in req.args:
        source_target = (
            req.args[b'source_target'][0].decode('utf-8').upper()
        )
    # changes the old, "tfregulons" names to new "dorothea"
    self._tfregulons_dorothea(req)
    # `databases` is a legacy alias of `resources`
    if b'databases' in req.args:
        req.args[b'resources'] = req.args[b'databases']
    args = {}
    for arg in (
        'datasets',
        'types',
        'sources',
        'targets',
        'partners',
        'resources',
        'organisms',
        'dorothea_levels',
        'dorothea_methods',
    ):
        args[arg] = self._args_set(req, arg)
    # here adjust on the defaults otherwise we serve empty
    # response by default
    if not args['types']:
        args['datasets'] = args['datasets'] or datasets
    # keep only valid dataset names
    args['datasets'] = args['datasets'] & self.datasets_
    # organisms: keep only numeric (NCBI Taxonomy ID) values
    args['organisms'] = set(
        int(t) for t in args['organisms'] if t.isdigit()
    )
    args['organisms'] = args['organisms'] or organisms
    # do not allow impossible values
    # those would result KeyError later
    args['dorothea_levels'] = (
        args['dorothea_levels'] or
        dorothea_levels
    )
    args['dorothea_methods'] = (
        args['dorothea_methods'] & self.dorothea_methods
    )
    # provide genesymbols: yes or no
    if (
        b'genesymbols' in req.args and
        self._parse_arg(req.args[b'genesymbols'])
    ):
        genesymbols = True
        hdr.insert(2, 'source_genesymbol')
        hdr.insert(3, 'target_genesymbol')
    else:
        genesymbols = False
    self._log('Processed arguments: [%s].' % common.dict_str(args))
    # starting from the entire dataset
    tbl = self.data['interactions']
    # filter by type
    if args['types']:
        tbl = tbl.loc[tbl.type.isin(args['types'])]
    # if partners provided those will overwrite
    # sources and targets
    args['sources'] = args['sources'] or args['partners']
    args['targets'] = args['targets'] or args['partners']
    # then we filter by source and target
    # which matched against both standard names
    # and gene symbols
    if args['sources'] and args['targets'] and source_target == 'OR':
        tbl = tbl.loc[
            tbl.target.isin(args['targets']) |
            tbl.target_genesymbol.isin(args['targets']) |
            tbl.source.isin(args['sources']) |
            tbl.source_genesymbol.isin(args['sources'])
        ]
    else:
        # 'AND' semantics: apply the two filters consecutively
        if args['sources']:
            tbl = tbl.loc[
                tbl.source.isin(args['sources']) |
                tbl.source_genesymbol.isin(args['sources'])
            ]
        if args['targets']:
            tbl = tbl.loc[
                tbl.target.isin(args['targets']) |
                tbl.target_genesymbol.isin(args['targets'])
            ]
    # filter by datasets
    # dataset names double as boolean columns of `tbl`,
    # hence they can be combined into a `DataFrame.query` string
    if args['datasets']:
        tbl = tbl.query(' or '.join(args['datasets']))
    # filter by organism
    tbl = tbl.loc[
        tbl.ncbi_tax_id_source.isin(args['organisms']) |
        tbl.ncbi_tax_id_target.isin(args['organisms'])
    ]
    dorothea_included = (
        'dorothea' in args['datasets'] or
        any(res.endswith('DoRothEA') for res in args['resources']) or
        (
            'transcriptional' in args['types'] and
            not args['datasets']
        )
    )
    # filter by DoRothEA confidence levels
    if dorothea_included and args['dorothea_levels']:
        tbl = tbl.loc[
            self._dorothea_dataset_filter(tbl, args) |
            [
                bool(levels & args['dorothea_levels'])
                for levels in tbl.set_dorothea_level
            ]
        ]
    # filter by databases
    if args['resources']:
        tbl = tbl.loc[
            [
                bool(sources & args['resources'])
                for sources in tbl.set_sources
            ]
        ]
    # filtering for entity types
    if b'entity_types' in req.args:
        entity_types = self._args_set(req, 'entity_types')
        if len(entity_types) == 1 and 'protein' in entity_types:
            # pandas is awful:
            # protein-only: both endpoints must be proteins
            tbl = tbl.loc[
                np.logical_and(
                    tbl.entity_type_source.astype('string') == 'protein',
                    tbl.entity_type_target.astype('string') == 'protein',
                )
            ]
        else:
            # otherwise a match on either endpoint is enough
            tbl = tbl.loc[
                tbl.entity_type_source.isin(entity_types) |
                tbl.entity_type_target.isin(entity_types)
            ]
    # filtering by DoRothEA methods
    if dorothea_included and args['dorothea_methods']:
        q = ['dorothea_%s' % m for m in args['dorothea_methods']]
        # NOTE(review): positional `axis` in `DataFrame.any(1)` is
        # deprecated/removed in newer pandas; `any(axis = 1)` is the
        # forward compatible form — confirm the pandas version pinned.
        tbl = tbl.loc[
            self._dorothea_dataset_filter(tbl, args) |
            tbl[q].any(1)
        ]
    # filter directed & signed
    # directed interactions are served unless explicitly disabled
    if (
        b'directed' not in req.args or
        self._parse_arg(req.args[b'directed'])
    ):
        tbl = tbl.loc[tbl.is_directed == 1]
    if (
        b'signed' in req.args and
        self._parse_arg(req.args[b'signed'])
    ):
        tbl = tbl.loc[np.logical_or(
            tbl.is_stimulation == 1,
            tbl.is_inhibition == 1
        )]
    # loops: remove by default
    if (
        b'loops' not in req.args or
        not self._parse_arg(req.args[b'loops'])
    ):
        # pandas is a disaster:
        tbl = tbl.loc[
            tbl.source.astype('string') !=
            tbl.target.astype('string')
        ]
    # NOTE(review): assumes `b'fields'` is always present in
    # `req.args`; a plain Twisted request without a `fields`
    # parameter would raise KeyError here — confirm an upstream
    # default fills this key.
    req.args[b'fields'] = req.args[b'fields'] or [b'']
    _fields = [
        f for f in
        req.args[b'fields'][0].decode('utf-8').split(',')
        if f in self.interaction_fields
    ]
    # `evidences` and `extra_attrs` are also enabled when they
    # appear anywhere in the request URI
    for f in (b'evidences', b'extra_attrs'):
        if f in req.uri and f not in req.args[b'fields'][0]:
            _fields.append(f.decode('utf-8'))
    # translate the requested field names to actual column names
    for f in _fields:
        if f == 'ncbi_tax_id' or f == 'organism':
            hdr.append('ncbi_tax_id_source')
            hdr.append('ncbi_tax_id_target')
        elif f == 'entity_type':
            hdr.append('entity_type_source')
            hdr.append('entity_type_target')
        elif f in {'databases', 'resources'}:
            hdr.append('sources')
        elif f == 'datasets':
            hdr.extend(
                set(tbl.columns) &
                self.args_reference['interactions']['datasets'] &
                args['datasets']
            )
        else:
            hdr.append(f)
    license = self._get_license(req)
    tbl = self._filter_by_license_interactions(tbl, license)
    tbl = tbl.loc[:,hdr]
    return self._serve_dataframe(tbl, req)
@classmethod
def _dataset_included(cls, dataset: str, args: dict) -> bool:
    """
    Tell whether a dataset is covered by the requested datasets,
    or — when no dataset filter is set — by the requested
    interaction types.
    """
    if dataset in args['datasets']:
        return True
    if args['datasets']:
        return False
    return cls.dataset2type.get(dataset, None) in args['types']
@classmethod
def _dorothea_dataset_filter(cls, tbl: pd.DataFrame, args: dict):
    """
    Boolean mask of the records which must be served regardless of
    the DoRothEA level and method filters: records belonging to the
    requested `tf_target` or `collectri` datasets, and all
    non-transcriptional records.
    """
    # if the tf_target dataset is requested
    # we need to serve it including the parts which
    # don't fit the filters below
    from_tf_target = (
        cls._dataset_included('tf_target', args) &
        tbl.tf_target
    )
    from_collectri = (
        cls._dataset_included('collectri', args) &
        tbl.collectri
    )
    not_transcriptional = tbl.type != 'transcriptional'
    return from_tf_target | from_collectri | not_transcriptional
def _tfregulons_dorothea(self, req):
for arg in (b'datasets', b'fields'):
if arg in req.args:
req.args[arg] = [
it.replace(b'tfregulons', b'dorothea')
for it in req.args[arg]
]
for postfix in (b'levels', b'methods'):
key = b'tfregulons_%s' % postfix
new_key = b'dorothea_%s' % postfix
if key in req.args and new_key not in req.args:
req.args[new_key] = req.args[key]
_ = req.args.pop(key)
def enzsub(
        self,
        req,
        organisms = {9606},
        enzyme_substrate = 'OR'
    ):
    """
    Serve the enzyme-substrate (PTM) relationships.

    Args
    ----
    req : twisted.web.server.Request
        The HTTP request.
    organisms : set
        Default NCBI Taxonomy IDs served when the request does not
        define organisms.
    enzyme_substrate : str
        'OR' or 'AND': how to combine the enzyme and substrate
        filters; can be overridden by the `enzyme_substrate`
        query argument.

    Returns
    -------
    TSV or JSON serialized data as str (via `_serve_dataframe`),
    or an error message from `_check_args`.
    """
    bad_req = self._check_args(req)
    if bad_req:
        return bad_req
    # default output columns; genesymbol columns may be inserted below
    hdr = [
        'enzyme', 'substrate', 'residue_type',
        'residue_offset', 'modification'
    ]
    if b'enzyme_substrate' in req.args:
        enzyme_substrate = (
            req.args[b'enzyme_substrate'][0].decode('utf-8').upper()
        )
    # `databases` is a legacy alias of `resources`
    if b'databases' in req.args:
        req.args[b'resources'] = req.args[b'databases']
    args = {}
    for arg in (
        'enzymes', 'substrates', 'partners',
        'resources', 'organisms', 'types',
        'residues'
    ):
        args[arg] = self._args_set(req, arg)
    # organisms: keep only numeric (NCBI Taxonomy ID) values
    args['organisms'] = set(
        int(t) for t in args['organisms'] if t.isdigit()
    )
    args['organisms'] = args['organisms'] or organisms
    # provide genesymbols: yes or no
    if (
        b'genesymbols' in req.args and
        self._parse_arg(req.args[b'genesymbols'])
    ):
        genesymbols = True
        hdr.insert(2, 'enzyme_genesymbol')
        hdr.insert(3, 'substrate_genesymbol')
    else:
        genesymbols = False
    # starting from the entire dataset
    tbl = self.data['enzsub']
    # filter by modification type
    if args['types']:
        tbl = tbl.loc[tbl.modification.isin(args['types'])]
    # if partners provided those will overwrite
    # enzymes and substrates
    args['enzymes'] = args['enzymes'] or args['partners']
    args['substrates'] = args['substrates'] or args['partners']
    # then we filter by enzyme and substrate
    # which matched against both standard names
    # and gene symbols
    if (
        args['enzymes'] and
        args['substrates'] and
        enzyme_substrate == 'OR'
    ):
        tbl = tbl.loc[
            tbl.substrate.isin(args['substrates']) |
            tbl.substrate_genesymbol.isin(args['substrates']) |
            tbl.enzyme.isin(args['enzymes']) |
            tbl.enzyme_genesymbol.isin(args['enzymes'])
        ]
    else:
        # 'AND' semantics: apply the two filters consecutively
        if args['enzymes']:
            tbl = tbl.loc[
                tbl.enzyme.isin(args['enzymes']) |
                tbl.enzyme_genesymbol.isin(args['enzymes'])
            ]
        if args['substrates']:
            tbl = tbl.loc[
                tbl.substrate.isin(args['substrates']) |
                tbl.substrate_genesymbol.isin(args['substrates'])
            ]
    # filter by organism
    tbl = tbl.loc[tbl.ncbi_tax_id.isin(args['organisms'])]
    # filter by databases
    if args['resources']:
        tbl = tbl.loc[
            [
                bool(args['resources'] & sources)
                for sources in tbl.set_sources
            ]
        ]
    # extra output fields requested by the `fields` argument;
    # use `.get` so a request without `fields` does not raise
    # KeyError (Twisted populates `req.args` only with the
    # parameters actually supplied)
    if req.args.get(b'fields'):
        _fields = [
            f for f in
            req.args[b'fields'][0].decode('utf-8').split(',')
            if f in self.enzsub_fields
        ]
        # translate the requested field names to actual column names
        for f in _fields:
            if f == 'ncbi_tax_id' or f == 'organism':
                hdr.append('ncbi_tax_id')
            elif f in {'databases', 'resources'}:
                hdr.append('sources')
            else:
                hdr.append(f)
    license = self._get_license(req)
    tbl = self._filter_by_license_interactions(tbl, license)
    tbl = tbl.loc[:,hdr]
    return self._serve_dataframe(tbl, req)
def ptms(self, req):
    """Legacy endpoint name: redirect `ptms` queries to `enzsub`."""
    req.postpath[0] = 'enzsub'
    return self.enzsub(req)
def enz_sub(self, req):
    """Alternative endpoint name: redirect `enz_sub` queries to `enzsub`."""
    req.postpath[0] = 'enzsub'
    return self.enzsub(req)
def annotations(self, req):
    """
    Serve the protein annotations database.

    Full download is refused (unless enabled in the settings)
    because of the size of this table; the client must restrict
    the query to certain proteins or resources.

    Returns TSV or JSON serialized data as str (via
    `_serve_dataframe`), or an error message.
    """
    bad_req = self._check_args(req)
    if bad_req:
        return bad_req
    # `databases` is a legacy alias of `resources`
    if b'databases' in req.args:
        req.args[b'resources'] = req.args[b'databases']
    if (
        not settings.get('server_annotations_full_download') and
        not b'resources' in req.args and
        not b'proteins' in req.args
    ):
        return (
            'Downloading the entire annotations database by the REST '
            'API is not allowed because of its huge size (>1GB). '
            'We recommend to query a set of proteins or a few '
            'resources, depending on your interest. '
            'You can always download the full database from '
            'https://archive.omnipathdb.org/'
            'omnipath_webservice_annotations__recent.tsv'
        )
    # starting from the entire dataset
    tbl = self.data['annotations']
    # NOTE(review): `hdr` is a pandas Index here, not a list;
    # see the note at the `insert` call below.
    hdr = tbl.columns
    # filtering for resources
    if b'resources' in req.args:
        resources = self._args_set(req, 'resources')
        tbl = tbl.loc[tbl.source.isin(resources)]
    # filtering for entity types
    if b'entity_types' in req.args:
        entity_types = self._args_set(req, 'entity_types')
        tbl = tbl.loc[tbl.entity_type.isin(entity_types)]
    # filtering for proteins
    # matched against both UniProt IDs and gene symbols
    if b'proteins' in req.args:
        proteins = self._args_set(req, 'proteins')
        tbl = tbl.loc[
            tbl.uniprot.isin(proteins) |
            tbl.genesymbol.isin(proteins)
        ]
    # provide genesymbols: yes or no
    if (
        b'genesymbols' in req.args and
        self._parse_arg(req.args[b'genesymbols'])
    ):
        genesymbols = True
        # NOTE(review): `pandas.Index.insert` returns a new Index
        # and does not mutate in place, so this call is a silent
        # no-op; making `hdr` a list would "fix" it but would
        # duplicate the `genesymbol` column (it is already among
        # `tbl.columns`, see the `proteins` filter above) —
        # confirm the intended behavior before changing.
        hdr.insert(1, 'genesymbol')
    else:
        genesymbols = False
    license = self._get_license(req)
    tbl = self._filter_by_license_annotations(tbl, license)
    tbl = tbl.loc[:,hdr]
    return self._serve_dataframe(tbl, req)
def annotations_summary(self, req):
    """
    Serve the summary of the annotations database: the resource,
    label and value combinations without the per-protein records.
    With the `cytoscape` argument, restrict the output to the
    attributes used by the Cytoscape app.
    """
    bad_req = self._check_args(req)
    if bad_req:
        return bad_req
    # `databases` is a legacy alias of `resources`
    if b'databases' in req.args:
        req.args[b'resources'] = req.args[b'databases']
    # the full summary table
    tbl = self.data['annotations_summary']
    hdr = tbl.columns
    # restrict to the requested resources
    if b'resources' in req.args:
        resources = self._args_set(req, 'resources')
        tbl = tbl.loc[tbl.source.isin(resources)]
    cytoscape = (
        b'cytoscape' in req.args and
        self._parse_arg(req.args[b'cytoscape'])
    )
    tbl = tbl.loc[:,hdr]
    if cytoscape:
        # keep only the (resource, label) pairs used by Cytoscape
        tbl = tbl.set_index(['source', 'label'], drop = False)
        cytoscape_keys = {
            (source, label)
            for source, labels in self.cytoscape_attributes
            for label in (
                labels if isinstance(labels, tuple) else (labels,)
            )
        } & set(tbl.index)
        tbl = tbl.loc[list(cytoscape_keys)]
    return self._serve_dataframe(tbl, req)
def intercell(self, req):
    """
    Serve the intercellular communication roles database.

    Returns TSV or JSON serialized data as str (via
    `_serve_dataframe`), or an error message from `_check_args`.
    """
    bad_req = self._check_args(req)
    if bad_req:
        return bad_req
    # `databases` is a legacy alias of `resources`
    if b'databases' in req.args:
        req.args[b'resources'] = req.args[b'databases']
    # starting from the entire dataset
    tbl = self.data['intercell']
    hdr = tbl.columns
    # filtering for category types
    # each argument maps to the table column of the same name,
    # except `resources`, which maps to the `database` column
    for var in (
        'aspect',
        'source',
        'scope',
        'transmitter',
        'receiver',
        'parent',
        'resources',
    ):
        if var.encode('ascii') in req.args:
            values = self._args_set(req, var)
            if var in {'resources', 'databases'}:
                # rebind `var` so the getattr below reads
                # the `database` column
                var = 'database'
            tbl = tbl.loc[getattr(tbl, var).isin(values)]
    # boolean role/topology flags, each with a short alias
    for (_long, short) in (
        ('transmitter', 'trans'),
        ('receiver', 'rec'),
        ('secreted', 'sec'),
        ('plasma_membrane_peripheral', 'pmp'),
        ('plasma_membrane_transmembrane', 'pmtm'),
    ):
        this_arg = None
        _long_b = _long.encode('ascii')
        short_b = short.encode('ascii')
        # the long form of the argument takes precedence
        if _long_b in req.args:
            this_arg = self._parse_arg(req.args[_long_b])
        elif short_b in req.args:
            this_arg = self._parse_arg(req.args[short_b])
        if this_arg is not None:
            tbl = tbl.loc[getattr(tbl, _long) == this_arg]
    # causality: transmitter, receiver or both
    if b'causality' in req.args:
        causality = self._args_set(req, 'causality')
        trans = causality & {'transmitter', 'trans', 'both'}
        rec = causality & {'receiver', 'rec', 'both'}
        tbl = (
            tbl.loc[tbl.transmitter | tbl.receiver]
            if trans and rec else
            tbl.loc[tbl.transmitter]
            if trans else
            tbl.loc[tbl.receiver]
            if rec else
            tbl
        )
    # topology: combine the requested membrane topologies
    # into a boolean `DataFrame.query` expression
    if b'topology' in req.args:
        topology = self._args_set(req, 'topology')
        query = ' or '.join(
            colname
            for enabled, colname in
            (
                (topology & {'secreted', 'sec'}, 'secreted'),
                (
                    topology & {'plasma_membrane_peripheral', 'pmp'},
                    'plasma_membrane_peripheral'
                ),
                (
                    topology & {'plasma_membrane_transmembrane', 'pmtm'},
                    'plasma_membrane_transmembrane'
                )
            )
            if enabled
        )
        if query:
            tbl = tbl.query(query)
    # filtering for categories
    if b'categories' in req.args:
        categories = self._args_set(req, 'categories')
        tbl = tbl.loc[tbl.category.isin(categories)]
    # filtering for entity types
    if b'entity_types' in req.args:
        entity_types = self._args_set(req, 'entity_types')
        tbl = tbl.loc[tbl.entity_type.isin(entity_types)]
    # filtering for proteins
    # matched against both UniProt IDs and gene symbols
    if b'proteins' in req.args:
        proteins = self._args_set(req, 'proteins')
        tbl = tbl.loc[
            np.logical_or(
                tbl.uniprot.isin(proteins),
                tbl.genesymbol.isin(proteins),
            )
        ]
    license = self._get_license(req)
    tbl = self._filter_by_license_intercell(tbl, license)
    tbl = tbl.loc[:,hdr]
    return self._serve_dataframe(tbl, req)
def intercell_summary(self, req):
    """
    Serve the summary of the intercell database: the category
    classifications without the individual protein records.
    """
    bad_req = self._check_args(req)
    if bad_req:
        return bad_req
    # `databases` is a legacy alias of `resources`
    if b'databases' in req.args:
        req.args[b'resources'] = req.args[b'databases']
    # the full summary table
    tbl = self.data['intercell_summary']
    hdr = tbl.columns
    # category-type filters: each argument maps to the table
    # column of the same name
    for colname in (
        'aspect',
        'source',
        'scope',
        'transmitter',
        'receiver',
        'parent',
        'resources',
    ):
        if colname.encode('ascii') not in req.args:
            continue
        values = self._args_set(req, colname)
        tbl = tbl.loc[getattr(tbl, colname).isin(values)]
    # restrict to the requested categories
    if b'categories' in req.args:
        categories = self._args_set(req, 'categories')
        tbl = tbl.loc[tbl.category.isin(categories)]
    tbl = tbl.loc[:,hdr]
    return self._serve_dataframe(tbl, req)
def complexes(self, req):
    """
    Serve the protein complexes database, optionally filtered
    for resources and component proteins.
    """
    bad_req = self._check_args(req)
    if bad_req:
        return bad_req
    # `databases` is a legacy alias of `resources`
    if b'databases' in req.args:
        req.args[b'resources'] = req.args[b'databases']
    # the full complexes table
    tbl = self.data['complexes']
    # the set-valued helper columns are internal only
    hdr = list(tbl.columns)
    for internal_col in ('set_sources', 'set_proteins'):
        hdr.remove(internal_col)
    # restrict to the requested resources
    if b'resources' in req.args:
        resources = self._args_set(req, 'resources')
        keep = [
            bool(sources & resources)
            for sources in tbl.set_sources
        ]
        tbl = tbl.loc[keep]
    # restrict to complexes with at least one requested protein
    if b'proteins' in req.args:
        proteins = self._args_set(req, 'proteins')
        keep = [
            bool(members & proteins)
            for members in tbl.set_proteins
        ]
        tbl = tbl.loc[keep]
    license = self._get_license(req)
    tbl = self._filter_by_license_complexes(tbl, license)
    tbl = tbl.loc[:,hdr]
    return self._serve_dataframe(tbl, req)
def resources(self, req):
    """
    Serve the resource metadata as JSON, optionally restricted to
    resources belonging to the requested datasets and always
    filtered by the license level of the request.
    """
    datasets = None
    if b'datasets' in req.args:
        datasets = {
            self._query_type(dataset.decode('ascii'))
            for dataset in req.args[b'datasets']
        }
    res_ctrl = resources_mod.get_controller()
    license = self._get_license(req)
    enabled = {}
    for name, info in iteritems(self._resources_dict):
        # the resource license must permit this request
        if not res_ctrl.license(name).enables(license):
            continue
        # if a dataset filter is set, the resource must
        # belong to at least one requested dataset
        if datasets and not datasets & set(info['datasets'].keys()):
            continue
        enabled[name] = info
    return json.dumps(enabled)
@staticmethod
def _get_license(req):
return req.args[b'license'][0].decode('utf-8')
@classmethod
def _filter_by_license_complexes(cls, tbl, license):
    """
    Apply the license filter to a complexes data frame; the
    resource prefixed identifiers are filtered along the resources.
    """
    return cls._filter_by_license(
        tbl,
        license,
        res_col = 'sources',
        simple = False,
        prefix_col = 'identifiers',
    )
@classmethod
def _filter_by_license_interactions(cls, tbl, license):
    """
    Apply the license filter to an interactions data frame; the
    resource prefixed references are filtered along the resources.
    """
    return cls._filter_by_license(
        tbl,
        license,
        res_col = 'sources',
        simple = False,
        prefix_col = 'references',
    )
@classmethod
def _filter_by_license_annotations(cls, tbl, license):
    """
    Apply the license filter to an annotations data frame
    (one resource per record, hence the simple mode).
    """
    return cls._filter_by_license(
        tbl,
        license,
        res_col = 'source',
        simple = True,
    )
@classmethod
def _filter_by_license_intercell(cls, tbl, license):
    """
    Apply the license filter to an intercell data frame
    (one resource per record, hence the simple mode).
    """
    return cls._filter_by_license(
        tbl,
        license,
        res_col = 'database',
        simple = True,
    )
@staticmethod
def _filter_by_license(
        tbl,
        license,
        res_col,
        simple = False,
        prefix_col = None,
    ):
    """
    Drop the records which are not enabled by the license level of
    the request.

    Args
    ----
    tbl : pandas.DataFrame
        The table to filter.
    license : str
        The license level of the request; `LICENSE_IGNORE`
        disables filtering.
    res_col : str
        Name of the column with the resource name(s).
    simple : bool
        If True, `res_col` holds a single resource name per record;
        otherwise records carry sets of resources and `res_col` is
        rewritten in place to the `;` joined enabled resources.
    prefix_col : str
        Optional column of `;` separated, `resource:value` prefixed
        items (references, identifiers) to be filtered along the
        resources.

    Returns
    -------
    The filtered data frame.
    """
    # closure over `res_ctrl` and `license` defined below
    def filter_resources(res):
        # resources whose license permits this request
        res = {
            r for r in res
            if res_ctrl.license(r).enables(license)
        }
        composite = [
            r for r in res
            if res_ctrl.license(r).name == 'Composite'
        ]
        if composite:
            # drop composite resources with no remaining
            # enabled secondary resource
            composite_to_remove = {
                comp_res
                for comp_res in composite
                if not res_ctrl.secondary_resources(comp_res, True) & res
            }
            res = res - composite_to_remove
        return res
    # nothing to do: license ignored or empty table
    if license == LICENSE_IGNORE or tbl.shape[0] == 0:
        return tbl
    res_ctrl = resources_mod.get_controller()
    _res_col = getattr(tbl, res_col)
    if simple:
        # one resource per record: plain boolean mask
        bool_idx = [
            res_ctrl.license(res).enables(license)
            for res in _res_col
        ]
    else:
        # NOTE(review): the set-valued column is hardcoded as
        # `set_sources` rather than derived from `res_col`; all
        # non-simple callers currently pass res_col='sources',
        # so this works — confirm before adding new callers.
        _set_res_col = tbl.set_sources
        _res_to_keep = [
            filter_resources(ress)
            for ress in _set_res_col
        ]
        # rewrite the serialized resource column to the
        # enabled resources only
        with ignore_pandas_copywarn():
            tbl[res_col] = [
                ';'.join(sorted(ress))
                for ress in _res_to_keep
            ]
        if prefix_col:
            _prefix_col = getattr(tbl, prefix_col)
            # keep only the prefixed items whose resource
            # (the part before the first `:`) remains enabled
            _new_prefix_col = [
                ';'.join(sorted(
                    pref_res
                    for pref_res in pref_ress.split(';')
                    if (
                        pref_res.split(':', maxsplit = 1)[0] in
                        _res_to_keep[i]
                    )
                ))
                if isinstance(pref_ress, str) else
                pref_ress
                for i, pref_ress in enumerate(_prefix_col)
            ]
            with ignore_pandas_copywarn():
                tbl[prefix_col] = _new_prefix_col
        # drop the records left without any enabled resource
        bool_idx = [bool(res) for res in tbl[res_col]]
    tbl = tbl.loc[bool_idx]
    return tbl
@classmethod
def _serve_dataframe(cls, tbl, req):
    """
    Serialize a data frame to the final response: TSV by default,
    JSON if the `format=json` argument is set; the `limit` argument
    truncates the output to its first N rows.
    """
    if b'limit' in req.args:
        limit = req.args[b'limit'][0].decode('utf-8')
        # non-numeric limits are silently ignored
        if limit.isdigit():
            limit = int(limit)
            tbl = tbl.head(limit)
    if b'format' in req.args and req.args[b'format'][0] == b'json':
        data_json = tbl.to_json(orient = 'records')
        # this is necessary because in the data frame we keep lists
        # as `;` separated strings but in json is nicer to serve
        # them as lists
        data_json = json.loads(data_json)
        for i in data_json:
            for k, v in iteritems(i):
                if k in cls.list_fields:
                    i[k] = (
                        [
                            (
                                # numeric list fields are served
                                # as lists of ints
                                int(f)
                                if (
                                    k in cls.int_list_fields and
                                    f.isdigit()
                                ) else
                                f
                            )
                            for f in v.split(';')
                        ]
                        if isinstance(v, str) else
                        []
                    )
        return json.dumps(data_json)
    else:
        return tbl.to_csv(
            sep = '\t',
            index = False,
            # NOTE(review): `req.args[b'header']` raises KeyError if
            # the argument is absent, and `bool` of a non-empty list
            # (even `[b'0']`) is always True — this tests presence of
            # the argument list, not its value; confirm an upstream
            # default sets this key and the intended semantics.
            header = bool(req.args[b'header']),
            # NOTE(review): 2e5 is a float; newer pandas expects an
            # int chunksize — confirm the pinned pandas version.
            chunksize = 2e5,
        )
@staticmethod
def _args_set(req, arg):
arg = arg.encode('utf-8')
return (
set(req.args[arg][0].decode('utf-8').split(','))
if arg in req.args
else set()
)
class Rest(object):

    def __init__(
            self,
            port,
            serverclass = TableServer,
            start = True,
            **kwargs
        ):
        """
        Runs a webserver serving a `PyPath` instance listening
        to a custom port.

        Args
        -----
        :param int port:
            The port to listen to.
        :param str serverclass:
            The class implementing the server.
        :param **kwargs:
            Arguments for initialization of the server class.
        """
        self.port = port
        _log('Creating the server class.')
        self.server = serverclass(**kwargs)
        _log('Server class ready.')
        if start:
            _log('Starting the twisted server.')
            self.start()

    def start(self):
        """
        Build the twisted site around the server instance and start
        listening on the configured port. This call blocks:
        `twisted_run` enters the reactor loop.
        """
        self.site = TwistedWebSite(self.server)
        _log('Site created.')
        twisted_listen_tcp(self.port, self.site)
        _log('Server going to listen on port %u from now.' % self.port)
        twisted_run()