#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import collections
try:
    # The abstract base classes live in the ``collections.abc`` submodule.
    import collections.abc as collections_abc
except ImportError:
    # Fallback for interpreters without ``collections.abc``: use the
    # ``collections`` module itself under the same alias. Only an import
    # failure triggers the fallback; other errors are no longer swallowed.
    import collections as collections_abc
import pypath.share.settings as settings
import pypath.share.common as common
import pypath.core.entity as entity
# Identifier of an annotation category: the category name, the name of its
# parent category and the name of the resource. Instances are created by the
# ``key`` properties of ``AnnotDef`` and ``AnnotationGroup``.
AnnotDefKey = collections.namedtuple(
    'AnnotDefKey',
    ('name', 'parent', 'resource'),
)
class AnnotDef(
        collections.namedtuple(
            'AnnotDefBase',
            [
                'name',
                'resource',
                'parent',
                'aspect',
                'scope',
                'source',
                'args',
                'exclude',
                'transmitter',
                'receiver',
                'resource_name',
                'limit',
                'avoid',
                'enabled',
            ]
        )
    ):
    """
    Definition of an annotation category.

    Annotations are defined by a ``name``, a ``resource`` and an ``args``
    parameter. If ``resource`` is a string it will be first looked up among
    the annotation resources in ``pypath.annot.db``. Otherwise among the
    keys of the classes in the ``CustomAnnotation`` object or in the
    dictionary of the class_definitions. If ``resource`` is a `set`, it
    will be used as a category without further processing. If it is
    callable it will be called and should return a `set`. If ``bool(args)``
    is `False`, in case of annotations in ``pypath.annot.db`` the ``to_set``
    method will be called. Otherwise ``args`` will be passed to the
    ``get_subset`` method. If ``resource`` is callable, ``args`` will be
    passed if available.

    The constructor itself only normalizes a few fields:

    * ``parent`` falls back to ``name`` if not provided.
    * ``resource_name`` is ``resource`` if that is a string not starting
      with ``~`` or ``#``; otherwise the provided ``resource_name``, then
      the ``annot_composite_database_name`` setting, then ``'Unknown'``.
    * ``limit`` and ``avoid`` are converted to tuples: empty if the value
      is falsy, one element long if a single string, set or annotation
      definition is provided.

    All other fields are stored as they are provided.
    """


    def __new__(
            cls,
            name,
            resource,
            parent = None,
            aspect = 'functional',
            scope = 'specific',
            source = 'resource_specific',
            args = None,
            exclude = None,
            transmitter = None,
            receiver = None,
            resource_name = None,
            limit = None,
            avoid = None,
            enabled = True,
        ):

        if cls._is_resource_name(resource):

            # a plain resource name always overrides ``resource_name``
            resource_name = resource

        else:

            resource_name = (
                resource_name or
                settings.get('annot_composite_database_name') or
                'Unknown'
            )

        return super().__new__(
            cls,
            name = name,
            resource = resource,
            parent = parent or name,
            aspect = aspect,
            scope = scope,
            source = source,
            args = args,
            exclude = exclude,
            transmitter = transmitter,
            receiver = receiver,
            resource_name = resource_name,
            limit = cls._zero_one_or_more(limit),
            avoid = cls._zero_one_or_more(avoid),
            enabled = enabled,
        )


    @property
    def key(self):
        """
        The ``AnnotDefKey`` (name, parent, resource name) of this definition.
        """

        return AnnotDefKey(
            name = self.name,
            parent = self.parent,
            resource = self.resource_name,
        )


    @staticmethod
    def _is_resource_name(name):
        """
        Tells if ``name`` is a string which does not start with ``~`` or
        ``#``; any non-string value gives `False`.
        """

        return isinstance(name, str) and not name.startswith(('~', '#'))


    @staticmethod
    def _zero_one_or_more(arg):
        """
        Normalizes ``arg`` for the ``limit`` and ``avoid`` fields.

        Returns an empty tuple for any falsy value, a one element tuple if
        ``arg`` is a single string, set, ``AnnotationGroup``, ``AnnotDef``
        or ``AnnotOp``, and ``arg`` itself otherwise.
        """

        if not arg:

            return ()

        # ``_annot_type`` is defined at the end of this module, hence it is
        # available only at call time, not at class creation
        if isinstance(arg, (str, _annot_type)):

            return (arg,)

        return arg
class AnnotOp(
        collections.namedtuple(
            'AnnotOpBase',
            ['annots', 'op'],
        )
    ):
    """
    Annotation operations consist of list of annotation definitions or names
    as they can be looked up in the ``class_definitions`` of the
    ``CustomAnnotation`` object and an operator to be called on the sets
    (union, intersection or difference).

    :param annots:
        The annotation definitions or names; stored as provided.
    :param callable op:
        The operator. If it is one of the built-in ``set`` methods listed
        in ``AnnotationGroup._set_methods``, it is replaced by the class
        method of ``AnnotationGroup`` with the same name. Any other value
        is stored unchanged.
    """


    def __new__(cls, annots, op = set.union):

        if op in AnnotationGroup._set_methods:

            # swap e.g. ``set.union`` for ``AnnotationGroup.union``
            op = getattr(AnnotationGroup, op.__name__)

        return super().__new__(cls, annots, op)
class AnnotationGroup(collections_abc.Set):
    """
    Represents a set of molecular entities sharing a custom defined
    annotation. This class behaves like a ``set`` and set operations on it
    result in plain ``set`` objects. Normally this class is instantiated by
    ``pypath.core.annot.CustomAnnotation`` in the process of populating
    categories and the contents of the groups defined in
    ``pypath.core.intercell_annot`` in case of annotations of the
    intercellular communication roles.
    For detailed definitions of the parameter values see the Supplementary
    Table S10 in Turei et al. 2020 (in prep).

    The ``union``, ``intersection``, ``difference``,
    ``symmetric_difference`` and ``isdisjoint`` methods are class methods:
    all operands have to be passed as arguments, e.g.
    ``AnnotationGroup.union(a, b)``.

    :param list,set,tuple members:
        The identifiers of the entities in the category.
    :param str name:
        The name of the category; ``'unnamed'`` if not provided.
    :param str parent:
        The name of the parent category; might be the same as ``name`` in
        case of high level (generic) categories. Falls back to ``name``.
    :param str aspect:
        Either *functional* or *locational*.
    :param str source:
        Either *resource_specific* or *composite*.
    :param str scope:
        Either *specific* or *generic*.
    :param str resource:
        The resource (database) name; in case of composite categories it
        should be the name of the database you are actually building. If
        not provided, it is read from the ``pypath.share.settings`` module
        using the ``annot_composite_database_name`` key, and if that is
        empty too, it will be ``'Unknown'``.
    :param bool transmitter:
        Whether the category contains transmitters of signaling information
        from the cell expressing the molecular entities in direction of
        other cells.
    :param bool receiver:
        Whether the category contains receivers of signaling information
        from other cells in direction of the cells expressing the molecular
        entities in the category.
    :param str,set limit:
        Limit to this or these categories. E.g. if it's 'extracellular'
        the result will be the intersection of this category and
        'extracellular' i.e. the category will be limited to extracellular
        proteins.
    :param str,set avoid:
        Avoid elements of this or these categories. E.g. if it's
        'cell_surface' then all cell_surface proteins will be removed from
        this category.
    :param bool enabled:
        Stored on the instance as provided; this class does not use it.
    """

    # the built-in set methods which have a class method counterpart here
    _set_methods = {
        set.union,
        set.intersection,
        set.difference,
        set.symmetric_difference,
    }


    def __init__(
            self,
            members,
            name = None,
            parent = None,
            aspect = 'functional',
            source = 'resource_specific',
            scope = 'specific',
            resource = None,
            transmitter = None,
            receiver = None,
            limit = None,
            avoid = None,
            enabled = True,
        ):

        collections_abc.Set.__init__(self)
        self.members = set(members)
        self.name = name or 'unnamed'
        self.parent = parent or self.name
        self.aspect = aspect
        self.source = source
        self.scope = scope
        self.resource = (
            resource or
            settings.get('annot_composite_database_name') or
            'Unknown'
        )
        self.transmitter = transmitter
        self.receiver = receiver
        self.limit = common.to_set(limit)
        self.avoid = common.to_set(avoid)
        self.enabled = enabled


    def __iter__(self):

        return iter(self.members)


    def __contains__(self, other):

        return other in self.members


    def __len__(self):

        return len(self.members)


    @classmethod
    def _from_iterable(cls, iterable):
        """
        Used by the set operators inherited from the ``Set`` base class:
        makes their result a plain ``set`` instead of an ``AnnotationGroup``.
        """

        return set(iterable)


    def __repr__(self):

        return '<AnnotationGroup `%s` from %s, %u elements>' % (
            self.name,
            self.resource,
            len(self),
        )


    @property
    def label(self):
        """
        Label in the format ``parent::name@resource``; the ``::name`` part
        is omitted if the name is the same as the parent.
        """

        return '%s%s@%s' % (
            self.parent,
            '::%s' % self.name if self.name != self.parent else '',
            self.resource
        )


    @property
    def name_label(self):
        """
        The name passed through ``common.upper0``, with underscores
        replaced by spaces.
        """

        return common.upper0(self.name).replace('_', ' ')


    @property
    def key(self):
        """
        The ``AnnotDefKey`` (name, parent, resource) of this group.
        """

        return AnnotDefKey(
            name = self.name,
            parent = self.parent,
            resource = self.resource,
        )


    def filter_entity_type(self, entity_type = None):
        """
        Returns a copy of the group with only the selected entity types.
        If ``entity_type`` is None returns the object itself.
        """

        if entity_type is None:

            return self

        members = entity.Entity.filter_entity_type(
            self.members,
            entity_type = entity_type,
        )

        return AnnotationGroup(
            members = members,
            name = self.name,
            parent = self.parent,
            aspect = self.aspect,
            source = self.source,
            scope = self.scope,
            resource = self.resource,
            transmitter = self.transmitter,
            # this used to be ``self.transmitter``, which gave the copy
            # a wrong ``receiver`` attribute
            receiver = self.receiver,
            # the attributes below were not carried over to the copy;
            # new sets are passed so the copy does not share them
            limit = set(self.limit),
            avoid = set(self.avoid),
            enabled = self.enabled,
        )


    def count_entity_type(self, entity_type = None):
        """
        Passes the members and ``entity_type`` to
        ``entity.Entity.count_entity_type`` and returns its result.
        """

        return entity.Entity.count_entity_type(
            self.members,
            entity_type = entity_type,
        )


    @property
    def args(self):
        """
        Dict of the attributes listed by ``keys``.
        """

        return dict(**self)


    def keys(self):
        """
        Names of the attributes included in ``args``; together with
        ``__getitem__`` this makes ``dict(**self)`` work.
        """

        return ['name', 'parent', 'source', 'scope', 'aspect']


    def __getitem__(self, key):

        return getattr(self, key)


    @property
    def n_proteins(self):

        return self.count_entity_type(entity_type = 'protein')


    @property
    def n_mirnas(self):

        return self.count_entity_type(entity_type = 'mirna')


    @property
    def n_complexes(self):

        return self.count_entity_type(entity_type = 'complex')


    @property
    def proteins(self):

        return self.filter_entity_type(entity_type = 'protein')


    @property
    def complexes(self):

        return self.filter_entity_type(entity_type = 'complex')


    @property
    def mirnas(self):

        return self.filter_entity_type(entity_type = 'mirna')


    @staticmethod
    def sets(*args):
        """
        Generator yielding one set for each argument: the argument itself
        if it is a ``set``, its ``members`` attribute if it has one,
        otherwise the result of ``common.to_set``.
        """

        return (
            (
                a
                    if isinstance(a, set) else
                a.members
                    if hasattr(a, 'members') else
                common.to_set(a)
            )
            for a in args
        )


    @classmethod
    def union(cls, *args):

        return set.union(*cls.sets(*args))


    @classmethod
    def intersection(cls, *args):

        return set.intersection(*cls.sets(*args))


    @classmethod
    def difference(cls, *args):

        return set.difference(*cls.sets(*args))


    @classmethod
    def symmetric_difference(cls, *args):

        return set.symmetric_difference(*cls.sets(*args))


    @classmethod
    def isdisjoint(cls, *args):

        return set.isdisjoint(*cls.sets(*args))
# Types which represent a set of entities.
_set_type = (set, AnnotationGroup)
# The set-like types above plus the annotation definitions and operations;
# used by ``AnnotDef._zero_one_or_more`` to recognize a single value.
_annot_type = (*_set_type, AnnotDef, AnnotOp)