# Source code for pypath.core.common

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

"""
Methods working on objects from `pypath.core` which might be useful to access
without importing the main modules.
"""

from future.utils import iteritems

import re

import pandas as pd

import pypath.share.common as common
import pypath_common._constants as _const



# [docs] (documentation link marker left over from the HTML source view)
def filter_network_df(
        df,
        resource = None,
        entity_type = None,
        data_model = None,
        interaction_type = None,
        only_directed = None,
        only_undirected = None,
        only_signed = None,
        only_proteins = None,
        effect = None,
        entities = None,
        source_entities = None,
        target_entities = None,
        swap_undirected = True,
        remove_loops = True,
        entities_or = False,
        **kwargs
    ):
    """
    Filters a network data frame.

    The filter arguments are translated to column names and combined into
    one ``pandas.DataFrame.query`` expression. Arguments with ``None``
    value are ignored, and so are arguments whose translated name is not
    a column of ``df``.

    Args:
        df (pandas.DataFrame): The network data frame. This function
            refers to its ``directed``, ``id_a``, ``id_b``, ``type_a``
            and ``type_b`` columns by name.
        resource: Value(s) to match in the ``sources`` column.
        entity_type: Value(s) to match in the ``type_a`` and ``type_b``
            columns.
        data_model: Value(s) to match in the ``dmodel`` column.
        interaction_type: Value(s) to match in the ``type`` column.
        only_directed (bool): Keep rows where ``directed`` is True. Also
            disables the swapping of undirected rows.
        only_undirected (bool): Keep rows where ``directed`` is False.
            Applied after ``only_directed``, hence it wins if both are set.
        only_signed (bool): Keep rows where ``effect`` is 1 or -1;
            overrides ``effect``. Also disables the swapping of
            undirected rows.
        only_proteins (bool): Same as ``entity_type = 'protein'``;
            overrides ``entity_type``.
        effect: Value(s) to match in the ``effect`` column.
        entities: Value(s) to match in the ``id_a`` and ``id_b`` columns.
        source_entities: Value(s) to match in the ``id_a`` column.
        target_entities: Value(s) to match in the ``id_b`` column.
        swap_undirected (bool): Append a copy of the undirected rows with
            the *a* and *b* sides swapped before filtering.
        remove_loops (bool): Drop rows where ``id_a`` and ``id_b`` are
            equal when compared as strings.
        entities_or (bool): If more than one condition applies to the
            ``id_*`` columns, combine them by *or* instead of *and*.
        **kwargs: Further filters by column name. A ``source_`` prefix or
            ``_a`` suffix addresses the *a* side, a ``target_`` prefix or
            ``_b`` suffix the *b* side.

    Returns:
        pandas.DataFrame: The rows matching all conditions. If no
        condition applies, the data frame itself (including the swapped
        rows, if those were added).

    Raises:
        ValueError: If a filter name resolves to the bare ``type``.
    """

    # only the genuine filter arguments; control flags and `df` itself
    # must never be interpreted as column filters
    args = {
        'resource': resource,
        'entity_type': entity_type,
        'data_model': data_model,
        'interaction_type': interaction_type,
        'effect': effect,
        'entities': entities,
        'source_entities': source_entities,
        'target_entities': target_entities,
    }
    args.update(kwargs)

    # NOTE: the name `query_elements` is referred from the query strings
    # below (`@query_elements[...]`), it must remain a local of this
    # function and must not be renamed
    query_elements = {}

    # attributes existing for both sides of the interaction
    _node_attrs = {'id', 'entity_type'}

    # argument name -> column name (without side postfix)
    _synonyms = {
        'resource': 'sources',
        'data_model': 'dmodel',
        'interaction_type': 'type',
        'entities': 'id',
        'entity': 'id',
    }

    if swap_undirected and not only_directed and not only_signed:

        # `rename` returns a new frame here, so the query result is not
        # modified in place and no warning suppression is needed
        undirected = df.query('not directed', inplace = False).rename(
            columns = {
                'id_a': 'id_b',
                'id_b': 'id_a',
                'type_a': 'type_b',
                'type_b': 'type_a',
            },
        )

        # `DataFrame.append` has been removed in pandas 2.0
        df = pd.concat([df, undirected], ignore_index = True, sort = False)

    if only_directed:

        args['directed'] = True

    if only_signed:

        args['effect'] = {1, -1}

    if only_undirected:

        args['directed'] = False

    if only_proteins:

        args['entity_type'] = 'protein'

    for var, val in args.items():

        if val is None:

            continue

        node_postfix = None

        # the patterns are anchored and the name group is non-greedy,
        # otherwise `\w+` consumes the `_a`/`_b` suffix and it never
        # gets removed
        if var.startswith('source_') or var.endswith('_a'):

            node_postfix = ['_a']
            var = re.sub(r'^(?:source_)?(\w+?)(?:_a)?$', r'\1', var)

        if var.startswith('target_') or var.endswith('_b'):

            node_postfix = ['_b']
            var = re.sub(r'^(?:target_)?(\w+?)(?:_b)?$', r'\1', var)

        if var == 'type':

            raise ValueError(
                '`type` is ambiguous, use either '
                '`interaction_type` or `entity_type`.'
            )

        var = _synonyms.get(var, var)

        # node attributes without explicit side apply to both sides;
        # everything else is either side specific or has no postfix
        node_postfix = (
            ['_a', '_b']
                if var in _node_attrs and not node_postfix else
            node_postfix or ['']
        )

        for pf in node_postfix:

            var_pf = '%s%s' % (var, pf)
            # the entity type columns are called `type_a` and `type_b`
            var_pf = var_pf.replace('entity_type', 'type')

            if var_pf in df.columns:

                query_elements[var_pf] = val

    query = []
    entity_query = []

    for var, val in query_elements.items():

        first = var
        second = '@query_elements["%s"]' % var

        if df.dtypes[var].name == 'object':

            # for object columns the value goes to the left hand side
            first, second = second, first
            op = 'in' if isinstance(val, _const.SIMPLE_TYPES) else '&'

        else:

            op = '==' if isinstance(val, _const.SIMPLE_TYPES) else 'in'

        q = '%s %s %s' % (first, op, second)

        # conditions on the identifiers are collected separately, as
        # these might be combined by `or`
        if var.startswith('id_'):

            entity_query.append(q)

        else:

            query.append(q)

    if entities_or and len(entity_query) > 1:

        query.append('(%s)' % ' or '.join(entity_query))

    else:

        query.extend(entity_query)

    if remove_loops:

        query.append('id_a.astype("str") != id_b.astype("str")')

    query = ' and '.join(query)

    result = df.query(query) if query else df

    return result