Source code for pypath.inputs.biogrid

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from typing import List, Optional
from numbers import Number

import collections

import pypath.resources.urls as urls
import pypath.share.curl as curl


[docs] def biogrid_interactions( organism: int = 9606, htp_limit: Optional[Number] = 1, ltp: bool = True, ) -> List[tuple]: """ Downloads and processes Physical multi-validated BioGRID interactions. Keeps only the "low throughput" interactions. Returns list of interactions. Args organism: NCBI Taxonomy ID of organism. htp_limit: Exclude interactions only from references cited at more than this number of interactions. """ BiogridPhysicalInteraction = collections.namedtuple( 'BiogridPhysicalInteraction', ( 'partner_a', 'partner_b', 'pmid', ), ) organism = str(organism) interactions = [] refc = [] url = urls.urls['biogrid']['mv'] c = curl.Curl(url, silent = False, large = True, slow = True) f = next(iter(c.result.values())) nul = f.readline() for l in f: l = l.split('\t') if len(l) > 17: if ( ( l[17].startswith('Low') or not ltp ) and ( organism == 'None' or ( l[15] == organism and l[16] == organism ) ) ): interactions.append( BiogridPhysicalInteraction( partner_a = l[7], partner_b = l[8], pmid = l[14], ) ) refc.append(l[14]) refc = collections.Counter(refc) if htp_limit is not None: interactions = [i for i in interactions if refc[i[2]] <= htp_limit] return interactions
[docs] def biogrid_all_interactions( organism: int = 9606, htp_limit: Optional[Number] = 1, ltp: bool = True, ) -> List[tuple]: """ Downloads and processes all BioGRID interactions. Keeps only the "low throughput" interactions. Returns list of interactions. Args organism: NCBI Taxonomy ID of organism. htp_limit: Exclude interactions only from references cited at more than this number of interactions. """ BiogridInteraction = collections.namedtuple( 'BiogridInteraction', ( 'partner_a', 'partner_b', 'pmid', 'experimental_system', 'experimental_system_type', 'ltp', 'htp_score', 'multi_validated', 'tax_a', 'tax_b', ), ) organism = str(organism) interactions = [] refc = [] mv_dict=collections.defaultdict(list) for i in biogrid_interactions(organism, htp_limit, ltp): mv_dict[i.partner_a].append(i.partner_b) url = urls.urls['biogrid']['all'] c = curl.Curl(url, silent = False, large = True, slow = True) f = next(iter(c.result.values())) nul = f.readline() for l in f: l = l.split('\t') if len(l) > 17: if ( ( l[17].startswith('Low') or not ltp ) and ( organism == 'None' or ( l[15] == organism and l[16] == organism ) ) ): mv = l[8] in mv_dict[l[7]] or l[7] in mv_dict[l[8]] interactions.append( BiogridInteraction( partner_a = l[7], partner_b = l[8], pmid = l[14], experimental_system = l[11], experimental_system_type = l[12], ltp = 'Low T' in l[17], htp_score = None if l[18] == '-' else float(l[18]), multi_validated = mv, tax_a = l[15], tax_b = l[16], ) ) refc.append(l[14]) refc = collections.Counter(refc) if htp_limit is not None: interactions = [i for i in interactions if refc[i[2]] <= htp_limit] return interactions