Source code for pypath.inputs.intact

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from numbers import Number
from typing import List

import collections

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.share.progress as progress


def intact_interactions(
        miscore: Number = .6,
        organism: int = 9606,
        complex_expansion: bool = False,
        only_proteins: bool = False,
        only_ids: bool = False,
    ) -> List[tuple]:
    """
    Collect interaction records from the IntAct MI-tab file.

    Reads the file ``intact.txt`` obtained through ``curl.Curl`` from the
    ``intact``/``mitab`` URL, skips the header line and builds one
    ``IntactInteraction`` named tuple for every record passing the
    filters below.

    miscore : Number
        Lower threshold for the ``intact-miscore`` confidence value;
        records scoring below it are dropped. A record without such a
        value is treated as having score 0.
    organism : int
        Taxon identifier; both partners of a record must match it.
        ``None`` disables the organism filter.
    complex_expansion : bool
        If False, records with "expansion" in their complex expansion
        column are dropped.
    only_proteins : bool
        Keep only records of protein-protein interactions.
    only_ids : bool
        Load only the identifiers of interacting pairs (smaller memory
        footprint).
        NOTE(review): this argument is not referenced by the function
        body at present, hence it has no effect -- confirm whether it
        should be implemented or deprecated.

    Returns a list of ``IntactInteraction`` named tuples. Lines with too
    few columns (e.g. blank lines) are skipped.
    """

    id_types = {
        'uniprotkb': 'uniprot',
    }

    # the highest column index accessed below is 15 (complex expansion)
    min_columns = 16

    IntactInteraction = collections.namedtuple(
        'IntactInteraction',
        (
            'id_a',
            'id_b',
            'id_type_a',
            'id_type_b',
            'pubmeds',
            'methods',
            'interaction_types',
            'mi_score',
            'isoform_a',
            'isoform_b',
        ),
    )
    IntactInteraction.__new__.__defaults__ = (None,) * 7


    def get_id_type(field):
        # the database prefix before the first colon, translated to the
        # local name if we have one; `-` means no identifier

        id_type = None if field == '-' else field.split(':')[0]

        return id_types.get(id_type, id_type)


    def get_uniprot_id(field):
        # returns the primary accession and the isoform number (or None)

        uniprot, isoform = _try_isoform(
            field.split(':')[1].replace('"', '')
        )

        # suffixes which are not isoform numbers are discarded too
        uniprot = uniprot.split('-')[0]

        return uniprot, isoform


    def get_ebi_id(field):
        # non-UniProt identifiers never carry isoform information

        if field == '-':

            return None, None

        return field.split(':')[1], None


    def get_taxon(field):
        # first taxon of the column, without its name in parentheses;
        # the integer 0 never equals the organism string, so records
        # without taxon pass only if no organism filter is set

        return (
            0
                if field == '-' else
            field.split('|')[0].split(':')[1].split('(')[0]
        )


    def get_term_names(field):
        # names of controlled vocabulary terms, i.e. the part within
        # parentheses; values without a name (such as the `-` missing
        # value marker) are ignored instead of raising IndexError

        return {
            term.split('(')[1].strip(')"')
            for term in field.split('|')
            if '(' in term
        }


    results = []

    url = urls.urls['intact']['mitab']

    # taxon identifiers are compared as strings
    if isinstance(organism, int):

        organism = '%u' % organism

    c = curl.Curl(
        url,
        silent = False,
        large = True,
        files_needed = ['intact.txt'],
        slow = True,
    )

    data = c.result['intact.txt']
    size = c.sizes['intact.txt']

    prg = progress.Progress(size, 'Reading IntAct MI-tab file', 99)

    for lnum, line in enumerate(data):

        prg.step(len(line))

        # the first line is the header
        if lnum == 0:

            continue

        rec = line.strip('\n\r ').split('\t')

        # blank or truncated lines can not be processed
        if len(rec) < min_columns:

            continue

        if organism is not None and not (
            get_taxon(rec[9]) == organism and
            get_taxon(rec[10]) == organism
        ):

            continue

        if not complex_expansion and 'expansion' in rec[15]:

            continue

        # finding mi-score
        sc = 0

        for conf in rec[14].split('|'):

            if conf.startswith('intact-miscore'):

                sc = float(conf.split(':')[1])

        # filtering for mi-score
        if sc < miscore:

            continue

        id_type_a = get_id_type(rec[0])
        id_type_b = get_id_type(rec[1])

        if only_proteins and not (
            id_type_a == 'uniprot' and
            id_type_b == 'uniprot'
        ):

            continue

        id_a, isoform_a = (
            get_uniprot_id(rec[0])
                if id_type_a == 'uniprot' else
            get_ebi_id(rec[0])
        )
        id_b, isoform_b = (
            get_uniprot_id(rec[1])
                if id_type_b == 'uniprot' else
            get_ebi_id(rec[1])
        )

        pubmeds = {
            ref[1]
            for ref in (
                ref.split(':')
                for ref in rec[8].split('|')
            )
            if ref[0] == 'pubmed'
        }

        results.append(
            IntactInteraction(
                id_a = id_a,
                id_b = id_b,
                id_type_a = id_type_a,
                id_type_b = id_type_b,
                pubmeds = pubmeds,
                methods = get_term_names(rec[6]),
                interaction_types = get_term_names(rec[11]),
                mi_score = sc,
                isoform_a = isoform_a,
                isoform_b = isoform_b,
            )
        )

    prg.terminate()

    return results


def _try_isoform(name): name = name.split('-') if len(name) > 1 and name[1].isdigit(): isoform = int(name[1]) main = name[0] else: main = '-'.join(name) isoform = None return main, isoform