#!/usr/bin/env python# -*- coding: utf-8 -*-## This file is part of the `pypath` python module## Copyright 2014-2023# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University## Authors: see the file `README.rst`# Contact: Dénes Türei (turei.denes@gmail.com)## Distributed under the GPLv3 License.# See accompanying file LICENSE.txt or copy at# https://www.gnu.org/licenses/gpl-3.0.html## Website: https://pypath.omnipathdb.org/#from__future__importannotationsfromtypingimportIterable,Literalimportcollectionsimportpypath.resources.urlsasurlsimportpypath.share.curlascurlimportpypath.share.commonascommonCONFIDENCE_THRESHOLDS={'highest_confidence':900,'high_confidence':700,'medium_confidence':400,'low_confidence':.150,}
def string_links_interactions(
        ncbi_tax_id: int = 9606,
        score_threshold: (
            int |
            Literal[
                'highest_confidence',
                'high_confidence',
                'medium_confidence',
                'low_confidence',
            ]
        ) = 'highest_confidence',
        physical_interaction_score: bool = True,
    ) -> Iterable[tuple]:
    """
    Downloads protein network data, including subscores per channel.

    The output contains both functional and physical protein associations.
    The combined physical interaction score is defined between the proteins
    for which we have evidence of their binding or forming a physical
    complex.

    This function is a generator: records are produced one by one while
    iterating over the downloaded file.

    Args
        ncbi_tax_id: NCBI Taxonomy ID of the organism; it is substituted
            into the download URL.
        score_threshold: Minimum required interaction score. User can use
            pre-defined confidence limits (keys of
            ``CONFIDENCE_THRESHOLDS``) or can define a custom value. Any
            value that is not a key of ``CONFIDENCE_THRESHOLDS`` is used
            as the threshold itself.
        physical_interaction_score: If True, the physical subnetwork is
            retrieved too (with threshold 0) and its combined score is
            attached to each record as ``physical_combined_score``.

    Yields
        ``StringLinksInteraction`` named tuples for the records whose
        combined score is at least the threshold.
        ``physical_combined_score`` is ``None`` if the pair has no entry
        in the physical subnetwork or if ``physical_interaction_score``
        is False.
    """

    StringLinksInteraction = collections.namedtuple(
        'StringLinksInteraction',
        (
            'protein_a',
            'protein_b',
            'neighborhood_score',
            'fusion',
            'cooccurence',
            'coexpression',
            'experimental',
            'database',
            'textmining',
            'combined_score',
            'physical_combined_score',
        ),
    )

    # An empty mapping makes every lookup below return None when the
    # physical scores are not requested.
    phy_links = {}

    if physical_interaction_score:

        phy_links = {
            (i.protein_a, i.protein_b): i.combined_score
            for i in string_physical_interactions(
                ncbi_tax_id=ncbi_tax_id,
                score_threshold=0,
            )
        }

    url = urls.urls['string']['links'] % ncbi_tax_id
    c = curl.Curl(url, silent=False, large=True)

    # Skip the header line. The default is essential here: inside a
    # generator a bare `next()` on an exhausted iterator would surface as
    # `RuntimeError: generator raised StopIteration` (PEP 479).
    next(c.result, None)

    min_score = CONFIDENCE_THRESHOLDS.get(score_threshold, score_threshold)

    for line in c.result:

        fields = line.strip().split(' ')

        # The first two columns are dot-separated identifiers; the part
        # after the first dot is kept (presumably `<taxid>.<protein id>`
        # -- confirm against the STRING file format).
        prot_a_id = fields[0].split('.')[1]
        prot_b_id = fields[1].split('.')[1]

        combined_score = int(fields[9])

        if combined_score < min_score:

            continue

        yield StringLinksInteraction(
            protein_a=prot_a_id,
            protein_b=prot_b_id,
            neighborhood_score=int(fields[2]),
            fusion=int(fields[3]),
            cooccurence=int(fields[4]),
            coexpression=int(fields[5]),
            experimental=int(fields[6]),
            database=int(fields[7]),
            textmining=int(fields[8]),
            combined_score=combined_score,
            physical_combined_score=phy_links.get((prot_a_id, prot_b_id)),
        )
def string_physical_interactions(
        ncbi_tax_id: int = 9606,
        score_threshold: (
            int |
            Literal[
                'highest_confidence',
                'high_confidence',
                'medium_confidence',
                'low_confidence',
            ]
        ) = 'highest_confidence',
    ) -> list[tuple]:
    """
    Downloads protein physical subnetwork data, including subscores per
    channel.

    The interactions indicate that the proteins are part of a physical
    complex.

    Args
        ncbi_tax_id: NCBI Taxonomy ID of the organism; it is substituted
            into the download URL.
        score_threshold: Minimum required interaction score. User can use
            pre-defined confidence limits (keys of
            ``CONFIDENCE_THRESHOLDS``) or can define a custom value.

    Returns
        List of ``StringPhysicalInteraction`` named tuples, one for each
        record whose combined score reaches the threshold.
    """

    StringPhysicalInteraction = collections.namedtuple(
        'StringPhysicalInteraction',
        (
            'protein_a',
            'protein_b',
            'experimental',
            'database',
            'textmining',
            'combined_score',
        ),
    )

    url = urls.urls['string']['physical_links'] % ncbi_tax_id
    c = curl.Curl(url, silent=False, large=True)

    # The first line is the header, it carries no record.
    next(c.result)

    min_score = CONFIDENCE_THRESHOLDS.get(score_threshold, score_threshold)

    # Lazy split of each remaining line into its space separated columns.
    records = (raw.strip().split(' ') for raw in c.result)

    return [
        StringPhysicalInteraction(
            protein_a=rec[0].split('.')[1],
            protein_b=rec[1].split('.')[1],
            experimental=int(rec[2]),
            database=int(rec[3]),
            textmining=int(rec[4]),
            combined_score=int(rec[5]),
        )
        for rec in records
        if int(rec[5]) >= min_score
    ]
def string_species() -> dict[str, str]:
    """
    Downloads list of organisms in STRING.

    The first line of the downloaded table is skipped as header. From each
    further line the first tab separated column is used as key and the
    fourth one as value.

    Returns
        Dict of tax ids as keys and scientific names of organisms as
        values. The tax ids are kept as strings, exactly as they are read
        from the file: they are not converted to integers.
    """

    species = {}

    url = urls.urls['string']['species']
    c = curl.Curl(url, silent=False, large=True)

    # Discard the header line.
    next(c.result)

    for line in c.result:

        fields = line.strip().split('\t')
        tax_id, official_name = fields[0], fields[3]
        species[tax_id] = official_name

    return species