#!/usr/bin/env python# -*- coding: utf-8 -*-## This file is part of the `pypath` python module## Copyright 2014-2023# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University## Authors: see the file `README.rst`# Contact: Dénes Türei (turei.denes@gmail.com)## Distributed under the GPLv3 License.# See accompanying file LICENSE.txt or copy at# https://www.gnu.org/licenses/gpl-3.0.html## Website: https://pypath.omnipathdb.org/#fromfuture.utilsimportiteritemsimportcollectionsimportbs4importpypath.share.curlascurlimportpypath.resources.urlsasurlsimportpypath.internals.interaasintera
def complexportal_complexes(organism=9606, return_details=False):
    """
    Complex dataset from IntAct.

    Downloads the organism specific zip archive from the URL configured
    at ``urls.urls['complex_portal']['url']``, parses every XML document
    in it and creates one ``intera.Complex`` for each ``interaction``
    element that has at least one UniProt component.

    See more:
    http://www.ebi.ac.uk/intact/complex/
    http://nar.oxfordjournals.org/content/early/2014/10/13/nar.gku975.full.pdf

    Args:
        organism: Numeric organism identifier; only 9606
            (``Homo_sapiens``) is mapped to an archive name here.
        return_details: If True, additionally return a list with one
            dict of raw annotations per processed interaction.

    Returns:
        A dict of ``intera.Complex`` objects keyed by their string
        representation. Complexes with the same string representation
        are merged with ``+=``. If ``return_details`` is True, a tuple
        of this dict and a list of dicts with the keys ``uniprots``,
        ``pdbs``, ``pubmeds``, ``fullname``, ``names`` and
        ``description``.

    Raises:
        KeyError: If ``organism`` has no archive name in the mapping.
    """

    spec = {9606: 'Homo_sapiens'}

    zipurl = '%s/%s.zip' % (
        urls.urls['complex_portal']['url'],
        spec[organism],
    )
    c = curl.Curl(zipurl, large=True, silent=False)

    complexes = {}
    details = []
    name_key = 'complex recommended name'
    pdb_primary_dbs = {'wwpdb', 'rcsb pdb', 'pdbe'}
    id_dbs = {'intact', 'reactome'}

    # presumably one entry per file extracted from the zip archive
    # (name -> content) -- TODO confirm against `curl.Curl`
    for xmlname, xml in iteritems(c.result):

        # NOTE: tags are looked up by their lower case names throughout,
        # matching how the `html.parser` backend reports tag names
        soup = bs4.BeautifulSoup(xml, 'html.parser')

        # interactor element ID -> UniProt ID, only for interactors whose
        # primary reference points to `uniprotkb`
        interactors = {}

        for interactor in soup.find_all('interactor'):

            primaryref = interactor.find('primaryref')

            # interactors without a primary reference can not be
            # translated to UniProt, skip them instead of failing
            if primaryref is None:

                continue

            if primaryref.attrs['db'] == 'uniprotkb':

                interactors[interactor.attrs['id']] = primaryref.attrs['id']

        for interaction in soup.find_all('interaction'):

            uniprots = []

            for intref in interaction.find_all('interactorref'):

                int_id = intref.text

                if int_id not in interactors:

                    continue

                uniprot = interactors[int_id]

                # identifiers starting with `PRO` are ignored
                # (presumably UniProt chain/peptide feature IDs)
                if uniprot.startswith('PRO'):

                    continue

                # keep only the part before the first dash
                # (presumably removes the isoform suffix)
                uniprots.append(uniprot.split('-')[0])

            # interactions without any UniProt component yield nothing
            if not uniprots:

                continue

            description = ''
            pubmeds = []
            pdbs = []
            names = {}
            ids = collections.defaultdict(set)

            # if more than one `curated-complex` attribute is present,
            # the last one is kept
            for attr in interaction.find_all('attribute'):

                if attr.attrs['name'] == 'curated-complex':

                    description = attr.text

            for sr in interaction.find_all('secondaryref'):

                db = sr.attrs['db']

                if db == 'pubmed':

                    pubmeds.append(sr.attrs['id'])

                if db == 'wwpdb':

                    pdbs.append(sr.attrs['id'])

            for pr in interaction.find_all('primaryref'):

                if pr.attrs['db'] in pdb_primary_dbs:

                    pdbs.append(pr.attrs['id'])

            # only the first `xref` element within the interaction is
            # used for collecting the IntAct and Reactome identifiers
            xref = interaction.find('xref')

            if xref is not None:

                for sr in xref.find_all('secondaryref'):

                    if (
                        'reftype' in sr.attrs and
                        sr.attrs['db'] in id_dbs and
                        sr.attrs['reftype'] == 'identity'
                    ):

                        ids[sr.attrs['db']].add(sr.attrs['id'])

            # unique values in a reproducible order
            pubmeds = sorted(set(pubmeds))
            pdbs = sorted(set(pdbs))

            fullname_tag = interaction.find('fullname')
            fullname = None if fullname_tag is None else fullname_tag.text

            for alias in interaction.find_all('alias'):

                names[alias.attrs['type']] = alias.text

            if pdbs:

                ids['PDB'].update(pdbs)

            cplex = intera.Complex(
                components=uniprots,
                name=names.get(name_key),
                references=set(pubmeds),
                sources='ComplexPortal',
                ids=ids,
            )

            key = str(cplex)

            if key in complexes:

                complexes[key] += cplex

            else:

                complexes[key] = cplex

            details.append({
                'uniprots': uniprots,
                'pdbs': pdbs,
                'pubmeds': pubmeds,
                'fullname': fullname,
                'names': names,
                'description': description,
            })

    if return_details:

        return complexes, details

    return complexes