Source code for pypath.inputs.lmpid

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import bs4

import pypath.share.curl as curl
import pypath.share.progress as progress
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping
import pypath.inputs.uniprot_db as uniprot_db


def load_lmpid(organism = 9606):
    """
    Retrieves and processes the LMPID domain-motif interaction data.

    The XML is obtained by ``curl.Curl`` from the location configured under
    ``urls.urls['lmpid']['url']`` (the data was originally provided by the
    LMPID authors). Only the records where both the bait and the prey
    UniProt IDs are found in the collection returned by
    ``uniprot_db.get_db`` for ``organism`` are kept.

    :param organism:
        Organism identifier, passed on to ``uniprot_db.get_db``. The
        default 9606 is presumably the NCBI Taxonomy ID of human.

    :return:
        List of dicts, one for each domain-motif interaction, with the keys
        ``bait`` and ``prey`` (UniProt IDs), ``refs`` (list of reference
        strings), ``pos`` (list of integers parsed from the dash separated
        sequence position), ``inst`` (motif instance) and ``dom``
        (interacting domain).
    """

    result = []

    url = urls.urls['lmpid']['url']
    c = curl.Curl(url, silent = False, large = False)

    soup = bs4.BeautifulSoup(c.result, features = 'xml')
    uniprots = uniprot_db.get_db(organism = organism, swissprot = None)

    # Search the document only once: the same list provides the total for
    # the progress bar and the items for the loop below.
    records = soup.find_all('record')

    prg = progress.Progress(
        len(records),
        'Processing data from LMPID',
        21
    )

    for rec in records:

        prg.step()

        uniprot_bait = rec.bait_uniprot_id.text
        uniprot_prey = rec.prey_uniprot_id.text

        # Discard the records where any of the partners does not belong
        # to the requested organism.
        if uniprot_bait in uniprots and uniprot_prey in uniprots:

            result.append({
                'bait': uniprot_bait,
                'prey': uniprot_prey,
                'refs': [x.strip() for x in rec.references.text.split(',')],
                'pos': [
                    int(x)
                    for x in rec.sequence_position.text.split('-')
                ],
                'inst': rec.motif_instance.text,
                'dom': rec.interacting_domain.text
            })

    prg.terminate()

    return result
def lmpid_interactions(organism = 9606):
    """
    Builds a list of interactions from the LMPID domain-motif records.

    Each record supplied by ``pypath.inputs.lmpid.load_lmpid`` becomes a
    list of three elements: the prey, the bait and the references joined
    by semicolons into one string.
    """

    interactions = []

    for record in load_lmpid(organism = organism):

        references = ';'.join(record['refs'])
        interactions.append([record['prey'], record['bait'], references])

    return interactions
def lmpid_dmi(organism = 9606):
    """
    Builds domain-motif interaction records from the LMPID data.

    Each record supplied by ``pypath.inputs.lmpid.load_lmpid`` is converted
    to a dict with the keys ``motif_protein`` (the bait), ``domain_protein``
    (the prey), ``instance``, ``motif_start``, ``motif_end`` (first and
    second elements of the position), ``domain_name``, ``domain_name_type``
    (always ``'name'``) and ``refs``. Returns the list of these dicts.
    """

    dmi = []

    for record in load_lmpid(organism = organism):

        position = record['pos']

        dmi.append({
            'motif_protein': record['bait'],
            'domain_protein': record['prey'],
            'instance': record['inst'],
            'motif_start': position[0],
            'motif_end': position[1],
            'domain_name': record['dom'],
            'domain_name_type': 'name',
            'refs': record['refs']
        })

    return dmi