Introduction

This notebook shows how to get neighbour nodes of a given protein in pypath.

Analysis

In [1]:
# Show all the plots inside the notebook
%matplotlib inline
In [4]:
# load packages
import pypath
import igraph  # import igraph to use the plot function

import numpy as np
import pandas as pd
import seaborn as sns
/usr/lib/python2.7/site-packages/matplotlib/__init__.py:872: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.
  warnings.warn(self.msg_depr % (key, alt_key))
In [6]:
pa = pypath.PyPath()

	=== d i s c l a i m e r ===

	All data coming with this module
	either as redistributed copy or downloaded using the
	programmatic interfaces included in the present module
	are available under public domain, are free to use at
	least for academic research or education purposes.
	Please be aware of the licences of all the datasets
	you use in your analysis, and please give appropriate
	credits for the original sources when you publish your
	results. To find out more about data sources please
	look at `pypath.descriptions` and
	`pypath.data_formats.urls`.

	ยป New session started,
	session ID: '165bb'
	logfile:'./log/165bb.log'.
In [7]:
pa.init_network(pfile = 'cache/default_network.pickle')
	:: Network loaded from `cache/default_network.pickle`. 6710 nodes, 24833 edges.
	:: Loading 'genesymbol' to 'uniprot' mapping table
In [8]:
# remove links reported in papers with more than 50 interactions (by default)
pa.remove_htp()
	:: Interactions from only high-throughput resources have been removed.
	   0 interactions removed.
	   Number of edges decreased from 24833 to 24833, number of vertices from 6710 to 6710.

After loading OmniPath, we will prepare a list of the proteins we want to query. The following are the 5 most frequently mutated genes in prostate cancer.

In [9]:
query_nodes = set(['PTEN', 'FOXA1', 'TP53', 'SPOP', 'AR'])
In [10]:
for igene in query_nodes:
    # to query a node based on the value of an attribute we can use the igraph find() method
    #prot = pa.graph.vs.find(label=i)['name']
    # if the attribute is the vertex label (genesymbol) we can use pypath's genesymbol() function
    prot = pa.genesymbol(igene)['name']
    #neighbours_of_prot = pa.first_neighbours(prot)
    neighbours_of_prot = list(pa.gs_neighbors(igene).gs())
    print('{} ({}) has {} neighbours:'.format(igene, prot, len(neighbours_of_prot)))
    if len(neighbours_of_prot)<10:
        print(neighbours_of_prot)
    else:
        print('(showing only the first 10 proteins)')
        print(neighbours_of_prot[0:10])
    print('---')
FOXA1 (P55317) has 4 neighbours:
['AR', 'NFIX', 'TLE1', 'NFIB']
---
PTEN (P60484) has 49 neighbours:
(showing only the first 10 proteins)
['AKT1', 'RPS27A', 'AR', 'PPM1A', 'CDC42', 'PDPK1', 'SP1', 'CSNK2A1', 'STK11', 'PTK2']
---
SPOP (O43791) has 3 neighbours:
['TRAF6', 'CUL3', 'TRAF1']
---
AR (P10275) has 230 neighbours:
(showing only the first 10 proteins)
['AKT1', 'DDX5', 'IRF1', 'STAT3', 'NCOR2', 'EGFR', 'MDM2', 'TNK2', 'DAPK3', 'NCOR1']
---
TP53 (P04637) has 259 neighbours:
(showing only the first 10 proteins)
['RPS27A', 'ULK1', 'DDX5', 'CYCS', 'NTRK1', 'CSNK2B', 'RAD51', 'BTRC', 'NOXA1', 'ZBTB17']
---
In [11]:
# modify some of the visual style settings for the igraph plotting function
visual_style = {'bbox': (300, 300),
               'margin': 50}
In [13]:
# get neighbourhood graphs for each of the query nodes
subgraph = {}
for igene in query_nodes:
    subgraph[igene] = pa.neighbourhood_network(pa.genesymbol(igene)['name'])
    igraph.plot(subgraph[igene], layout=subgraph[igene].layout_auto(), **visual_style)
In [14]:
# plot neighbourhood of SPOP
igene = 'SPOP'
print(subgraph[igene].vs['label'])
igraph.plot(subgraph[igene], layout=subgraph[igene].layout_auto(), **visual_style)
['SPOP', 'TRAF6', 'CUL3', 'TRAF1']
Out[14]:
In [11]:
# for some reason, the node labels are not always correctly displayed inside IPython notebook
# however, they appear correctly if printed to a file
igene = 'FOXA1'
print(subgraph[igene].vs['label'])
igraph.plot(subgraph[igene], layout=subgraph[igene].layout_auto(), **visual_style)
['AR', 'NFIX', 'TLE1', 'FOXA1', 'NFIB']
Out[11]:
In [12]:
# the *.pdf file generated after executing this line contains the graph with the correct labels
#igraph.plot(subgraph[igene], 'FOXA1_neighbourhood.pdf', layout=subgraph[igene].layout_auto(), **visual_style)
In [15]:
# find shortest path between SPOP and FOXA1
path = pa.graph.get_shortest_paths(pa.genesymbol('SPOP')['name'], to=pa.genesymbol('FOXA1')['name'])
# the result is returned as a list with a single element
path = path[0]
In [16]:
path_SPOP_to_FOXA1_length = len(path)-1
print('The path from SPOP to FOXA1 has {} steps:'.format(path_SPOP_to_FOXA1_length))
print('\t' + ' --> '.join(pa.graph.vs[i]['label'] for i in path))
The path from SPOP to FOXA1 has 4 steps:
	SPOP --> TRAF6 --> AKT1 --> AR --> FOXA1
In [17]:
# find shortest path between FOXA1 and SPOP
path = pa.graph.get_shortest_paths(pa.genesymbol('FOXA1')['name'], to=pa.genesymbol('SPOP')['name'])
In [18]:
# alternative way of showing path members
for i in path[0]:
    print(pa.graph.vs[i]['label'])
FOXA1
AR
AKT1
TRAF6
SPOP
In [19]:
# find all paths between SPOP and FOXA1 (of length equal to the shortest path length)

# to find the index based on the value of an attribute we can use igraph's select() function
#node_start = pa.graph.vs.select(label='SPOP').indices[0]
#node_end = pa.graph.vs.select(label='FOXA1').indices[0]
# or, if the attribute is the gene symbol, we can use pypath's genesymbol() function
node_start = pa.genesymbol('SPOP').index
node_end = pa.genesymbol('FOXA1').index

paths = pa.find_all_paths(start=node_start, end=node_end, maxlen=path_SPOP_to_FOXA1_length)
print('Number of paths: {}'.format(len(paths)))
	:: Looking up all paths up to length 4: finished, 100.0%
Number of paths: 16
In [20]:
p = [item for sublist in paths for item in sublist]
p = set(p)
print('Number of nodes: {}'.format(len(p)))
Number of nodes: 20
In [21]:
# extract graph expanded by the nodes included in the path
connection_graph = pa.graph.induced_subgraph(p)
In [22]:
igraph.plot(connection_graph, layout=connection_graph.layout_auto(), **visual_style)
Out[22]:
In [23]:
igraph.plot(connection_graph.get_adjacency())
Out[23]:
In [24]:
xs, ys = zip(*[(left, count) for left, _, count in connection_graph.degree_distribution().bins()])
sns.plt.bar(xs, ys)
sns.plt.title('Degree distribution of shortest path network between SPOP and FOXA1')
Out[24]:
<matplotlib.text.Text at 0x7efe0fa59dd0>
In [25]:
degree_threshold = 10
label_tmp = [node if d>degree_threshold else '\n' for node, d in zip(connection_graph.vs['label'], connection_graph.degree())]
igraph.plot(connection_graph, layout=connection_graph.layout_auto(), vertex_label=label_tmp, vertex_size=connection_graph.degree(), vertex_color='#ff000022', **visual_style)
Out[25]:
In [26]:
label_high_degree = [node for node, d in zip(connection_graph.vs['label'], connection_graph.degree()) if d>degree_threshold]
print('Nodes with degree greater than {}:'.format(degree_threshold))
print(label_high_degree)
Nodes with degree greater than 10:
['AR', 'TRAF6']
In [27]:
# find all paths between SPOP and FOXA1 (of maximum length equal to the shortest path length + 1)
new_maxlen = path_SPOP_to_FOXA1_length + 1
node_start = pa.genesymbol('SPOP').index
node_end = pa.genesymbol('FOXA1').index
paths = pa.find_all_paths(start=node_start, end=node_end, maxlen=new_maxlen)
print('Number of paths: {}'.format(len(paths)))
	:: Looking up all paths up to length 5: finished, 100.0%
Number of paths: 639
In [28]:
p = [item for sublist in paths for item in sublist]
p = set(p)
print('Number of nodes: {}'.format(len(p)))
Number of nodes: 209
In [29]:
connection_graph = pa.graph.induced_subgraph(p)
In [30]:
igraph.plot(connection_graph, layout=connection_graph.layout_auto(), **visual_style)
Out[30]:
In [31]:
xs, ys = zip(*[(left, count) for left, _, count in connection_graph.degree_distribution().bins()])
sns.plt.bar(xs, ys)
sns.plt.title('Degree distribution of shortest path network between SPOP and FOXA1')
Out[31]:
<matplotlib.text.Text at 0x7efe06652b90>
In [32]:
label_tmp = [node if d>90 else '\n' for node, d in zip(connection_graph.vs['label'], connection_graph.degree())]
igraph.plot(connection_graph, layout=connection_graph.layout_auto(), vertex_label=label_tmp, vertex_size=connection_graph.degree(), vertex_color='#ff000022', **visual_style)
Out[32]: