"""
Visualize results from :py:mod:`flotilla.compute.network`
"""
import sys
import networkx as nx
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from ..compute.network import Networker
from ..util import dict_to_str
from .color import dark2, almost_black, green, red
class NetworkerViz(Networker):
    """Draw the similarity networks built by ``Networker`` for a data model.

    Parameters
    ----------
    DataModel : object
        Data container the graphs are drawn from. The methods of this class
        read ``reduce``, ``maybe_renamed_to_feature_id``, ``feature_renamer``,
        ``df`` and ``sample_metadata`` from it.
    """
    # TODO: needs to be decontaminated, as it requires methods from
    # data_object;
    # maybe this class should move to data_model.BaseData

    def __init__(self, DataModel):
        self.DataModel = DataModel
        Networker.__init__(self)

    def _feature_id(self, feature_of_interest):
        """Resolve a feature name to its id, or ``''`` if it is unknown."""
        try:
            return self.DataModel.maybe_renamed_to_feature_id(
                feature_of_interest)[0]
        except (ValueError, KeyError, IndexError):
            return ''

    @staticmethod
    def _covariance_cutoff(adjacency, cov_std_cut):
        """Return the non-zero adjacency values and the edge cutoff.

        The cutoff is the mean of the non-zero values plus ``cov_std_cut``
        standard deviations.
        """
        values = np.asarray(adjacency.values).ravel()
        cov_dist = values[np.abs(values) > 0]
        cov_cut = np.mean(cov_dist) + cov_std_cut * np.std(cov_dist)
        return cov_dist, cov_cut

    @staticmethod
    def _node_degrees(graph):
        """Return ``{node: degree}`` for the graph.

        ``dict()`` accepts both a plain mapping and an iterable of
        ``(node, degree)`` pairs, so this works whichever of the two
        ``networkx.degree`` returns.
        """
        return dict(nx.degree(graph))

    @staticmethod
    def _write_graph_file(graph, graph_file):
        """Write the graph as GML if a file name was given (best effort)."""
        if not graph_file:
            return
        try:
            nx.write_gml(graph, graph_file)
        except Exception as e:
            # Failing to save must not discard a figure that is already drawn
            sys.stdout.write("error writing graph file:"
                             "\n{}".format(str(e)))

    def draw_graph(self,
                   n_pcs=5,
                   use_pc_1=True, use_pc_2=True, use_pc_3=True, use_pc_4=True,
                   degree_cut=2, cov_std_cut=1.8,
                   weight_function='no_weight',
                   featurewise=False,  # else feature_components
                   rpkms_not_events=False,  # else event features
                   feature_of_interest='RBFOX2', draw_labels=True,
                   reduction_name=None,
                   feature_ids=None,
                   sample_ids=None,
                   graph_file='',
                   compare="",
                   sample_id_to_color=None,
                   label_to_color=None,
                   label_to_marker=None, groupby=None,
                   data_type=None):
        """Draw the graph of similarities between samples or features

        The data is first reduced with ``DataModel.reduce``; the adjacency
        and the graph are then computed in the reduced space. Besides the
        network itself, three inset plots are drawn: explained variance per
        component, the distribution of non-zero adjacency values with the
        edge cutoff, and the node degree distribution.

        Parameters
        ----------
        n_pcs : int
            Number of components to use for the covariance calculation;
            also marked as the cutoff on the explained variance plot
        use_pc_1, use_pc_2, use_pc_3, use_pc_4 : bool
            Use these pcs in the covariance calculation (default True)
        degree_cut : int
            Minimum degree for a node to be included in graph display
        cov_std_cut : float
            Number of standard deviations above the mean non-zero adjacency
            value at which the edge cutoff is placed
        weight_function : str
            Name of the edge weight function, forwarded to
            ``Networker.graph`` (previously documented choices: 'arctan',
            'sq', 'abs', 'arctan_sq')
        featurewise : bool
            Forwarded to ``DataModel.reduce``. If True, nodes are sized from
            ``pca.means``, the feature of interest is highlighted and labels
            go through ``DataModel.feature_renamer``
        rpkms_not_events : bool
            Unused
        feature_of_interest : str
            Feature to highlight; resolved with
            ``DataModel.maybe_renamed_to_feature_id``
        draw_labels : bool
            Whether to draw node labels
        reduction_name : str, or None
            Unused
        feature_ids : list of str, or None
            Feature ids to subset the data. If None, all features will be
            used.
        sample_ids : list of str, or None
            Sample ids to subset the data. If None, all samples will be used.
        graph_file : str
            If non-empty, the graph is written to this file in GML format
        compare : str
            Unused
        sample_id_to_color : mapping, or None
            Node colors keyed by sample id, used when not ``featurewise``
        label_to_color, label_to_marker, groupby, data_type
            Unused

        Returns
        -------
        graph : networkx.Graph
        positions : (x,y) positions of nodes
        """
        # not pertinent to the graph, these are what we want to be able to
        # re-apply to the same graph if it exists
        pca_settings = {'sample_ids': sample_ids,
                        'featurewise': featurewise,
                        'feature_ids': feature_ids}
        adjacency_settings = {'use_pc_1': use_pc_1, 'use_pc_2': use_pc_2,
                              'use_pc_3': use_pc_3, 'use_pc_4': use_pc_4,
                              'n_pcs': n_pcs}

        plt.figure(figsize=(10, 10))
        plt.axis((-0.2, 1.2, -0.2, 1.2))
        main_ax = plt.gca()
        ax_pev = plt.axes([0.1, .8, .2, .15])
        ax_cov = plt.axes([0.1, 0.1, .2, .15])
        ax_degree = plt.axes([0.9, .8, .2, .15])

        pca = self.DataModel.reduce(**pca_settings)
        feature_id = self._feature_id(feature_of_interest)

        if featurewise:
            def node_color_mapper(x):
                return green if x == feature_id else almost_black

            def node_size_mapper(x):
                return (pca.means.loc[x] ** 2) + 10
        else:
            if sample_id_to_color is not None:
                def node_color_mapper(x):
                    return sample_id_to_color[x]
            else:
                def node_color_mapper(x):
                    return dark2[0]

            def node_size_mapper(x):
                return 95

        ax_pev.plot(pca.explained_variance_ratio_ * 100.)
        ax_pev.axvline(n_pcs, label='cutoff', color=green)
        ax_pev.legend()
        ax_pev.set_ylabel("% explained variance")
        ax_pev.set_xlabel("component")
        ax_pev.set_title("Explained variance from dim reduction")
        sns.despine(ax=ax_pev)

        adjacency = self.adjacency(pca.reduced_space, **adjacency_settings)
        cov_dist, cov_cut = self._covariance_cutoff(adjacency, cov_std_cut)

        graph_settings = {'weight_function': weight_function,
                          'degree_cut': degree_cut,
                          'cov_cut': cov_cut}
        # The name is built before it is itself added to the settings
        graph_settings['name'] = "_".join(
            dict_to_str(s) for s in (pca_settings, adjacency_settings,
                                     graph_settings))

        sns.kdeplot(cov_dist, ax=ax_cov)
        _, xmax = ax_cov.get_xlim()
        ax_cov.set_xlim(0, xmax)
        ax_cov.axvline(cov_cut, label='cutoff', color=green)
        ax_cov.set_title("Covariance in dim reduction space")
        ax_cov.set_ylabel("Density")
        ax_cov.legend()
        sns.despine(ax=ax_cov)

        graph, pos = self.graph(adjacency, **graph_settings)
        nodes = list(graph.nodes())

        # Real lists, not map(): on Python 3 map() is a one-shot iterator
        nx.draw_networkx_nodes(
            graph, pos,
            node_color=[node_color_mapper(node) for node in nodes],
            node_size=[node_size_mapper(node) for node in nodes],
            ax=main_ax, alpha=0.5)

        try:
            # Overlay a grey gradient of the feature of interest's values
            feature_values = pca.X[feature_id]
            node_color = [feature_values.loc[node] for node in nodes]
            nx.draw_networkx_nodes(
                graph, pos, node_color=node_color,
                cmap=mpl.cm.Greys,
                node_size=[node_size_mapper(node) * .5 for node in nodes],
                ax=main_ax, alpha=1)
        except (KeyError, ValueError):
            # The feature (or a node) is not in the data: skip the overlay
            pass

        if draw_labels:
            if featurewise:
                namer = self.DataModel.feature_renamer
            else:
                def namer(x):
                    return x
            labels = {name: namer(name) for name in nodes}
            nx.draw_networkx_labels(graph, pos, labels=labels, ax=main_ax)

        nx.draw_networkx_edges(graph, pos, ax=main_ax, alpha=0.1)
        main_ax.set_axis_off()

        degree = self._node_degrees(graph)
        sns.kdeplot(np.array(list(degree.values())), ax=ax_degree)
        _, xmax = ax_degree.get_xlim()
        ax_degree.set_xlim(0, xmax)
        ax_degree.set_xlabel("degree")
        ax_degree.set_ylabel("density")
        try:
            ax_degree.axvline(x=degree[feature_id],
                              label=feature_of_interest,
                              color=green)
            ax_degree.legend()
        except Exception as e:
            # Best effort: the feature of interest may not be a node
            sys.stdout.write(str(e))
        sns.despine(ax=ax_degree)

        self._write_graph_file(graph, graph_file)
        return graph, pos

    def draw_nonreduced_graph(self,
                              degree_cut=2, cov_std_cut=1.8,
                              wt_fun='abs',
                              featurewise=False,  # else feature_components
                              rpkms_not_events=False,  # else event features
                              feature_of_interest='RBFOX2', draw_labels=True,
                              feature_ids=None,
                              group_id=None,
                              graph_file='',
                              compare=""):
        """Draw the similarity graph computed on the non-reduced data

        Same layout as :py:meth:`draw_graph`, but the adjacency is computed
        directly from ``DataModel.df`` instead of a reduced space, so there
        is no explained variance inset.

        Parameters
        ----------
        degree_cut : int
            Minimum degree for a node to be included in graph display
        cov_std_cut : float
            Number of standard deviations above the mean non-zero adjacency
            value at which the edge cutoff is placed
        wt_fun : str
            Name of the edge weight function (previously documented choices:
            'arctan', 'sq', 'abs', 'arctan_sq')
        featurewise : bool
            If True, nodes are sized from the column means of the data and
            the feature of interest is highlighted; otherwise nodes are
            colored from ``DataModel.sample_metadata.color``
        rpkms_not_events : bool
            Unused
        feature_of_interest : str
            Feature to highlight; resolved with
            ``DataModel.maybe_renamed_to_feature_id``
        draw_labels : bool
            Whether to draw node labels
        feature_ids : list of str, or None
            Unused
        group_id : object
            Unused
        graph_file : str
            If non-empty, the graph is written to this file in GML format
        compare : str
            Unused

        Returns
        -------
        graph : networkx.Graph
        positions : (x,y) positions of nodes
        """
        # dict(('non_reduced', True)) raised ValueError: dict() treats the
        # tuple as a sequence of key/value pairs, not as a single pair.
        adjacency_settings = {'non_reduced': True}

        plt.figure(figsize=(10, 10))
        plt.axis((-0.2, 1.2, -0.2, 1.2))
        main_ax = plt.gca()
        ax_cov = plt.axes([0.1, 0.1, .2, .15])
        ax_degree = plt.axes([0.9, .8, .2, .15])

        data = self.DataModel.df
        feature_id = self._feature_id(feature_of_interest)

        if featurewise:
            # Computed once rather than once per node
            feature_means = data.mean()

            def node_color_mapper(x):
                return red if x == feature_id else 'k'

            def node_size_mapper(x):
                return (feature_means.loc[x] ** 2) + 10
        else:
            def node_color_mapper(x):
                return self.DataModel.sample_metadata.color[x]

            def node_size_mapper(x):
                return 75

        adjacency_name = "_".join([dict_to_str(adjacency_settings)])
        # NOTE(review): confirm that Networker.adjacency accepts the `name`
        # and `non_reduced` keywords -- its signature is not visible here.
        adjacency = self.adjacency(data, name=adjacency_name,
                                   **adjacency_settings)
        cov_dist, cov_cut = self._covariance_cutoff(adjacency, cov_std_cut)

        # NOTE(review): draw_graph forwards this setting to Networker.graph
        # as `weight_function`; confirm that `wt_fun` is accepted as well.
        graph_settings = {'wt_fun': wt_fun,
                          'degree_cut': degree_cut,
                          'cov_cut': cov_cut}
        # The name is built before it is itself added to the settings
        graph_settings['name'] = "_".join(
            dict_to_str(s) for s in (adjacency_settings, graph_settings))

        sns.kdeplot(cov_dist, ax=ax_cov)
        ax_cov.axvline(cov_cut, label='cutoff')
        ax_cov.set_title("covariance in original space")
        ax_cov.set_ylabel("density")
        ax_cov.legend()
        sns.despine(ax=ax_cov)

        graph, positions = self.graph(adjacency, **graph_settings)
        nodes = list(graph.nodes())

        # Real lists, not map(): on Python 3 map() is a one-shot iterator
        nx.draw_networkx_nodes(
            graph, positions,
            node_color=[node_color_mapper(node) for node in nodes],
            node_size=[node_size_mapper(node) for node in nodes],
            ax=main_ax, alpha=0.5)

        try:
            # Overlay a grey gradient of the feature of interest's values
            feature_values = data[feature_id]
            node_color = [feature_values.loc[node] for node in nodes]
            nx.draw_networkx_nodes(
                graph, positions, node_color=node_color,
                cmap=plt.cm.Greys,
                node_size=[node_size_mapper(node) * .5 for node in nodes],
                ax=main_ax, alpha=1)
        except (KeyError, ValueError):
            # The feature (or a node) is not in the data: skip the overlay
            pass

        if draw_labels:
            labels = {name: name for name in nodes}
            nx.draw_networkx_labels(graph, positions, labels=labels,
                                    ax=main_ax)

        nx.draw_networkx_edges(graph, positions, ax=main_ax, alpha=0.1)
        main_ax.set_axis_off()

        degree = self._node_degrees(graph)
        sns.kdeplot(np.array(list(degree.values())), ax=ax_degree)
        ax_degree.set_xlabel("degree")
        ax_degree.set_ylabel("density")
        try:
            # Nodes are keyed by feature id, not by the display name
            ax_degree.axvline(x=degree[feature_id],
                              label=feature_of_interest)
            ax_degree.legend()
        except Exception as e:
            # Best effort: the feature of interest may not be a node
            sys.stdout.write(str(e))
        sns.despine(ax=ax_degree)

        self._write_graph_file(graph, graph_file)
        return (graph, positions)