Source code for flotilla.visualize.network

"""
Visualize results from :py:mod:flotilla.compute.network
"""

import sys

import networkx as nx
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

from ..compute.network import Networker
from ..util import dict_to_str
from .color import dark2, almost_black, green, red


[docs]class NetworkerViz(Networker): # TODO: needs to be decontaminated, as it requires methods from # data_object; # maybe this class should move to data_model.BaseData def __init__(self, DataModel): self.DataModel = DataModel Networker.__init__(self)
[docs] def draw_graph(self, n_pcs=5, use_pc_1=True, use_pc_2=True, use_pc_3=True, use_pc_4=True, degree_cut=2, cov_std_cut=1.8, weight_function='no_weight', featurewise=False, # else feature_components rpkms_not_events=False, # else event features feature_of_interest='RBFOX2', draw_labels=True, reduction_name=None, feature_ids=None, sample_ids=None, graph_file='', compare="", sample_id_to_color=None, label_to_color=None, label_to_marker=None, groupby=None, data_type=None): """Draw the graph of similarities between samples or features Parameters ---------- feature_ids : list of str, or None Feature ids to subset the data. If None, all features will be used. sample_ids : list of str, or None Sample ids to subset the data. If None, all features will be used. x_pc : str, optional Which component to use for the x-axis, default "pc_1" y_pc : y component for PCA, default "pc_2" n_pcs : int Number of components to use for cells' covariance calculation cov_std_cut : float Covariance cutoff for edges use_pc{1-4} : bool Use these pcs in cov calculation (default True) degree_cut : int miniumum degree for a node to be included in graph display weight_function : ['arctan' | 'sq' | 'abs' | 'arctan_sq'] weight function (arctan (arctan cov), sq (sq cov), abs (abs cov), arctan_sq (sqared arctan of cov)) gene_of_interest : str map a gradient representing this gene's data onto nodes (ENSEMBL id or gene symbol) Returns ------- graph : networkx.Graph positions : (x,y) positions of nodes """ node_color_mapper = self._default_node_color_mapper node_size_mapper = self._default_node_color_mapper settings = locals().copy() # not pertinent to the graph, these are what we want to be able to # re-apply to the same graph if it exists pca_settings = dict() pca_settings['sample_ids'] = sample_ids pca_settings['featurewise'] = featurewise pca_settings['feature_ids'] = feature_ids # pca_settings['obj_id'] = reduction_name adjacency_settings = dict((k, settings[k]) for k in ['use_pc_1', 'use_pc_2', 'use_pc_3', 'use_pc_4', 'n_pcs', ]) plt.figure(figsize=(10, 10)) plt.axis((-0.2, 1.2, -0.2, 1.2)) main_ax = plt.gca() ax_pev = plt.axes([0.1, .8, .2, .15]) ax_cov = plt.axes([0.1, 0.1, .2, .15]) ax_degree = plt.axes([0.9, .8, .2, .15]) pca = self.DataModel.reduce( # label_to_color=label_to_color, # label_to_marker=label_to_marker, # groupby=groupby, **pca_settings) try: feature_id = self.DataModel.maybe_renamed_to_feature_id( feature_of_interest)[0] except (ValueError, KeyError, IndexError): feature_id = '' if featurewise: def node_color_mapper(x): if (x == feature_id): return green else: return almost_black def node_size_mapper(x): return (pca.means.ix[x] ** 2) + 10 else: if sample_id_to_color is not None: def node_color_mapper(x): return sample_id_to_color[x] else: def node_color_mapper(x): return dark2[0] def node_size_mapper(x): return 95 ax_pev.plot(pca.explained_variance_ratio_ * 100.) ax_pev.axvline(n_pcs, label='cutoff', color=green) ax_pev.legend() ax_pev.set_ylabel("% explained variance") ax_pev.set_xlabel("component") ax_pev.set_title("Explained variance from dim reduction") sns.despine(ax=ax_pev) adjacency = self.adjacency(pca.reduced_space, **adjacency_settings) cov_dist = np.array( [i for i in adjacency.values.ravel() if np.abs(i) > 0]) cov_cut = np.mean(cov_dist) + cov_std_cut * np.std(cov_dist) graph_settings = dict( (k, settings[k]) for k in ['weight_function', 'degree_cut', ]) graph_settings['cov_cut'] = cov_cut this_graph_name = "_".join(map(dict_to_str, [pca_settings, adjacency_settings, graph_settings])) graph_settings['name'] = this_graph_name sns.kdeplot(cov_dist, ax=ax_cov) xmin, xmax = ax_cov.get_xlim() ax_cov.set_xlim(0, xmax) ax_cov.axvline(cov_cut, label='cutoff', color=green) ax_cov.set_title("Covariance in dim reduction space") ax_cov.set_ylabel("Density") ax_cov.legend() sns.despine(ax=ax_cov) graph, pos = self.graph(adjacency, **graph_settings) nx.draw_networkx_nodes( graph, pos, node_color=map(node_color_mapper, graph.nodes()), node_size=map(node_size_mapper, graph.nodes()), ax=main_ax, alpha=0.5) try: node_color = map(lambda x: pca.X[feature_id].ix[x], graph.nodes()) nx.draw_networkx_nodes(graph, pos, node_color=node_color, cmap=mpl.cm.Greys, node_size=map( lambda x: node_size_mapper(x) * .5, graph.nodes()), ax=main_ax, alpha=1) except (KeyError, ValueError): pass if featurewise: namer = self.DataModel.feature_renamer else: def namer(x): return x labels = dict([(name, namer(name)) for name in graph.nodes()]) if draw_labels: nx.draw_networkx_labels(graph, pos, labels=labels, ax=main_ax) nx.draw_networkx_edges(graph, pos, ax=main_ax, alpha=0.1) main_ax.set_axis_off() degree = nx.degree(graph) sns.kdeplot(np.array(degree.values()), ax=ax_degree) xmin, xmax = ax_degree.get_xlim() ax_degree.set_xlim(0, xmax) ax_degree.set_xlabel("degree") ax_degree.set_ylabel("density") try: ax_degree.axvline(x=degree[feature_id], label=feature_of_interest, color=green) ax_degree.legend() except Exception as e: sys.stdout.write(str(e)) pass sns.despine(ax=ax_degree) if graph_file != '': try: nx.write_gml(graph, graph_file) except Exception as e: sys.stdout.write("error writing graph file:" "\n{}".format(str(e))) return graph, pos
[docs] def draw_nonreduced_graph(self, degree_cut=2, cov_std_cut=1.8, wt_fun='abs', featurewise=False, # else feature_components rpkms_not_events=False, # else event features feature_of_interest='RBFOX2', draw_labels=True, feature_ids=None, group_id=None, graph_file='', compare=""): """ Parameters ---------- feature_ids : list of str, or None Feature ids to subset the data. If None, all features will be used. sample_ids : list of str, or None Sample ids to subset the data. If None, all features will be used. x_pc : str x component for DataFramePCA, default "pc_1" y_pc : y component for DataFramePCA, default "pc_2" n_pcs : int??? n components to use for cells' covariance calculation cov_std_cut : float?? covariance cutoff for edges use_pc{1-4} use these pcs in cov calculation (default True) degree_cut : int?? miniumum degree for a node to be included in graph display weight_function : ['arctan' | 'sq' | 'abs' | 'arctan_sq'] weight function (arctan (arctan cov), sq (sq cov), abs (abs cov), arctan_sq (sqared arctan of cov)) gene_of_interest : str map a gradient representing this gene's data onto nodes (ENSEMBL id or gene name???) Returns ------- #TODO: Mike please fill these in graph : networkx.Graph ??? positions : ??? ??? """ node_color_mapper = self._default_node_color_mapper node_size_mapper = self._default_node_color_mapper settings = locals().copy() adjacency_settings = dict(('non_reduced', True)) plt.figure(figsize=(10, 10)) plt.axis((-0.2, 1.2, -0.2, 1.2)) main_ax = plt.gca() ax_cov = plt.axes([0.1, 0.1, .2, .15]) ax_degree = plt.axes([0.9, .8, .2, .15]) data = self.DataModel.df try: feature_id = self.DataModel.maybe_renamed_to_feature_id( feature_of_interest)[0] except (ValueError, KeyError): feature_id = '' if featurewise: def node_color_mapper(x): if x == feature_id: return red else: return 'k' def node_size_mapper(x): return (data.mean().ix[x] ** 2) + 10 else: def node_color_mapper(x): return self.DataModel.sample_metadata.color[x] def node_size_mapper(x): return 75 adjacency_name = "_".join([dict_to_str(adjacency_settings)]) adjacency = self.adjacency(data, name=adjacency_name, **adjacency_settings) cov_dist = np.array( [i for i in adjacency.values.ravel() if np.abs(i) > 0]) cov_cut = np.mean(cov_dist) + cov_std_cut * np.std(cov_dist) graph_settings = dict( (k, settings[k]) for k in ['wt_fun', 'degree_cut', ]) graph_settings['cov_cut'] = cov_cut this_graph_name = "_".join( map(dict_to_str, [adjacency_settings, graph_settings])) graph_settings['name'] = this_graph_name sns.kdeplot(cov_dist, ax=ax_cov) ax_cov.axvline(cov_cut, label='cutoff') ax_cov.set_title("covariance in original space") ax_cov.set_ylabel("density") ax_cov.legend() sns.despine(ax=ax_cov) graph, positions = self.graph(adjacency, **graph_settings) nx.draw_networkx_nodes(graph, positions, node_color=map(node_color_mapper, graph.nodes()), node_size=map(node_size_mapper, graph.nodes()), ax=main_ax, alpha=0.5) try: node_color = map(lambda x: data[feature_id].ix[x], graph.nodes()) nx.draw_networkx_nodes(graph, positions, node_color=node_color, cmap=plt.cm.Greys, node_size=map( lambda x: node_size_mapper(x) * .5, graph.nodes()), ax=main_ax, alpha=1) except (KeyError, ValueError): pass def renamer(x): return x labels = dict([(name, renamer(name)) for name in graph.nodes()]) if draw_labels: nx.draw_networkx_labels(graph, positions, labels=labels, ax=main_ax) nx.draw_networkx_edges(graph, positions, ax=main_ax, alpha=0.1) main_ax.set_axis_off() degree = nx.degree(graph) sns.kdeplot(np.array(degree.values()), ax=ax_degree) ax_degree.set_xlabel("degree") ax_degree.set_ylabel("density") try: ax_degree.axvline(x=degree[feature_of_interest], label=feature_of_interest) ax_degree.legend() except Exception as e: sys.stdout.write(str(e)) pass sns.despine(ax=ax_degree) if graph_file != '': try: nx.write_gml(graph, graph_file) except Exception as e: sys.stdout.write("error writing graph file:" "\n{}".format(str(e))) return (graph, positions)
Olga B. Botvinnik is funded by the NDSEG fellowship and is a NumFOCUS John Hunter Technology Fellow.
Michael T. Lovci was partially funded by a fellowship from Genentech.
Partially funded by NIH grants NS075449 and HG004659 and CIRM grants RB4-06045 and TR3-05676 to Gene Yeo.