Source code for flotilla.visualize.ipython_interact

"""
Named `ipython_interact.py` rather than just `interact.py` to differentiate
between IPython interactive visualizations vs D3 interactive visualizations.
"""
import os
import sys
import warnings

from IPython.html.widgets import interact, TextWidget, ButtonWidget
import matplotlib.pyplot as plt

from ..visualize.color import red
from .network import NetworkerViz
from .color import str_to_color
from ..util import natural_sort, link_to_list


default_classifier = 'ExtraTreesClassifier'
default_regressor = 'ExtraTreesRegressor'
default_score_coefficient = 2


[docs]class Interactive(object): """ Attributes ---------- Methods ------- """ def __init__(self, *args, **kwargs): self._default_x_pc = 1 self._default_y_pc = 2 @staticmethod
[docs] def get_feature_subsets(study, data_types): """Given a study and list of data types, get the relevant feature subsets Parameters ---------- study : flotilla.Study A study object which Returns ------- Raises ------ """ feature_subsets = ['custom'] # datatype_to_ if 'expression' in data_types: try: feature_subsets.extend(study.expression.feature_subsets.keys()) except AttributeError: pass if 'splicing' in data_types: try: feature_subsets.extend(study.splicing.feature_subsets.keys()) except AttributeError: pass # Cast to "set" to get rid of duplicates, then back to list because you # can't sort a set, then back to list after sorting because you get # an iterator... yeah .... feature_subsets = list(natural_sort(list(set(feature_subsets)))) # Make sure "variant" is first because all datasets have that try: feature_subsets.pop(feature_subsets.index('variant')) except ValueError: pass feature_subsets.insert(0, 'variant') return feature_subsets
@staticmethod
[docs] def interactive_pca(self, data_types=('expression', 'splicing'), sample_subsets=None, feature_subsets=None, color_samples_by=None, featurewise=False, x_pc=(1, 10), y_pc=(1, 10), show_point_labels=False, list_link='', plot_violins=False, scale_by_variance=True, savefile='figures/last.pca.pdf'): def do_interact(data_type='expression', sample_subset=self.default_sample_subsets, feature_subset=self.default_feature_subsets, featurewise=False, list_link='', x_pc=1, y_pc=2, plot_violins=False, show_point_labels=False, color_samples_by=self.metadata.phenotype_col, bokeh=False, scale_by_variance=True, most_variant_features=False, std_multiplier=(0, 5.0)): for k, v in locals().iteritems(): if k == 'self': continue sys.stdout.write('{} : {}\n'.format(k, v)) if feature_subset != "custom" and list_link != "": raise ValueError( "Set feature_subset to \"custom\" to use list_link") if feature_subset == "custom" and list_link == "": raise ValueError("Use a custom list name please") if feature_subset == 'custom': feature_subset = link_to_list(list_link) elif feature_subset not in self.default_feature_subsets[data_type]: warnings.warn("This feature_subset ('{}') is not available in " "this data type ('{}'). Falling back on all " "features.".format(feature_subset, data_type)) return self.plot_pca(sample_subset=sample_subset, data_type=data_type, featurewise=featurewise, x_pc=x_pc, y_pc=y_pc, show_point_labels=show_point_labels, feature_subset=feature_subset, plot_violins=plot_violins, color_samples_by=color_samples_by, bokeh=bokeh, std_multiplier=std_multiplier, scale_by_variance=scale_by_variance, most_variant_features=most_variant_features) # self.plot_study_sample_legend() if feature_subsets is None: feature_subsets = Interactive.get_feature_subsets(self, data_types) if sample_subsets is None: sample_subsets = self.default_sample_subsets color_samples_by = self.metadata.data.columns.tolist() gui = interact(do_interact, data_type=data_types, sample_subset=sample_subsets, feature_subset=feature_subsets + ['custom'], featurewise=featurewise, x_pc=x_pc, y_pc=y_pc, show_point_labels=show_point_labels, list_link=list_link, plot_violins=plot_violins, color_samples_by=color_samples_by, scale_by_variance=scale_by_variance) def save(w): # Make the directory if it's not already there filename, extension = os.path.splitext(savefile.value) self.maybe_make_directory(savefile.value) gui.widget.result.fig_reduced.savefig(savefile.value, format=extension) # add "violins" after the provided filename, but before the # extension violins_file = '{}.{}'.format("_".join([filename, 'violins']), extension) try: gui.widget.result.fig_violins.savefig( violins_file, format=extension.lstrip('.')) except AttributeError: pass savefile = TextWidget(description='savefile') save_widget = ButtonWidget(description='save') gui.widget.children = list(gui.widget.children) + [savefile, save_widget] save_widget.on_click(save) return gui
@staticmethod
[docs] def interactive_graph(self, data_types=('expression', 'splicing'), sample_subsets=None, feature_subsets=None, featurewise=False, cov_std_cut=(0.1, 3), degree_cut=(0, 10), n_pcs=(2, 100), draw_labels=False, feature_of_interest="RBFOX2", weight_fun=None, use_pc_1=True, use_pc_2=True, use_pc_3=True, use_pc_4=True, savefile='figures/last.graph.pdf'): from IPython.html.widgets import interact # not sure why nested fxns are required for this, but they are... i # think... def do_interact(data_type='expression', sample_subset=self.default_sample_subsets, feature_subset=self.default_feature_subsets, weight_fun=NetworkerViz.weight_funs, featurewise=False, use_pc_1=True, use_pc_2=True, use_pc_3=True, use_pc_4=True, degree_cut=1, cov_std_cut=1.8, n_pcs=5, feature_of_interest="RBFOX2", draw_labels=False): for k, v in locals().iteritems(): if k == 'self': continue sys.stdout.write('{} : {}\n'.format(k, v)) if data_type == 'expression': assert (feature_subset in self.expression.feature_subsets.keys()) if data_type == 'splicing': assert (feature_subset in self.splicing.feature_subsets.keys()) self.plot_graph(data_type=data_type, sample_subset=sample_subset, feature_subset=feature_subset, featurewise=featurewise, draw_labels=draw_labels, degree_cut=degree_cut, cov_std_cut=cov_std_cut, n_pcs=n_pcs, feature_of_interest=feature_of_interest, use_pc_1=use_pc_1, use_pc_2=use_pc_2, use_pc_3=use_pc_3, use_pc_4=use_pc_4, weight_function=weight_fun) if feature_subsets is None: feature_subsets = Interactive.get_feature_subsets(self, data_types) if sample_subsets is None: sample_subsets = self.default_sample_subsets if weight_fun is None: weight_fun = NetworkerViz.weight_funs gui = interact(do_interact, data_type=data_types, sample_subset=sample_subsets, feature_subset=feature_subsets, featurewise=featurewise, cov_std_cut=cov_std_cut, degree_cut=degree_cut, n_pcs=n_pcs, draw_labels=draw_labels, weight_fun=weight_fun, feature_of_interest=feature_of_interest, use_pc_1=use_pc_1, use_pc_2=use_pc_2, use_pc_3=use_pc_3, use_pc_4=use_pc_4) def save(w): # Make the directory if it's not already there filename, extension = os.path.splitext(savefile.value) self.maybe_make_directory(savefile.value) plt.gcf().savefig(savefile, format=extension.lstrip('.')) savefile = TextWidget(description='savefile') save_widget = ButtonWidget(description='save') gui.widget.children = list(gui.widget.children) + [savefile, save_widget] save_widget.on_click(save) return gui
@staticmethod
[docs] def interactive_classifier(self, data_types=('expression', 'splicing'), sample_subsets=None, feature_subsets=None, categorical_variables=None, predictor_types=None, score_coefficient=(0.1, 20), draw_labels=False): def do_interact(data_type, sample_subset, feature_subset, predictor_type=default_classifier, categorical_variable='outlier', score_coefficient=2, plot_violins=False, show_point_labels=False): for k, v in locals().iteritems(): if k == 'self': continue sys.stdout.write('{} : {}\n'.format(k, v)) self.plot_classifier(trait=categorical_variable, feature_subset=feature_subset, sample_subset=sample_subset, predictor_name=predictor_type, score_coefficient=score_coefficient, data_type=data_type, plot_violins=plot_violins, show_point_labels=show_point_labels) if feature_subsets is None: feature_subsets = Interactive.get_feature_subsets(self, data_types) feature_subsets.insert(0, 'variant') if sample_subsets is None: sample_subsets = self.default_sample_subsets if categorical_variables is None: categorical_variables = [i for i in self.default_sample_subsets if not i.startswith( "~") and i != 'all_samples'] if predictor_types is None: predictor_types = \ self.predictor_config_manager.predictor_configs.keys() # self.plot_study_sample_legend() gui = interact(do_interact, data_type=data_types, sample_subset=sample_subsets, feature_subset=feature_subsets, categorical_variable=categorical_variables, score_coefficient=score_coefficient, draw_labels=draw_labels, predictor_type=predictor_types) def save(w): # Make the directory if it's not already there filename, extension = os.path.splitext(savefile.value) self.maybe_make_directory(savefile.value) gui.widget.result.fig_reduced.savefig(savefile.value, format=extension) # add "violins" after the provided filename, but before the # extension violins_file = '{}.{}'.format("_".join([filename, 'violins']), extension) try: gui.widget.result.fig_violins.savefig( violins_file, format=extension.lstrip('.')) except AttributeError: pass savefile = TextWidget(description='savefile') save_widget = ButtonWidget(description='save') gui.widget.children = list(gui.widget.children) + [savefile, save_widget] save_widget.on_click(save) return gui
@staticmethod
[docs] def interactive_localZ(self): from IPython.html.widgets import interact def do_interact(data_type='expression', sample1='', sample2='', pCut='0.01'): for k, v in locals().iteritems(): if k == 'self': continue sys.stdout.write('{} : {}'.format(k, v)) pCut = float(pCut) assert pCut > 0 if data_type == 'expression': data_obj = self.expression if data_type == 'splicing': data_obj = self.splicing try: assert sample1 in data_obj.df.index except: sys.stdout.write("sample: {}, is not in {} DataFrame, " "try a different sample ID".format(sample1, data_type)) return try: assert sample2 in data_obj.df.index except: sys.stdout.write("sample: {}, is not in {} DataFrame, " "try a different sample ID".format(sample2, data_type)) return self.localZ_result = data_obj.plot_twoway(sample1, sample2, pCut=pCut).result_ sys.stdout.write("local_z finished, find the result in " "<this_object>.localZ_result_") return interact(do_interact, data_type=('expression', 'splicing'), sample1='replaceme', sample2='replaceme', pCut='0.01')
@staticmethod
[docs] def interactive_plot_modalities_lavalamps(self, sample_subsets=None, feature_subsets=None, color=red, x_offset=0, use_these_modalities=True, bootstrapped=False, bootstrapped_kws=None, savefile=''): def do_interact(sample_subset=None, feature_subset=None, color=red, x_offset=0, use_these_modalities=True, bootstrapped=False, bootstrapped_kws=None, savefile=''): for k, v in locals().iteritems(): if k == 'self': continue sys.stdout.write('{} : {}\n'.format(k, v)) assert (feature_subset in self.splicing.feature_subsets.keys()) feature_ids = self.splicing.feature_subsets[feature_subset] from sklearn.preprocessing import LabelEncoder le = LabelEncoder() n_in_this_class = len(set( le.fit_transform(self.experiment_design.data[sample_subset]))) try: assert n_in_this_class except: raise RuntimeError("this sample designator is not binary") # TODO: cast non-boolean binary ids to boolean dtype = self.experiment_design.data[sample_subset].dtype try: assert dtype == 'bool' except: raise RuntimeError("this sample designator is not boolean") sample_ids = self.experiment_design.data[sample_subset].index[ self.experiment_design.data[sample_subset]] self.splicing.plot_modalities_lavalamps( sample_ids, feature_ids, color=color, x_offset=x_offset, use_these_modalities=use_these_modalities, bootstrapped=bootstrapped, bootstrapped_kws=bootstrapped_kws, ax=None) plt.tight_layout() if feature_subsets is None: feature_subsets = Interactive.get_feature_subsets(self, ['splicing']) if sample_subsets is None: sample_subsets = self.default_sample_subsets if bootstrapped_kws is None: bootstrapped_kws = {} gui = interact(do_interact, sample_subset=sample_subsets, feature_subset=feature_subsets, color=color, x_offset=x_offset, use_these_modalities=use_these_modalities, bootstrapped=bootstrapped, bootstrapped_kws=bootstrapped_kws, savefile=savefile) def save(w): filename, extension = os.path.splitext(savefile.value) self.maybe_make_directory(savefile.value) gui.widget.result.savefig(savefile.value, format=extension.lstrip('.')) savefile = TextWidget(description='savefile', value='figures/clustermap.pdf') save_widget = ButtonWidget(description='save') gui.widget.children = list(gui.widget.children) + [savefile, save_widget] save_widget.on_click(save)
@staticmethod
[docs] def interactive_lavalamp_pooled_inconsistent( self, sample_subsets=None, feature_subsets=None, difference_threshold=(0.001, 1.00), colors=['red', 'green', 'blue', 'purple', 'yellow'], savefile=''): from IPython.html.widgets import interact savefile = 'figures/last.lavalamp_pooled_inconsistent.pdf' def do_interact(sample_subset=self.default_sample_subsets, feature_subset=self.default_feature_subsets, difference_threshold=0.1, color=red, savefile=savefile): for k, v in locals().iteritems(): if k == 'self': continue sys.stdout.write('{} : {}\n'.format(k, v)) assert (feature_subset in self.splicing.feature_subsets.keys()) feature_ids = self.splicing.feature_subsets[feature_subset] sample_ids = self.sample_subset_to_sample_ids(sample_subset) color = str_to_color[color] self.splicing.plot_lavalamp_pooled_inconsistent( sample_ids, feature_ids, difference_threshold, color=color) plt.tight_layout() if feature_subsets is None: feature_subsets = Interactive.get_feature_subsets( self, ['splicing', 'expression']) if sample_subsets is None: sample_subsets = self.default_sample_subsets gui = interact(do_interact, sample_subset=sample_subsets, feature_subset=feature_subsets, difference_threshold=difference_threshold, color=colors, savefile='') def save(w): filename, extension = os.path.splitext(savefile.value) self.maybe_make_directory(savefile.value) gui.widget.result.savefig(savefile.value, format=extension.lstrip('.')) savefile = TextWidget(description='savefile', value='figures/clustermap.pdf') save_widget = ButtonWidget(description='save') gui.widget.children = list(gui.widget.children) + [savefile, save_widget] save_widget.on_click(save)
@staticmethod
[docs] def interactive_choose_outliers(self, data_types=('expression', 'splicing'), sample_subsets=None, feature_subsets=None, featurewise=False, x_pc=(1, 3), y_pc=(1, 3), show_point_labels=False, kernel=('rbf', 'linear', 'poly', 'sigmoid'), gamma=(0, 25), nu=(0.1, 9.9)): def do_interact(data_type='expression', sample_subset=self.default_sample_subset, feature_subset=self.default_feature_subset, x_pc=1, y_pc=2, show_point_labels=False, kernel='rbf', gamma=16, nu=.2): print "transforming input gamma by 2^-(input): %f" % gamma gamma = 2 ** -float(gamma) print "transforming input nu by input/10: %f" % nu nu = float(nu) / 10 kernel = str(kernel) for k, v in locals().iteritems(): if k == 'self': continue sys.stdout.write('{} : {}\n'.format(k, v)) else: def renamer(x): return x if feature_subset not in self.default_feature_subsets[data_type]: warnings.warn("This feature_subset ('{}') is not available in " "this data type ('{}'). Falling back on all " "features.".format(feature_subset, data_type)) reducer, outlier_detector = self.detect_outliers( data_type=data_type, sample_subset=sample_subset, feature_subset=feature_subset, reducer=None, reducer_kwargs=None, outlier_detection_method=None, outlier_detection_method_kwargs={'gamma': gamma, 'nu': nu, 'kernel': kernel}) if data_type == "expression": datamodel = self.expression elif data_type == "splicing": datamodel = self.splicing else: raise ValueError('No valid data type provided') datamodel.plot_outliers(reducer, outlier_detector, feature_renamer=renamer, show_point_labels=show_point_labels, x_pc="pc_" + str(x_pc), y_pc="pc_" + str(y_pc)) print "total samples:", len(outlier_detector.outliers) print "outlier samples:", outlier_detector.outliers.sum() print "metadata column name:", outlier_detector.title if feature_subsets is None: feature_subsets = Interactive.get_feature_subsets(self, data_types) if sample_subsets is None: sample_subsets = self.default_sample_subsets return interact(do_interact, data_type=data_types, sample_subset=sample_subsets, feature_subset=feature_subsets, x_pc=x_pc, y_pc=y_pc, show_point_labels=show_point_labels, kernel=kernel, nu=nu, gamma=gamma)
@staticmethod
[docs] def interactive_reset_outliers(self): """User selects from columns that start with 'outlier_' to merge multiple outlier classifications""" outlier_columns = dict() for column in self.metadata.data.columns: if column.startswith("outlier_"): outlier_columns[column] = False def do_interact(**columns): if len(columns.keys()) == 0: print "You have not specified any 'outlier_' columns in " \ "study.metadata.data... \n" \ "This function will do nothing to your data." else: self.metadata.set_outliers_by_merging_columns( [k for (k, v) in columns.items() if v]) interact(do_interact, **outlier_columns)
@staticmethod
[docs] def interactive_clustermap(self): def do_interact(data_type='expression', sample_subset=self.default_sample_subsets, feature_subset=self.default_feature_subset, metric='euclidean', method='median', list_link='', scale_fig_by_data=True, fig_width='', fig_height=''): for k, v in locals().iteritems(): if k == 'self': continue sys.stdout.write('{} : {}\n'.format(k, v)) if feature_subset != "custom" and list_link != "": raise ValueError( "set feature_subset to \"custom\" to use list_link") if feature_subset == "custom" and list_link == "": raise ValueError("use a custom list name please") if feature_subset == 'custom': feature_subset = list_link elif feature_subset not in self.default_feature_subsets[data_type]: warnings.warn("This feature_subset ('{}') is not available in " "this data type ('{}'). Falling back on all " "features.".format(feature_subset, data_type)) return self.plot_clustermap( sample_subset=sample_subset, feature_subset=feature_subset, data_type=data_type, metric=metric, method=method, scale_fig_by_data=scale_fig_by_data) feature_subsets = Interactive.get_feature_subsets(self, ['expression', 'splicing']) method = ('average', 'weighted', 'single', 'complete', 'ward') metric = ('euclidean', 'seuclidean', 'sqeuclidean', 'chebyshev', 'cosine', 'cityblock', 'mahalonobis', 'minowski', 'jaccard') gui = interact(do_interact, data_type=('expression', 'splicing'), sample_subset=self.default_sample_subsets, feature_subset=feature_subsets, metric=metric, method=method) def save(w): filename, extension = os.path.splitext(savefile.value) self.maybe_make_directory(savefile.value) gui.widget.result.savefig(savefile.value, format=extension.lstrip('.')) savefile = TextWidget(description='savefile', value='figures/clustermap.pdf') save_widget = ButtonWidget(description='save') gui.widget.children = list(gui.widget.children) + [savefile, save_widget] save_widget.on_click(save) return gui
@staticmethod
[docs] def interactive_correlations(self): def do_interact(data_type='expression', sample_subset=self.default_sample_subsets, feature_subset=self.default_feature_subset, metric='euclidean', method='average', list_link='', scale_fig_by_data=True, fig_width='', fig_height='', featurewise=False): for k, v in locals().iteritems(): if k == 'self': continue sys.stdout.write('{} : {}\n'.format(k, v)) if feature_subset != "custom" and list_link != "": raise ValueError( "set feature_subset to \"custom\" to use list_link") if feature_subset == "custom" and list_link == "": raise ValueError("use a custom list name please") if feature_subset == 'custom': feature_subset = list_link elif feature_subset not in self.default_feature_subsets[data_type]: warnings.warn("This feature_subset ('{}') is not available in " "this data type ('{}'). Falling back on all " "features.".format(feature_subset, data_type)) return self.plot_correlations( sample_subset=sample_subset, feature_subset=feature_subset, data_type=data_type, scale_fig_by_data=scale_fig_by_data, method=method, metric=metric, featurewise=featurewise) feature_subsets = Interactive.get_feature_subsets(self, ['expression', 'splicing']) method = ('average', 'weighted', 'single', 'complete', 'ward') metric = ('euclidean', 'seuclidean', 'sqeuclidean', 'chebyshev', 'cosine', 'cityblock', 'mahalonobis', 'minowski', 'jaccard') gui = interact(do_interact, data_type=('expression', 'splicing'), sample_subset=self.default_sample_subsets, feature_subset=feature_subsets, metric=metric, method=method, featurewise=False) def save(w): filename, extension = os.path.splitext(savefile.value) self.maybe_make_directory(savefile.value) gui.widget.result.savefig(savefile.value, format=extension.lstrip('.')) savefile = TextWidget(description='savefile', value='figures/correlations.pdf') save_widget = ButtonWidget(description='save') gui.widget.children = list(gui.widget.children) + [savefile, save_widget] save_widget.on_click(save) return gui
Olga B. Botvinnik is funded by the NDSEG fellowship and is a NumFOCUS John Hunter Technology Fellow.
Michael T. Lovci was partially funded by a fellowship from Genentech.
Partially funded by NIH grants NS075449 and HG004659 and CIRM grants RB4-06045 and TR3-05676 to Gene Yeo.