"""
Splicing-specific visualization classes and methods
"""
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
# from .color import red, blue, purple, grey, green
from ..compute.splicing import get_switchy_score_order
from ..util import as_numpy
# Hex codes of seaborn's 'deep' palette, in palette order.  Stored as a real
# list (not a lazy ``map`` object) because the rest of this module indexes it
# by position, which a Python 3 ``map`` iterator does not support.
seaborn_colors = [mpl.colors.rgb2hex(rgb)
                  for rgb in sns.color_palette('deep')]
class _ModalityEstimatorPlotter(object):
    """Three-panel figure showing the numbers behind one modality estimate.

    Attributes
    ----------
    fig : matplotlib.Figure
        Figure holding all three panels.
    ax_violin : matplotlib.Axes
        Leftmost column of the 3 x 5 grid; violin plot of the event's values.
    ax_loglik : matplotlib.Axes
        Middle three columns; one log-likelihood curve per modality.
    ax_bayesfactor : matplotlib.Axes
        Rightmost column; one bar per entry of ``logsumexps``.
    """

    def __init__(self):
        """Create the figure and lay out the three panels on a 3 x 5 grid."""
        self.fig = plt.figure(figsize=(5 * 2, 3 * 2))
        # subplot2grid draws on the current figure, i.e. the one made above
        grid = (3, 5)
        self.ax_violin = plt.subplot2grid(grid, (0, 0), rowspan=3, colspan=1)
        self.ax_loglik = plt.subplot2grid(grid, (0, 1), rowspan=3, colspan=3)
        self.ax_bayesfactor = plt.subplot2grid(grid, (0, 4), rowspan=3,
                                               colspan=1)

    def plot(self, event, logliks, logsumexps, modality_colors,
             renamed=''):
        """Draw the violin, log-likelihood and bar panels for one event.

        Parameters
        ----------
        event : pandas.Series
            Values of a single event. NaNs are dropped before plotting and
            ``event.name`` is used in the figure caption.
        logliks : dict-like
            Maps a modality name to the sequence of log-likelihoods to draw
            for it. Anything with an ``items()`` method yielding
            ``(name, values)`` pairs works.
        logsumexps : pandas.Series
            One value per modality name. The name with the largest value is
            shown as the guess and picks the violin color.
        modality_colors : dict
            Maps a modality name to a matplotlib color.
        renamed : str
            Extra label shown in parentheses after the event name.

        Returns
        -------
        self : _ModalityEstimatorPlotter
            This plotter, so the figure and axes stay reachable.
        """
        modality = logsumexps.idxmax()

        # Left panel: distribution of the event, colored by the guess
        violin_ax = self.ax_violin
        sns.violinplot(event.dropna(), bw=0.2, ax=violin_ax,
                       color=modality_colors[modality])
        violin_ax.set_ylim(0, 1)
        violin_ax.set_title('Guess: {}'.format(modality))
        violin_ax.set_xticks([])
        violin_ax.set_yticks([0, 0.5, 1])

        # Middle panel: one curve per modality.  ``items()`` (rather than
        # the Python-2-only ``iteritems()``) works for dicts and for pandas
        # objects on both Python 2 and 3.
        loglik_ax = self.ax_loglik
        for name, loglik in logliks.items():
            loglik_ax.plot(loglik, 'o-', label=name,
                           color=modality_colors[name])
        loglik_ax.legend(loc='best')
        loglik_ax.set_title('Log likelihoods at different '
                            'parameterizations')
        loglik_ax.grid()
        # White-on-white label; presumably reserves room below the axes for
        # the figure-level caption added after tight_layout()
        loglik_ax.set_xlabel('phantom', color='white')

        # Right panel: one bar per modality
        bar_ax = self.ax_bayesfactor
        for i, (name, height) in enumerate(logsumexps.items()):
            bar_ax.bar(i, height, label=name, color=modality_colors[name])
        # Raw string: '\l' is not a valid escape sequence
        bar_ax.set_title(r'$\log$ Bayes factors')
        bar_ax.set_xticks([])
        bar_ax.grid()

        self.fig.tight_layout()
        self.fig.text(0.5, .025, '{} ({})'.format(event.name, renamed),
                      fontsize=10, ha='center', va='bottom')
        sns.despine()
        return self
class ModalitiesViz(object):
    """Visualize results of modality assignments"""

    modality_colors = {'bimodal': seaborn_colors[3],
                       'Psi~0': seaborn_colors[0],
                       'Psi~1': seaborn_colors[2],
                       'middle': seaborn_colors[1],
                       'ambiguous': seaborn_colors[4]}

    modality_order = ['Psi~0', 'middle', 'Psi~1', 'bimodal', 'ambiguous']

    # Colors in ``modality_order``.  Built with ``map`` instead of a list
    # comprehension because, on Python 3, a comprehension body inside a
    # class cannot see class-level names such as ``modality_colors``.
    colors = list(map(modality_colors.__getitem__, modality_order))

    def plot_reduced_space(self, binned_reduced, modality_assignments,
                           ax=None, title=None, xlabel='', ylabel=''):
        """Scatter the first two columns of a table, colored by modality

        Parameters
        ----------
        binned_reduced : pandas.DataFrame
            Table with exactly two columns; the first is drawn on the x-axis
            and the second on the y-axis, one point per row.
        modality_assignments : pandas.Series or other groupby key
            Passed to ``binned_reduced.groupby`` to split the rows. Every
            group label must be a key of ``modality_colors``.
        ax : matplotlib.Axes
            Axes to draw on. If None, a new 8 x 8 figure is created.
        title : str or None
            Axes title. Not set when None.
        xlabel, ylabel : str
            Axis labels.

        Raises
        ------
        KeyError
            If a group label is not a key of ``modality_colors``.
        """
        if ax is None:
            _, ax = plt.subplots(figsize=(8, 8))

        # Columns are taken by position, matching the positional unpacking
        # of the column maxima below
        for modality, df in binned_reduced.groupby(modality_assignments):
            ax.plot(df.iloc[:, 0], df.iloc[:, 1], 'o',
                    color=self.modality_colors[modality], alpha=0.7,
                    label=modality)
        sns.despine()

        xmax, ymax = binned_reduced.max()
        ax.set_xlim(0, 1.05 * xmax)
        ax.set_ylim(0, 1.05 * ymax)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.legend()
        if title is not None:
            ax.set_title(title)

    def bar(self, counts, phenotype_to_color=None, ax=None, percentages=True):
        """Draw barplots grouped by modality of modality percentage per group

        Parameters
        ----------
        counts : pandas.DataFrame
            One row per group and one column per modality. Modalities from
            ``modality_order`` that are missing from a row are drawn with
            height 0.
        phenotype_to_color : dict-like or None
            Maps a group (a row label of ``counts``) to a matplotlib color.
            If None, matplotlib picks the bar colors.
        ax : matplotlib.Axes
            Axes to draw on. If None, the current axes are used.
        percentages : bool
            If True, every row of ``counts`` is rescaled to sum to 100
            before plotting.

        Returns
        -------
        None
            The bars are drawn on ``ax``.

        Raises
        ------
        KeyError
            If ``phenotype_to_color`` is given but lacks a group.
        """
        if percentages:
            counts = 100 * (counts.T / counts.T.sum()).T

        if ax is None:
            ax = plt.gca()

        n_modalities = len(self.modality_order)
        group_starts = np.arange(n_modalities)
        full_width = 0.8
        # max(..., 1) keeps an empty table from dividing by zero
        width = full_width / max(counts.shape[0], 1)

        for i, (group, series) in enumerate(counts.iterrows()):
            heights = [series[modality] if modality in series else 0
                       for modality in self.modality_order]
            if phenotype_to_color is None:
                color = None
            else:
                color = phenotype_to_color[group]
            # align='edge' is explicit: the offsets and tick positions here
            # treat the x values as left edges, which stopped being the
            # matplotlib default in 2.0
            ax.bar(group_starts + i * width, heights, width=width,
                   align='edge', color=color, label=group, linewidth=.5,
                   edgecolor='k')

        ylabel = 'Percentage of events' if percentages else 'Number of events'
        ax.set_ylabel(ylabel)
        # Tick in the middle of each group of bars
        ax.set_xticks(group_starts + full_width / 2)
        ax.set_xticklabels(self.modality_order)
        ax.set_xlabel('Splicing modality')
        ax.set_xlim(0, n_modalities)
        ax.legend(loc='best')
        ax.grid(axis='y', linestyle='-', linewidth=0.5)
        sns.despine()

    def event_estimation(self, event, logliks, logsumexps, renamed=''):
        """Show the values underlying bayesian modality estimations of an event

        Parameters
        ----------
        event : pandas.Series
            Values of a single event.
        logliks : dict-like
            Maps a modality name to its sequence of log-likelihoods.
        logsumexps : pandas.Series
            One value per modality name.
        renamed : str
            Extra label shown next to the event name in the caption.

        Returns
        -------
        plotter : _ModalityEstimatorPlotter
            The plotter that drew the figure; its ``fig`` attribute is the
            matplotlib figure.
        """
        plotter = _ModalityEstimatorPlotter()
        plotter.plot(event, logliks, logsumexps, self.modality_colors,
                     renamed=renamed)
        return plotter
def lavalamp(psi, color=None, x_offset=0, title='', ax=None,
             switchy_score_psi=None, marker='o', plot_kws=None,
             yticks=None):
    """Make a 'lavalamp' scatter plot of many splicing events

    Useful for visualizing many splicing events at once. Every event gets
    one x position and all of its samples are drawn in that column.

    Parameters
    ----------
    psi : pandas.DataFrame or numpy array
        A (n_samples, n_events) matrix. Columns (events) that are entirely
        NaN are dropped before plotting.
    color : matplotlib color
        Color of the scatterplot. Defaults to the first color of the
        module's seaborn palette.
    x_offset : numeric
        How much to shift the x-values. Useful for plotting several
        celltypes at once.
    title : str
        Title of the plot. Default ''
    ax : matplotlib.Axes object
        The axes to plot on. If not provided, will be created
    switchy_score_psi : pandas.DataFrame or numpy array
        The psi scores to sort on for the plotting order. By default use the
        psi provided, but sometimes you want to plot multiple psi scores on
        the same plot, with the same events.
    marker : str
        A valid matplotlib marker. Default is 'o'
    plot_kws : dict
        Keyword arguments to supply to plot(). Entries given here take
        precedence over ``color`` and ``marker``. The dict is not modified.
    yticks : list of numeric or None
        Positions of the y-axis ticks. Default is [0, 0.5, 1]

    Returns
    -------
    None
        The plot is drawn on ``ax``. Nothing is drawn when there are no
        events to show.
    """
    if psi.shape[1] == 0:
        return

    if ax is None:
        _, ax = plt.subplots(figsize=(16, 4))

    # Copy so the caller's dict is not filled with the defaults below
    plot_kws = {} if plot_kws is None else dict(plot_kws)
    defaults = (('color', seaborn_colors[0] if color is None else color),
                ('alpha', 0.2),
                ('markersize', 10),
                ('marker', marker),
                ('linestyle', 'None'),
                ('markeredgecolor', '#262626'),
                ('markeredgewidth', .1),
                ('rasterized', True))
    for key, value in defaults:
        plot_kws.setdefault(key, value)

    # Drop events without a single measurement.  Plain arrays have no
    # dropna(), so they get the equivalent NaN mask instead.
    if hasattr(psi, 'dropna'):
        y = as_numpy(psi.dropna(how='all', axis=1))
    else:
        y = np.asarray(psi, dtype=float)
        y = y[:, ~np.isnan(y).all(axis=0)]
    if y.shape[1] == 0:
        # Every event was all-NaN: same outcome as an empty input
        return

    if switchy_score_psi is None:
        switchy_score_y = y
    else:
        switchy_score_y = as_numpy(switchy_score_psi)

    order = get_switchy_score_order(switchy_score_y)
    y = y[:, order]

    n_samples, n_events = y.shape
    # One row of event positions per sample, so x has the same shape as y
    event_positions = np.arange(n_events, dtype=float) + x_offset
    x = np.tile(event_positions, (n_samples, 1))

    # Largest x position plus one, so the last value is actually included
    # instead of cut off
    xmax = n_events + x_offset

    ax.plot(x, y, **plot_kws)
    sns.despine()
    # Raw string: '\P' is not a valid escape sequence
    ax.set_ylabel(r'$\Psi$')
    ax.set_xlabel('{} splicing events'.format(n_events))
    ax.set_xticks([])

    ax.set_xlim(-0.5, xmax + .5)
    ax.set_ylim(0, 1)
    ax.set_yticks([0, 0.5, 1] if yticks is None else yticks)
    ax.set_title(title)
def hist_single_vs_pooled_diff(diff_from_singles, diff_from_singles_scaled,
                               color=None, title='', nbins=50, hist_kws=None):
    """Plot histograms of the pooled-vs-singles psi differences

    Two side-by-side histograms are drawn: the original difference of psi
    scores from the pooled to the singles (left), and the scaled difference
    (right).

    Parameters
    ----------
    diff_from_singles : pandas.DataFrame
        Original differences; all values go into the left histogram.
    diff_from_singles_scaled : pandas.DataFrame
        Scaled differences; all values go into the right histogram.
    color : matplotlib color
        Color of the bars.
    title : str
        Title placed on both axes.
    nbins : int
        Number of equal-width bins spanning the minimum to the maximum of
        each table.
    hist_kws : dict
        Extra keyword arguments for ``Axes.hist``. Entries given here
        override the defaults set by this function. The dict is not
        modified.
    """
    hist_kws = {} if hist_kws is None else hist_kws

    _, axes = plt.subplots(ncols=2, figsize=(8, 4))
    tables = (diff_from_singles, diff_from_singles_scaled)

    for ax, df in zip(axes, tables):
        vmin = df.min().min()
        vmax = df.max().max()

        # nbins bins need nbins + 1 edges
        kwargs = {'bins': np.linspace(vmin, vmax, nbins + 1),
                  'color': color,
                  'edgecolor': 'white',
                  'linewidth': 0.5}
        # Merge instead of passing both, so a key in hist_kws overrides the
        # default rather than raising a duplicate-keyword TypeError
        kwargs.update(hist_kws)

        ax.hist(df.values.ravel(), **kwargs)
        ax.set_title(title)
        # Horizontal grid lines only; the axis is chosen with ``axis``
        # (``which`` selects major/minor ticks)
        ax.grid(axis='y', color='white')
    sns.despine()
def lavalamp_pooled_inconsistent(singles, pooled, pooled_inconsistent,
                                 color=None, percent=None):
    """Lavalamp plots of pooled events, split by agreement with singles

    Draws a two-row figure. The top row shows the events that are columns of
    ``pooled_inconsistent``; the bottom row shows the remaining events of
    ``singles``. In each row the singles are drawn first and the pooled
    samples are overlaid as black open circles, ordered by the singles.

    Parameters
    ----------
    singles : pandas.DataFrame
        Psi scores of the single samples, one column per event.
    pooled : pandas.DataFrame
        Psi scores of the pooled samples, one column per event.
    pooled_inconsistent : pandas.DataFrame
        Only its columns are used: they name the events to show in the top
        row.
    color : matplotlib color
        Color of the singles. Defaults to the first color of the module's
        seaborn palette.
    percent : numeric or None
        Percentage of events that are inconsistent. When given, it is shown
        in the top title and ``100 - percent`` in the bottom title.
    """
    _, axes = plt.subplots(nrows=2, figsize=(16, 8))
    ax_inconsistent, ax_consistent = axes[0], axes[1]

    color = seaborn_colors[0] if color is None else color
    pooled_plot_kws = {'alpha': 0.5, 'markeredgecolor': 'k',
                       'markerfacecolor': 'none', 'markeredgewidth': 1}

    suffix = ' of events measured in both pooled and single'
    if percent is None:
        inconsistent_suffix = consistent_suffix = ''
    else:
        inconsistent_suffix = ' ({:.1f}%)'.format(percent) + suffix
        consistent_suffix = ' ({:.1f}%)'.format(100 - percent) + suffix

    pooled = pooled.dropna(axis=1, how='all')

    ax_inconsistent.set_xticks([])
    ax_consistent.set_xticks([])

    inconsistent_events = pooled_inconsistent.columns
    try:
        # reindex() selects by label and fills labels that are absent with
        # NaN columns.  DataFrames (not bare arrays) are handed to lavalamp,
        # which calls dropna() on its first argument.
        singles_inconsistent = singles.reindex(columns=inconsistent_events)
        lavalamp(singles_inconsistent, color=color, ax=ax_inconsistent)
        # Each call gets its own copy of the shared keyword dict
        lavalamp(pooled.reindex(columns=inconsistent_events), marker='o',
                 color='k', switchy_score_psi=singles_inconsistent,
                 ax=ax_inconsistent, plot_kws=dict(pooled_plot_kws))
        ax_inconsistent.set_title('Pooled splicing events inconsistent '
                                  'with singles{}'
                                  .format(inconsistent_suffix))
    except IndexError:
        # There are no inconsistent events
        pass

    singles = singles.dropna(axis=1, how='all')
    is_consistent = ~singles.columns.isin(inconsistent_events)
    consistent_events = singles.columns[is_consistent]
    singles_consistent = singles.loc[:, consistent_events]

    lavalamp(singles_consistent, color=color, ax=ax_consistent)
    lavalamp(pooled.reindex(columns=consistent_events), color='k',
             marker='o', switchy_score_psi=singles_consistent,
             ax=ax_consistent, plot_kws=dict(pooled_plot_kws))
    ax_consistent.set_title('Pooled splicing events consistent with singles{}'
                            .format(consistent_suffix))
    sns.despine()