import itertools
import matplotlib as mpl
from matplotlib import pyplot as plt
import numpy as np
import seaborn as sns
def violinplot(data, groupby=None, color_ordered=None, ax=None,
               pooled_data=None,
               order=None, violinplot_kws=None, title=None,
               label_pooled=False, outliers=None, data_type=None):
    """Plot grouped violins, with optional pooled dots and outlier violins.

    Parameters
    ----------
    data : pandas.Series
        The main data to plot as violins
    groupby : dict-like, optional
        How to group the samples (e.g. by phenotype). Note that the data is
        always passed through ``data.groupby(groupby)``, so in practice a
        grouping has to be supplied for non-empty data.
    color_ordered : list, optional
        List of colors, in the order you want to plot
    ax : matplotlib.Axes, optional
        Where to plot the violins. If None, get the current axes
    pooled_data : pandas.Series, optional
        Pooled samples. Will be plotted as black dots
    order : list, optional
        The order in which to plot the phenotypes, e.g. if the data is
        from a differentiation time course. If None, the sorted union of
        the groups found in ``data``, ``pooled_data`` and ``outliers`` is
        used.
    violinplot_kws : dict, optional
        Other keywords to pass to seaborn.violinplot. The dict is copied,
        never modified.
    title : str, optional
        Title of the plot
    label_pooled : bool, optional
        If True, annotate the pooled samples
    outliers : pandas.Series
        Outlier samples. Will be plotted in their phenotype category,
        as a grey shadow
    data_type : 'expression' | 'splicing' | None
        If 'splicing' (or a non-empty prefix of it), then force the y-axis
        to be from 0 to 1. If 'expression' or None, don't mess with the
        y-axis
    """
    # Prefix matching is kept from the original, but an empty string must
    # not count as 'splicing' (''.startswith-style matches are always True).
    splicing = bool(data_type) and 'splicing'.startswith(data_type)

    # Copy so that neither the default 'alpha' nor the outlier 'zorder'
    # leaks back into a dict owned by the caller.
    violinplot_kws = {} if violinplot_kws is None else dict(violinplot_kws)
    violinplot_kws.setdefault('alpha', 0.75)

    if ax is None:
        ax = plt.gca()

    if order is None:
        # Default order: every group seen in any of the supplied datasets.
        datasets = [d for d in (data, pooled_data, outliers)
                    if d is not None]
        order = sorted(set(itertools.chain.from_iterable(
            d.groupby(groupby).groups for d in datasets)))

    _violinplot_single_dataset(data, groupby=groupby, color=color_ordered,
                               ax=ax, order=order,
                               violinplot_kws=violinplot_kws,
                               splicing=splicing)

    if pooled_data is not None and groupby is not None:
        pooled_groups = pooled_data.groupby(groupby).groups
        for i, name in enumerate(order):
            # Groups without pooled samples are simply skipped; checking
            # membership (instead of catching KeyError around the whole
            # body) keeps errors raised by the plotting call visible.
            if name not in pooled_groups:
                continue
            subset = pooled_data.loc[pooled_groups[name]]
            plot_pooled_dot(ax, subset, x_offset=i, label=label_pooled)

    if outliers is not None:
        # zorder=-1 puts the outlier violins behind the non-outlier data.
        outlier_violinplot_kws = dict(violinplot_kws, zorder=-1)
        _violinplot_single_dataset(outliers, groupby=groupby,
                                   color='lightgrey', ax=ax, order=order,
                                   violinplot_kws=outlier_violinplot_kws,
                                   splicing=splicing)

    if splicing:
        ax.set_ylim(0, 1)
        ax.set_yticks([0, 0.5, 1])
        ax.set_ylabel(r'$\Psi$')

    if title is not None:
        ax.set_title(title)

    ax.set_xlim(-.5, len(order) - .5)

    if groupby is not None:
        # Tick labels show the group name and its number of non-null
        # observations in the main data (0 when the group is absent).
        sizes = data.dropna().groupby(groupby).size()
        xticklabels = ['{}\nn={}'.format(group, sizes.get(group, 0))
                       for group in order]
        ax.set_xticks(list(range(len(order))))
        ax.set_xticklabels(xticklabels)
    sns.despine()
def _violinplot_single_dataset(data, groupby=None, order=None,
violinplot_kws=None, color=None, ax=None,
splicing=False):
"""Plot a single set of violinplot.
Separated out so real data plotting and outlier plotting works the same
"""
data = data.dropna()
if data.empty:
return
single_points = data.groupby(groupby).filter(lambda x: len(x) < 2)
data = data.groupby(groupby).filter(lambda x: len(x) > 1)
# Check that all the groups are represented, if not, add some data out of
# range to the missing group
verified_color = color
if groupby is not None and order is not None:
verified_groups = data.groupby(groupby).size().keys()
verified_order = [x for x in order if x in verified_groups]
positions = [i for i, x in enumerate(order) if x in verified_groups]
single_groups = single_points.groupby(groupby).size().keys()
single_positions = dict((x, i) for i, x in enumerate(order) if
x in single_groups)
if not mpl.colors.is_color_like(color):
verified_color = [x for i, x in enumerate(color)
if order[i] in verified_groups]
single_color = dict((group, c) for i, (c, group) in
enumerate(zip(color, single_groups))
if group in single_groups)
else:
single_color = dict((group, color) for group in single_groups if
group in single_groups)
else:
verified_order = order
positions = None
single_positions = None
single_color = None
violinplot_kws = {} if violinplot_kws is None else violinplot_kws
# Add a tiny amount of random noise in case the values are all identical,
# Otherwise we get a LinAlg error.
data += np.random.uniform(0, 0.001, data.shape[0])
inner = 'points' if splicing else 'box'
if len(data) > 0:
sns.violinplot(data, groupby=groupby, bw=0.1, inner=inner,
color=verified_color, linewidth=0.5,
order=verified_order,
ax=ax, positions=positions, **violinplot_kws)
if single_points is not None:
for group, y in single_points.groupby(groupby):
x = single_positions[group]
c = single_color[group]
ax.scatter([x], [y], color=c, s=50)
ax.annotate(y.index[0], (x, y), textcoords='offset points',
xytext=(7, 0), fontsize=14)
def plot_pooled_dot(ax, pooled, x_offset=0, label=False):
    """Plot pooled samples as dark dots stacked at one x position.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes object to plot on
    pooled : pandas.Series or scalar
        Pooled data of this gene. Null values of a Series are dropped; a
        bare scalar is treated as a single observation.
    x_offset : number, optional
        x position at which all the dots are drawn
    label : bool, optional
        If True, annotate every plotted dot with the text 'pooled'

    Returns
    -------
    None
    """
    if hasattr(pooled, 'dropna'):
        pooled = pooled.dropna()
    else:
        # Scalars have neither ``dropna`` nor ``shape``; wrap them so the
        # rest of the function can treat them like a length-1 sequence.
        pooled = np.atleast_1d(pooled)

    # Every dot shares the same x coordinate.
    xs = np.zeros(pooled.shape[0]) + x_offset
    ax.plot(xs, pooled, 'o', color='#262626')

    if label:
        for x, y in zip(xs, pooled):
            # A scalar NaN is not removed above, so skip it here.
            if np.isnan(y):
                continue
            ax.annotate('pooled', (x, y), textcoords='offset points',
                        xytext=(7, 0), fontsize=14)
def nmf_space_transitions(nmf_space_positions, feature_id,
                          phenotype_to_color, phenotype_to_marker, order,
                          ax=None, xlabel=None, ylabel=None):
    """Plot one feature's per-phenotype positions and connect them in order.

    Parameters
    ----------
    nmf_space_positions : pandas.DataFrame
        Positions for all features. ``nmf_space_positions.loc[feature_id]``
        is expected to give a table indexed by phenotype with columns
        ``pc_1`` and ``pc_2`` as its first two columns (assumption based on
        how the table is accessed here - confirm against the caller).
    feature_id : label
        Row label selecting the feature to plot
    phenotype_to_color : dict-like
        Maps each phenotype to its color; used as the groupby key
    phenotype_to_marker : dict-like
        Maps each phenotype to its marker
    order : list
        Phenotypes in the order in which their points are connected.
        Phenotypes absent from the selected feature are skipped.
    ax : matplotlib.Axes, optional
        Where to plot. If None, get the current axes
    xlabel : str, optional
        If given, set as the x-axis label and remove the x ticks
    ylabel : str, optional
        If given, set as the y-axis label and remove the y ticks
    """
    df = nmf_space_positions.loc[feature_id]

    if ax is None:
        # Axes, not Figure: everything below uses Axes-only methods.
        ax = plt.gca()

    for color, s in df.groupby(phenotype_to_color):
        # The first phenotype of each color group supplies marker and label.
        phenotype = s.index[0]
        marker = phenotype_to_marker[phenotype]
        ax.plot(s['pc_1'], s['pc_2'], color=color, marker=marker,
                markersize=14, alpha=0.75, label=phenotype,
                linestyle='none')

    # Axis limits come from ALL features (plus 5% headroom), not only the
    # one being plotted.
    ax.set_xlim(0, nmf_space_positions.iloc[:, 0].max() * 1.05)
    ax.set_ylim(0, nmf_space_positions.iloc[:, 1].max() * 1.05)

    # Thin line behind the markers tracing the phenotypes in ``order``.
    present = [phenotype for phenotype in order if phenotype in df.index]
    path = df.loc[present]
    x = path.iloc[:, 0].tolist()
    y = path.iloc[:, 1].tolist()
    ax.plot(x, y, zorder=-1, color='#262626', alpha=0.5, linewidth=1)
    ax.legend()

    if xlabel is not None:
        ax.set_xlabel(xlabel)
        ax.set_xticks([])

    if ylabel is not None:
        ax.set_ylabel(ylabel)
        ax.set_yticks([])
def simple_twoway_scatter(sample1, sample2, **kwargs):
    """Plot a two-dimensional scatterplot between two samples

    Parameters
    ----------
    sample1 : pandas.Series
        Data to plot on the x-axis
    sample2 : pandas.Series
        Data to plot on the y-axis
    **kwargs
        Any other keyword arguments valid for seaborn.jointplot. ``kind``
        defaults to 'scatter'. Dicts passed as ``joint_kws`` and
        ``marginal_kws`` are copied before defaults are filled in.

    Returns
    -------
    jointgrid : seaborn.axisgrid.JointGrid
        Returns a JointGrid instance

    See Also
    --------
    seaborn.jointplot
    """
    # Copy the keyword dicts: defaults are added below and must not show up
    # in dicts the caller may reuse. ``None`` is treated like "not given".
    joint_kws = dict(kwargs.pop('joint_kws', None) or {})
    kind = kwargs.pop('kind', 'scatter')
    marginal_kws = dict(kwargs.pop('marginal_kws', None) or {})

    if kind == 'scatter':
        # Use the same 50 bin edges, spanning both samples, for the
        # marginal plots.
        vmin = min(sample1.min(), sample2.min())
        vmax = max(sample1.max(), sample2.max())
        marginal_kws.setdefault('bins', np.linspace(vmin, vmax, 50))
    if kind not in ('reg', 'resid'):
        joint_kws.setdefault('alpha', 0.5)

    # x/y are passed by keyword, which is accepted by both old and current
    # seaborn signatures.
    jointgrid = sns.jointplot(x=sample1, y=sample2, joint_kws=joint_kws,
                              marginal_kws=marginal_kws, kind=kind, **kwargs)

    # Raise the lower axis limits to at most 0.1 below the smallest
    # observed value; upper limits are left as seaborn chose them.
    xmin, xmax, ymin, ymax = jointgrid.ax_joint.axis()
    xmin = max(xmin, sample1.min() - .1)
    ymin = max(ymin, sample2.min() - .1)
    jointgrid.ax_joint.set_xlim(xmin, xmax)
    jointgrid.ax_joint.set_ylim(ymin, ymax)
    return jointgrid