import numpy as np
from lib5c.algorithms.correlation import \
make_pairwise_correlation_matrix_from_counts_matrix
from lib5c.plotters.correlation import plot_correlation_matrix
from hic3defdr.plotting.distance_dependence import plot_dd_curves
from hic3defdr.plotting.histograms import plot_pvalue_histogram
from hic3defdr.plotting.dispersion import plot_mvr, plot_ddr
from hic3defdr.plotting.ma import plot_ma
from hic3defdr.plotting.grid import plot_grid
from hic3defdr.plotting.heatmap import plot_heatmap
class PlottingHiC3DeFDR(object):
    """
    Mixin class containing plotting functions for HiC3DeFDR.

    The methods defined here rely on attributes and methods that this class
    does not define itself and that must be provided by the class it is mixed
    into: ``design``, ``dist_thresh_max``, ``mean_thresh``, ``load_data()``,
    ``load_bias()``, ``load_disp_fn()``, and ``get_matrix()``.
    """

    def plot_dd_curves(self, chrom, log=True, **kwargs):
        """
        Plots the distance dependence curve before and after size factor
        adjustment.

        Parameters
        ----------
        chrom : str
            The name of the chromosome to plot the curve for.
        log : bool
            Whether or not to log the axes of the plot.
        kwargs : kwargs
            Typical plotter kwargs.

        Returns
        -------
        pyplot axis
            The axis plotted on.
        """
        # load everything
        bias = self.load_bias(chrom)
        row = self.load_data('row', chrom)
        col = self.load_data('col', chrom)
        raw = self.load_data('raw', chrom)
        scaled = self.load_data('scaled', chrom)

        # compute balanced: divide each replicate's raw values by the product
        # of that replicate's bias at the pixel's row and column
        balanced = np.zeros_like(raw, dtype=float)
        for r in range(self.design.shape[0]):
            balanced[:, r] = raw[:, r] / (bias[row, r] * bias[col, r])

        return plot_dd_curves(row, col, balanced, scaled,
                              repnames=self.design.index, log=log, **kwargs)

    def plot_dispersion_fit(self, cond, xaxis='dist', yaxis='disp',
                            dist_max=None, scatter_fit=-1, scatter_size=36,
                            distance=None, hexbin=False, logx=False,
                            logy=False, **kwargs):
        """
        Plots a hexbin plot of pixel-wise distance vs either dispersion or
        variance, overlaying the estimated and fitted dispersions.

        Parameters
        ----------
        cond : str
            The name of the condition to plot the fit for.
        xaxis : 'mean' or 'dist'
            What to plot on the x-axis.
        yaxis : 'disp' or 'var'
            What to plot on the y-axis.
        dist_max : int
            If ``xaxis`` is 'dist', the maximum distance to include on the plot
            in bin units. Pass None to use ``self.dist_thresh_max``.
        scatter_fit : int
            Pass a nonzero integer to draw the fitted dispersions passed in
            ``disp`` as a scatterplot of ``scatter_fit`` selected points. Pass
            -1 to plot the fitted dispersions passed in ``disp`` as a curve.
            Pass 0 to omit plotting the dispersion estimates altogether.
        scatter_size : int
            The marker size when plotting scatterplots.
        distance : int, optional
            Pick a specific distance in bin units to plot only interactions at
            that distance.
        hexbin : bool
            Pass False to skip plotting the hexbin plot, leaving only the
            estimated variances or dispersions.
        logx, logy : bool
            Whether or not to log the x- or y-axis, respectively.
        kwargs : kwargs
            Typical plotter kwargs.

        Returns
        -------
        pyplot axis
            The axis plotted on.
        """
        # short circuit to plot_ddr() if possible
        if xaxis == 'dist' and yaxis == 'disp' and scatter_fit == -1 \
                and distance is None and hexbin is False and logx is False \
                and logy is False:
            return self.plot_ddr(cond, dist_max=dist_max,
                                 scatter_size=scatter_size, **kwargs)

        # resolve max_dist
        if dist_max is None:
            dist_max = self.dist_thresh_max

        # identify cond_idx
        cond_idx = self.design.columns.tolist().index(cond)

        # load everything
        disp_idx, _ = self.load_data('disp_idx', 'all')
        scaled = self.load_data(
            'scaled', 'all', idx=disp_idx)[0][:, self.design[cond]]
        disp = self.load_data('disp', 'all')[0][:, cond_idx]

        # the per-distance dispersion estimates are optional: when they cannot
        # be loaded, the per-bin overlay is simply omitted
        disp_per_dist = None
        try:
            disp_per_dist = self.load_data('disp_per_dist')[:, cond_idx]
        except IOError:
            disp_per_bin = None
            dist_per_bin = None
        else:
            idx = np.isfinite(disp_per_dist)
            disp_per_bin = disp_per_dist[idx]
            dist_per_bin = np.arange(self.dist_thresh_max + 1)[idx]
        row, _ = self.load_data('row', 'all', idx=disp_idx)
        col, _ = self.load_data('col', 'all', idx=disp_idx)
        dist = col - row

        # compute mean and sample variance
        mean = np.mean(scaled, axis=1)
        var = np.var(scaled, ddof=1, axis=1)

        # resolve distance
        if distance is not None:
            dist_idx = dist == distance
            mean = mean[dist_idx]
            var = var[dist_idx]
            dist = None
            if disp_per_dist is not None:
                disp = np.ones(dist_idx.sum()) * disp_per_dist[distance]
            else:
                # without per-distance estimates there is nothing to look up
                # at ``distance``; use the fitted dispersions of the selected
                # pixels instead of failing on an unbound name
                disp = disp[dist_idx]
            dist_per_bin = None
            disp_per_bin = None
            fit_align_dist = False
        else:
            fit_align_dist = xaxis == 'mean' or yaxis == 'var'

        return plot_mvr(
            pixel_mean=mean,
            pixel_var=var,
            pixel_dist=dist,
            pixel_disp_fit=disp,
            dist_per_bin=dist_per_bin,
            disp_per_bin=disp_per_bin,
            fit_align_dist=fit_align_dist,
            xaxis=xaxis, yaxis=yaxis,
            dist_max=dist_max, mean_min=self.mean_thresh,
            scatter_fit=scatter_fit, scatter_size=scatter_size, hexbin=hexbin,
            logx=logx, logy=logy, **kwargs
        )

    def plot_ddr(self, cond, dist_max=None, scatter_size=36, **kwargs):
        """
        Fast alternative to plot_dispersion_fit() that only supports plotting
        distance versus dispersion, with no hexbin or ``scatter_fit``
        support.

        Parameters
        ----------
        cond : str
            The name of the condition to plot the fit for.
        dist_max : int
            Currently unused: it is accepted so that plot_dispersion_fit() can
            delegate to this method with the same arguments, but it is not
            forwarded to the underlying plotter.
        scatter_size : int
            The marker size when plotting scatterplots.
        kwargs : kwargs
            Typical plotter kwargs.

        Returns
        -------
        pyplot axis
            The axis plotted on.
        """
        # identify cond_idx
        cond_idx = self.design.columns.tolist().index(cond)

        # load everything, keeping only distances with a finite estimate
        disp_per_dist = self.load_data('disp_per_dist')[:, cond_idx]
        idx = np.isfinite(disp_per_dist)
        disp_per_bin = disp_per_dist[idx]
        dist_per_bin = np.arange(self.dist_thresh_max + 1)[idx]
        disp_fn = self.load_disp_fn(cond)

        # plot
        return plot_ddr(dist_per_bin, disp_per_bin, disp_fn,
                        scatter_size=scatter_size, **kwargs)

    def plot_pvalue_distribution(self, idx='disp', **kwargs):
        """
        Plots the p-value distribution across all chromosomes.

        Parameters
        ----------
        idx : {'disp', 'loop'}
            Pass 'disp' to plot p-values for all points for which dispersion was
            estimated. Pass 'loop' to plot p-values for all points which are in
            loops (available only if ``loop_patterns`` was passed to the
            constructor).
        kwargs : kwargs
            Typical plotter kwargs.

        Returns
        -------
        pyplot axis
            The axis plotted on.

        Raises
        ------
        ValueError
            If ``idx`` is neither 'disp' nor 'loop'.
        """
        # load everything
        if idx == 'loop':
            loop_idx, _ = self.load_data('loop_idx', 'all')
            pvalues, _ = self.load_data('pvalues', 'all', idx=loop_idx)
        elif idx == 'disp':
            pvalues, _ = self.load_data('pvalues', 'all')
        else:
            raise ValueError('idx must be loop or disp')

        # plot
        return plot_pvalue_histogram(pvalues, **kwargs)

    def plot_qvalue_distribution(self, **kwargs):
        """
        Plots the q-value distribution across all chromosomes.

        Parameters
        ----------
        kwargs : kwargs
            Typical plotter kwargs.

        Returns
        -------
        pyplot axis
            The axis plotted on.
        """
        # load everything
        qvalues, _ = self.load_data('qvalues', 'all')

        # plot: the p-value histogram plotter is reused with a new x label
        return plot_pvalue_histogram(qvalues, xlabel='qvalue', **kwargs)

    def plot_ma(self, fdr=0.05, conds=None, include_non_loops=True, s=-1,
                nonloop_s=None, density_dpi=72, vmax=None, nonloop_vmax=None,
                ax=None, legend=True, **kwargs):
        """
        Plots an MA plot across all chromosomes.

        Parameters
        ----------
        fdr : float
            The threshold to use for labeling significantly differential loop
            pixels.
        conds : tuple of str, optional
            Pass a tuple of two condition names to compare those two
            conditions. Pass None to compare the first two conditions.
        include_non_loops : bool
            Whether or not to include non-looping pixels in the MA plot.
        s : float
            The marker size to use for the scatterplot, or -1 to use a
            scatter density plot.
        nonloop_s : float, optional
            Pass a separate marker size to use specifically for the non-loop
            pixels if `include_non_loops` is True. Useful for drawing just the
            non-loop pixels as a density by passing `s=1, nonloop_s=-1`. Pass
            None to use `s` as the size for both loop and non-loop pixels.
        density_dpi : int
            If `s` or `nonloop_s` are -1 this specifies the DPI to use for the
            density grid.
        vmax, nonloop_vmax : float, optional
            The vmax to use for `ax.scatter_density()` if `s` or `nonloop_s` is
            -1, respectively. Pass None to choose values automatically.
        ax : pyplot axis
            The axis to plot to. Must have been created with
            `projection='scatter_density'`. Pass None to create a new axis.
        legend : bool
            Pass True to add a legend. Note that passing `legend='outside'` is
            not supported.
        kwargs : kwargs
            Typical plotter kwargs.

        Returns
        -------
        pyplot axis
            The axis plotted on.
        """
        # resolve conds
        cond_names = self.design.columns.tolist()
        if conds is None:
            conds = cond_names[:2]
        cond_idx = [cond_names.index(cond) for cond in conds]

        # load data
        disp_idx, _ = self.load_data('disp_idx', 'all')
        loop_idx, _ = self.load_data('loop_idx', 'all')
        scaled, _ = self.load_data('scaled', 'all', idx=disp_idx)
        qvalues, _ = self.load_data('qvalues', 'all')

        # compute mean: sum the replicates of each condition via the design
        # matrix, then divide by the number of replicates in that condition
        mean = np.dot(scaled, self.design) / np.sum(self.design, axis=0).values
        mean = mean[:, cond_idx]

        # prepare sig_idx
        sig_idx = qvalues < fdr

        # stuff common kwargs into kwargs
        kwargs.update(
            names=conds,
            s=s,
            nonloop_s=nonloop_s,
            density_dpi=density_dpi,
            vmax=vmax,
            # forward the caller's value; this used to be overwritten by vmax
            nonloop_vmax=nonloop_vmax,
            ax=ax,
            legend=legend,
        )

        # plot, returning the plotter's result as the docstring promises
        if include_non_loops:
            return plot_ma(mean, sig_idx, loop_idx=loop_idx, **kwargs)
        return plot_ma(mean[loop_idx], sig_idx, **kwargs)

    def plot_correlation_matrix(self, stage='scaled', idx='loop',
                                correlation='spearman',
                                colorscale=(0.75, 1.0), **kwargs):
        """
        Plots a matrix of pairwise correlations among all replicates.

        Parameters
        ----------
        stage : {'raw', 'scaled'}
            Specify the stage of the data to compute correlations between.
        idx : {'disp', 'loop'}
            Pass 'disp' to compute correlations for all points for which
            dispersion was estimated. Pass 'loop' to compute correlations for
            all points which are in loops (available only if ``loop_patterns``
            was passed to the constructor).
        correlation : {'spearman', 'pearson'}
            Which correlation coefficient to compute.
        colorscale : tuple of float
            The min and max values of the correlation to use to color the
            matrix.
        kwargs : kwargs
            Typical plotter kwargs.

        Returns
        -------
        pyplot axis
            The axis plotted on.

        Raises
        ------
        ValueError
            If ``idx`` is neither 'disp' nor 'loop'.
        """
        # resolve idx
        if idx == 'disp':
            idx, _ = self.load_data('disp_idx', 'all')
        elif idx == 'loop':
            # the pair (disp_idx, loop_idx) is handed to load_data() as-is
            idx = (
                self.load_data('disp_idx', 'all')[0],
                self.load_data('loop_idx', 'all')[0]
            )
        else:
            raise ValueError('idx must be \'disp\' or \'loop\'')

        # load data, transposed before the correlations are computed
        data = self.load_data(stage, 'all', idx=idx)[0].T

        # plot
        return plot_correlation_matrix(
            make_pairwise_correlation_matrix_from_counts_matrix(
                data, correlation=correlation
            ),
            label_values=self.design.index.tolist(),
            colorscale=colorscale,
            **kwargs
        )

    def plot_heatmap(self, chrom, row_slice, col_slice, stage='scaled',
                     rep=None, cond=None, cmap='Reds', vmin=0, vmax=100,
                     **kwargs):
        """
        Plots a simple heatmap of a slice of the contact matrix.

        Parameters
        ----------
        chrom : str
            The chromosome to plot.
        row_slice, col_slice : slice
            The row and column slice, respectively, to plot.
        stage : str
            The stage of the data to plot.
        rep, cond : str, optional
            Pass the rep name or condition name if the data specified by
            ``stage`` has multiple columns.
        cmap : matplotlib colormap or dict
            The colormap to use for the heatmap.
        vmin, vmax : float
            The vmin and vmax to use for the heatmap colorscale.
        kwargs : kwargs
            Typical plotter kwargs.

        Returns
        -------
        pyplot axis
            The axis plotted on.
        """
        matrix = self.get_matrix(
            stage, chrom, row_slice, col_slice, rep=rep, cond=cond)
        # return the plotter's result as the docstring promises
        return plot_heatmap(matrix, cmap=cmap, vmin=vmin, vmax=vmax, **kwargs)

    def plot_grid(self, chrom, i, j, w, vmax=100, fdr=0.05, cluster_size=3,
                  fdr_vmid=0.05,
                  color_cycle=('blue', 'green', 'purple', 'yellow', 'cyan',
                               'red'),
                  despine=False, **kwargs):
        """
        Plots a combination visualization grid focusing on a specific pixel on a
        specific chromosome, combining heatmaps, cluster outlines, and
        stripplots.

        Parameters
        ----------
        chrom : str
            The name of the chromosome to slice matrices from.
        i, j : int
            The row and column index of the pixel to focus on.
        w : int
            The size of the heatmap will be ``2*w + 1`` bins in each dimension.
        vmax : float
            The maximum of the colorscale to use when plotting normalized
            heatmaps.
        fdr : float
            The FDR threshold to use when outlining clusters.
        cluster_size : int
            The cluster size threshold to use when outlining clusters.
        fdr_vmid : float
            The FDR value at the middle of the colorscale used for plotting the
            q-value heatmap.
        color_cycle : list of matplotlib colors
            The color cycle to use over conditions.
        despine : bool
            Forwarded unchanged to the underlying grid plotter (presumably
            controls whether axis spines are removed - confirm against that
            plotter's documentation).
        kwargs : kwargs
            Typical plotter kwargs.

        Returns
        -------
        pyplot axis, grid of pyplot axes, function
            The first pyplot axis returned is injected by ``@plotter``.
            The grid of pyplot axes is the second return value from the call to
            ``plt.subplots()`` that is used to create the grid.
            The function takes two args, an FDR and a cluster size, and redraws
            the cluster outlines using the new parameters.
        """
        # load everything
        row = self.load_data('row', chrom)
        col = self.load_data('col', chrom)
        raw = self.load_data('raw', chrom)
        scaled = self.load_data('scaled', chrom)
        disp_idx = self.load_data('disp_idx', chrom)
        loop_idx = self.load_data('loop_idx', chrom)
        mu_hat_alt = self.load_data('mu_hat_alt', chrom)
        mu_hat_null = self.load_data('mu_hat_null', chrom)
        qvalues = self.load_data('qvalues', chrom)

        return plot_grid(i, j, w, row, col, raw, scaled, mu_hat_alt,
                         mu_hat_null, qvalues, disp_idx, loop_idx, self.design,
                         fdr, cluster_size, vmax=vmax, fdr_vmid=fdr_vmid,
                         color_cycle=color_cycle, despine=despine, **kwargs)