import numpy as np
import matplotlib.pyplot as plt
from lib5c.util.plotting import plotter
from lib5c.plotters.scatter import scatter
from hic3defdr.util.scaled_nb import mvr, inverse_mvr
[docs]@plotter
def compare_disp_fits(fit_fns, labels, max_dist=200, colors=None,
linestyles=None, legend=True, **kwargs):
"""
Compares multiple dispersion fit functions.
Parameters
----------
fit_fns : list of functions
The fit functions to compare. Each function should be vectorized and
take one argument (the distance) and return one value (the fitted
dispersion at that distance).
labels : list of str
The labels to use for each fit function being compared.
max_dist : int
The maximum distance out to which the plot should be drawn.
colors : list of valid matplotlib colors, optional
Pass a list of colors to color each fit function with. Pass None to
color them automatically.
linestyles : list of valid matplotlib linestyles, optional
Pass a list of linestyles to plot each fit function with. Pass None to
use the default linestyle.
legend : bool
Pass True to include a legend on the plot.
kwargs : kwargs
Typical plotter kwargs.
Returns
-------
pyplot axis
The axis plotted on.
"""
# resolve colors
if colors is None:
colors = ['C%s' % i for i in range(len(fit_fns))]
# resolve linestyles
if linestyles is None:
linestyles = [None] * len(fit_fns)
# define x values
xs = np.arange(max_dist + 1)
# plot
for f, l, c, ls in zip(fit_fns, labels, colors, linestyles):
plt.plot(xs, f(xs), color=c, linestyle=ls, label=l)
# labels
plt.xlabel('distance')
plt.ylabel('dispersion')
# add legend
if legend:
plt.legend()
[docs]@plotter
def plot_mvr(pixel_mean, pixel_var=None, pixel_disp=None, pixel_dist=None,
pixel_var_fit=None, pixel_disp_fit=None, mean_per_bin=None,
dist_per_bin=None, var_per_bin=None, disp_per_bin=None,
fit_align_dist=False, xaxis='mean', yaxis='var', dist_max=200,
mean_min=5.0, scatter_fit=-1, scatter_size=36,
hexbin=True, logx=True, logy=True, xlim=None, ylim=None,
legend=True, **kwargs):
"""
Plots pixel-wise, bin-wise, and estimated dispersions in terms of either
dispersion or variance versus either pixel-wise mean or distance.
Parameters
----------
pixel_mean : np.ndarray
The pixel-wise mean.
pixel_var, pixel_disp : np.ndarray, optional
The pixel-wise variance and dispersion. Pass only one of these.
pixel_dist : np.ndarray, optional
The pixel-wise distance. Not needed for simple MVR plotting.
pixel_var_fit, pixel_disp_fit : np.ndarray, optional
The smoothed pixel-wise variance or dispersion estimates. Pass only one
of these.
mean_per_bin, dist_per_bin : np.ndarray, optional
The mean or distance of each bin used for estimating the dispersions but
before any smoothing was performed. Pass only one of these.
var_per_bin, disp_per_bin : np.ndarray, optional
The estimated variance or dispersion of each bin before any smoothing
was performed. Pass only one of these.
fit_align_dist : bool
Pass True if the var/disp was fitted as a function of distance, in which
case the fitted vars/disps will be aligned to distance rather than in a
pixel-wise fashion. Use this to fix "jagged" fit lines caused by
setting ``xaxis`` to a different value than the one the fitting was
actually performed against.
xaxis : 'mean' or 'dist'
What to plot on the x-axis.
yaxis : 'var' or 'disp'
What to plot on the y-axis.
dist_max : int
If ``xaxis`` is "dist", the maximum distance to plot in bin units.
mean_min : float
If `xaxis` is "mean", the minimum mean to plot.
scatter_fit : int
Pass a nonzero integer to draw the fitted vars/disps as a scatter plot
of ``scatter_fit`` selected points. Pass -1 to plot the fitted
vars/disps as a curve. Pass 0 to omit plotting the var/disp estimates
altogether.
scatter_size : int
The marker size when plotting scatter plots.
hexbin : bool
Pass False to skip plotting the hexbins, leaving only the estimated
variances or dispersions.
logx, logy : bool
Whether or not to log the x- or y-axis, respectively.
kwargs : kwargs
Typical plotter kwargs.
Returns
-------
pyplot axis
The axis plotted on.
"""
# convert pixel_* to [unique_]dist_* quantities part 1: dist_mean (dd)
# dist_* quantities are aligned to np.arange(pixel_dist.max() + 1)
# dist_* quantities are only made if pixel_dist is passed
# it's really nice if dist_mean is monotonic, so we will force that here
if pixel_dist is not None:
dist_range = np.arange(pixel_dist.max() + 1)
smallest_seen = np.inf
dist_mean_list = []
for d in dist_range:
currently_seen = np.mean(pixel_mean[pixel_dist == d])
if np.isfinite(currently_seen) and currently_seen < smallest_seen:
smallest_seen = currently_seen
dist_mean_list.append(currently_seen)
else:
dist_mean_list.append(smallest_seen)
dist_mean = np.array(dist_mean_list)
else:
dist_mean = None
dist_range = None
# mean_per_bin vs dist_per_bin
if dist_mean is not None and mean_per_bin is None \
and dist_per_bin is not None:
mean_per_bin = np.interp(dist_per_bin, dist_range, dist_mean)
elif dist_per_bin is None and mean_per_bin is not None:
dist_per_bin = np.interp(mean_per_bin, dist_mean[::-1],
dist_range[::-1])
# var_per_bin vs disp_per_bin
if var_per_bin is None and disp_per_bin is not None:
var_per_bin = mvr(mean_per_bin, disp_per_bin)
elif disp_per_bin is None and var_per_bin is not None:
disp_per_bin = inverse_mvr(mean_per_bin, var_per_bin)
# pixel_var vs pixel_disp
if pixel_var is None and pixel_disp is not None:
pixel_var = mvr(pixel_mean, pixel_disp)
elif pixel_disp is None and pixel_var is not None:
pixel_disp = inverse_mvr(pixel_mean, pixel_var)
else:
raise ValueError('exactly one of pixel_var and pixel_disp must be '
'passed')
# pixel_var_fit vs pixel_disp_fit
if pixel_var_fit is None and pixel_disp_fit is not None:
pixel_var_fit = mvr(pixel_mean, pixel_disp_fit)
elif pixel_disp_fit is None and pixel_var_fit is not None:
pixel_disp_fit = inverse_mvr(pixel_mean, pixel_var_fit)
# convert pixel_* to dist_* quantities part 2: the fitted values
dist_var_fit = None
dist_disp_fit = None
if dist_mean is not None and pixel_disp_fit is not None:
dist_disp_fit = np.array([np.mean(pixel_disp_fit[pixel_dist == d])
for d in dist_range])
dist_var_fit = mvr(dist_mean, dist_disp_fit)
# establish which y values will go into the cloud, bins, and fit
if yaxis == 'var':
cloud_y = pixel_var
bin_y = var_per_bin
fit_y = dist_var_fit if fit_align_dist else pixel_var_fit
fit_label = r'$\hat{\sigma}^2$'
ylabel = 'variance'
elif yaxis == 'disp':
cloud_y = pixel_disp
bin_y = disp_per_bin
fit_y = dist_disp_fit if fit_align_dist else pixel_disp_fit
fit_label = r'$\hat{\alpha}$'
ylabel = 'dispersion'
else:
raise ValueError('yaxis must be \'disp\' or \'var\'')
# establish which x values will go into the cloud, bins, and fit
if xaxis == 'dist':
cloud_x = pixel_dist
bin_x = dist_per_bin
fit_x = dist_range if fit_align_dist else pixel_dist
xlabel = 'distance'
elif xaxis == 'mean':
cloud_x = pixel_mean
bin_x = mean_per_bin
fit_x = dist_mean if fit_align_dist else pixel_mean
xlabel = 'mean'
else:
raise ValueError('yaxis must be \'disp\' or \'var\'')
# determine reasonable plot limits
if xaxis == 'mean':
xmin = mean_min
xmax = np.percentile(pixel_mean, 99.99)
elif xaxis == 'dist':
xmin = 0
xmax = dist_max
else:
raise ValueError('xaxis must be \'mean\' or \'dist\'')
# prepare and apply filter indexes
cloud_idx = np.isfinite(cloud_x) & np.isfinite(cloud_y) & \
((cloud_y > 0) if logy else True) & \
((cloud_x > 0) if logx else True) & \
(cloud_x >= xmin) & (cloud_x <= xmax)
cloud_x = cloud_x[cloud_idx]
cloud_y = cloud_y[cloud_idx]
try:
bin_idx = np.isfinite(bin_x) & np.isfinite(bin_y) & \
((bin_y > 0) if logy else True) & \
((bin_x > 0) if logx else True) & \
(bin_x >= xmin) & (bin_x <= xmax)
bin_x = bin_x[bin_idx]
bin_y = bin_y[bin_idx]
except TypeError:
pass
fit_idx = np.isfinite(fit_x) & np.isfinite(fit_y) & \
((fit_y > 0) if logy else True) & \
((fit_x > 0) if logx else True) & \
(fit_x >= xmin) & (fit_x <= xmax)
fit_x = fit_x[fit_idx]
fit_y = fit_y[fit_idx]
if hexbin:
ymin = max(np.percentile(cloud_y, 0.1), 1e-7) if logy \
else min(np.percentile(cloud_y, 0.1), 0)
ymax = np.percentile(cloud_y, 99.9)
else:
ymin = None
ymax = None
# override the default plot limits above if xlim/ylim were passed
if xlim is not None:
xmin, xmax = xlim
if ylim is not None:
ymin, ymax = ylim
# hexbin per-pixel values
if hexbin:
scatter(cloud_x, cloud_y, logx=logx, logy=logy, hexbin=True,
xlim=[xmin, xmax], ylim=[ymin, ymax])
# scatter per-bin estimates
if bin_x is not None and bin_y is not None:
plt.scatter(bin_x, bin_y, label='%s per bin' % fit_label, color='C1',
s=scatter_size)
# plot or scatter fitted estimates
if fit_x is not None and fit_y is not None:
sort_idx = np.argsort(fit_x) if not fit_align_dist \
else np.arange(fit_x.shape[0])
if scatter_fit > 0:
rate = int(fit_x.shape[0] / scatter_fit)
plt.scatter(fit_x[sort_idx][::rate],
fit_y[sort_idx][::rate],
label=fit_label, color='C4', s=scatter_size)
elif scatter_fit == -1:
plt.plot(fit_x[sort_idx], fit_y[sort_idx],
label='smoothed %s' % fit_label, linewidth=3, color='C4')
# add poisson line
if xaxis == 'mean':
if yaxis == 'var':
xs = 10**np.linspace(np.log10(xmin), np.log10(xmax), 100) if logx \
else np.linspace(xmin, xmax, 100)
plt.plot(xs, xs, label='Poisson', linestyle='--', linewidth=3,
color='C3')
elif yaxis == 'disp':
plt.hlines(0, xmin, xmax, label='Poisson', linestyle='--',
linewidth=3, color='C3')
else:
raise ValueError('yaxis must be \'disp\' or \'var\'')
else:
if yaxis == 'disp' and not logy:
plt.hlines(0, xmin, xmax, label='Poisson', linestyle='--',
linewidth=3, color='C3')
# cleanup
if hexbin:
plt.ylim((ymin, ymax))
plt.xlim((xmin, xmax))
if not hexbin:
if logx:
plt.xscale('log')
if logy:
plt.yscale('log')
plt.xlabel(xlabel)
plt.ylabel(ylabel)
if legend:
plt.legend()
[docs]@plotter
def plot_ddr(dist_per_bin, disp_per_bin, disp_fn, scatter_size=36, legend=True,
**kwargs):
"""
Simplified plotter to visualized distance-dispersion relationships.
Plots per-distance dispersion estimates and the fitted dispersion curve.
Parameters
----------
dist_per_bin, disp_per_bin : np.ndarray
The distances and estimated dispersions for each distance, respectively.
disp_fn : function
The smoothed dispersion function. Returns the smoothed dispersion as a
function of distance.
scatter_size : int
The marker size when plotting scatter plots.
kwargs : kwargs
Typical plotter kwargs.
Returns
-------
pyplot axis
The axis plotted on.
"""
xmin = dist_per_bin.min()
xmax = dist_per_bin.max()
xs = np.arange(xmin, xmax + 1)
ys = disp_fn(xs)
plt.scatter(dist_per_bin, disp_per_bin, label=r'$\hat{\alpha}$ per bin',
color='C1', s=scatter_size)
plt.plot(xs, ys, label=r'smoothed $\hat{\alpha}$', color='C4', lw=3)
plt.hlines(0, xmin, xmax, label='Poisson', color='C3', lw=3, linestyle='--')
plt.ylabel('dispersion')
plt.xlabel('distance')
if legend:
plt.legend()