Source code for hic3defdr.plotting.dispersion

import numpy as np
import matplotlib.pyplot as plt

from lib5c.util.plotting import plotter
from lib5c.plotters.scatter import scatter

from hic3defdr.util.scaled_nb import mvr, inverse_mvr


[docs]@plotter def compare_disp_fits(fit_fns, labels, max_dist=200, colors=None, linestyles=None, legend=True, **kwargs): """ Compares multiple dispersion fit functions. Parameters ---------- fit_fns : list of functions The fit functions to compare. Each function should be vectorized and take one argument (the distance) and return one value (the fitted dispersion at that distance). labels : list of str The labels to use for each fit function being compared. max_dist : int The maximum distance out to which the plot should be drawn. colors : list of valid matplotlib colors, optional Pass a list of colors to color each fit function with. Pass None to color them automatically. linestyles : list of valid matplotlib linestyles, optional Pass a list of linestyles to plot each fit function with. Pass None to use the default linestyle. legend : bool Pass True to include a legend on the plot. kwargs : kwargs Typical plotter kwargs. Returns ------- pyplot axis The axis plotted on. """ # resolve colors if colors is None: colors = ['C%s' % i for i in range(len(fit_fns))] # resolve linestyles if linestyles is None: linestyles = [None] * len(fit_fns) # define x values xs = np.arange(max_dist + 1) # plot for f, l, c, ls in zip(fit_fns, labels, colors, linestyles): plt.plot(xs, f(xs), color=c, linestyle=ls, label=l) # labels plt.xlabel('distance') plt.ylabel('dispersion') # add legend if legend: plt.legend()
[docs]@plotter def plot_mvr(pixel_mean, pixel_var=None, pixel_disp=None, pixel_dist=None, pixel_var_fit=None, pixel_disp_fit=None, mean_per_bin=None, dist_per_bin=None, var_per_bin=None, disp_per_bin=None, fit_align_dist=False, xaxis='mean', yaxis='var', dist_max=200, mean_min=5.0, scatter_fit=-1, scatter_size=36, hexbin=True, logx=True, logy=True, xlim=None, ylim=None, legend=True, **kwargs): """ Plots pixel-wise, bin-wise, and estimated dispersions in terms of either dispersion or variance versus either pixel-wise mean or distance. Parameters ---------- pixel_mean : np.ndarray The pixel-wise mean. pixel_var, pixel_disp : np.ndarray, optional The pixel-wise variance and dispersion. Pass only one of these. pixel_dist : np.ndarray, optional The pixel-wise distance. Not needed for simple MVR plotting. pixel_var_fit, pixel_disp_fit : np.ndarray, optional The smoothed pixel-wise variance or dispersion estimates. Pass only one of these. mean_per_bin, dist_per_bin : np.ndarray, optional The mean or distance of each bin used for estimating the dispersions but before any smoothing was performed. Pass only one of these. var_per_bin, disp_per_bin : np.ndarray, optional The estimated variance or dispersion of each bin before any smoothing was performed. Pass only one of these. fit_align_dist : bool Pass True if the var/disp was fitted as a function of distance, in which case the fitted vars/disps will be aligned to distance rather than in a pixel-wise fashion. Use this to fix "jagged" fit lines caused by setting ``xaxis`` to a different value than the one the fitting was actually performed against. xaxis : 'mean' or 'dist' What to plot on the x-axis. yaxis : 'var' or 'disp' What to plot on the y-axis. dist_max : int If ``xaxis`` is "dist", the maximum distance to plot in bin units. mean_min : float If `xaxis` is "mean", the minimum mean to plot. scatter_fit : int Pass a nonzero integer to draw the fitted vars/disps as a scatter plot of ``scatter_fit`` selected points. Pass -1 to plot the fitted vars/disps as a curve. Pass 0 to omit plotting the var/disp estimates altogether. scatter_size : int The marker size when plotting scatter plots. hexbin : bool Pass False to skip plotting the hexbins, leaving only the estimated variances or dispersions. logx, logy : bool Whether or not to log the x- or y-axis, respectively. kwargs : kwargs Typical plotter kwargs. Returns ------- pyplot axis The axis plotted on. """ # convert pixel_* to [unique_]dist_* quantities part 1: dist_mean (dd) # dist_* quantities are aligned to np.arange(pixel_dist.max() + 1) # dist_* quantities are only made if pixel_dist is passed # it's really nice if dist_mean is monotonic, so we will force that here if pixel_dist is not None: dist_range = np.arange(pixel_dist.max() + 1) smallest_seen = np.inf dist_mean_list = [] for d in dist_range: currently_seen = np.mean(pixel_mean[pixel_dist == d]) if np.isfinite(currently_seen) and currently_seen < smallest_seen: smallest_seen = currently_seen dist_mean_list.append(currently_seen) else: dist_mean_list.append(smallest_seen) dist_mean = np.array(dist_mean_list) else: dist_mean = None dist_range = None # mean_per_bin vs dist_per_bin if dist_mean is not None and mean_per_bin is None \ and dist_per_bin is not None: mean_per_bin = np.interp(dist_per_bin, dist_range, dist_mean) elif dist_per_bin is None and mean_per_bin is not None: dist_per_bin = np.interp(mean_per_bin, dist_mean[::-1], dist_range[::-1]) # var_per_bin vs disp_per_bin if var_per_bin is None and disp_per_bin is not None: var_per_bin = mvr(mean_per_bin, disp_per_bin) elif disp_per_bin is None and var_per_bin is not None: disp_per_bin = inverse_mvr(mean_per_bin, var_per_bin) # pixel_var vs pixel_disp if pixel_var is None and pixel_disp is not None: pixel_var = mvr(pixel_mean, pixel_disp) elif pixel_disp is None and pixel_var is not None: pixel_disp = inverse_mvr(pixel_mean, pixel_var) else: raise ValueError('exactly one of pixel_var and pixel_disp must be ' 'passed') # pixel_var_fit vs pixel_disp_fit if pixel_var_fit is None and pixel_disp_fit is not None: pixel_var_fit = mvr(pixel_mean, pixel_disp_fit) elif pixel_disp_fit is None and pixel_var_fit is not None: pixel_disp_fit = inverse_mvr(pixel_mean, pixel_var_fit) # convert pixel_* to dist_* quantities part 2: the fitted values dist_var_fit = None dist_disp_fit = None if dist_mean is not None and pixel_disp_fit is not None: dist_disp_fit = np.array([np.mean(pixel_disp_fit[pixel_dist == d]) for d in dist_range]) dist_var_fit = mvr(dist_mean, dist_disp_fit) # establish which y values will go into the cloud, bins, and fit if yaxis == 'var': cloud_y = pixel_var bin_y = var_per_bin fit_y = dist_var_fit if fit_align_dist else pixel_var_fit fit_label = r'$\hat{\sigma}^2$' ylabel = 'variance' elif yaxis == 'disp': cloud_y = pixel_disp bin_y = disp_per_bin fit_y = dist_disp_fit if fit_align_dist else pixel_disp_fit fit_label = r'$\hat{\alpha}$' ylabel = 'dispersion' else: raise ValueError('yaxis must be \'disp\' or \'var\'') # establish which x values will go into the cloud, bins, and fit if xaxis == 'dist': cloud_x = pixel_dist bin_x = dist_per_bin fit_x = dist_range if fit_align_dist else pixel_dist xlabel = 'distance' elif xaxis == 'mean': cloud_x = pixel_mean bin_x = mean_per_bin fit_x = dist_mean if fit_align_dist else pixel_mean xlabel = 'mean' else: raise ValueError('yaxis must be \'disp\' or \'var\'') # determine reasonable plot limits if xaxis == 'mean': xmin = mean_min xmax = np.percentile(pixel_mean, 99.99) elif xaxis == 'dist': xmin = 0 xmax = dist_max else: raise ValueError('xaxis must be \'mean\' or \'dist\'') # prepare and apply filter indexes cloud_idx = np.isfinite(cloud_x) & np.isfinite(cloud_y) & \ ((cloud_y > 0) if logy else True) & \ ((cloud_x > 0) if logx else True) & \ (cloud_x >= xmin) & (cloud_x <= xmax) cloud_x = cloud_x[cloud_idx] cloud_y = cloud_y[cloud_idx] try: bin_idx = np.isfinite(bin_x) & np.isfinite(bin_y) & \ ((bin_y > 0) if logy else True) & \ ((bin_x > 0) if logx else True) & \ (bin_x >= xmin) & (bin_x <= xmax) bin_x = bin_x[bin_idx] bin_y = bin_y[bin_idx] except TypeError: pass fit_idx = np.isfinite(fit_x) & np.isfinite(fit_y) & \ ((fit_y > 0) if logy else True) & \ ((fit_x > 0) if logx else True) & \ (fit_x >= xmin) & (fit_x <= xmax) fit_x = fit_x[fit_idx] fit_y = fit_y[fit_idx] if hexbin: ymin = max(np.percentile(cloud_y, 0.1), 1e-7) if logy \ else min(np.percentile(cloud_y, 0.1), 0) ymax = np.percentile(cloud_y, 99.9) else: ymin = None ymax = None # override the default plot limits above if xlim/ylim were passed if xlim is not None: xmin, xmax = xlim if ylim is not None: ymin, ymax = ylim # hexbin per-pixel values if hexbin: scatter(cloud_x, cloud_y, logx=logx, logy=logy, hexbin=True, xlim=[xmin, xmax], ylim=[ymin, ymax]) # scatter per-bin estimates if bin_x is not None and bin_y is not None: plt.scatter(bin_x, bin_y, label='%s per bin' % fit_label, color='C1', s=scatter_size) # plot or scatter fitted estimates if fit_x is not None and fit_y is not None: sort_idx = np.argsort(fit_x) if not fit_align_dist \ else np.arange(fit_x.shape[0]) if scatter_fit > 0: rate = int(fit_x.shape[0] / scatter_fit) plt.scatter(fit_x[sort_idx][::rate], fit_y[sort_idx][::rate], label=fit_label, color='C4', s=scatter_size) elif scatter_fit == -1: plt.plot(fit_x[sort_idx], fit_y[sort_idx], label='smoothed %s' % fit_label, linewidth=3, color='C4') # add poisson line if xaxis == 'mean': if yaxis == 'var': xs = 10**np.linspace(np.log10(xmin), np.log10(xmax), 100) if logx \ else np.linspace(xmin, xmax, 100) plt.plot(xs, xs, label='Poisson', linestyle='--', linewidth=3, color='C3') elif yaxis == 'disp': plt.hlines(0, xmin, xmax, label='Poisson', linestyle='--', linewidth=3, color='C3') else: raise ValueError('yaxis must be \'disp\' or \'var\'') else: if yaxis == 'disp' and not logy: plt.hlines(0, xmin, xmax, label='Poisson', linestyle='--', linewidth=3, color='C3') # cleanup if hexbin: plt.ylim((ymin, ymax)) plt.xlim((xmin, xmax)) if not hexbin: if logx: plt.xscale('log') if logy: plt.yscale('log') plt.xlabel(xlabel) plt.ylabel(ylabel) if legend: plt.legend()
[docs]@plotter def plot_ddr(dist_per_bin, disp_per_bin, disp_fn, scatter_size=36, legend=True, **kwargs): """ Simplified plotter to visualized distance-dispersion relationships. Plots per-distance dispersion estimates and the fitted dispersion curve. Parameters ---------- dist_per_bin, disp_per_bin : np.ndarray The distances and estimated dispersions for each distance, respectively. disp_fn : function The smoothed dispersion function. Returns the smoothed dispersion as a function of distance. scatter_size : int The marker size when plotting scatter plots. kwargs : kwargs Typical plotter kwargs. Returns ------- pyplot axis The axis plotted on. """ xmin = dist_per_bin.min() xmax = dist_per_bin.max() xs = np.arange(xmin, xmax + 1) ys = disp_fn(xs) plt.scatter(dist_per_bin, disp_per_bin, label=r'$\hat{\alpha}$ per bin', color='C1', s=scatter_size) plt.plot(xs, ys, label=r'smoothed $\hat{\alpha}$', color='C4', lw=3) plt.hlines(0, xmin, xmax, label='Poisson', color='C3', lw=3, linestyle='--') plt.ylabel('dispersion') plt.xlabel('distance') if legend: plt.legend()