Source code for hic3defdr.util.lrt

import numpy as np
import scipy.stats as stats

from hic3defdr.util.scaled_nb import logpmf, fit_mu_hat


[docs]def lrt(raw, f, disp, design, refit_mu=True):
    """
    Performs a likelihood ratio test on raw data ``raw`` given scaling factors
    ``f`` and dispersion ``disp``.

    Parameters
    ----------
    raw, f, disp : np.ndarray
        Matrices of raw values, combined scaling factors, and dispersions,
        respectively. Rows correspond to pixels, columns correspond to
        replicates.
    design : np.ndarray
        Describes the grouping of replicates into conditions. Rows correspond to
        replicates, columns correspond to conditions, and values should be True
        where a replicate belongs to a condition and False otherwise.

    Returns
    -------
    pvalues : np.ndarray
        The LRT p-values per pixel.
    llr : np.ndarray
        The log likelihood ratio per pixel.
    mu_hat_null, mu_hat_alt : np.ndarray
        The fitted mean parameters under the null and alt models, respectively,
        per pixel.
    """
    if refit_mu:
        mu_hat_null = fit_mu_hat(raw, f, disp)
        mu_hat_alt = np.array(
            [fit_mu_hat(raw[:, design[:, c]],
                        f[:, design[:, c]],
                        disp[:, design[:, c]])
             for c in range(design.shape[1])]).T
    else:
        mu_hat_null = np.mean(raw / f, axis=1)
        mu_hat_alt = np.array(
            [np.mean(raw[:, design[:, c]] / f[:, design[:, c]], axis=1)
             for c in range(design.shape[1])]).T
    mu_hat_alt_wide = np.dot(mu_hat_alt, design.T)
    null_ll = np.sum(logpmf(raw, mu_hat_null[:, None] * f, disp), axis=1)
    alt_ll = np.sum(logpmf(raw, mu_hat_alt_wide * f, disp), axis=1)
    llr = null_ll - alt_ll
    pvalues = stats.chi2(design.shape[1] - 1).sf(-2 * llr)
    return pvalues, llr, mu_hat_null, mu_hat_alt