Source code for hic3defdr.util.matrices

from functools import reduce

import numpy as np
import scipy.sparse as sparse
from scipy.ndimage import zoom


[docs]def deconvolute(matrix, bias, invert=False):
    """
    Applies bias factors to a sparse matrix.

    Parameters
    ----------
    matrix : scipy.sparse.spmatrix
        The matrix to bias. Will be converted to CSR by this function.
    bias : np.ndarray
        The dense bias vector.
    invert : bool
        Whether or not to invert the bias before applying it. By default this
        function multiplies the matrix by the bias. Infinite bias (1 / 0) will
        be converted to zero bias to allow rows with infinite bias to be dropped
        from the resulting sparse matrix.

    Returns
    -------
    scipy.sparse.csr_matrix
        The deconvoluted matrix.
    """
    csr = matrix.tocsr()
    if invert:
        inf_idx = bias == 0
        bias[inf_idx] = 1
        bias = 1 / bias
        bias[inf_idx] = 0
    bias_csr = sparse.diags([bias], [0])
    biased_csr = bias_csr.dot(csr)
    biased_csr = biased_csr.dot(bias_csr)
    return biased_csr


[docs]def wipe_distances(matrix, min_dist, max_dist):
    """
    Eliminates entries from a sparse matrix outside of a specified distance
    range.

    Parameters
    ----------
    matrix : scipy.sparse.spmatrix
        The matrix to wipe. Will be converted to COO by this function.
    min_dist, max_dist : int
        The minimum and maximum distance allowed, respectively, in bin units.

    Returns
    -------
    scipy.sparse.coo_matrix
        The wiped matrix.
    """
    coo = matrix.tocoo()
    dist = coo.col - coo.row
    coo.data[(dist < min_dist) | (dist > max_dist)] = 0
    coo.eliminate_zeros()
    return coo


[docs]def sparse_intersection(fnames, bias=None):
    """
    Computes the intersection set of (row, col) pairs across multiple sparse
    matrices.

    Parameters
    ----------
    fnames : list of str
        File paths to sparse matrices loadable by ``scipy.sparse.load_npz()``.
        Will be converted to COO by this function.
    bias : np.ndarray, optional
        Pass the bias matrix to drop rows and columns with bias factors of
        zero in any replicate.

    Returns
    -------
    row, col : np.ndarray
        The intersection set of (row, col) pairs.
    """
    csr_sum_coo = reduce(
        lambda x, y: x + y,
        ((deconvolute(sparse.load_npz(fname), bias[:, i]) > 0).astype(int)
         for i, fname in enumerate(fnames))).tocoo()
    full_idx = csr_sum_coo.data == len(fnames)
    return csr_sum_coo.row[full_idx], csr_sum_coo.col[full_idx]


[docs]def sparse_union(fnames, dist_thresh=1000, bias=None, size_factors=None,
                 mean_thresh=0.0):
    """
    Computes the intersection set of (row, col) pairs across multiple sparse
    matrices.

    Parameters
    ----------
    fnames : list of str
        File paths to sparse matrices loadable by ``scipy.sparse.load_npz()``.
        Will be converted to COO by this function.
    dist_thresh : int
        The maximum distance allowed, respectively, in bin units.
    bias : np.ndarray, optional
        Rectangular matrix containing bias factors for each bin (rows) and each
        replicate (columns).
    size_factors : np.ndarray, optional
        Size factors for each replicate.
    mean_thresh : float
        Minimum mean value (in normalized space) to keep pixels for.

    Returns
    -------
    row, col : np.ndarray
        The union set of (row, col) pairs.
    """
    if size_factors is None:
        size_factors = np.ones(len(fnames))
    csr_sum_coo = reduce(
        lambda x, y: x + y,
        (wipe_distances((deconvolute(sparse.load_npz(fname), bias[:, i],
                                     invert=True)
                         if bias is not None else sparse.load_npz(fname))
                        / size_factors[i], 0, dist_thresh)
         for i, fname in enumerate(fnames))).tocoo()
    full_idx = (csr_sum_coo.data >= len(fnames)*mean_thresh) \
        & np.isfinite(csr_sum_coo.data)
    return csr_sum_coo.row[full_idx], csr_sum_coo.col[full_idx]


[docs]def select_matrix(row_slice, col_slice, row, col, data, symmetrize=True):
    """
    Slices out a dense matrix from COO-formatted sparse data, filling empty
    values with ``np.nan``.

    Parameters
    ----------
    row_slice, col_slice : slice
        Row and column slice to use, respectively.
    row, col, data : np.ndarray
        COO-format sparse matrix to be sliced.
    symmetrize : bool
        Pass True to fill in lower-triangle points of the matrix.

    Returns
    -------
    np.ndarray
        Dense matrix.
    """
    r_start, r_stop = row_slice.start, row_slice.stop
    c_start, c_stop = col_slice.start, col_slice.stop
    idx = (row >= r_start) & (row < r_stop) & (col >= c_start) & (col < c_stop)
    matrix = np.ones((r_stop - r_start, c_stop - c_start)) * np.nan
    matrix[row[idx]-r_start, col[idx]-c_start] = data[idx]
    if symmetrize:
        t_idx = (col >= r_start) & (col < r_stop) & (row >= c_start) & \
            (row < c_stop)
        matrix[col[t_idx]-r_start, row[t_idx]-c_start] = data[t_idx]
    return matrix


[docs]def dilate(matrix, doublings):
    """
    Doubles the "resolution" of a matrix using nearest-neighbor interpolation.

    Parameters
    ----------
    matrix : np.ndarray
        Input matrix.
    doublings : int
        The number of times the resolution will be doubled.

    Returns
    -------
    np.ndarray
        The dilated matrix.
    """
    for _ in range(doublings):
        matrix = zoom(matrix, 2, order=0)
    return matrix