Source code for sdt.brightness

# SPDX-FileCopyrightText: 2020 Lukas Schrangl <lukas.schrangl@tuwien.ac.at>
#
# SPDX-License-Identifier: BSD-3-Clause

"""Brightness analysis
===================

This is a collection of tools to analyze the brightness of fluorophores. It
offers

- The :py:func:`from_raw_image` function which may be used to calculate the
  brightness of single molecules by extracting the information directly from
  the raw image data.
- The :py:class:`Distribution` class, with which one can determine the
  brightness distribution of fluorophores via kernel density estimation.


Examples
--------

To determine single molecule brightness using :py:func:`from_raw_image`, one
needs the raw image data and localization data of molecules (which can be
determined from the image date e.g. using functionality from the
:py:mod:`sdt.loc` module):

>>> loc = sdt.io.load("localizations.h5")  # Load single molecule localizations
>>> with io.ImageSequence("images.tif") as img_seq:
...     from_raw_image(loc, img_seq, radius=4)

The last line updates the ``loc`` DataFrame with the brightness data extracted
from the raw image.

The distribution of the brightness of single molecules can be calculated
by means of the :py:class:`Distribution` class:

>>> loc = [sdt.io.load(f) for f in glob.glob("*.h5")]  # Load sm data
>>> cam_eff = 300 / 15.7  # number of camera counts per photon
>>> bdist = Distribution(loc, cam_eff)
>>> bdist.mean()
314.1592653589793
>>> bdist.std()
31.41592653589793
>>> bdist.plot()


Programming reference
---------------------

.. autofunction:: from_raw_image
.. autoclass:: Distribution
    :members:
"""
import warnings
import math

import numpy as np
import pandas as pd
from scipy import signal

from .helper import numba
from .image import CircleMask, RectMask
from . import config


def _make_mask_image(feat_idx, mask, shape):
    """Make a boolean mask with all `True` except for `mask` around features

    Parameters
    ----------
    feat_idx : array-like, shape(n, m), dtype(int)
        Rounded indices (coordinates reversed) of features in the image. These
        indices have to be clipped to `shape`.
    mask : array-like, dtype(bool)
        Feature mask
    shape : tuple of int
        Desired shape of the resulting image

    Returns
    -------
    numpy.ndarray, dtype(bool)
        Boolean mask of the desired shape
    """
    # Create return array which is two pixels larger than shape. That way we
    # can use the add-1-trick below and rounding, which has likely been done
    # to create integer `feat_idx` cannot get us out-of-bounds.
    ret = np.zeros(np.add(shape, 2))
    # Add 1 if mask dimension is odd so that in the end ret[1:-1, 1:-1, …]
    # has the mask centered in the odd dimensions and moved to the top left
    # in even dimensions
    fi = feat_idx + np.mod(mask.shape, 2)
    ret[tuple(fi.T)] = 1
    ret = np.isclose(signal.fftconvolve(ret, mask, "same"), 0)
    return ret[(slice(1, -1),) * len(shape)]


@numba.jit(nopython=True, cache=True, nogil=True)
def _make_mask_image_numba(i_start, i_end, m_start, m_end, mask, shape):
    """Make a boolean mask with all `True` except for `mask` around features

    numba-accelerated implementation. This is called differently than
    :py:func:`_make_mask_image`. **Works only for 2D data.**

    Parameters
    ----------
    i_start : array-like, shape(n, 2), dtype(int)
        Indices of start pixels in the image of the feature mask for each
        feature. This is typically the `img_start` array returned by
        :py:func:`_get_mask_boundaries`.
    i_end : array-like, shape(n, 2), dtype(int)
        Indices of end pixels in the image of the feature mask for each
        feature. This is typically the `img_end` array returned by
        :py:func:`_get_mask_boundaries`.
    m_start : array-like, shape(n, 2), dtype(int)
        Indices of start pixels in the feature mask for each
        feature. This is typically the `mask_start` array returned by
        :py:func:`_get_mask_boundaries`.
    m_end : array-like, shape(n, 2), dtype(int)
        Indices of end pixels in the feature mask for each
        feature. This is typically the `mask_start` array returned by
        :py:func:`_get_mask_boundaries`.
    mask : array-like, dtype(bool)
        Feature mask
    shape : tuple of int
        Desired shape of the resulting image

    Returns
    -------
    numpy.ndarray, dtype(bool)
        Boolean mask of the desired shape
    """
    ret = np.ones(shape, dtype=np.bool_)
    mask_inv = ~mask
    for j in range(len(i_start)):
        # TODO: This is 2D only until someone comes up with a solution
        mask_part = mask_inv[m_start[j, 0]:m_end[j, 0],
                             m_start[j, 1]:m_end[j, 1]]
        ret[i_start[j, 0]:i_end[j, 0], i_start[j, 1]:i_end[j, 1]] &= mask_part
    return ret


def _get_mask_boundaries(feat_idx, mask_shape, shape):
    """Get boundaries for masks around features

    For each feature, this calculates where in the image a mask has to begin
    and to end to be centered around the feature. It takes the image size into
    account such that masks are clipped at the edges.

    Parameters
    ----------
    feat_idx : array-like, shape(n, m), dtype(int)
        Rounded indices (coordinates reversed) of features in the image.
    mask_shape : tuple of int
        Shape of the mask
    shape : tuple of int
        Shape of the image

    Returns
    -------
    img_start, img_end : numpy.ndarray, dtype(int), shape(m, n)
        Where in the image the mask starts and ends
    mask_start, mask_end : numpy.ndarray, dtype(int), shape(m, n)
        Which part of the mask to apply. This is important if the mask gets
        clipped near the edges of the image
    """
    start = feat_idx - np.floor_divide(mask_shape, 2)
    end = start + mask_shape

    img_start = np.clip(start, 0, shape)
    img_end = np.clip(end, 0, shape)

    mask_start = np.clip(-start, 0, mask_shape)
    mask_end = np.clip(shape - start, 0, mask_shape)

    return img_start, img_end, mask_start, mask_end


@numba.jit(nopython=True, cache=True, nogil=True)
def _get_mask_boundaries_numba(feat_idx, mask_shape, shape):
    """Get boundaries for masks around features (numba-accelerated)

    For each feature, this calculates where in the image a mask has to begin
    and to end to be centered around the feature. It takes the image size into
    account such that masks are clipped at the edges.

    Parameters
    ----------
    feat_idx : array-like, shape(n, m), dtype(int)
        Rounded indices (coordinates reversed) of features in the image.
    mask_shape : tuple of int
        Shape of the mask
    shape : tuple of int
        Shape of the image

    Returns
    -------
    img_start, img_end : numpy.ndarray, dtype(int), shape(m, n)
        Where in the image the mask starts and ends
    mask_start, mask_end : numpy.ndarray, dtype(int), shape(m, n)
        Which part of the mask to apply. This is important if the mask gets
        clipped near the edges of the image
    """
    n, ndim = feat_idx.shape
    ret = np.empty((4, n, ndim), dtype=np.int64)
    for j in range(ndim):
        s = shape[j]
        ms = mask_shape[j]
        r = ms // 2

        for i in range(n):
            start = feat_idx[i, j] - r
            end = start + ms
            ret[0, i, j] = max(0, min(start, s))
            ret[1, i, j] = max(0, min(end, s))
            ret[2, i, j] = max(0, min(-start, ms))
            ret[3, i, j] = max(0, min(s - start, ms))

    return ret


def _from_raw_image_python(pos, frame, feat_mask, bg_mask, bg_estimator,
                           global_bg=False):
    """Get brightness by counting pixel values (single frame, python impl.)

    This is called for each frame by :py:func:`from_raw_image`

    Parameters
    ----------
    pos : array-like, shape(n, m)
        Localization coordinates (n features in an m-dimensional space)
    frame : numpy.ndarray
        Raw image data
    feat_mask : array-like, dtype(bool)
        Mask around each localization to determine which pixel belong to the
        feature
    bg_mask : array-like, dtype(bool)
        Mask around each localization to determine which pixel belong to the
        background.
    bg_estimator : numpy ufunc
        How to determine the background from the background pixels. A typical
        example would be :py:func:`np.mean`.
    global_bg : bool, optional
        If True, calculate background globally from all pixels that are not
        part of any feature. `bg_mask` is ignored in this case. Defaults to
        False.

    Returns
    -------
    numpy.ndarray, shape(n, 4)
        Each line represents one feature. Columns are "signal", "mass",
        "bg", "bg_std".
    """
    feat_mask_ones = feat_mask.sum()  # Number of pixels selected by the mask

    feat_idx = np.around(pos[:, ::-1]).astype(int)

    # Make a mask for the whole image where everything is True except for the
    # feat_mask around features
    mask_img = _make_mask_image(feat_idx, feat_mask, frame.shape)

    feat_start, feat_end, feat_mask_start, feat_mask_end = \
        _get_mask_boundaries(feat_idx, feat_mask.shape, frame.shape)

    if global_bg:
        bg_pixels = frame[mask_img]
        bg = bg_estimator(bg_pixels)
        bg_std = np.std(bg_pixels)
    else:
        bg_start, bg_end, bg_mask_start, bg_mask_end = \
            _get_mask_boundaries(feat_idx, bg_mask.shape, frame.shape)

    ret = np.empty((len(pos), 4))
    for i in range(len(ret)):
        feat_slice = tuple(
            slice(s, e) for s, e in zip(feat_start[i], feat_end[i]))
        feat_region = frame[feat_slice]

        if feat_region.shape != feat_mask.shape:
            # The signal was too close to the egde of the image, we could not
            # read all the pixels we wanted
            ret[i, :] = np.NaN
            continue

        feat_pixels = feat_region[feat_mask]

        if not global_bg:
            bg_slice = tuple(
                slice(s, e) for s, e in zip(bg_start[i], bg_end[i]))
            bg_mask_slice = tuple(
                slice(s, e) for s, e in zip(bg_mask_start[i], bg_mask_end[i]))
            bg_mask_with_feat = bg_mask[bg_mask_slice] & mask_img[bg_slice]
            bg_pixels = frame[bg_slice][bg_mask_with_feat]

            if bg_pixels.size:
                bg = bg_estimator(bg_pixels)
                bg_std = np.std(bg_pixels)
            else:
                bg = np.NaN
                bg_std = np.NaN

        mass_uncorr = feat_pixels.sum()
        signal_uncorr = feat_pixels.max()
        if math.isfinite(bg):
            mass = mass_uncorr - feat_mask_ones * bg
            signal = signal_uncorr - bg
        else:
            mass = mass_uncorr
            signal = signal_uncorr

        ret[i, 0] = signal
        ret[i, 1] = mass
        ret[i, 2] = bg
        ret[i, 3] = bg_std

    return ret


@numba.jit(nopython=True, cache=True, nogil=True)
def _from_raw_image_numba(pos, frame, feat_mask, bg_mask, bg_estimator,
                          global_bg=False):
    """Get brightness by counting pixel values (single frame, numba impl.)

    This is called for each frame by :py:func:`from_raw_image` with numba
    engine. It works for 2D data only.

    Parameters
    ----------
    pos : array-like, shape(n, m)
        Localization coordinates (n features in an m-dimensional space)
    frame : numpy.ndarray
        Raw image data
    feat_mask : array-like, dtype(bool)
        Mask around each localization to determine which pixel belong to the
        feature
    bg_mask : array-like, dtype(bool)
        Mask around each localization to determine which pixel belong to the
        background.
    bg_estimator : numpy ufunc
        How to determine the background from the background pixels. A typical
        example would be :py:func:`np.mean`.
    global_bg : bool, optional
        If True, calculate background globally from all pixels that are not
        part of any feature. `bg_mask` is ignored in this case. Defaults to
        False.

    Returns
    -------
    numpy.ndarray, shape(n, 4)
        Each line represents one feature. Columns are "signal", "mass",
        "bg", "bg_std".
    """
    feat_mask_ones = feat_mask.sum()  # Number of pixels selected by the mask

    feat_idx = np.empty_like(pos, dtype=np.int64)
    np.around(pos[:, ::-1], 0, feat_idx)

    feat_bd = _get_mask_boundaries_numba(feat_idx, feat_mask.shape,
                                         frame.shape)
    # Make a mask for the whole image where everything is True except for the
    # feat_mask around features
    mask_img = _make_mask_image_numba(feat_bd[0], feat_bd[1], feat_bd[2],
                                      feat_bd[3], feat_mask, frame.shape)

    if global_bg:
        bg_pixels = frame.flatten()[mask_img.flatten()]
        if bg_estimator == 1:
            bg = np.median(bg_pixels)
        else:
            bg = np.mean(bg_pixels)
        bg_std = np.std(bg_pixels)
    else:
        bg_bd = _get_mask_boundaries_numba(feat_idx, bg_mask.shape,
                                           frame.shape)

    ret = np.empty((len(pos), 4))
    for i in numba.prange(len(pos)):
        feat_pixels = frame[feat_bd[0, i, 0]:feat_bd[1, i, 0],
                            feat_bd[0, i, 1]:feat_bd[1, i, 1]].flatten()
        if feat_pixels.size != feat_mask.size:
            # The signal was too close to the egde of the image, we could not
            # read all the pixels we wanted
            ret[i, :] = np.NaN
            continue

        f_mask_pixels = feat_mask[feat_bd[2, i, 0]:feat_bd[3, i, 0],
                                  feat_bd[2, i, 1]:feat_bd[3, i, 1]].flatten()

        feat_pixels_masked = feat_pixels[f_mask_pixels]
        mass_uncorr = np.sum(feat_pixels_masked)
        signal_uncorr = np.max(feat_pixels_masked)

        if not global_bg:
            bg_pixels = np.ravel(frame[bg_bd[0, i, 0]:bg_bd[1, i, 0],
                                       bg_bd[0, i, 1]:bg_bd[1, i, 1]])
            bg_mask_with_feat = np.ravel(
                bg_mask[bg_bd[2, i, 0]:bg_bd[3, i, 0],
                        bg_bd[2, i, 1]:bg_bd[3, i, 1]] &
                mask_img[bg_bd[0, i, 0]:bg_bd[1, i, 0],
                         bg_bd[0, i, 1]:bg_bd[1, i, 1]])

            bg_pixels_masked = bg_pixels[bg_mask_with_feat]

            if bg_pixels_masked.size:
                if bg_estimator == 1:
                    bg = np.median(bg_pixels_masked)
                else:
                    bg = np.mean(bg_pixels_masked)
                bg_std = np.std(bg_pixels_masked)
            else:
                bg = np.NaN
                bg_std = np.NaN

        if math.isfinite(bg):
            mass = mass_uncorr - feat_mask_ones * bg
            signal = signal_uncorr - bg
        else:
            mass = mass_uncorr
            signal = signal_uncorr

        ret[i, 0] = signal
        ret[i, 1] = mass
        ret[i, 2] = bg
        ret[i, 3] = bg_std

    return ret


[docs]@config.set_columns def from_raw_image(positions, frames, radius, bg_frame=2, bg_estimator="mean", columns={}, engine="numba", mask="square"): """Determine particle brightness by counting pixel values Around each localization, pixel values are summed up (see the `mask` parameter) to determine the brightness (mass). Additionally, local background is determined by calculating the brightness (see `bg_estimator` parameter in a frame (again, see `mask` parameter) around this box, where all pixels belonging to any localization are excluded. This background is subtracted from the signal brightness. Parameters ---------- positions : pandas.DataFrame Localization data. "signal", "mass", "bg", and "bg_dev" columns are added and/or replaced directly in this object. frames : iterable of numpy.ndarrays Raw image data radius : int Half width of the box in which pixel values are summed up. See `mask` parameter for details. bg_frame : int or infinity, optional Width of frame (in pixels) around a feature for background determination. If infinity, background is calculated globally from all pixels that are not part of any feature. Defaults to 2. bg_estimator : {"mean", "median"} or numpy ufunc, optional How to determine the background from the background pixels. "mean" will use :py:func:`numpy.mean` and "median" will use :py:func:`numpy.median`. If a function is given (which takes the pixel data as arguments and returns a scalar), apply this to the pixels. Defaults to "mean". mask : {"square", "circle"} or (array-like, array-like), optional If "square", sum pixels in a ``2 * radius + 1`` sized square around each localization as the brightness and use `bg_estimator` in a frame of `bg_frame` width around it to determine the local background, which is subtracted from the brightness. If "circle", sum pixels in a ``2 * radius + 1`` sized circle (a :py:class:`CircleMask` with radius `radius` and ``extra=0.5`` is used) around each localization and get the background from an annulus of width ``bg_frame``. One can also pass a tuple ``(feat_mask, bg_mask)`` of boolean arrays for brightness and background detection. In this case, `radius` and `bg_frame` are ignored. If `bg_mask` is None, calculate background globally (per frame) from all pixels that are not part of any feature. In all cases, all pixels belonging to any signal are automatically excluded from the background detection. Defaults to "square". Other parameters ---------------- columns : dict, optional Override default column names as defined in :py:attr:`config.columns`. Relevant names are `coords`, `time`, `mass`, `signal`, `bg`, `bg_dev`. This means, if your DataFrame has coordinate columns "x" and "z" and the time column "alt_frame", set ``columns={"coords": ["x", "z"], "time": "alt_frame"}``. engine : {"numba", "python"}, optional Numba is faster, but only supports 2D data and mean or median bg_estimator. If numba cannot be used, automatically fall back to pure python, which support arbitray dimensions and bg_estimator functions. Defaults to "numba". """ if not len(positions): positions[columns["signal"]] = [] positions[columns["mass"]] = [] positions[columns["bg"]] = [] positions[columns["bg_dev"]] = [] return if isinstance(bg_estimator, str): bg_estimator = getattr(np, bg_estimator) ndim = len(columns["coords"]) if engine == "numba": if ndim != 2: warnings.warn("numba engine supports only 2D data. Falling back " "to python backend.") engine = "python" if bg_estimator is np.mean: bg_estimator = 0 elif bg_estimator is np.median: bg_estimator = 1 else: warnings.warn("numba engine supports only mean and median as " "bg_estimators. Falling back to python backend.") engine = "python" # Create masks for foreground pixels and background pixels around a # feature if isinstance(mask, str): if mask == "square": feat_mask = RectMask((2 * radius + 1,) * ndim) if math.isfinite(bg_frame): bg_mask = RectMask((2 * (radius + bg_frame) + 1,) * ndim) else: bg_mask = None elif mask == "circle": feat_mask = CircleMask(radius, 0.5) if math.isfinite(bg_frame): bg_mask = CircleMask(radius + bg_frame, 0.5) else: bg_mask = None else: raise ValueError('"{}" does not describe a mask'.format(mask)) else: # Assume it is a tuple of arrays feat_mask, bg_mask = mask # Convert to numpy array for performance reasons # This is faster than pos_matrix = positions[columns["coords"]].values pos_matrix = [] for p in columns["coords"]: pos_matrix.append(positions[p].values) pos_matrix = np.array(pos_matrix).T fno_matrix = positions[columns["time"]].values.astype(int) # Pre-allocate result array ret = np.empty((len(pos_matrix), 4)) if engine == "numba": worker = _from_raw_image_numba elif engine == "python": worker = _from_raw_image_python else: raise ValueError("Unknown engine \"{}\".".format(engine)) if bg_mask is None: bg_mask = np.emtpy((0,)*ndim, dtype=bool) global_bg = True else: global_bg = False fnos = np.unique(fno_matrix) for f in fnos: current = (fno_matrix == f) ret[current] = worker(pos_matrix[current], frames[f], feat_mask, bg_mask, bg_estimator, global_bg) positions[columns["signal"]] = ret[:, 0] positions[columns["mass"]] = ret[:, 1] positions[columns["bg"]] = ret[:, 2] positions[columns["bg_dev"]] = ret[:, 3]
def _norm_pdf_python(x, m, s): return 1 / np.sqrt(2 * np.pi * s**2) * np.exp(-(x-m)**2/(2*s**2)) def _calc_dist_python(x, mean, sigma, gauss_width): y = np.zeros_like(x, dtype=float) for m, s in zip(mean, sigma): x_mask = (x >= m - s * gauss_width) & (x <= m + s * gauss_width) y[x_mask] += _norm_pdf_python(x[x_mask], m, s) return y _norm_pdf_numba = numba.jit(_norm_pdf_python, nopython=True, cache=True, nogil=True) @numba.jit(nopython=True, cache=True, nogil=True) def _calc_dist_numba(x, mean, sigma, gauss_width): y = np.zeros_like(x, dtype=np.float64) for i in numba.prange(len(mean)): m = mean[i] s = sigma[i] for j in range(len(x)): xj = x[j] if (xj < m - s * gauss_width) or (xj > m + s * gauss_width): continue y[j] += _norm_pdf_numba(xj, m, s) return y
[docs]class Distribution(object): """Brightness distribution of fluorescent signals Given a list of peak masses (integrated intensities), calculate the masses' probability density function. This is a Gaussian KDE with variable bandwith. See the `bw` parameter documentation for details. """ @config.set_columns def __init__(self, data, abscissa=None, bw=2., cam_eff=1., kern_width=5., engine="numba", columns={}): """Parameters ---------- data : list of pandas.DataFrame or pandas.DataFrame or numpy.ndarray If a DataFrame is given, extract the masses from the "mass" column. A list of DataFrames will be concatenated. Brightness values can also be passed as an one-dimensional ndarray. abscissa : None or numpy.ndarray or float The abscissa (x axis) values for the calculated distribution. Providing a float is equivalent to ``numpy.arange(abscissa + 1)``. If `None`, automatically choose something appropriate based on the values of `data` and the bandwidth. bw : float, optional Bandwidth factor. The bandwidth for each data point ``d`` is calculated as ``bw * np.sqrt(d)``. Defaults to 2. cam_eff : float, optional Camera efficiency, i. e. how many camera counts correspond to one photon. The brightness data will be divided by this number. Defaults to 1. kern_width : float, optional Calculate kernels only in the range of +/- `kern_width` times the bandwidth to save computation time. Defaults to 5. Other parameters ---------------- engine : {"numba", "python"}, optional Whether to use the faster numba-based implementation or the slower pure python one. Defaults to "numba". columns : dict, optional Override default column names as defined in :py:attr:`config.columns`. The only relevant name is `mass`. """ if isinstance(data, pd.DataFrame): data = data[columns["mass"]].values elif not isinstance(data, np.ndarray): # assume it is an iterable of DataFrames data = np.concatenate([d[columns["mass"]].values for d in data]) data = data / cam_eff # don't change original data by using /= sigma = bw * np.sqrt(data) if abscissa is None: am = np.argmax(data) x = np.arange(data[am] + 2 * sigma[am]) elif isinstance(abscissa, np.ndarray): x = abscissa else: x = np.arange(float(abscissa + 1)) if engine == "numba": y = _calc_dist_numba(x, data, sigma, kern_width) elif engine == "python": y = _calc_dist_python(x, data, sigma, kern_width) else: raise ValueError("Unknown engine \"{}\"".format(engine)) self.norm_factor = np.trapz(y, x) y /= self.norm_factor self.graph = np.array([x, y], copy=False) """:py:class:`numpy.ndarray` of shape (2, n); First row is the abscissa, second row is the ordinate of the normalized distribution function. """ self.num_data = len(data) """Number of data points (single molecules) used to create the distribution. """
[docs] def mean(self): """Mean Returns ------- float Mean (1st moment) of the distribution """ x, y = self.graph return np.trapz(x*y, x)
[docs] def std(self): """Standard deviation Returns ------- float Standard deviation (square root of the 2nd central moment) of the distribution """ m = self.mean() x, y = self.graph var = np.trapz((x-m)**2*y, x) return np.sqrt(var)
[docs] def most_probable(self): """Most probable value (mode) Returns ------- float Most probable brightness (brightness value were the distribution function has its maximum) """ x, y = self.graph return x[np.argmax(y)]
mode = most_probable
[docs] def plot(self, ax=None, label=None): """Plot the distribution function graph Parameters ---------- ax : matplotlib.axes.Axes or None, optional Axes to draw on. If `None`, use :py:func:`matplotlib.gca`. Defaults to `None`. label : str or None, optional Label for the legend. Defaults to None """ import matplotlib.pyplot as plt if ax is None: ax = plt.gca() ax.plot(*self.graph, label=label)
def __repr__(self): return """Brightness distribution Number of data points: {num_d} Most probable value: {mp:.4g} Mean: {mn:.4g} Standard deviation: {std:.4g}""".format( num_d=self.num_data, mp=self.most_probable(), mn=self.mean(), std=self.std())