Source code for sdt.fret.sm_track

# SPDX-FileCopyrightText: 2020 Lukas Schrangl <lukas.schrangl@tuwien.ac.at>
#
# SPDX-License-Identifier: BSD-3-Clause

"""Module containing a class for tracking smFRET data """
import itertools
from contextlib import suppress
from typing import Callable, Dict, Optional, Sequence, Union

import numpy as np
import pandas as pd

from .. import brightness, config, multicolor, spatial


try:
    import trackpy
    trackpy_available = True
except ImportError:
    trackpy_available = False


[docs]class SmFRETTracker: """Class for tracking of smFRET data There is support for dumping and loading to/from YAML using :py:mod:`sdt.io.yaml`. """ yaml_tag = "!SmFRETTracker" _yaml_keys = ("excitation_seq", "registrator", "link_options", "min_length", "brightness_options", "interpolate", "coloc_dist", "acceptor_channel", "neighbor_radius") frame_selector: multicolor.FrameSelector """A :py:class:`FrameSelector` instance with the matching :py:attr:`excitation_seq`. """ registrator: multicolor.Registrator """multicolor.Registrator used to overlay channels""" link_options: Dict """Options passed to :py:func:`trackpy.link_df`""" min_length: int """Minimum length of tracks""" neighbor_radius: float """How far two features may be apart while still being considered close enough so that one influences the brightness measurement of the other. This is related to the `radius` option of :py:func:`brightness.from_raw_image`. """ brightness_options: Dict """Options passed to :py:func:`brightness.from_raw_image`. Make sure to adjust :py:attr:`neighbor_radius` if you change either the `mask` or the `radius` option! """ interpolate: bool """Whether to interpolate coordinates of features that have been missed by the localization algorithm. """ coloc_dist: float """After overlaying donor and acceptor channel features, this gives the maximum distance up to which donor and acceptor signal are considered to come from the same molecule. """ acceptor_channel: int """Can be either 1 or 2, depending the acceptor is the first or the second channel in :py:attr:`registrator`. """ columns: Dict """Column names in DataFrames. Defaults are taken from :py:attr:`config.columns`. """ @config.set_columns def __init__(self, excitation_seq: str = "da", registrator: Optional[multicolor.Registrator] = None, link_radius: float = 5, link_mem: int = 1, min_length: int = 1, feat_radius: int = 4, bg_frame: int = 2, bg_estimator: Union[str, Callable[[np.ndarray], float]] = "mean", neighbor_radius: Union[float, str] = "auto", interpolate: bool = True, coloc_dist: float = 2.0, acceptor_channel: int = 2, link_quiet: bool = True, link_options: Dict = {}, columns: Dict = {}): """Parameters ---------- excitation_seq Set the :py:attr:`excitation_seq` attribute. registrator Registrator used to overlay channels. If `None`, use the identity transform. link_radius Maximum movement of features between frames. See `search_range` option of :py:func:`trackpy.link_df`. link_mem Maximum number of frames for which a feature may not be detected. See `memory` option of :py:func:`trackpy.link_df`. min_length Minimum length of tracks. feat_radius Radius of circle that is a little larger than features. See `radius` option of :py:func:`brightness.from_raw_image`. bg_frame Size of frame around features for background determination. See `bg_frame` option of :py:func:`brightness.from_raw_image`. bg_estimator Statistic to estimate background. See `bg_estimator` option of :py:func:`brightness.from_raw_image`. neighbor_radius How far two features may be apart while still being considered close enough so that one influences the brightness measurement of the other. This is related to the `radius` option of :py:func:`brightness.from_raw_image`. If "auto", use the smallest value that avoids overlaps. interpolate Whether to interpolate coordinates of features that have been missed by the localization algorithm. coloc_dist After overlaying donor and acceptor channel features, this gives the maximum distance up to which donor and acceptor signal are considered to come from the same molecule. acceptor_channel Whether the acceptor channel is number 1 or 2 in `registrator`. link_options Specify additional options to :py:func:`trackpy.link_df`. "search_range" and "memory" will be overwritten by the `link_radius` and `link_mem` parameters. link_quiet If `True`, call :py:func:`trackpy.quiet`. Other parameters ---------------- columns Override default column names as defined in :py:attr:`config.columns`. Relevant names are `coords`, `time`, `mass`, `signal`, `bg`, `bg_dev`. This means, if your DataFrame has coordinate columns "x" and "z" and the time column "alt_frame", set ``columns={"coords": ["x", "z"], "time": "alt_frame"}``. This parameters sets the :py:attr:`columns` attribute. """ self.frame_selector = multicolor.FrameSelector(excitation_seq) self.registrator = (registrator if registrator is not None else multicolor.Registrator()) self.link_options = link_options.copy() self.link_options["search_range"] = link_radius self.link_options["memory"] = link_mem self.min_length = min_length self.brightness_options = dict( radius=feat_radius, bg_frame=bg_frame, bg_estimator=bg_estimator, mask="circle") self.interpolate = interpolate self.coloc_dist = coloc_dist self.acceptor_channel = acceptor_channel self.columns = columns if isinstance(neighbor_radius, str): # auto radius neighbor_radius = 2 * feat_radius + 1 self.neighbor_radius = neighbor_radius if link_quiet and trackpy_available: trackpy.quiet() @property def excitation_seq(self) -> str: """Excitation sequence. "d" stands for donor, "a" for acceptor, anything else describes other kinds of frames which are irrelevant for tracking. One needs only specify the shortest sequence that is repeated, i. e. "ddddaddddadddda" is the same as "dddda". """ return self.frame_selector.excitation_seq @excitation_seq.setter def excitation_seq(self, seq: str): self.frame_selector.excitation_seq = seq
[docs] def track(self, donor_img: Sequence[np.ndarray], acceptor_img: Sequence[np.ndarray], donor_loc: pd.DataFrame, acceptor_loc: pd.DataFrame, d_mass: bool = False ) -> pd.DataFrame: """Track smFRET data Localization data for both the donor and the acceptor channel is merged (since a FRET construct has to be visible in at least one channel). The merged data is than linked into trajectories using py:func:`trackpy.link_df`. For this the :py:mod:`trackpy` package needs to be installed. Additionally, the feature brightness is determined for both donor and acceptor for raw image data using :py:func:`brightness.from_raw_image`. These data are written into a a :py:class:`pandas.DataFrame` whose columns have a MultiIndex containing the "donor" and "acceptor" items in the top level. A column specifying whether the entry originates from donor or acceptor excitation is also added: ("fret", "exc_type"). It is "d" for donor and "a" for acceptor excitation; see the :py:meth:`flag_excitation_type` method. Parameters ---------- donor_img, acceptor_img Raw image frames for donor and acceptor channel. This need to be of type `list`, but anything that returns image data when indexed with a frame number will do. donor_loc, acceptor_loc Localization data for donor and acceptor channel d_mass If `True`, get total brightness upon donor excitation by from the sum of donor and acceptor image. If `False`, the donor excitation brightness can still be calculated as the sum of donor and acceptor brightness in :py:meth:`analyze`. Defaults to `False`. Note: Until :py:mod:`slicerator` with support for multiple inputs to pipelines is released, setting this to `True` will load all of `donor_img` and `acceptor_img` into memory, even if :py:mod:`pims` is used. Returns ------- The columns are indexed with a :py:class:`pandas.MultiIndex`. The top index level consists of "donor" (tracking data for the donor channel), "acceptor" (tracking data for the acceptor channel), and "fret". The latter contains a column with the particle number ("particle"), an indicator (0 / 1) whether there is a near neighbor ("has_neighbor"), and an indicator whether the data point was interpolated ("interp") because it was not in the localization data in either channel. """ if not trackpy_available: raise RuntimeError("`trackpy` package required but not installed.") # Names of brightness-related columns br_cols = ["signal", "mass", "bg", "bg_dev"] # Position and frame columns posf_cols = self.columns["coords"] + [self.columns["time"]] # Don't modify originals donor_loc = donor_loc.copy() acceptor_loc = acceptor_loc.copy() for df in (donor_loc, acceptor_loc): for c in br_cols: # Make sure those columns exist df[c] = 0. donor_channel = 1 if self.acceptor_channel == 2 else 2 donor_loc_corr = self.registrator(donor_loc, channel=donor_channel) # Create FRET tracks (in the acceptor channel) # Acceptor channel is used because in ALEX there are frames without # any donor locs, therefore minimizing the error by transforming # back and forth. coloc = multicolor.find_colocalizations( donor_loc_corr, acceptor_loc, max_dist=self.coloc_dist, channel_names=["donor", "acceptor"], keep_unmatched=True) coloc_pos_f = coloc.loc[:, (slice(None), posf_cols)] coloc_pos_f = coloc_pos_f.values.reshape( (len(coloc), 2, len(self.columns["coords"]) + 1)) # Use the mean of positions as the new position merged = np.nanmean([coloc["donor"][posf_cols].values, coloc["acceptor"][posf_cols].values], axis=0) merged = pd.DataFrame(merged, columns=posf_cols) merged["__trc_idx__"] = coloc.index # works as long as index is unique self.link_options["pos_columns"] = self.columns["coords"] self.link_options["t_column"] = self.columns["time"] # Track only "d" and "a" frames. Renumber frames for that. merged = self.frame_selector.select( merged, "da", renumber=True, columns=self.columns) track_merged = trackpy.link_df(merged, **self.link_options) if self.interpolate: # Interpolate coordinates where no features were localized track_merged = spatial.interpolate_coords(track_merged, self.columns) else: # Mark all as not interpolated track_merged["interp"] = 0 # Flag localizations that are too close together if self.neighbor_radius: spatial.has_near_neighbor(track_merged, self.neighbor_radius, self.columns) # Restore original frame numbers that were changed when calling # selector.__call__ above. # Do this after calling spatial.interpolate_coords, otherwise the # excluded frames will be interpolated! self.frame_selector.restore_frame_numbers(track_merged, "da", columns=self.columns) # Get non-interpolated colocalized features non_interp_mask = track_merged["interp"] == 0 non_interp_idx = track_merged.loc[non_interp_mask, "__trc_idx__"] ret = coloc.loc[non_interp_idx] ret["fret", "particle"] = \ track_merged.loc[non_interp_mask, "particle"].values # Append interpolated features (which "appear" only in the acceptor # channel) interp_mask = ~non_interp_mask interp = track_merged.loc[interp_mask, posf_cols] interp.columns = pd.MultiIndex.from_product([["acceptor"], posf_cols]) interp["fret", "particle"] = \ track_merged.loc[interp_mask, "particle"].values ret = pd.concat([ret, interp]) # Add interp and has_neighbor column cols = ["interp"] if "has_neighbor" in track_merged.columns: cols.append("has_neighbor") ic = pd.concat([track_merged.loc[non_interp_mask, cols], track_merged.loc[interp_mask, cols]]) for c in cols: ret["fret", c] = ic[c].values # If coordinates or frame columns are NaN in any channel (which means # that it didn't have a colocalized partner), use the position and # frame number from the other channel. for c1, c2 in itertools.permutations(("donor", "acceptor")): mask = np.any(~np.isfinite(ret.loc[:, (c1, posf_cols)]), axis=1) d = ret.loc[mask, (c2, posf_cols)] ret.loc[mask, (c1, posf_cols)] = d.values # get feature brightness from raw image data ret_d = self.registrator(ret["donor"], channel=self.acceptor_channel) ret_a = ret["acceptor"].copy() brightness.from_raw_image(ret_d, donor_img, **self.brightness_options, columns=self.columns) brightness.from_raw_image(ret_a, acceptor_img, **self.brightness_options, columns=self.columns) # If desired, get brightness upon donor excitation from image overlay if d_mass: # Until slicerator with support for multiple inputs to pipelines # is released, load everything into memory overlay = [] for di, da in zip(donor_img, acceptor_img): o = di + self.registrator(da, cval=np.mean, channel=self.acceptor_channel, columns=self.columns) overlay.append(o) df = ret_d[posf_cols].copy() brightness.from_raw_image(df, overlay, **self.brightness_options) ret["fret", "d_mass"] = df[self.columns["mass"]] ret_d.columns = pd.MultiIndex.from_product((["donor"], ret_d.columns)) ret_a.columns = pd.MultiIndex.from_product((["acceptor"], ret_a.columns)) ret.drop(["donor", "acceptor"], axis=1, level=0, inplace=True) ret = pd.concat([ret_d, ret_a, ret], axis=1) ret.sort_values( [("fret", "particle"), ("donor", self.columns["time"])], inplace=True) # Filter short tracks (only count non-interpolated localizations) u, c = np.unique( ret.loc[ret["fret", "interp"] == 0, ("fret", "particle")], return_counts=True) valid = u[c >= self.min_length] ret = ret[ret["fret", "particle"].isin(valid)] ret = ret.reset_index(drop=True) self.flag_excitation_type(ret) return ret
[docs] def flag_excitation_type(self, tracks: pd.DataFrame): """Add a column indicating excitation type (donor/acceptor/...) Add ("fret", "exc_type") column in place. It is of "category" type. Parameters ---------- tracks Result of :py:meth:`track` """ # FIXME: Don't force convert to int, but raise an error (?) # First, the track method needs to preserve the data type of the time # column eseq = self.frame_selector.eval_seq() frames = tracks["donor", self.columns["time"]].to_numpy(dtype=int) et = pd.Series(eseq[frames % len(eseq)], dtype="category") # Assignment to dataframe is done by matching indices, not line-wise # Thus copy index et.index = tracks.index tracks["fret", "exc_type"] = et
[docs] @classmethod def to_yaml(cls, dumper, data): """Dump as YAML Pass this as the `representer` parameter to :py:meth:`yaml.Dumper.add_representer` """ m = {k: getattr(data, k) for k in cls._yaml_keys} return dumper.represent_mapping(cls.yaml_tag, m)
[docs] @classmethod def from_yaml(cls, loader, node): """Construct from YAML Pass this as the `constructor` parameter to :py:meth:`yaml.Loader.add_constructor` """ m = loader.construct_mapping(node) ret = cls() for k in cls._yaml_keys: setattr(ret, k, m[k]) return ret
with suppress(ImportError): from ..io import yaml yaml.register_yaml_class(SmFRETTracker)