"""Tools for simulating smFRET time traces"""
# Copyright 2017-2018 Lukas Schrangl
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from collections import namedtuple
import numpy as np
[docs]class TwoStateExpTruth:
"""Simulate ground truth smFRET time traces (two states, exp. lifetimes)
Lifetimes are exponentially distributed.
"""
def __init__(self, lifetimes, efficiencies):
"""Parameters
----------
life_times : array_like, shape=(2,)
Mean (of the exponentially distributed) life times of the states
efficiencies : array_like, shape=(2,)
FRET efficiencies for the states
"""
self.lifetimes = lifetimes
self.efficiencies = efficiencies
# If 1 or 2, disable random number generation and yield predictable
# results. See _rand_uniform and _rand_exp methods
self._test = 0
[docs] def generate(self, duration):
"""Create a time trace that of at least `duration` length
Parameters
----------
duration : float
Minimum duration of the time trace. In practice, it will be
longer.
Returns
-------
time : numpy.ndarray
Time points of state changes. These are bi-exponentially
distributed.
eff : numpy.ndarray
Corresponding FRET efficiencies. The `i`-th entry lasts from
``time[i-1]`` (or 0 for i=0) to ``time[i]``.
"""
num_states = len(self.lifetimes)
# num_events increased by 50% to be (quite) sure that we generate
# a long enough trace in one run in the `while` loop below
num_events = (int(duration / np.mean(self.lifetimes) * 1.5) //
num_states) or 1 # minimum is one event
# prob. to start in state 1
prob_1 = self.lifetimes[0] / np.sum(self.lifetimes)
# whether to start in state 1
start_with_1 = self._rand_uniform() < prob_1
if start_with_1:
# If we start with state 1, the order is already correct
lt_sorted = self.lifetimes
eff_sorted = self.efficiencies
else:
# If we start with state 2, the order needs to be reversed
lt_sorted = self.lifetimes[::-1]
eff_sorted = self.efficiencies[::-1]
time = []
eff = []
dur = 0
# Run in a loop for the very unlikely case where not enough transitions
# were generated (i.e. all randomly generated life times were very
# short)
while dur < duration:
# Generate transition times by generating exponentially distributed
# random numbers
t = self._rand_exp(lt_sorted, (num_events, num_states))
# FRET efficiencies are constant, just broadcast the array
e = np.broadcast_to(eff_sorted, (num_events, num_states))
# Generate time series by merging the transition times
t = np.cumsum(t.flatten())
if time:
t += time[-1][-1]
time.append(t)
dur = t[-1]
# Generate FRET efficiency sequence by merging
eff.append(e.flatten())
time = np.concatenate(time)
eff = np.concatenate(eff)
# Make result as short as possible while still being longer than
# `duration`
long_idx = np.nonzero(time > duration)[0][0] + 1
return time[:long_idx], eff[:long_idx]
[docs] def __call__(self, *args, **kwargs):
"""Synonym for :py:func:`generate`"""
return self.generate(*args, **kwargs)
def _rand_exp(self, m, shape):
if self._test == 0:
return np.random.exponential(m, shape)
if self._test == 1:
return np.broadcast_to(m, shape)
if self._test == 2:
return np.broadcast_to(m, shape) / 10
return np.random.exponential(m, shape)
def _rand_uniform(self):
if self._test == 0:
return np.random.rand()
if (self._test == 1) or (self._test == 2):
return 0
return np.random.rand()
[docs]def sample(time, eff, exposure_time, data_points=np.inf, frame_time=0.):
"""Sample the true smFRET time trace with finite exposure time
This means that there will be integration over all transitions that happen
during a single exposure.
Parameters
----------
time : array_like, shape=(n,)
Sequence of transition times as returned by :py:func:`two_state_truth`
eff : array_like, shape=(n,)
Sequence of FRET efficiencies as returned by :py:func:`two_state_truth`
exposure_time : float
Exposure time for sampling. All transition during a exposure will be
integrated.
data_points : scalar, optional
Number of data points to return. If the FRET time trace is too short,
the maximum number of data points the trace allows is returned.
Defaults to infinity.
frame_time : float, optional
Total time for one frame, i.e. minimum time between two data points.
If this is less than `exposure_time`, use `exposure_time`. Defaults
to 0, i.e. use `exposure_time`.
Returns
-------
sample_time : numpy.ndarray
Time points of sampling. They are evenly spaced with `exposure_time`
difference.
sample_eff : numpy.ndarray
Corresponding sampled FRET efficiencies
"""
# Prepend 0 to `time`
t0 = np.empty(len(time)+1)
t0[0] = 0
t0[1:] = time
frame_time = max(exposure_time, frame_time)
data_points = int(min(time[-1] / frame_time, data_points))
end_t = np.linspace(frame_time, data_points*frame_time, data_points,
endpoint=True)
start_t = end_t - exposure_time
sample_t = np.empty(2*len(start_t)+1)
sample_t[0] = 0
sample_t[1::2] = start_t
sample_t[2::2] = end_t
# Integrate the efficiency step function, prepend 0
lifetimes = np.diff(t0)
int_eff = np.empty(len(t0))
int_eff[0] = 0
np.cumsum(np.multiply(lifetimes, eff), out=int_eff[1:])
# Get integrated function at points of interest (i.e. sampling time points)
sample_int_eff = np.interp(sample_t, t0, int_eff,
left=np.NaN, right=np.NaN)
# Now take use diff to get the integrated efficiencies between the
# sampling time points
sample_eff = np.diff(sample_int_eff)[1::2]
# Scale correctly
sample_eff /= exposure_time
return end_t, sample_eff
[docs]def experiment(eff, photons, donor_brightness, acceptor_brightness):
"""From sampled smFRET time traces, simulate (noisy) experiment results
This adds noise coming from the rather broad brightness distributions of
fluorophores to the time traces, resulting in a trace as one would get
out of an experiment.
The acceptor signal is generated by
``acceptor_brightness(eff * photons)``, the donor signal is generated
by ``donor_brightness((1 - eff) * photons)``.
Parameters
----------
eff : array_like
Sampled FRET efficiencies
photons : float
Mean summed number of photons emitted by donor and acceptor per
exposure.
donor_brightness, acceptor_brightness : callable
Takes one argument, an array of mean brightness values. For each entry
`m` it returns a random value drawn from the brightness distribution
with mean `m`.
Returns
-------
donor_noisy, acceptor_noisy : numpy.ndarray
Donor and acceptor fluorescence including noise
"""
acc_p = eff * photons
don_p = (1 - eff) * photons
if callable(acceptor_brightness):
acc_p_noisy = acceptor_brightness(acc_p)
else:
acc_p_noisy = acceptor_brightness.generate(acc_p)
if callable(donor_brightness):
don_p_noisy = donor_brightness(don_p)
else:
don_p_noisy = donor_brightness.generate(don_p)
return don_p_noisy, acc_p_noisy
DataSet = namedtuple("DataSet", ["true_time", "true_eff",
"samp_time", "samp_eff",
"exp_don", "exp_acc", "exp_eff"])
DataSet.__doc__ = """Named tuple containing a full data set of a simulation run
Attributes
----------
true_time, true_eff : array_like
True state trajectory as produced by :py:func:`two_state_truth`
samp_time, samp_eff : array_like
Sampled state trajectory as produced by :py:func:`sample`
exp_don, exp_acc : array_like
(Noisy) donor and acceptor intensities as produced by
:py:func:`experiment`
exp_eff : array_like
Experiment FRET efficiency, i.e. ``exp_acc / (exp_acc + exp_don)``
"""
[docs]def simulate_dataset(truth, exposure_time, data_points, photons,
donor_brightness, acceptor_brightness, frame_time=0.):
"""Simulate a whole data set
Consecutively run :py:func:`two_state_truth`, :py:func:`sample`, and
:py:func:`experiment`.
Parameters
----------
truth : callable or tuple of numpy.ndarray, shape=(n,)
Ground truth smFRET time trace. If this is an array, ``truth[0]``
should be the sequence of time points where state transitions
occur and ``truth[1]`` the FRET efficiency between the (i-1)-th
time point (or 0 for i = 0) and the i-th time point.
If this is callable, it should return a ground truth smFRET time
trace as described above. It should take one float parameter, which is
the minimum duration of the generated trace.
exposure_time : float
Exposure time for sampling. All transition during a exposure will be
integrated.
data_points : scalar, optional
Number of data points to return. If the FRET time trace is too short,
the maximum number of data points the trace allows is returned.
Defaults to infinity.
photons : float
Mean summed number of photons emitted by donor and acceptor per
exposure.
donor_brightness, acceptor_brightness : callable
Takes one argument, an array of mean brightness values. For each entry
`m` it returns a random value drawn from the brightness distribution.
frame_time : float, optional
Total time for one frame, i.e. minimum time between two data points.
If this is less than `exposure_time`, use `exposure_time`. Defaults
to 0, i.e. use `exposure_time`.
Returns
-------
DataSet
Collected simulated data
"""
frame_time = max(exposure_time, frame_time)
dur = data_points * frame_time
if callable(truth):
t, e = truth(dur)
elif hasattr(truth, "generate"):
t, e = truth.generate(dur)
else:
t, e = truth
st, se = sample(t, e, exposure_time, data_points=data_points,
frame_time=frame_time)
d, a = experiment(se, photons, donor_brightness, acceptor_brightness)
return DataSet(t, e, st, se, d, a, a/(d+a))