import logging
import os
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import rc_context
from matplotlib.axes import Axes
from mrestimator import utility as ut
from mrestimator.coefficients import coefficients
from mrestimator.fit import (
f_complex,
f_exponential,
f_exponential_offset,
fit,
fitfunc_check,
)
from mrestimator.input_output import (
OutputHandler,
input_handler,
overview,
)
log = ut.log
[docs]
def full_analysis(
data=None,
dt=None,
kmax=None,
dtunit=" time unit",
fitfuncs=None,
coefficientmethod=None,
tmin=None, # include somehow into 'missing' req. arg
tmax=None,
steps=None, # dt conversion? optional replace tmin/tmax
substracttrialaverage=False,
targetdir=None,
title=None, # overwrites old files in same targetdir
numboot="auto", # optional. default depends on fitfunc
seed=1, # default: 1 uses hard coded seeds
loglevel=None, # only concerns local logfile
targetplot=None,
showoverview=True,
saveoverview=False,
method=None, # overload for coefficientmethod
):
"""
Wrapper function that performs the following four steps:
- check `data` with `input_handler()`
- calculate correlation coefficients via `coefficients()`
- fit autocorrelation function with `fit()`
- export/plot using the `OutputHandler`
Usually it should suffice to tweak the arguments and call this
wrapper function (multiple times).
Calling the underlying functions individually
gives slightly more control, though.
We recommend to set `showoverview=False` when calling in loops to avoid
opening many figures (and wasting RAM).
Parameters
----------
data: str, list or numpy.ndarray
Passed to `input_handler()`. Ideally, import and check data first.
A `string` is assumed to be the path
to file(s) that is then imported as pickle or plain text.
Alternatively, you can provide a `list` or `ndarray` containing
strings or already imported data. In the latter case,
`input_handler()` attempts to convert it to the right format.
dt: float
How many `dtunits` separate the measurements of the provided data.
For example, if measurements are taken every 4ms:
`dt=4`, `dtunit=\'ms\'`.
kmax: int
Maximum time lag k (in time steps of size `dt`) to use for
coefficients. Alternatively, `tmax` or `steps` can be specified
Other Parameters
----------------
dtunit: str, optional
Unit description/name of the time steps of the provided data.
fitfuncs: list, optional
Which fitfunctions to use e.g. ``fitfuncs=['e', 'eo', 'c']``.
Renamed from `fitfunctions` in v0.1.4.
coefficientmethod: str, optional
`ts` or `sm`, method used for determining the correlation
coefficients. See the :func:`coefficients` function for details.
Default is `ts`.
method: str, optional
same as `coefficientmethod`, introduced in v0.1.6.
tmin: float
Smallest time separation to use for coefficients, in units of
`dtunit`.
Only one argument is possible, either `kmax` or `steps` or
`tmin` and `tmax`.
tmax: float
Maximum time separation to use for coefficients.
For example, to fit the autocorrelation between 8ms and
2s set: `tmin=8`, `tmax=2000`, `dtunit=\'ms\'`
(independent of `dt`).
steps : ~numpy.array, optional
Specify the fitrange in steps :math:`k` for which to compute
coefficients :math:`r_k`.
Note that :math:`k` provided here would need
to be multiplied with units of [`dt` * `dtunit`] to convert
back to (real) time.
If an array of length two is provided, e.g.
``steps=(minstep, maxstep)``, all enclosed integer values will be
used.
Arrays larger than two are assumed to contain a manual choice of
steps. Strides other than one are possible.
Only one argument is possible, either `steps` or `kmax` or
`tmin` and `tmax`.
substracttrialaverage: bool, optional
Substract the average across all trials before calculating
correlation coefficients.
Default is `False`.
targetdir: str, optional
String containing the path to the target directory where files
are saved with the filename `title`.
Per default, `targetdir=None` and no files are written to disk.
title: str, optional
String for the filenames. Also sets the main title of the
overview panel.
numboot: int or 'auto', optional
Number of bootstrap samples to draw.
This repeats every fit `numboot` times so that we can
provide an uncertainty estimate of the resulting branching
parameter and autocorrelation time.
Per default, bootstrapping is only applied in
`coefficeints()` as most of computing time is needed for the
fitting. Thereby we have uncertainties on the :math:`r_k`
(which will be plotted) but each fit is only
done once.
Default is `numboot='auto'` where the number of samples depends on
the fitfunction (100 for the exponential).
seed : int, None or 'random', optional
If `numboot` is not zero, provide a seed for the random number
generator. If `seed=None`, seeding will be skipped.
Per default, the rng is (re)seeded every time `full_analysis()` is
called so that every repeated call returns the same error
estimates.
showoverview: bool, optional
Whether to show the overview panel. Default is 'True'.
Set to 'False' when calling `full_analysis()` repeatedly or just
saving the panels to disk with `saveoverview` (this temporarily
overwrites your matplotlib rc parameters for more consistency).
Otherwise, matplotlib may create large amounts of figures and leak
memory.
Note that even when set to 'True' the panel might not show if
`full_analysis()` is called through a script instead of an
(interactive) shell, this depends on your matplotlib configuration.
saveoverview: bool, optional
Whether to save the overview panel in `targetdir`.
Default is 'False'.
loglevel: str, optional
The loglevel to use for the logfile created
as `title.log` in the `targetdir`.
'ERROR', 'WARNING', 'INFO' or 'DEBUG'.
Per default, no log is written unless `loglevel` and `targetdir`
are provided.
targetplot: `matplotlib.axes.Axes`, optional
You can provide a matplotlib axes element (i.e. a subplot of an
existing figure) to plot the correlations into.
The axis will be passed to the `OutputHandler`
and all plotting will happen within that axes.
Per default, a new figure is created - that cannot be added
as a subplot to any other figure later on. This is due to
the way matplotlib handles subplots.
Returns
-------
OutputHandler
that is associated
with the correlation plot, fits and coefficients.
Also saves meta data and plotted pdfs to `targetdir`.
Example
-------
.. code-block:: python
# test data, subsampled branching process
bp = mre.simulate_branching(m=0.95, h=10, subp=0.1, numtrials=50)
mre.full_analysis(
data=bp,
dt=1,
tmin=0, tmax=1500,
dtunit='step',
fitfuncs=['exp', 'exp_offs', 'complex'],
targetdir='./output',
title='Branching Process')
..
"""
# ------------------------------------------------------------------ #
# Arguments
# ------------------------------------------------------------------ #
how_to_str = (
"full_analysis() requires the following arguments:\n"
+ "'data', 'dt', 'method' and either one of 'kmax', 'tmax' or 'steps'"
)
# workaround: if full_analysis() does not reach its end where we remove
# the local loghandler, it survives and keps logging with the old level
for hdlr in log.handlers:
if isinstance(hdlr, logging.FileHandler):
if hdlr != ut._logfilehandler:
hdlr.close()
log.removeHandler(hdlr)
if data is None or dt is None:
log.exception(how_to_str)
raise TypeError
if kmax is None and tmax is None and steps is None:
log.exception(how_to_str)
raise TypeError
# if there is a targetdir specified, create and use for various output
if targetdir is not None:
if isinstance(targetdir, str):
td = os.path.abspath(os.path.expanduser(targetdir + "/"))
os.makedirs(td, exist_ok=True)
ut._set_permissions(td)
targetdir = td
else:
log.exception("Argument 'targetdir' needs to be of type 'str'")
raise TypeError
# setup log early so argument errors appear in the logfile
if loglevel is None:
# dont create a logfile
pass
else:
if isinstance(loglevel, int) and loglevel > 0:
pass
elif str(loglevel).upper() in ["ERROR", "WARNING", "INFO", "DEBUG"]:
loglevel = str(loglevel).upper()
else:
log.debug(f"Unrecognized log level {loglevel}, using 'INFO'")
loglevel = "INFO"
# open new handler and add it to logging module
loghandler = logging.handlers.RotatingFileHandler(
targetdir
+ "/{}.log".format(
"full_analysis" if title is None else title,
),
maxBytes=5 * 1024 * 1024,
backupCount=1,
)
loghandler.setLevel(logging.getLevelName(loglevel))
loghandler.setFormatter(
ut.CustomExceptionFormatter(
"%(asctime)s %(levelname)8s: %(message)s", "%Y-%m-%d %H:%M:%S"
)
)
log.addHandler(loghandler)
else:
if saveoverview:
log.warning(
"Cannot save overview since no targetdir specified, " + "skipping"
)
log.debug("full_analysis()")
if ut._log_locals:
log.debug(f"Locals: {locals()}")
try:
dt = float(dt)
assert dt > 0
except Exception:
log.exception("Argument 'dt' needs to be a float > 0")
raise
if not isinstance(dtunit, str):
log.exception("Argument 'dtunit' needs to be of type 'str'")
raise TypeError
if steps is None:
if kmax is not None:
try:
kmax = float(kmax)
assert kmax > 0
except Exception:
log.exception("Argument 'kmax' needs to be a number > 0")
raise
if tmax is not None:
log.exception(
"Arguments do not match: Please provide either"
+ " 'kmax' or 'tmin' and 'tmax' or 'steps'"
)
raise TypeError
else:
tmax = kmax * dt
if tmin is None:
tmin = 1
try:
tmin = float(tmin)
tmax = float(tmax)
assert tmin >= 0 and tmax > tmin
except Exception:
if kmax is None:
log.exception(
"Arguments: 'tmax' and 'tmin' "
+ "need to be floats with 'tmax' > 'tmin' >= 0"
)
else:
log.exception("Argument: 'kmax' is too small")
raise
steps = (int(tmin / dt), int(tmax / dt))
else:
if tmin is not None or tmax is not None or kmax is not None:
log.exception(
"Arguments do not match: Please provide either "
+ "'kmax' or 'tmin' and 'tmax' or 'steps'"
)
raise TypeError
log.debug("Argument 'steps' was provided to full_analysis()")
defaultfits = False
if fitfuncs is None:
fitfuncs = ["e", "eo"]
defaultfits = True
elif isinstance(fitfuncs, str):
fitfuncs = [fitfuncs]
if not isinstance(fitfuncs, list):
log.exception(
"Argument 'fitfuncs' needs to be of type 'str' or "
+ "a list e.g. ['exponential', 'exponential_offset']"
)
raise TypeError
if targetplot is not None and not isinstance(targetplot, Axes):
log.exception(
"Optional argument 'targetplot' needs "
+ "to be an instance of 'matplotlib.axes.Axes'"
)
raise TypeError
if title is not None:
title = str(title)
# as of v0.1.6 we decided to choose the default method based on the number of trials
src = input_handler(data)
if coefficientmethod is None and method is not None:
coefficientmethod = method
elif coefficientmethod is not None and method is not None:
log.exception(
"Keywords 'method' and 'coefficientmethod' are synonymous.\n"
+ "Provide one or the other!"
)
raise ValueError
# for one trial the two methods are equal
if coefficientmethod is None and src.shape[0] == 1:
coefficientmethod = "stationarymean" # redundant with coefficients()
elif coefficientmethod is None and src.shape[0] > 1:
log.exception(
"The provided data seems to have more than one trial. "
+ "Please specify a 'coefficientmethod':\n"
+ "'trialseparated' --- if your trials are long, or\n"
+ "'stationarymean' --- if you are sure that activity is stationary "
+ "across trials.\n"
+ "If you are unsure, we suggest to compare results from both methods."
)
raise TypeError
if coefficientmethod not in ["trialseparated", "ts", "stationarymean", "sm"]:
log.exception(
"Optional argument 'coefficientmethod' needs "
+ "to be either 'trialseparated' or 'stationarymean'"
)
raise TypeError
if ut._log_locals:
log.debug(f"Finished argument check. Locals: {locals()}")
# ------------------------------------------------------------------ #
# Continue with trusted arguments
# ------------------------------------------------------------------ #
if substracttrialaverage:
src = src - np.mean(src, axis=0)
log.debug(f"full_analysis() seeding to {seed}")
if seed is None or seed == "random":
rkseed = seed
ftseed = seed
else:
rkseed = seed * 5330
ftseed = seed * 101
if numboot == "auto":
nbt = 100
else:
nbt = numboot
rks = coefficients(
data=src,
steps=steps,
dt=dt,
dtunit=dtunit,
method=coefficientmethod,
numboot=nbt,
seed=rkseed,
)
fits = []
for f in fitfuncs:
if numboot == "auto":
if (
fitfunc_check(f) is f_exponential
or fitfunc_check(f) is f_exponential_offset
):
nbt = 100
elif fitfunc_check(f) is f_complex:
nbt = 0
else:
nbt = 100
else:
nbt = numboot
fits.append(fit(data=rks, fitfunc=f, steps=steps, numboot=nbt, seed=ftseed))
# ------------------------------------------------------------------ #
# Output and Consistency Checks
# ------------------------------------------------------------------ #
warning = None
if defaultfits:
shownfits = [fits[1]]
# no trials, no confidence
if src.shape[0] == 1:
warning = "Not enough trials to calculate confidence intervals."
# check that tau from exp and exp_off
elif not ut._c_fits_consistent(fits[0], fits[1]):
# warning = 'Exponential with offset resulted in ' + \
# '$\\tau = {:.2f}$ {}'.format(fits[1].tau, fits[1].dtunit)
warning = (
"Results from other fits differed beyond confidence.\n"
+ "Try the 'fitfuncs' argument!"
)
else:
shownfits = fits
warning = None
# we do not want to have any python figures poping up when showoverivew
# is set to false.
# also, interactive: true seems to leak memory when many figures are opened
with rc_context(rc={"interactive": showoverview}):
if showoverview or saveoverview:
panel = overview(
src,
[rks],
shownfits,
title=title,
warning=warning,
interactive=showoverview,
)
res = OutputHandler([rks] + shownfits, ax=targetplot)
if targetdir is not None:
if title is not None and title != "":
res.save(targetdir + "/" + title)
if saveoverview:
panel.savefig(targetdir + "/" + title + "_overview.pdf")
else:
res.save(targetdir + "/full_analysis")
if saveoverview:
panel.savefig(targetdir + "/full_analysis_overview.pdf")
if showoverview:
panel.show()
else:
# if interactive mode is on, panel would still be shown
try:
plt.close(panel)
except:
log.debug("Exception passed", exc_info=True)
try:
log.removeHandler(loghandler)
except:
log.debug("No handler to remove")
log.info("full_analysis() done")
return res