Source code for mrestimator.wrapper

import logging
import os

import matplotlib.pyplot as plt
import numpy as np
from matplotlib import rc_context
from matplotlib.axes import Axes

from mrestimator import utility as ut
from mrestimator.coefficients import coefficients
from mrestimator.fit import (
    f_complex,
    f_exponential,
    f_exponential_offset,
    fit,
    fitfunc_check,
)
from mrestimator.input_output import (
    OutputHandler,
    input_handler,
    overview,
)

log = ut.log



[docs]
def full_analysis(
    data=None,
    dt=None,
    kmax=None,
    dtunit=" time unit",
    fitfuncs=None,
    coefficientmethod=None,
    tmin=None,  # include somehow into 'missing' req. arg
    tmax=None,
    steps=None,  # dt conversion? optional replace tmin/tmax
    substracttrialaverage=False,
    targetdir=None,
    title=None,  # overwrites old files in same targetdir
    numboot="auto",  # optional. default depends on fitfunc
    seed=1,  # default: 1 uses hard coded seeds
    loglevel=None,  # only concerns local logfile
    targetplot=None,
    showoverview=True,
    saveoverview=False,
    method=None,  # overload for coefficientmethod
):
    """
    Wrapper function that performs the following four steps:

    - check `data` with `input_handler()`
    - calculate correlation coefficients via `coefficients()`
    - fit autocorrelation function with `fit()`
    - export/plot using the `OutputHandler`

    Usually it should suffice to tweak the arguments and call this
    wrapper function (multiple times).
    Calling the underlying functions individually
    gives slightly more control, though.
    We recommend to set `showoverview=False` when calling in loops to avoid
    opening many figures (and wasting RAM).

    Parameters
    ----------
    data: str, list or numpy.ndarray
        Passed to `input_handler()`. Ideally, import and check data first.
        A `string` is assumed to be the path
        to file(s) that is then imported as pickle or plain text.
        Alternatively, you can provide a `list` or `ndarray` containing
        strings or already imported data. In the latter case,
        `input_handler()` attempts to convert it to the right format.

    dt: float
        How many `dtunits` separate the measurements of the provided data.
        For example, if measurements are taken every 4ms:
        `dt=4`, `dtunit=\'ms\'`.

    kmax: int
        Maximum time lag k (in time steps of size `dt`) to use for
        coefficients. Alternatively, `tmax` or `steps` can be specified

    Other Parameters
    ----------------

    dtunit: str, optional
        Unit description/name of the time steps of the provided data.

    fitfuncs: list, optional
        Which fitfunctions to use e.g. ``fitfuncs=['e', 'eo', 'c']``.
        Renamed from `fitfunctions` in v0.1.4.

    coefficientmethod: str, optional
        `ts` or `sm`, method used for determining the correlation
        coefficients. See the :func:`coefficients` function for details.
        Default is `ts`.

    method: str, optional
        same as `coefficientmethod`, introduced in v0.1.6.

    tmin: float
        Smallest time separation to use for coefficients, in units of
        `dtunit`.
        Only one argument is possible, either `kmax` or `steps` or
        `tmin` and `tmax`.

    tmax: float
        Maximum time separation to use for coefficients.
        For example, to fit the autocorrelation between 8ms and
        2s set: `tmin=8`, `tmax=2000`, `dtunit=\'ms\'`
        (independent of `dt`).

    steps : ~numpy.array, optional
        Specify the fitrange in steps :math:`k` for which to compute
        coefficients :math:`r_k`.
        Note that :math:`k` provided here would need
        to be multiplied with units of [`dt` * `dtunit`] to convert
        back to (real) time.
        If an array of length two is provided, e.g.
        ``steps=(minstep, maxstep)``, all enclosed integer values will be
        used.
        Arrays larger than two are assumed to contain a manual choice of
        steps. Strides other than one are possible.
        Only one argument is possible, either `steps` or `kmax` or
        `tmin` and `tmax`.

    substracttrialaverage: bool, optional
        Substract the average across all trials before calculating
        correlation coefficients.
        Default is `False`.

    targetdir: str, optional
        String containing the path to the target directory where files
        are saved with the filename `title`.
        Per default, `targetdir=None` and no files are written to disk.

    title: str, optional
        String for the filenames. Also sets the main title of the
        overview panel.

    numboot: int or 'auto', optional
        Number of bootstrap samples to draw.
        This repeats every fit `numboot` times so that we can
        provide an uncertainty estimate of the resulting branching
        parameter and autocorrelation time.
        Per default, bootstrapping is only applied in
        `coefficeints()` as most of computing time is needed for the
        fitting. Thereby we have uncertainties on the :math:`r_k`
        (which will be plotted) but each fit is only
        done once.
        Default is `numboot='auto'` where the number of samples depends on
        the fitfunction (100 for the exponential).

    seed : int, None or 'random', optional
        If `numboot` is not zero, provide a seed for the random number
        generator. If `seed=None`, seeding will be skipped.
        Per default, the rng is (re)seeded every time `full_analysis()` is
        called so that every repeated call returns the same error
        estimates.

    showoverview: bool, optional
        Whether to show the overview panel. Default is 'True'.
        Set to 'False' when calling `full_analysis()` repeatedly or just
        saving the panels to disk with `saveoverview` (this temporarily
        overwrites your matplotlib rc parameters for more consistency).
        Otherwise, matplotlib may create large amounts of figures and leak
        memory.
        Note that even when set to 'True' the panel might not show if
        `full_analysis()` is called through a script instead of an
        (interactive) shell, this depends on your matplotlib configuration.

    saveoverview: bool, optional
        Whether to save the overview panel in `targetdir`.
        Default is 'False'.

    loglevel: str, optional
        The loglevel to use for the logfile created
        as `title.log` in the `targetdir`.
        'ERROR', 'WARNING', 'INFO' or 'DEBUG'.
        Per default, no log is written unless `loglevel` and `targetdir`
        are provided.

    targetplot: `matplotlib.axes.Axes`, optional
        You can provide a matplotlib axes element (i.e. a subplot of an
        existing figure) to plot the correlations into.
        The axis will be passed to the `OutputHandler`
        and all plotting will happen within that axes.
        Per default, a new figure is created - that cannot be added
        as a subplot to any other figure later on. This is due to
        the way matplotlib handles subplots.

    Returns
    -------
    OutputHandler
        that is associated
        with the correlation plot, fits and coefficients.
        Also saves meta data and plotted pdfs to `targetdir`.

    Example
    -------

    .. code-block:: python

        # test data, subsampled branching process
        bp = mre.simulate_branching(m=0.95, h=10, subp=0.1, numtrials=50)

        mre.full_analysis(
            data=bp,
            dt=1,
            tmin=0, tmax=1500,
            dtunit='step',
            fitfuncs=['exp', 'exp_offs', 'complex'],
            targetdir='./output',
            title='Branching Process')
    ..
    """

    # ------------------------------------------------------------------ #
    # Arguments
    # ------------------------------------------------------------------ #

    how_to_str = (
        "full_analysis() requires the following arguments:\n"
        + "'data', 'dt', 'method' and either one of 'kmax', 'tmax' or 'steps'"
    )

    # workaround: if full_analysis() does not reach its end where we remove
    # the local loghandler, it survives and keps logging with the old level
    for hdlr in log.handlers:
        if isinstance(hdlr, logging.FileHandler):
            if hdlr != ut._logfilehandler:
                hdlr.close()
                log.removeHandler(hdlr)

    if data is None or dt is None:
        log.exception(how_to_str)
        raise TypeError

    if kmax is None and tmax is None and steps is None:
        log.exception(how_to_str)
        raise TypeError

    # if there is a targetdir specified, create and use for various output
    if targetdir is not None:
        if isinstance(targetdir, str):
            td = os.path.abspath(os.path.expanduser(targetdir + "/"))
            os.makedirs(td, exist_ok=True)
            ut._set_permissions(td)
            targetdir = td
        else:
            log.exception("Argument 'targetdir' needs to be of type 'str'")
            raise TypeError

        # setup log early so argument errors appear in the logfile
        if loglevel is None:
            # dont create a logfile
            pass
        else:
            if isinstance(loglevel, int) and loglevel > 0:
                pass
            elif str(loglevel).upper() in ["ERROR", "WARNING", "INFO", "DEBUG"]:
                loglevel = str(loglevel).upper()
            else:
                log.debug(f"Unrecognized log level {loglevel}, using 'INFO'")
                loglevel = "INFO"
            # open new handler and add it to logging module
            loghandler = logging.handlers.RotatingFileHandler(
                targetdir
                + "/{}.log".format(
                    "full_analysis" if title is None else title,
                ),
                maxBytes=5 * 1024 * 1024,
                backupCount=1,
            )
            loghandler.setLevel(logging.getLevelName(loglevel))
            loghandler.setFormatter(
                ut.CustomExceptionFormatter(
                    "%(asctime)s %(levelname)8s: %(message)s", "%Y-%m-%d %H:%M:%S"
                )
            )
            log.addHandler(loghandler)
    else:
        if saveoverview:
            log.warning(
                "Cannot save overview since no targetdir specified, " + "skipping"
            )

    log.debug("full_analysis()")
    if ut._log_locals:
        log.debug(f"Locals: {locals()}")

    try:
        dt = float(dt)
        assert dt > 0
    except Exception:
        log.exception("Argument 'dt' needs to be a float > 0")
        raise

    if not isinstance(dtunit, str):
        log.exception("Argument 'dtunit' needs to be of type 'str'")
        raise TypeError

    if steps is None:
        if kmax is not None:
            try:
                kmax = float(kmax)
                assert kmax > 0
            except Exception:
                log.exception("Argument 'kmax' needs to be a number > 0")
                raise
            if tmax is not None:
                log.exception(
                    "Arguments do not match: Please provide either"
                    + " 'kmax' or 'tmin' and 'tmax' or 'steps'"
                )
                raise TypeError
            else:
                tmax = kmax * dt
        if tmin is None:
            tmin = 1
        try:
            tmin = float(tmin)
            tmax = float(tmax)
            assert tmin >= 0 and tmax > tmin
        except Exception:
            if kmax is None:
                log.exception(
                    "Arguments: 'tmax' and 'tmin' "
                    + "need to be floats with 'tmax' > 'tmin' >= 0"
                )
            else:
                log.exception("Argument: 'kmax' is too small")

            raise
        steps = (int(tmin / dt), int(tmax / dt))
    else:
        if tmin is not None or tmax is not None or kmax is not None:
            log.exception(
                "Arguments do not match: Please provide either "
                + "'kmax' or 'tmin' and 'tmax' or 'steps'"
            )
            raise TypeError
        log.debug("Argument 'steps' was provided to full_analysis()")

    defaultfits = False
    if fitfuncs is None:
        fitfuncs = ["e", "eo"]
        defaultfits = True
    elif isinstance(fitfuncs, str):
        fitfuncs = [fitfuncs]
    if not isinstance(fitfuncs, list):
        log.exception(
            "Argument 'fitfuncs' needs to be of type 'str' or "
            + "a list e.g. ['exponential', 'exponential_offset']"
        )
        raise TypeError

    if targetplot is not None and not isinstance(targetplot, Axes):
        log.exception(
            "Optional argument 'targetplot' needs "
            + "to be an instance of 'matplotlib.axes.Axes'"
        )
        raise TypeError

    if title is not None:
        title = str(title)

    # as of v0.1.6 we decided to choose the default method based on the number of trials
    src = input_handler(data)

    if coefficientmethod is None and method is not None:
        coefficientmethod = method
    elif coefficientmethod is not None and method is not None:
        log.exception(
            "Keywords 'method' and 'coefficientmethod' are synonymous.\n"
            + "Provide one or the other!"
        )
        raise ValueError

    # for one trial the two methods are equal
    if coefficientmethod is None and src.shape[0] == 1:
        coefficientmethod = "stationarymean"  # redundant with coefficients()
    elif coefficientmethod is None and src.shape[0] > 1:
        log.exception(
            "The provided data seems to have more than one trial. "
            + "Please specify a 'coefficientmethod':\n"
            + "'trialseparated' --- if your trials are long, or\n"
            + "'stationarymean' --- if you are sure that activity is stationary "
            + "across trials.\n"
            + "If you are unsure, we suggest to compare results from both methods."
        )
        raise TypeError

    if coefficientmethod not in ["trialseparated", "ts", "stationarymean", "sm"]:
        log.exception(
            "Optional argument 'coefficientmethod' needs "
            + "to be either 'trialseparated' or 'stationarymean'"
        )
        raise TypeError

    if ut._log_locals:
        log.debug(f"Finished argument check. Locals: {locals()}")

    # ------------------------------------------------------------------ #
    # Continue with trusted arguments
    # ------------------------------------------------------------------ #

    if substracttrialaverage:
        src = src - np.mean(src, axis=0)

    log.debug(f"full_analysis() seeding to {seed}")
    if seed is None or seed == "random":
        rkseed = seed
        ftseed = seed
    else:
        rkseed = seed * 5330
        ftseed = seed * 101

    if numboot == "auto":
        nbt = 100
    else:
        nbt = numboot
    rks = coefficients(
        data=src,
        steps=steps,
        dt=dt,
        dtunit=dtunit,
        method=coefficientmethod,
        numboot=nbt,
        seed=rkseed,
    )

    fits = []
    for f in fitfuncs:
        if numboot == "auto":
            if (
                fitfunc_check(f) is f_exponential
                or fitfunc_check(f) is f_exponential_offset
            ):
                nbt = 100
            elif fitfunc_check(f) is f_complex:
                nbt = 0
            else:
                nbt = 100
        else:
            nbt = numboot
        fits.append(fit(data=rks, fitfunc=f, steps=steps, numboot=nbt, seed=ftseed))

    # ------------------------------------------------------------------ #
    # Output and Consistency Checks
    # ------------------------------------------------------------------ #

    warning = None
    if defaultfits:
        shownfits = [fits[1]]

        # no trials, no confidence
        if src.shape[0] == 1:
            warning = "Not enough trials to calculate confidence intervals."

        # check that tau  from exp and exp_off
        elif not ut._c_fits_consistent(fits[0], fits[1]):
            # warning = 'Exponential with offset resulted in ' + \
            #     '$\\tau = {:.2f}$ {}'.format(fits[1].tau, fits[1].dtunit)
            warning = (
                "Results from other fits differed beyond confidence.\n"
                + "Try the 'fitfuncs' argument!"
            )
    else:
        shownfits = fits
        warning = None

    # we do not want to have any python figures poping up when showoverivew
    # is set to false.
    # also, interactive: true seems to leak memory when many figures are opened
    with rc_context(rc={"interactive": showoverview}):
        if showoverview or saveoverview:
            panel = overview(
                src,
                [rks],
                shownfits,
                title=title,
                warning=warning,
                interactive=showoverview,
            )

        res = OutputHandler([rks] + shownfits, ax=targetplot)

        if targetdir is not None:
            if title is not None and title != "":
                res.save(targetdir + "/" + title)
                if saveoverview:
                    panel.savefig(targetdir + "/" + title + "_overview.pdf")
            else:
                res.save(targetdir + "/full_analysis")
                if saveoverview:
                    panel.savefig(targetdir + "/full_analysis_overview.pdf")

        if showoverview:
            panel.show()
        else:
            # if interactive mode is on, panel would still be shown
            try:
                plt.close(panel)
            except:
                log.debug("Exception passed", exc_info=True)

        try:
            log.removeHandler(loghandler)
        except:
            log.debug("No handler to remove")

        log.info("full_analysis() done")
        return res