Source code for mdtools.statistics

# This file is part of MDTools.
# Copyright (C) 2021, The MDTools Development Team and all contributors
# listed in the file AUTHORS.rst
#
# MDTools is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# MDTools is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with MDTools.  If not, see <http://www.gnu.org/licenses/>.


"""Functions for the statistical analysis of data."""


# Standard libraries
import warnings

# Third-party libraries
import numpy as np
from scipy import constants, special
from scipy.special import comb
from scipy.stats import norm

# First-party libraries
import mdtools as mdt



[docs]
def acf(x, axis=None, dt=1, dtau=1, tau_max=None, center=True, unbiased=False):
    r"""
    Calculate the autocorrelation function of an array.

    Parameters
    ----------
    x : array_like
        The input array.
    axis : int or None, optional
        The axis along which to calculate the ACF.  By default, the
        flattened input array is used.
    dt : int or None, optional
        Distance between restarting times (actually restarting indices)
        :math:`t_0`.  If ``None`` is given, `dt` is set to the current
        lag time :math:`\tau` which results in independent sampling
        windows.
    dtau : int, optional
        Distance between evaluated lag times (actually lag indices)
        :math:`\tau`.
    tau_max : int or None, optional
        Maximum lag time :math:`\tau` upon which to compute the ACF.
        Because the first lag time is 0, the last evaluated lag time is
        actually ``tau_max - 1``.  By default, `tau_max` is set to
        ``x.size`` is `axis` is ``None`` and to ``x.shape[axis]``
        otherwise.  `tau_max` must lie within the interval
        ``[0, len(x)]``.
    center : bool, optional
        If ``True``, center the input array around its mean, i.e.
        subtract the sample mean :math:`\bar{X}` when calculating the
        variance and covariance.
    unbiased : bool, optional
        If ``True``, the covariance
        :math:`\text{Cov}[X_{t_0}, X_{t_0 + \tau}]` is normed by the
        actual number of sample points :math:`t_0` (which depends on the
        lag time :math:`\tau`).  If ``False``, the covariance is for all
        lag times normed by the number of sampling points at
        :math:`t_0 = 0` (which is equal to the length of the input array
        `x`).  Note that setting `unbiased` to ``True`` might result in
        values of the ACF that lie outside the interval [-1, 1],
        especially for lag times larger than ``len(x) // 2`` if `center`
        is ``True``.

    Returns
    -------
    ac : numpy.ndarray
        Autocorrelation function of `x`.  If `axis` is ``None``, the
        output array has shape ``(n,)`` where ``n`` is
        ``int(np.ceil(tau_max / dtau))``.  If a specific axis is given,
        `ac` has the same shape as `x` except along the given axis where
        the length is given by ``n``.

    Raises
    ------
    ValueError :
        If the first element of `ac` is not unity or one or more
        elements of `ac` fall outside the interval [-1, 1].

    See Also
    --------
    :func:`mdtools.statistics.acf_np` :
        Different implementation of the ACF using
        :func:`numpy.correlate`
    :func:`mdtools.statistics.acf_se` :
        Calculate the standard errors of an autocorrelation function
    :func:`mdtools.statistics.acf_confint` :
        Calculate the confidence intervals of an autocorrelation
        function

    Notes
    -----
    This function computes the autocorrelation function (ACF) of the
    input array `x` assuming that `x` describes a `weakly stationary
    stochastic process`_.  That means, the mean does not vary with time,
    the variance is finite for all times and the autocovariance does not
    depend on the starting time :math:`t_0` but only on the lag time
    :math:`\tau`.  In this case the ACF is given by: [1]_:sup:`,` [2]_

    .. math::

        C(\tau) = \frac{\text{Cov}[X_{t_0}, X_{t_0 + \tau}]}{\text{Var}[X]}
        = \frac{\langle (X_{t_0} - \mu) \cdot (X_{t_0 + \tau} - \mu) \rangle}{\langle (X - \mu)^2 \rangle}

    The ACF strictly fulfills
    :math:`C(\tau) \in [-1, 1] \text{ } \forall \text{ } \tau \in
    \mathbb{R}`.  Numerically, the ACF is calculated using the following
    formula: [3]_:sup:`,` [4]_

    .. math::

        C_\tau = \frac{\frac{1}{T^*} \sum_{t_0=0}^{T-\tau} (X_{t_0} - \bar{X}) (X_{t_0 + \tau} - \bar{X})}{\frac{1}{T} \sum_{t_0=0}^T (X_{t_0} - \bar{X})^2}
        = \frac{\frac{1}{T^*} \sum_{t_0=0}^{T-\tau} (X_{t_0} - \bar{X}) (X_{t_0 + \tau} - \bar{X})}{C_0}

    The increment of the sums in the formula of :math:`C_\tau` is given
    by `dt`.  :math:`\bar{X} = \frac{1}{T} \sum_{t=1}^T X_t` is the
    sample mean.  For the sample mean, the increment of the sum is
    always 1, independent of the choice of `dt`.  Note that **the sample
    mean is only subtracted when** `center` **is** ``True``.  `T` is
    always the length of the input array `x`, independent of the choice
    of `tau_max`.  :math:`T^*` is either :math:`T - \tau` if `unbiased`
    is ``True`` or simply :math:`T` if `unbiased` is ``False``.

    For the sake of completeness, the ACF of a non-stationary
    stochastic process is given by: [1]_:sup:`,` [2]_

    .. math::

        C(t_1, t_2) = \frac{\text{Cov}[X_{t_1}, X_{t_2}]}{\sqrt{\text{Var}[X_{t_1}]} \cdot \sqrt{\text{Var}[X_{t_2}]}}
        = \frac{\langle (X_{t_1} - \mu_{t_1}) \cdot (X_{t_2} - \mu_{t_2}) \rangle}{\sqrt{\langle (X_{t_1} - \mu_{t_1})^2 \rangle} \cdot \sqrt{\langle (X_{t_2} - \mu_{t_2})^2 \rangle}}

    .. _weakly stationary stochastic process:
        https://en.wikipedia.org/wiki/Stationary_process#Weak_or_wide-sense_stationarity

    References
    ----------
    .. [1] Wikipedia `Autocorrelation
        <https://en.wikipedia.org/wiki/Autocorrelation#Normalization>`_

    .. [2] Kun Il Park, `Fundamentals of Probability and Stochastic
        Processes with Applications to Communications
        <https://doi.org/10.1007/978-3-319-68075-0>`_, Springer, 2018,
        p. 169, Holmdel, New Jersey, USA, ISBN 978-3-319-68074-3

    .. [3] Julius O. Smith, `Unbiased Cross-Correlation
        <https://www.dsprelated.com/freebooks/mdft/Unbiased_Cross_Correlation.html>`_.
        In Mathematics of the Discrete Fourier Transform (DFT), 2002,
        p. 189, Stanford, California, USA

    .. [4] Wikipedia `Correlogram
        <https://en.wikipedia.org/wiki/Correlogram#Estimation_of_autocorrelations>`_

    Examples
    --------
    >>> a = np.array([-2,  2, -2,  2, -2])
    >>> mdt.stats.acf(a)
    array([ 1.        , -0.8       ,  0.56666667, -0.4       ,  0.13333333])
    >>> mdt.stats.acf(a, dt=None)
    array([ 1.        , -0.8       ,  0.26666667, -0.2       ,  0.13333333])
    >>> mdt.stats.acf(a, unbiased=True)
    array([ 1.        , -1.        ,  0.94444444, -1.        ,  0.66666667])
    >>> mdt.stats.acf(a, unbiased=True, dt=None)
    array([ 1.        , -1.        ,  0.66666667, -1.        ,  0.66666667])
    >>> mdt.stats.acf(a, center=False)
    array([ 1. , -0.8,  0.6, -0.4,  0.2])
    >>> mdt.stats.acf(a, center=False, dtau=2)
    array([1. , 0.6, 0.2])
    >>> mdt.stats.acf(a, center=False, tau_max=len(a)//2)
    array([ 1. , -0.8])
    >>> mdt.stats.acf(a, center=False, dt=None)
    array([ 1. , -0.8,  0.4, -0.2,  0.2])
    >>> mdt.stats.acf(a, center=False, unbiased=True)
    array([ 1., -1.,  1., -1.,  1.])
    >>> mdt.stats.acf(a, center=False, unbiased=True, dt=None)
    array([ 1., -1.,  1., -1.,  1.])

    >>> a = np.array([-2, -2, -2,  2,  2])
    >>> mdt.stats.acf(a)
    array([ 1.        ,  0.36666667, -0.26666667, -0.4       , -0.2       ])
    >>> mdt.stats.acf(a, dt=None)
    array([ 1.        ,  0.36666667, -0.06666667, -0.2       , -0.2       ])
    >>> mdt.stats.acf(a, unbiased=True)
    array([ 1.        ,  0.45833333, -0.44444444, -1.        , -1.        ])
    >>> mdt.stats.acf(a, unbiased=True, dt=None)
    array([ 1.        ,  0.45833333, -0.16666667, -1.        , -1.        ])
    >>> mdt.stats.acf(a, center=False)
    array([ 1. ,  0.4, -0.2, -0.4, -0.2])
    >>> mdt.stats.acf(a, center=False, dt=None)
    array([ 1. ,  0.4,  0. , -0.2, -0.2])
    >>> mdt.stats.acf(a, center=False, unbiased=True)
    array([ 1.        ,  0.5       , -0.33333333, -1.        , -1.        ])
    >>> mdt.stats.acf(a, center=False, unbiased=True, dt=None)
    array([ 1. ,  0.5,  0. , -1. , -1. ])

    >>> a = np.array([[-2,  2, -2,  2, -2],
    ...               [-2,  2, -2,  2, -2],
    ...               [-2, -2, -2,  2,  2],
    ...               [-2, -2, -2,  2,  2]])
    >>> mdt.stats.acf(a, center=False)
    array([ 1.  , -0.15,  0.  , -0.25, -0.2 ,  0.55,  0.  ,  0.15, -0.1 ,
           -0.25,  0.1 ,  0.05,  0.1 ,  0.05, -0.2 ,  0.05,  0.  ,  0.05,
            0.  , -0.05])
    >>> mdt.stats.acf(a, center=False, axis=0)
    array([[ 1.  ,  1.  ,  1.  ,  1.  ,  1.  ],
           [ 0.75,  0.25,  0.75,  0.75,  0.25],
           [ 0.5 , -0.5 ,  0.5 ,  0.5 , -0.5 ],
           [ 0.25, -0.25,  0.25,  0.25, -0.25]])
    >>> mdt.stats.acf(a, center=False, axis=0, unbiased=True)
    array([[ 1.        ,  1.        ,  1.        ,  1.        ,  1.        ],
           [ 1.        ,  0.33333333,  1.        ,  1.        ,  0.33333333],
           [ 1.        , -1.        ,  1.        ,  1.        , -1.        ],
           [ 1.        , -1.        ,  1.        ,  1.        , -1.        ]])
    >>> mdt.stats.acf(a, center=False, axis=1)
    array([[ 1. , -0.8,  0.6, -0.4,  0.2],
           [ 1. , -0.8,  0.6, -0.4,  0.2],
           [ 1. ,  0.4, -0.2, -0.4, -0.2],
           [ 1. ,  0.4, -0.2, -0.4, -0.2]])
    >>> mdt.stats.acf(a, center=False, axis=1, unbiased=True)
    array([[ 1.        , -1.        ,  1.        , -1.        ,  1.        ],
           [ 1.        , -1.        ,  1.        , -1.        ,  1.        ],
           [ 1.        ,  0.5       , -0.33333333, -1.        , -1.        ],
           [ 1.        ,  0.5       , -0.33333333, -1.        , -1.        ]])

    >>> a = np.array([[[-2,  2, -2,  2, -2],
    ...                [-2,  2, -2,  2, -2]],
    ...
    ...               [[-2, -2, -2,  2,  2],
    ...                [-2, -2, -2,  2,  2]]])
    >>> mdt.stats.acf(a, center=False)
    array([ 1.  , -0.15,  0.  , -0.25, -0.2 ,  0.55,  0.  ,  0.15, -0.1 ,
           -0.25,  0.1 ,  0.05,  0.1 ,  0.05, -0.2 ,  0.05,  0.  ,  0.05,
            0.  , -0.05])
    >>> mdt.stats.acf(a, center=False, axis=0)
    array([[[ 1. ,  1. ,  1. ,  1. ,  1. ],
            [ 1. ,  1. ,  1. ,  1. ,  1. ]],
    <BLANKLINE>
           [[ 0.5, -0.5,  0.5,  0.5, -0.5],
            [ 0.5, -0.5,  0.5,  0.5, -0.5]]])
    >>> mdt.stats.acf(a, center=False, axis=0, unbiased=True)
    array([[[ 1.,  1.,  1.,  1.,  1.],
            [ 1.,  1.,  1.,  1.,  1.]],
    <BLANKLINE>
           [[ 1., -1.,  1.,  1., -1.],
            [ 1., -1.,  1.,  1., -1.]]])

    >>> mdt.stats.acf(a, center=False, axis=1)
    array([[[1. , 1. , 1. , 1. , 1. ],
            [0.5, 0.5, 0.5, 0.5, 0.5]],
    <BLANKLINE>
           [[1. , 1. , 1. , 1. , 1. ],
            [0.5, 0.5, 0.5, 0.5, 0.5]]])
    >>> mdt.stats.acf(a, center=False, axis=1, unbiased=True)
    array([[[1., 1., 1., 1., 1.],
            [1., 1., 1., 1., 1.]],
    <BLANKLINE>
           [[1., 1., 1., 1., 1.],
            [1., 1., 1., 1., 1.]]])
    >>> mdt.stats.acf(a, center=False, axis=2)
    array([[[ 1. , -0.8,  0.6, -0.4,  0.2],
            [ 1. , -0.8,  0.6, -0.4,  0.2]],
    <BLANKLINE>
           [[ 1. ,  0.4, -0.2, -0.4, -0.2],
            [ 1. ,  0.4, -0.2, -0.4, -0.2]]])
    >>> mdt.stats.acf(a, center=False, axis=2, unbiased=True)
    array([[[ 1.        , -1.        ,  1.        , -1.        ,
              1.        ],
            [ 1.        , -1.        ,  1.        , -1.        ,
              1.        ]],
    <BLANKLINE>
           [[ 1.        ,  0.5       , -0.33333333, -1.        ,
             -1.        ],
            [ 1.        ,  0.5       , -0.33333333, -1.        ,
             -1.        ]]])
    """  # noqa: E501, W505
    x = np.array(x, dtype=np.float64, copy=center)
    if center:
        x = mdt.stats.center(x, axis=axis, dtype=np.float64, inplace=True)
    # Maximum possible tau value.
    if axis is None:
        tau_max_real = x.size
    else:
        if abs(axis) >= x.ndim:
            raise np.AxisError(axis=axis, ndim=x.ndim)
        tau_max_real = x.shape[axis]

    if dt is not None and dt < 1:
        raise ValueError(
            "'dt' ({}) must be a positive integer or None".format(dt)
        )
    if dtau < 1:
        raise ValueError(
            "'dtau' ({}) must be a positive integer or None".format(dtau)
        )
    if tau_max is None:
        tau_max = tau_max_real
    elif tau_max < 0 or tau_max > len(x):
        raise ValueError(
            "'tau_max' ({}) must lie within the interval"
            " [0, {}]".format(tau_max, len(x))
        )

    ac = []
    for tau in range(0, tau_max, dtau):
        if dt is None:
            t_step = max(1, tau)  # Slice step cannot be zero
        else:
            t_step = dt
        t0_max = tau_max_real - tau
        cov = np.copy(mdt.nph.take(x, stop=t0_max, step=t_step, axis=axis))
        cov *= mdt.nph.take(x, start=tau, step=t_step, axis=axis)
        ac.append(np.sum(cov, axis=axis))
        if unbiased:
            if axis is None:
                ac[-1] /= cov.size
            else:
                ac[-1] /= cov.shape[axis]
    if axis is None:
        ac = np.concatenate(ac, axis=axis)
    else:
        ac = np.stack(ac, axis=axis)
    ac0 = mdt.nph.take(ac, start=0, stop=1, axis=axis)
    ac /= ac0

    # Consistency check.
    if axis is None:
        shape = (int(np.ceil(tau_max / dtau)),)
    else:
        shape = list(x.shape)
        shape[axis] = int(np.ceil(tau_max / dtau))
        shape = tuple(shape)
    if ac.shape != shape:
        raise ValueError(
            "'ac' has shape {} but should have shape {}.  This should not have"
            " happened".format(ac.shape, shape)
        )
    if not np.allclose(
        mdt.nph.take(ac, start=0, stop=1, axis=axis), 1, rtol=0
    ):
        raise ValueError(
            "The first element of the ACF is not unity but {}.  This should"
            " not have"
            " happened".format(mdt.nph.take(ac, start=0, stop=1, axis=axis))
        )
    if np.any(np.abs(ac) > 1):
        raise ValueError(
            "At least one element of the ACF is absolutely seen greater than"
            " unity.  This should not have happened.  min(ACF) = {}.  max(ACF)"
            " = {}".format(np.min(ac), np.max(ac))
        )
    return ac




[docs]
def acf_np(x, center=True, unbiased=False):
    r"""
    Calculate the autocorrelation function of a 1-dimensional array.

    Parameters
    ----------
    x : array_like
        1-dimensional input array.
    center : bool, optional
        If ``True``, center the input array around its mean, i.e.
        subtract the sample mean :math:`\bar{X}` when calculating the
        variance and covariance.
    unbiased : bool, optional
        If ``True``, the covariance
        :math:`\text{Cov}[X_{t_0}, X_{t_0 + \tau}]` is normed by the
        actual number of sample points :math:`t_0` (which depends on the
        lag time :math:`\tau`).  If ``False``, the covariance is for all
        lag times normed by the number of sampling points at
        :math:`t_0 = 0` (which is equal to the length of the input array
        `x`).  Note that setting `unbiased` to ``True`` might result in
        values of the ACF that lie outside the interval [-1, 1],
        especially for lag times larger than ``len(x) // 2`` if `center`
        is ``True``.

    Returns
    -------
    ac : numpy.ndarray
        Array of the same shape as `x` containing the autocorrelation
        function of `x`.

    Raises
    ------
    ValueError :
        If the first element of `ac` is not unity or one or more
        elements of `ac` fall outside the interval [-1, 1].

    See Also
    --------
    :func:`mdtools.statistics.acf` :
        Different implementation of the ACF using using a for loop
    :func:`mdtools.statistics.acf_se` :
        Calculate the standard errors of an autocorrelation function
    :func:`mdtools.statistics.acf_confint` :
        Calculate the confidence intervals of an autocorrelation
        function

    Notes
    -----
    See :func:`mdtools.statistics.acf` for the mathematical details of
    computing the autocorrelation function (ACF).

    This function uses :func:`numpy.correlate` to calculate the
    autocorrelation function whereas :func:`mdtools.statistics.acf` uses
    an explicit for loop.  Therefore, :func:`mdtools.statistics.acf`
    can handle arbitrarily shaped input arrays and offers more options.

    Examples
    --------
    >>> a = np.random.normal(loc=3, scale=7, size=11)
    >>> np.allclose(mdt.stats.acf_np(a), mdt.stats.acf(a), rtol=0)
    True
    >>> np.allclose(
    ...     mdt.stats.acf_np(a, center=False),
    ...     mdt.stats.acf(a, center=False),
    ...     rtol=0
    ... )
    True
    """
    x = np.array(x, dtype=np.float64, copy=center)
    if x.ndim != 1:
        raise ValueError(
            "'x' must be 1-dimensional but is {}-dimensional".format(x.ndim)
        )

    if center:
        x -= np.mean(x, dtype=np.float64)
    ac = np.correlate(x, x, mode="full")
    ac = ac[-len(x) :]
    if unbiased:
        ac /= np.arange(len(ac), 0, -1)
    ac /= ac[0]

    if not np.isclose(ac[0], 1, rtol=0):
        raise ValueError(
            "The first element of the ACF is not unity but {}.  This should"
            " not have happened".format(ac[0])
        )
    if np.any(np.abs(ac) > 1):
        raise ValueError(
            "At least one element of the ACF is absolutely seen greater than"
            " unity.  This should not have happened.  min(ACF) = {}.  max(ACF)"
            " = {}".format(np.min(ac), np.max(ac))
        )
    return ac




[docs]
def acf_se(x, axis=None, n=None):
    r"""
    Calculate the standard errors of an autocorrelation function.

    Parameters
    ----------
    x : array_like
        The values of the ACF.  Intermediate lag times must not be
        missing.  That means if you used e.g.
        :func:`mdtools.statistics.acf` to compute the ACF, the argument
        `dtau` must have been ``1``.
    axis : int or None, optional
        The axis along which to calculate the SE.  By default, the
        flattened input array is used.
    n : int, optional
        Sample size (i.e. number of recorded time points) of the
        underlying time series.  By default, `n` is set to the number of
        lag times :math:`\tau` in the given ACF.  This is valid for ACFs
        that were computed at all possible lag times from the underlying
        time series.  That means if you used e.g.
        :func:`mdtools.statistics.acf` to compute the ACF, the argument
        `dtau` must have been ``1`` and `tau_max` must have been
        ``None`` or the length of `x` along the given axis..

    Returns
    -------
    se : numpy.ndarray
        Array of the same shape as `x` containing the standard error of
        the ACF at each lag time.

    See Also
    --------
    :func:`mdtools.statistics.acf` :
        Calculate the autocorrelation function of an array
    :func:`mdtools.statistics.acf_confint` :
        Calculate the confidence intervals of an autocorrelation
        function

    Notes
    -----
    The standard error (SE) of the autocorrelation function (ACF)
    :math:`C_\tau` at lag time :math:`\tau` is estimated according to
    Bartlett's formula for MA(l) processes: [#]_:sup:`,` [#]_

    .. math::

        SE(C_\tau) = \sqrt{\frac{1 + 2 \sum_{\tau^{'} = 1}^{\tau - 1} C_{\tau^{'}}}{N}}

    for :math:`\tau \gt 1`.  For :math:`\tau = 1` the standard error is
    estimated by :math:`SE(C_1) = \frac{1}{\sqrt{N}}` and for
    :math:`\tau = 0` the standard error is :math:`SE(C_0) = 0`.

    References
    ----------
    .. [#] Wikipedia `Correlogram
        <https://en.wikipedia.org/wiki/Correlogram#Statistical_inference_with_correlograms>`_

    .. [#] Wikipedia `Autoregressive-moving-average model
        <https://en.wikipedia.org/wiki/Autoregressive%E2%80%93moving-average_model#Moving-average_model>`_

    Examples
    --------
    >>> mdt.stats.acf_se([3])
    array([0.])
    >>> a = np.arange(4)
    >>> mdt.stats.acf_se(a)
    array([0.       , 0.5      , 0.8660254, 1.6583124])
    >>> b = np.column_stack([a, a])
    >>> mdt.stats.acf_se(b, axis=0)
    array([[0.       , 0.       ],
           [0.5      , 0.5      ],
           [0.8660254, 0.8660254],
           [1.6583124, 1.6583124]])
    >>> b = np.row_stack([a, a])
    >>> mdt.stats.acf_se(b, axis=1)
    array([[0.       , 0.5      , 0.8660254, 1.6583124],
           [0.       , 0.5      , 0.8660254, 1.6583124]])
    >>> c = np.array([b, b])
    >>> mdt.stats.acf_se(c, axis=2)
    array([[[0.       , 0.5      , 0.8660254, 1.6583124],
            [0.       , 0.5      , 0.8660254, 1.6583124]],
    <BLANKLINE>
           [[0.       , 0.5      , 0.8660254, 1.6583124],
            [0.       , 0.5      , 0.8660254, 1.6583124]]])
    """  # noqa: E501, W505
    x = np.asarray(x)
    if n is None and axis is None:
        n = x.size
    elif n is None:  # and axis is not None
        if abs(axis) >= x.ndim:
            raise np.AxisError(axis=axis, ndim=x.ndim)
        n = x.shape[axis]
    elif n <= 0:
        raise ValueError("'n' ({}) must be a positive integer".format(n))

    # SE(lag) = \sqrt{1 + 2 * \sum_{i=1}^{lag-1} ACF(i) / N}.
    # Sum starts at lag=1.
    acf = mdt.nph.take(x, start=1, axis=axis)
    se = np.cumsum(acf**2, axis=axis, dtype=np.float64)
    # The sum must only run until lag-1 but now runs until lag => Shift
    # the array by 1 to the right.
    se = np.roll(se, shift=1, axis=axis)
    se *= 2
    se += 1
    se /= n
    # SE(lag=1) = 1/\sqrt{N}.
    se1 = mdt.nph.take(se, start=0, stop=1, axis=axis)
    se1[:] = 1 / n
    se = np.sqrt(se, out=se)
    # SE(lag=0) = 0 because ACF(lag=0) is always 1 and has zero error.
    se = np.insert(se, 0, 0, axis=axis)

    # Consistency check
    if se.shape != x.shape:
        raise ValueError(
            "'se' has shape {} but should have shape {}.  This should not have"
            " happened".format(se.shape, x.shape)
        )
    if np.any(mdt.nph.take(se, start=0, stop=1, axis=axis) != 0):
        raise ValueError(
            "The first element of the standard error of the ACF is not zero"
            " but {}.  This should not have"
            " happened".format(mdt.nph.take(se, start=0, stop=1, axis=axis))
        )
    if not np.allclose(
        mdt.nph.take(se, start=1, stop=2, axis=axis), 1 / np.sqrt(n), rtol=0
    ):
        raise ValueError(
            "The second element of the standard error of the ACF is not"
            " 1/sqrt(n) = {} but {}.  This should not have happened".format(
                1 / np.sqrt(n),
                mdt.nph.take(se, start=1, stop=2, axis=axis),
            )
        )
    if (axis is None and np.any(np.diff(se) < 0)) or (
        axis is not None and np.any(np.diff(se, axis=axis) < 0)
    ):
        raise ValueError("'se' is not monotonically increasing")
    return se




[docs]
def acf_confint(x, axis=None, alpha=0.05, n=None):
    r"""
    Calculate the confidence intervals of an autocorrelation function.

    Parameters
    ----------
    x : array_like
        The values of the ACF.  Intermediate lag times must not be
        missing.  That means if you used e.g.
        :func:`mdtools.statistics.acf` to compute the ACF, the argument
        `dtau` must have been ``1``.
    axis : int or None, optional
        The axis along which to calculate the confidence interval.  By
        default, the flattened input array is used.
    alpha : scalar, optional
        The significance level (also known as probability of error).
        The significance level is the maximum probability for rejecting
        the null hypothesis although its true (Type 1 error).  Here, the
        null hypothesis is that the underlying time series has no
        autocorrelation.  Typical values for the significance level are
        0.01 or 0.05.  The smaller the significance level (probability
        for Type 1 error), the higher the probability of a Type 2 error
        (i.e. the null hypothesis is not rejected although it is wrong).
    n : int, optional
        Sample size (i.e. number of recorded time points) of the
        underlying time series.  By default, `n` is set to the number of
        lag times :math:`\tau` in the given ACF.  This is valid for ACFs
        that were computed at all possible lag times from the underlying
        time series.  That means if you used e.g.
        :func:`mdtools.statistics.acf` to compute the ACF, the argument
        `dtau` must have been ``1`` and `tau_max` must have been
        ``None`` or the length of `x` along the given axis..

    Returns
    -------
    confint : numpy.ndarray
        Array of the same shape as `x` containing the upper limit of the
        confidence interval of the ACF at each lag time.  The lower
        limit is simply given by ``-confint``.  The confidence interval
        is centered at zero.  To get the confidence interval around the
        actual ACF values compute ``x + confint`` and ``x - confint``.

    See Also
    --------
    :func:`mdtools.statistics.acf` :
        Calculate the autocorrelation function of an array
    :func:`mdtools.statistics.acf_se` :
        Calculate the standard errors of an autocorrelation function

    Notes
    -----
    The confidence interval of the autocorrelation function (ACF)
    :math:`C_\tau` at lag time :math:`\tau` is estimated according to:
    [1]_

    .. math::

        B(C_\tau) = z_{1-\alpha/2} SE(C_\tau)

    with the significance level :math:`\alpha`, the quantile function
    :math:`z_p` of the *standard* normal distribution
    (:math:`\sigma = 1`) and the standard error :math:`SE(C_\tau)`.

    The quantile function :math:`z_p` is also known as the inverse
    cumulative distribution function :math:`F^{-1}(p)`.  The cumulative
    distribution function :math:`F(X)` returns the probability
    :math:`p` to gain a value below or equal to :math:`X`.  Hence, the
    inverse cumulative distribution function :math:`F^{-1}(p)` returns
    the value :math:`X` such that the probability of gaining a value
    below or equal to :math:`X` is given by :math:`p`.  Thus, the
    probability for gaining a value higher than :math:`X` is
    :math:`1-p`.

    A normal random variable :math:`X` will exceed the value
    :math:`\mu + z_p \sigma` with probability :math:`1-p` and will lie
    outside the interval :math:`\mu \pm z_p \sigma` with probability
    :math:`2(1-p)` (for :math:`p \ge 0.5` or :math:`2p` for
    :math:`p \le 0.5`).  In particular, the quantile :math:`z_{0.975}`
    is :math:`1.96`.  Therefore, a normal random variable will lie
    outside the interval :math:`\mu \pm 1.96\sigma` in only 5% of cases.
    [2]_  Here :math:`\mu` is the mean of the random variable and
    :math:`\sigma` is its standard deviation.  Note that for the normal
    distribution :math:`z_p - \mu = -(z_{1-p} - \mu)` holds, because it
    is symmetric around the mean.

    The standard error of the ACF is estimated according to Bartlett's
    formula for MA(l) processes: [1]_:sup:`,` [3]_

    .. math::

        SE(C_\tau) = \sqrt{\frac{1 + 2 \sum_{\tau^{'} = 1}^{\tau - 1} C_{\tau^{'}}}{N}}

    for :math:`\tau \gt 1`.  For :math:`\tau = 1` the standard error is
    estimated by :math:`SE(C_1) = \frac{1}{\sqrt{N}}` and for
    :math:`\tau = 0` the standard error is :math:`SE(C_0) = 0`.

    If the ACF :math:`C_\tau` is zero within the confidence interval,
    the null hypothesis that there is no autocorrelation at the given
    lag time :math:`\tau` is rejected with a significance level of
    :math:`\alpha`.  This is an approximate test that assumes that the
    underlying time series is normally distributed. [1]_

    References
    ----------
    .. [1] Wikipedia `Correlogram
        <https://en.wikipedia.org/wiki/Correlogram#Statistical_inference_with_correlograms>`_

    .. [2] Wikipedia `Normal distribution
        <https://en.wikipedia.org/wiki/Normal_distribution#Quantile_function>`_

    .. [3] Wikipedia `Autoregressive-moving-average model
        <https://en.wikipedia.org/wiki/Autoregressive%E2%80%93moving-average_model#Moving-average_model>`_

    Examples
    --------
    >>> mdt.stats.acf_confint([3])
    array([0.])
    >>> a = np.arange(4)
    >>> se = mdt.stats.acf_se(a)
    >>> ci = mdt.stats.acf_confint(a)
    >>> ci
    array([0.        , 0.97998199, 1.6973786 , 3.25023257])
    >>> np.allclose(ci, 1.959960*se, rtol=0, atol=1e-5)
    True
    >>> ci = mdt.stats.acf_confint(a, alpha=0.01)
    >>> ci
    array([0.        , 1.28791465, 2.23073361, 4.27152966])
    >>> np.allclose(ci, 2.575830*se, rtol=0, atol=1e-5)
    True

    >>> b = np.column_stack([a, a])
    >>> mdt.stats.acf_confint(b, axis=0)
    array([[0.        , 0.        ],
           [0.97998199, 0.97998199],
           [1.6973786 , 1.6973786 ],
           [3.25023257, 3.25023257]])
    >>> b = np.row_stack([a, a])
    >>> mdt.stats.acf_confint(b, axis=1)
    array([[0.        , 0.97998199, 1.6973786 , 3.25023257],
           [0.        , 0.97998199, 1.6973786 , 3.25023257]])
    >>> c = np.array([b, b])
    >>> mdt.stats.acf_confint(c, axis=2)
    array([[[0.        , 0.97998199, 1.6973786 , 3.25023257],
            [0.        , 0.97998199, 1.6973786 , 3.25023257]],
    <BLANKLINE>
           [[0.        , 0.97998199, 1.6973786 , 3.25023257],
            [0.        , 0.97998199, 1.6973786 , 3.25023257]]])
    """  # noqa: E501, W505
    rv = norm(loc=0, scale=1)
    quantile = rv.ppf(1 - alpha / 2)
    confint = mdt.stats.acf_se(x, axis=axis, n=n)
    confint *= quantile
    return confint




[docs]
def center(x, axis=None, dtype=None, inplace=False):
    """
    Center a distribution around its sample mean.

    Parameters
    ----------
    x : array_like
        Input array.
    axis : int or None, optional
        The axis along which to compute the sample mean.  By default,
        the flattened input array is used.
    dtype : data-type, optional
        The dtype of the output array.  This dtype is also used to
        compute the mean.  Note that computing means can be inaccurate
        when using the floating-point dtype ``numpy.float32`` or less.
        See :func:`numpy.mean` for more details.
    inplace : bool, optional
        If ``True``, change the input array inplace if possible (the
        given dtype must be castable to the dtype of `x` and `x` must
        already be a :class:`numpy.ndarray`).

    Returns
    -------
    x_centered : numpy.ndarray
        The centered input distribution: ``x - np.mean(x)``.

    See Also
    --------
    :func:`numpy.mean` :
        Compute the arithmetic mean along the specified axis
    :func:`mdtools.statistics.standardize` :
        Standardize a distribution

    Examples
    --------
    >>> a = np.arange(24).reshape(2,3,4)
    >>> mdt.stats.center(a)
    array([[[-11.5, -10.5,  -9.5,  -8.5],
            [ -7.5,  -6.5,  -5.5,  -4.5],
            [ -3.5,  -2.5,  -1.5,  -0.5]],
    <BLANKLINE>
           [[  0.5,   1.5,   2.5,   3.5],
            [  4.5,   5.5,   6.5,   7.5],
            [  8.5,   9.5,  10.5,  11.5]]])
    >>> mdt.stats.center(a, axis=0)
    array([[[-6., -6., -6., -6.],
            [-6., -6., -6., -6.],
            [-6., -6., -6., -6.]],
    <BLANKLINE>
           [[ 6.,  6.,  6.,  6.],
            [ 6.,  6.,  6.,  6.],
            [ 6.,  6.,  6.,  6.]]])
    >>> mdt.stats.center(a, axis=1)
    array([[[-4., -4., -4., -4.],
            [ 0.,  0.,  0.,  0.],
            [ 4.,  4.,  4.,  4.]],
    <BLANKLINE>
           [[-4., -4., -4., -4.],
            [ 0.,  0.,  0.,  0.],
            [ 4.,  4.,  4.,  4.]]])
    >>> mdt.stats.center(a, axis=2)
    array([[[-1.5, -0.5,  0.5,  1.5],
            [-1.5, -0.5,  0.5,  1.5],
            [-1.5, -0.5,  0.5,  1.5]],
    <BLANKLINE>
           [[-1.5, -0.5,  0.5,  1.5],
            [-1.5, -0.5,  0.5,  1.5],
            [-1.5, -0.5,  0.5,  1.5]]])

    >>> a = np.ones(3, dtype=np.float64)
    >>> a
    array([1., 1., 1.])
    >>> b = mdt.stats.center(a, inplace=True)
    >>> b
    array([0., 0., 0.])
    >>> b is a
    True
    >>> b[0] = 1
    >>> a
    array([1., 0., 0.])
    """
    x = np.asarray(x, dtype=dtype)
    mean = np.mean(x, axis=axis, dtype=dtype)
    if axis is not None:
        mean = np.expand_dims(mean, axis=axis)
    if inplace:
        x -= mean
    else:
        x = x - mean
    return x




[docs]
def standardize(x, axis=None, dtype=None, inplace=False, ddof=0):
    """
    Standardize a distribution.

    Standardize a distribution such that its mean is zero and its
    standard deviation is one.

    Parameters
    ----------
    x : array_like
        Input array.
    axis : int or None, optional
        The axis along which to compute the sample mean and standard
        deviation.  By default, the flattened input array is used.
    dtype : data-type, optional
        The dtype of the output array.  This dtype is also used to
        compute the mean and standard deviation.  Note that computing
        means and standard deviations can be inaccurate when using the
        floating-point dtype ``numpy.float32`` or less.  See
        :func:`numpy.mean` for more details.
    inplace : bool, optional
        If ``True``, change the input array inplace if possible (the
        given dtype must be castable to the dtype of `x` and `x` must
        already be a :class:`numpy.ndarray`).
    ddof : int, optional
        Delta Degrees of Freedom.  When calculating the standard
        deviation, the used divisor is ``N - ddof``, where ``N``
        represents the number of elements.  See :func:`numpy.std` for
        more details.

    Returns
    -------
    x_standardized : numpy.ndarray
        The standardized input distribution:
        ``(x - np.mean(x)) / np.std(x)``.

    See Also
    --------
    :func:`numpy.mean` :
        Compute the arithmetic mean along the specified axis
    :func:`numpy.std` :
        Compute the standard deviation along the specified axis
    :func:`mdtools.statistics.center` :
        Center a distribution around its sample mean

    Examples
    --------
    >>> a = np.arange(24).reshape(2,3,4)
    >>> mdt.stats.standardize(a)
    array([[[-1.66132477, -1.51686175, -1.37239873, -1.2279357 ],
            [-1.08347268, -0.93900965, -0.79454663, -0.65008361],
            [-0.50562058, -0.36115756, -0.21669454, -0.07223151]],
    <BLANKLINE>
           [[ 0.07223151,  0.21669454,  0.36115756,  0.50562058],
            [ 0.65008361,  0.79454663,  0.93900965,  1.08347268],
            [ 1.2279357 ,  1.37239873,  1.51686175,  1.66132477]]])
    >>> mdt.stats.standardize(a, axis=0)
    array([[[-1., -1., -1., -1.],
            [-1., -1., -1., -1.],
            [-1., -1., -1., -1.]],
    <BLANKLINE>
           [[ 1.,  1.,  1.,  1.],
            [ 1.,  1.,  1.,  1.],
            [ 1.,  1.,  1.,  1.]]])
    >>> mdt.stats.standardize(a, axis=1)
    array([[[-1.22474487, -1.22474487, -1.22474487, -1.22474487],
            [ 0.        ,  0.        ,  0.        ,  0.        ],
            [ 1.22474487,  1.22474487,  1.22474487,  1.22474487]],
    <BLANKLINE>
           [[-1.22474487, -1.22474487, -1.22474487, -1.22474487],
            [ 0.        ,  0.        ,  0.        ,  0.        ],
            [ 1.22474487,  1.22474487,  1.22474487,  1.22474487]]])
    >>> mdt.stats.standardize(a, axis=2)
    array([[[-1.34164079, -0.4472136 ,  0.4472136 ,  1.34164079],
            [-1.34164079, -0.4472136 ,  0.4472136 ,  1.34164079],
            [-1.34164079, -0.4472136 ,  0.4472136 ,  1.34164079]],
    <BLANKLINE>
           [[-1.34164079, -0.4472136 ,  0.4472136 ,  1.34164079],
            [-1.34164079, -0.4472136 ,  0.4472136 ,  1.34164079],
            [-1.34164079, -0.4472136 ,  0.4472136 ,  1.34164079]]])

    >>> a = np.arange(3, dtype=np.float64)
    >>> a
    array([0., 1., 2.])
    >>> b = mdt.stats.standardize(a, inplace=True)
    >>> b
    array([-1.22474487,  0.        ,  1.22474487])
    >>> b is a
    True
    >>> b[0] = 1
    >>> a
    array([1.        , 0.        , 1.22474487])
    """
    x = np.asarray(x, dtype=dtype)
    x = mdt.stats.center(x, axis=axis, dtype=dtype, inplace=inplace)
    std = np.std(x, axis=axis, dtype=dtype, ddof=ddof)
    if axis is not None:
        std = np.expand_dims(std, axis=axis)
    if inplace:
        x /= std
    else:
        x = x / std
    return x




[docs]
def gaussian(x, mu=0, sigma=1):
    r"""
    Gaussian distribution.

    .. math::

        f(x) = \frac{1}{\sqrt{2\pi\sigma^2}} e^{-\frac{(x-\mu)^2}{2\sigma^2}}

    Parameters
    ----------
    x : scalar or array_like
        Array of :math:`x` values for which to evaluate :math:`f(x)`.
    mu : scalar or array_like, optional
        Mean of the distribution.  If an array of means is given, it
        must be broadcastable to a common shape with `x`.
    sigma : scalar or array_like, optional
        Standard deviation of the distribution.  If an array of standard
        deviations is given, it must be broadcastable to a common shape
        with `x`.

    Returns
    -------
    f : scalar or array_like
        The function values :math:`f(x)` at the given :math:`x` values.

    See Also
    --------
    :obj:`scipy.stats.norm` :
        Normal continuous random variable.

    Examples
    --------
    >>> from scipy.stats import norm
    >>> x = np.linspace(-1, 3, 5)
    >>> y1 = mdt.stats.gaussian(x, mu=1, sigma=1)
    >>> y2 = norm.pdf(x, loc=1, scale=1)
    >>> np.allclose(y1, y2, rtol=0)
    True
    >>> y1
    array([0.05399097, 0.24197072, 0.39894228, 0.24197072, 0.05399097])
    """  # noqa: E501, W505
    s2 = 2 * sigma**2
    return np.exp(-((x - mu) ** 2) / s2) / np.sqrt(np.pi * s2)




[docs]
def ngp(x, axis=None, d=1, center=False, is_squared=False):
    r"""
    Compute the non-Gaussian parameter of a distribution of random
    variables.

    Parameters
    ----------
    x : array_like
        Input array.
    axis : None or int or tuple of ints, optional
        Axis or axes of `x` along which to compute the NGP.  By default,
        the flattened input array is used.
    d : int, optional
        Dimensionality of the random variables (this is *not* the
        dimensionality of the input array).  Originally, this option is
        provided to compute the NGP of a distribution of particle
        displacements :math:`\Delta\mathbf{r}` that were calculated in
        one, two or three dimensional euclidean space.
    center : bool, optional
        If ``True``, center the input distribution around its mean, i.e.
        use central moments :math:`\langle (X - \bar{X})^n \rangle` for
        computing the NGP (with :math:`\bar{X} = \sum_{i=1}^N X_i` being
        the sample mean).  If ``False``, use the raw moments
        :math:`\langle X^n \rangle` without subtracting the sample mean.
        Note that the central moments of the sample are biased
        estimators for the central moments of the population, whereas
        the raw moments of the sample are always equal to the raw
        moments of the population.  See Wikipedia
        `Moment (mathematics) § Sample moments
        <https://en.wikipedia.org/wiki/Moment_(mathematics)#Sample_moments>`_
        `center` must not be used together with `is_squared`, because we
        cannot estimate the original sample mean if `x` is already
        squared.
    is_squared : bool, optional
        If ``True``, `x` is assumed to be already squared.  If ``False``
        (default), `x` will be squared before calculating the moments.
        Setting `is_squared` to ``True`` is for example useful if `x`
        is the distribution of squared particle displacements
        :math:`\Delta\mathbf{r}^2(\tau)` of an ensemble of particles at
        a given lag time :math:`\tau`.  `is_squared` must not be used
        together with `center`.

    Returns
    -------
    a : scalar
        The non-Gaussian parameter :math:`\alpha_{2,d}`.

    Notes
    -----
    The non-Gaussian parameter (NGP) was first introduced by Rahman,
    Singwi, and Sjölander. [#]_:sup:`,` [#]_  Since then it has become a
    standard test to check for Gaussian diffusion, i.e. whether the
    particle displacements :math:`\Delta\mathbf{r}(\tau)` follow a
    Gaussian distribution.  For a Gaussian distribution the NGP is zero.

    This function uses the definition of the NGP in
    :math:`d`-dimensional euclidean space given in the text on page
    12735 of Reference: [#]_

    .. math::

        \alpha_{2,d} = \frac{1}{1+\frac{2}{d}} \frac{\langle X^4 \rangle}{\langle X^2 \rangle^2} - 1

    A more general definition of the NGP :math:`\alpha_{n,d}` in
    :math:`d`-dimensional euclidean space was given by Huang, Wang and
    Yu in Equation (14) of Reference: [#]_

    .. math::

        \alpha_{n,d} = \frac{(d-2)!! d^n}{(2n + d - 2)!!} \frac{\langle X^{2n} \rangle}{\langle X^2 \rangle^n} - 1

    References
    ----------
    .. [#] A. Rahman, K. S. Singwi, A. Sjölander, `Theory of Slow
        Neutron Scattering by Liquids. I
        <https://doi.org/10.1103/PhysRev.126.986>`_, Physical Review,
        1962, 126, 986.

    .. [#] A. Rahman, `Correlations in the Motion of Atoms in Liquid
        Argon <https://doi.org/10.1103/PhysRev.136.A405>`_, Physical
        Review, 1964, 136, A405.

    .. [#] Sanggeun Song, Seong Jun Park, Minjung Kim, Jun Soo Kim,
        Bong June Sung, Sangyoub Lee, Ji-Hyun Kim, Jaeyoung Sung,
        `Transport dynamics of complex fluids
        <https://doi.org/10.1073/pnas.1900239116>`_, Proceedings of the
        National Academy of Sciences, 2019, 116, 26, 12733-12742

    .. [#] Zihan Huang, Gaoming Wang, Zhao Yu, `Non-Gaussian Parameter
        in k-Dimensional Euclidean Space
        <https://doi.org/10.48550/arXiv.1511.06672>`_, arXiv preprint
        arXiv:1511.06672v1, 2015.

    Examples
    --------
    >>> mdt.stats.ngp(np.array([-2, -1, -1,  0,  0,  0,  1,  1,  2]))
    -0.25
    >>> a = mdt.stats.gaussian(np.linspace(-5, 5, 11))
    >>> mdt.stats.ngp(a)
    0.48352183005980653
    >>> a = np.arange(24).reshape(2,3,4)
    >>> mdt.stats.ngp(a)
    -0.3876040703052728
    >>> mdt.stats.ngp(a, center=True)
    -0.401391304347826
    >>> mdt.stats.ngp(a, axis=0)
    array([[-0.33333333, -0.34113033, -0.35946667, -0.382643  ],
           [-0.4071511 , -0.43103845, -0.45333333, -0.47363871],
           [-0.49187475, -0.50812525, -0.52254957, -0.53533412]])
    >>> mdt.stats.ngp(a, axis=0, center=True)
    array([[-0.66666667, -0.66666667, -0.66666667, -0.66666667],
           [-0.66666667, -0.66666667, -0.66666667, -0.66666667],
           [-0.66666667, -0.66666667, -0.66666667, -0.66666667]])
    >>> mdt.stats.ngp(a, axis=1)
    array([[-0.32      , -0.37225959, -0.42285714, -0.46559096],
           [-0.6152    , -0.62068471, -0.62535515, -0.62936154]])
    >>> mdt.stats.ngp(a, axis=1, center=True)
    array([[-0.5, -0.5, -0.5, -0.5],
           [-0.5, -0.5, -0.5, -0.5]])
    >>> mdt.stats.ngp(a, axis=2)
    array([[-0.33333333, -0.61552028, -0.64866075],
           [-0.65763599, -0.66126512, -0.66307898]])
    >>> mdt.stats.ngp(a, axis=2, center=True)
    array([[-0.45333333, -0.45333333, -0.45333333],
           [-0.45333333, -0.45333333, -0.45333333]])
    """  # noqa: E501, W505
    if is_squared and center:
        raise ValueError(
            "'center' must not be used together with 'is_squared'"
        )

    x2 = np.array(x, dtype=np.float64, copy=not is_squared)
    if not is_squared:
        if center:
            x2 = mdt.stats.center(
                x2, axis=axis, dtype=np.float64, inplace=True
            )
        x2 **= 2

    x4_mean = np.mean(x2**2, axis=axis, dtype=np.float64)
    x2_mean = np.mean(x2, axis=axis, dtype=np.float64)
    return x4_mean / ((1 + 2 / d) * x2_mean**2) - 1




[docs]
def exp_dist(x, rate=1):
    r"""
    Exponential distribution.

    .. math::

        f(x) = \lambda e^{-\lambda x}

    The mean and standard deviation of an exponential distribution are
    given by :math:`1/\lambda`.

    Parameters
    ----------
    x : scalar or array_like
        Array of :math:`x` values for which to evaluate :math:`f(x)`.
    rate : scalar or array_like, optional
        The value(s) for :math:`\lambda`.  If an array is provided, it
        must be broadcastable to a common shape with `x`.

    Returns
    -------
    f : scalar or array_like
        The function values :math:`f(x)` at the given :math:`x` values.

    See Also
    --------
    :obj:`scipy.stats.expon` :
        Exponential continuous random variable

    Examples
    --------
    >>> from scipy.stats import expon
    >>> x = np.linspace(0, 4, 5)
    >>> y1 = mdt.stats.exp_dist(x, rate=2)
    >>> y2 = expon.pdf(x, loc=0, scale=1/2)
    >>> np.allclose(y1, y2, rtol=0)
    True
    >>> y1
    array([2.00000000e+00, 2.70670566e-01, 3.66312778e-02, 4.95750435e-03,
           6.70925256e-04])
    """  # noqa: W505
    return rate * np.exp(-rate * x)




[docs]
def exp_dist_log(x, rate):
    r"""
    Logarithm of the exponential distribution.

    .. math::

        f(x) = \ln(\lambda) - \lambda x

    Parameters
    ----------
    x : scalar or array_like
        Array of :math:`x` values for which to evaluate :math:`f(x)`.
    rate : scalar or array_like, optional
        The value(s) for :math:`\lambda`.  If an array is provided, it
        must be broadcastable to a common shape with `x`.

    Returns
    -------
    f : scalar or array_like
        The function values :math:`f(x)` at the given :math:`x` values.

    See Also
    --------
    :func:`exp_dist` :
        Exponential distribution

    Examples
    --------
    >>> x = np.linspace(0, 4, 5)
    >>> y1 = mdt.stats.exp_dist_log(x, rate=2)
    >>> y2 = mdt.stats.exp_dist(x, rate=2)
    >>> np.allclose(y1, np.log(y2), rtol=0)
    True
    >>> y1
    array([ 0.69314718, -1.30685282, -3.30685282, -5.30685282, -7.30685282])
    """  # noqa: W505
    return np.log(rate) - rate * x




[docs]
def mb_dist(v, temp=None, mass=None, var=None, drift=0, d=3):
    r"""
    Maxwell-Boltzmann speed distribution in :math:`d`-dimensional space.
    [#]_

    .. math::

        p(v) = S_d \cdot (v - u)^{d-1}
        \left(\frac{1}{2\pi\sigma^2}\right)^{\frac{d}{2}}
        \exp{\left[-\frac{(v - u)^2}{2\sigma^2} \right]}

    With the speed

    .. math::

        v = \sqrt{\sum_{i=1}^d v_i^2},

    a drift speed :math:`u`, the variance of the gaussian part (note
    that this is not the variance of :math:`p(v)`)

    .. math::

        \sigma^2 = \frac{k_B T}{m}

    and the surface area of a :math:`d`-dimensional unit sphere [#]_

    .. math::

        S_d = \frac{2\pi^{\frac{d}{2}}}{\Gamma\left(\frac{d}{2}\right)}
            =
            \begin{cases}
                2      , & d = 1\\
                2\pi   , & d = 2\\
                4\pi   , & d = 3\\
                2\pi^2 , & d = 4\\
                \vdots &
            \end{cases}

    where :math:`\Gamma` is the `gamma function
    <https://en.wikipedia.org/wiki/Gamma_function>`_.

    Parameters
    ----------
    v : array_like
        Array of speed values for which to evaluate :math:`p(v)`.
    temp : scalar or None, optional
        Temperature of the system.  Must be provided if `var` is
        ``None``.
    mass : scalar or None, optional
        Mass of the particles.  Must be provided if `var` is ``None``.
    var : scalar or None, optional
        Variance :math:`\sigma^2` of the Maxwell-Boltzmann distribution.
        If ``None``, it is calculated as :math:`\sigma^2 = k_B T / m`,
        where :math:`k_B` is the Boltzmann constant, :math:`T` is the
        temperature and :math:`m` is the mass of the particles.  If
        both, `temp` and `mass`, are provided, `var` is ignored.
    drift : scalar, optional
        Drift speed of the particles.
    d : int, optional
        Number of spatial dimensions in which the particles can move.

    Returns
    -------
    p : scalar or array_like
        The function values :math:`p(v)` at the given :math:`v` values.

    See Also
    --------
    :func:`mdtools.statistics.ekin_dist` :
        Distribution of kinetic energies in :math:`d`-dimensional space

    Notes
    -----
    Be aware of the units of your input values!  The unit of the
    Boltzmann constant :math:`k_B` is J/K.  Thus, temperatures must
    be given in K, masses in kg and velocities in m/s.  If you parse
    `var` directly instead of `temp` and `mass`, ``v**2`` and `var` must
    have the same units.

    Note that all arguments should be positive to get a physically
    meaningful output.  However, it is also possible to parse negative
    values to all input arguments.

    Note that a (positive) drift velocity only leads to a shift of the
    distribution along the x axis (to higher values).  Therefore, parts
    of the distribution that, without drift, would lie in the region of
    (unphysical) negative speeds now appear at positive speed values.
    The user is responsible to take care of this unphysical artifact.
    Generally, it's easiest to calculate the Maxwell-Boltzmann
    distribution without drift at positive speed values and afterwards
    shift the outcome by the drift speed.

    References
    ----------
    .. [#] Wikipedia `Maxwell-Boltzmann distribution § In n-dimensional
        space
        <https://en.wikipedia.org/wiki/Maxwell%E2%80%93Boltzmann_distribution#In_n-dimensional_space>`_
    .. [#] Wikipedia `n-sphere § Volume and area
        <https://en.wikipedia.org/wiki/N-sphere#Volume_and_area>`_

    Examples
    --------
    >>> x = np.linspace(0, 4, 5)
    >>> mdt.stats.mb_dist(x, var=1)
    array([0.        , 0.48394145, 0.43192773, 0.07977327, 0.00428257])
    >>> mdt.stats.mb_dist(x, var=2)
    array([0.        , 0.21969564, 0.4151075 , 0.26759315, 0.08266794])
    >>> mdt.stats.mb_dist(x, var=1, d=1)
    array([7.97884561e-01, 4.83941449e-01, 1.07981933e-01, 8.86369682e-03,
           2.67660452e-04])
    >>> mdt.stats.mb_dist(x, var=1, d=2)
    array([0.        , 0.60653066, 0.27067057, 0.03332699, 0.00134185])
    >>> mdt.stats.mb_dist(x, var=1, d=4)
    array([0.        , 0.30326533, 0.54134113, 0.14997145, 0.0107348 ])
    >>> mdt.stats.mb_dist(x, var=1, drift=1)
    array([0.48394145, 0.        , 0.48394145, 0.43192773, 0.07977327])
    >>> np.allclose(
    ...     mdt.stats.mb_dist(x, var=1)[:-1],
    ...     mdt.stats.mb_dist(x, var=1, drift=1)[1:],
    ...     rtol=0,
    ... )
    True

    >>> from scipy import constants
    >>> temp, mass = 273.15, 1.660e-27
    >>> var = constants.k * temp / mass
    >>> np.allclose(
    ...     mdt.stats.mb_dist(x, temp=temp, mass=mass),
    ...     mdt.stats.mb_dist(x, var=var),
    ...     rtol=0,
    ... )
    True
    >>> np.allclose(
    ...     mdt.stats.mb_dist(x, temp=temp, mass=mass, d=2),
    ...     mdt.stats.mb_dist(x, var=var, d=2),
    ...     rtol=0,
    ... )
    True
    >>> np.allclose(
    ...     mdt.stats.mb_dist(x, temp=temp, mass=mass, drift=2),
    ...     mdt.stats.mb_dist(x, var=var, drift=2),
    ...     rtol=0,
    ... )
    True

    The area below the Maxwell-Boltzmann distribution is one:

    >>> x = np.linspace(0, 20, 200)
    >>> for d in range(1, 6):
    ...     np.isclose(
    ...         np.trapz(mdt.stats.mb_dist(x, var=1, d=d), x),
    ...         1,
    ...         rtol=0,
    ...         atol=1e-3,
    ...     )
    True
    True
    True
    True
    True
    """  # noqa: E501, W505
    if var is None:
        if temp is None and mass is None:
            raise ValueError(
                "'mass' and 'temp' must be provided if 'var' is None"
            )
        var = constants.k * temp / mass
    else:
        if temp is not None and mass is not None:
            warnings.warn(
                "'var' ({}) is ignored, because 'temp' and 'mass' are given",
                UserWarning,
            )
    dist = norm.pdf(v, loc=drift, scale=np.sqrt(var))
    dist *= (v - drift) ** (d - 1)
    # Remaining factor: S_d * [1/(2*pi*sigma^2)]**(d/2), but
    # [1/(2*pi*sigma^2)]**(1/2) is already contained in `norm.pdf`
    # => The remaining factor is actually
    #    S_d * [1/(2*pi*sigma^2)]**((d-1)/2)
    sd = 2 * np.pi ** (d / 2)
    sd /= special.gamma(d / 2)
    dist *= sd
    dist /= (2 * np.pi * var) ** ((d - 1) / 2)
    return dist




[docs]
def ekin_dist(ekin, temp, d=3):
    r"""
    Distribution of kinetic energies in :math:`d`-dimensional space.
    [#]_

    Maxwell-Boltzmann distribution of the kinetic energies of the
    particles in a canonical (:math:`NVT`) ensemble:

    .. math::

        p(E) = \frac{S_d}{2} (\pi k_B T)^{-\frac{d}{2}}
        E^{-\frac{d-2}{2}}
        \exp{\left( -\frac{E}{k_B T} \right)}

    With the kinetic energy :math:`E`, the Boltzmann constant
    :math:`k_B`, the temperature of the system :math:`T` and the surface
    area of a :math:`d`-dimensional unit sphere [#]_

    .. math::

        S_d = \frac{2\pi^{\frac{d}{2}}}{\Gamma\left(\frac{d}{2}\right)}
            =
            \begin{cases}
                2      , & d = 1\\
                2\pi   , & d = 2\\
                4\pi   , & d = 3\\
                2\pi^2 , & d = 4\\
                \vdots &
            \end{cases}

    where :math:`\Gamma` is the `gamma function
    <https://en.wikipedia.org/wiki/Gamma_function>`_.

    Parameters
    ----------
    ekin : array_like
        Array of kinetic energy values :math:`E` for which to evaluate
        :math:`p(E)`.
    temp : scalar
        Temperature of the system.
    d : int, optional
        Number of spatial dimensions in which the particles can move.

    Returns
    -------
    p : scalar or array_like
        The function values :math:`p(E)` at the given :math:`E` values.

    See Also
    --------
    :func:`mdtools.statistics.mb_dist` :
        Maxwell-Boltzmann speed distribution in :math:`d`-dimensional
        space

    Notes
    -----
    Be aware of the units of your input values!  The unit of the
    Boltzmann constant :math:`k_B` is J/K.  Thus, temperatures must
    be given in K and energy values must be given in J.

    Note that all arguments should be positive to get a physically
    meaningful output.  However, it is also possible to parse negative
    values to all input arguments.

    The distribution of the kinetic energies :math:`p(E) \text{ d}E` can
    be derived from the Maxwell-Boltzmann speed distribution

    .. math::

        p(v) \text{ d}v = S_d v^{d-1}
        \left( \frac{m}{2 \pi k_B T} \right)^{-\frac{d}{2}}
        \exp{\left( -\frac{mv^2}{2k_B T} \right)} \text{ d}v

    by substituting :math:`E = \frac{1}{2} mv^2`.

    References
    ----------
    .. [#] Wikipedia `Maxwell-Boltzmann distribution § In n-dimensional
        space
        <https://en.wikipedia.org/wiki/Maxwell%E2%80%93Boltzmann_distribution#Distribution_for_the_energy>`_
    .. [#] Wikipedia `n-sphere § Volume and area
        <https://en.wikipedia.org/wiki/N-sphere#Volume_and_area>`_

    Examples
    --------
    >>> from scipy import constants
    >>> temp = 1 / constants.k  # Set thermal energy kT to 1
    >>> x = np.linspace(0, 3, 5)
    >>> mdt.stats.ekin_dist(x, temp)
    array([0.        , 0.46159897, 0.30836066, 0.17839543, 0.09730435])
    >>> mdt.stats.ekin_dist(x, temp, d=1)
    array([       inf, 0.30773265, 0.10278689, 0.03964343, 0.01621739])
    >>> mdt.stats.ekin_dist(x, temp, d=2)
    array([1.        , 0.47236655, 0.22313016, 0.10539922, 0.04978707])
    >>> mdt.stats.ekin_dist(x, temp, d=4)
    array([0.        , 0.35427491, 0.33469524, 0.23714826, 0.14936121])

    The area below the distribution is one:

    >>> x = np.linspace(0, 20, 200)
    >>> # In the 1-dimensional case, the distribution goes to infinity
    >>> # for E->0 and therefore cannot be integrated numerically.
    >>> for d in range(2, 6):
    ...     dist = mdt.stats.ekin_dist(x, temp, d)
    ...     integral = np.trapz(dist, x)
    ...     np.isclose(integral, 1, rtol=0, atol=1e-2)
    True
    True
    True
    True
    """
    # Surface area of a d-dimensional unit sphere divided by 2.
    sd = np.pi ** (d / 2)
    sd /= special.gamma(d / 2)
    # Pre-factor
    pre = sd * (np.pi * constants.k * temp) ** (-d / 2)
    # Distribution
    dist = ekin ** ((d - 2) / 2)
    dist *= np.exp(-ekin / (constants.k * temp))
    dist *= pre
    return dist




[docs]
def var_weighted(a, weights=None, axis=None, return_mean=False):
    r"""
    Compute the weighted variance along a given axis.

    Parameters
    ----------
    a : array_like
        Input array.
    weights : array_like or None, optional
        An array of weights associated with the values in `a`. Each
        value in `a` contributes to the variance according to its
        associated weight.  The weights array can either be
        1-dimensional (in which case its length must be the size of `a`
        along the given axis) or of the same shape as `a`.  If `weights`
        is ``None``, then all data in `a` are assumed to have a weight
        equal to one.  The only constraint on `weights` is that
        ``np.sum(weights)`` must not be 0 along the given axis.
    axis : None or int or tuple of ints, optional
        Axis or axes along which the variance is computed.  The default
        is to compute the variance of the flattened array.
    return_mean : bool, optional
        If ``True``, also return the weighted sample mean
        :math:`\bar{X}` used to calculate the weighted variance.

    Returns
    -------
    var : scalar
        Weighted variance.
    mean : scalar
        Weighted sample mean.  Only returned if `return_mean` is
        ``True``.

    See Also
    --------
    :func:`numpy.average` :
        Compute the weighted average along the specified axis
    :func:`numpy.var` :
        Compute the variance along the specified axis
    :func:`mdtools.statistics.std_weighted` :
        Compute the weighted standard deviation along a given axis

    Notes
    -----
    The weighted variance is given by

    .. math::

        \sigma^2 = \sum_{i=1}^N (X_i - \bar{X})^2 \cdot p(X_i)

    with the weighted sample mean

    .. math::

        \bar{X} = \sum_{i=1}^N X_i \cdot p(X_i)

    the probability of element :math:`X_i`

    .. math::

        p(X_i) = \frac{\sum_{i=1}^N X_i w_i}{\sum_{j=1}^N w_j}

    and the weight :math:`w_i` of the :math:`i`-th element.
    """
    mean = np.average(a, axis=axis, weights=weights)
    var = np.average(np.abs(a - mean) ** 2, axis=axis, weights=weights)
    if return_mean:
        return var, mean
    else:
        return var




[docs]
def std_weighted(a, weights=None, axis=None, return_mean=False):
    r"""
    Compute the weighted standard deviation along a given axis.

    Parameters
    ----------
    a : array_like
        Input array.
    weights : array_like or None, optional
        An array of weights associated with the values in `a`.  Each
        value in `a` contributes to the standard deviation according to
        its associated weight.  The weights array can either be
        1-dimensional (in which case its length must be the size of `a`
        along the given axis) or of the same shape as `a`.  If `weights`
        is ``None``, then all data in `a` are assumed to have a weight
        equal to one.  The only constraint on `weights` is that
        ``np.sum(weights)`` must not be 0 along the given axis.
    axis : None or int or tuple of ints, optional
        Axis or axes along which the standard deviation is computed.
        The default is to compute the standard deviation of the
        flattened array.
    return_mean : bool, optional
        If ``True``, also return the weighted sample mean
        :math:`\bar{X}` used to calculate the weighted standard
        deviation.

    Returns
    -------
    std : scalar
        Weighted standard deviation.
    mean : scalar
        Weighted sample mean.  Only returned if `return_mean` is
        ``True``.

    See Also
    --------
    :func:`numpy.average` :
        Compute the weighted average along the specified axis
    :func:`numpy.std` :
        Compute the standard deviation along the specified axis
    :func:`mdtools.statistics.var_weighted` :
        Compute the weighted variance along a given axis

    Notes
    -----
    The weighted standard deviation is given by

    .. math::

        \sigma = \sqrt{\sum_{i=1}^N (X_i - \bar{X})^2 \cdot p(X_i)}

    with the weighted sample mean

    .. math::

        \bar{X} = \sum_{i=1}^N X_i \cdot p(X_i)

    the probability of element :math:`X_i`

    .. math::

        p(X_i) = \frac{\sum_{i=1}^N X_i w_i}{\sum_{j=1}^N w_j}

    and the weight :math:`w_i` of the :math:`i`-th element.
    """
    var, mean = mdt.stats.var_weighted(
        a, weights=weights, axis=axis, return_mean=True
    )
    std = np.sqrt(var)
    if return_mean:
        return std, mean
    else:
        return std




[docs]
def cumav(a, axis=None, out=None):
    r"""
    Calculate the cumulative average.

    Parameters
    ----------
    a : array_like
        Array of values for which to calculate the cumulative average.
        The dtype is converted to ``numpy.float64``.
    axis : None or int, optional
        Axis along which the cumulative average is computed.  The
        default is to compute the cumulative average of the flattened
        array.
    out : array_like
        Alternative output array in which to place the result.  It must
        have the same shape and buffer length as ``a.flatten()`` (if
        `axis` is ``None``) or `a` (if `axis` is not ``None``), but the
        type will be cast if necessary.  See :func:`numpy.cumsum` for
        more details.  Note that the result will be cast to the dtype of
        `out`, even if this dtype cannot hold the resulting values (e.g.
        if the dtype of `out` is :class:`int` but the result contains
        floating point values, the fractional part of these floats is
        cut).  So it's safest, to parse an output array of dtype
        ``numpy.float64``, also with respect to the accuracy of the
        average calculation (compare notes of :func:`numpy.mean`).

    Returns
    -------
    rav : numpy.ndarray
        The cumulative average along the specified axis.  The result has
        the same size as `a`, and also the same shape as `a` if `axis`
        is not ``None`` or if `a` is a 1-d array.

    See Also
    --------
    :func:`numpy.cumsum` :
        Return the cumulative sum of the elements along a given axis
    :func:`mdtools.statistics.movav` :
        Calculate a (weighted) moving average.


    Notes
    -----
    The cumulative average at the :math:`n`-th position is given by

    .. math::

        \mu_n = \frac{1}{n} \sum_{i=0}^{n} a_i

    Examples
    --------
    >>> a = np.arange(6)
    >>> mdt.stats.cumav(a)
    array([0. , 0.5, 1. , 1.5, 2. , 2.5])

    >>> a = np.arange(6).reshape(2,3)
    >>> mdt.stats.cumav(a)
    array([0. , 0.5, 1. , 1.5, 2. , 2.5])
    >>> mdt.stats.cumav(a, axis=0)
    array([[0. , 1. , 2. ],
           [1.5, 2.5, 3.5]])
    >>> mdt.stats.cumav(a, axis=1)
    array([[0. , 0.5, 1. ],
           [3. , 3.5, 4. ]])

    >>> a = np.arange(12).reshape(2,2,3)
    >>> mdt.stats.cumav(a)
    array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5])
    >>> mdt.stats.cumav(a, axis=0)
    array([[[0., 1., 2.],
            [3., 4., 5.]],
    <BLANKLINE>
           [[3., 4., 5.],
            [6., 7., 8.]]])
    >>> mdt.stats.cumav(a, axis=1)
    array([[[0. , 1. , 2. ],
            [1.5, 2.5, 3.5]],
    <BLANKLINE>
           [[6. , 7. , 8. ],
            [7.5, 8.5, 9.5]]])
    >>> mdt.stats.cumav(a, axis=2)
    array([[[ 0. ,  0.5,  1. ],
            [ 3. ,  3.5,  4. ]],
    <BLANKLINE>
           [[ 6. ,  6.5,  7. ],
            [ 9. ,  9.5, 10. ]]])
    """
    a = np.asarray(a, dtype=np.float64)
    cav = np.cumsum(a, axis=axis, out=out)
    if axis is None:
        norm = np.arange(1, a.size + 1, dtype=np.uint32)
    else:
        s = [1 for i in range(a.ndim)]
        s[axis] = a.shape[axis]
        norm = np.arange(1, a.shape[axis] + 1, dtype=np.uint32).reshape(s)
    cav /= norm
    return cav




[docs]
def movav(a, wlen, axis=None, out=None):
    r"""
    Calculate the moving average.

    Also known as rolling average or running average.

    Parameters
    ----------
    a : array_like
        Array of values for which to calculate the moving average.  The
        dtype is converted to ``numpy.float64``.
    wlen : int
        Length of the averaging window (corresponds to :math:`k` in the
        formula below).  Must be at least one and not greater than the
        size of `a` (if `axis` is ``None``) or the length of `a` along
        the given axis (if `axis` is not ``None``).
    axis : None or int, optional
        Axis along which to compute the moving average.  The default is
        to compute the moving average of the flattened array.
    out : array_like
        Alternative output array in which to place the result.  The
        first ``wlen - 1`` elements along the given axis must be
        discarded after receiving the result.  `out` must have the same
        shape and buffer length as ``a.flatten()`` (if `axis` is
        ``None``) or `a` (if `axis` is not ``None``), but the type will
        be cast if necessary.  See :func:`numpy.cumsum` for more
        details.  Note that the result will be cast to the dtype of
        `out`, even if this dtype cannot hold the resulting values (e.g.
        if the dtype of `out` is :class:`int` but the result contains
        floating point values, the fractional part of these floats is
        cut).  So it's safest, to parse an output array of dtype
        ``numpy.float64``, also with respect to the accuracy of the
        average calculation (compare notes of :func:`numpy.mean`).

    Returns
    -------
    mav : numpy.ndarray
        The moving average along the specified axis.  The result has the
        same shape as `a` except along the given axis where the length
        is smaller by ``wlen - 1``.

    See Also
    --------
    :func:`mdtools.statistics.cumav` :
        Calculate the cumulative average.

    Notes
    -----
    The moving average at the :math:`m`-th position with a window size
    of :math:`k` is given by

    .. math::

        \mu_m^k = \frac{1}{k} \sum_{i=m}^{m+k-1} a_i

    This function implements the moving average according to the
    following formula using :func:`numpy.cumsum`:

    .. math::

        \mu_m^k = \frac{1}{k}
        \left( \sum_{i=0}^{m+k-1} a_i - \sum_{i=0}^{m-1} a_i \right)

    Examples
    --------
    >>> a = np.arange(6)
    >>> mdt.stats.movav(a, wlen=3)
    array([1., 2., 3., 4.])
    >>> mdt.stats.movav(a, wlen=4)
    array([1.5, 2.5, 3.5])

    >>> a = np.arange(12).reshape(3,4)
    >>> mdt.stats.movav(a, wlen=5)
    array([2., 3., 4., 5., 6., 7., 8., 9.])
    >>> mdt.stats.movav(a, wlen=2, axis=0)
    array([[2., 3., 4., 5.],
           [6., 7., 8., 9.]])
    >>> mdt.stats.movav(a, wlen=2, axis=1)
    array([[ 0.5,  1.5,  2.5],
           [ 4.5,  5.5,  6.5],
           [ 8.5,  9.5, 10.5]])

    >>> a = np.arange(36).reshape(3,3,4)
    >>> mdt.stats.movav(a, wlen=29)
    array([14., 15., 16., 17., 18., 19., 20., 21.])
    >>> mdt.stats.movav(a, wlen=2, axis=0)
    array([[[ 6.,  7.,  8.,  9.],
            [10., 11., 12., 13.],
            [14., 15., 16., 17.]],
    <BLANKLINE>
           [[18., 19., 20., 21.],
            [22., 23., 24., 25.],
            [26., 27., 28., 29.]]])
    >>> mdt.stats.movav(a, wlen=2, axis=1)
    array([[[ 2.,  3.,  4.,  5.],
            [ 6.,  7.,  8.,  9.]],
    <BLANKLINE>
           [[14., 15., 16., 17.],
            [18., 19., 20., 21.]],
    <BLANKLINE>
           [[26., 27., 28., 29.],
            [30., 31., 32., 33.]]])
    >>> mdt.stats.movav(a, wlen=3, axis=2)
    array([[[ 1.,  2.],
            [ 5.,  6.],
            [ 9., 10.]],
    <BLANKLINE>
           [[13., 14.],
            [17., 18.],
            [21., 22.]],
    <BLANKLINE>
           [[25., 26.],
            [29., 30.],
            [33., 34.]]])
    """
    a = np.asarray(a, dtype=np.float64)
    mav = np.cumsum(a, axis=axis, out=out)
    if wlen < 1:
        raise ValueError(
            "The window length must be at least 1 but you gave"
            " {}.".format(wlen)
        )
    # Call `a.shape[axis]` only after `np.cumsum` to get a proper
    # `AxisError` instead of an `IndexError` if the axis does not exist.
    elif axis is None and wlen > a.size:
        raise ValueError(
            "The window length ({}) must not be greater than the size of the"
            " input array {}.".format(wlen, a.size)
        )
    elif axis is not None and wlen > a.shape[axis]:
        raise ValueError(
            "The window length ({}) must not be greater than the length of the"
            " input array along the given axis {}.".format(wlen, a.shape[axis])
        )
    # `mav_full_sum` is the first sum in the round brackets of the
    # implementation formula from the above docstring.
    # Note that `mdt.nph.take` always creates a view, not a copy.
    mav_full_sum = mdt.nph.take(mav, start=wlen, axis=axis)
    # Don't do an in-place subtraction here (`mav_windows -= ...`),
    # because then array elements will already be changed before the
    # subtraction process has finished.
    mav_full_sum[:] = mav_full_sum - mdt.nph.take(mav, stop=-wlen, axis=axis)
    mav_windows = mdt.nph.take(mav, start=wlen - 1, axis=axis)
    mav_windows /= wlen
    return mav_windows




[docs]
def block_average(data, axis=0, ddof=0, dtype=np.float64):
    """
    Calculate the sample means and their standard deviations over
    multiple series of measurement.

    Parameters
    ----------
    data : array_like
        The array that holds the measured values.  Must have at least
        two dimensions, one dimension containing the different series
        of measurement and at least one other dimension containing the
        measured values per series.
    axis : int, optional
        Axis along which the means and their standard deviations are
        computed.
    ddof : int, optional
        Delta Degrees of Freedom.  The divisor used in calculating the
        standard deviation is ``N-ddof``, where ``N`` is the number of
        measurements.
    dtype : type, optional
        The data type of the output arrays and the data type to use in
        computing the means and standard deviations.  Note: Computing
        means and standard deviation using ``numpy.float32`` or lower
        can be inaccurate.  See :func:`numpy.mean` for more details.

    Returns
    -------
    mean : numpy.ndarray
        The mean values of the measurements.
    sd : numpy.ndarray
        The standard deviations of the mean values.

    See Also
    --------
    :func:`numpy.mean` :
        Compute the arithmetic mean along the specified axis
    :func:`numpy.std` :
        Compute the standard deviation along the specified axis
    """
    data = np.asarray(data)
    num_measurements = data.shape[axis]
    mean = np.mean(data, axis=axis, dtype=dtype)
    sd = np.std(data, axis=axis, ddof=ddof, dtype=dtype)
    # Standard deviation of the mean value after n series of
    # measurement: sd_n = sd / sqrt(n).  The mean value remains always
    # the same inside the error region.
    np.divide(sd, np.sqrt(num_measurements), out=sd, dtype=dtype)
    return mean, sd




[docs]
def moment_raw2cen(rm, axis=-1):
    r"""
    Calculate the :math:`n`-th central moment :math:`\mu_n` from the
    first :math:`n` raw moments :math:`m_n`.
    [#]_

    .. math::

        \mu_n =
        \langle (x - \mu)^n \rangle =
        \sum_{k=0}^{n} {n \choose k} (-1)^{n-k} m_k \mu^{n-k}

    with the :math:`k`-th raw moment :math:`m_k = \langle x^k \rangle`
    and the mean :math:`\mu = m_1`.

    Parameters
    ----------
    rm : array_like
        Array containing the first :math:`n` raw moments.  The raw
        moments must be ordered along the given axis by the raw moments'
        order (i.e. 1st raw moment first, 2nd raw moment second, ...).
        The array must not contain the :math:`0`-th raw moment (which is
        anyway always 1).
    axis : int, optional
        The axis along which the raw moments are ordered.

    Returns
    -------
    cm : scalar or numpy.ndarray
        The :math:`n`-th central moment, where :math:`n` is given by
        ``numpy.asarray(rm).shape[-1]``.

    Notes
    -----
    This function cannot return the :math:`0`-th central moment.
    Anyway, this is always 1.

    References
    ----------
    .. [#] Wikipedia `Central moment § Relation to moments about the
        origin
        <https://en.wikipedia.org/wiki/Central_moment#Relation_to_moments_about_the_origin>`_

    Examples
    --------
    1-dimensional input arrays:

    >>> # 1st central moment (is always zero).
    >>> rm = np.array([3])
    >>> mdt.stats.moment_raw2cen(rm)
    0
    >>> # 2nd central moment (<x^2> - <x>^2 = variance)
    >>> rm = np.array([ 3, 11])
    >>> mdt.stats.moment_raw2cen(rm)
    2
    >>> # 3rd central moment
    >>> rm = np.array([ 3, 11, 49])
    >>> mdt.stats.moment_raw2cen(rm)
    4
    >>> # 4th central moment
    >>> rm = np.array([  3,  11,  49, 253])
    >>> mdt.stats.moment_raw2cen(rm)
    16
    >>> # The first central moment is always zero.
    >>> [mdt.stats.moment_raw2cen([rm]) for rm in [0, 1/3, 4, 7/3]]
    [0, 0.0, 0, 0.0]

    2-dimensional input arrays:

    >>> rm = np.array([[ 3, 11, 49]])
    >>> mdt.stats.moment_raw2cen(rm)
    array([4])
    >>> ax = 0
    >>> rm = np.array([[   3,   11,   49],
    ...                [  13,  121, 2403]])
    >>> mdt.stats.moment_raw2cen(rm, axis=ax)
    array([4, 0, 2])
    >>> ax = 1
    >>> mdt.stats.moment_raw2cen(rm, axis=ax)
    array([   4, 2078])

    3-dimensional input arrays:

    >>> rm = np.array([[[ 3, 11, 49]]])
    >>> mdt.stats.moment_raw2cen(rm)
    array([[4]])
    >>> ax = 0
    >>> rm = np.array([[[   3,   11,   49],
    ...                 [  13,  121, 2403]],
    ...
    ...                [[   1,    9,    51],
    ...                 [   2,   87,  2605]]])
    >>> mdt.stats.moment_raw2cen(rm, axis=ax)
    array([[      -8,     -112,    -2350],
           [    -167,   -14554, -5771804]])
    >>> ax = 1
    >>> mdt.stats.moment_raw2cen(rm, axis=ax)
    array([[4, 0, 2],
           [1, 6, 4]])
    >>> ax = 2
    >>> mdt.stats.moment_raw2cen(rm, axis=ax)
    array([[   4, 2078],
           [  26, 2099]])
    """
    rm = np.asarray(rm)
    if rm.ndim == 0:
        raise ValueError("`rm` must be at least 1-dimensional")
    n = rm.shape[axis]

    # Prepend the 0-th raw moment to the array of raw moments.
    # 0-th raw moment is always <x^0> = 1.
    shape = list(rm.shape)
    shape[axis] = 1
    rm0 = np.ones(shape, dtype=rm.dtype)
    rm = np.concatenate([rm0, rm], axis=axis)

    # Mean = 1st raw moment.
    mean = np.squeeze(mdt.nph.take(rm, start=1, stop=2, axis=axis))
    # Powers of the mean: (-1)^{n-k} * \mu^{n-k} = (-\mu)^{n-k}.
    mean_pows = np.stack([(-mean) ** (n - k) for k in range(n + 1)], axis=axis)

    # Binomial coefficients: {n \choose k}.
    coeffs = np.array([comb(n, k, exact=True) for k in range(n + 1)])
    shape = [1 for dim in rm.shape]
    shape[axis] = rm.shape[axis]
    coeffs = coeffs.reshape(shape)

    return np.sum(coeffs * mean_pows * rm, axis=axis)