Source code for mdtools.file_handler

# This file is part of MDTools.
# Copyright (C) 2021-2023  The MDTools Development Team and all
# contributors listed in the file AUTHORS.rst
#
# MDTools is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# MDTools is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with MDTools.  If not, see <http://www.gnu.org/licenses/>.


"""
Functions for file input/output handling

This module can be called from :mod:`mdtools` via the shortcut ``fh``::

    import mdtools as mdt
    mdt.fh  # instead of mdt.file_handler

"""


# Standard libraries
import bz2
import gzip
import lzma
import os
import warnings
from datetime import datetime

# Third-party libraries
import numpy as np

# First-party libraries
import mdtools as mdt



[docs]
def cd_up(n, path=__file__):
    """
    Return the `n`-th parent directory of a path.

    Environment variables in `path` are expanded and the path is made
    absolute before walking upwards.

    Parameters
    ----------
    n : int
        How many levels to ascend in the directory tree.  Values
        smaller than one leave the (absolute) start path unchanged.
    path : str or bytes or os.PathLike, optional
        Directory or file to start from.  Default: ``__file__`` as
        evaluated in the module that defines this function.

    Returns
    -------
    p : str
        The `n`-th parent directory of `path`.  Ascending beyond the
        file system root simply yields the root.
    """
    current = os.path.expandvars(path)
    current = os.path.abspath(current)
    # Strip one trailing path component per requested level.
    for _level in range(n):
        current = os.path.dirname(current)
    return current




[docs]
def backup(fname):
    """
    Backup a file by renaming it.

    Check if a file with name `fname` already exists.  If so, rename it
    to ``'fname.bak_timestamp'``, where ``'timestamp'`` is the time when
    the renaming was done in YYYY-MM-DD_HH-MM-SS format.

    Parameters
    ----------
    fname : str or bytes or os.PathLike
        The name of the file to backup.

    Returns
    -------
    renamed : bool
        Returns ``True`` if a file called `fname` already existed and
        was renamed.  ``False`` if no file called `fname` exists and no
        backup was done.
    """
    # Normalize path-like objects to `str`/`bytes` so that the backup
    # suffix can be appended by plain concatenation.
    fname = os.fspath(fname)
    if not os.path.isfile(fname):
        return False

    suffix = ".bak_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    if isinstance(fname, bytes):
        # `bytes` paths cannot be concatenated with `str`.
        suffix = os.fsencode(suffix)
    backup_name = fname + suffix
    os.rename(fname, backup_name)
    print(
        "Backuped {} to {}".format(
            os.fsdecode(fname), os.fsdecode(backup_name)
        )
    )
    return True




[docs]
def xopen(fname, mode="rt", fformat=None, rename=True, **kwargs):
    """
    Open a (compressed) file and return a corresponding
    |file-like_object|.

    This function is a replacement for the built-in :func:`open`
    function that can additionally read and write compressed files.
    Supported compression formats:

        * gzip (.gz)
        * bzip2 (.bz2)
        * XZ/LZMA2 (.xz)
        * LZMA (.lzma)

    Parameters
    ----------
    fname : str or bytes or os.PathLike
        Name of the file to open.
    mode : {'r', 'rt', 'rb', 'w', 'wt', 'wb', 'x', 'xt', 'xb', 'a', \
'at', 'ab'}, optional
        Opening mode.  See the built-in :func:`open` function for more
        details.
    fformat : {None, 'gz', 'bz2', 'xz', 'lzma', 'uncompressed'}, \
optional
        Explicitly specify the file format.  If ``None``, the file
        format is guessed from the file name extension if present and
        otherwise from the file signature.  If ``'uncompressed'``, the
        file is treated as uncompressed file.
    rename : bool, optional
        If ``True`` and a file called `fname` already exists and the
        file is opened in writing mode, rename the existing file to
        ``'fname.bak_timestamp'``.  See
        :func:`mdtools.file_handler.backup` for more details.
    kwargs : dict, optional
        Additional keyword arguments to parse to the function that is
        used for opening the file.  See there for possible arguments and
        their description.

    Returns
    -------
    file : file-like object
        The created |file-like_object|.

    Raises
    ------
    ValueError
        If `fformat` is not one of the supported values.

    See Also
    --------
    :func:`open` :
        Function used to open uncompressed files
    :func:`gzip.open` :
        Function used to open gzip-compressed files
    :func:`bz2.open` :
        Function used to open bzip2-compressed files
    :func:`lzma.open` :
        Function used to open XZ- and LZMA-compressed files

    Notes
    -----
    When writing and `fformat` is ``None``, the compression algorithm is
    chosen based on the extension of the given file:

        * ``'.gz'`` uses gzip compression.
        * ``'.bz2'`` uses bzip2 compression.
        * ``'.xz'`` uses XZ/LZMA2 compression.
        * ``'.lzma'`` uses legacy LZMA compression.
        * otherwise, no compression is done.

    When reading and `fformat` is ``None``, the file format is detected
    from the file name extension if present.  If no extension is present
    or the extension is unknown, the format is detected from the file
    signature, i.e. the first few bytes of the file also known as
    "`magic numbers
    <https://www.garykessler.net/library/file_sigs.html>`__".

    When the format is ``'lzma'`` and the file is not opened for
    reading, the legacy container format (``lzma.FORMAT_ALONE``) is
    requested unless `format` is given explicitly in `kwargs`.  When
    reading, the container format is auto-detected by
    :func:`lzma.open`.

    References
    ----------
    Inspired by `xopen <https://github.com/pycompression/xopen>`__ by
    Marcel Martin, Ruben Vorderman et al.
    """
    fname = os.fspath(fname)
    signatures = {
        # https://datatracker.ietf.org/doc/html/rfc1952#page-6
        "gz": b"\x1f\x8b",
        # https://en.wikipedia.org/wiki/List_of_file_signatures
        "bz2": b"\x42\x5a\x68",
        # https://tukaani.org/xz/xz-file-format.txt
        "xz": b"\xfd\x37\x7a\x58\x5a\x00",
        # https://zenhax.com/viewtopic.php?t=27
        "lzma": b"\x5d\x00",
    }

    if fformat not in [None, "uncompressed"] + list(signatures.keys()):
        raise ValueError("Invalid value for 'fformat': {}".format(fformat))

    # Use text mode by default, like the built-in `open` function, also
    # when opening compressed files.
    if mode in ("r", "w", "x", "a"):
        mode += "t"

    # Detect file format from extension.
    if fformat is None:
        for extension in signatures:
            suffix = "." + extension
            if isinstance(fname, bytes):
                # `bytes.endswith` requires a `bytes` suffix.
                suffix = suffix.encode()
            if fname.endswith(suffix):
                fformat = extension

    # Detect file format from file signature.  This is only meaningful
    # if an existing file is read or appended to.
    if fformat is None and "w" not in mode and "x" not in mode:
        max_len = max(len(signature) for signature in signatures.values())
        try:
            with open(fname, "rb") as fh:
                file_start = fh.read(max_len)
        except OSError:
            # File could not be opened => No signature to inspect.
            file_start = b""
        for extension, signature in signatures.items():
            if file_start.startswith(signature):
                fformat = extension
                break

    # Backup an already existing file before it gets truncated.
    if "w" in mode and rename:
        mdt.fh.backup(fname)

    if fformat == "gz":
        return gzip.open(fname, mode, **kwargs)
    if fformat == "bz2":
        return bz2.open(fname, mode, **kwargs)
    if fformat in ("xz", "lzma"):
        if fformat == "lzma" and "r" not in mode:
            # `lzma.open` writes the XZ container format by default.
            # Request the legacy LZMA format for '.lzma' files, but let
            # the caller override it.
            kwargs.setdefault("format", lzma.FORMAT_ALONE)
        return lzma.open(fname, mode, **kwargs)
    # `fformat` is "uncompressed" or could not be detected.
    return open(fname, mode, **kwargs)




[docs]
def tail(fname, n, **kwargs):
    """
    Return the last `n` lines of a file.

    Parameters
    ----------
    fname : str or bytes or os.PathLike
        Name of the file to read from.
    n : int
        How many lines to read from the end of the file.  If `n` is
        zero or negative, an empty list is returned without opening the
        file.
    kwargs : dict, optional
        Additional keyword arguments to parse to
        :func:`mdtools.file_handler.xopen`.  See there for possible
        arguments and their description.  By default, `mode` is set to
        ``'rt'`` (open file for reading in text mode).

    Returns
    -------
    lines : list
        The last `n` lines of the file, one list item per line.  If the
        file contains fewer than `n` lines, all lines are returned.
    """
    if n <= 0:
        return []

    # Number of positions by which the cursor is moved backwards per
    # iteration (this value was determined empirically to give good
    # performance and might be optimized further).
    stride = max(10 * n, 1)
    if "mode" not in kwargs:
        kwargs["mode"] = "rt"

    with mdt.fh.xopen(fname, **kwargs) as file:
        file.seek(0, 2)  # Jump to the end of the file.
        cursor = file.tell()
        # Walk backwards and re-read everything from the cursor to the
        # end of the file until at least n+1 lines were read.  n+1 lines
        # are required, because the first line read might be
        # incomplete.
        while True:
            cursor -= min(stride, cursor)
            file.seek(cursor, 0)
            lines = file.readlines()
            if cursor == 0 or len(lines) >= n + 1:
                # Either the start of the file was reached or enough
                # lines were collected.
                break
    return lines[-n:]




[docs]
def indent(text, amount, char=" "):
    r"""
    Prefix every line of a text with a padding string.

    The padding consists of `amount` repetitions of `char`.  The text is
    split into lines with :meth:`str.splitlines` (line endings are
    kept) and each line is prefixed with the padding.

    Parameters
    ----------
    text : str
        The text to indent.
    amount : int
        Number of repetitions of `char` to put in front of every line.
        Negative values are treated as zero.
    char : str, optional
        The string that is repeated to build the padding.

    Returns
    -------
    indented_text : str
        `text` with each line prefixed by the padding.

    Examples
    --------
    >>> s = "Hello, World!\n  It's me, Mario!"
    >>> print(s)
    Hello, World!
      It's me, Mario!
    >>> print(mdt.fh.indent(s, amount=4))
        Hello, World!
          It's me, Mario!
    >>> print(mdt.fh.indent(s, amount=1, char="# "))
    # Hello, World!
    #   It's me, Mario!
    """
    prefix = amount * char
    padded_lines = []
    for line in text.splitlines(keepends=True):
        padded_lines.append(prefix + line)
    return "".join(padded_lines)




[docs]
def header_str():
    """
    Build the standard header string for text files.

    The returned string is ready to be printed with :func:`print` and
    consists of:

        * The script name together with the date and time at which this
          function was called.
        * The MDTools copyright notice.
        * The information returned by
          :func:`mdtools.run_time_info.run_time_info` (command line
          input, working directory, executable, MDTools version and
          Python version).

    Returns
    -------
    header : str
        Human readable string containing the above listed content.

    See Also
    --------
    :func:`mdtools.run_time_info.run_time_info` :
        Generate some run time information
    :func:`mdtools.run_time_info.run_time_info_str` :
        Create a string containing some run time information
    """
    now = datetime.now()
    script, command_line, cwd, exe, version, pversion = mdt.rti.run_time_info()

    pieces = [
        "Created by {} on {}\n".format(
            script, now.strftime("%Y/%m/%d %H:%M:%S")
        ),
        "\n",
        mdt.__copyright_notice__ + "\n",
        "\n",
        "\n",
    ]
    # Each run time information entry is written as a title line
    # followed by the value indented by two spaces.
    entries = (
        ("Command line input:\n", command_line),
        ("Working directory:\n", cwd),
        ("Executable:\n", exe),
        ("mdtools version:\n", version),
        ("Python version:\n", pversion),
    )
    for title, value in entries:
        pieces.append(title)
        pieces.append("  {}\n".format(value))
    return "".join(pieces)




[docs]
def write_header(fname, **kwargs):
    """
    Write the standard MDTools header to a file.

    The header is generated by
    :func:`mdtools.file_handler.header_str` and every line of it is
    prefixed with ``'# '`` before it is written.

    Parameters
    ----------
    fname : str or bytes or os.PathLike
        Name of the file the header is written to.
    kwargs : dict, optional
        Additional keyword arguments to parse to
        :func:`mdtools.file_handler.xopen`.  See there for possible
        arguments and their description.  By default, `mode` is set to
        ``'wt'`` (open file for writing in text mode, truncating the
        file first).

    See Also
    --------
    :func:`mdtools.file_handler.header_str` :
        Create a standard header string for text files
    :func:`mdtools.file_handler.backup` :
        Backup a file by renaming it
    """
    if "mode" not in kwargs:
        kwargs["mode"] = "wt"
    with mdt.fh.xopen(fname, **kwargs) as outfile:
        raw_header = mdt.fh.header_str()
        # Turn every header line into a comment line.
        commented_header = mdt.fh.indent(raw_header, amount=1, char="# ")
        outfile.write(commented_header)




[docs]
def savetxt(fname, data, rename=True, **kwargs):
    """
    Write an array to a text file with an MDTools header.

    Parameters
    ----------
    fname : str or os.PathLike
        Name of the output file.
    data : array_like
        1- or 2-dimensional array of data to be saved.
    rename : bool, optional
        If ``True`` and a file called `fname` already exists, rename it
        to ``'fname.bak_timestamp'``.  See
        :func:`mdtools.file_handler.backup` for more details.
    kwargs : dict, optional
        Additional keyword arguments to parse to :func:`numpy.savetxt`.
        See there for possible arguments and their description.  By
        default, `fmt` is set to ``'%16.9e'``.  A user-supplied `header`
        that is not blank is appended to the MDTools header, separated
        by an empty line.

    See Also
    --------
    :func:`numpy.savetxt` :
        Save an array to a text file
    :func:`mdtools.file_handler.savetxt_matrix` :
        Save a data matrix to a text file
    :func:`mdtools.file_handler.header_str` :
        Create a standard header string for text files
    :func:`mdtools.file_handler.backup` :
        Backup a file by renaming it

    Notes
    -----
    This function is a thin wrapper around :func:`numpy.savetxt` that
    puts the MDTools specific header in front of the output.  See
    :func:`mdtools.file_handler.header_str` for further information
    about what is included in the header.
    """
    fname = os.fspath(fname)
    if "fmt" not in kwargs:
        kwargs["fmt"] = "%16.9e"

    user_header = kwargs.pop("header", None)
    has_user_header = user_header is not None and user_header.strip() != ""
    full_header = mdt.fh.header_str()
    if has_user_header:
        # Put the user-supplied header below the MDTools header.
        full_header = full_header + "\n\n" + user_header
    kwargs["header"] = full_header

    if rename:
        mdt.fh.backup(fname)
    np.savetxt(fname, data, **kwargs)




[docs]
def savetxt_matrix(
    fname,
    data,
    var1,
    var2,
    init_values1=None,
    init_values2=None,
    upper_left=0,
    **kwargs,
):
    """
    Write a data matrix to a text file.

    The data are a function of two independent variables, `var1` and
    `var2`.  In the output, the first column holds the values of
    `var1`, the first row holds the values of `var2` and the remaining
    matrix elements hold the data.  The file is written with
    :func:`mdtools.file_handler.savetxt`.

    Parameters
    ----------
    fname : str or os.PathLike
        Name of the output file.
    data : array_like
        2-dimensional array of data to be saved.  Must be of shape
        ``(n, m)``, where ``n`` is the number of samples of the first
        independent variable (depicted row wise) and ``m`` is the number
        of samples of the second independent variable (depicted column
        wise).
    var1, var2 : array_like
        Array of shape ``(n,)`` (`var1`) or ``(m,)`` (`var2`) containing
        the values of the first or second independent variable at which
        the data were sampled.
    init_values1, init_values2 : array_like, optional
        Special initial data values that belong to the very first value
        of `var1` or `var2`.  Must be an array of shape ``(m,)``
        (`init_values1`) or ``(n,)`` (`init_values2`).  If given, `data`
        must be of shape ``(n-1, m)`` or ``(n, m-1)`` or ``(n-1, m-1)``
        if both are given.  If both are given, their first elements
        must be equal.
    upper_left : scalar, optional
        Value to put in the upper left corner of the final data matrix.
        Usually, this value is meaningless and set to zero.
    kwargs : dict, optional
        Additional keyword arguments to parse to
        :func:`mdtools.file_handler.savetxt`.  See there for possible
        arguments and their description.

    Raises
    ------
    ValueError
        If both `init_values1` and `init_values2` are given but their
        first elements differ.

    See Also
    --------
    :func:`mdtools.file_handler.savetxt` :
        Save an array to a text file
    :func:`mdtools.file_handler.write_matrix_block` :
        Save a data matrix as block to a text file
    :func:`mdtools.file_handler.backup` :
        Backup a file by renaming it

    Notes
    -----
    Internally, this function calls
    :func:`mdtools.file_handler.savetxt` which in turn calls
    :func:`numpy.savetxt`.
    """
    var1 = np.asarray(var1)
    var2 = np.asarray(var2)
    mdt.check.array(var1, dim=1)
    mdt.check.array(var2, dim=1)

    has_init1 = init_values1 is not None
    has_init2 = init_values2 is not None

    # Validate the shapes of the optional initial values and of `data`.
    # Each given set of initial values reduces the expected extent of
    # `data` along the corresponding axis by one.
    if has_init1:
        mdt.check.array(init_values1, shape=var2.shape)
    if has_init2:
        mdt.check.array(init_values2, shape=var1.shape)
    expected_shape = (
        len(var1) - int(has_init1),
        len(var2) - int(has_init2),
    )
    mdt.check.array(data, shape=expected_shape)

    if has_init1 and has_init2:
        # Both arrays describe the same matrix element at position
        # (0, 0), so they must agree there.
        if init_values2[0] != init_values1[0]:
            raise ValueError(
                "init_values2[0] ({}) is not the same as init_values1[0]"
                " ({})".format(init_values2[0], init_values1[0])
            )

    # Prepend the initial values as first column and/or first row.
    if has_init2:
        first_col = init_values2[1:] if has_init1 else init_values2
        data = np.column_stack((first_col, data))
    if has_init1:
        data = np.vstack((init_values1, data))

    # Frame the data with the values of the independent variables.
    body = np.column_stack((var1, data))
    top_row = np.insert(var2, 0, upper_left)
    matrix = np.vstack((top_row, body))
    mdt.fh.savetxt(fname, matrix, **kwargs)




[docs]
def write_matrix_block(
    fname,
    data,
    var1,
    var2,
    init_values1=None,
    init_values2=None,
    upper_left=0,
    fmt=">16.9e",
    data_name="z",
    data_unit=None,
    var1_name="x",
    var2_name="y",
    var1_unit=None,
    var2_unit=None,
    block_number=None,
    **kwargs,
):
    """
    Save a data matrix as block to a text file.

    Write data that are a function of two independent variables, `var1`
    and `var2`, as a matrix to a text file.  The dependency of the data
    from `var1` is represented by the rows and the dependency from
    `var2` is represented by the columns.

    Parameters
    ----------
    fname : str or bytes or os.PathLike
        Name of the file to write to.
    data : array_like
        2-dimensional array of data to write to file.  Must be of shape
        ``(n, m)``, where ``n`` is the number of samples of the first
        independent variable (depicted row wise) and ``m`` is the number
        of samples of the second independent variable (depicted column
        wise).
    var1, var2 : array_like
        Array of shape ``(n,)`` (`var1`) or ``(m,)`` (`var2`) containing
        the values of the first or second independent variable at which
        the data were sampled.
    init_values1, init_values2 : array_like, optional
        If supplied, the values stored in this array will be handled as
        special initial data values corresponding to the very first
        value in `var1` or `var2`.  Must be an array of shape ``(m,)``
        (`init_values1`) or ``(n,)`` (`init_values2`).  If given, `data`
        must be of shape ``(n-1, m)`` or ``(n, m-1)`` or ``(n-1, m-1)``
        if both are given.
    upper_left : scalar, optional
        Value to put in the upper left corner of the final data matrix.
        Usually, this value is meaningless and set to zero.
    fmt : str, optional
        |format_specifier|.
    data_name : str, optional
        The name of the data.  If supplied, it will be printed in the
        block header.
    data_unit : str, optional
        The unit of the data.  If supplied, will be printed in the
        block header.
    var1_name, var2_name : str, optional
        The names of the independent variables.  If supplied, they will
        be printed in the block header.
    var1_unit, var2_unit : str, optional
        The units of the independent variables.  If supplied, they will
        be printed in the block header.
    block_number : int, optional
        The number of the data block in `fname`.  If supplied, it will
        be printed in the block header.
    kwargs : dict, optional
        Additional keyword arguments to parse to
        :func:`mdtools.file_handler.xopen`.  See there for possible
        arguments and their description.  By default, `mode` is set to
        ``'wt'`` (open file for writing in text mode, truncating the
        file first).

    Raises
    ------
    ValueError
        If both `init_values1` and `init_values2` are given but their
        first elements differ.

    See Also
    --------
    :func:`mdtools.file_handler.savetxt_matrix` :
        Save a data matrix to a text file
    :func:`mdtools.file_handler.write_header` :
        Create a file and write the standard MDTools header to it
    """
    var1 = np.asarray(var1)
    var2 = np.asarray(var2)
    mdt.check.array(var1, dim=1)
    mdt.check.array(var2, dim=1)
    if init_values1 is None and init_values2 is None:
        mdt.check.array(data, shape=(len(var1), len(var2)))
    elif init_values1 is not None and init_values2 is None:
        mdt.check.array(init_values1, shape=var2.shape)
        mdt.check.array(data, shape=(len(var1) - 1, len(var2)))
    elif init_values1 is None and init_values2 is not None:
        mdt.check.array(init_values2, shape=var1.shape)
        mdt.check.array(data, shape=(len(var1), len(var2) - 1))
    elif init_values1 is not None and init_values2 is not None:
        mdt.check.array(init_values1, shape=var2.shape)
        mdt.check.array(init_values2, shape=var1.shape)
        mdt.check.array(data, shape=(len(var1) - 1, len(var2) - 1))
        # Both arrays describe the same matrix element, so they must
        # agree in their first value.
        if init_values2[0] != init_values1[0]:
            raise ValueError(
                "init_values2[0] ({}) is not the same as init_values1[0]"
                " ({})".format(init_values2[0], init_values1[0])
            )

    kwargs.setdefault("mode", "wt")
    with mdt.fh.xopen(fname, **kwargs) as outfile:
        # Block header
        outfile.write("\n\n\n\n")
        if block_number is not None:
            outfile.write("# Block {}\n".format(block_number))
        if var1_name is not None:
            outfile.write("# First column:    {}".format(var1_name))
            if var1_unit is not None:
                outfile.write(" in {}".format(var1_unit))
            outfile.write("\n")
        if var2_name is not None:
            outfile.write("# First row:       {}".format(var2_name))
            if var2_unit is not None:
                outfile.write(" in {}".format(var2_unit))
            outfile.write("\n")
        if data_name is not None:
            outfile.write("# Matrix elements: {}".format(data_name))
            if data_unit is not None:
                outfile.write(" in {}".format(data_unit))
            outfile.write("\n")
        # Column numbers.  They are written as right-aligned integers
        # with the same field width as a value formatted with `fmt`, so
        # that they line up with the data columns.
        num_cols = len(var2)
        fmt_int = len("{:{fmt}}".format(0, fmt=fmt))
        fmt_int = ">" + str(fmt_int) + "d"
        outfile.write("# Column number:\n")
        # The column number must be an integer: the "d" presentation
        # type is not valid for strings.
        outfile.write("# {:{fmt}}".format(1, fmt=fmt_int))
        for col_num in range(2, num_cols + 2):
            outfile.write(" {:{fmt}}".format(col_num, fmt=fmt_int))
        outfile.write("\n")
        # The row after the row with the column numbers contains the
        # values of `var2`.
        outfile.write("  {:{fmt}}".format(upper_left, fmt=fmt))
        for col_num in range(num_cols):
            outfile.write(" {:{fmt}}".format(var2[col_num], fmt=fmt))
        outfile.write("\n")
        # If there are any special initial values for the very first
        # value of `var1`, print them to the next row.
        if init_values1 is not None:
            outfile.write("  {:{fmt}}".format(var1[0], fmt=fmt))
            for col_num in range(num_cols):
                outfile.write(
                    " {:{fmt}}".format(init_values1[col_num], fmt=fmt)
                )
            outfile.write("\n")
            start_row = 1
        else:
            start_row = 0
        # Print remaining rows. The first column always contains the
        # current value of `var1`. The remaining columns contain the
        # data.
        num_rows = len(var1)
        for row_num in range(start_row, num_rows):
            outfile.write("  {:{fmt}}".format(var1[row_num], fmt=fmt))
            # If there are any special initial values for the very first
            # value of `var2`, print them to the second column.
            if init_values2 is not None:
                outfile.write(
                    " {:{fmt}}".format(init_values2[row_num], fmt=fmt)
                )
                start_col = 1
            else:
                start_col = 0
            # `data` does not contain the initial values, hence the
            # index offsets.
            for col_num in range(start_col, num_cols):
                outfile.write(
                    " {:{fmt}}".format(
                        data[row_num - start_row][col_num - start_col], fmt=fmt
                    )
                )
            outfile.write("\n")




[docs]
def save_dtrj(fname, dtrj, rename=True):
    """
    Save a discrete trajectory to file.

    Save a discrete trajectory as :class:`numpy.ndarray` to a compressed
    NumPy |npz_archive|.

    .. warning::

        Creating a gzip-compressed :file:`.npz` archive does not work.
        However bzip2-, xz- and lzma-compressed archives can be created.
        Note that the created :file:`.npz` archive is already compressed
        by :func:`numpy.savez_compressed`.  Therefore, further
        compression is usually not reasonable.

    Parameters
    ----------
    fname : str or bytes or os.PathLike
        Name of the file to write to.
    dtrj : array_like
        The discrete trajectory to save.  Must meet the requirements
        given in :func:`mdtools.check.dtrj`.  Note that the shape of
        `dtrj` is expanded to ``(1, f)`` if it is only of shape
        ``(f,)``.  If :func:`mdtools.check.dtrj` raises a
        :exc:`ValueError`, a warning is issued and `dtrj` is saved as
        given.
    rename : bool, optional
        If ``True`` and a file called `fname` already exists, rename it
        to ``'fname.bak_timestamp'``.  See
        :func:`mdtools.file_handler.backup` for more details.

    See Also
    --------
    :func:`numpy.savez_compressed` :
        Save one or multiple arrays in a compressed :file:`.npz` archive
    :func:`mdtools.file_handler.load_dtrj` :
        Load a discrete trajectory from file

    Notes
    -----
    This function simply checks whether `dtrj` is a suitable discrete
    trajectory and then saves it to file using
    :func:`numpy.savez_compressed`.  Inside the created :file:`.npz`
    archive, the discrete trajectory is stored in the file
    :file:`dtrj.npy`.
    """
    try:
        dtrj = mdt.check.dtrj(dtrj)
    except ValueError as err:
        warnings.warn(
            "The given trajectory is not a suitable discrete trajectory:\n"
            "{}".format(err),
            UserWarning,
            stacklevel=2,
        )
    # Convert path-like objects once so that all helpers receive a plain
    # `str` or `bytes` path.
    fname = os.fspath(fname)
    if rename:
        mdt.fh.backup(fname)
    # The backup (if requested) was already done above.  Explicitly
    # disable the backup of `xopen`, because it is enabled by default
    # and would otherwise ignore ``rename=False``.
    with mdt.fh.xopen(fname, "wb", rename=False) as fh:
        np.savez_compressed(fh, dtrj=dtrj)




[docs]
def load_dtrj(fname, **kwargs):
    """
    Load a discrete trajectory from file.

    Load a discrete trajectory stored as :class:`numpy.ndarray` from a
    binary NumPy |npy_file| or from a (compressed) NumPy |npz_archive|.

    Parameters
    ----------
    fname : str or bytes or os.PathLike
        Name of the file containing the discrete trajectory.  The
        discrete trajectory must be stored as :class:`numpy.ndarray`
        either in a binary NumPy |npy_file| or in a (compressed) NumPy
        |npz_archive|.  If loading from an :file:`.npz` archive, this
        function first tries to read the discrete trajectory from the
        file "dtrj.npy".  If this file is not present in the archive,
        the discrete trajectory is read from the first file in the
        archive.

        The discrete trajectory must be of shape ``(n, f)``, where ``n``
        is the number of compounds and ``f`` is the number of frames.
        The shape can also be ``(f,)``, in which case the array is
        expanded to shape ``(1, f)``.

        The array must only contain integers or floats whose fractional
        part is zero, because the elements of a discrete trajectory are
        interpreted as the indices of the states in which a given
        compound is at a given frame.
    kwargs : dict, optional
        Additional keyword arguments to parse to :func:`numpy.load`.
        See there for possible arguments and their description.  By
        default, `allow_pickle` is set to ``False``.

    Returns
    -------
    dtrj : numpy.ndarray
        The discrete trajectory loaded from the given file.

    Raises
    ------
    TypeError
        If :func:`numpy.load` returns neither a :class:`numpy.ndarray`
        nor an :file:`.npz` archive.

    See Also
    --------
    :func:`numpy.load` :
        Load arrays or pickled objects from :file:`.npy`, :file:`.npz`
        or pickled files
    :func:`mdtools.file_handler.save_dtrj` :
        Save a discrete trajectory to file

    Notes
    -----
    This function simply calls :func:`numpy.load` and checks whether
    the loaded :class:`numpy.ndarray` is a suitable discrete trajectory.
    """
    kwargs.setdefault("allow_pickle", False)
    # Use context managers so that the file (and the archive) is closed
    # even if loading fails.
    with mdt.fh.xopen(fname, "rb") as fh:
        dtrj_loaded = np.load(fh, **kwargs)
        if isinstance(dtrj_loaded, np.lib.npyio.NpzFile):
            with dtrj_loaded:
                dtrj = dtrj_loaded.get("dtrj", None)
                if dtrj is None:
                    # Fall back to the first array of the archive
                    # without loading all other arrays.
                    dtrj = dtrj_loaded[dtrj_loaded.files[0]]
        elif isinstance(dtrj_loaded, np.ndarray):
            dtrj = dtrj_loaded
        else:
            raise TypeError(
                "Unknown type of the loaded data: {}".format(
                    type(dtrj_loaded)
                )
            )
    return mdt.check.dtrj(dtrj)




[docs]
def str2none_or_type(val, dtype, empty_none=False, case_sensitive=True):
    """
    Turn a string into ``None`` or into an object of a given type.

    The input is first converted to a string.  If that string spells
    ``'None'``, the NoneType ``None`` is returned.  Otherwise, the
    string is handed over to `dtype` and the result of that call is
    returned.

    Parameters
    ----------
    val : str_like
        The value to convert.  Any object is allowed, because it is
        converted to a string via ``str(val)`` before anything else
        happens.
    dtype : type
        Callable that is applied to ``str(val)`` if the string is not
        recognized as ``None``.  If ``dtype(str(val))`` fails, the
        exception raised by `dtype` is propagated unchanged.
    empty_none : bool, optional
        If ``True``, the empty string ``''`` is converted to ``None``,
        too.
    case_sensitive : bool, optional
        If ``True``, only the exact string ``'None'`` is converted to
        ``None``.  If ``False``, the comparison is done on the lower
        case version of the string, i.e. ``'none'``, ``'NONE'``, etc.
        are converted to ``None`` as well.

    Returns
    -------
    val : None or dtype
        ``None`` if the input string was recognized as such, otherwise
        ``dtype(str(val))``.

    See Also
    --------
    :func:`mdtools.file_handler.str2bool` :
        Convert a string to a boolean value

    Notes
    -----
    The purpose of this function is to make it possible to hand
    ``None`` to a script on the command line.  :mod:`argparse` treats
    every command-line argument as a plain string, hence ``None``
    always arrives as the string ``'None'``.  To get a real ``None``,
    give
    ``lambda val: mdt.fh.str2none_or_type(val, dtype=<dtype>)`` (with
    ``<dtype>`` being e.g. ``float`` or ``str``) to the `type` keyword
    of :meth:`argparse.ArgumentParser.add_argument`.

    References
    ----------
    This code was adapted from https://stackoverflow.com/a/55063765.

    Examples
    --------
    >>> mdt.fh.str2none_or_type('None', dtype=str)  # Returns None
    >>> mdt.fh.str2none_or_type(None, dtype=str)  # Returns None
    >>> mdt.fh.str2none_or_type('none', dtype=str)
    'none'
    >>> mdt.fh.str2none_or_type(
    ...     'none', dtype=str, case_sensitive=False
    ... )  # Returns None
    >>> mdt.fh.str2none_or_type('', dtype=str)
    ''
    >>> mdt.fh.str2none_or_type(
    ...     '', dtype=str, empty_none=True
    ... )  # Returns None
    >>> mdt.fh.str2none_or_type(2, dtype=str)
    '2'

    >>> mdt.fh.str2none_or_type('None', dtype=int)  # Returns None
    >>> mdt.fh.str2none_or_type('2', dtype=int)
    2
    >>> mdt.fh.str2none_or_type(2, dtype=int)
    2

    >>> import argparse
    >>> parser = argparse.ArgumentParser()
    >>> parser.add_argument(
    ...     '--spam',
    ...     type=lambda val: mdt.fh.str2none_or_type(val, dtype=str)
    ... )
    _StoreAction(option_strings=['--spam'], dest='spam', ...)
    >>> parser.add_argument('--eggs', type=str)
    _StoreAction(option_strings=['--eggs'], dest='eggs', ...)
    >>> args = parser.parse_args(['--spam', 'None', '--eggs', 'None'])
    >>> args.spam is None
    True
    >>> args.eggs is None
    False
    >>> args.eggs == 'None'
    True
    """
    text = str(val)

    # An empty string only counts as None on explicit request.
    if empty_none and text == "":
        return None

    # Pick the spelling to compare against:  Either the exact string
    # 'None' or, for the case-insensitive mode, its lower case variant.
    if case_sensitive:
        candidate, none_spelling = text, "None"
    else:
        candidate, none_spelling = text.lower(), "none"
    if candidate == none_spelling:
        return None

    return dtype(text)




[docs]
def str2bool(val, accept_yes_no=True, accept_abbrev=True, accept_01=True):
    """
    Interpret a string as a boolean value.

    The strings ``'true'`` and ``'false'`` are mapped to the booleans
    ``True`` and ``False``.  The comparison is case-insensitive,
    because the input is converted to lower case before it is
    compared.

    Parameters
    ----------
    val : str_like
        The value to convert.  Any object is allowed, because it is
        converted to a string via ``str(val)`` before anything else
        happens.
    accept_yes_no : bool, optional
        If ``True``, ``'yes'`` is additionally interpreted as ``True``
        and ``'no'`` as ``False``.
    accept_abbrev : bool, optional
        If ``True``, the first letter of every accepted word is
        accepted as well:

            * ``'t'`` is interpreted as ``True``.
            * ``'f'`` is interpreted as ``False``.
            * ``'y'`` is interpreted as ``True`` (only if
              `accept_yes_no` is ``True``).
            * ``'n'`` is interpreted as ``False`` (only if
              `accept_yes_no` is ``True``).

    accept_01 : bool, optional
        If ``True``, ``'1'`` is additionally interpreted as ``True``
        and ``'0'`` as ``False``.

    Returns
    -------
    val : bool
        ``True`` or ``False``, depending on the input string.

    Raises
    ------
    ValueError :
        If the lower case version of ``str(val)`` is none of the
        accepted strings.

    See Also
    --------
    :func:`mdtools.file_handler.str2none_or_type` :
        Convert a string to the NoneType ``None`` or to a given type

    Notes
    -----
    The purpose of this function is to make it possible to hand
    booleans to a script on the command line in an intuitive way.
    :mod:`argparse` treats every command-line argument as a plain
    string and a type conversion with :class:`bool` turns every
    non-empty string into ``True`` (including the string ``'false'``).
    To get the expected boolean, give the function itself (i.e.
    ``mdt.fh.str2bool``, not the result of a call) to the `type`
    keyword of :meth:`argparse.ArgumentParser.add_argument`.

    References
    ----------
    This code was adapted from https://stackoverflow.com/a/43357954.

    Examples
    --------
    >>> mdt.fh.str2bool('True')
    True
    >>> mdt.fh.str2bool('true')
    True
    >>> mdt.fh.str2bool('false')
    False
    >>> mdt.fh.str2bool('False')
    False
    >>> mdt.fh.str2bool('fAlSe')
    False

    >>> mdt.fh.str2bool('yes', accept_yes_no=True)
    True
    >>> mdt.fh.str2bool('no', accept_yes_no=True)
    False

    >>> mdt.fh.str2bool('y', accept_yes_no=True, accept_abbrev=True)
    True
    >>> mdt.fh.str2bool('n', accept_yes_no=True, accept_abbrev=True)
    False
    >>> mdt.fh.str2bool('n', accept_yes_no=True, accept_abbrev=False)
    Traceback (most recent call last):
    ...
    ValueError: ...

    >>> mdt.fh.str2bool(1, accept_01=True)
    True
    >>> mdt.fh.str2bool('1', accept_01=True)
    True
    >>> mdt.fh.str2bool(0, accept_01=True)
    False
    >>> mdt.fh.str2bool('0', accept_01=True)
    False

    >>> import argparse
    >>> parser = argparse.ArgumentParser()
    >>> parser.add_argument('--spam', type=mdt.fh.str2bool)
    _StoreAction(option_strings=['--spam'], ...)
    >>> parser.add_argument('--eggs', type=str)
    _StoreAction(option_strings=['--eggs'], ...)
    >>> args = parser.parse_args(['--spam', 'yes', '--eggs', 'no'])
    >>> args.spam
    True
    >>> args.eggs
    'no'
    """
    # Full words that are recognized.  These stay lists (and keep their
    # order), because they are printed verbatim in the error message.
    truthy = ["true", "yes"] if accept_yes_no else ["true"]
    falsy = ["false", "no"] if accept_yes_no else ["false"]

    if accept_abbrev:
        # The abbreviations are the first letters of the words
        # collected so far.  Build them as separate lists before
        # concatenating to not iterate over a list that is growing.
        truthy = truthy + [word[0] for word in truthy]
        falsy = falsy + [word[0] for word in falsy]

    if accept_01:
        truthy.append("1")
        falsy.append("0")

    normalized = str(val).lower()
    for answer, accepted in ((True, truthy), (False, falsy)):
        if normalized in accepted:
            return answer

    raise ValueError(
        "Could not convert 'val' ({}) to bool. 'val' is neither in {} nor"
        " in {}".format(normalized, truthy, falsy)
    )