# Source code for mdtools.run_time_info

# This file is part of MDTools.
# Copyright (C) 2021  The MDTools Development Team and all contributors
# listed in the file AUTHORS.rst
#
# MDTools is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# MDTools is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with MDTools.  If not, see <http://www.gnu.org/licenses/>.


"""
Classes and functions for getting run time information

This module can be called from :mod:`mdtools` via the shortcut ``rti``::

    import mdtools as mdt
    mdt.rti  # instead of mdt.run_time_info

"""


# Standard libraries
import sys
import os
import warnings
from datetime import datetime
# Third party libraries
import psutil
import numpy as np
import MDAnalysis as mda
from MDAnalysis.lib.log import ProgressBar
# Local application/library specific imports
import mdtools as mdt


class ProgressBar(ProgressBar):
    """
    Progress bar with time estimate for iterating over trajectories.

    Wrapping an iterable in :class:`ProgressBar` yields an iterator that
    behaves like the wrapped iterable, but additionally prints a
    dynamically updating progress bar each time a value is requested.

    See Also
    --------
    :class:`MDAnalysis.lib.log.ProgressBar` : Parent class
    :class:`tqdm.auto.tqdm` : Grandparent class

    Notes
    -----
    This class inherits from :class:`MDAnalysis.lib.log.ProgressBar`,
    which itself inherits from :class:`tqdm.auto.tqdm`.  It only
    changes the default values of some arguments:

        * `ascii` defaults to ``True``.
        * `unit` defaults to ``'frames'``.
        * `unit_scale` defaults to ``True``.
        * `bar_format` defaults to a format that switches off a
          possible inversion of 'unit/s' to 's/unit' (see also
          https://github.com/tqdm/tqdm/issues/72).
        * `mininterval` defaults to ``300`` seconds.
        * `maxinterval` defaults to ``3600`` seconds.

    Any of these defaults can be overridden by passing the respective
    keyword argument explicitly.

    See the MDAnalysis_ and tqdm_ documentations for further information.

    .. _MDAnalysis: https://docs.mdanalysis.org/stable/documentation_pages/lib/log.html#MDAnalysis.lib.log.ProgressBar
    .. _tqdm: https://tqdm.github.io/docs/tqdm/#__init__

    Example
    -------
    .. code-block::

        for ts in mdt.rti.ProgressBar(u.trajectory):
            # Perform analysis

    will produce something similar to::

        25%|#####1           | 25/100 [00:15<00:45,  1.67frames/s]

    """

    def __init__(self, *args, **kwargs):
        """
        Initialize the :class:`ProgressBar`.

        Parameters
        ----------
        args : list, optional
            Positional arguments.  Passed on unchanged to
            :class:`MDAnalysis.lib.log.ProgressBar`.
        kwargs : dict, optional
            Keyword arguments.  Passed on to
            :class:`MDAnalysis.lib.log.ProgressBar` after the defaults
            listed in the class documentation have been filled in for
            all keywords that were not given.
        """
        defaults = {
            'ascii': True,
            'unit': "frames",
            'mininterval': 300,
            'maxinterval': 3600,
            'unit_scale': True,
            # `rate_noinv_fmt` always prints 'unit/s', never 's/unit'.
            'bar_format': ("{l_bar}{bar}| {n_fmt}/{total_fmt}"
                           " [{elapsed}<{remaining}, {rate_noinv_fmt}"
                           "{postfix}]"),
        }
        # Only fill in what the caller did not specify explicitly.
        for key, value in defaults.items():
            kwargs.setdefault(key, value)
        super().__init__(*args, **kwargs)


def get_num_CPUs():
    """
    Get the number of available CPUs.

    The number of available CPUs is obtained in decreasing precedence
    from the environment variables:

        1. OMP_NUM_THREADS
        2. SLURM_CPUS_PER_TASK
        3. SLURM_JOB_CPUS_PER_NODE
        4. SLURM_CPUS_ON_NODE
        5. or from the Python function :func:`os.cpu_count()`

    Returns
    -------
    n_cpus : int
        The number of available CPUs.  Always a positive integer.

    Notes
    -----
    Only the leading integer of an environment variable is used.  This
    is necessary, because ``OMP_NUM_THREADS`` may hold a comma-separated
    list (one value per nesting level, e.g. ``"4,2"``) and
    ``SLURM_JOB_CPUS_PER_NODE`` may hold a compressed per-node list
    (e.g. ``"16(x2),8"``).

    An environment variable that is set but does not start with a
    positive integer (e.g. an empty string) is skipped with a
    :class:`RuntimeWarning` and the next source in the above list is
    tried.  If :func:`os.cpu_count` cannot determine the number of
    CPUs, ``1`` is returned.
    """
    env_vars = ('OMP_NUM_THREADS',
                'SLURM_CPUS_PER_TASK',
                'SLURM_JOB_CPUS_PER_NODE',
                'SLURM_CPUS_ON_NODE')
    for name in env_vars:
        value = os.environ.get(name)
        if value is None:
            continue
        # Keep only the first entry of a comma-separated list and strip
        # a possible Slurm repetition suffix like "(x2)".
        leading = value.split(",", 1)[0].split("(", 1)[0].strip()
        try:
            n_cpus = int(leading)
        except ValueError:
            n_cpus = None
        if n_cpus is not None and n_cpus > 0:
            return n_cpus
        warnings.warn("Ignoring the environment variable {} because its"
                      " value ({!r}) does not start with a positive"
                      " integer".format(name, value), RuntimeWarning)
    # os.cpu_count() returns None if the number of CPUs is undetermined.
    return os.cpu_count() or 1


def mem_usage(unit='MiB'):
    """
    Get current memory usage.

    Returns the current memory usage (resident set size) of the process
    that calls this function.

    Parameters
    ----------
    unit : {'B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'KB', 'MB', 'GB', 'TB', 'PB'}
        String determining the unit in which the memory usage should be
        returned.  Default is mebibytes (``'MiB'``).

    Returns
    -------
    mem : float
        Memory currently used by the calling process.  Returns ``None``
        (and issues a :class:`RuntimeWarning`) if the memory usage
        cannot be determined.

    Raises
    ------
    ValueError
        If `unit` is not one of the supported units.
    """
    bytes_per_unit = {'B': 1,
                      'KiB': 2**10,
                      'MiB': 2**20,
                      'GiB': 2**30,
                      'TiB': 2**40,
                      'PiB': 2**50,
                      'KB': 1e3,
                      'MB': 1e6,
                      'GB': 1e9,
                      'TB': 1e12,
                      'PB': 1e15}
    # Validate the unit first so that a wrong unit is always reported,
    # independent of whether the process can be queried or not.
    try:
        scaling = bytes_per_unit[unit]
    except (KeyError, TypeError):
        raise ValueError("'unit' must be one of {}, but you gave {!r}"
                         .format(", ".join(bytes_per_unit), unit)) from None
    pid = os.getpid()
    try:
        # Both creating the Process object and querying its memory can
        # fail, therefore both calls are guarded.
        rss = psutil.Process(pid).memory_info().rss
    except (psutil.NoSuchProcess, psutil.AccessDenied) as err:
        warnings.warn("Cannot get memory usage of process {} because of"
                      " {}".format(pid, err), RuntimeWarning)
        return None
    return rss / scaling


def run_time_info():
    """
    Collect some information about the currently running script.

    Returns
    -------
    script_name : string
        The base name of the running script (taken from
        ``sys.argv[0]``).
    command_line : string
        The script name followed by all command-line arguments, joined
        by single spaces.
    cwd : string
        The current working directory.
    exe : string
        The script exactly as it was called (``sys.argv[0]``).
    mdt_version : string
        The version number of MDTools.
    python_version : string
        The version number of Python in the form
        ``major.minor.micro``.

    See Also
    --------
    :func:`mdtools.run_time_info.run_time_info_str` :
        Create a string containing some run time information
    """
    exe = sys.argv[0]
    script_name = str(os.path.basename(exe))
    arguments = " ".join(sys.argv[1:])
    command_line = "{} {}".format(script_name, arguments)
    python_version = ".".join(str(part) for part in sys.version_info[:3])
    cwd = os.getcwd()
    return (script_name, command_line, cwd, exe, mdt.__version__,
            python_version)


def run_time_info_str(indent=0):
    """
    Build a human readable report of some run time information.

    The returned string can be printed directly to standard output
    using :func:`print`.

    The report contains:

        * The name of the running script.
        * The date and time at which this function was called.
        * The MDTools copyright notice.
        * The information generated by
          :func:`mdtools.run_time_info.run_time_info`.

    Parameters
    ----------
    indent : int, optional
        Number of spaces to indent the information string.  Negative
        indentation is treated as zero indentation.

    Returns
    -------
    rti : str
        Human readable string containing the above listed content.

    See Also
    --------
    :func:`mdtools.run_time_info.run_time_info` :
        Generate some run time information
    :func:`mdtools.file_handler.header_str` :
        Create a string containing some run time information to be used
        as header for text files
    :func:`mdtools.file_handler.indent` : Indent a text
    """
    now = datetime.now()
    (script_name, cmd_line, work_dir,
     executable, mdt_version, py_version) = run_time_info()
    lines = [
        "{}".format(script_name),
        "{}".format(now.strftime('%Y/%m/%d %H:%M')),
        "",
        mdt.__copyright_notice__,
        "",
        "",
        "Command line input:",
        "  {}".format(cmd_line),
        "Working directory:",
        "  {}".format(work_dir),
        "Executable:",
        "  {}".format(executable),
        "mdtools version:",
        "  {}".format(mdt_version),
        "Python version:",
        "  {}".format(py_version),
    ]
    # The report intentionally has no trailing newline.
    rti = "\n".join(lines)
    if indent <= 0:
        return rti
    return mdt.fh.indent(rti, amount=indent, char=" ")


def ag_info_str(ag, indent=0):
    """
    Build a human readable summary of a MDAnalysis
    :class:`~MDAnalysis.core.groups.AtomGroup`.

    The returned string can be printed directly to standard output
    using :func:`print`.

    The summary contains:

        * The total number of

            - :class:`Segments <MDAnalysis.core.groups.Segment>`
            - :class:`Residues <MDAnalysis.core.groups.Residue>`
            - :attr:`Fragments <MDAnalysis.core.groups.AtomGroup.fragments>`
              (``N/A`` if the number of fragments is not available)
            - :class:`Atoms <MDAnalysis.core.groups.Atom>`

        * The number of different

            - segment names (together with the names)
            - residue names (together with the names)
            - atom names (only the number, see the comment in the code)
            - atom types (together with the types)

        * Whether the input :class:`~MDAnalysis.core.groups.AtomGroup` is
          an :class:`~MDAnalysis.core.groups.UpdatingAtomGroup` or not.

    Refer to the MDAnalysis' user guide for an
    |explanation_of_these_terms|.

    Parameters
    ----------
    ag : MDAnalysis.core.groups.AtomGroup
        The MDAnalysis :class:`~MDAnalysis.core.groups.AtomGroup` for
        which to create the information string.
    indent : int, optional
        Number of spaces to indent the information string.  Negative
        indentation is treated as zero indentation.

    Returns
    -------
    ag_info : str
        Human readable string containing the above listed content.

    See Also
    --------
    :func:`mdtools.file_handler.indent` : Indent a text
    """
    def quoted(names):
        # Render the names as 'name1' 'name2' ...
        return "'{}'".format("' '".join(names))

    segids = np.unique(ag.segids)
    resnames = np.unique(ag.resnames)
    atmnames = np.unique(ag.names)
    atmtypes = np.unique(ag.types)
    try:
        n_fragments = ag.n_fragments
    except mda.exceptions.NoDataError:
        n_fragments = "N/A"

    # The atom names themselves are deliberately not listed:  If each
    # atom in a molecule has a different name (frequently the atoms are
    # numbered sequentially), the list of different atom names is too
    # long to be usefully printed on screen.
    lines = [
        "Segments:               {}".format(ag.n_segments),
        "  Different segments:   {}".format(len(segids)),
        "  Segment name(s):      {}".format(quoted(segids)),
        "Residues:               {}".format(ag.n_residues),
        "  Different residues:   {}".format(len(resnames)),
        "  Residue name(s):      {}".format(quoted(resnames)),
        "Atoms:                  {}".format(ag.n_atoms),
        "  Different atom names: {}".format(len(atmnames)),
        "  Different atom types: {}".format(len(atmtypes)),
        "  Atom type(s):         {}".format(quoted(atmtypes)),
        "Fragments:              {}".format(n_fragments),
        "Updating atom group:    {}".format(
            isinstance(ag, mda.core.groups.UpdatingAtomGroup)),
    ]
    ag_info = "\n".join(lines)
    if indent <= 0:
        return ag_info
    return mdt.fh.indent(ag_info, amount=indent, char=" ")


def dtrj_trans_info(dtrj):
    """
    Generate basic information about the state transitions in a discrete
    trajectory.

    .. deprecated:: 0.0.0.dev0
        :func:`mdtools.run_time_info.dtrj_trans_info` will be replaced
        by :func:`mdtools.run_time_info.dtrj_trans_info_new` in a future
        release.

    Parameters
    ----------
    dtrj : array_like
        The discrete trajectory.  Array of shape ``(n, f)``, where ``n``
        is the number of compounds and ``f`` is the number of frames.
        A 1-dimensional array of shape ``(f,)`` is interpreted as the
        trajectory of a single compound.  The elements of `dtrj` are
        interpreted as the indices of the states in which a given
        compound is at a given frame.

    Returns
    -------
    n_stay : int
        Number of compounds that are in the same state in all frames.
    always_neg : int
        Number of compounds that are in a negative state in all frames.
    never_neg : int
        Number of compounds that are not in a negative state in any
        frame.
    n_frames_neg : int
        Total number of frames with negative states (summed over all
        compounds).
    n_trans : int
        Total number of state transitions.
    pos2pos : int
        Total number of Positive -> Positive transitions (transitions
        from one state with a positive/zero state index to another state
        with a positive/zero state index).
    pos2neg : int
        Number of Positive -> Negative transitions.
    neg2pos : int
        Number of Negative -> Positive transitions.
    neg2neg : int
        Number of Negative -> Negative transitions.

    Raises
    ------
    ValueError
        If `dtrj` has neither one nor two dimensions or if the four
        transition classes do not add up to the total number of
        transitions (e.g. because `dtrj` contains NaN values).

    Warns
    -----
    RuntimeWarning
        If at least one element of `dtrj` is not an integer.

    See Also
    --------
    :func:`mdtools.run_time_info.dtrj_trans_info_str` :
        Create a string containing basic information about the state
        transitions in a discrete trajectory

    Note
    ----
    Positive states are states with a state index equal(!) to or greater
    than zero.  Negative states are states with a state index less than
    zero.
    """
    dtrj = np.asarray(dtrj)
    if dtrj.ndim == 1:
        # A single compound:  Add the compound axis *before* any
        # axis-dependent operation, so that the frames stay on axis 1.
        dtrj = np.expand_dims(dtrj, axis=0)
    elif dtrj.ndim != 2:
        raise ValueError("The discrete trajectory must have one or two"
                         " dimensions")
    if np.any(np.modf(dtrj)[0] != 0):
        warnings.warn("At least one element of the discrete trajectory"
                      " is not an integer", RuntimeWarning)

    # Per-compound statistics (reduce over the frame axis).
    n_stay = np.count_nonzero(np.all(dtrj == dtrj[:, :1], axis=1))
    dtrj_neg = (dtrj < 0)
    always_neg = np.count_nonzero(np.all(dtrj_neg, axis=1))
    never_neg = np.count_nonzero(~np.any(dtrj_neg, axis=1))
    n_frames_neg = np.count_nonzero(dtrj_neg)
    del dtrj_neg

    # transitions[i, t] is True if compound i changes its state between
    # frame t and frame t+1.
    transitions = (np.diff(dtrj, axis=1) != 0)
    n_trans = np.count_nonzero(transitions)
    # State directly before and directly after each transition.  Both
    # arrays are extracted with the same mask and therefore pair up
    # element-wise.
    state_init = dtrj[:, :-1][transitions]
    state_final = dtrj[:, 1:][transitions]
    init_pos, init_neg = (state_init >= 0), (state_init < 0)
    final_pos, final_neg = (state_final >= 0), (state_final < 0)
    pos2pos = np.count_nonzero(init_pos & final_pos)
    pos2neg = np.count_nonzero(init_pos & final_neg)
    neg2pos = np.count_nonzero(init_neg & final_pos)
    neg2neg = np.count_nonzero(init_neg & final_neg)
    # Values that are neither >= 0 nor < 0 (NaN) break the partition.
    n_trans_sum = pos2pos + pos2neg + neg2pos + neg2neg
    if n_trans_sum != n_trans:
        raise ValueError("The sum of Positive <-> Negative transitions"
                         " ({}) is not equal to the total number of"
                         " transitions ({}). This should not have"
                         " happened"
                         .format(n_trans_sum, n_trans))

    return (n_stay, always_neg, never_neg, n_frames_neg,
            n_trans, pos2pos, pos2neg, neg2pos, neg2neg)


def dtrj_trans_info_str(dtrj):
    """
    Create a string containing basic information about the state
    transitions in a discrete trajectory.

    .. deprecated:: 0.0.0.dev0
        :func:`mdtools.run_time_info.dtrj_trans_info_str` will be
        replaced by :func:`mdtools.run_time_info.dtrj_trans_info_str_new`
        in a future release.

    The string can be printed directly to standard output using
    :func:`print`.

    The information contained in the string is:

        * The number of frames of the discrete trajectory (per compound).
        * The number of compounds (or in other words, the number of
          single-compound trajectories contained in `dtrj`).
        * The information generated by
          :func:`mdtools.run_time_info.dtrj_trans_info`.

    Parameters
    ----------
    dtrj : array_like
        The discrete trajectory.  Array of shape ``(n, f)``, where ``n``
        is the number of compounds and ``f`` is the number of frames.
        A 1-dimensional array of shape ``(f,)`` is interpreted as the
        trajectory of a single compound.  The elements of `dtrj` are
        interpreted as the indices of the states in which a given
        compound is at a given frame.

    Returns
    -------
    dti : str
        Human readable string containing the above listed content.  If
        the trajectory contains no state transitions at all, the
        percentages of the individual transition types are given as
        zero.

    Raises
    ------
    ValueError
        If `dtrj` has neither one nor two dimensions.

    See Also
    --------
    :func:`mdtools.run_time_info.dtrj_trans_info` :
        Generate basic information about the state transitions in a
        discrete trajectory
    """
    # Accept any array_like and bring it to shape (n, f) before reading
    # the number of compounds and frames from it.
    dtrj = np.asarray(dtrj)
    if dtrj.ndim == 1:
        dtrj = np.expand_dims(dtrj, axis=0)
    elif dtrj.ndim != 2:
        raise ValueError("The discrete trajectory must have one or two"
                         " dimensions")
    N_CMPS, N_FRAMES = dtrj.shape
    (n_stay, always_neg, never_neg, n_frames_neg,
     n_trans, pos2pos, pos2neg, neg2pos, neg2neg) = dtrj_trans_info(dtrj)

    def percent(count):
        # Share of `count` in all transitions; guard against
        # trajectories without any transition.
        return 100 * count / n_trans if n_trans else 0.0

    dti = ("No. of frames (per compound):                         {:>12d}\n"
           .format(N_FRAMES))
    dti += ("No. of compounds:                                     {:>12d}\n"
            .format(N_CMPS))
    dti += ("No. of compounds that never leave their state:        {:>12d}\n"
            .format(n_stay))
    dti += ("No. of compounds that are always in a negative state: {:>12d}\n"
            .format(always_neg))
    dti += ("No. of compounds that are never  in a negative state: {:>12d}\n"
            .format(never_neg))
    dti += ("Total No. of frames with negative states:             {:>12d}\n"
            .format(n_frames_neg))
    dti += "\n"
    dti += ("Total No. of state transitions:          {:>12d}\n"
            .format(n_trans))
    dti += ("No. of Positive -> Positive transitions: {:>12d}  ({:>8.4f} %)\n"
            .format(pos2pos, percent(pos2pos)))
    dti += ("No. of Positive -> Negative transitions: {:>12d}  ({:>8.4f} %)\n"
            .format(pos2neg, percent(pos2neg)))
    dti += ("No. of Negative -> Positive transitions: {:>12d}  ({:>8.4f} %)\n"
            .format(neg2pos, percent(neg2pos)))
    dti += ("No. of Negative -> Negative transitions: {:>12d}  ({:>8.4f} %)\n"
            .format(neg2neg, percent(neg2neg)))
    dti += "Positive states are states with a state index >= 0\n"
    dti += "Negative states are states with a state index <  0\n"
    return dti