Source code for mdtools.run_time_info

# This file is part of MDTools.
# Copyright (C) 2021  The MDTools Development Team and all contributors
# listed in the file AUTHORS.rst
#
# MDTools is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# MDTools is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with MDTools.  If not, see <http://www.gnu.org/licenses/>.


"""
Classes and functions for getting run time information

This module can be called from :mod:`mdtools` via the shortcut ``rti``::

    import mdtools as mdt
    mdt.rti  # instead of mdt.run_time_info

"""


# Standard libraries
import sys
import os
import warnings
from datetime import datetime
# Third party libraries
import psutil
import numpy as np
import MDAnalysis as mda
from MDAnalysis.lib.log import ProgressBar
# Local application/library specific imports
import mdtools as mdt


class ProgressBar(ProgressBar):
    """
    Display a visual progress bar and time estimate.

    The :class:`ProgressBar` wraps an iterable and returns an iterator
    that behaves exactly like the wrapped iterable, but additionally
    prints a dynamically updating progress bar whenever a value is
    requested.

    See Also
    --------
    :class:`MDAnalysis.lib.log.ProgressBar` :
        Parent class
    :class:`tqdm.auto.tqdm` :
        Grandparent class

    Notes
    -----
    This class is derived from :class:`MDAnalysis.lib.log.ProgressBar`,
    which in turn is derived from :class:`tqdm.auto.tqdm`.  The only
    difference to :class:`tqdm.auto.tqdm` is that some default
    arguments are changed:

        * `ascii` is set to ``True``.
        * `unit` is set to ``'frames'``.
        * `unit_scale` is set to ``True``.
        * The `bar_format` is changed to switch off a possible
          inversion of 'unit/s' to 's/unit' (see also
          https://github.com/tqdm/tqdm/issues/72).
        * `mininterval` is set to ``300`` seconds.
        * `maxinterval` is set to ``3600`` seconds.

    See the MDAnalysis_ and tqdm_ documentations for further
    information.

    .. _MDAnalysis: https://docs.mdanalysis.org/stable/documentation_pages/lib/log.html#MDAnalysis.lib.log.ProgressBar
    .. _tqdm: https://tqdm.github.io/docs/tqdm/#__init__

    Example
    -------
    .. code-block::

        for ts in mdt.rti.ProgressBar(u.trajectory):
            # Perform analysis

    will produce something similar to::

        25%|#####1    | 25/100 [00:15<00:45,  1.67frames/s]
    """

    def __init__(self, *args, **kwargs):
        """
        Initialize the :class:`ProgressBar`.

        Parameters
        ----------
        args : list, optional
            Non-keyword arguments.  See
            :class:`MDAnalysis.lib.log.ProgressBar` for possible
            choices.
        kwargs : dict, optional
            Keyword arguments.  See
            :class:`MDAnalysis.lib.log.ProgressBar` for possible
            choices.  Any of the defaults listed in the class
            documentation can be overridden here.
        """
        # Defaults that are only applied when the caller did not pass
        # the corresponding keyword explicitly.
        mdt_defaults = {
            'ascii': True,
            'unit': "frames",
            'mininterval': 300,
            'maxinterval': 3600,
            'unit_scale': True,
            # `rate_noinv_fmt` prevents tqdm from flipping 'unit/s' to
            # 's/unit' for slow iterations.
            'bar_format': ("{l_bar}{bar}|"
                           " {n_fmt}/{total_fmt}"
                           " [{elapsed}<{remaining},"
                           " {rate_noinv_fmt}"
                           "{postfix}]"),
        }
        for option, default in mdt_defaults.items():
            kwargs.setdefault(option, default)
        super().__init__(*args, **kwargs)
def get_num_CPUs():
    """
    Get the number of available CPUs.

    The number of available CPUs is obtained in decreasing precedence
    from the environment variables:

        1. OMP_NUM_THREADS
        2. SLURM_CPUS_PER_TASK
        3. SLURM_JOB_CPUS_PER_NODE
        4. SLURM_CPUS_ON_NODE
        5. or from the Python function :func:`os.cpu_count()`

    Returns
    -------
    num_CPUs : int or None
        The number of available CPUs.  ``None`` is only returned if
        none of the environment variables yields a usable value and
        :func:`os.cpu_count` cannot determine the number of CPUs.

    Notes
    -----
    Some of the environment variables may legally hold more than a
    single integer, e.g. ``OMP_NUM_THREADS='4,2'`` (nested parallelism)
    or ``SLURM_JOB_CPUS_PER_NODE='16(x2),8'``.  In these cases the
    leading CPU count is used.  Environment variables whose value does
    not start with an integer (e.g. empty strings) are skipped and the
    variable with the next lower precedence is tried.
    """
    env_vars = ('OMP_NUM_THREADS',
                'SLURM_CPUS_PER_TASK',
                'SLURM_JOB_CPUS_PER_NODE',
                'SLURM_CPUS_ON_NODE')
    for env_var in env_vars:
        value = os.environ.get(env_var)
        if value is None:
            continue
        # Reduce list-like values ('4,2', '16(x2),8') to their leading
        # count.  Plain integers pass through unchanged.
        leading_count = value.split(',', 1)[0].split('(', 1)[0]
        try:
            return int(leading_count)
        except ValueError:
            # Unusable value -> try the next environment variable
            # instead of crashing.
            continue
    return os.cpu_count()
def mem_usage(unit='MiB'):
    """
    Get current memory usage.

    Returns the current memory usage (resident set size) of the process
    that calls this function.

    Parameters
    ----------
    unit : {'B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'KB', 'MB', 'GB', \
'TB', 'PB'}
        String determining the unit in which the memory usage should be
        returned.  Default is mebibytes (``'MiB'``).

    Returns
    -------
    mem : float
        Memory currently used by the calling process.  Returns ``None``
        if the memory usage cannot be determined.

    Raises
    ------
    ValueError
        If `unit` is not one of the supported units.
    """
    # Number of bytes per unit.
    bytes_per_unit = {'B': 1,
                      'KiB': 2**10,
                      'MiB': 2**20,
                      'GiB': 2**30,
                      'TiB': 2**40,
                      'PiB': 2**50,
                      'KB': 1e3,
                      'MB': 1e6,
                      'GB': 1e9,
                      'TB': 1e12,
                      'PB': 1e15}
    # Validate the unit first so that a typo is reported as such
    # instead of surfacing as an unbound local variable later on.
    try:
        scaling = bytes_per_unit[unit]
    except (KeyError, TypeError):
        raise ValueError(
            "'unit' must be one of {}, but you gave {!r}"
            .format(sorted(bytes_per_unit), unit)
        ) from None

    try:
        rss = psutil.Process(os.getpid()).memory_info().rss
    except (psutil.NoSuchProcess, psutil.AccessDenied) as err:
        warnings.warn("Cannot get memory usage of process {} because of"
                      " {}".format(os.getpid(), err),
                      RuntimeWarning)
        return None
    return rss / scaling
def run_time_info():
    """
    Collect some information about the currently running script.

    Returns
    -------
    script_name : string
        The name of the running script (base name of ``sys.argv[0]``).
    command_line : string
        The command line input used to start the script, i.e. the
        script name followed by all command-line arguments.
    cwd : string
        The current working directory.
    exe : string
        The exact call of the executable script (``sys.argv[0]``).
    mdt_version : string
        The version number of MDTools.
    python_version : string
        The version number of Python in the form ``major.minor.micro``.

    See Also
    --------
    :func:`mdtools.run_time_info.run_time_info_str` :
        Create a string containing some run time information
    """
    script_name = str(os.path.basename(sys.argv[0]))
    arguments = " ".join(sys.argv[1:])
    command_line = "{} {}".format(script_name, arguments)
    # major.minor.micro
    python_version = ".".join(str(part) for part in sys.version_info[:3])
    return (script_name,
            command_line,
            os.getcwd(),
            sys.argv[0],
            mdt.__version__,
            python_version)
def run_time_info_str(indent=0):
    """
    Create a string containing some run time information.

    The string can be printed directly to standard output using
    :func:`print`.

    The information contained in the string is:

        * The date and time the script (actually this function) was
          called.
        * The MDTools copyright notice.
        * The information generated by
          :func:`mdtools.run_time_info.run_time_info`.

    Parameters
    ----------
    indent : int, optional
        Number of spaces to indent the information string.  Negative
        indentation is treated as zero indentation.

    Returns
    -------
    rti : str
        Human readable string containing the above listed content.

    See Also
    --------
    :func:`mdtools.run_time_info.run_time_info` :
        Generate some run time information
    :func:`mdtools.file_handler.header_str` :
        Create a string containing some run time information to be used
        as header for text files
    :func:`mdtools.file_handler.indent` :
        Indent a text
    """
    now = datetime.now()
    script, command_line, cwd, exe, version, pversion = run_time_info()
    # One list entry per output line.  The lines are joined by newlines
    # without a trailing newline at the very end.
    lines = [
        "{}".format(script),
        "{}".format(now.strftime('%Y/%m/%d %H:%M')),
        "",
        mdt.__copyright_notice__,
        "",
        "",
        "Command line input:",
        " {}".format(command_line),
        "Working directory:",
        " {}".format(cwd),
        "Executable:",
        " {}".format(exe),
        "mdtools version:",
        " {}".format(version),
        "Python version:",
        " {}".format(pversion),
    ]
    rti = "\n".join(lines)
    if indent > 0:
        rti = mdt.fh.indent(rti, amount=indent, char=" ")
    return rti
def ag_info_str(ag, indent=0):
    """
    Create a string containing information about a MDAnalysis
    :class:`~MDAnalysis.core.groups.AtomGroup`.

    The string can be printed directly to standard output using
    :func:`print`.

    The information contained in the string is:

        * The total number of

            - :class:`Segments <MDAnalysis.core.groups.Segment>`
            - :class:`Residues <MDAnalysis.core.groups.Residue>`
            - :attr:`Fragments
              <MDAnalysis.core.groups.AtomGroup.fragments>`
            - :class:`Atoms <MDAnalysis.core.groups.Atom>`

        * The number of different

            - :class:`Segments <MDAnalysis.core.groups.Segment>`
            - :class:`Residues <MDAnalysis.core.groups.Residue>`
            - :class:`Atoms <MDAnalysis.core.groups.Atom>`
            - And their respective names.

        * Whether the input :class:`~MDAnalysis.core.groups.AtomGroup`
          is an :class:`~MDAnalysis.core.groups.UpdatingAtomGroup` or
          not.

    Refer to the MDAnalysis' user guide for an
    |explanation_of_these_terms|.

    Parameters
    ----------
    ag : MDAnalysis.core.groups.AtomGroup
        The MDAnalysis :class:`~MDAnalysis.core.groups.AtomGroup` for
        which to create the information string.
    indent : int, optional
        Number of spaces to indent the information string.  Negative
        indentation is treated as zero indentation.

    Returns
    -------
    ag_info : str
        Human readable string containing the above listed content.

    See Also
    --------
    :func:`mdtools.file_handler.indent` :
        Indent a text
    """
    segids = np.unique(ag.segids)
    resnames = np.unique(ag.resnames)
    atmnames = np.unique(ag.names)
    atmtypes = np.unique(ag.types)
    try:
        n_fragments = ag.n_fragments
    except mda.exceptions.NoDataError:
        # No fragment information available for this atom group.
        n_fragments = "N/A"

    # Names are listed as 'A' 'B' 'C'; the outer quotes are part of the
    # line templates below.
    name_sep = "' '"
    lines = [
        "Segments: {}".format(ag.n_segments),
        " Different segments: {}".format(len(segids)),
        " Segment name(s): '{}'".format(name_sep.join(segids)),
        "Residues: {}".format(ag.n_residues),
        " Different residues: {}".format(len(resnames)),
        " Residue name(s): '{}'".format(name_sep.join(resnames)),
        "Atoms: {}".format(ag.n_atoms),
        # The atom names themselves are deliberately not listed: if
        # each atom in a molecule has a different name (frequently the
        # atoms are numbered sequentially), the list is too long to be
        # usefully printed on screen.
        " Different atom names: {}".format(len(atmnames)),
        " Different atom types: {}".format(len(atmtypes)),
        " Atom type(s): '{}'".format(name_sep.join(atmtypes)),
        "Fragments: {}".format(n_fragments),
        "Updating atom group: {}".format(
            isinstance(ag, mda.core.groups.UpdatingAtomGroup)
        ),
    ]
    ag_info = "\n".join(lines)
    if indent > 0:
        ag_info = mdt.fh.indent(ag_info, amount=indent, char=" ")
    return ag_info
def dtrj_trans_info(dtrj):
    """
    Generate basic information about the state transitions in a
    discrete trajectory.

    .. deprecated:: 0.0.0.dev0
        :func:`mdtools.run_time_info.dtrj_trans_info` will be replaced
        by :func:`mdtools.run_time_info.dtrj_trans_info_new` in a
        future release.

    Parameters
    ----------
    dtrj : array_like
        The discrete trajectory.  Array of shape ``(n, f)``, where ``n``
        is the number of compounds and ``f`` is the number of frames.
        The elements of `dtrj` are interpreted as the indices of the
        states in which a given compound is at a given frame.  A
        one-dimensional array of shape ``(f,)`` is interpreted as the
        trajectory of a single compound.

    Returns
    -------
    n_stay : int
        Number of compounds that are in the same state in all frames.
    always_neg : int
        Number of compounds that are in a negative state in all frames.
    never_neg : int
        Number of compounds that are not in a negative state in any
        frame.
    n_frames_neg : int
        Total number of frames with negative states (summed over all
        compounds).
    n_trans : int
        Total number of state transitions.
    pos2pos : int
        Total number of Positive -> Positive transitions (transitions
        from one state with a positive/zero state index to another
        state with a positive/zero state index).
    pos2neg : int
        Number of Positive -> Negative transitions.
    neg2pos : int
        Number of Negative -> Positive transitions.
    neg2neg : int
        Number of Negative -> Negative transitions.

    Raises
    ------
    ValueError
        If `dtrj` has neither one nor two dimensions or if the
        Positive <-> Negative transition counts do not add up to the
        total number of transitions (e.g. because `dtrj` contains
        NaNs).

    Warns
    -----
    RuntimeWarning
        If at least one element of `dtrj` is not an integer.

    See Also
    --------
    :func:`mdtools.run_time_info.dtrj_trans_info_str` :
        Create a string containing basic information about the state
        transitions in a discrete trajectory

    Note
    ----
    Positive states are states with a state index equal(!) to or
    greater than zero.  Negative states are states with a state index
    less than zero.
    """
    dtrj = np.asarray(dtrj)
    # Bring 1-dimensional input to shape (1, f) *before* transposing.
    # Expanding after the transposition would turn a single-compound
    # trajectory into f compounds with one frame each.
    if dtrj.ndim == 1:
        dtrj = np.expand_dims(dtrj, axis=0)
    elif dtrj.ndim != 2:
        raise ValueError("The discrete trajectory must have one or two"
                         " dimensions")
    # From here on: shape (f, n), i.e. frames along the first axis.
    dtrj = np.asarray(dtrj.T, order='C')
    if np.any(np.modf(dtrj)[0] != 0):
        warnings.warn("At least one element of the discrete trajectory"
                      " is not an integer", RuntimeWarning)

    n_stay = np.count_nonzero(np.all(dtrj == dtrj[0], axis=0))

    dtrj_neg = (dtrj < 0)
    always_neg = np.count_nonzero(np.all(dtrj_neg, axis=0))
    never_neg = np.count_nonzero(~np.any(dtrj_neg, axis=0))
    n_frames_neg = np.count_nonzero(dtrj_neg)
    del dtrj_neg  # Free memory, the mask is not needed anymore.

    # transitions[i, j] is True if compound j changes its state between
    # frame i and frame i+1.
    transitions = (np.diff(dtrj, axis=0) != 0)
    n_trans = np.count_nonzero(transitions)
    # States right before and right after each transition.  Both
    # selections use the same mask and therefore have the same order.
    state_before = dtrj[:-1][transitions]
    state_after = dtrj[1:][transitions]
    pos2pos = np.count_nonzero((state_before >= 0) & (state_after >= 0))
    pos2neg = np.count_nonzero((state_before >= 0) & (state_after < 0))
    neg2pos = np.count_nonzero((state_before < 0) & (state_after >= 0))
    neg2neg = np.count_nonzero((state_before < 0) & (state_after < 0))
    # The check can fail if a state is neither >= 0 nor < 0 (NaN).
    if pos2pos + pos2neg + neg2pos + neg2neg != n_trans:
        raise ValueError("The sum of Positive <-> Negative transitions"
                         " ({}) is not equal to the total number of"
                         " transitions ({}). This should not have"
                         " happened"
                         .format(pos2pos+pos2neg+neg2pos+neg2neg,
                                 n_trans))

    return (n_stay,
            always_neg,
            never_neg,
            n_frames_neg,
            n_trans,
            pos2pos,
            pos2neg,
            neg2pos,
            neg2neg)
def dtrj_trans_info_str(dtrj):
    """
    Create a string containing basic information about the state
    transitions in a discrete trajectory.

    .. deprecated:: 0.0.0.dev0
        :func:`mdtools.run_time_info.dtrj_trans_info_str` will be
        replaced by
        :func:`mdtools.run_time_info.dtrj_trans_info_str_new` in a
        future release.

    The string can be printed directly to standard output using
    :func:`print`.

    The information contained in the string is:

        * The number of frames of the discrete trajectory (per
          compound).
        * The number of compounds (or in other words, the number of
          single-compound trajectories contained in `dtrj`).
        * The information generated by
          :func:`mdtools.run_time_info.dtrj_trans_info`.

    Parameters
    ----------
    dtrj : array_like
        The discrete trajectory.  Array of shape ``(n, f)``, where ``n``
        is the number of compounds and ``f`` is the number of frames.
        The elements of `dtrj` are interpreted as the indices of the
        states in which a given compound is at a given frame.  A
        one-dimensional array of shape ``(f,)`` is interpreted as the
        trajectory of a single compound.

    Returns
    -------
    dti : str
        Human readable string containing the above listed content.  If
        the trajectory contains no state transitions at all, the
        percentages of the individual transition types are reported as
        zero.

    Raises
    ------
    ValueError
        If `dtrj` has neither one nor two dimensions.

    See Also
    --------
    :func:`mdtools.run_time_info.dtrj_trans_info` :
        Generate basic information about the state transitions in a
        discrete trajectory
    :func:`mdtools.file_handler.indent` :
        Indent a text
    """
    # Accept any array-like (lists, tuples, ...), not only arrays.
    dtrj = np.asarray(dtrj)
    if dtrj.ndim == 1:
        # Single-compound trajectory -> shape (1, f).
        dtrj = np.expand_dims(dtrj, axis=0)
    elif dtrj.ndim != 2:
        raise ValueError("The discrete trajectory must have one or two"
                         " dimensions")
    n_cmps, n_frames = dtrj.shape
    (n_stay, always_neg, never_neg, n_frames_neg,
     n_trans, pos2pos, pos2neg, neg2pos, neg2neg) = dtrj_trans_info(dtrj)

    def percentage(count):
        # Avoid a ZeroDivisionError for trajectories without any state
        # transition.
        return 100 * count / n_trans if n_trans else 0.0

    dti = ("No. of frames (per compound): {:>12d}\n"
           .format(n_frames))
    dti += ("No. of compounds: {:>12d}\n"
            .format(n_cmps))
    dti += ("No. of compounds that never leave their state: {:>12d}\n"
            .format(n_stay))
    dti += ("No. of compounds that are always in a negative state: {:>12d}\n"
            .format(always_neg))
    dti += ("No. of compounds that are never in a negative state: {:>12d}\n"
            .format(never_neg))
    dti += ("Total No. of frames with negative states: {:>12d}\n"
            .format(n_frames_neg))
    dti += "\n"
    dti += ("Total No. of state transitions: {:>12d}\n"
            .format(n_trans))
    dti += ("No. of Positive -> Positive transitions: {:>12d} ({:>8.4f} %)\n"
            .format(pos2pos, percentage(pos2pos)))
    dti += ("No. of Positive -> Negative transitions: {:>12d} ({:>8.4f} %)\n"
            .format(pos2neg, percentage(pos2neg)))
    dti += ("No. of Negative -> Positive transitions: {:>12d} ({:>8.4f} %)\n"
            .format(neg2pos, percentage(neg2pos)))
    dti += ("No. of Negative -> Negative transitions: {:>12d} ({:>8.4f} %)\n"
            .format(neg2neg, percentage(neg2neg)))
    dti += "Positive states are states with a state index >= 0\n"
    dti += "Negative states are states with a state index < 0\n"
    return dti