#!/usr/bin/env python3
# This file is part of MDTools.
# Copyright (C) 2021 The MDTools Development Team and all contributors
# listed in the file AUTHORS.rst
#
# MDTools is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# MDTools is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with MDTools. If not, see <http://www.gnu.org/licenses/>.
"""
Calculate the number of contacts between two MDAnalysis
:class:`AtomGroups <MDAnalysis.core.groups.AtomGroup>`.
.. todo::
Finish docstring.
Contacts are binned into histograms according to how many different
selection :class:`Atoms <MDAnalysis.core.groups.Atom>`/compounds have
contact with a given reference
:class:`~MDAnalysis.core.groups.Atom`/compound and according to how many
contacts exist between a given reference-selection pair.
Options
-------
-f Trajectory file. See |supported_coordinate_formats| of
MDAnalysis.
-s Topology file. See |supported_topology_formats| of
MDAnalysis.
-o Output filename.
-b First frame to read from the trajectory. Frame numbering
starts at zero. Default: ``0``.
-e Last frame to read from the trajectory. This is exclusive,
i.e. the last frame read is actually ``END - 1``. A value
of ``-1`` means to read the very last frame. Default:
``-1``.
--every Read every n-th frame from the trajectory. Default: ``1``.
--ref Selection string to select the reference group. See
MDAnalysis' |selection_syntax| for possible choices.
--sel Selection string to select the selection group. See
MDAnalysis' |selection_syntax| for possible choices.
-c Cutoff distance in Angstrom. A reference and selection atom
are considered to be in contact, if their distance is less
than or equal to this cutoff.
--refcmp {'segments', 'residues', 'fragments'}
The compounds of the reference group to use for calculating
the contact histograms. Contact histograms are always
calculated for single atom contacts, but also for contacts
between entire compounds. Specify here, which compounds to
use for the reference group. Compounds can be 'segments',
'residues' or 'fragments'. Refer to the MDAanalysis user
guide for an |explanation_of_these_terms|. Note that in any
case, even if ``REFCMP`` is e.g. 'residues', only the atoms
belonging to the reference group are taken into account,
even if the compound might comprise additional atoms that
are not contained in the reference group. Default:
``'residues'``
--selcmp {'segments', 'residues', 'fragments'}
Same for the selection group. Default: ``'residues'``
--updating-ref
Use an :class:`~MDAnalysis.core.groups.UpdatingAtomGroup`
for the reference group. Selection expressions of
:class:`UpdatingAtomGroups <MDAnalysis.core.groups.UpdatingAtomGroup>`
are re-evaluated every
:attr:`time step <MDAnalysis.coordinates.base.Timestep.dt>`.
This is e.g. useful for position-based selections like
'type Li and prop z <= 2.0'.
--updating-sel
Use an :class:`~MDAnalysis.core.groups.UpdatingAtomGroup`
for the selection group.
See Also
--------
:mod:`lig_change_at_pos_change_blocks` :
Compare the coordination environment of reference compounds before
and after they have changed their position
Notes
-----
TODO
Examples
--------
TODO
"""
__author__ = "Andreas Thum"
# Standard libraries
import sys
import os
import warnings
import argparse
from datetime import datetime, timedelta
# Third party libraries
import psutil
import numpy as np
# Local application/library specific imports
import mdtools as mdt
if __name__ == '__main__':
timer_tot = datetime.now()
proc = psutil.Process()
proc.cpu_percent() # Initiate monitoring of CPU usage
parser = argparse.ArgumentParser(
description=(
"Calculate the number of contacts between two MDAnalysis"
" AtomGroups. For more information, refer to the"
" documetation of this script."
)
)
parser.add_argument(
'-f',
dest='TRJFILE',
type=str,
required=True,
help="Trajectory file."
)
parser.add_argument(
'-s',
dest='TOPFILE',
type=str,
required=True,
help="Topology file."
)
parser.add_argument(
'-o',
dest='OUTFILE',
type=str,
required=True,
help="Output filename."
)
parser.add_argument(
'-b',
dest='BEGIN',
type=int,
required=False,
default=0,
help="First frame to read from the trajectory. Frame numbering"
" starts at zero. Default: %(default)s."
)
parser.add_argument(
'-e',
dest='END',
type=int,
required=False,
default=-1,
help="Last frame to read from the trajectory (exclusive)."
" Default: %(default)s."
)
parser.add_argument(
'--every',
dest='EVERY',
type=int,
required=False,
default=1,
help="Read every n-th frame from the trajectory. Default:"
" %(default)s."
)
parser.add_argument(
'--ref',
dest='REF',
type=str,
nargs='+',
required=True,
help="Selection string to select the reference group"
)
parser.add_argument(
'--sel',
dest='SEL',
type=str,
nargs='+',
required=True,
help="Selection string to select the selection group."
)
parser.add_argument(
'-c',
dest='CUTOFF',
type=float,
required=True,
help="Cutoff distance in Angstrom."
)
parser.add_argument(
'--refcmp',
dest='REFCMP',
type=str,
required=False,
choices=('segments', 'residues', 'fragments'),
default='residues',
help="Reference compound. Default: %(default)s"
)
parser.add_argument(
'--selcmp',
dest='SELCMP',
type=str,
required=False,
choices=('segments', 'residues', 'fragments'),
default='residues',
help="Selection compound. Default: %(default)s"
)
parser.add_argument(
'--updating-ref',
dest='UPDATING_REF',
required=False,
default=False,
action='store_true',
help="Use an UpdatingAtomGroup for the reference group."
)
parser.add_argument(
'--updating-sel',
dest='UPDATING_SEL',
required=False,
default=False,
action='store_true',
help="Use an UpdatingAtomGroup for the selection group."
)
args = parser.parse_args()
print(mdt.rti.run_time_info_str())
if args.CUTOFF <= 0:
raise ValueError("-c ({}) must be positive".format(args.CUTOFF))
if mdt.rti.get_num_CPUs() > 1:
mdabackend = 'OpenMP'
else:
mdabackend = 'serial'
print("\n")
u = mdt.select.universe(top=args.TOPFILE, trj=args.TRJFILE)
print("\n")
print("Creating selections...")
timer = datetime.now()
ref = u.select_atoms(' '.join(args.REF), updating=args.UPDATING_REF)
sel = u.select_atoms(' '.join(args.SEL), updating=args.UPDATING_SEL)
if ref.n_atoms == 0 and not args.UPDATING_REF:
raise ValueError("The reference group contains no atoms")
if sel.n_atoms == 0 and not args.UPDATING_SEL:
raise ValueError("The selection group contains no atoms")
print("Reference group: '{}'".format(' '.join(args.REF)))
print(mdt.rti.ag_info_str(ag=ref, indent=2))
print("Selection group: '{}'".format(' '.join(args.SEL)))
print(mdt.rti.ag_info_str(ag=sel, indent=2))
print("Elapsed time: {}".format(datetime.now() - timer))
print("Current memory usage: {:.2f} MiB"
.format(proc.memory_info().rss / 2**20))
print("\n")
BEGIN, END, EVERY, N_FRAMES = mdt.check.frame_slicing(
start=args.BEGIN,
stop=args.END,
step=args.EVERY,
n_frames_tot=u.trajectory.n_frames
)
if not args.UPDATING_REF:
natms_per_refcmp = mdt.strc.natms_per_cmp(
ag=ref, cmp=args.REFCMP, check_contiguous=True
)
if not args.UPDATING_SEL:
natms_per_selcmp = mdt.strc.natms_per_cmp(
ag=sel, cmp=args.SELCMP, check_contiguous=True
)
if not args.UPDATING_REF and not args.UPDATING_SEL:
dist_array_tmp = np.full(
(ref.n_atoms, sel.n_atoms), np.nan, dtype=np.float64
)
else:
dist_array_tmp = None
expected_max_contacts = 16 # Will be increased if necessary
# Histogram for refatm_selatm:
hist_refatm_selatm = np.zeros(expected_max_contacts, dtype=np.uint32)
# Histograms for refatm_selcmp, refcmp_selatm, refcmp_selcmp:
hists_cmp = [np.zeros((4, expected_max_contacts), dtype=np.uint32)
for i in range(3)]
n_atms = np.zeros(2, dtype=int) # refatms and selatms
n_cmps = np.zeros_like(n_atms) # refcmps and selcmps
n_pairs = np.zeros(len(hists_cmp), dtype=np.uint32)
print("\n")
print("Reading trajectory...")
print("Total number of frames: {:>8d}".format(u.trajectory.n_frames))
print("Frames to read: {:>8d}".format(N_FRAMES))
print("First frame to read: {:>8d}".format(BEGIN))
print("Last frame to read: {:>8d}".format(END - 1))
print("Read every n-th frame: {:>8d}".format(EVERY))
print("Time first frame: {:>12.3f} (ps)"
.format(u.trajectory[BEGIN].time))
print("Time last frame: {:>12.3f} (ps)"
.format(u.trajectory[END - 1].time))
print("Time step first frame: {:>12.3f} (ps)"
.format(u.trajectory[BEGIN].dt))
print("Time step last frame: {:>12.3f} (ps)"
.format(u.trajectory[END - 1].dt))
timer = datetime.now()
trj = mdt.rti.ProgressBar(u.trajectory[BEGIN:END:EVERY])
for ts in trj:
if args.UPDATING_REF:
natms_per_refcmp = mdt.strc.natms_per_cmp(
ag=ref, cmp=args.REFCMP, check_contiguous=True
)
if args.UPDATING_SEL:
natms_per_selcmp = mdt.strc.natms_per_cmp(
ag=sel, cmp=args.SELCMP, check_contiguous=True
)
# Generate contact matrices:
cm_refatm_selatm = mdt.strc.contact_matrix(
ref=ref,
sel=sel,
cutoff=args.CUTOFF,
box=ts.dimensions,
result=dist_array_tmp,
mdabackend=mdabackend,
)
n_atms += cm_refatm_selatm.shape
cmp_ccms = cmp_contact_count_matrices(
cm=cm_refatm_selatm,
natms_per_refcmp=natms_per_refcmp,
natms_per_selcmp=natms_per_selcmp,
dtype=np.uint32
)
n_cmps += cmp_ccms[-1].shape
if cm_refatm_selatm.shape[0] == 0:
# Number of reference atoms (and compounds) is zero
continue
# Pure atom-atom contact histogram (refatm_selatm):
hists_tmp = mdt.strc.contact_hist_refcmp_diff_selcmp(
cm=cm_refatm_selatm,
minlength=hist_refatm_selatm.shape[-1],
dtype=np.uint32
)
hist_refatm_selatm, hists_tmp = mdt.nph.match_shape(
hist_refatm_selatm,
hists_tmp
)
hist_refatm_selatm += hists_tmp
# Compound histograms:
# (refatm_selcmp, refcmp_selatm, refcmp_selcmp)
for i, cmp_ccm in enumerate(cmp_ccms):
hists_tmp = np.asarray(mdt.strc.contact_hists(
cm=cmp_ccm,
minlength=hists_cmp[i].shape[-1],
dtype=np.uint32
))
hists_cmp[i], hists_tmp = mdt.nph.match_shape(hists_cmp[i],
hists_tmp)
n_pairs[i] += np.sum(hists_tmp[-1])
hists_cmp[i] += hists_tmp
# ProgressBar update:
progress_bar_mem = proc.memory_info().rss / 2**20
trj.set_postfix_str("{:>7.2f}MiB".format(progress_bar_mem),
refresh=False)
trj.close()
del cm_refatm_selatm, cmp_ccms, dist_array_tmp
print("Elapsed time: {}".format(datetime.now() - timer))
print("Current memory usage: {:.2f} MiB"
.format(proc.memory_info().rss / 2**20))
if np.any(n_atms == 0):
warnings.warn("The total number of reference or selection atoms"
" is zero. The output will be meaningless",
RuntimeWarning)
if np.any(n_pairs == 0):
warnings.warn("At least one of the pair histograms is void."
" This histogram will be meaningless",
RuntimeWarning)
# Average contact numbers:
n_refatms_bound = np.sum(hist_refatm_selatm[1:])
n_refcmps_bound = np.sum(hists_cmp[1][0][1:]) # refcmp_diff_selatm
# Averages of pure atom-atom histogram (refatm_selatm):
avs_refatm_selatm = np.zeros(2, dtype=np.float64)
tot_contacts = np.sum(hist_refatm_selatm *
np.arange(len(hist_refatm_selatm)))
# normalized by the total number of reference atoms
avs_refatm_selatm[0] = tot_contacts / n_atms[0]
# normalized by the number of refatms that are bound to a selatm
avs_refatm_selatm[1] = tot_contacts / n_refatms_bound
# Averages of compound histograms:
# (refatm_selcmp, refcmp_selatm, refcmp_selcmp)
avs_cmp = np.zeros((len(hists_cmp), len(hists_cmp[0][:-1]), 2),
dtype=np.float64)
for i, hists in enumerate(hists_cmp):
for j, hist in enumerate(hists[:-1]):
tot_contacts = np.sum(hist * np.arange(len(hist)))
if i == 0: # refatm_selcmp
avs_cmp[i][j][0] = tot_contacts / n_atms[0]
avs_cmp[i][j][1] = tot_contacts / n_refatms_bound
else: # refcmp_selatm, refcmp_selcmp
avs_cmp[i][j][0] = tot_contacts / n_cmps[0]
avs_cmp[i][j][1] = tot_contacts / n_refcmps_bound
# Pair histograms (refatm_selcmp, refcmp_selatm, refcmp_selcmp):
avs_pair = np.zeros(len(n_pairs), dtype=np.float64)
for i, n in enumerate(n_pairs):
tot_contacts = np.sum(hists_cmp[i][-1] *
np.arange(len(hists_cmp[i][-1])))
avs_pair[i] = tot_contacts / n
# Normalization:
hist_refatm_selatm = hist_refatm_selatm / n_atms[0]
for i in range(len(hists_cmp)):
hists_cmp[i] = hists_cmp[i].astype(np.float64)
if i == 0: # refatm_selcmp
hists_cmp[i][:-1] /= n_atms[0]
else: # refcmp_selatm, refcmp_selcmp
hists_cmp[i][:-1] /= n_cmps[0]
for i, n in enumerate(n_pairs):
hists_cmp[i][-1] /= n
# Bring all histograms to the same length:
max_length = len(hist_refatm_selatm)
for hists in hists_cmp:
max_length = max(max_length, hists.shape[-1])
hist_refatm_selatm = mdt.nph.extend(hist_refatm_selatm, max_length)
for i, hists in enumerate(hists_cmp):
hists_cmp[i] = mdt.nph.extend(hists, max_length, axis=-1)
# Find the last non-zero value up to which to write the histograms
# to file:
if np.any(hist_refatm_selatm != 0):
last_nonzero = np.flatnonzero(hist_refatm_selatm)[-1]
else:
last_nonzero = 0
for hists in hists_cmp:
if np.any(hists != 0):
last_nonzero = max(last_nonzero,
np.max(np.nonzero(hists)[1]))
print("\n")
print("Creating output...")
timer = datetime.now()
mdt.fh.write_header(args.OUTFILE)
with mdt.fh.xopen(args.OUTFILE, 'a') as outfile:
outfile.write("# \n")
outfile.write("# \n")
outfile.write("# Reference: '{}'\n".format(' '.join(args.REF)))
outfile.write(mdt.fh.indent(text=mdt.rti.ag_info_str(ag=ref),
amount=1,
char="# ")
+ "\n")
outfile.write("# \n")
outfile.write("# Selection: '{}'\n".format(' '.join(args.SEL)))
outfile.write(mdt.fh.indent(text=mdt.rti.ag_info_str(ag=sel),
amount=1,
char="# ")
+ "\n")
outfile.write("# \n")
outfile.write("# \n")
outfile.write("# Contact histograms\n")
outfile.write("# Cutoff (Angstrom): {}\n".format(args.CUTOFF))
outfile.write("# Reference compound = '{}'\n".format(args.REFCMP))
outfile.write("# Selection compound = '{}'\n".format(args.SELCMP))
outfile.write("# Number of frames: {:>12d}\n".format(N_FRAMES))
outfile.write("# Total No. of reference atoms (per frame): {:>12d} ({:>12.3f})\n".format(n_atms[0], n_atms[0] / N_FRAMES))
outfile.write("# Total No. of reference compounds (per frame): {:>12d} ({:>12.3f})\n".format(n_cmps[0], n_cmps[0] / N_FRAMES))
outfile.write("# Total No. of selection atoms (per frame): {:>12d} ({:>12.3f})\n".format(n_atms[1], n_atms[1] / N_FRAMES))
outfile.write("# Total No. of selection compounds (per frame): {:>12d} ({:>12.3f})\n".format(n_cmps[1], n_cmps[1] / N_FRAMES))
outfile.write("# Total No. of refatm-selcmp pairs (per frame): {:>12d} ({:>12.3f})\n".format(n_pairs[0], n_pairs[0] / N_FRAMES))
outfile.write("# Total No. of refcmp-selatm pairs (per frame): {:>12d} ({:>12.3f})\n".format(n_pairs[1], n_pairs[1] / N_FRAMES))
outfile.write("# Total No. of refcmp-selcmp pairs (per frame): {:>12d} ({:>12.3f})\n".format(n_pairs[2], n_pairs[2] / N_FRAMES))
outfile.write("# \n")
outfile.write("# \n")
outfile.write("# Histogram averages:\n")
outfile.write("# Percentage of refatms bound to at least one selatm: {:10.4e}\n".format(1 - hist_refatm_selatm[0]))
outfile.write("# Percentage of refcmps bound to at least one selatm: {:10.4e}\n".format(1 - hists_cmp[1][0][0])) # refcmp_diff_selatm
outfile.write("# \n")
outfile.write("# (2)* Av. selatm coordination No. of all refatms: {:10.4e} (Every refatm has on average contact with this many selatms)\n".format(avs_refatm_selatm[0]))
outfile.write("# (2)' Av. selatm coordination No. of bound refatms: {:10.4e} (Every refatm bound to selatms has on average contact with this many selatms)\n".format(avs_refatm_selatm[1]))
outfile.write("# \n")
outfile.write("# (3)* Av. selcmp coordination No. of all refatms: {:10.4e} (Every refatm has on average contact with this many different selcmps)\n".format(avs_cmp[0][0][0]))
outfile.write("# (3)' Av. selcmp coordination No. of bound refatms: {:10.4e} (Every refatm bound to selatms has on average contact with this many different selcmps)\n".format(avs_cmp[0][0][1]))
outfile.write("# (5)* Av. total selcmp coordination No. of all refatms: {:10.4e} (Every refatm has on average this many contacts with selcmps)\n".format(avs_cmp[0][2][0]))
outfile.write("# (5)' Av. total selcmp coordination No. of bound refatms: {:10.4e} (Every refatm bound to selatms has on average this many contacts with selcmps)\n".format(avs_cmp[0][2][1]))
outfile.write("# (6)° Av. No. of 'bonds' between refatm-selcmp pairs: {:10.4e} (Every refatm-selcmp pair is on average connected via this many 'bonds')\n".format(avs_pair[0]))
outfile.write("# \n")
outfile.write("# (7)* Av. selatm coordination No. of all refcmps: {:10.4e} (Every refcmp has on average contact with this many different selatms)\n".format(avs_cmp[1][0][0]))
outfile.write("# (7)' Av. selatm coordination No. of bound refcmps: {:10.4e} (Every refcmp bound to selatms has on average contact with this many different selatms)\n".format(avs_cmp[1][0][1]))
outfile.write("# (9)* Av. total selatm coordination No. of all refcmps: {:10.4e} (Every refcmp has on average this many contacts with selatms)\n".format(avs_cmp[1][2][0]))
outfile.write("# (9)' Av. total selatm coordination No. of bound refcmps: {:10.4e} (Every refcmp bound to selatms has on average this many contacts with selatms)\n".format(avs_cmp[1][2][1]))
outfile.write("# (10)° Av. No. of 'bonds' between refcmp-selatm pairs: {:10.4e} (Every refcmp-selatm pair is on average connected via this many 'bonds')\n".format(avs_pair[1]))
outfile.write("# \n")
outfile.write("# (11)* Av. selcmp coordination No. of all refcmps: {:10.4e} (Every refcmp has on average contact with this many different selcmps)\n".format(avs_cmp[2][0][0]))
outfile.write("# (11)' Av. selcmp coordination No. of bound refcmps: {:10.4e} (Every refcmp bound to selatms has on average contact with this many different selcmps)\n".format(avs_cmp[2][0][1]))
outfile.write("# (13)* Av. total selcmp coordination No. of all refcmps: {:10.4e} (Every refcmp has on average this many contacts with selcmps)\n".format(avs_cmp[2][2][0]))
outfile.write("# (13)' Av. total selcmp coordination No. of bound refcmps: {:10.4e} (Every refcmp bound to selatms has on average this many contacts with selcmps)\n".format(avs_cmp[2][2][1]))
outfile.write("# (14)° Av. No. of 'bonds' between refcmp-selcmp pairs: {:10.4e} (Every refcmp-selcmp pair is on average connected via this many 'bonds')\n".format(avs_pair[2]))
outfile.write("# \n")
outfile.write("# *) Normalized by the total number of reference atoms/compounds\n")
outfile.write("# ') Normalized by the number of bound reference atoms/compounds\n")
outfile.write("# °) Normalized by the number of pairs\n")
outfile.write("# \n")
outfile.write("# \n")
outfile.write("# The columns contain:\n")
outfile.write("# 1 N: Number of contacts between the reference and selection group\n")
outfile.write("# 2 refatm_selatm: % of refatms that have contact with N different selatms (multiple contacts with the same selatm discounted) [refatm_diff_selatm]\n")
outfile.write("# [= % of refatms that have N contacts with selatms (multiple contacts with the same selatm counted) refatm_selatm_tot]\n")
outfile.write("# \n")
outfile.write("# 3 refatm_diff_selcmp: % of refatms that have contact with N different selcmps (multiple contacts with the same selcmp discounted)\n")
outfile.write("# 4 refatm_same_selcmp: % of refatms that have N contacts with the same selcmp (multiple contacts with the same selcmp counted, multiple connections to different selcmps via the same number of 'bonds' discounted)\n")
outfile.write("# 5 refatm_selcmp_tot: % of refatms that have N contacts with selcmps (multiple contacts with the same selcmp counted)\n")
outfile.write("# 6 refatm_selcmp_pair: % of refatm-selcmp pairs connected via N 'bonds' (first element is meaningless)\n")
outfile.write("# \n")
outfile.write("# 7 refcmp_diff_selatm: % of refcmps that have contact with N different selatms (multiple contacts with the same selatm discounted)\n")
outfile.write("# 8 refcmp_same_selatm: % of refcmps that have N contacts with the same selatm (multiple contacts with the same selatm counted, multiple connections to different selatms via the same number of 'bonds' discounted)\n")
outfile.write("# 9 refcmp_selatm_tot: % of refcmps that have N contacts with selatms (multiple contacts with the same selatm counted)\n")
outfile.write("# 10 refcmp_selatm_pair: % of refcmp-selatm pairs connected via N 'bonds' (first element is meaningless)\n")
outfile.write("# \n")
outfile.write("# 11 refcmp_diff_selcmp: % of refcmps that have contact with N different selcmps (multiple contacts with the same selcmp discounted)\n")
outfile.write("# 12 refcmp_same_selcmp: % of refcmps that have N contacts with the same selcmp (multiple contacts with the same selcmp counted, multiple connections to different selcmps via the same number of 'bonds' discounted)\n")
outfile.write("# 13 refcmp_selcmp_tot: % of refcmps that have N contacts with selcmps (multiple contacts with the same selcmp counted)\n")
outfile.write("# 14 refcmp_selcmp_pair: % of refcmp-selcmp pairs connected via N 'bonds' (first element is meaningless)\n")
outfile.write("# \n")
outfile.write("# Note that refatm_selcmp_tot (5) = refatm_selatm_tot (1) (= refatm_selatm) and refcmp_selcmp_tot (13) = refcmp_selatm_tot (9)\n")
outfile.write("# \n")
outfile.write('# Column number:\n')
outfile.write("# {:3d} {:16d} {:16d} {:16d} {:16d} {:16d} {:16d} {:16d} {:16d} {:16d} {:16d} {:16d} {:16d} {:16d}\n"
.format(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14))
outfile.write("# {:>3s} {:>16s} {:>16s} {:>16s} {:>16s} {:>16s} {:>16s} {:>16s} {:>16s} {:>16s} {:>16s} {:>16s} {:>16s} {:>16s}\n"
.format("N", # 01 \vline
"refatm_selatm", # 02 \vline
"refatm_d_selcmp", # 03
"refatm_s_selcmp", # 04
"refatm_selcmp_t", # 05
"refatm_selcmp_p", # 06 \vline
"refcmp_d_selatm", # 07
"refcmp_s_selatm", # 08
"refcmp_selatm_t", # 09
"refcmp_selatm_p", # 10 \vline
"refcmp_d_selcmp", # 11
"refcmp_s_selcmp", # 12
"refcmp_selcmp_t", # 13
"refcmp_selcmp_p")) # 14
for i in range(last_nonzero + 1):
outfile.write(" {:3d} {:16.9e}"
.format(i, hist_refatm_selatm[i]))
for hists in hists_cmp:
outfile.write(2 * " ")
for hist in hists:
outfile.write(" {:16.9e}".format(hist[i]))
outfile.write("\n")
outfile.write("\n")
outfile.write("\n")
outfile.write("# Sums\n")
outfile.write(" {:3d} {:16.9e}"
.format(np.sum(np.arange(last_nonzero + 1)),
np.sum(hist_refatm_selatm)))
for hists in hists_cmp:
outfile.write(2 * " ")
for hist in hists:
outfile.write(" {:16.9e}".format(np.sum(hist)))
outfile.write("\n")
outfile.flush()
print("Created {}".format(args.OUTFILE))
print("Elapsed time: {}".format(datetime.now() - timer))
print("Current memory usage: {:.2f} MiB"
.format(proc.memory_info().rss / 2**20))
print("\n")
print("Checking output for consistency...")
timer = datetime.now()
tol = 1e-4
# Check histogram sums:
if not np.isclose(np.sum(hist_refatm_selatm), 1, rtol=0, atol=tol):
raise ValueError("The sum over 'hist_refatm_selatm' ({}) is not"
" one".format(np.sum(hist_refatm_selatm)))
for i, hists in enumerate(hists_cmp):
for j, hist in enumerate(hists):
if j == 1: # refcmp_same_selcmp
if np.sum(hist) < 1:
raise ValueError("The sum over 'hists_cmp[{}][{}]'"
" ({}) is less than one"
.format(i, j, np.sum(hist)))
if np.any(hist) > 1:
raise ValueError("At least one element of"
" 'hists_cmp[{}][{}]' is greater"
" than one".format(i, j))
else: # refcmp_diff_selcmp, refcmp_selcmp_tot, recmp_selcmp_pair
if not np.isclose(np.sum(hist), 1, rtol=0, atol=tol):
raise ValueError("The sum over 'hists_cmp[{}][{}]'"
" ({}) is not one"
.format(i, j, np.sum(hist)))
# Check first element of each histogram:
for i, hists in enumerate(hists_cmp):
for j, hist in enumerate(hists[:-1]):
if i == 0: # refatm_selcmp
if not np.isclose(hist[0], hist_refatm_selatm[0],
rtol=0, atol=tol):
raise ValueError(
"The percentage of refatms having no contact"
" with any selatm is not the same in"
" 'hists_cmp[{}][{}]' ({}) and"
" 'hist_refatm_selatm' ({})"
.format(i, j, hist[0], hist_refatm_selatm[0]))
else: # refcmp_selatm, refcmp_selcmp
if not np.isclose(hist[0], hists_cmp[1][0][0],
rtol=0, atol=tol):
raise ValueError(
"The percentage of refcmps having no contact"
" with any selatm is not the same in"
" 'hists_cmp[{}][{}]' ({}) and"
" 'hists_cmp[1][0]' ({})"
.format(i, j, hist[0], hists_cmp[1][0][0]))
for i in range(len(n_pairs)):
if not np.isclose(hists_cmp[i][-1][0], 0, rtol=0, atol=tol):
raise ValueError("The first element of"
" 'hists_cmp[{}][-1][0]' ({}) is not zero"
.format(i, hists_cmp[i][-1][0]))
# Check if refatm_selcmp_tot == refatm_selatm:
if not np.allclose(hists_cmp[0][2], hist_refatm_selatm,
rtol=0, atol=tol, equal_nan=True):
raise ValueError("'hists_cmp[0][2]' != 'hist_refatm_selatm'")
# Check if refcmp_selcmp_tot == refcmp_selatm_tot:
if not np.allclose(hists_cmp[2][2], hists_cmp[1][2],
rtol=0, atol=tol, equal_nan=True):
raise ValueError("'hists_cmp[2][2]' != 'hists_cmp[1][2]'")
# Check averages:
for i, av_refatm_selatm in enumerate(avs_refatm_selatm):
if not np.isclose(avs_cmp[0][2][i], av_refatm_selatm):
# average(refatm_selcmp_tot) != average(refatm_selatm)
raise ValueError("'avs_cmp[0][2][{}]' ({}) !="
" 'avs_refatm_selatm[{}]' ({})"
.format(i, avs_cmp[0][2][i],
i, av_refatm_selatm))
for i, av_refcmp_selatm_tot in enumerate(avs_cmp[1][2]):
if not np.isclose(avs_cmp[2][2][i], av_refcmp_selatm_tot):
# average(refcmp_selcmp_tot) != average(refcmp_selatm_tot)
raise ValueError("'avs_cmp[0][2][{}]' ({}) !="
" 'avs_cmp[1][2][{}]' ({})"
.format(i, avs_cmp[2][2][i],
i, av_refcmp_selatm_tot))
for i, av_pair in enumerate(avs_pair):
for k in range(2):
if not np.isclose(avs_cmp[i][0][k] * av_pair, avs_cmp[i][2][k],
rtol=0, atol=tol):
# av(refcmp_diff_selcmp)*av(refcmp_selcmp_pair) !=
# av(refcmp_selcmp_tot)
raise ValueError(
"'avs_cmp[{}][0][{}]*avs_pair[{}]' ({}) !="
" 'avs_cmp[{}][2][{}]' ({})"
.format(i, k, i, avs_cmp[i][0][k] * av_pair,
i, k, avs_cmp[i][2][k])
)
if np.isclose(hist_refatm_selatm[0], 0, rtol=0, atol=tol):
# All refatms are bound to selatms
if not np.isclose(avs_refatm_selatm[0], avs_refatm_selatm[1],
rtol=0, atol=tol):
raise ValueError("All refatms are bound to selatms, but"
"'avs_refatm_selatm[0]' ({}) !="
" 'avs_refatm_selatm[1]' ({})"
.format(avs_refatm_selatm[0],
avs_refatm_selatm[1]))
for j, av in enumerate(avs_cmp[0][:-1]): # refatm_selcmp
if not np.isclose(av[0], av[1], rtol=0, atol=tol):
raise ValueError("All refatms are bound to selatms, but"
"'avs_cmp[0][{}][0]' ({}) !="
" 'avs_cmp[0][{}][1]' ({})"
.format(j, av[0], j, av[1]))
if np.isclose(hists_cmp[1][0][0], 0, rtol=0, atol=tol): # refcmp_diff_selatm
# All refcmps are bound to selatms
for i, avs in enumerate(avs_cmp[1:]): # refcmp_selatm, refcmp_selcmp
for j, av in enumerate(avs[:-1]):
if not np.isclose(av[0], av[1], rtol=0, atol=tol):
raise ValueError("All refcmps are bound to selatms,"
" but 'avs_cmp[{}][{}][0]' ({}) !="
" 'avs_cmp[{}][{}][1]' ({})"
.format(i, j, av[0], i, j, av[1]))
hist_refatm_same_selatm = np.array([hist_refatm_selatm[0],
1 - hist_refatm_selatm[0]])
hist_refatm_same_selatm = mdt.nph.extend(hist_refatm_same_selatm,
len(hists_cmp[1][1]))
hist_refatm_selatm_pair = np.array([0, 1])
hist_refatm_selatm_pair = mdt.nph.extend(hist_refatm_selatm_pair,
len(hists_cmp[1][-1]))
if not args.UPDATING_REF and np.all(np.equal(natms_per_refcmp, 1)):
# refcmp == refatm
# Check if refcmp_selatm == refatm_selatm:
for j in (0, 2): # refcmp_diff_selatm, refcmp_selatm_tot
if not np.allclose(hists_cmp[1][j], hist_refatm_selatm,
rtol=0, atol=tol, equal_nan=True):
raise ValueError("refcmp = refatm, but"
" 'hists_cmp[1][{}]' !="
" 'hist_refatm_selatm'".format(j))
if not np.allclose(hists_cmp[1][1], hist_refatm_same_selatm,
rtol=0, atol=tol, equal_nan=True):
# refcmp_same_selatm != refatm_same_selatm
raise ValueError("refcmp = refatm, but 'hists_cmp[1][1]' !="
" 'hist_refatm_same_selatm'")
if not np.allclose(hists_cmp[1][-1], hist_refatm_selatm_pair,
rtol=0, atol=tol, equal_nan=True):
# refcmp_selatm_pair != refatm_selatm_pair
raise ValueError("refcmp = refatm, but 'hists_cmp[1][-1]' !="
" 'hist_refatm_selatm_pair'")
# Check if refcmp_selcmp == refatm_selcmp:
for j, hist in enumerate(hists_cmp[2]):
if not np.allclose(hist, hists_cmp[0][j],
rtol=0, atol=tol, equal_nan=True):
raise ValueError("refcmp = refatm, but hists_cmp[2][{}]"
" != hists_cmp[0][{}]".format(j, j))
if not args.UPDATING_SEL and np.all(np.equal(natms_per_selcmp, 1)):
# selcmp == selatm
# Check if refatm_selcmp == refatm_selatm:
for j in (0, 2): # refatm_diff_selcmp, refatm_selcmp_tot
if not np.allclose(hists_cmp[0][j], hist_refatm_selatm,
rtol=0, atol=tol, equal_nan=True):
raise ValueError("selcmp = selatm, but"
" 'hists_cmp[0][{}]' !="
" 'hist_refatm_selatm'".format(j))
if not np.allclose(hists_cmp[0][1], hist_refatm_same_selatm,
rtol=0, atol=tol, equal_nan=True):
# refatm_same_selcmp != refatm_same_selatm
raise ValueError("selcmp = selatm, but 'hists_cmp[0][1]' !="
" 'hist_refatm_same_selatm'")
if not np.allclose(hists_cmp[0][-1], hist_refatm_selatm_pair,
rtol=0, atol=tol, equal_nan=True):
# refatm_selcmp_pair != refatm_selatm_pair
raise ValueError("selcmp = selatm, but 'hists_cmp[0][-1]' !="
" 'hist_refatm_selatm_pair'")
# Check if refcmp_selcmp == refcmp_selatm:
for j, hist in enumerate(hists_cmp[2]):
if not np.allclose(hist, hists_cmp[1][j],
rtol=0, atol=tol, equal_nan=True):
raise ValueError("selcmp = selatm, but hists_cmp[2][{}]"
" != hists_cmp[1][{}]".format(j, j))
print("Elapsed time: {}".format(datetime.now() - timer))
print("Current memory usage: {:.2f} MiB"
.format(proc.memory_info().rss / 2**20))
print("\n")
print("{} done".format(os.path.basename(sys.argv[0])))
print("Totally elapsed time: {}".format(datetime.now() - timer_tot))
print("CPU time: {}"
.format(timedelta(seconds=sum(proc.cpu_times()[:4]))))
print("CPU usage: {:.2f} %".format(proc.cpu_percent()))
print("Current memory usage: {:.2f} MiB"
.format(proc.memory_info().rss / 2**20))