From 32aa1f806fa82f34bb3c29b75a78eb055e060fff Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Fri, 30 Aug 2024 00:00:16 -0700 Subject: [PATCH 01/10] imd sample integration --- package/MDAnalysis/coordinates/IMD.py | 289 ++++++++++ package/MDAnalysis/coordinates/__init__.py | 3 +- package/pyproject.toml | 1 + .../MDAnalysisTests/coordinates/test_imd.py | 526 ++++++++++++++++++ 4 files changed, 818 insertions(+), 1 deletion(-) create mode 100644 package/MDAnalysis/coordinates/IMD.py create mode 100644 testsuite/MDAnalysisTests/coordinates/test_imd.py diff --git a/package/MDAnalysis/coordinates/IMD.py b/package/MDAnalysis/coordinates/IMD.py new file mode 100644 index 00000000000..04662458d43 --- /dev/null +++ b/package/MDAnalysis/coordinates/IMD.py @@ -0,0 +1,289 @@ +""" + +Example: Streaming an IMD v3 trajectory +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To stream a trajectory from GROMACS or another simulation engine that supports +IMD v3, ensure that the simulation engine is running and waiting for an IMD connection. + +For example, in GROMACS, you can use ``gmx mdrun`` with the ``-imdwait`` flag +to ensure that GROMACS will wait for a client before starting the simulation. +In GROMACS, you will know that the simulation is ready and waiting for the +MDAnalysis IMDReader client when this line is printed to the terminal: + +.. code-block:: none + + IMD: Will wait until I have a connection and IMD_GO orders. + +Once the simulation is ready for a client connection, setup your :class:`Universe` +like this: :: + + import MDAnalysis as mda + # Pass host and port of the listening GROMACS simulation + # server as the trajectory argument + u = mda.Universe("topology.tpr", "localhost:8888") + +Classes +^^^^^^^ + +.. autoclass:: IMDReader + :members: + :inherited-members: + +""" + +import queue +from MDAnalysis.coordinates.base import ( + ReaderBase, + FrameIteratorIndices, + FrameIteratorAll, + FrameIteratorSliced, +) +from MDAnalysis.coordinates import core +from MDAnalysis.lib.util import store_init_arguments + +import imdclient +from imdclient.utils import parse_host_port +import numpy as np +import logging +import warnings +from typing import Optional +import numbers + +logger = logging.getLogger("imdclient.IMDClient") + + +class IMDReader(ReaderBase): + """ + Reader for IMD protocol packets. + """ + + format = "IMD" + + @store_init_arguments + def __init__( + self, + filename, + convert_units=True, + n_atoms=None, + **kwargs, + ): + """ + Parameters + ---------- + filename : a string of the form "host:port" where host is the hostname + or IP address of the listening GROMACS server and port + is the port number. + n_atoms : int (optional) + number of atoms in the system. defaults to number of atoms + in the topology. don't set this unless you know what you're doing. 
+ """ + self._init_scope = True + self._reopen_called = False + + super(IMDReader, self).__init__(filename, **kwargs) + + logger.debug("IMDReader initializing") + + if n_atoms is None: + raise ValueError("IMDReader: n_atoms must be specified") + self.n_atoms = n_atoms + + host, port = parse_host_port(filename) + + # This starts the simulation + self._imdclient = imdclient.IMDClient(host, port, n_atoms, **kwargs) + + imdsinfo = self._imdclient.get_imdsessioninfo() + # NOTE: after testing phase, fail out on IMDv2 + + self.ts = self._Timestep( + self.n_atoms, + positions=imdsinfo.positions, + velocities=imdsinfo.velocities, + forces=imdsinfo.forces, + **self._ts_kwargs, + ) + + self._frame = -1 + + try: + self._read_next_timestep() + except StopIteration: + raise RuntimeError("IMDReader: No data found in stream") + + def _read_next_timestep(self): + # No rewinding- to both load the first frame after __init__ + # and access it again during iteration, we need to store first ts in mem + if not self._init_scope and self._frame == -1: + self._frame += 1 + # can't simply return the same ts again- transformations would be applied twice + # instead, return the pre-transformed copy + return self._first_ts + + return self._read_frame(self._frame + 1) + + def _read_frame(self, frame): + + try: + imdf = self._imdclient.get_imdframe() + except EOFError: + # Not strictly necessary, but for clarity + raise StopIteration + + self._frame = frame + self._load_imdframe_into_ts(imdf) + + if self._init_scope: + self._first_ts = self.ts.copy() + self._init_scope = False + + logger.debug(f"IMDReader: Loaded frame {self._frame}") + return self.ts + + def _load_imdframe_into_ts(self, imdf): + self.ts.frame = self._frame + if imdf.time is not None: + self.ts.time = imdf.time + # NOTE: timestep.pyx "dt" method is suspicious bc it uses "new" keyword for a float + self.ts.data["dt"] = imdf.dt + if imdf.energies is not None: + self.ts.data.update(imdf.energies) + if imdf.box is not None: + self.ts.dimensions = core.triclinic_box(*imdf.box) + if imdf.positions is not None: + # must call copy because reference is expected to reset + # see 'test_frame_collect_all_same' in MDAnalysisTests.coordinates.base + self.ts.positions = imdf.positions + if imdf.velocities is not None: + self.ts.velocities = imdf.velocities + if imdf.forces is not None: + self.ts.forces = imdf.forces + + @property + def n_frames(self): + """Changes as stream is processed unlike other readers""" + raise RuntimeError("IMDReader: n_frames is unknown") + + def next(self): + """Don't rewind after iteration. When _reopen() is called, + an error will be raised + """ + try: + ts = self._read_next_timestep() + except (EOFError, IOError): + # Don't rewind here like we normally would + raise StopIteration from None + else: + for auxname, reader in self._auxs.items(): + ts = self._auxs[auxname].update_ts(ts) + + ts = self._apply_transformations(ts) + + return ts + + def rewind(self): + """Raise error on rewind""" + raise RuntimeError("IMDReader: Stream-based readers can't be rewound") + + @staticmethod + def _format_hint(thing): + try: + parse_host_port(thing) + except: + return False + return True + + def close(self): + """Gracefully shut down the reader. 
Stops the producer thread.""" + logger.debug("IMDReader close() called") + self._imdclient.stop() + # NOTE: removeme after testing + logger.debug("IMDReader shut down gracefully.") + + # Incompatible methods + def copy(self): + raise NotImplementedError("IMDReader does not support copying") + + def _reopen(self): + if self._reopen_called: + raise RuntimeError("IMDReader: Cannot reopen IMD stream") + self._frame = -1 + self._reopen_called = True + + def __getitem__(self, frame): + """This method from ProtoReader must be overridden + to prevent slicing that doesn't make sense in a stream. + """ + raise RuntimeError("IMDReader: Trajectory can only be read in for loop") + + def check_slice_indices(self, start, stop, step): + """Check frame indices are valid and clip to fit trajectory. + + The usage follows standard Python conventions for :func:`range` but see + the warning below. + + Parameters + ---------- + start : int or None + Starting frame index (inclusive). ``None`` corresponds to the default + of 0, i.e., the initial frame. + stop : int or None + Last frame index (exclusive). ``None`` corresponds to the default + of n_frames, i.e., it includes the last frame of the trajectory. + step : int or None + step size of the slice, ``None`` corresponds to the default of 1, i.e, + include every frame in the range `start`, `stop`. + + Returns + ------- + start, stop, step : tuple (int, int, int) + Integers representing the slice + + Warning + ------- + The returned values `start`, `stop` and `step` give the expected result + when passed in :func:`range` but gives unexpected behavior when passed + in a :class:`slice` when ``stop=None`` and ``step=-1`` + + This can be a problem for downstream processing of the output from this + method. For example, slicing of trajectories is implemented by passing + the values returned by :meth:`check_slice_indices` to :func:`range` :: + + range(start, stop, step) + + and using them as the indices to randomly seek to. On the other hand, + in :class:`MDAnalysis.analysis.base.AnalysisBase` the values returned + by :meth:`check_slice_indices` are used to splice the trajectory by + creating a :class:`slice` instance :: + + slice(start, stop, step) + + This creates a discrepancy because these two lines are not equivalent:: + + range(10, -1, -1) # [10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + range(10)[slice(10, -1, -1)] # [] + + """ + if start is not None: + raise ValueError( + "IMDReader: Cannot slice a stream, 'start' must be None" + ) + if stop is not None: + raise ValueError( + "IMDReader: Cannot slice a stream, 'stop' must be None" + ) + if step is not None: + if isinstance(step, numbers.Integral): + if step != 1: + raise ValueError( + "IMDReader: Cannot slice a stream, 'step' must be None or 1" + ) + + return start, stop, step + + def __getstate__(self): + raise NotImplementedError("IMDReader does not support pickling") + + def __setstate__(self, state: object): + raise NotImplementedError("IMDReader does not support pickling") diff --git a/package/MDAnalysis/coordinates/__init__.py b/package/MDAnalysis/coordinates/__init__.py index 9b6a7121bc9..727522e9a84 100644 --- a/package/MDAnalysis/coordinates/__init__.py +++ b/package/MDAnalysis/coordinates/__init__.py @@ -757,7 +757,7 @@ class can choose an appropriate reader automatically. raw :class:`~MDAnalysis.coordinates.Timestep` objects. """ -__all__ = ['reader', 'writer', 'timestep'] +__all__ = ["reader", "writer", "timestep"] from . import base from . 
import timestep @@ -770,6 +770,7 @@ class can choose an appropriate reader automatically. from . import DMS from . import GMS from . import GRO +from . import IMD from . import INPCRD from . import LAMMPS from . import MOL2 diff --git a/package/pyproject.toml b/package/pyproject.toml index 05b0eb3ba16..111af1495cb 100644 --- a/package/pyproject.toml +++ b/package/pyproject.toml @@ -43,6 +43,7 @@ dependencies = [ 'waterdynamics', 'pathsimanalysis', 'mdahole2', + "imdclient @ git+https://github.com/ljwoods2/imdclient.git@main", ] keywords = [ "python", "science", "chemistry", "biophysics", "molecular-dynamics", diff --git a/testsuite/MDAnalysisTests/coordinates/test_imd.py b/testsuite/MDAnalysisTests/coordinates/test_imd.py new file mode 100644 index 00000000000..bc5ffefaf4f --- /dev/null +++ b/testsuite/MDAnalysisTests/coordinates/test_imd.py @@ -0,0 +1,526 @@ +from MDAnalysisTests.datafiles import ( + COORDINATES_TOPOLOGY, + COORDINATES_TRR, + COORDINATES_H5MD, +) +import MDAnalysis as mda +from imdclient.tests.utils import ( + get_free_port, + create_default_imdsinfo_v3, +) + +from imdclient.tests.server import InThreadIMDServer +from MDAnalysisTests.coordinates.base import ( + MultiframeReaderTest, + BaseReference, + BaseWriterTest, + assert_timestep_almost_equal, +) +from numpy.testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_equal, + assert_allclose, +) +import numpy as np +import logging +import pytest +from MDAnalysis.transformations import translate +import pickle + + +logger = logging.getLogger("imdclient.IMDClient") +file_handler = logging.FileHandler("test.log") +formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" +) +file_handler.setFormatter(formatter) +logger.addHandler(file_handler) +logger.setLevel(logging.DEBUG) + + +class IMDReference(BaseReference): + def __init__(self): + super(IMDReference, self).__init__() + self.port = get_free_port() + # Serve TRR traj data via the server + traj = mda.coordinates.TRR.TRRReader(COORDINATES_TRR) + self.server = InThreadIMDServer(traj) + self.server.set_imdsessioninfo(create_default_imdsinfo_v3()) + + self.n_atoms = traj.n_atoms + self.prec = 3 + + self.trajectory = f"localhost:{self.port}" + self.topology = COORDINATES_TOPOLOGY + self.changing_dimensions = True + self.reader = mda.coordinates.IMD.IMDReader + + self.first_frame.velocities = self.first_frame.positions / 10 + self.first_frame.forces = self.first_frame.positions / 100 + + self.second_frame.velocities = self.second_frame.positions / 10 + self.second_frame.forces = self.second_frame.positions / 100 + + self.last_frame.velocities = self.last_frame.positions / 10 + self.last_frame.forces = self.last_frame.positions / 100 + + self.jump_to_frame.velocities = self.jump_to_frame.positions / 10 + self.jump_to_frame.forces = self.jump_to_frame.positions / 100 + + def iter_ts(self, i): + ts = self.first_frame.copy() + ts.positions = 2**i * self.first_frame.positions + ts.velocities = ts.positions / 10 + ts.forces = ts.positions / 100 + ts.time = i + ts.frame = i + return ts + + +class TestIMDReaderBaseAPI(MultiframeReaderTest): + + @pytest.fixture() + def ref(self): + """Not a static method like in base class- need new server for each test""" + return IMDReference() + + @pytest.fixture() + def reader(self, ref): + # This will start the test IMD Server, waiting for a connection + # to then send handshake & first frame + ref.server.handshake_sequence("localhost", ref.port) + # This will connect to the test IMD Server and 
read the first frame + reader = ref.reader(ref.trajectory, n_atoms=ref.n_atoms) + # Send the rest of the frames- small enough to all fit in socket itself + ref.server.send_frames(1, 5) + + reader.add_auxiliary( + "lowf", + ref.aux_lowf, + dt=ref.aux_lowf_dt, + initial_time=0, + time_selector=None, + ) + reader.add_auxiliary( + "highf", + ref.aux_highf, + dt=ref.aux_highf_dt, + initial_time=0, + time_selector=None, + ) + return reader + + @pytest.fixture() + def transformed(self, ref): + # This will start the test IMD Server, waiting for a connection + # to then send handshake & first frame + ref.server.handshake_sequence("localhost", ref.port) + # This will connect to the test IMD Server and read the first frame + transformed = ref.reader(ref.trajectory, n_atoms=ref.n_atoms) + # Send the rest of the frames- small enough to all fit in socket itself + ref.server.send_frames(1, 5) + transformed.add_transformations( + translate([1, 1, 1]), translate([0, 0, 0.33]) + ) + return transformed + + @pytest.mark.skip( + reason="Stream-based reader cannot determine n_frames until EOF" + ) + def test_n_frames(self, reader, ref): + assert_equal( + self.universe.trajectory.n_frames, + 1, + "wrong number of frames in pdb", + ) + + def test_first_frame(self, ref, reader): + # don't rewind here as in inherited base test + assert_timestep_almost_equal( + reader.ts, ref.first_frame, decimal=ref.prec + ) + + @pytest.mark.skip(reason="IMD is not a writeable format") + def test_get_writer_1(self, ref, reader, tmpdir): + with tmpdir.as_cwd(): + outfile = "test-writer." + ref.ext + with reader.Writer(outfile) as W: + assert_equal(isinstance(W, ref.writer), True) + assert_equal(W.n_atoms, reader.n_atoms) + + @pytest.mark.skip(reason="IMD is not a writeable format") + def test_get_writer_2(self, ref, reader, tmpdir): + with tmpdir.as_cwd(): + outfile = "test-writer." + ref.ext + with reader.Writer(outfile, n_atoms=100) as W: + assert_equal(isinstance(W, ref.writer), True) + assert_equal(W.n_atoms, 100) + + @pytest.mark.skip( + reason="Stream-based reader cannot determine total_time until EOF" + ) + def test_total_time(self, reader, ref): + assert_almost_equal(reader.totaltime, ref.totaltime, decimal=ref.prec) + + @pytest.mark.skip(reason="Stream-based reader can only be read iteratively") + def test_changing_dimensions(self, ref, reader): + if ref.changing_dimensions: + reader.rewind() + if ref.dimensions is None: + assert reader.ts.dimensions is None + else: + assert_array_almost_equal( + reader.ts.dimensions, ref.dimensions, decimal=ref.prec + ) + reader[1] + if ref.dimensions_second_frame is None: + assert reader.ts.dimensions is None + else: + assert_array_almost_equal( + reader.ts.dimensions, + ref.dimensions_second_frame, + decimal=ref.prec, + ) + + def test_iter(self, ref, reader): + for i, ts in enumerate(reader): + assert_timestep_almost_equal(ts, ref.iter_ts(i), decimal=ref.prec) + + def test_first_dimensions(self, ref, reader): + # don't rewind here as in inherited base test + if ref.dimensions is None: + assert reader.ts.dimensions is None + else: + assert_array_almost_equal( + reader.ts.dimensions, ref.dimensions, decimal=ref.prec + ) + + def test_volume(self, ref, reader): + # don't rewind here as in inherited base test + vol = reader.ts.volume + # Here we can only be sure about the numbers upto the decimal point due + # to floating point impressions. 
+ assert_almost_equal(vol, ref.volume, 0) + + @pytest.mark.skip(reason="Cannot create new reader from same stream") + def test_reload_auxiliaries_from_description(self, ref, reader): + # get auxiliary desscriptions form existing reader + descriptions = reader.get_aux_descriptions() + # load a new reader, without auxiliaries + reader = ref.reader(ref.trajectory) + # load auxiliaries into new reader, using description... + for aux in descriptions: + reader.add_auxiliary(**aux) + # should have the same number of auxiliaries + assert_equal( + reader.aux_list, + reader.aux_list, + "Number of auxiliaries does not match", + ) + # each auxiliary should be the same + for auxname in reader.aux_list: + assert_equal( + reader._auxs[auxname], + reader._auxs[auxname], + "AuxReaders do not match", + ) + + @pytest.mark.skip(reason="Stream can only be read in for loop") + def test_stop_iter(self, reader): + # reset to 0 + reader.rewind() + for ts in reader[:-1]: + pass + assert_equal(reader.frame, 0) + + @pytest.mark.skip(reason="Cannot rewind stream") + def test_iter_rewinds(self, reader, accessor): + for ts_indices in accessor(reader): + pass + assert_equal(ts_indices.frame, 0) + + @pytest.mark.skip( + reason="Timeseries currently requires n_frames to be known" + ) + @pytest.mark.parametrize( + "order", ("fac", "fca", "afc", "acf", "caf", "cfa") + ) + def test_timeseries_shape(self, reader, order): + timeseries = reader.timeseries(order=order) + a_index = order.index("a") + # f_index = order.index("f") + c_index = order.index("c") + assert timeseries.shape[a_index] == reader.n_atoms + # assert timeseries.shape[f_index] == len(reader) + assert timeseries.shape[c_index] == 3 + + @pytest.mark.skip( + reason="Timeseries currently requires n_frames to be known" + ) + @pytest.mark.parametrize("asel", ("index 1", "index 2", "index 1 to 3")) + def test_timeseries_asel_shape(self, reader, asel): + atoms = mda.Universe(reader.filename).select_atoms(asel) + timeseries = reader.timeseries(atoms, order="fac") + assert timeseries.shape[0] == len(reader) + assert timeseries.shape[1] == len(atoms) + assert timeseries.shape[2] == 3 + + @pytest.mark.skip("Cannot slice stream") + @pytest.mark.parametrize("slice", ([0, 2, 1], [0, 10, 2], [0, 10, 3])) + def test_timeseries_values(self, reader, slice): + ts_positions = [] + if isinstance(reader, mda.coordinates.memory.MemoryReader): + pytest.xfail( + "MemoryReader uses deprecated stop inclusive" + " indexing, see Issue #3893" + ) + if slice[1] > len(reader): + pytest.skip("too few frames in reader") + for i in range(slice[0], slice[1], slice[2]): + ts = reader[i] + ts_positions.append(ts.positions.copy()) + positions = np.asarray(ts_positions) + timeseries = reader.timeseries( + start=slice[0], stop=slice[1], step=slice[2], order="fac" + ) + assert_allclose(timeseries, positions) + + @pytest.mark.skip(reason="Cannot rewind stream") + def test_transformations_2iter(self, ref, transformed): + # Are the transformations applied and + # are the coordinates "overtransformed"? 
+ v1 = np.float32((1, 1, 1)) + v2 = np.float32((0, 0, 0.33)) + idealcoords = [] + for i, ts in enumerate(transformed): + idealcoords.append(ref.iter_ts(i).positions + v1 + v2) + assert_array_almost_equal( + ts.positions, idealcoords[i], decimal=ref.prec + ) + + for i, ts in enumerate(transformed): + assert_almost_equal(ts.positions, idealcoords[i], decimal=ref.prec) + + @pytest.mark.skip(reason="Cannot slice stream") + def test_transformations_slice(self, ref, transformed): + # Are the transformations applied when iterating over a slice of the trajectory? + v1 = np.float32((1, 1, 1)) + v2 = np.float32((0, 0, 0.33)) + for i, ts in enumerate(transformed[2:3:1]): + idealcoords = ref.iter_ts(ts.frame).positions + v1 + v2 + assert_array_almost_equal( + ts.positions, idealcoords, decimal=ref.prec + ) + + @pytest.mark.skip(reason="Cannot slice stream") + def test_transformations_switch_frame(self, ref, transformed): + # This test checks if the transformations are applied and if the coordinates + # "overtransformed" on different situations + # Are the transformations applied when we switch to a different frame? + v1 = np.float32((1, 1, 1)) + v2 = np.float32((0, 0, 0.33)) + first_ideal = ref.iter_ts(0).positions + v1 + v2 + if len(transformed) > 1: + assert_array_almost_equal( + transformed[0].positions, first_ideal, decimal=ref.prec + ) + second_ideal = ref.iter_ts(1).positions + v1 + v2 + assert_array_almost_equal( + transformed[1].positions, second_ideal, decimal=ref.prec + ) + + # What if we comeback to the previous frame? + assert_array_almost_equal( + transformed[0].positions, first_ideal, decimal=ref.prec + ) + + # How about we switch the frame to itself? + assert_array_almost_equal( + transformed[0].positions, first_ideal, decimal=ref.prec + ) + else: + assert_array_almost_equal( + transformed[0].positions, first_ideal, decimal=ref.prec + ) + + @pytest.mark.skip(reason="Cannot rewind stream") + def test_transformation_rewind(self, ref, transformed): + # this test checks if the transformations are applied after rewinding the + # trajectory + v1 = np.float32((1, 1, 1)) + v2 = np.float32((0, 0, 0.33)) + ideal_coords = ref.iter_ts(0).positions + v1 + v2 + transformed.rewind() + assert_array_almost_equal( + transformed[0].positions, ideal_coords, decimal=ref.prec + ) + + @pytest.mark.skip(reason="Cannot make a copy of a stream") + def test_copy(self, ref, transformed): + # this test checks if transformations are carried over a copy and if the + # coordinates of the copy are equal to the original's + v1 = np.float32((1, 1, 1)) + v2 = np.float32((0, 0, 0.33)) + new = transformed.copy() + assert_equal( + transformed.transformations, + new.transformations, + "transformations are not equal", + ) + for i, ts in enumerate(new): + ideal_coords = ref.iter_ts(i).positions + v1 + v2 + assert_array_almost_equal( + ts.positions, ideal_coords, decimal=ref.prec + ) + + @pytest.mark.skip(reason="Cannot pickle socket") + def test_pickle_reader(self, reader): + """It probably wouldn't be a good idea to pickle a + reader that is connected to a server""" + reader_p = pickle.loads(pickle.dumps(reader)) + assert_equal(len(reader), len(reader_p)) + assert_equal( + reader.ts, reader_p.ts, "Timestep is changed after pickling" + ) + + @pytest.mark.skip(reason="Cannot pickle socket") + def test_pickle_next_ts_reader(self, reader): + reader_p = pickle.loads(pickle.dumps(reader)) + assert_equal( + next(reader), + next(reader_p), + "Next timestep is changed after pickling", + ) + + @pytest.mark.skip(reason="Cannot pickle 
socket") + def test_pickle_last_ts_reader(self, reader): + # move current ts to last frame. + reader[-1] + reader_p = pickle.loads(pickle.dumps(reader)) + assert_equal( + len(reader), + len(reader_p), + "Last timestep is changed after pickling", + ) + assert_equal( + reader.ts, reader_p.ts, "Last timestep is changed after pickling" + ) + + @pytest.mark.skip(reason="Cannot copy stream") + def test_transformations_copy(self, ref, transformed): + # this test checks if transformations are carried over a copy and if the + # coordinates of the copy are equal to the original's + v1 = np.float32((1, 1, 1)) + v2 = np.float32((0, 0, 0.33)) + new = transformed.copy() + assert_equal( + transformed.transformations, + new.transformations, + "transformations are not equal", + ) + for i, ts in enumerate(new): + ideal_coords = ref.iter_ts(i).positions + v1 + v2 + assert_array_almost_equal( + ts.positions, ideal_coords, decimal=ref.prec + ) + + @pytest.mark.skip( + reason="Timeseries currently requires n_frames to be known" + ) + def test_timeseries_empty_asel(self, reader): + with pytest.warns( + UserWarning, + match="Empty string to select atoms, empty group returned.", + ): + atoms = mda.Universe(reader.filename).select_atoms(None) + with pytest.raises(ValueError, match="Timeseries requires at least"): + reader.timeseries(asel=atoms) + + @pytest.mark.skip( + reason="Timeseries currently requires n_frames to be known" + ) + def test_timeseries_empty_atomgroup(self, reader): + with pytest.warns( + UserWarning, + match="Empty string to select atoms, empty group returned.", + ): + atoms = mda.Universe(reader.filename).select_atoms(None) + with pytest.raises(ValueError, match="Timeseries requires at least"): + reader.timeseries(atomgroup=atoms) + + @pytest.mark.skip( + reason="Timeseries currently requires n_frames to be known" + ) + def test_timeseries_asel_warns_deprecation(self, reader): + atoms = mda.Universe(reader.filename).select_atoms("index 1") + with pytest.warns(DeprecationWarning, match="asel argument to"): + timeseries = reader.timeseries(asel=atoms, order="fac") + + @pytest.mark.skip( + reason="Timeseries currently requires n_frames to be known" + ) + def test_timeseries_atomgroup(self, reader): + atoms = mda.Universe(reader.filename).select_atoms("index 1") + timeseries = reader.timeseries(atomgroup=atoms, order="fac") + + @pytest.mark.skip( + reason="Timeseries currently requires n_frames to be known" + ) + def test_timeseries_atomgroup_asel_mutex(self, reader): + atoms = mda.Universe(reader.filename).select_atoms("index 1") + with pytest.raises(ValueError, match="Cannot provide both"): + timeseries = reader.timeseries( + atomgroup=atoms, asel=atoms, order="fac" + ) + + @pytest.mark.skip("Cannot slice stream") + def test_last_frame(self, ref, reader): + ts = reader[-1] + assert_timestep_almost_equal(ts, ref.last_frame, decimal=ref.prec) + + @pytest.mark.skip("Cannot slice stream") + def test_go_over_last_frame(self, ref, reader): + with pytest.raises(IndexError): + reader[ref.n_frames + 1] + + @pytest.mark.skip("Cannot slice stream") + def test_frame_jump(self, ref, reader): + ts = reader[ref.jump_to_frame.frame] + assert_timestep_almost_equal(ts, ref.jump_to_frame, decimal=ref.prec) + + @pytest.mark.skip("Cannot slice stream") + def test_frame_jump_issue1942(self, ref, reader): + """Test for issue 1942 (especially XDR on macOS)""" + reader.rewind() + try: + for ii in range(ref.n_frames + 2): + reader[0] + except StopIteration: + pytest.fail("Frame-seeking wrongly iterated (#1942)") + + def 
test_next_gives_second_frame(self, ref, reader): + # don't recreate reader here as in inherited base test + ts = reader.next() + assert_timestep_almost_equal(ts, ref.second_frame, decimal=ref.prec) + + @pytest.mark.skip( + reason="Stream isn't rewound after iteration- base reference is the same but it is the last frame" + ) + def test_frame_collect_all_same(self, reader): + # check that the timestep resets so that the base reference is the same + # for all timesteps in a collection with the exception of memoryreader + # and DCDReader + if isinstance(reader, mda.coordinates.memory.MemoryReader): + pytest.xfail("memoryreader allows independent coordinates") + if isinstance(reader, mda.coordinates.DCD.DCDReader): + pytest.xfail( + "DCDReader allows independent coordinates." + "This behaviour is deprecated and will be changed" + "in 3.0" + ) + collected_ts = [] + for i, ts in enumerate(reader): + collected_ts.append(ts.positions) + for array in collected_ts: + assert_allclose(array, collected_ts[0]) From b3298968bdefbd496bc7b5f2234ca51aac3a4243 Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Sun, 22 Sep 2024 14:46:11 -0700 Subject: [PATCH 02/10] initial commit --- package/MDAnalysis/analysis/dasktimeseries.py | 50 +++++++++++++++++++ package/MDAnalysis/coordinates/H5MD.py | 49 +++++++++++++++++- tmp/lazyts.ipynb | 21 ++++++++ 3 files changed, 119 insertions(+), 1 deletion(-) create mode 100644 package/MDAnalysis/analysis/dasktimeseries.py create mode 100644 tmp/lazyts.ipynb diff --git a/package/MDAnalysis/analysis/dasktimeseries.py b/package/MDAnalysis/analysis/dasktimeseries.py new file mode 100644 index 00000000000..e63be2e9dfc --- /dev/null +++ b/package/MDAnalysis/analysis/dasktimeseries.py @@ -0,0 +1,50 @@ +from .results import Results, ResultsGroup +import dask.array as da +import numpy as np + + +class DaskTimeSeriesAnalysisBase: + + def __init__(self, dask_timeseries, verbose=False, **kwargs): + self._dts = dask_timeseries + self._verbose = verbose + self.results = Results() + + def _prepare(self): + pass # pylint: disable=unnecessary-pass + + def _compute(self): + pass + + def _conclude(self): + pass # pylint: disable=unnecessary-pass + + def run(self, verbose): + self._prepare() + self._compute() + self._conclude() + return self + + +class DaskRMSF(DaskTimeSeriesAnalysisBase): + def __init__(self, dask_timeseries, verbose=False, **kwargs): + super().__init__(dask_timeseries, verbose=verbose, **kwargs) + self._kwargs = kwargs + + def _prepare(self): + n_atoms = len(self._dts[0]) + self.results["rmsf"] = np.zeros((n_atoms, 3)) + + def _compute(self): + positions = self._dts + mean_positions = positions.mean(axis=0) + subtracted_positions = positions - mean_positions + squared_deviations = subtracted_positions**2 + avg_squared_deviations = squared_deviations.mean(axis=0) + sqrt_avg_squared_deviations = da.sqrt(avg_squared_deviations) + self.results.rmsf = da.sqrt( + (sqrt_avg_squared_deviations**2).sum(axis=1) + ) + + def _conclude(self): + pass diff --git a/package/MDAnalysis/coordinates/H5MD.py b/package/MDAnalysis/coordinates/H5MD.py index 48283113f43..b4f15c9cd43 100644 --- a/package/MDAnalysis/coordinates/H5MD.py +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -208,7 +208,8 @@ """ import warnings - +import dask.array as da +from typing import Any, Union, Optional, List, Dict import numpy as np import MDAnalysis as mda from . 
import base, core @@ -810,6 +811,52 @@ def Writer(self, filename, n_atoms=None, **kwargs): kwargs.setdefault('forces', self.has_forces) return H5MDWriter(filename, n_atoms, **kwargs) + def dask_timeseries(self, asel: Optional['AtomGroup']=None, + atomgroup: Optional['Atomgroup']=None, + start: Optional[int]=None, stop: Optional[int]=None, + step: Optional[int]=None, + order: Optional[str]='fac') -> np.ndarray: + if asel is not None: + warnings.warn( + "asel argument to timeseries will be renamed to" + "'atomgroup' in 3.0, see #3911", + category=DeprecationWarning) + if atomgroup: + raise ValueError("Cannot provide both asel and atomgroup kwargs") + atomgroup = asel + start, stop, step = self.check_slice_indices(start, stop, step) + nframes = len(range(start, stop, step)) + + if atomgroup is not None: + if len(atomgroup) == 0: + raise ValueError( + "Timeseries requires at least one atom to analyze") + atom_numbers = atomgroup.indices + natoms = len(atom_numbers) + else: + natoms = self.n_atoms + atom_numbers = np.arange(natoms) + + coordinates = da.from_array(self._particle_group['position']['value'],)[start:stop:step, atom_numbers, :] + + # switch axes around + default_order = 'fac' + if order != default_order: + try: + newidx = [default_order.index(i) for i in order] + except ValueError: + raise ValueError(f"Unrecognized order key in {order}, " + "must be permutation of 'fac'") + + try: + coordinates = da.moveaxis(coordinates, newidx, [0, 1, 2]) + except ValueError: + errmsg = ("Repeated or missing keys passed to argument " + f"`order`: {order}, each key must be used once") + raise ValueError(errmsg) + return coordinates + + @property def has_positions(self): """``True`` if 'position' group is in trajectory.""" diff --git a/tmp/lazyts.ipynb b/tmp/lazyts.ipynb new file mode 100644 index 00000000000..f63716f1428 --- /dev/null +++ b/tmp/lazyts.ipynb @@ -0,0 +1,21 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import MDAnalysis as mda\n", + "\n" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From c0c261c7dd94d54b6ab40b23cb2d6006913fa09a Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Mon, 23 Sep 2024 02:07:47 +0000 Subject: [PATCH 03/10] initial commit --- package/MDAnalysis/analysis/dasktimeseries.py | 5 +- package/MDAnalysis/coordinates/H5MD.py | 6 + tmp/lazyts.ipynb | 193 +++++++++++++++++- 3 files changed, 199 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/analysis/dasktimeseries.py b/package/MDAnalysis/analysis/dasktimeseries.py index e63be2e9dfc..3518fd9811f 100644 --- a/package/MDAnalysis/analysis/dasktimeseries.py +++ b/package/MDAnalysis/analysis/dasktimeseries.py @@ -19,7 +19,7 @@ def _compute(self): def _conclude(self): pass # pylint: disable=unnecessary-pass - def run(self, verbose): + def run(self): self._prepare() self._compute() self._conclude() @@ -29,7 +29,6 @@ def run(self, verbose): class DaskRMSF(DaskTimeSeriesAnalysisBase): def __init__(self, dask_timeseries, verbose=False, **kwargs): super().__init__(dask_timeseries, verbose=verbose, **kwargs) - self._kwargs = kwargs def _prepare(self): n_atoms = len(self._dts[0]) @@ -44,7 +43,7 @@ def _compute(self): sqrt_avg_squared_deviations = da.sqrt(avg_squared_deviations) self.results.rmsf = da.sqrt( (sqrt_avg_squared_deviations**2).sum(axis=1) - ) + ).compute() def _conclude(self): pass diff --git a/package/MDAnalysis/coordinates/H5MD.py 
b/package/MDAnalysis/coordinates/H5MD.py index b4f15c9cd43..38e1bab4ba1 100644 --- a/package/MDAnalysis/coordinates/H5MD.py +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -209,6 +209,7 @@ """ import warnings import dask.array as da +from .. import units from typing import Any, Union, Optional, List, Dict import numpy as np import MDAnalysis as mda @@ -854,6 +855,11 @@ def dask_timeseries(self, asel: Optional['AtomGroup']=None, errmsg = ("Repeated or missing keys passed to argument " f"`order`: {order}, each key must be used once") raise ValueError(errmsg) + + f = units.get_conversion_factor('length', + self.units['length'], 'Angstrom') + coordinates *= f + return coordinates diff --git a/tmp/lazyts.ipynb b/tmp/lazyts.ipynb index f63716f1428..c9f6cc2ce61 100644 --- a/tmp/lazyts.ipynb +++ b/tmp/lazyts.ipynb @@ -1,5 +1,139 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Universe" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/nfs/homes3/ljwoods2/miniforge3/envs/mda-dev/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "/nfs/homes3/ljwoods2/workspace/mdanalysis/package/MDAnalysis/topology/PDBParser.py:348: UserWarning: Unknown element Z found for some atoms. These have been given an empty element record. If needed they can be guessed using MDAnalysis.topology.guessers.\n", + " warnings.warn(wmsg)\n", + "/nfs/homes3/ljwoods2/workspace/mdanalysis/package/MDAnalysis/topology/PDBParser.py:348: UserWarning: Unknown element D found for some atoms. These have been given an empty element record. 
If needed they can be guessed using MDAnalysis.topology.guessers.\n", + " warnings.warn(wmsg)\n", + "/nfs/homes3/ljwoods2/workspace/mdanalysis/package/MDAnalysis/topology/guessers.py:146: UserWarning: Failed to guess the mass for the following atom types: D\n", + " warnings.warn(\"Failed to guess the mass for the following atom types: {}\".format(atom_type))\n", + "/nfs/homes3/ljwoods2/workspace/mdanalysis/package/MDAnalysis/topology/guessers.py:146: UserWarning: Failed to guess the mass for the following atom types: Z\n", + " warnings.warn(\"Failed to guess the mass for the following atom types: {}\".format(atom_type))\n" + ] + } + ], + "source": [ + "import MDAnalysis as mda\n", + "\n", + "TOPOL = \"/scratch/ljwoods2/workspace/zarrtraj/zarrtraj/data/yiip_equilibrium/YiiP_system.pdb\"\n", + "TRAJ = \"/scratch/ljwoods2/workspace/zarrtraj/zarrtraj/data/yiip_aligned_compressed.h5md\"\n", + "\n", + "u = mda.Universe(TOPOL, TRAJ)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run dask RMSF method" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from MDAnalysis.analysis.dasktimeseries import DaskRMSF\n", + "\n", + "dask_timeseries = u.trajectory.dask_timeseries()\n", + "dRMSF = DaskRMSF(dask_timeseries).run()\n", + "dRMSF_result = dRMSF.results.rmsf" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run RMSF method shipped with mda" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from MDAnalysis.analysis.rms import RMSF\n", + "\n", + "RMSF = RMSF(u.atoms).run()\n", + "RMSF_result = RMSF.results.rmsf" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Ensure results are the same" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from numpy.testing import assert_allclose\n", + "\n", + "assert_allclose(dRMSF_result, RMSF_result, atol=1e-5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Print num cores used by dask scheduler" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'num_workers'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[15], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mdask\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmultiprocessing\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m num_threads \u001b[38;5;241m=\u001b[39m \u001b[43mdask\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mnum_workers\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(num_threads)\n", + "File \u001b[0;32m~/miniforge3/envs/mda-dev/lib/python3.10/site-packages/dask/config.py:613\u001b[0m, in \u001b[0;36mget\u001b[0;34m(key, default, config, override_with)\u001b[0m\n\u001b[1;32m 611\u001b[0m k \u001b[38;5;241m=\u001b[39m canonical_name(k, result)\n\u001b[1;32m 612\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 
613\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mresult\u001b[49m\u001b[43m[\u001b[49m\u001b[43mk\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 614\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mIndexError\u001b[39;00m, \u001b[38;5;167;01mKeyError\u001b[39;00m):\n\u001b[1;32m 615\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m default \u001b[38;5;129;01mis\u001b[39;00m no_default:\n", + "\u001b[0;31mKeyError\u001b[0m: 'num_workers'" + ] + } + ], + "source": [ + "import dask\n", + "import multiprocessing\n", + "num_threads = dask.config.get('num_workers', 0)\n", + "print(num_threads)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Settings used to write the trajectory used above" + ] + }, { "cell_type": "code", "execution_count": null, @@ -7,13 +141,68 @@ "outputs": [], "source": [ "import MDAnalysis as mda\n", - "\n" + "import MDAnalysisData\n", + "from MDAnalysis.analysis import align\n", + "\n", + "yiip = MDAnalysisData.yiip_equilibrium.fetch_yiip_equilibrium_long()\n", + "\n", + "u = mda.Universe(\n", + " yiip.topology,\n", + " yiip.trajectory\n", + ")\n", + "\n", + "average = align.AverageStructure(\n", + " u, u, select=\"protein and name CA\", ref_frame=0\n", + ").run()\n", + "ref = average.results.universe\n", + "\n", + "# Writer kwargs only passable to align in mda 2.8.0\n", + "# to relax requirement, just write in two steps\n", + "\n", + "# 1. Align traj and write to xtc\n", + "aligner = align.AlignTraj(\n", + " u,\n", + " ref,\n", + " select=\"protein and name CA\",\n", + " filename=\"/scratch/ljwoods2/workspace/zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center_aligned.xtc\",\n", + ").run()\n", + "\n", + "# 2. Write aligned xtc traj to H5MD\n", + "u_aligned = mda.Universe(\n", + " yiip.topology,\n", + " \"/scratch/ljwoods2/workspace/zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center_aligned.xtc\"\n", + ")\n", + "\n", + "with mda.Writer(\n", + " \"/scratch/ljwoods2/workspace/zarrtraj/zarrtraj/data/yiip_aligned_compressed.h5md\",\n", + " n_atoms=u_aligned.trajectory.n_atoms,\n", + " n_frames=u_aligned.trajectory.n_frames,\n", + " compression=\"gzip\",\n", + " compression_opts=9,\n", + " chunks=(9, u.trajectory.n_atoms, 3),\n", + ") as W:\n", + " for ts in u_aligned.trajectory:\n", + " W.write(u_aligned.atoms)\n" ] } ], "metadata": { + "kernelspec": { + "display_name": "mda-dev", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" } }, "nbformat": 4, From f22dab2e442847d598966ee7a648d5cfd8b6ac49 Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Mon, 23 Sep 2024 02:08:26 +0000 Subject: [PATCH 04/10] env used --- tmp/env.yaml | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 tmp/env.yaml diff --git a/tmp/env.yaml b/tmp/env.yaml new file mode 100644 index 00000000000..cf6237e4928 --- /dev/null +++ b/tmp/env.yaml @@ -0,0 +1,43 @@ +name: dask-timeseries-dev +channels: + - defaults + - conda-forge +dependencies: + - chemfiles>=0.10 + - codecov + - cython + - dask + - docutils + - fasteners + - griddataformats + - gsd + - h5py>=2.10 + - hypothesis + - ipykernel + - joblib>=0.12 + - mdanalysis-sphinx-theme >=1.3.0 + - matplotlib>=3.2.2 + - mmtf-python + - mock + - networkx + - 
numpy>=1.23.2 + - pytest + - python==3.10 + - pytng>=0.2.3 + - scikit-learn + - scipy + - pip + - sphinx <7.0 + - tidynamics>=1.0.0 + - tqdm>=4.43.0 + - sphinxcontrib-bibtex + - mdaencore + - waterdynamics + - pip: + - mdahole2 + - pathsimanalysis + - duecredit + - parmed + - sphinx-sitemap + - packaging + - pyedr>=0.7.0 From c3e78faa3cdfa8f51a47fe5d847b86f8cae125db Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Mon, 23 Sep 2024 02:15:10 +0000 Subject: [PATCH 05/10] remove cruft --- package/pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/package/pyproject.toml b/package/pyproject.toml index 111af1495cb..05b0eb3ba16 100644 --- a/package/pyproject.toml +++ b/package/pyproject.toml @@ -43,7 +43,6 @@ dependencies = [ 'waterdynamics', 'pathsimanalysis', 'mdahole2', - "imdclient @ git+https://github.com/ljwoods2/imdclient.git@main", ] keywords = [ "python", "science", "chemistry", "biophysics", "molecular-dynamics", From 2672b9ccd1b399a6c462edf018ab86f9d810d1f0 Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Mon, 23 Sep 2024 02:23:25 +0000 Subject: [PATCH 06/10] remove unused --- package/MDAnalysis/coordinates/IMD.py | 289 ---------- package/MDAnalysis/coordinates/__init__.py | 1 - .../MDAnalysisTests/coordinates/test_imd.py | 526 ------------------ 3 files changed, 816 deletions(-) delete mode 100644 package/MDAnalysis/coordinates/IMD.py delete mode 100644 testsuite/MDAnalysisTests/coordinates/test_imd.py diff --git a/package/MDAnalysis/coordinates/IMD.py b/package/MDAnalysis/coordinates/IMD.py deleted file mode 100644 index 04662458d43..00000000000 --- a/package/MDAnalysis/coordinates/IMD.py +++ /dev/null @@ -1,289 +0,0 @@ -""" - -Example: Streaming an IMD v3 trajectory -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -To stream a trajectory from GROMACS or another simulation engine that supports -IMD v3, ensure that the simulation engine is running and waiting for an IMD connection. - -For example, in GROMACS, you can use ``gmx mdrun`` with the ``-imdwait`` flag -to ensure that GROMACS will wait for a client before starting the simulation. -In GROMACS, you will know that the simulation is ready and waiting for the -MDAnalysis IMDReader client when this line is printed to the terminal: - -.. code-block:: none - - IMD: Will wait until I have a connection and IMD_GO orders. - -Once the simulation is ready for a client connection, setup your :class:`Universe` -like this: :: - - import MDAnalysis as mda - # Pass host and port of the listening GROMACS simulation - # server as the trajectory argument - u = mda.Universe("topology.tpr", "localhost:8888") - -Classes -^^^^^^^ - -.. autoclass:: IMDReader - :members: - :inherited-members: - -""" - -import queue -from MDAnalysis.coordinates.base import ( - ReaderBase, - FrameIteratorIndices, - FrameIteratorAll, - FrameIteratorSliced, -) -from MDAnalysis.coordinates import core -from MDAnalysis.lib.util import store_init_arguments - -import imdclient -from imdclient.utils import parse_host_port -import numpy as np -import logging -import warnings -from typing import Optional -import numbers - -logger = logging.getLogger("imdclient.IMDClient") - - -class IMDReader(ReaderBase): - """ - Reader for IMD protocol packets. - """ - - format = "IMD" - - @store_init_arguments - def __init__( - self, - filename, - convert_units=True, - n_atoms=None, - **kwargs, - ): - """ - Parameters - ---------- - filename : a string of the form "host:port" where host is the hostname - or IP address of the listening GROMACS server and port - is the port number. 
- n_atoms : int (optional) - number of atoms in the system. defaults to number of atoms - in the topology. don't set this unless you know what you're doing. - """ - self._init_scope = True - self._reopen_called = False - - super(IMDReader, self).__init__(filename, **kwargs) - - logger.debug("IMDReader initializing") - - if n_atoms is None: - raise ValueError("IMDReader: n_atoms must be specified") - self.n_atoms = n_atoms - - host, port = parse_host_port(filename) - - # This starts the simulation - self._imdclient = imdclient.IMDClient(host, port, n_atoms, **kwargs) - - imdsinfo = self._imdclient.get_imdsessioninfo() - # NOTE: after testing phase, fail out on IMDv2 - - self.ts = self._Timestep( - self.n_atoms, - positions=imdsinfo.positions, - velocities=imdsinfo.velocities, - forces=imdsinfo.forces, - **self._ts_kwargs, - ) - - self._frame = -1 - - try: - self._read_next_timestep() - except StopIteration: - raise RuntimeError("IMDReader: No data found in stream") - - def _read_next_timestep(self): - # No rewinding- to both load the first frame after __init__ - # and access it again during iteration, we need to store first ts in mem - if not self._init_scope and self._frame == -1: - self._frame += 1 - # can't simply return the same ts again- transformations would be applied twice - # instead, return the pre-transformed copy - return self._first_ts - - return self._read_frame(self._frame + 1) - - def _read_frame(self, frame): - - try: - imdf = self._imdclient.get_imdframe() - except EOFError: - # Not strictly necessary, but for clarity - raise StopIteration - - self._frame = frame - self._load_imdframe_into_ts(imdf) - - if self._init_scope: - self._first_ts = self.ts.copy() - self._init_scope = False - - logger.debug(f"IMDReader: Loaded frame {self._frame}") - return self.ts - - def _load_imdframe_into_ts(self, imdf): - self.ts.frame = self._frame - if imdf.time is not None: - self.ts.time = imdf.time - # NOTE: timestep.pyx "dt" method is suspicious bc it uses "new" keyword for a float - self.ts.data["dt"] = imdf.dt - if imdf.energies is not None: - self.ts.data.update(imdf.energies) - if imdf.box is not None: - self.ts.dimensions = core.triclinic_box(*imdf.box) - if imdf.positions is not None: - # must call copy because reference is expected to reset - # see 'test_frame_collect_all_same' in MDAnalysisTests.coordinates.base - self.ts.positions = imdf.positions - if imdf.velocities is not None: - self.ts.velocities = imdf.velocities - if imdf.forces is not None: - self.ts.forces = imdf.forces - - @property - def n_frames(self): - """Changes as stream is processed unlike other readers""" - raise RuntimeError("IMDReader: n_frames is unknown") - - def next(self): - """Don't rewind after iteration. When _reopen() is called, - an error will be raised - """ - try: - ts = self._read_next_timestep() - except (EOFError, IOError): - # Don't rewind here like we normally would - raise StopIteration from None - else: - for auxname, reader in self._auxs.items(): - ts = self._auxs[auxname].update_ts(ts) - - ts = self._apply_transformations(ts) - - return ts - - def rewind(self): - """Raise error on rewind""" - raise RuntimeError("IMDReader: Stream-based readers can't be rewound") - - @staticmethod - def _format_hint(thing): - try: - parse_host_port(thing) - except: - return False - return True - - def close(self): - """Gracefully shut down the reader. 
Stops the producer thread.""" - logger.debug("IMDReader close() called") - self._imdclient.stop() - # NOTE: removeme after testing - logger.debug("IMDReader shut down gracefully.") - - # Incompatible methods - def copy(self): - raise NotImplementedError("IMDReader does not support copying") - - def _reopen(self): - if self._reopen_called: - raise RuntimeError("IMDReader: Cannot reopen IMD stream") - self._frame = -1 - self._reopen_called = True - - def __getitem__(self, frame): - """This method from ProtoReader must be overridden - to prevent slicing that doesn't make sense in a stream. - """ - raise RuntimeError("IMDReader: Trajectory can only be read in for loop") - - def check_slice_indices(self, start, stop, step): - """Check frame indices are valid and clip to fit trajectory. - - The usage follows standard Python conventions for :func:`range` but see - the warning below. - - Parameters - ---------- - start : int or None - Starting frame index (inclusive). ``None`` corresponds to the default - of 0, i.e., the initial frame. - stop : int or None - Last frame index (exclusive). ``None`` corresponds to the default - of n_frames, i.e., it includes the last frame of the trajectory. - step : int or None - step size of the slice, ``None`` corresponds to the default of 1, i.e, - include every frame in the range `start`, `stop`. - - Returns - ------- - start, stop, step : tuple (int, int, int) - Integers representing the slice - - Warning - ------- - The returned values `start`, `stop` and `step` give the expected result - when passed in :func:`range` but gives unexpected behavior when passed - in a :class:`slice` when ``stop=None`` and ``step=-1`` - - This can be a problem for downstream processing of the output from this - method. For example, slicing of trajectories is implemented by passing - the values returned by :meth:`check_slice_indices` to :func:`range` :: - - range(start, stop, step) - - and using them as the indices to randomly seek to. On the other hand, - in :class:`MDAnalysis.analysis.base.AnalysisBase` the values returned - by :meth:`check_slice_indices` are used to splice the trajectory by - creating a :class:`slice` instance :: - - slice(start, stop, step) - - This creates a discrepancy because these two lines are not equivalent:: - - range(10, -1, -1) # [10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - range(10)[slice(10, -1, -1)] # [] - - """ - if start is not None: - raise ValueError( - "IMDReader: Cannot slice a stream, 'start' must be None" - ) - if stop is not None: - raise ValueError( - "IMDReader: Cannot slice a stream, 'stop' must be None" - ) - if step is not None: - if isinstance(step, numbers.Integral): - if step != 1: - raise ValueError( - "IMDReader: Cannot slice a stream, 'step' must be None or 1" - ) - - return start, stop, step - - def __getstate__(self): - raise NotImplementedError("IMDReader does not support pickling") - - def __setstate__(self, state: object): - raise NotImplementedError("IMDReader does not support pickling") diff --git a/package/MDAnalysis/coordinates/__init__.py b/package/MDAnalysis/coordinates/__init__.py index 727522e9a84..be7a4d63bcb 100644 --- a/package/MDAnalysis/coordinates/__init__.py +++ b/package/MDAnalysis/coordinates/__init__.py @@ -770,7 +770,6 @@ class can choose an appropriate reader automatically. from . import DMS from . import GMS from . import GRO -from . import IMD from . import INPCRD from . import LAMMPS from . 
import MOL2 diff --git a/testsuite/MDAnalysisTests/coordinates/test_imd.py b/testsuite/MDAnalysisTests/coordinates/test_imd.py deleted file mode 100644 index bc5ffefaf4f..00000000000 --- a/testsuite/MDAnalysisTests/coordinates/test_imd.py +++ /dev/null @@ -1,526 +0,0 @@ -from MDAnalysisTests.datafiles import ( - COORDINATES_TOPOLOGY, - COORDINATES_TRR, - COORDINATES_H5MD, -) -import MDAnalysis as mda -from imdclient.tests.utils import ( - get_free_port, - create_default_imdsinfo_v3, -) - -from imdclient.tests.server import InThreadIMDServer -from MDAnalysisTests.coordinates.base import ( - MultiframeReaderTest, - BaseReference, - BaseWriterTest, - assert_timestep_almost_equal, -) -from numpy.testing import ( - assert_almost_equal, - assert_array_almost_equal, - assert_equal, - assert_allclose, -) -import numpy as np -import logging -import pytest -from MDAnalysis.transformations import translate -import pickle - - -logger = logging.getLogger("imdclient.IMDClient") -file_handler = logging.FileHandler("test.log") -formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s" -) -file_handler.setFormatter(formatter) -logger.addHandler(file_handler) -logger.setLevel(logging.DEBUG) - - -class IMDReference(BaseReference): - def __init__(self): - super(IMDReference, self).__init__() - self.port = get_free_port() - # Serve TRR traj data via the server - traj = mda.coordinates.TRR.TRRReader(COORDINATES_TRR) - self.server = InThreadIMDServer(traj) - self.server.set_imdsessioninfo(create_default_imdsinfo_v3()) - - self.n_atoms = traj.n_atoms - self.prec = 3 - - self.trajectory = f"localhost:{self.port}" - self.topology = COORDINATES_TOPOLOGY - self.changing_dimensions = True - self.reader = mda.coordinates.IMD.IMDReader - - self.first_frame.velocities = self.first_frame.positions / 10 - self.first_frame.forces = self.first_frame.positions / 100 - - self.second_frame.velocities = self.second_frame.positions / 10 - self.second_frame.forces = self.second_frame.positions / 100 - - self.last_frame.velocities = self.last_frame.positions / 10 - self.last_frame.forces = self.last_frame.positions / 100 - - self.jump_to_frame.velocities = self.jump_to_frame.positions / 10 - self.jump_to_frame.forces = self.jump_to_frame.positions / 100 - - def iter_ts(self, i): - ts = self.first_frame.copy() - ts.positions = 2**i * self.first_frame.positions - ts.velocities = ts.positions / 10 - ts.forces = ts.positions / 100 - ts.time = i - ts.frame = i - return ts - - -class TestIMDReaderBaseAPI(MultiframeReaderTest): - - @pytest.fixture() - def ref(self): - """Not a static method like in base class- need new server for each test""" - return IMDReference() - - @pytest.fixture() - def reader(self, ref): - # This will start the test IMD Server, waiting for a connection - # to then send handshake & first frame - ref.server.handshake_sequence("localhost", ref.port) - # This will connect to the test IMD Server and read the first frame - reader = ref.reader(ref.trajectory, n_atoms=ref.n_atoms) - # Send the rest of the frames- small enough to all fit in socket itself - ref.server.send_frames(1, 5) - - reader.add_auxiliary( - "lowf", - ref.aux_lowf, - dt=ref.aux_lowf_dt, - initial_time=0, - time_selector=None, - ) - reader.add_auxiliary( - "highf", - ref.aux_highf, - dt=ref.aux_highf_dt, - initial_time=0, - time_selector=None, - ) - return reader - - @pytest.fixture() - def transformed(self, ref): - # This will start the test IMD Server, waiting for a connection - # to then send handshake & first frame - 
ref.server.handshake_sequence("localhost", ref.port) - # This will connect to the test IMD Server and read the first frame - transformed = ref.reader(ref.trajectory, n_atoms=ref.n_atoms) - # Send the rest of the frames- small enough to all fit in socket itself - ref.server.send_frames(1, 5) - transformed.add_transformations( - translate([1, 1, 1]), translate([0, 0, 0.33]) - ) - return transformed - - @pytest.mark.skip( - reason="Stream-based reader cannot determine n_frames until EOF" - ) - def test_n_frames(self, reader, ref): - assert_equal( - self.universe.trajectory.n_frames, - 1, - "wrong number of frames in pdb", - ) - - def test_first_frame(self, ref, reader): - # don't rewind here as in inherited base test - assert_timestep_almost_equal( - reader.ts, ref.first_frame, decimal=ref.prec - ) - - @pytest.mark.skip(reason="IMD is not a writeable format") - def test_get_writer_1(self, ref, reader, tmpdir): - with tmpdir.as_cwd(): - outfile = "test-writer." + ref.ext - with reader.Writer(outfile) as W: - assert_equal(isinstance(W, ref.writer), True) - assert_equal(W.n_atoms, reader.n_atoms) - - @pytest.mark.skip(reason="IMD is not a writeable format") - def test_get_writer_2(self, ref, reader, tmpdir): - with tmpdir.as_cwd(): - outfile = "test-writer." + ref.ext - with reader.Writer(outfile, n_atoms=100) as W: - assert_equal(isinstance(W, ref.writer), True) - assert_equal(W.n_atoms, 100) - - @pytest.mark.skip( - reason="Stream-based reader cannot determine total_time until EOF" - ) - def test_total_time(self, reader, ref): - assert_almost_equal(reader.totaltime, ref.totaltime, decimal=ref.prec) - - @pytest.mark.skip(reason="Stream-based reader can only be read iteratively") - def test_changing_dimensions(self, ref, reader): - if ref.changing_dimensions: - reader.rewind() - if ref.dimensions is None: - assert reader.ts.dimensions is None - else: - assert_array_almost_equal( - reader.ts.dimensions, ref.dimensions, decimal=ref.prec - ) - reader[1] - if ref.dimensions_second_frame is None: - assert reader.ts.dimensions is None - else: - assert_array_almost_equal( - reader.ts.dimensions, - ref.dimensions_second_frame, - decimal=ref.prec, - ) - - def test_iter(self, ref, reader): - for i, ts in enumerate(reader): - assert_timestep_almost_equal(ts, ref.iter_ts(i), decimal=ref.prec) - - def test_first_dimensions(self, ref, reader): - # don't rewind here as in inherited base test - if ref.dimensions is None: - assert reader.ts.dimensions is None - else: - assert_array_almost_equal( - reader.ts.dimensions, ref.dimensions, decimal=ref.prec - ) - - def test_volume(self, ref, reader): - # don't rewind here as in inherited base test - vol = reader.ts.volume - # Here we can only be sure about the numbers upto the decimal point due - # to floating point impressions. - assert_almost_equal(vol, ref.volume, 0) - - @pytest.mark.skip(reason="Cannot create new reader from same stream") - def test_reload_auxiliaries_from_description(self, ref, reader): - # get auxiliary desscriptions form existing reader - descriptions = reader.get_aux_descriptions() - # load a new reader, without auxiliaries - reader = ref.reader(ref.trajectory) - # load auxiliaries into new reader, using description... 
-        for aux in descriptions:
-            reader.add_auxiliary(**aux)
-        # should have the same number of auxiliaries
-        assert_equal(
-            reader.aux_list,
-            reader.aux_list,
-            "Number of auxiliaries does not match",
-        )
-        # each auxiliary should be the same
-        for auxname in reader.aux_list:
-            assert_equal(
-                reader._auxs[auxname],
-                reader._auxs[auxname],
-                "AuxReaders do not match",
-            )
-
-    @pytest.mark.skip(reason="Stream can only be read in for loop")
-    def test_stop_iter(self, reader):
-        # reset to 0
-        reader.rewind()
-        for ts in reader[:-1]:
-            pass
-        assert_equal(reader.frame, 0)
-
-    @pytest.mark.skip(reason="Cannot rewind stream")
-    def test_iter_rewinds(self, reader, accessor):
-        for ts_indices in accessor(reader):
-            pass
-        assert_equal(ts_indices.frame, 0)
-
-    @pytest.mark.skip(
-        reason="Timeseries currently requires n_frames to be known"
-    )
-    @pytest.mark.parametrize(
-        "order", ("fac", "fca", "afc", "acf", "caf", "cfa")
-    )
-    def test_timeseries_shape(self, reader, order):
-        timeseries = reader.timeseries(order=order)
-        a_index = order.index("a")
-        # f_index = order.index("f")
-        c_index = order.index("c")
-        assert timeseries.shape[a_index] == reader.n_atoms
-        # assert timeseries.shape[f_index] == len(reader)
-        assert timeseries.shape[c_index] == 3
-
-    @pytest.mark.skip(
-        reason="Timeseries currently requires n_frames to be known"
-    )
-    @pytest.mark.parametrize("asel", ("index 1", "index 2", "index 1 to 3"))
-    def test_timeseries_asel_shape(self, reader, asel):
-        atoms = mda.Universe(reader.filename).select_atoms(asel)
-        timeseries = reader.timeseries(atoms, order="fac")
-        assert timeseries.shape[0] == len(reader)
-        assert timeseries.shape[1] == len(atoms)
-        assert timeseries.shape[2] == 3
-
-    @pytest.mark.skip("Cannot slice stream")
-    @pytest.mark.parametrize("slice", ([0, 2, 1], [0, 10, 2], [0, 10, 3]))
-    def test_timeseries_values(self, reader, slice):
-        ts_positions = []
-        if isinstance(reader, mda.coordinates.memory.MemoryReader):
-            pytest.xfail(
-                "MemoryReader uses deprecated stop inclusive"
-                " indexing, see Issue #3893"
-            )
-        if slice[1] > len(reader):
-            pytest.skip("too few frames in reader")
-        for i in range(slice[0], slice[1], slice[2]):
-            ts = reader[i]
-            ts_positions.append(ts.positions.copy())
-        positions = np.asarray(ts_positions)
-        timeseries = reader.timeseries(
-            start=slice[0], stop=slice[1], step=slice[2], order="fac"
-        )
-        assert_allclose(timeseries, positions)
-
-    @pytest.mark.skip(reason="Cannot rewind stream")
-    def test_transformations_2iter(self, ref, transformed):
-        # Are the transformations applied and
-        # are the coordinates "overtransformed"?
-        v1 = np.float32((1, 1, 1))
-        v2 = np.float32((0, 0, 0.33))
-        idealcoords = []
-        for i, ts in enumerate(transformed):
-            idealcoords.append(ref.iter_ts(i).positions + v1 + v2)
-            assert_array_almost_equal(
-                ts.positions, idealcoords[i], decimal=ref.prec
-            )
-
-        for i, ts in enumerate(transformed):
-            assert_almost_equal(ts.positions, idealcoords[i], decimal=ref.prec)
-
-    @pytest.mark.skip(reason="Cannot slice stream")
-    def test_transformations_slice(self, ref, transformed):
-        # Are the transformations applied when iterating over a slice of the trajectory?
-        v1 = np.float32((1, 1, 1))
-        v2 = np.float32((0, 0, 0.33))
-        for i, ts in enumerate(transformed[2:3:1]):
-            idealcoords = ref.iter_ts(ts.frame).positions + v1 + v2
-            assert_array_almost_equal(
-                ts.positions, idealcoords, decimal=ref.prec
-            )
-
-    @pytest.mark.skip(reason="Cannot slice stream")
-    def test_transformations_switch_frame(self, ref, transformed):
-        # This test checks if the transformations are applied and if the coordinates
-        # "overtransformed" on different situations
-        # Are the transformations applied when we switch to a different frame?
-        v1 = np.float32((1, 1, 1))
-        v2 = np.float32((0, 0, 0.33))
-        first_ideal = ref.iter_ts(0).positions + v1 + v2
-        if len(transformed) > 1:
-            assert_array_almost_equal(
-                transformed[0].positions, first_ideal, decimal=ref.prec
-            )
-            second_ideal = ref.iter_ts(1).positions + v1 + v2
-            assert_array_almost_equal(
-                transformed[1].positions, second_ideal, decimal=ref.prec
-            )
-
-            # What if we comeback to the previous frame?
-            assert_array_almost_equal(
-                transformed[0].positions, first_ideal, decimal=ref.prec
-            )
-
-            # How about we switch the frame to itself?
-            assert_array_almost_equal(
-                transformed[0].positions, first_ideal, decimal=ref.prec
-            )
-        else:
-            assert_array_almost_equal(
-                transformed[0].positions, first_ideal, decimal=ref.prec
-            )
-
-    @pytest.mark.skip(reason="Cannot rewind stream")
-    def test_transformation_rewind(self, ref, transformed):
-        # this test checks if the transformations are applied after rewinding the
-        # trajectory
-        v1 = np.float32((1, 1, 1))
-        v2 = np.float32((0, 0, 0.33))
-        ideal_coords = ref.iter_ts(0).positions + v1 + v2
-        transformed.rewind()
-        assert_array_almost_equal(
-            transformed[0].positions, ideal_coords, decimal=ref.prec
-        )
-
-    @pytest.mark.skip(reason="Cannot make a copy of a stream")
-    def test_copy(self, ref, transformed):
-        # this test checks if transformations are carried over a copy and if the
-        # coordinates of the copy are equal to the original's
-        v1 = np.float32((1, 1, 1))
-        v2 = np.float32((0, 0, 0.33))
-        new = transformed.copy()
-        assert_equal(
-            transformed.transformations,
-            new.transformations,
-            "transformations are not equal",
-        )
-        for i, ts in enumerate(new):
-            ideal_coords = ref.iter_ts(i).positions + v1 + v2
-            assert_array_almost_equal(
-                ts.positions, ideal_coords, decimal=ref.prec
-            )
-
-    @pytest.mark.skip(reason="Cannot pickle socket")
-    def test_pickle_reader(self, reader):
-        """It probably wouldn't be a good idea to pickle a
-        reader that is connected to a server"""
-        reader_p = pickle.loads(pickle.dumps(reader))
-        assert_equal(len(reader), len(reader_p))
-        assert_equal(
-            reader.ts, reader_p.ts, "Timestep is changed after pickling"
-        )
-
-    @pytest.mark.skip(reason="Cannot pickle socket")
-    def test_pickle_next_ts_reader(self, reader):
-        reader_p = pickle.loads(pickle.dumps(reader))
-        assert_equal(
-            next(reader),
-            next(reader_p),
-            "Next timestep is changed after pickling",
-        )
-
-    @pytest.mark.skip(reason="Cannot pickle socket")
-    def test_pickle_last_ts_reader(self, reader):
-        # move current ts to last frame.
-        reader[-1]
-        reader_p = pickle.loads(pickle.dumps(reader))
-        assert_equal(
-            len(reader),
-            len(reader_p),
-            "Last timestep is changed after pickling",
-        )
-        assert_equal(
-            reader.ts, reader_p.ts, "Last timestep is changed after pickling"
-        )
-
-    @pytest.mark.skip(reason="Cannot copy stream")
-    def test_transformations_copy(self, ref, transformed):
-        # this test checks if transformations are carried over a copy and if the
-        # coordinates of the copy are equal to the original's
-        v1 = np.float32((1, 1, 1))
-        v2 = np.float32((0, 0, 0.33))
-        new = transformed.copy()
-        assert_equal(
-            transformed.transformations,
-            new.transformations,
-            "transformations are not equal",
-        )
-        for i, ts in enumerate(new):
-            ideal_coords = ref.iter_ts(i).positions + v1 + v2
-            assert_array_almost_equal(
-                ts.positions, ideal_coords, decimal=ref.prec
-            )
-
-    @pytest.mark.skip(
-        reason="Timeseries currently requires n_frames to be known"
-    )
-    def test_timeseries_empty_asel(self, reader):
-        with pytest.warns(
-            UserWarning,
-            match="Empty string to select atoms, empty group returned.",
-        ):
-            atoms = mda.Universe(reader.filename).select_atoms(None)
-        with pytest.raises(ValueError, match="Timeseries requires at least"):
-            reader.timeseries(asel=atoms)
-
-    @pytest.mark.skip(
-        reason="Timeseries currently requires n_frames to be known"
-    )
-    def test_timeseries_empty_atomgroup(self, reader):
-        with pytest.warns(
-            UserWarning,
-            match="Empty string to select atoms, empty group returned.",
-        ):
-            atoms = mda.Universe(reader.filename).select_atoms(None)
-        with pytest.raises(ValueError, match="Timeseries requires at least"):
-            reader.timeseries(atomgroup=atoms)
-
-    @pytest.mark.skip(
-        reason="Timeseries currently requires n_frames to be known"
-    )
-    def test_timeseries_asel_warns_deprecation(self, reader):
-        atoms = mda.Universe(reader.filename).select_atoms("index 1")
-        with pytest.warns(DeprecationWarning, match="asel argument to"):
-            timeseries = reader.timeseries(asel=atoms, order="fac")
-
-    @pytest.mark.skip(
-        reason="Timeseries currently requires n_frames to be known"
-    )
-    def test_timeseries_atomgroup(self, reader):
-        atoms = mda.Universe(reader.filename).select_atoms("index 1")
-        timeseries = reader.timeseries(atomgroup=atoms, order="fac")
-
-    @pytest.mark.skip(
-        reason="Timeseries currently requires n_frames to be known"
-    )
-    def test_timeseries_atomgroup_asel_mutex(self, reader):
-        atoms = mda.Universe(reader.filename).select_atoms("index 1")
-        with pytest.raises(ValueError, match="Cannot provide both"):
-            timeseries = reader.timeseries(
-                atomgroup=atoms, asel=atoms, order="fac"
-            )
-
-    @pytest.mark.skip("Cannot slice stream")
-    def test_last_frame(self, ref, reader):
-        ts = reader[-1]
-        assert_timestep_almost_equal(ts, ref.last_frame, decimal=ref.prec)
-
-    @pytest.mark.skip("Cannot slice stream")
-    def test_go_over_last_frame(self, ref, reader):
-        with pytest.raises(IndexError):
-            reader[ref.n_frames + 1]
-
-    @pytest.mark.skip("Cannot slice stream")
-    def test_frame_jump(self, ref, reader):
-        ts = reader[ref.jump_to_frame.frame]
-        assert_timestep_almost_equal(ts, ref.jump_to_frame, decimal=ref.prec)
-
-    @pytest.mark.skip("Cannot slice stream")
-    def test_frame_jump_issue1942(self, ref, reader):
-        """Test for issue 1942 (especially XDR on macOS)"""
-        reader.rewind()
-        try:
-            for ii in range(ref.n_frames + 2):
-                reader[0]
-        except StopIteration:
-            pytest.fail("Frame-seeking wrongly iterated (#1942)")
-
-    def test_next_gives_second_frame(self, ref, reader):
-        # don't recreate reader here as in inherited base test
-        ts = reader.next()
-        assert_timestep_almost_equal(ts, ref.second_frame, decimal=ref.prec)
-
-    @pytest.mark.skip(
-        reason="Stream isn't rewound after iteration- base reference is the same but it is the last frame"
-    )
-    def test_frame_collect_all_same(self, reader):
-        # check that the timestep resets so that the base reference is the same
-        # for all timesteps in a collection with the exception of memoryreader
-        # and DCDReader
-        if isinstance(reader, mda.coordinates.memory.MemoryReader):
-            pytest.xfail("memoryreader allows independent coordinates")
-        if isinstance(reader, mda.coordinates.DCD.DCDReader):
-            pytest.xfail(
-                "DCDReader allows independent coordinates."
-                "This behaviour is deprecated and will be changed"
-                "in 3.0"
-            )
-        collected_ts = []
-        for i, ts in enumerate(reader):
-            collected_ts.append(ts.positions)
-        for array in collected_ts:
-            assert_allclose(array, collected_ts[0])

From 7f7e2c8806cc9488fc3cffcfc3aafe62fb509494 Mon Sep 17 00:00:00 2001
From: Lawson Woods
Date: Mon, 23 Sep 2024 02:24:33 +0000
Subject: [PATCH 07/10] remove black format

---
 package/MDAnalysis/coordinates/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package/MDAnalysis/coordinates/__init__.py b/package/MDAnalysis/coordinates/__init__.py
index be7a4d63bcb..9b6a7121bc9 100644
--- a/package/MDAnalysis/coordinates/__init__.py
+++ b/package/MDAnalysis/coordinates/__init__.py
@@ -757,7 +757,7 @@ class can choose an appropriate reader automatically.
    raw :class:`~MDAnalysis.coordinates.Timestep` objects.
 
 """
-__all__ = ["reader", "writer", "timestep"]
+__all__ = ['reader', 'writer', 'timestep']
 
 from . import base
 from . import timestep

From bb74a1604170fcd6cdd9301297fd734a00831eb3 Mon Sep 17 00:00:00 2001
From: Lawson Woods
Date: Mon, 23 Sep 2024 02:26:45 +0000
Subject: [PATCH 08/10] num cpu fix

---
 tmp/lazyts.ipynb | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/tmp/lazyts.ipynb b/tmp/lazyts.ipynb
index c9f6cc2ce61..18aebfc6b48 100644
--- a/tmp/lazyts.ipynb
+++ b/tmp/lazyts.ipynb
@@ -104,27 +104,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
-     "ename": "KeyError",
-     "evalue": "'num_workers'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[15], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mdask\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmultiprocessing\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m num_threads \u001b[38;5;241m=\u001b[39m \u001b[43mdask\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mnum_workers\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(num_threads)\n",
-      "File \u001b[0;32m~/miniforge3/envs/mda-dev/lib/python3.10/site-packages/dask/config.py:613\u001b[0m, in \u001b[0;36mget\u001b[0;34m(key, default, config, override_with)\u001b[0m\n\u001b[1;32m 611\u001b[0m k \u001b[38;5;241m=\u001b[39m canonical_name(k, result)\n\u001b[1;32m 612\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 613\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mresult\u001b[49m\u001b[43m[\u001b[49m\u001b[43mk\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 614\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mIndexError\u001b[39;00m, \u001b[38;5;167;01mKeyError\u001b[39;00m):\n\u001b[1;32m 615\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m default \u001b[38;5;129;01mis\u001b[39;00m no_default:\n",
-      "\u001b[0;31mKeyError\u001b[0m: 'num_workers'"
-     ]
+     "data": {
+      "text/plain": [
+       "24"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
    }
   ],
   "source": [
-   "import dask\n",
+   "\n",
    "import multiprocessing\n",
-   "num_threads = dask.config.get('num_workers', 0)\n",
-   "print(num_threads)"
+   "multiprocessing.cpu_count()\n"
   ]
  },
 {

From 6d137d1cf5446e1f8eb6d5fb2581c9b55b94cfff Mon Sep 17 00:00:00 2001
From: Lawson Woods
Date: Mon, 23 Sep 2024 03:23:31 +0000
Subject: [PATCH 09/10] add results

---
 tmp/lazyts.ipynb | 43 +++++++++++++++++++++++++++++++++----------
 1 file changed, 33 insertions(+), 10 deletions(-)

diff --git a/tmp/lazyts.ipynb b/tmp/lazyts.ipynb
index 18aebfc6b48..f017496e09c 100644
--- a/tmp/lazyts.ipynb
+++ b/tmp/lazyts.ipynb
@@ -9,15 +9,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-     "/nfs/homes3/ljwoods2/miniforge3/envs/mda-dev/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-     " from .autonotebook import tqdm as notebook_tqdm\n",
     "/nfs/homes3/ljwoods2/workspace/mdanalysis/package/MDAnalysis/topology/PDBParser.py:348: UserWarning: Unknown element Z found for some atoms. These have been given an empty element record. If needed they can be guessed using MDAnalysis.topology.guessers.\n",
     " warnings.warn(wmsg)\n",
    "/nfs/homes3/ljwoods2/workspace/mdanalysis/package/MDAnalysis/topology/PDBParser.py:348: UserWarning: Unknown element D found for some atoms. These have been given an empty element record. If needed they can be guessed using MDAnalysis.topology.guessers.\n",
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 19,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "DaskRMSF execution time: 36.913174867630005\n"
+     ]
+    }
+   ],
   "source": [
    "from MDAnalysis.analysis.dasktimeseries import DaskRMSF\n",
+   "import time\n",
    "\n",
+   "start = time.time()\n",
    "dask_timeseries = u.trajectory.dask_timeseries()\n",
    "dRMSF = DaskRMSF(dask_timeseries).run()\n",
-   "dRMSF_result = dRMSF.results.rmsf"
+   "dRMSF_result = dRMSF.results.rmsf\n",
+   "stop = time.time()\n",
+   "\n",
+   "print(f\"DaskRMSF execution time: {stop - start}\")"
   ]
  },
 {
  "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 23,
  "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "RMSF execution time: 2235.1541423797607\n"
+     ]
+    }
+   ],
  "source": [
   "from MDAnalysis.analysis.rms import RMSF\n",
   "\n",
+   "start = time.time()\n",
   "RMSF = RMSF(u.atoms).run()\n",
-   "RMSF_result = RMSF.results.rmsf"
+   "RMSF_result = RMSF.results.rmsf\n",
+   "stop = time.time()\n",
+   "\n",
+   "print(f\"RMSF execution time: {stop - start}\")"
  ]
 },
 {
  "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 24,
  "metadata": {},
  "outputs": [],
  "source": [

From 9d12872257b37fbe8e6521c6827631a88bb0196a Mon Sep 17 00:00:00 2001
From: Lawson Woods
Date: Wed, 2 Oct 2024 22:17:23 +0000
Subject: [PATCH 10/10] ignore

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index e9f3571ca8e..3587be0f891 100644
--- a/.gitignore
+++ b/.gitignore
@@ -52,3 +52,6 @@ benchmarks/results
 .idea
 .vscode
 *.lock
+
+# dev
+tmp/
\ No newline at end of file
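
Editor's note: for readers skimming the series, the two timing cells that [PATCH 10/10]'s predecessor [PATCH 09/10] records in tmp/lazyts.ipynb boil down to the comparison sketched below. This is an illustrative condensation only, not part of the patches: it assumes the prototype MDAnalysis.analysis.dasktimeseries.DaskRMSF class and the trajectory.dask_timeseries() hook exercised by this series are importable, and the topology/trajectory file names are placeholders for the large local system used in the notebook.

    # Sketch of the benchmark pattern from tmp/lazyts.ipynb (assumptions noted above).
    import time

    import MDAnalysis as mda
    from MDAnalysis.analysis.dasktimeseries import DaskRMSF  # prototype API from this series
    from MDAnalysis.analysis.rms import RMSF

    # Placeholder inputs; the notebook loads its own topology and long trajectory.
    u = mda.Universe("topology.pdb", "trajectory.xtc")

    # Dask-backed RMSF computed over a lazily materialised timeseries.
    start = time.time()
    dask_timeseries = u.trajectory.dask_timeseries()
    dask_rmsf = DaskRMSF(dask_timeseries).run().results.rmsf
    print(f"DaskRMSF execution time: {time.time() - start}")

    # Conventional frame-by-frame RMSF for comparison.
    start = time.time()
    serial_rmsf = RMSF(u.atoms).run().results.rmsf
    print(f"RMSF execution time: {time.time() - start}")

On the author's machine the notebook records roughly 37 s for the dask-backed RMSF against roughly 2235 s for the serial RMSF on the same system.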