diff --git a/.circleci/config.yml b/.circleci/config.yml index c4d549adce..127a1a30f7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -19,6 +19,7 @@ jobs: sudo .github/workflows/dependencies/install_spack python3 -m pip install -U pip python3 -m pip install -U packaging setuptools wheel + python3 -m pip install -U six python3 -m pip install -U numpy python3 -m pip install -U mpi4py python3 -m pip install -U pandas diff --git a/.github/workflows/clang-format/clang-format.sh b/.github/workflows/clang-format/clang-format.sh index 0b99c629ab..3824f780f1 100755 --- a/.github/workflows/clang-format/clang-format.sh +++ b/.github/workflows/clang-format/clang-format.sh @@ -2,11 +2,11 @@ if (( $# > 0 )); then # received arguments, format those files - clang-format-13 -i "$@" + clang-format-18 -i "$@" else # received no arguments, find files on our own find include/ src/ test/ examples/ \ -regextype egrep \ -type f -regex '.*\.(hpp|cpp|hpp\.in)$' \ - | xargs clang-format-13 -i + | xargs clang-format-18 -i fi diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 26eed48da1..d90fc6c863 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -128,26 +128,38 @@ jobs: - name: Install run: | sudo apt update - sudo apt install clang-14 cmake gfortran libhdf5-dev python3.11 python3.11-dev wget - wget https://bootstrap.pypa.io/get-pip.py - python3.11 get-pip.py - python3.11 -m pip install numpy pandas + sudo apt install clang-14 cmake gfortran libhdf5-dev python3.11 python3.11-dev wget python3.11-venv sudo .github/workflows/dependencies/install_spack + python3.11 -m venv /opt/python_venv + . 
/opt/python_venv/bin/activate + python -m pip install numpy pandas + git clone -b v4.0.3 https://github.com/ToruNiina/toml11 + cmake -S toml11 -B build_toml11 \ + -DCMAKE_INSTALL_PREFIX=toml11_install \ + -DCMAKE_CXX_STANDARD_REQUIRED=OFF \ + -DCMAKE_CXX_STANDARD=11 + cmake --build build_toml11 -j 2 --target install - name: Build env: {CC: clang-14, CXX: clang++-14, CXXFLAGS: -Werror} run: | + # Build Spack packages against system Python + # and activate the virtual environment just for the openPMD build. + # Spack does not play nice with venv. eval $(spack env activate --sh .github/ci/spack-envs/clang14_py311_nompi_h5_ad2/) spack install + . /opt/python_venv/bin/activate share/openPMD/download_samples.sh build + export CMAKE_PREFIX_PATH="$(realpath toml11_install):$CMAKE_PREFIX_PATH" cmake -S . -B build \ -DopenPMD_USE_PYTHON=ON \ -DopenPMD_USE_MPI=OFF \ -DopenPMD_USE_HDF5=ON \ -DopenPMD_USE_ADIOS2=ON \ - -DopenPMD_USE_INVASIVE_TESTS=ON \ - -DCMAKE_VERBOSE_MAKEFILE=ON \ - -DPython_EXECUTABLE=$(which python3.11) + -DopenPMD_USE_INVASIVE_TESTS=ON \ + -DopenPMD_USE_INTERNAL_TOML11=OFF \ + -DCMAKE_VERBOSE_MAKEFILE=ON \ + -DPython_EXECUTABLE="$(which python)" cmake --build build --parallel 2 ctest --test-dir build --output-on-failure @@ -271,17 +283,22 @@ jobs: run: | apk update apk add hdf5-dev - python3.10 -m pip install numpy + # Use a virtual environment in order to avoid compatibility issues + # between the various Python installations in this image. + python3.10 -m venv /opt/python_env + . /opt/python_env/bin/activate + python -m pip install numpy - name: Build env: {CXXFLAGS: -Werror} run: | + . /opt/python_env/bin/activate share/openPMD/download_samples.sh build cmake -S . 
-B build \ -DopenPMD_USE_PYTHON=ON \ -DopenPMD_USE_MPI=OFF \ -DopenPMD_USE_HDF5=ON \ -DopenPMD_USE_INVASIVE_TESTS=ON \ - -DPython_EXECUTABLE=$(which python3.10) + -DPython_EXECUTABLE=$(which python) cmake --build build --parallel 2 cd build ctest --output-on-failure @@ -312,11 +329,14 @@ jobs: share/openPMD/download_samples.sh build cmake -S . -B build \ + -DCMAKE_CXX_FLAGS="-Wno-error=stringop-overread" \ + -DCMAKE_C_FLAGS="-Wno-error=stringop-overread" \ -DopenPMD_USE_PYTHON=ON \ -DopenPMD_USE_MPI=ON \ -DopenPMD_USE_HDF5=ON \ -DopenPMD_USE_ADIOS2=ON \ - -DopenPMD_USE_INVASIVE_TESTS=ON + -DopenPMD_USE_INVASIVE_TESTS=ON \ + -DMPIEXEC_EXECUTABLE=".github/workflows/mpirun_workaround.sh" cmake --build build --parallel 2 cd build ctest --output-on-failure diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index f7b332fbde..641f138976 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -12,7 +12,7 @@ jobs: # appleclang10_py37_h5_ad2_libcpp # appleclang11_nopy_nompi_h5_ad2 - appleclang14_py_mpi_h5_ad2: + appleclang15_py_mpi_h5_ad2: runs-on: macos-latest if: github.event.pull_request.draft == false steps: @@ -45,8 +45,8 @@ jobs: cmake --build build --parallel 3 ctest --test-dir build --verbose - appleclang13_py: - runs-on: macos-11 + appleclang14_py: + runs-on: macos-12 if: github.event.pull_request.draft == false steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/mpirun_workaround.sh b/.github/workflows/mpirun_workaround.sh index cae5a9e791..e953b34647 100755 --- a/.github/workflows/mpirun_workaround.sh +++ b/.github/workflows/mpirun_workaround.sh @@ -14,7 +14,8 @@ # This script provides a workaround by putting the called sub-command into # a script in a temporary file. 
-mpiexec -n 1 ls --all \ +ls="$(which ls)" +mpiexec "$ls" -m \ && echo "MPIRUN WORKING AGAIN, PLEASE REMOVE WORKAROUND" >&2 \ && exit 1 \ || true diff --git a/.github/workflows/source.yml b/.github/workflows/source.yml index b97c5fec3b..ae0b4e6d1f 100644 --- a/.github/workflows/source.yml +++ b/.github/workflows/source.yml @@ -36,7 +36,7 @@ jobs: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v3 - - uses: s-weigand/setup-conda@v1.2.1 + - uses: s-weigand/setup-conda@v1.2.2 with: update-conda: true conda-channels: conda-forge diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8a8276c100..61147bc862 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ exclude: '^share/openPMD/thirdParty' # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: trailing-whitespace args: [--markdown-linebreak-ext=md] @@ -49,7 +49,7 @@ repos: # Changes tabs to spaces - repo: https://github.com/Lucas-C/pre-commit-hooks - rev: v1.5.4 + rev: v1.5.5 hooks: - id: remove-tabs @@ -66,7 +66,7 @@ repos: # clang-format v13 # to run manually, use .github/workflows/clang-format/clang-format.sh - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v17.0.6 + rev: v18.1.5 hooks: - id: clang-format # By default, the clang-format hook configures: diff --git a/CHANGELOG.rst b/CHANGELOG.rst index bf861156c8..7bd645ce06 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -17,7 +17,7 @@ Changes to "0.15.0" Features """""""" -- pybind11: require version 2.11.1+ #1220 #1322 +- pybind11: require version 2.12.0+ #1220 #1322 #1637 Bug Fixes """"""""" diff --git a/CMakeLists.txt b/CMakeLists.txt index 01755b3aa8..25188c417c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -278,7 +278,15 @@ if(openPMD_USE_INTERNAL_TOML11) add_subdirectory("${openPMD_SOURCE_DIR}/share/openPMD/thirdParty/toml11") message(STATUS "toml11: Using INTERNAL version 
'3.7.1'") else() - find_package(toml11 3.7.1 CONFIG REQUIRED) + # toml11 4.0 was a breaking change. This is reflected in the library's CMake + # logic: version 4.0 is not accepted by a call to find_package(toml11 3.7). + # Since we support both incompatible versions, we use two find_package() + # calls. Search for version 4 first in order to prefer that + # in (the unlikely) case that both versions are installed. + find_package(toml11 4.0 CONFIG QUIET) + if(NOT toml11_FOUND) + find_package(toml11 3.7.1 CONFIG REQUIRED) + endif() message(STATUS "toml11: Found version '${toml11_VERSION}'") endif() add_library(openPMD::thirdparty::toml11 INTERFACE IMPORTED) @@ -408,9 +416,9 @@ if(openPMD_USE_PYTHON STREQUAL AUTO) if(openPMD_USE_INTERNAL_PYBIND11) add_subdirectory("${openPMD_SOURCE_DIR}/share/openPMD/thirdParty/pybind11") set(openPMD_HAVE_PYTHON TRUE) - message(STATUS "pybind11: Using INTERNAL version 2.11.1") + message(STATUS "pybind11: Using INTERNAL version 2.12.0") else() - find_package(pybind11 2.11.1 CONFIG) + find_package(pybind11 2.12.0 CONFIG) if(pybind11_FOUND) set(openPMD_HAVE_PYTHON TRUE) message(STATUS "pybind11: Found version '${pybind11_VERSION}'") @@ -426,9 +434,9 @@ elseif(openPMD_USE_PYTHON) if(openPMD_USE_INTERNAL_PYBIND11) add_subdirectory("${openPMD_SOURCE_DIR}/share/openPMD/thirdParty/pybind11") set(openPMD_HAVE_PYTHON TRUE) - message(STATUS "pybind11: Using INTERNAL version 2.11.1") + message(STATUS "pybind11: Using INTERNAL version 2.12.0") else() - find_package(pybind11 2.11.1 REQUIRED CONFIG) + find_package(pybind11 2.12.0 REQUIRED CONFIG) set(openPMD_HAVE_PYTHON TRUE) message(STATUS "pybind11: Found version '${pybind11_VERSION}'") endif() @@ -460,6 +468,7 @@ set(CORE_SOURCE src/auxiliary/Date.cpp src/auxiliary/Filesystem.cpp src/auxiliary/JSON.cpp + src/auxiliary/Mpi.cpp src/backend/Attributable.cpp src/backend/BaseRecordComponent.cpp src/backend/MeshRecordComponent.cpp @@ -482,6 +491,7 @@ set(IO_SOURCE src/IO/JSON/JSONIOHandlerImpl.cpp 
src/IO/JSON/JSONFilePosition.cpp src/IO/ADIOS/ADIOS2IOHandler.cpp + src/IO/ADIOS/ADIOS2File.cpp src/IO/ADIOS/ADIOS2Auxiliary.cpp src/IO/InvalidatableFile.cpp) @@ -1369,7 +1379,7 @@ if(openPMD_BUILD_TESTING) --outfile \ ../samples/git-sample/thetaMode/data_%T.bp && \ \ - ${Python_EXECUTABLE} \ + ${MPI_TEST_EXE} ${Python_EXECUTABLE} \ ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ --infile ../samples/git-sample/thetaMode/data_%T.bp \ --outfile ../samples/git-sample/thetaMode/data%T.json \ diff --git a/NEWS.rst b/NEWS.rst index db59c2ab0f..89089391a4 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -13,7 +13,7 @@ Please transition to ADIOS2. For reading legacy ADIOS1 BP3 files, either use an older version of openPMD-api or the BP3 backend in ADIOS2. Note that ADIOS2 does not support compression in BP3 files. -pybind11 2.11.1 is now the minimally supported version for Python support. +pybind11 2.12.0 is now the minimally supported version for Python support. 0.15.0 diff --git a/README.md b/README.md index 8bcbf25c3d..375ea95d57 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,7 @@ Required: Shipped internally in `share/openPMD/thirdParty/`: * [Catch2](https://github.com/catchorg/Catch2) 2.13.10+ ([BSL-1.0](https://github.com/catchorg/Catch2/blob/master/LICENSE.txt)) -* [pybind11](https://github.com/pybind/pybind11) 2.11.1+ ([new BSD](https://github.com/pybind/pybind11/blob/master/LICENSE)) +* [pybind11](https://github.com/pybind/pybind11) 2.12.0+ ([new BSD](https://github.com/pybind/pybind11/blob/master/LICENSE)) * [NLohmann-JSON](https://github.com/nlohmann/json) 3.9.1+ ([MIT](https://github.com/nlohmann/json/blob/develop/LICENSE.MIT)) * [toml11](https://github.com/ToruNiina/toml11) 3.7.1+ ([MIT](https://github.com/ToruNiina/toml11/blob/master/LICENSE)) @@ -116,7 +116,7 @@ while those can be built either with or without: Optional language bindings: * Python: * Python 3.8 - 3.12 - * pybind11 2.11.1+ + * pybind11 2.12.0+ * numpy 1.15+ * mpi4py 2.1+ (optional, for MPI) * 
pandas 1.0+ (optional, for dataframes) @@ -267,7 +267,7 @@ The following options allow to switch to external installs: | CMake Option | Values | Library | Version | |---------------------------------|------------|---------------|----------| | `openPMD_USE_INTERNAL_CATCH` | **ON**/OFF | Catch2 | 2.13.10+ | -| `openPMD_USE_INTERNAL_PYBIND11` | **ON**/OFF | pybind11 | 2.11.1+ | +| `openPMD_USE_INTERNAL_PYBIND11` | **ON**/OFF | pybind11 | 2.12.0+ | | `openPMD_USE_INTERNAL_JSON` | **ON**/OFF | NLohmann-JSON | 3.9.1+ | | `openPMD_USE_INTERNAL_TOML11` | **ON**/OFF | toml11 | 3.7.1+ | diff --git a/docs/source/analysis/pandas.rst b/docs/source/analysis/pandas.rst index dcfe97aae2..a5fee0be07 100644 --- a/docs/source/analysis/pandas.rst +++ b/docs/source/analysis/pandas.rst @@ -45,16 +45,7 @@ One can also combine all iterations in a single dataframe like this: .. code-block:: python - import pandas as pd - - df = pd.concat( - ( - s.iterations[i].particles["electrons"].to_df().assign(iteration=i) - for i in s.iterations - ), - axis=0, - ignore_index=True, - ) + df = s.to_df("electrons") # like before but with a new column "iteration" and all particles print(df) diff --git a/docs/source/analysis/rapids.rst b/docs/source/analysis/rapids.rst index 41acc55308..e3bb011d8d 100644 --- a/docs/source/analysis/rapids.rst +++ b/docs/source/analysis/rapids.rst @@ -51,14 +51,7 @@ One can also combine all iterations in a single dataframe like this: .. 
code-block:: python - cdf = cudf.concat( - ( - cudf.from_pandas(s.iterations[i].particles["electrons"].to_df().assign(iteration=i)) - for i in s.iterations - ), - axis=0, - ignore_index=True, - ) + cdf = s.to_cudf("electrons") # like before but with a new column "iteration" and all particles print(cdf) diff --git a/docs/source/backends/adios2.rst b/docs/source/backends/adios2.rst index a6161ed9dc..55f080494c 100644 --- a/docs/source/backends/adios2.rst +++ b/docs/source/backends/adios2.rst @@ -48,6 +48,10 @@ Exceptions to this are the BP3 and SST engines which require their endings ``.bp For file engines, we currently leverage the default ADIOS2 transport parameters, i.e. ``POSIX`` on Unix systems and ``FStream`` on Windows. +.. tip:: + + Use the ``adios2.engine.treat_unsupported_engine_as`` :ref:`JSON/TOML parameter ` for experimentally interacting with an unsupported ADIOS2 engine. + Steps ----- @@ -81,6 +85,7 @@ environment variable default description ``OPENPMD_ADIOS2_HAVE_METADATA_FILE`` ``1`` Online creation of the adios journal file (``1``: yes, ``0``: no). ``OPENPMD_ADIOS2_NUM_SUBSTREAMS`` ``0`` Number of files to be created, 0 indicates maximum number possible. ``OPENPMD_ADIOS2_ENGINE`` ``File`` `ADIOS2 engine `_ +``OPENPMD_ADIOS2_PRETEND_ENGINE`` *empty* Pretend that an (unknown) ADIOS2 engine is in fact another one (also see the ``adios2.pretend_engine`` :ref:`parameter `). ``OPENPMD2_ADIOS2_USE_GROUP_TABLE`` ``0`` Use group table (see below) ``OPENPMD_ADIOS2_STATS_LEVEL`` ``0`` whether to generate statistics for variables in ADIOS2. (``1``: yes, ``0``: no). ``OPENPMD_ADIOS2_ASYNC_WRITE`` ``0`` ADIOS2 BP5 engine: 1 means setting "AsyncWrite" in ADIOS2 to "on". Flushes will go to the buffer by default (see ``preferred_flush_target``). 
diff --git a/docs/source/backends/hdf5.rst b/docs/source/backends/hdf5.rst index 4786f7fa48..1d1866d874 100644 --- a/docs/source/backends/hdf5.rst +++ b/docs/source/backends/hdf5.rst @@ -15,6 +15,15 @@ I/O Method HDF5 internally either writes serially, via ``POSIX`` on Unix systems, or parallel to a single logical file via MPI-I/O. +Virtual File Drivers +******************** + +Rudimentary support for HDF5 VFDs (`virtual file driver `_) is available (currently only the *subfiling* VFD). +Note that the subfiling VFD needs to be enabled explicitly when configuring HDF5 and threaded MPI must be used. + +Virtual file drivers are configured via JSON/TOML. +Refer to the page on :ref:`JSON/TOML configuration ` for further details. + Backend-Specific Controls ------------------------- @@ -45,6 +54,10 @@ Although we choose the default to be non-collective (independent) for ease of us For independent parallel I/O, potentially prefer using a modern version of the MPICH implementation (especially, use ROMIO instead of OpenMPI's ompio implementation). Please refer to the `HDF5 manual, function H5Pset_dxpl_mpio `_ for more details. +.. tip:: + + Instead of using an environment variable, independent/collective data transfer can also be configured at the API level via :ref:`JSON/TOML `. + ``OPENPMD_HDF5_ALIGNMENT``: this sets the alignment in Bytes for writes via the ``H5Pset_alignment`` function. According to the `HDF5 documentation `_: *For MPI IO and other parallel systems, choose an alignment which is a multiple of the disk block size.* @@ -56,6 +69,7 @@ Any file object greater than or equal in size to threshold bytes will be aligned ``OPENPMD_HDF5_CHUNKS``: this sets defaults for data chunking via `H5Pset_chunk `__. Chunking generally improves performance and only needs to be disabled in corner-cases, e.g. when heavily relying on independent, parallel I/O that non-collectively declares data records. 
+The chunk size can alternatively (or additionally) be specified explicitly per dataset, by specifying a dataset-specific chunk size in the JSON/TOML configuration of ``resetDataset()``/``reset_dataset()``. ``OPENPMD_HDF5_COLLECTIVE_METADATA``: this is an option to enable collective MPI calls for HDF5 metadata operations via `H5Pset_all_coll_metadata_ops `__ and `H5Pset_coll_metadata_write `__. By default, this optimization is enabled as it has proven to provide performance improvements. diff --git a/docs/source/backends/json.rst b/docs/source/backends/json.rst index 48ec6b1f44..bbae92aaf6 100644 --- a/docs/source/backends/json.rst +++ b/docs/source/backends/json.rst @@ -92,7 +92,6 @@ propagate the exception thrown by Niels Lohmann's library. The (keys) names ``"attributes"``, ``"data"`` and ``"datatype"`` are reserved and must not be used for base/mesh/particles path, records and their components. -A parallel (i.e. MPI) implementation is *not* available. TOML Restrictions ----------------- @@ -106,7 +105,41 @@ TOML does not support null values. The (keys) names ``"attributes"``, ``"data"`` and ``"datatype"`` are reserved and must not be used for base/mesh/particles path, records and their components. -A parallel (i.e. MPI) implementation is *not* available. + +Using in parallel (MPI) +----------------------- + +Parallel I/O is not a first-class citizen in the JSON and TOML backends, and neither backend will "go out of its way" to support parallel workflows. + +However there is a rudimentary form of read and write support in parallel: + +Parallel reading +................ + +In order not to overload the parallel filesystem with parallel reads, read access to JSON datasets is done by rank 0 and then broadcast to all other ranks. +Note that there is no granularity whatsoever in reading a JSON file. +A JSON file is always read into memory and broadcast to all other ranks in its entirety. + +Parallel writing +................ 
+ +When executed in an MPI context, the JSON/TOML backends will not directly output a single text file, but instead a folder containing one file per MPI rank. +Neither backend will perform any data aggregation at all. + +.. note:: + + The parallel write support of the JSON/TOML backends is intended mainly for debugging and prototyping workflows. + +The folder will use the specified Series name, but append the postfix ``.parallel``. +(This is a deliberate indication that this folder cannot directly be opened again by the openPMD-api as a JSON/TOML dataset.) +This folder contains for each MPI rank *i* a file ``mpi_rank_.json`` (resp. ``mpi_rank_.toml``), containing the serial output of that rank. +A ``README.txt`` with basic usage instructions is also written. + +.. note:: + + There is no direct support in the openPMD-api to read a JSON/TOML dataset written in this parallel fashion. The single files (e.g. ``data.json.parallel/mpi_rank_0.json``) are each valid openPMD files and can be read separately, however. + + Note that the auxiliary function ``json::merge()`` (or in Python ``openpmd_api.merge_json()``) is not adequate for merging the single JSON/TOML files back into one, since it does not merge anything below the array level. Example diff --git a/docs/source/details/backendconfig.rst b/docs/source/details/backendconfig.rst index ae2a2d4f63..f6d15a7ac8 100644 --- a/docs/source/details/backendconfig.rst +++ b/docs/source/details/backendconfig.rst @@ -77,6 +77,8 @@ For a consistent user interface, backends shall follow the following rules: Backend-independent JSON configuration -------------------------------------- +.. _backend_independent_config: + The openPMD backend can be chosen via the JSON/TOML key ``backend`` which recognizes the alternatives ``["hdf5", "adios2", "json"]``. The iteration encoding can be chosen via the JSON/TOML key ``iteration_encoding`` which recognizes the alternatives ``["file_based", "group_based", "variable_based"]``. 
@@ -97,6 +99,8 @@ It if set to ``{"resizable": true}``, this declares that it shall be allowed to For HDF5, resizable Datasets come with a performance penalty. For JSON and ADIOS2, all datasets are resizable, independent of this option. +The key ``rank_table`` allows specifying the creation of a **rank table**, used for tracking :ref:`chunk provenance especially in streaming setups `, refer to the streaming documentation for details. + Configuration Structure per Backend ----------------------------------- @@ -118,6 +122,17 @@ Explanation of the single keys: * ``adios2.engine.type``: A string that is passed directly to ``adios2::IO:::SetEngine`` for choosing the ADIOS2 engine to be used. Please refer to the `official ADIOS2 documentation `_ for a list of available engines. +* ``adios2.engine.pretend_engine``: May be used for experimentally testing an ADIOS2 engine that is not explicitly supported by the openPMD-api. + Specify the actual engine via ``adios2.engine.type`` and use ``adios2.engine.pretend_engine`` to make the ADIOS2 backend pretend that it is in fact using another engine that it knows. + Some advanced engine-specific features will be turned off indiscriminately: + + * The Span API will use a fallback implementation + * ``PerformDataWrite()`` will not be used, even when specifying ``adios2.engine.preferred_flush_target = "disk"``. + * Engine-specific parameters such as ``QueueLimit`` will not be set by default. + * No engine-specific filename extension handling will be executed, the extension specified by the user is taken "as is". +* ``adios2.engine.access_mode``: One of ``"Write", "Read", "Append", "ReadRandomAccess"``. + Only needed in specific use cases, the access mode is usually determined from the specified ``openPMD::Access``. + Useful for finetuning the backend-specific behavior of ADIOS2 when overwriting existing Iterations in file-based Append mode. 
* ``adios2.engine.parameters``: An associative array of string-formatted engine parameters, passed directly through to ``adios2::IO::SetParameters``. Please refer to the `official ADIOS2 documentation `_ for the available engine parameters. The openPMD-api does not interpret these values and instead simply forwards them to ADIOS2. @@ -125,10 +140,11 @@ Explanation of the single keys: * If ``"disk"``, data will be moved to disk on every flush. * If ``"buffer"``, then only upon ending an IO step or closing an engine. + * If ``new_step``, then a new step will be created. This should be used in combination with the ADIOS2 option ``adios2.engine.parameters.FlattenSteps = "on"``. This behavior can be overridden on a per-flush basis by specifying this JSON/TOML key as an optional parameter to the ``Series::flush()`` or ``Attributable::seriesFlush()`` methods. - Additionally, specifying ``"disk_override"`` or ``"buffer_override"`` will take precedence over options specified without the ``_override`` suffix, allowing to invert the normal precedence order. + Additionally, specifying ``"disk_override"``, ``"buffer_override"`` or ``"new_step_override"`` will take precedence over options specified without the ``_override`` suffix, allowing to invert the normal precedence order. This way, a data producing code can hardcode the preferred flush target per ``flush()`` call, but users can e.g. still entirely deactivate flushing to disk in the ``Series`` constructor by specifying ``preferred_flush_target = buffer_override``. This is useful when applying the asynchronous IO capabilities of the BP5 engine. * ``adios2.dataset.operators``: This key contains a list of ADIOS2 `operators `_, used to enable compression or dataset transformations. @@ -183,13 +199,35 @@ A full configuration of the HDF5 backend: .. literalinclude:: hdf5.json :language: json -All keys found under ``hdf5.dataset`` are applicable globally (future: as well as per dataset). 
+All keys found under ``hdf5.dataset`` are applicable globally as well as per dataset. Explanation of the single keys: * ``hdf5.dataset.chunks``: This key contains options for data chunking via `H5Pset_chunk `__. The default is ``"auto"`` for a heuristic. ``"none"`` can be used to disable chunking. + + An explicit chunk size can be specified as a list of positive integers, e.g. ``hdf5.dataset.chunks = [10, 100]``. Note that this specification should only be used per-dataset, e.g. in ``resetDataset()``/``reset_dataset()``. + Chunking generally improves performance and only needs to be disabled in corner-cases, e.g. when heavily relying on independent, parallel I/O that non-collectively declares data records. +* ``hdf5.vfd.type`` selects the HDF5 virtual file driver. + Currently available are: + + * ``"default"``: Equivalent to specifying nothing. + * ``subfiling"``: Use the `subfiling VFD `_. + Note that the subfiling VFD needs to be enabled explicitly when configuring HDF5 and threaded MPI must be used. + When using this VFD, the options described below are additionally available. + They correspond with the field entries of ``H5FD_subfiling_params_t``, refer to the HDF5 documentation for their detailed meanings. + + * ``hdf5.vfd.ioc_selection``: Must be one of ``["one_per_node", "every_nth_rank", "with_config", "total"]`` + * ``hdf5.vfd.stripe_size``: Must be an integer + * ``hdf5.vfd.stripe_count``: Must be an integer + +Flush calls, e.g. ``Series::flush()`` can be configured via JSON/TOML as well. +The parameters eligible for being passed to flush calls may be configured globally as well, i.e. in the constructor of ``Series``, to provide default settings used for the entire Series. + +* ``hdf5.independent_stores``: A boolean that sets the ``H5FD_MPIO_INDEPENDENT`` dataset transfer property if true, otherwise ``H5FD_MPIO_COLLECTIVE``. + Only available when using HDF5 in combination with MPI. + See the `HDF5 subpage `_ for further information on independent vs. 
collective flushing. .. _backendconfig-other: diff --git a/docs/source/details/hdf5.json b/docs/source/details/hdf5.json index 99eb609123..775e16f60c 100644 --- a/docs/source/details/hdf5.json +++ b/docs/source/details/hdf5.json @@ -2,6 +2,12 @@ "hdf5": { "dataset": { "chunks": "auto" + }, + "vfd": { + "type": "subfiling", + "ioc_selection": "every_nth_rank", + "stripe_size": 33554432, + "stripe_count": -1 } } } diff --git a/docs/source/details/mpi.rst b/docs/source/details/mpi.rst index ea4ec0551e..38bdc2643d 100644 --- a/docs/source/details/mpi.rst +++ b/docs/source/details/mpi.rst @@ -13,11 +13,13 @@ A **collective** operation needs to be executed by *all* MPI ranks of the MPI co Contrarily, **independent** operations can also be called by a subset of these MPI ranks. For more information, please see the `MPI standard documents `_, for example MPI-3.1 in `"Section 2.4 - Semantic Terms" `_. -============================ ================== =========================== +============================ ================== ================================ Functionality Behavior Description -============================ ================== =========================== +============================ ================== ================================ ``Series`` **collective** open and close ``::flush()`` **collective** read and write +``::setRankTable()`` **collective** write, performed at flush +``::rankTable()`` **coll**/indep. behavior specified by bool param ``Iteration`` [1]_ independent declare and open ``::open()`` [4]_ **collective** explicit open ``Mesh`` [1]_ independent declare, open, write @@ -30,7 +32,7 @@ Functionality Behavior Description ``::storeChunk`` [1]_ independent write ``::loadChunk`` independent read ``::availableChunks`` [4]_ collective read, immediate result -============================ ================== =========================== +============================ ================== ================================ .. [1] Individual backends, i.e. 
:ref:`parallel HDF5 `, will only support independent operations if the default, non-collective (aka independent) behavior is kept. Otherwise these operations are collective. @@ -47,6 +49,12 @@ Functionality Behavior Description .. [4] We usually open iterations delayed on first access. This first access is usually the ``flush()`` call after a ``storeChunk``/``loadChunk`` operation. If the first access is non-collective, an explicit, collective ``Iteration::open()`` can be used to have the files already open. Alternatively, iterations might be accessed for the first time by immediate operations such as ``::availableChunks()``. +.. warning:: + + The openPMD-api will by default flush only those Iterations which are dirty, i.e. have been written to. + This is somewhat unfortunate in parallel setups since only the dirty status of the current MPI rank can be considered. + As a workaround, use ``Attributable::seriesFlush()`` on an Iteration (or an object contained within an Iteration) to force flush that Iteration regardless of its dirty status. + .. tip:: Just because an operation is independent does not mean it is allowed to be inconsistent. 
diff --git a/docs/source/dev/buildoptions.rst b/docs/source/dev/buildoptions.rst index 8c646b17a0..2ed205c90e 100644 --- a/docs/source/dev/buildoptions.rst +++ b/docs/source/dev/buildoptions.rst @@ -40,7 +40,8 @@ The following options switch between static and shared builds and control if dep CMake Option Values Description ============================== =============== ================================================== ``openPMD_BUILD_SHARED_LIBS`` **ON**/OFF Build the C++ API as shared library -``HDF5_USE_STATIC_LIBRARIES`` ON/**OFF** Require static HDF5 library +``HDF5_USE_STATIC_LIBRARIES`` ON/OFF Require static HDF5 library +``ZLIB_USE_STATIC_LIBS`` ON/OFF Require static ZLIB library ============================== =============== ================================================== Note that python modules (``openpmd_api.cpython.[...].so`` or ``openpmd_api.pyd``) are always dynamic libraries. @@ -67,7 +68,7 @@ The following options allow to switch to external installs of dependencies: CMake Option Values Installs Library Version ================================= =========== ======== ============= ======== ``openPMD_USE_INTERNAL_CATCH`` **ON**/OFF No Catch2 2.13.10+ -``openPMD_USE_INTERNAL_PYBIND11`` **ON**/OFF No pybind11 2.11.1+ +``openPMD_USE_INTERNAL_PYBIND11`` **ON**/OFF No pybind11 2.12.0+ ``openPMD_USE_INTERNAL_JSON`` **ON**/OFF No NLohmann-JSON 3.9.1+ ``openPMD_USE_INTERNAL_TOML11`` **ON**/OFF No toml11 3.7.1+ ================================= =========== ======== ============= ======== diff --git a/docs/source/dev/dependencies.rst b/docs/source/dev/dependencies.rst index 8e0cda7b97..aa0c23c629 100644 --- a/docs/source/dev/dependencies.rst +++ b/docs/source/dev/dependencies.rst @@ -18,7 +18,7 @@ Shipped internally The following libraries are shipped internally in ``share/openPMD/thirdParty/`` for convenience: * `Catch2 `_ 2.13.10+ (`BSL-1.0 `__) -* `pybind11 `_ 2.11.1+ (`new BSD `_) +* `pybind11 `_ 2.12.0+ (`new BSD `_) * `NLohmann-JSON `_ 3.9.1+ (`MIT 
`_) * `toml11 `_ 3.7.1+ (`MIT `__) @@ -39,7 +39,7 @@ Optional: language bindings * Python: * Python 3.8 - 3.12 - * pybind11 2.11.1+ + * pybind11 2.12.0+ * numpy 1.15+ * mpi4py 2.1+ (optional, for MPI) * pandas 1.0+ (optional, for dataframes) diff --git a/docs/source/dev/design.rst b/docs/source/dev/design.rst index ce43777407..6fb81071ce 100644 --- a/docs/source/dev/design.rst +++ b/docs/source/dev/design.rst @@ -23,7 +23,7 @@ Therefore, enabling users to handle hierarchical, self-describing file formats w .. literalinclude:: IOTask.hpp :language: cpp - :lines: 48-78 + :lines: 50-81 Every task is designed to be a fully self-contained description of one such atomic operation. By describing a required minimal step of work (without any side-effect), these operations are the foundation of the unified handling mechanism across suitable file formats. The actual low-level exchange of data is implemented in ``IOHandlers``, one per file format (possibly two if handlingi MPI-parallel work is possible and requires different behaviour). diff --git a/docs/source/usage/firstread.rst b/docs/source/usage/firstread.rst index 673f69acc4..f8fc484b16 100644 --- a/docs/source/usage/firstread.rst +++ b/docs/source/usage/firstread.rst @@ -86,7 +86,7 @@ C++17 .. code-block:: cpp auto series = io::Series( - "data%T.h5", + "data_%T.h5", io::Access::READ_ONLY); @@ -96,9 +96,20 @@ Python .. code-block:: python3 series = io.Series( - "data%T.h5", + "data_%T.h5", io.Access.read_only) +.. tip:: + + Replace the file ending ``.h5`` with a wildcard ``.%E`` to let openPMD autodetect the ending from the file system. + Use the wildcard ``%T`` to match filename encoded iterations. + +.. tip:: + + More detailed options can be passed via JSON or TOML as a further constructor parameter. + Try ``{"defer_iteration_parsing": true}`` to speed up the first access. + (Remember to explicitly ``it.open()`` iterations in that case.) 
+ Iteration --------- diff --git a/docs/source/usage/firstwrite.rst b/docs/source/usage/firstwrite.rst index 3361214926..927d9f650d 100644 --- a/docs/source/usage/firstwrite.rst +++ b/docs/source/usage/firstwrite.rst @@ -100,7 +100,7 @@ Python Iteration --------- -Grouping by an arbitrary, positive integer number ```` in a series: +Grouping by an arbitrary, nonnegative integer number ```` in a series: C++17 ^^^^^ diff --git a/docs/source/usage/streaming.rst b/docs/source/usage/streaming.rst index d70b929389..118e3a6e9d 100644 --- a/docs/source/usage/streaming.rst +++ b/docs/source/usage/streaming.rst @@ -95,3 +95,30 @@ This pays tribute to the fact that in streaming mode, an iteration is sent to th .. literalinclude:: 10_streaming_write.py :language: python3 + + +Chunk provenance tracking using a rank table +-------------------------------------------- + +.. _rank_table: + +In a large parallel streaming setup, it is important to adhere to a certain concept of data locality when deciding which data to load from the producer. +The openPMD-api has some mechanisms to help with this process: + +The API call ``BaseRecordComponent::availableChunks()``/``Base_Record_Component.available_chunks()`` returns the data chunks within a specific dataset that are available for loading, each chunk hereby annotating its MPI rank within the *data producer* in ``WrittenChunkInfo::sourceID``/``WrittenChunkInfo::source_ID``. + +In order to correlate this information with the MPI ranks of the *data consumer*, a **rank table** can be used in order to transmit an additional tag for each of the producer's MPI ranks. On the data producer side, the rank table can be set manually or automatically: + + +* **automatically** Using the :ref:`JSON/TOML option ` ``rank_table``. + The suggested specification is ``{"rank_table": "hostname"}``, although the explicit values ``"mpi_processor_name"`` and ``"posix_hostname"`` are also accepted. 
+ ``"hostname"`` resolves to the MPI processor name when the Series has been initialized with MPI, to the POSIX hostname otherwise (if that is available). +* **manually:** Using the API call ``Series::setRankTable(std::string const &myRankInfo)`` that specifies the current rank's tag. + This can be used to set custom tags, identifying e.g. NUMA nodes or groups of compute nodes. + +The rank table takes the form of a 2-dimensional dataset, listing the tags as null-terminated strings line by line in order of the MPI ranks and can be loaded using ``Series::rankTable()``/``Series.get_rank_table()``. + +Setting the rank table is **collective**, though the collective action is only performed upon flushing. +Reading the rank table requires specifying if the read operation should be done collectively (better for performance), or independently. + +In order to retrieve the corresponding information on the **consumer side**, the function ``host_info::byMethod()``/``HostInfo.get()`` can be used for retrieving the local rank's information, or alternatively ``host_info::byMethodCollective()``/``HostInfo.get_info()`` for retrieving the rank table for all consumer ranks. diff --git a/docs/source/usage/workflow.rst b/docs/source/usage/workflow.rst index 61ef593a2e..03fa4ce9d4 100644 --- a/docs/source/usage/workflow.rst +++ b/docs/source/usage/workflow.rst @@ -3,6 +3,49 @@ Workflow ======== +Storing and reading chunks +-------------------------- + +1. **Chunks within an n-dimensional dataset** + + Most commonly, chunks within an n-dimensional dataset are identified by their offset and extent. + The extent is the size of the chunk in each dimension, NOT the absolute coordinate within the entire dataset. + + In the Python API, this is modeled to conform to the conventional ``__setitem__``/``__getitem__`` protocol. + +2. 
**Joined arrays (write only)** + + (Currently) only supported in ADIOS2 no older than v2.9.0 under the conditions listed in the `ADIOS2 documentation on joined arrays `_. + + In some cases, the concrete chunk within a dataset does not matter and the computation of indexes is a needless computational and mental overhead. + This commonly occurs for particle data which the openPMD-standard models as a list of particles. + The order of particles does not matter greatly, and making different parallel processes agree on indexing is error-prone boilerplate. + + In such a case, at most one *joined dimension* can be specified in the Dataset, e.g. ``{Dataset::JOINED_DIMENSION, 128, 128}`` (3D for the sake of explanation, particle data would normally be 1D). + The chunk is then stored by specifying an empty offset vector ``{}``. + The chunk extent vector must be equivalent to the global extent in all non-joined dimensions (i.e. joined arrays allow no further sub-chunking other than concatenation along the joined dimension). + The joined dimension of the extent vector specifies the extent that this piece should have along the joined dimension. + In the Python API, the slice-based setter syntax can be used as an abbreviation since the necessary information is determined from the passed array, e.g. ``record_component[()] = local_data``. + The global extent of the dataset along the joined dimension will then be the sum of all local chunk extents along the joined dimension. + + Since openPMD follows a struct-of-array layout of data, it is important not to lose correlation of data between components. E.g., joining an array must take care that ``particles/e/position/x`` and ``particles/e/position/y`` are joined in a uniform way. + + The openPMD-api makes the **following guarantee**: + + Consider a Series written from ``N`` parallel processes between two (collective) flush points. 
For each parallel process ``n`` and dataset ``D``, let: + + * ``chunk(D, n, i)`` be the ``i``'th chunk written to dataset ``D`` on process ``n`` + * ``num_chunks(D, n)`` be the count of chunks written by ``n`` to ``D`` + * ``joined_index(D, c)`` be the index of chunk ``c`` in the joining order of ``D`` + + Then for any two datasets ``x`` and ``y``: + + * If for any parallel process ``n`` the condition holds that ``num_chunks(x, n) = num_chunks(y, n)`` (between the two flush points!)... + * ...then for any parallel process ``n`` and chunk index ``i`` less than ``num_chunks(x, n)``: ``joined_index(x, chunk(x, n, i)) = joined_index(y, chunk(y, n, i))``. + + **TLDR:** Writing chunks to two joined arrays in a synchronous way (**1.** same order of store operations and **2.** between the same flush operations) will result in the same joining order in both arrays. + + Access modes ------------ @@ -59,6 +102,8 @@ The openPMD-api distinguishes between a number of different access modes: We suggest to fully define iterations when using Append mode (i.e. as if using Create mode) to avoid implementation-specific behavior. Appending to an openPMD Series is only supported on a per-iteration level. + **Tip:** Use the ``adios2.engine.access_mode`` :ref:`backend key ` of the :ref:`ADIOS2 backend ` to fine-tune the backend-specific behavior of Append mode for niche use cases. + **Warning:** There is no reading involved in using Append mode. It is a user's responsibility to ensure that the appended dataset and the appended-to dataset are compatible with each other. The results of using incompatible backend configurations are undefined. 
diff --git a/environment.yml b/environment.yml index 5cdc6f6f02..1840e99d59 100644 --- a/environment.yml +++ b/environment.yml @@ -3,5 +3,5 @@ name: openPMD-api-dev channels: - conda-forge dependencies: - - clang-format-12=12.0.1 + - clang-format-18=18.1.2 - bash=5 diff --git a/examples/11_particle_dataframe.py b/examples/11_particle_dataframe.py index 7e0cad065c..defc93dd96 100755 --- a/examples/11_particle_dataframe.py +++ b/examples/11_particle_dataframe.py @@ -16,6 +16,14 @@ except ImportError: print("pandas NOT found. Install pandas to run this example.") sys.exit() + +found_cudf = False +try: + import cudf + found_cudf = True +except ImportError: + print("cudf NOT found. Install RAPIDS for CUDA DataFrame example.") + found_dask = False try: import dask @@ -39,6 +47,19 @@ df = electrons.to_df(np.s_[:100]) print(df) + # all particles over all steps + df = s.to_df("electrons") + print(df) + + if found_cudf: + # all particles - to GPU + cdf = cudf.from_pandas(electrons.to_df()) + print(cdf) + + # all particles over all steps - to GPU + cdf = s.to_cudf("electrons") + print(cdf) + # Particles if found_dask: # the default schedulers are local/threaded, not requiring much. 
diff --git a/examples/12_span_write.cpp b/examples/12_span_write.cpp index 45379c77e9..e2efcffdaa 100644 --- a/examples/12_span_write.cpp +++ b/examples/12_span_write.cpp @@ -28,8 +28,28 @@ void span_write(std::string const &filename) for (size_t i = 0; i < 10; ++i) { Iteration iteration = iterations[i]; - Record electronPositions = iteration.particles["e"]["position"]; + auto patches = iteration.particles["e"].particlePatches; + for (auto record : {"offset", "extent"}) + { + for (auto component : {"x", "y", "z"}) + { + patches[record][component].resetDataset( + {Datatype::DOUBLE, {1}}); + *patches[record][component] + .storeChunk({0}, {1}) + .currentBuffer() + .data() = 4.2; + } + } + for (auto record : {"numParticlesOffset", "numParticles"}) + { + patches[record].resetDataset({Datatype::INT, {1}}); + *patches[record].storeChunk({0}, {1}).currentBuffer().data() = + 42; + } + + Record electronPositions = iteration.particles["e"]["position"]; size_t j = 0; for (auto const &dim : {"x", "y", "z"}) { diff --git a/examples/2a_read_thetaMode_serial.cpp b/examples/2a_read_thetaMode_serial.cpp index a796e66447..55b9d35831 100644 --- a/examples/2a_read_thetaMode_serial.cpp +++ b/examples/2a_read_thetaMode_serial.cpp @@ -29,6 +29,8 @@ using namespace openPMD; int main() { + /* The pattern %E instructs the openPMD-api to determine the file ending + * automatically. It can also be given explicitly, e.g. `data%T.h5`. */ Series series = Series("../samples/git-sample/thetaMode/data%T.h5", Access::READ_ONLY); diff --git a/examples/2a_read_thetaMode_serial.py b/examples/2a_read_thetaMode_serial.py index 07021c1f36..45c83122fb 100644 --- a/examples/2a_read_thetaMode_serial.py +++ b/examples/2a_read_thetaMode_serial.py @@ -9,6 +9,8 @@ import openpmd_api as io if __name__ == "__main__": + # The pattern %E instructs the openPMD-api to determine the file ending + # automatically. It can also be given explicitly, e.g. `data%T.h5`. 
series = io.Series("../samples/git-sample/thetaMode/data%T.h5", io.Access.read_only) diff --git a/examples/5_write_parallel.cpp b/examples/5_write_parallel.cpp index 8587175fe7..3cf0f01883 100644 --- a/examples/5_write_parallel.cpp +++ b/examples/5_write_parallel.cpp @@ -31,7 +31,8 @@ using namespace openPMD; int main(int argc, char *argv[]) { - MPI_Init(&argc, &argv); + int provided; + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); int mpi_size; int mpi_rank; @@ -47,9 +48,23 @@ int main(int argc, char *argv[]) cout << "Set up a 2D array with 10x300 elements per MPI rank (" << mpi_size << "x) that will be written to disk\n"; + std::string subfiling_config = R"( +[hdf5.vfd] +type = "subfiling" +ioc_selection = "every_nth_rank" +stripe_size = 33554432 +stripe_count = -1 + +[hdf5.dataset] +chunks = "auto" + )"; + // open file for writing Series series = Series( - "../samples/5_parallel_write.h5", Access::CREATE, MPI_COMM_WORLD); + "../samples/5_parallel_write.h5", + Access::CREATE, + MPI_COMM_WORLD, + subfiling_config); if (0 == mpi_rank) cout << "Created an empty series in parallel with " << mpi_size << " MPI ranks\n"; @@ -69,7 +84,10 @@ int main(int argc, char *argv[]) // example 1D domain decomposition in first index Datatype datatype = determineDatatype(); Extent global_extent = {10ul * mpi_size, 300}; - Dataset dataset = Dataset(datatype, global_extent); + Dataset dataset = Dataset(datatype, global_extent, R"( +[hdf5.dataset] +chunks = [10, 100] + )"); if (0 == mpi_rank) cout << "Prepared a Dataset of size " << dataset.extent[0] << "x" diff --git a/examples/5_write_parallel.py b/examples/5_write_parallel.py index ace0cd6e63..8574c1d66e 100644 --- a/examples/5_write_parallel.py +++ b/examples/5_write_parallel.py @@ -14,13 +14,21 @@ import numpy as np import openpmd_api as io +try: + import adios2 + from packaging import version + USE_JOINED_DIMENSION = \ + version.parse(adios2.__version__) >= version.parse('2.9.0') +except ImportError: + 
USE_JOINED_DIMENSION = False + if __name__ == "__main__": # also works with any other MPI communicator comm = MPI.COMM_WORLD # global data set to write: [MPI_Size * 10, 300] # each rank writes a 10x300 slice with its MPI rank as values - local_value = comm.size + local_value = comm.rank local_data = np.ones(10 * 300, dtype=np.double).reshape(10, 300) * local_value if 0 == comm.rank: @@ -29,7 +37,9 @@ # open file for writing series = io.Series( - "../samples/5_parallel_write_py.h5", + "../samples/5_parallel_write_py.bp" + if USE_JOINED_DIMENSION + else "../samples/5_parallel_write_py.h5", io.Access.create, comm ) @@ -51,7 +61,9 @@ meshes["mymesh"] # example 1D domain decomposition in first index - global_extent = [comm.size * 10, 300] + global_extent = [io.Dataset.JOINED_DIMENSION, 300] \ + if USE_JOINED_DIMENSION else [comm.size * 10, 300] + dataset = io.Dataset(local_data.dtype, global_extent) if 0 == comm.rank: @@ -64,7 +76,15 @@ "mymesh in iteration 1") # example shows a 1D domain decomposition in first index - mymesh[comm.rank*10:(comm.rank+1)*10, :] = local_data + + if USE_JOINED_DIMENSION: + # explicit API + # mymesh.store_chunk(local_data, [], [10, 300]) + mymesh[:, :] = local_data + # or short: + # mymesh[()] = local_data + else: + mymesh[comm.rank*10:(comm.rank+1)*10, :] = local_data if 0 == comm.rank: print("Registered a single chunk per MPI rank containing its " "contribution, ready to write content to disk") diff --git a/include/openPMD/ChunkInfo.hpp b/include/openPMD/ChunkInfo.hpp index 5be3c27b56..9bc6e94972 100644 --- a/include/openPMD/ChunkInfo.hpp +++ b/include/openPMD/ChunkInfo.hpp @@ -20,8 +20,16 @@ */ #pragma once +#include "openPMD/config.hpp" + #include "openPMD/Dataset.hpp" // Offset, Extent +#if openPMD_HAVE_MPI +#include +#endif + +#include +#include #include namespace openPMD @@ -73,4 +81,56 @@ struct WrittenChunkInfo : ChunkInfo }; using ChunkTable = std::vector; + +namespace chunk_assignment +{ + using RankMeta = std::map; +} // 
namespace chunk_assignment + +namespace host_info +{ + /** + * Methods for retrieving hostname / processor identifiers that openPMD-api + * is aware of. These can be used for locality-aware chunk distribution + * schemes in streaming setups. + */ + enum class Method + { + POSIX_HOSTNAME, + MPI_PROCESSOR_NAME + }; + + /** + * @brief Is the method available on the current system? + * + * @return true If it is available. + * @return false Otherwise. + */ + bool methodAvailable(Method); + + /** + * @brief Wrapper for the native hostname retrieval functions such as + * POSIX gethostname(). + * + * @return std::string The hostname / processor name returned by the native + * function. + */ + std::string byMethod(Method); + +#if openPMD_HAVE_MPI + /** + * @brief Retrieve the hostname information on all MPI ranks and distribute + * a map of "rank -> hostname" to all ranks. + * + * This call is MPI collective. + * + * @return chunk_assignment::RankMeta Hostname / processor name information + * for all MPI ranks known to the communicator. + * The result is returned on all ranks. + */ + chunk_assignment::RankMeta byMethodCollective(MPI_Comm, Method); +#endif +} // namespace host_info } // namespace openPMD + +#undef openPMD_POSIX_AVAILABLE diff --git a/include/openPMD/ChunkInfo_internal.hpp b/include/openPMD/ChunkInfo_internal.hpp new file mode 100644 index 0000000000..b14ff0f7ad --- /dev/null +++ b/include/openPMD/ChunkInfo_internal.hpp @@ -0,0 +1,67 @@ +/* Copyright 2024 Franz Poeschel + * + * This file is part of openPMD-api. + * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see . + */ +#pragma once + +#include "openPMD/ChunkInfo.hpp" +#include + +namespace openPMD::host_info +{ + +/** + * @brief This defines the method identifiers used + * in `{"rank_table": "hostname"}` + * + * Currently recognized are: + * + * * posix_hostname + * * mpi_processor_name + * + * For backwards compatibility reasons, "hostname" is also recognized as a + * deprecated alternative for "posix_hostname". + * + * @return Method enum identifier. The identifier is returned even if the + * method is not available on the system. This should be checked + * via methodAvailable(). + * @throws std::out_of_range If an unknown string identifier is passed. + */ +Method methodFromStringDescription(std::string const &descr, bool consider_mpi); + +/* + * The following block contains one wrapper for each native hostname + * retrieval method. The purpose is to have the same function pointer type + * for all of them. 
+ */ + +#ifdef _WIN32 +#define openPMD_POSIX_AVAILABLE false +#else +#define openPMD_POSIX_AVAILABLE true +#endif + +#if openPMD_POSIX_AVAILABLE +std::string posix_hostname(); +#endif + +#if openPMD_HAVE_MPI +std::string mpi_processor_name(); +#endif +} // namespace openPMD::host_info diff --git a/include/openPMD/Dataset.hpp b/include/openPMD/Dataset.hpp index 8757a3cf0a..0032888541 100644 --- a/include/openPMD/Dataset.hpp +++ b/include/openPMD/Dataset.hpp @@ -22,7 +22,9 @@ #include "openPMD/Datatype.hpp" +#include #include +#include #include #include #include @@ -37,6 +39,11 @@ class Dataset friend class RecordComponent; public: + enum : std::uint64_t + { + JOINED_DIMENSION = std::numeric_limits::max() + }; + Dataset(Datatype, Extent, std::string options = "{}"); /** @@ -53,5 +60,9 @@ class Dataset Datatype dtype; uint8_t rank; std::string options = "{}"; //!< backend-dependent JSON configuration + + bool empty() const; + + std::optional joinedDimension() const; }; } // namespace openPMD diff --git a/include/openPMD/IO/ADIOS/ADIOS2Auxiliary.hpp b/include/openPMD/IO/ADIOS/ADIOS2Auxiliary.hpp index 97a3f5539a..9cb275d339 100644 --- a/include/openPMD/IO/ADIOS/ADIOS2Auxiliary.hpp +++ b/include/openPMD/IO/ADIOS/ADIOS2Auxiliary.hpp @@ -46,6 +46,50 @@ enum class GroupOrDataset DATASET }; +namespace adios_defs +{ + enum class FlushTarget : unsigned char + { + Buffer, + Buffer_Override, + Disk, + Disk_Override, + NewStep, + NewStep_Override + }; + + using FlushTarget = adios_defs::FlushTarget; + FlushTarget flushTargetFromString(std::string const &str); + + enum class UseGroupTable + { + Yes, + No + }; +} // namespace adios_defs + +/* + * The following strings are used during parsing of the JSON configuration + * string for the ADIOS2 backend. 
+ */ +namespace adios_defaults +{ + using const_str = char const *const; + constexpr const_str str_engine = "engine"; + constexpr const_str str_type = "type"; + constexpr const_str str_treat_unsupported_engine_like = "pretend_engine"; + constexpr const_str str_params = "parameters"; + constexpr const_str str_usesteps = "usesteps"; + constexpr const_str str_flushtarget = "preferred_flush_target"; + constexpr const_str str_usesstepsAttribute = "__openPMD_internal/useSteps"; + constexpr const_str str_adios2Schema = + "__openPMD_internal/openPMD2_adios2_schema"; + constexpr const_str str_isBoolean = "__is_boolean__"; + constexpr const_str str_activeTablePrefix = "__openPMD_groups"; + constexpr const_str str_groupBasedWarning = + "__openPMD_internal/warning_bugprone_groupbased_encoding"; +} // namespace adios_defaults + #if openPMD_HAVE_ADIOS2 namespace detail { diff --git a/include/openPMD/IO/ADIOS/ADIOS2File.hpp b/include/openPMD/IO/ADIOS/ADIOS2File.hpp new file mode 100644 index 0000000000..0bcdaa6131 --- /dev/null +++ b/include/openPMD/IO/ADIOS/ADIOS2File.hpp @@ -0,0 +1,485 @@ +/* Copyright 2023 Franz Poeschel + * + * This file is part of openPMD-api. + * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see . 
+ */ +#pragma once + +#include "openPMD/IO/ADIOS/ADIOS2Auxiliary.hpp" +#include "openPMD/IO/AbstractIOHandler.hpp" +#include "openPMD/IO/IOTask.hpp" +#include "openPMD/IO/InvalidatableFile.hpp" +#include "openPMD/config.hpp" + +#if openPMD_HAVE_ADIOS2 +#include +#endif +#if openPMD_HAVE_MPI +#include +#endif + +#include +#include + +namespace openPMD +{ +class ADIOS2IOHandlerImpl; +} + +#if openPMD_HAVE_ADIOS2 +namespace openPMD::detail +{ +class ADIOS2File; + +/* + * IO-heavy action to be executed upon flushing. + */ +struct BufferedAction +{ + explicit BufferedAction() = default; + virtual ~BufferedAction() = default; + + BufferedAction(BufferedAction const &other) = delete; + BufferedAction(BufferedAction &&other) = default; + + BufferedAction &operator=(BufferedAction const &other) = delete; + BufferedAction &operator=(BufferedAction &&other) = default; + + virtual void run(ADIOS2File &) = 0; +}; + +struct BufferedGet : BufferedAction +{ + std::string name; + Parameter param; + + void run(ADIOS2File &) override; +}; + +struct DatasetReader +{ + template + static void call( + ADIOS2IOHandlerImpl *impl, + BufferedGet &bp, + adios2::IO &IO, + adios2::Engine &engine, + std::string const &fileName); + + static constexpr char const *errorMsg = "ADIOS2: readDataset()"; +}; + +struct BufferedPut : BufferedAction +{ + std::string name; + Parameter param; + + void run(ADIOS2File &) override; +}; + +struct WriteDataset +{ + template + static void call(ADIOS2File &ba, BufferedPut &bp); + + template + static void call(Params &&...); +}; + +struct BufferedUniquePtrPut +{ + std::string name; + Offset offset; + Extent extent; + UniquePtrWithLambda data; + Datatype dtype = Datatype::UNDEFINED; + + void run(ADIOS2File &); +}; + +struct I_UpdateSpan +{ + virtual void *update() = 0; + virtual ~I_UpdateSpan() = default; +}; + +template +struct UpdateSpan : I_UpdateSpan +{ + adios2::detail::Span span; + + UpdateSpan(adios2::detail::Span); + + void *update() override; +}; + +/* + * 
Manages per-file information about + * (1) the file's IO and Engine objects + * (2) the file's deferred IO-heavy actions + */ +class ADIOS2File +{ + friend struct BufferedGet; + friend struct BufferedPut; + friend struct RunUniquePtrPut; + friend struct WriteDataset; + + using UseGroupTable = adios_defs::UseGroupTable; + using FlushTarget = adios_defs::FlushTarget; + +public: + ADIOS2File(ADIOS2File const &) = delete; + + /** + * The full path to the file created on disk, including the + * containing directory and the file extension, as determined + * by ADIOS2IOHandlerImpl::fileSuffix(). + * (Meaning, in case of the SST engine, no file suffix since the + * SST engine automatically adds its suffix unconditionally) + */ + std::string m_file; + /** + * ADIOS requires giving names to instances of adios2::IO. + * We make them different from the actual file name, because of the + * possible following workflow: + * + * 1. create file foo.bp + * -> would create IO object named foo.bp + * 2. delete that file + * (let's ignore that we don't support deletion yet and call it + * preplanning) + * 3. create file foo.bp a second time + * -> would create another IO object named foo.bp + * -> craash + * + * So, we just give out names based on a counter for IO objects. + * Hence, next to the actual file name, also store the name for the + * IO. + */ + std::string m_IOName; + adios2::ADIOS &m_ADIOS; + adios2::IO m_IO; + /** + * The default queue for deferred actions. + * Drained upon ADIOS2File::flush(). + */ + std::vector> m_buffer; + /** + * When receiving a unique_ptr, we know that the buffer is ours and + * ours alone. So, for performance reasons, show the buffer to ADIOS2 as + * late as possible and avoid unnecessary data copies in BP5 triggered + * by PerformDataWrites(). + */ + std::vector m_uniquePtrPuts; + /** + * This contains deferred actions that have already been enqueued into + * ADIOS2, but not yet performed in ADIOS2. 
+ * We must store them somewhere until the next PerformPuts/Gets, EndStep + * or Close in ADIOS2 to avoid use after free conditions. + */ + std::vector> m_alreadyEnqueued; + adios2::Mode m_mode; + /** + * The base pointer of an ADIOS2 span might change after reallocations. + * The frontend will ask the backend for those updated base pointers. + * Spans given out by the ADIOS2 backend to the frontend are hence + * identified by an unsigned integer and stored in this member for later + * retrieval of the updated base pointer. + * This map is cleared upon flush points. + */ + std::map> m_updateSpans; + + /* + * We call an attribute committed if the step during which it was + * written has been closed. + * A committed attribute cannot be modified. + */ + std::set uncommittedAttributes; + + /* + * The openPMD API will generally create new attributes for each + * iteration. This results in a growing number of attributes over time. + * In streaming-based modes, these will be completely sent anew in each + * iteration. If the following boolean is true, old attributes will be + * removed upon CLOSE_GROUP. + * Should not be set to true in persistent backends. + * Will be automatically set by ADIOS2File::configure_IO depending + * on chosen ADIOS2 engine and can not be explicitly overridden by user. + */ + bool optimizeAttributesStreaming = false; + + using ParsePreference = Parameter::ParsePreference; + ParsePreference parsePreference = ParsePreference::UpFront; + + using AttributeMap_t = std::map; + + ADIOS2File(ADIOS2IOHandlerImpl &impl, InvalidatableFile file); + + ~ADIOS2File(); + + /** + * Implementation of destructor, will only run once. 
+ * + */ + void finalize(); + + UseGroupTable detectGroupTable(); + + adios2::Engine &getEngine(); + + template + void enqueue(BA &&ba) + { + enqueue(std::forward(ba), m_buffer); + } + + template + void enqueue(BA &&ba, decltype(m_buffer) &buffer) + { + using BA_ = typename std::remove_reference::type; + buffer.emplace_back( + std::unique_ptr(new BA_(std::forward(ba)))); + } + + template + void flush(Args &&...args); + + struct ADIOS2FlushParams + { + /* + * Only execute performPutsGets if UserFlush. + */ + FlushLevel level; + FlushTarget flushTarget = FlushTarget::Disk; + + ADIOS2FlushParams(FlushLevel level_in) : level(level_in) + {} + + ADIOS2FlushParams(FlushLevel level_in, FlushTarget flushTarget_in) + : level(level_in), flushTarget(flushTarget_in) + {} + }; + + /** + * Flush deferred IO actions. + * + * @param flushParams Flush level and target. + * @param performPutGets A functor that takes as parameters (1) *this + * and (2) the ADIOS2 engine. + * Its task is to ensure that ADIOS2 performs Put/Get operations. + * Several options for this: + * * adios2::Engine::EndStep + * * adios2::Engine::Perform(Puts|Gets) + * * adios2::Engine::Close + * @param writeLatePuts Deferred until right before + * Engine::EndStep() or Engine::Close(): + * Running unique_ptr Put()s. + * @param flushUnconditionally Whether to run the functor even if no + * deferred IO tasks had been queued. + */ + void flush_impl( + ADIOS2FlushParams flushParams, + std::function const + &performPutGets, + bool writeLatePuts, + bool flushUnconditionally); + + /** + * Overload of flush() that uses adios2::Engine::Perform(Puts|Gets) + * and does not flush unconditionally. + * + */ + void flush_impl(ADIOS2FlushParams, bool writeLatePuts = false); + + /** + * @brief Begin or end an ADIOS step. + * + * @param mode Whether to begin or end a step. + * @return AdvanceStatus + */ + AdvanceStatus advance(AdvanceMode mode); + + /* + * Delete all buffered actions without running them. 
+ */ + void drop(); + + AttributeMap_t const &availableAttributes(); + + std::vector + availableAttributesPrefixed(std::string const &prefix); + + /* + * See description below. + */ + void invalidateAttributesMap(); + + AttributeMap_t const &availableVariables(); + + std::vector + availableVariablesPrefixed(std::string const &prefix); + + /* + * See description below. + */ + void invalidateVariablesMap(); + + void markActive(Writable *); + + // bool isActive(std::string const & path); + + /* + * streamStatus is NoStream for file-based ADIOS engines. + * This is relevant for the method ADIOS2File::requireActiveStep, + * where a step is only opened if the status is OutsideOfStep, but not + * if NoStream. The rationale behind this is that parsing a Series + * works differently for file-based and for stream-based engines: + * * stream-based: Iterations are parsed as they arrive. For parsing an + * iteration, the iteration must be awaited. + * ADIOS2File::requireActiveStep takes care of this. + * * file-based: The Series is parsed up front. If no step has been + * opened yet, ADIOS2 gives access to all variables and attributes + * from all steps. Upon opening a step, only the variables from that + * step are shown which hinders parsing. So, until a step is + * explicitly opened via ADIOS2IOHandlerImpl::advance, do not open + * one. + * This is to enable use of ADIOS files without the Streaming API + * (i.e. all iterations should be visible to the user upon opening + * the Series.) + * @todo Add a workflow without up-front parsing of all iterations + * for file-based engines. + * (This would merely be an optimization since the streaming + * API still works with files as intended.) + * + */ + enum class StreamStatus + { + /** + * A step is currently active. + */ + DuringStep, + /** + * A stream is active, but no step. + */ + OutsideOfStep, + /** + * Stream has ended. + */ + StreamOver, + /** + * File is not written is streaming fashion. 
+ * Begin/EndStep will be replaced by simple flushes. + * Used for: + * 1) Writing BP4 files without steps despite using the Streaming + * API. This is due to the fact that ADIOS2.6.0 requires using + * steps to read BP4 files written with steps, so using steps + * is opt-in for now. + * Notice that while the openPMD API requires ADIOS >= 2.7.0, + * the resulting files need to be readable from ADIOS 2.6.0 as + * well. This workaround is hence staying until switching to + * a new ADIOS schema. + * 2) Reading with the Streaming API any file that has been written + * without steps. This is not a workaround since not using steps, + * while inefficient in ADIOS2, is something that we support. + */ + ReadWithoutStream, + /** + * The stream status of a file-based engine will be decided upon + * opening the engine if in read mode. Up until then, this right + * here is the status. + */ + Undecided + }; + StreamStatus streamStatus = StreamStatus::OutsideOfStep; + + size_t currentStep(); + +private: + ADIOS2IOHandlerImpl *m_impl; + std::optional m_engine; //! ADIOS engine + + /* + * Not all engines support the CurrentStep() call, so we have to + * implement this manually. + */ + size_t m_currentStep = 0; + + /* + * ADIOS2 does not give direct access to its internal attribute and + * variable maps, but will instead give access to copies of them. + * In order to avoid unnecessary copies, we buffer the returned map. + * The downside of this is that we need to pay attention to invalidate + * the map whenever an attribute/variable is altered. In that case, we + * fetch the map anew. + * If empty, the buffered map has been invalidated and needs to be + * queried from ADIOS2 again. If full, the buffered map is equivalent to + * the map that would be returned by a call to + * IO::Available(Attributes|Variables). 
+ */ + std::optional m_availableAttributes; + std::optional m_availableVariables; + + std::set m_pathsMarkedAsActive; + + /* + * Cannot write attributes right after opening the engine + * https://github.com/ornladios/ADIOS2/issues/3433 + */ + bool initializedDefaults = false; + /* + * finalize() will set this true to avoid running twice. + */ + bool finalized = false; + + UseGroupTable useGroupTable() const; + + void create_IO(); + + void configure_IO(); + void configure_IO_Read(); + void configure_IO_Write(); +}; + +template +void ADIOS2File::flush(Args &&...args) +{ + try + { + flush_impl(std::forward(args)...); + } + catch (error::ReadError const &) + { + /* + * We need to take actions out of the buffer, since an exception + * should reset everything from the current IOHandler->flush() call. + * However, we cannot simply clear the buffer, since tasks may have + * been enqueued to ADIOS2 already and we cannot undo that. + * So, we need to keep the memory alive for the benefit of ADIOS2. + * Luckily, we have m_alreadyEnqueued for exactly that purpose. 
+ */ + for (auto &task : m_buffer) + { + m_alreadyEnqueued.emplace_back(std::move(task)); + } + m_buffer.clear(); + throw; + } +} +} // namespace openPMD::detail +#endif diff --git a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp index 269d908360..db3162a2da 100644 --- a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp +++ b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp @@ -20,6 +20,7 @@ */ #pragma once +#include "openPMD/Error.hpp" #include "openPMD/IO/ADIOS/ADIOS2Auxiliary.hpp" #include "openPMD/IO/ADIOS/ADIOS2FilePosition.hpp" #include "openPMD/IO/AbstractIOHandler.hpp" @@ -29,9 +30,11 @@ #include "openPMD/IO/IOTask.hpp" #include "openPMD/IO/InvalidatableFile.hpp" #include "openPMD/IterationEncoding.hpp" +#include "openPMD/ThrowError.hpp" #include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/backend/Writable.hpp" #include "openPMD/config.hpp" +#include #if openPMD_HAVE_ADIOS2 #include @@ -57,6 +60,8 @@ namespace openPMD { #if openPMD_HAVE_ADIOS2 +std::optional joinedDimension(adios2::Dims const &dims); + class ADIOS2IOHandler; namespace detail @@ -76,7 +81,7 @@ namespace detail template struct DatasetTypes; struct WriteDataset; - struct BufferedActions; + class ADIOS2File; struct BufferedPut; struct BufferedGet; struct BufferedAttributeRead; @@ -84,12 +89,6 @@ namespace detail struct RunUniquePtrPut; } // namespace detail -enum class UseGroupTable -{ - Yes, - No -}; - class ADIOS2IOHandlerImpl : public AbstractIOHandlerImplCommon { @@ -108,10 +107,13 @@ class ADIOS2IOHandlerImpl template friend struct detail::DatasetTypes; friend struct detail::WriteDataset; - friend struct detail::BufferedActions; + friend class detail::ADIOS2File; friend struct detail::BufferedAttributeRead; friend struct detail::RunUniquePtrPut; + using UseGroupTable = adios_defs::UseGroupTable; + using FlushTarget = adios_defs::FlushTarget; + public: #if openPMD_HAVE_MPI @@ -204,20 +206,14 @@ class ADIOS2IOHandlerImpl void 
deregister(Writable *, Parameter const &) override; + void touch(Writable *, Parameter const &) override; + /** * @brief The ADIOS2 access type to chose for Engines opened * within this instance. */ adios2::Mode adios2AccessMode(std::string const &fullPath); - enum class FlushTarget : unsigned char - { - Buffer, - Buffer_Override, - Disk, - Disk_Override - }; - FlushTarget m_flushTarget = FlushTarget::Disk; private: @@ -229,6 +225,32 @@ class ADIOS2IOHandlerImpl * The ADIOS2 engine type, to be passed to adios2::IO::SetEngine */ std::string m_engineType; + std::optional m_realEngineType; + + inline std::string const &realEngineType() const + { + if (m_realEngineType.has_value()) + { + return *m_realEngineType; + } + else + { + return m_engineType; + } + } + inline std::string &realEngineType() + { + return const_cast( + static_cast(this)->realEngineType()); + } + inline void pretendEngine(std::string facade_engine) + { + if (!m_realEngineType.has_value()) + { + m_realEngineType = std::move(m_engineType); + } + m_engineType = std::move(facade_engine); + } /* * The filename extension specified by the user. */ @@ -339,9 +361,7 @@ class ADIOS2IOHandlerImpl * IO and Engine object. * Not to be accessed directly, use getFileData(). 
*/ - std::unordered_map< - InvalidatableFile, - std::unique_ptr> + std::unordered_map> m_fileData; std::map m_operators; @@ -389,7 +409,7 @@ class ADIOS2IOHandlerImpl ThrowError }; - detail::BufferedActions & + detail::ADIOS2File & getFileData(InvalidatableFile const &file, IfFileNotOpen); void dropFileData(InvalidatableFile const &file); @@ -407,7 +427,88 @@ class ADIOS2IOHandlerImpl Offset const &offset, Extent const &extent, adios2::IO &IO, - std::string const &var); + std::string const &varName) + { + { + auto requiredType = adios2::GetType(); + auto actualType = IO.VariableType(varName); + + if (requiredType != actualType) + { + std::stringstream errorMessage; + errorMessage << "Trying to access a dataset with wrong type " + "(trying to access dataset with type '" + << requiredType << "', but has type '" + << actualType << "')"; + throw error::ReadError( + error::AffectedObject::Dataset, + error::Reason::UnexpectedContent, + "ADIOS2", + errorMessage.str()); + }; + } + adios2::Variable var = IO.InquireVariable(varName); + if (!var.operator bool()) + { + + throw std::runtime_error( + "[ADIOS2] Internal error: Failed opening ADIOS2 variable."); + } + // TODO leave this check to ADIOS? 
+ adios2::Dims shape = var.Shape(); + auto actualDim = shape.size(); + { + auto requiredDim = extent.size(); + if (requiredDim != actualDim) + { + throw error::ReadError( + error::AffectedObject::Dataset, + error::Reason::UnexpectedContent, + "ADIOS2", + "Trying to access a dataset with wrong dimensionality " + "(trying to access dataset with dimensionality " + + std::to_string(requiredDim) + + ", but has dimensionality " + + std::to_string(actualDim) + ")"); + } + } + auto joinedDim = joinedDimension(shape); + if (joinedDim.has_value()) + { + if (!offset.empty()) + { + throw std::runtime_error( + "[ADIOS2] Offset must be an empty vector in case of joined " + "array."); + } + for (unsigned int i = 0; i < actualDim; i++) + { + if (*joinedDim != i && extent[i] != shape[i]) + { + throw std::runtime_error( + "[ADIOS2] store_chunk extent of non-joined dimensions " + "must be equivalent to the total extent."); + } + } + } + else + { + for (unsigned int i = 0; i < actualDim; i++) + { + if (!(joinedDim.has_value() && *joinedDim == i) && + offset[i] + extent[i] > shape[i]) + { + throw std::runtime_error( + "[ADIOS2] Dataset access out of bounds."); + } + } + } + + var.SetSelection( + {adios2::Dims(offset.begin(), offset.end()), + adios2::Dims(extent.begin(), extent.end())}); + return var; + } struct { @@ -416,27 +517,6 @@ class ADIOS2IOHandlerImpl } printedWarningsAlready; }; // ADIOS2IOHandlerImpl -/* - * The following strings are used during parsing of the JSON configuration - * string for the ADIOS2 backend. 
- */ -namespace ADIOS2Defaults -{ - using const_str = char const *const; - constexpr const_str str_engine = "engine"; - constexpr const_str str_type = "type"; - constexpr const_str str_params = "parameters"; - constexpr const_str str_usesteps = "usesteps"; - constexpr const_str str_flushtarget = "preferred_flush_target"; - constexpr const_str str_usesstepsAttribute = "__openPMD_internal/useSteps"; - constexpr const_str str_adios2Schema = - "__openPMD_internal/openPMD2_adios2_schema"; - constexpr const_str str_isBoolean = "__is_boolean__"; - constexpr const_str str_activeTablePrefix = "__openPMD_groups"; - constexpr const_str str_groupBasedWarning = - "__openPMD_internal/warning_bugprone_groupbased_encoding"; -} // namespace ADIOS2Defaults - namespace detail { // Helper structs for calls to the switchType function @@ -446,19 +526,6 @@ namespace detail std::is_same_v> || std::is_same_v>>; - struct DatasetReader - { - template - static void call( - ADIOS2IOHandlerImpl *impl, - BufferedGet &bp, - adios2::IO &IO, - adios2::Engine &engine, - std::string const &fileName); - - static constexpr char const *errorMsg = "ADIOS2: readDataset()"; - }; - struct AttributeReader { template @@ -496,15 +563,6 @@ namespace detail static constexpr char const *errorMsg = "ADIOS2: openDataset()"; }; - struct WriteDataset - { - template - static void call(BufferedActions &ba, BufferedPut &bp); - - template - static void call(Params &&...); - }; - struct VariableDefiner { /** @@ -732,397 +790,6 @@ namespace detail return data[0] == toRep(val); } }; - - // Other datatypes used in the ADIOS2IOHandler implementation - - struct BufferedActions; - - /* - * IO-heavy action to be executed upon flushing. 
- */ - struct BufferedAction - { - explicit BufferedAction() = default; - virtual ~BufferedAction() = default; - - BufferedAction(BufferedAction const &other) = delete; - BufferedAction(BufferedAction &&other) = default; - - BufferedAction &operator=(BufferedAction const &other) = delete; - BufferedAction &operator=(BufferedAction &&other) = default; - - virtual void run(BufferedActions &) = 0; - }; - - struct BufferedGet : BufferedAction - { - std::string name; - Parameter param; - - void run(BufferedActions &) override; - }; - - struct BufferedPut : BufferedAction - { - std::string name; - Parameter param; - - void run(BufferedActions &) override; - }; - - struct BufferedUniquePtrPut - { - std::string name; - Offset offset; - Extent extent; - UniquePtrWithLambda data; - Datatype dtype = Datatype::UNDEFINED; - - void run(BufferedActions &); - }; - - struct I_UpdateSpan - { - virtual void *update() = 0; - virtual ~I_UpdateSpan() = default; - }; - - template - struct UpdateSpan : I_UpdateSpan - { - adios2::detail::Span span; - - UpdateSpan(adios2::detail::Span); - - void *update() override; - }; - - /* - * Manages per-file information about - * (1) the file's IO and Engine objects - * (2) the file's deferred IO-heavy actions - */ - struct BufferedActions - { - friend struct BufferedGet; - friend struct BufferedPut; - friend struct RunUniquePtrPut; - friend struct WriteDataset; - - using FlushTarget = ADIOS2IOHandlerImpl::FlushTarget; - - BufferedActions(BufferedActions const &) = delete; - - /** - * The full path to the file created on disk, including the - * containing directory and the file extension, as determined - * by ADIOS2IOHandlerImpl::fileSuffix(). - * (Meaning, in case of the SST engine, no file suffix since the - * SST engine automatically adds its suffix unconditionally) - */ - std::string m_file; - /** - * ADIOS requires giving names to instances of adios2::IO. 
- * We make them different from the actual file name, because of the - * possible following workflow: - * - * 1. create file foo.bp - * -> would create IO object named foo.bp - * 2. delete that file - * (let's ignore that we don't support deletion yet and call it - * preplanning) - * 3. create file foo.bp a second time - * -> would create another IO object named foo.bp - * -> craash - * - * So, we just give out names based on a counter for IO objects. - * Hence, next to the actual file name, also store the name for the - * IO. - */ - std::string m_IOName; - adios2::ADIOS &m_ADIOS; - adios2::IO m_IO; - /** - * The default queue for deferred actions. - * Drained upon BufferedActions::flush(). - */ - std::vector> m_buffer; - /** - * When receiving a unique_ptr, we know that the buffer is ours and - * ours alone. So, for performance reasons, show the buffer to ADIOS2 as - * late as possible and avoid unnecessary data copies in BP5 triggered - * by PerformDataWrites(). - */ - std::vector m_uniquePtrPuts; - /** - * This contains deferred actions that have already been enqueued into - * ADIOS2, but not yet performed in ADIOS2. - * We must store them somewhere until the next PerformPuts/Gets, EndStep - * or Close in ADIOS2 to avoid use after free conditions. - */ - std::vector> m_alreadyEnqueued; - adios2::Mode m_mode; - /** - * The base pointer of an ADIOS2 span might change after reallocations. - * The frontend will ask the backend for those updated base pointers. - * Spans given out by the ADIOS2 backend to the frontend are hence - * identified by an unsigned integer and stored in this member for later - * retrieval of the updated base pointer. - * This map is cleared upon flush points. - */ - std::map> m_updateSpans; - - /* - * We call an attribute committed if the step during which it was - * written has been closed. - * A committed attribute cannot be modified. 
- */ - std::set uncommittedAttributes; - - /* - * The openPMD API will generally create new attributes for each - * iteration. This results in a growing number of attributes over time. - * In streaming-based modes, these will be completely sent anew in each - * iteration. If the following boolean is true, old attributes will be - * removed upon CLOSE_GROUP. - * Should not be set to true in persistent backends. - * Will be automatically set by BufferedActions::configure_IO depending - * on chosen ADIOS2 engine and can not be explicitly overridden by user. - */ - bool optimizeAttributesStreaming = false; - - using ParsePreference = - Parameter::ParsePreference; - ParsePreference parsePreference = ParsePreference::UpFront; - - using AttributeMap_t = std::map; - - BufferedActions(ADIOS2IOHandlerImpl &impl, InvalidatableFile file); - - ~BufferedActions(); - - /** - * Implementation of destructor, will only run once. - * - */ - void finalize(); - - UseGroupTable detectGroupTable(); - - adios2::Engine &getEngine(); - - template - void enqueue(BA &&ba); - - template - void enqueue(BA &&ba, decltype(m_buffer) &); - - template - void flush(Args &&...args); - - struct ADIOS2FlushParams - { - /* - * Only execute performPutsGets if UserFlush. - */ - FlushLevel level; - FlushTarget flushTarget = FlushTarget::Disk; - - ADIOS2FlushParams(FlushLevel level_in) : level(level_in) - {} - - ADIOS2FlushParams(FlushLevel level_in, FlushTarget flushTarget_in) - : level(level_in), flushTarget(flushTarget_in) - {} - }; - - /** - * Flush deferred IO actions. - * - * @param flushParams Flush level and target. - * @param performPutsGets A functor that takes as parameters (1) *this - * and (2) the ADIOS2 engine. - * Its task is to ensure that ADIOS2 performs Put/Get operations. 
- * Several options for this: - * * adios2::Engine::EndStep - * * adios2::Engine::Perform(Puts|Gets) - * * adios2::Engine::Close - * @param writeLatePuts Deferred until right before - * Engine::EndStep() or Engine::Close(): - * Running unique_ptr Put()s. - * @param flushUnconditionally Whether to run the functor even if no - * deferred IO tasks had been queued. - */ - template - void flush_impl( - ADIOS2FlushParams flushParams, - F &&performPutsGets, - bool writeLatePuts, - bool flushUnconditionally); - - /** - * Overload of flush() that uses adios2::Engine::Perform(Puts|Gets) - * and does not flush unconditionally. - * - */ - void flush_impl(ADIOS2FlushParams, bool writeLatePuts = false); - - /** - * @brief Begin or end an ADIOS step. - * - * @param mode Whether to begin or end a step. - * @return AdvanceStatus - */ - AdvanceStatus advance(AdvanceMode mode); - - /* - * Delete all buffered actions without running them. - */ - void drop(); - - AttributeMap_t const &availableAttributes(); - - std::vector - availableAttributesPrefixed(std::string const &prefix); - - /* - * See description below. - */ - void invalidateAttributesMap(); - - AttributeMap_t const &availableVariables(); - - std::vector - availableVariablesPrefixed(std::string const &prefix); - - /* - * See description below. - */ - void invalidateVariablesMap(); - - void markActive(Writable *); - - // bool isActive(std::string const & path); - - /* - * streamStatus is NoStream for file-based ADIOS engines. - * This is relevant for the method BufferedActions::requireActiveStep, - * where a step is only opened if the status is OutsideOfStep, but not - * if NoStream. The rationale behind this is that parsing a Series - * works differently for file-based and for stream-based engines: - * * stream-based: Iterations are parsed as they arrive. For parsing an - * iteration, the iteration must be awaited. - * BufferedActions::requireActiveStep takes care of this. - * * file-based: The Series is parsed up front. 
If no step has been - * opened yet, ADIOS2 gives access to all variables and attributes - * from all steps. Upon opening a step, only the variables from that - * step are shown which hinders parsing. So, until a step is - * explicitly opened via ADIOS2IOHandlerImpl::advance, do not open - * one. - * This is to enable use of ADIOS files without the Streaming API - * (i.e. all iterations should be visible to the user upon opening - * the Series.) - * @todo Add a workflow without up-front parsing of all iterations - * for file-based engines. - * (This would merely be an optimization since the streaming - * API still works with files as intended.) - * - */ - enum class StreamStatus - { - /** - * A step is currently active. - */ - DuringStep, - /** - * A stream is active, but no step. - */ - OutsideOfStep, - /** - * Stream has ended. - */ - StreamOver, - /** - * File is not written is streaming fashion. - * Begin/EndStep will be replaced by simple flushes. - * Used for: - * 1) Writing BP4 files without steps despite using the Streaming - * API. This is due to the fact that ADIOS2.6.0 requires using - * steps to read BP4 files written with steps, so using steps - * is opt-in for now. - * Notice that while the openPMD API requires ADIOS >= 2.7.0, - * the resulting files need to be readable from ADIOS 2.6.0 as - * well. This workaround is hence staying until switching to - * a new ADIOS schema. - * 2) Reading with the Streaming API any file that has been written - * without steps. This is not a workaround since not using steps, - * while inefficient in ADIOS2, is something that we support. - */ - ReadWithoutStream, - /** - * The stream status of a file-based engine will be decided upon - * opening the engine if in read mode. Up until then, this right - * here is the status. - */ - Undecided - }; - StreamStatus streamStatus = StreamStatus::OutsideOfStep; - - size_t currentStep(); - - private: - ADIOS2IOHandlerImpl *m_impl; - std::optional m_engine; //! 
ADIOS engine - /** - * The ADIOS2 engine type, to be passed to adios2::IO::SetEngine - */ - std::string m_engineType; - - /* - * Not all engines support the CurrentStep() call, so we have to - * implement this manually. - */ - size_t m_currentStep = 0; - - /* - * ADIOS2 does not give direct access to its internal attribute and - * variable maps, but will instead give access to copies of them. - * In order to avoid unnecessary copies, we buffer the returned map. - * The downside of this is that we need to pay attention to invalidate - * the map whenever an attribute/variable is altered. In that case, we - * fetch the map anew. - * If empty, the buffered map has been invalidated and needs to be - * queried from ADIOS2 again. If full, the buffered map is equivalent to - * the map that would be returned by a call to - * IO::Available(Attributes|Variables). - */ - std::optional m_availableAttributes; - std::optional m_availableVariables; - - std::set m_pathsMarkedAsActive; - - /* - * Cannot write attributes right after opening the engine - * https://github.com/ornladios/ADIOS2/issues/3433 - */ - bool initializedDefaults = false; - /* - * finalize() will set this true to avoid running twice. 
- */ - bool finalized = false; - - [[nodiscard]] inline UseGroupTable useGroupTable() const - { - return m_impl->useGroupTable(); - } - - void create_IO(); - - void configure_IO(ADIOS2IOHandlerImpl &impl); - void configure_IO_Read(); - void configure_IO_Write(); - }; - } // namespace detail #endif // openPMD_HAVE_ADIOS2 diff --git a/include/openPMD/IO/ADIOS/macros.hpp b/include/openPMD/IO/ADIOS/macros.hpp index 59b630a112..8618573713 100644 --- a/include/openPMD/IO/ADIOS/macros.hpp +++ b/include/openPMD/IO/ADIOS/macros.hpp @@ -19,6 +19,21 @@ #define openPMD_HAS_ADIOS_2_9 \ (ADIOS2_VERSION_MAJOR * 100 + ADIOS2_VERSION_MINOR >= 209) +#define openPMD_HAS_ADIOS_2_10 \ + (ADIOS2_VERSION_MAJOR * 100 + ADIOS2_VERSION_MINOR >= 210) + +#define openPMD_HAS_ADIOS_2_10_1 \ + (ADIOS2_VERSION_MAJOR * 1000 + ADIOS2_VERSION_MINOR * 10 + \ + ADIOS2_VERSION_PATCH >= \ + 2101) + +#if defined(ADIOS2_HAVE_BP5) || openPMD_HAS_ADIOS_2_10 +// ADIOS2 v2.10 no longer defines this +#define openPMD_HAVE_ADIOS2_BP5 1 +#else +#define openPMD_HAVE_ADIOS2_BP5 0 +#endif + #else #define openPMD_HAS_ADIOS_2_8 0 diff --git a/include/openPMD/IO/AbstractIOHandler.hpp b/include/openPMD/IO/AbstractIOHandler.hpp index 71a0587b0f..1288a87b21 100644 --- a/include/openPMD/IO/AbstractIOHandler.hpp +++ b/include/openPMD/IO/AbstractIOHandler.hpp @@ -171,7 +171,7 @@ namespace internal namespace detail { - struct BufferedActions; + class ADIOS2File; } /** Interface for communicating between logical and physically persistent data. 
@@ -186,7 +186,7 @@ class AbstractIOHandler { friend class Series; friend class ADIOS2IOHandlerImpl; - friend struct detail::BufferedActions; + friend class detail::ADIOS2File; private: IterationEncoding m_encoding = IterationEncoding::groupBased; @@ -195,13 +195,28 @@ class AbstractIOHandler { /* * In file-based iteration encoding, the APPEND mode is handled entirely - * by the frontend, the backend should just treat it as CREATE mode + * by the frontend, the backend should just treat it as CREATE mode. + * Similar for READ_LINEAR which should be treated as READ_RANDOM_ACCESS + * in the backend. */ - if (encoding == IterationEncoding::fileBased && - m_backendAccess == Access::APPEND) + if (encoding == IterationEncoding::fileBased) { - // do we really want to have those as const members..? - *const_cast(&m_backendAccess) = Access::CREATE; + switch (m_backendAccess) + { + + case Access::READ_LINEAR: + // do we really want to have those as const members..? + *const_cast(&m_backendAccess) = + Access::READ_RANDOM_ACCESS; + break; + case Access::APPEND: + *const_cast(&m_backendAccess) = Access::CREATE; + break; + case Access::READ_RANDOM_ACCESS: + case Access::READ_WRITE: + case Access::CREATE: + break; + } } m_encoding = encoding; @@ -218,6 +233,12 @@ class AbstractIOHandler {} virtual ~AbstractIOHandler() = default; + AbstractIOHandler(AbstractIOHandler const &) = default; + AbstractIOHandler(AbstractIOHandler &&) = default; + + AbstractIOHandler &operator=(AbstractIOHandler const &) = default; + AbstractIOHandler &operator=(AbstractIOHandler &&) = default; + /** Add provided task to queue according to FIFO. 
* * @param iotask Task to be executed after all previously enqueued @@ -245,7 +266,7 @@ class AbstractIOHandler /** The currently used backend */ virtual std::string backendName() const = 0; - std::string const directory; + std::string directory; /* * Originally, the reason for distinguishing these two was that during * parsing in reading access modes, the access type would be temporarily @@ -261,8 +282,8 @@ class AbstractIOHandler * which is entirely implemented by the frontend, which internally uses * the backend in CREATE mode. */ - Access const m_backendAccess; - Access const m_frontendAccess; + Access m_backendAccess; + Access m_frontendAccess; internal::SeriesStatus m_seriesStatus = internal::SeriesStatus::Default; std::queue m_work; /** diff --git a/include/openPMD/IO/AbstractIOHandlerImpl.hpp b/include/openPMD/IO/AbstractIOHandlerImpl.hpp index 7fc2e4cca0..10b0fd0c97 100644 --- a/include/openPMD/IO/AbstractIOHandlerImpl.hpp +++ b/include/openPMD/IO/AbstractIOHandlerImpl.hpp @@ -150,11 +150,13 @@ class AbstractIOHandlerImpl * extent of parameters.extent. If possible, the new dataset should be * extensible. If possible, the new dataset should be divided into chunks * with size parameters.chunkSize. If possible, the new dataset should be - * compressed according to parameters.compression. This may be - * format-specific. If possible, the new dataset should be transformed - * accoring to parameters.transform. This may be format-specific. The - * Writables file position should correspond to the newly created dataset. - * The Writable should be marked written when the operation completes + * compressed/transformed according to the backend-specific configuration in + * parameters.options. The Writables file position should correspond to the + * newly created dataset. Any pre-existing file position should be ignored, + * the new file position will be based upon the parent object and the newly + * created path. 
(The old file position might still contain data due to + * reuse of Writable objects across files in file-based encoding.) The + * Writable should be marked written when the operation completes * successfully. */ virtual void @@ -395,6 +397,14 @@ class AbstractIOHandlerImpl virtual void deregister(Writable *, Parameter const ¶m) = 0; + /** Treat this writable's file as open/active/dirty. + */ + virtual void + touch(Writable *, Parameter const ¶m) = 0; + + virtual void + setWritten(Writable *, Parameter const ¶m); + AbstractIOHandler *m_handler; bool m_verboseIOTasks = false; diff --git a/include/openPMD/IO/AbstractIOHandlerImplCommon.hpp b/include/openPMD/IO/AbstractIOHandlerImplCommon.hpp index efa8b238ae..035ea30c61 100644 --- a/include/openPMD/IO/AbstractIOHandlerImplCommon.hpp +++ b/include/openPMD/IO/AbstractIOHandlerImplCommon.hpp @@ -28,6 +28,8 @@ #include "openPMD/auxiliary/StringManip.hpp" #include "openPMD/backend/Writable.hpp" +#include +#include #include #include @@ -200,6 +202,14 @@ AbstractIOHandlerImplCommon::refreshFileFromParent( Writable *writable, bool preferParentFile) { auto getFileFromParent = [writable, this]() { + auto file_it = m_files.find(writable->parent); + if (file_it == m_files.end()) + { + std::stringstream s; + s << "Parent Writable " << writable->parent << " of Writable " + << writable << " has no associated file."; + throw std::runtime_error(s.str()); + } auto file = m_files.find(writable->parent)->second; associateWithFile(writable, file); return file; diff --git a/include/openPMD/IO/DummyIOHandler.hpp b/include/openPMD/IO/DummyIOHandler.hpp index 7cd4123699..cecdf4706b 100644 --- a/include/openPMD/IO/DummyIOHandler.hpp +++ b/include/openPMD/IO/DummyIOHandler.hpp @@ -45,5 +45,6 @@ class DummyIOHandler : public AbstractIOHandler * without IO. 
*/ std::future flush(internal::ParsedFlushParams &) override; + std::string backendName() const override; }; // DummyIOHandler } // namespace openPMD diff --git a/include/openPMD/IO/Format.hpp b/include/openPMD/IO/Format.hpp index 858da29a40..4566400fe2 100644 --- a/include/openPMD/IO/Format.hpp +++ b/include/openPMD/IO/Format.hpp @@ -36,6 +36,7 @@ enum class Format ADIOS2_SSC, JSON, TOML, + GENERIC, DUMMY }; diff --git a/include/openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp b/include/openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp index 300b3f19f3..e4efc06ea6 100644 --- a/include/openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp +++ b/include/openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp @@ -20,6 +20,7 @@ */ #pragma once +#include "openPMD/IO/AbstractIOHandler.hpp" #include "openPMD/config.hpp" #if openPMD_HAVE_HDF5 #include "openPMD/IO/AbstractIOHandlerImpl.hpp" @@ -37,8 +38,14 @@ namespace openPMD #if openPMD_HAVE_HDF5 class HDF5IOHandlerImpl : public AbstractIOHandlerImpl { + friend class HDF5IOHandler; + friend class ParallelHDF5IOHandler; + public: - HDF5IOHandlerImpl(AbstractIOHandler *, json::TracingJSON config); + HDF5IOHandlerImpl( + AbstractIOHandler *, + json::TracingJSON config, + bool do_warn_unused_params = true); ~HDF5IOHandlerImpl() override; void @@ -77,6 +84,7 @@ class HDF5IOHandlerImpl : public AbstractIOHandlerImpl void listAttributes(Writable *, Parameter &) override; void deregister(Writable *, Parameter const &) override; + void touch(Writable *, Parameter const &) override; std::unordered_map m_fileNames; std::unordered_map m_fileNamesWithID; @@ -105,6 +113,8 @@ class HDF5IOHandlerImpl : public AbstractIOHandlerImpl hid_t m_H5T_LONG_DOUBLE_80_LE; hid_t m_H5T_CLONG_DOUBLE_80_LE; + std::future flush(internal::ParsedFlushParams &); + protected: #if openPMD_HAVE_MPI /* @@ -114,9 +124,11 @@ class HDF5IOHandlerImpl : public AbstractIOHandlerImpl std::optional m_communicator; #endif -private: json::TracingJSON m_config; - std::string m_chunks = "auto"; + nlohmann::json 
m_global_dataset_config; + nlohmann::json m_global_flush_config; + +private: struct File { std::string name; diff --git a/include/openPMD/IO/HDF5/ParallelHDF5IOHandlerImpl.hpp b/include/openPMD/IO/HDF5/ParallelHDF5IOHandlerImpl.hpp index e1190b3d71..3b214b64cb 100644 --- a/include/openPMD/IO/HDF5/ParallelHDF5IOHandlerImpl.hpp +++ b/include/openPMD/IO/HDF5/ParallelHDF5IOHandlerImpl.hpp @@ -43,6 +43,8 @@ class ParallelHDF5IOHandlerImpl : public HDF5IOHandlerImpl MPI_Comm m_mpiComm; MPI_Info m_mpiInfo; + + std::future flush(internal::ParsedFlushParams &); }; // ParallelHDF5IOHandlerImpl #else class ParallelHDF5IOHandlerImpl diff --git a/include/openPMD/IO/IOTask.hpp b/include/openPMD/IO/IOTask.hpp index d2fc05f379..731372f9e1 100644 --- a/include/openPMD/IO/IOTask.hpp +++ b/include/openPMD/IO/IOTask.hpp @@ -48,20 +48,37 @@ Writable *getWritable(Attributable *); /** Type of IO operation between logical and persistent data. */ OPENPMDAPI_EXPORT_ENUM_CLASS(Operation){ - CREATE_FILE, CHECK_FILE, OPEN_FILE, CLOSE_FILE, + CREATE_FILE, + CHECK_FILE, + OPEN_FILE, + CLOSE_FILE, DELETE_FILE, - CREATE_PATH, CLOSE_PATH, OPEN_PATH, DELETE_PATH, + CREATE_PATH, + CLOSE_PATH, + OPEN_PATH, + DELETE_PATH, LIST_PATHS, - CREATE_DATASET, EXTEND_DATASET, OPEN_DATASET, DELETE_DATASET, - WRITE_DATASET, READ_DATASET, LIST_DATASETS, GET_BUFFER_VIEW, + CREATE_DATASET, + EXTEND_DATASET, + OPEN_DATASET, + DELETE_DATASET, + WRITE_DATASET, + READ_DATASET, + LIST_DATASETS, + GET_BUFFER_VIEW, - DELETE_ATT, WRITE_ATT, READ_ATT, LIST_ATTS, + DELETE_ATT, + WRITE_ATT, + READ_ATT, + LIST_ATTS, ADVANCE, AVAILABLE_CHUNKS, //!< Query chunks that can be loaded in a dataset - DEREGISTER //!< Inform the backend that an object has been deleted. + DEREGISTER, //!< Inform the backend that an object has been deleted. 
+ TOUCH, //!< tell the backend that the file is to be considered active + SET_WRITTEN //!< tell backend to consider a file written / not written }; // note: if you change the enum members here, please update // docs/source/dev/design.rst @@ -326,6 +343,7 @@ struct OPENPMDAPI_EXPORT Parameter Extent extent = {}; Datatype dtype = Datatype::UNDEFINED; std::string options = "{}"; + std::optional joinedDimension; /** Warn about unused JSON paramters * @@ -657,6 +675,44 @@ struct OPENPMDAPI_EXPORT Parameter void const *former_parent = nullptr; }; +template <> +struct OPENPMDAPI_EXPORT Parameter : public AbstractParameter +{ + explicit Parameter() = default; + + Parameter(Parameter const &) = default; + Parameter(Parameter &&) = default; + + Parameter &operator=(Parameter const &) = default; + Parameter &operator=(Parameter &&) = default; + + std::unique_ptr to_heap() && override + { + return std::make_unique>(std::move(*this)); + } +}; + +template <> +struct OPENPMDAPI_EXPORT Parameter + : public AbstractParameter +{ + explicit Parameter() = default; + + Parameter(Parameter const &) = default; + Parameter(Parameter &&) = default; + + Parameter &operator=(Parameter const &) = default; + Parameter &operator=(Parameter &&) = default; + + std::unique_ptr to_heap() && override + { + return std::make_unique>( + std::move(*this)); + } + + bool target_status = false; +}; + /** @brief Self-contained description of a single IO operation. 
* * Contained are @@ -688,19 +744,10 @@ class OPENPMDAPI_EXPORT IOTask , parameter{std::move(p).to_heap()} {} - explicit IOTask(IOTask const &other) - : writable{other.writable} - , operation{other.operation} - , parameter{other.parameter} - {} - - IOTask &operator=(IOTask const &other) - { - writable = other.writable; - operation = other.operation; - parameter = other.parameter; - return *this; - } + IOTask(IOTask const &other); + IOTask(IOTask &&other) noexcept; + IOTask &operator=(IOTask const &other); + IOTask &operator=(IOTask &&other) noexcept; Writable *writable; Operation operation; diff --git a/include/openPMD/IO/JSON/JSONIOHandler.hpp b/include/openPMD/IO/JSON/JSONIOHandler.hpp index 7fdea5b6f0..7cb6870f5b 100644 --- a/include/openPMD/IO/JSON/JSONIOHandler.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandler.hpp @@ -24,17 +24,30 @@ #include "openPMD/IO/AbstractIOHandler.hpp" #include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" +#if openPMD_HAVE_MPI +#include +#endif + namespace openPMD { class JSONIOHandler : public AbstractIOHandler { public: JSONIOHandler( - std::string const &path, + std::string path, + Access at, + openPMD::json::TracingJSON config, + JSONIOHandlerImpl::FileFormat, + std::string originalExtension); +#if openPMD_HAVE_MPI + JSONIOHandler( + std::string path, Access at, + MPI_Comm, openPMD::json::TracingJSON config, JSONIOHandlerImpl::FileFormat, std::string originalExtension); +#endif ~JSONIOHandler() override; diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 5ce9d057c3..b67ac9138a 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -31,6 +31,9 @@ #include #include +#if openPMD_HAVE_MPI +#include +#endif #include #include @@ -70,6 +73,7 @@ struct File std::string name; bool valid = true; + bool printedReadmeWarningAlready = false; }; std::shared_ptr fileState; @@ -167,6 +171,15 @@ class JSONIOHandlerImpl : public 
AbstractIOHandlerImpl FileFormat, std::string originalExtension); +#if openPMD_HAVE_MPI + JSONIOHandlerImpl( + AbstractIOHandler *, + MPI_Comm, + openPMD::json::TracingJSON config, + FileFormat, + std::string originalExtension); +#endif + ~JSONIOHandlerImpl() override; void @@ -227,9 +240,15 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl void deregister(Writable *, Parameter const &) override; + void touch(Writable *, Parameter const &) override; + std::future flush(); private: +#if openPMD_HAVE_MPI + std::optional m_communicator; +#endif + using FILEHANDLE = std::fstream; // map each Writable to its associated file @@ -323,7 +342,8 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl // write to disk the json contents associated with the file // remove from m_dirty if unsetDirty == true - void putJsonContents(File const &, bool unsetDirty = true); + auto putJsonContents(File const &, bool unsetDirty = true) + -> decltype(m_jsonVals)::iterator; // figure out the file position of the writable // (preferring the parent's file position) and extend it diff --git a/include/openPMD/Iteration.hpp b/include/openPMD/Iteration.hpp index 3ede7a6a37..52bf43293a 100644 --- a/include/openPMD/Iteration.hpp +++ b/include/openPMD/Iteration.hpp @@ -130,6 +130,9 @@ class Iteration : public Attributable friend class Series; friend class WriteIterations; friend class SeriesIterator; + friend class internal::AttributableData; + template + friend T &internal::makeOwning(T &self, Series); public: Iteration(Iteration const &) = default; @@ -257,6 +260,11 @@ class Iteration : public Attributable return *m_iterationData; } + inline std::shared_ptr getShared() + { + return m_iterationData; + } + inline void setData(std::shared_ptr data) { m_iterationData = std::move(data); @@ -380,16 +388,6 @@ class Iteration : public Attributable */ void setStepStatus(StepStatus); - /* - * @brief Check recursively whether this Iteration is dirty. 
- * It is dirty if any attribute or dataset is read from or written to - * the backend. - * - * @return true If dirty. - * @return false Otherwise. - */ - bool dirtyRecursive() const; - /** * @brief Link with parent. * diff --git a/include/openPMD/Mesh.hpp b/include/openPMD/Mesh.hpp index 17ce9373de..53274ac7d4 100644 --- a/include/openPMD/Mesh.hpp +++ b/include/openPMD/Mesh.hpp @@ -230,7 +230,7 @@ class Mesh : public BaseRecord void flush_impl(std::string const &, internal::FlushParams const &) override; - void read() override; + void read(); }; // Mesh template diff --git a/include/openPMD/ParticleSpecies.hpp b/include/openPMD/ParticleSpecies.hpp index 808412d0b9..af7aa50375 100644 --- a/include/openPMD/ParticleSpecies.hpp +++ b/include/openPMD/ParticleSpecies.hpp @@ -35,6 +35,8 @@ class ParticleSpecies : public Container friend class Container; friend class Container; friend class Iteration; + template + friend T &internal::makeOwning(T &self, Series); public: ParticlePatches particlePatches; @@ -45,15 +47,12 @@ class ParticleSpecies : public Container void read(); void flush(std::string const &, internal::FlushParams const &) override; - /** - * @brief Check recursively whether this ParticleSpecies is dirty. - * It is dirty if any attribute or dataset is read from or written to - * the backend. - * - * @return true If dirty. - * @return false Otherwise. 
- */ - bool dirtyRecursive() const; + using Data_t = Container::ContainerData; + + inline std::shared_ptr getShared() + { + return m_containerData; + } }; namespace traits @@ -66,11 +65,6 @@ namespace traits void operator()(T &ret) { ret.particlePatches.linkHierarchy(ret.writable()); - - auto &np = ret.particlePatches["numParticles"]; - np.resetDataset(Dataset(determineDatatype(), {1})); - auto &npo = ret.particlePatches["numParticlesOffset"]; - npo.resetDataset(Dataset(determineDatatype(), {1})); } }; } // namespace traits diff --git a/include/openPMD/Record.hpp b/include/openPMD/Record.hpp index 4f7ee51c28..c875389db5 100644 --- a/include/openPMD/Record.hpp +++ b/include/openPMD/Record.hpp @@ -52,7 +52,7 @@ class Record : public BaseRecord void flush_impl(std::string const &, internal::FlushParams const &) override; - void read() override; + void read(); }; // Record template diff --git a/include/openPMD/RecordComponent.hpp b/include/openPMD/RecordComponent.hpp index 7fe1651409..ebb5a80ca8 100644 --- a/include/openPMD/RecordComponent.hpp +++ b/include/openPMD/RecordComponent.hpp @@ -25,6 +25,7 @@ #include "openPMD/auxiliary/ShareRaw.hpp" #include "openPMD/auxiliary/TypeTraits.hpp" #include "openPMD/auxiliary/UniquePtr.hpp" +#include "openPMD/backend/Attributable.hpp" #include "openPMD/backend/BaseRecordComponent.hpp" #include @@ -69,6 +70,8 @@ namespace internal * Chunk reading/writing requests on the contained dataset. */ std::queue m_chunks; + + void push_chunk(IOTask &&task); /** * Stores the value for constant record components. * Ignored otherwise. 
@@ -130,6 +133,8 @@ class RecordComponent : public BaseRecordComponent friend class DynamicMemoryView; friend class internal::RecordComponentData; friend class MeshRecordComponent; + template + friend T &internal::makeOwning(T &self, Series); public: enum class Allocation @@ -477,14 +482,16 @@ class RecordComponent : public BaseRecordComponent */ template auto visit(Args &&...args) -> decltype(Visitor::template call( - std::declval(), std::forward(args)...)); + std::declval(), + std::forward(args)...)); static constexpr char const *const SCALAR = "\vScalar"; -private: +protected: void flush(std::string const &, internal::FlushParams const &); - virtual void read(); + void read(bool require_unit_si); +private: /** * Internal method to be called by all methods that create an empty dataset. * @@ -496,16 +503,6 @@ class RecordComponent : public BaseRecordComponent void storeChunk( auxiliary::WriteBuffer buffer, Datatype datatype, Offset o, Extent e); - /** - * @brief Check recursively whether this RecordComponent is dirty. - * It is dirty if any attribute or dataset is read from or written to - * the backend. - * - * @return true If dirty. - * @return false Otherwise. 
- */ - bool dirtyRecursive() const; - // clang-format off OPENPMD_protected // clang-format on @@ -529,13 +526,23 @@ OPENPMD_protected return *m_recordComponentData; } + inline std::shared_ptr getShared() + { + return m_recordComponentData; + } + inline void setData(std::shared_ptr data) { m_recordComponentData = std::move(data); BaseRecordComponent::setData(m_recordComponentData); } - void readBase(); + void readBase(bool require_unit_si); + + template + void verifyChunk(Offset const &, Extent const &) const; + + void verifyChunk(Datatype, Offset const &, Extent const &) const; }; // RecordComponent } // namespace openPMD diff --git a/include/openPMD/RecordComponent.tpp b/include/openPMD/RecordComponent.tpp index e8ba6006ab..0a4086e3d8 100644 --- a/include/openPMD/RecordComponent.tpp +++ b/include/openPMD/RecordComponent.tpp @@ -172,7 +172,7 @@ RecordComponent::loadChunk(std::shared_ptr data, Offset o, Extent e) dRead.extent = extent; dRead.dtype = getDatatype(); dRead.data = std::static_pointer_cast(data); - rc.m_chunks.push(IOTask(this, dRead)); + rc.push_chunk(IOTask(this, dRead)); } } @@ -259,8 +259,17 @@ RecordComponent::storeChunk(T_ContiguousContainer &data, Offset o, Extent e) // default arguments // offset = {0u}: expand to right dim {0u, 0u, ...} Offset offset = o; - if (o.size() == 1u && o.at(0) == 0u && dim > 1u) - offset = Offset(dim, 0u); + if (o.size() == 1u && o.at(0) == 0u) + { + if (joinedDimension().has_value()) + { + offset.clear(); + } + else if (dim > 1u) + { + offset = Offset(dim, 0u); + } + } // extent = {-1u}: take full size Extent extent(dim, 1u); @@ -278,38 +287,7 @@ template inline DynamicMemoryView RecordComponent::storeChunk(Offset o, Extent e, F &&createBuffer) { - if (constant()) - throw std::runtime_error( - "Chunks cannot be written for a constant RecordComponent."); - if (empty()) - throw std::runtime_error( - "Chunks cannot be written for an empty RecordComponent."); - Datatype dtype = determineDatatype(); - if (dtype != 
getDatatype()) - { - std::ostringstream oss; - oss << "Datatypes of chunk data (" << dtype - << ") and record component (" << getDatatype() << ") do not match."; - throw std::runtime_error(oss.str()); - } - uint8_t dim = getDimensionality(); - if (e.size() != dim || o.size() != dim) - { - std::ostringstream oss; - oss << "Dimensionality of chunk (" - << "offset=" << o.size() << "D, " - << "extent=" << e.size() << "D) " - << "and record component (" << int(dim) << "D) " - << "do not match."; - throw std::runtime_error(oss.str()); - } - Extent dse = getExtent(); - for (uint8_t i = 0; i < dim; ++i) - if (dse[i] < o[i] + e[i]) - throw std::runtime_error( - "Chunk does not reside inside dataset (Dimension on index " + - std::to_string(i) + ". DS: " + std::to_string(dse[i]) + - " - Chunk: " + std::to_string(o[i] + e[i]) + ")"); + verifyChunk(o, e); /* * The openPMD backend might not yet know about this dataset. @@ -334,6 +312,7 @@ RecordComponent::storeChunk(Offset o, Extent e, F &&createBuffer) dCreate.name = rc.m_name; dCreate.extent = getExtent(); dCreate.dtype = getDatatype(); + dCreate.joinedDimension = joinedDimension(); if (!rc.m_dataset.has_value()) { throw error::WrongAPIUsage( @@ -358,6 +337,7 @@ RecordComponent::storeChunk(Offset o, Extent e, F &&createBuffer) out.ptr = static_cast(data.get()); storeChunk(std::move(data), std::move(o), std::move(e)); } + setDirtyRecursive(true); return DynamicMemoryView{std::move(getBufferView), size, *this}; } @@ -407,4 +387,9 @@ auto RecordComponent::visit(Args &&...args) getDatatype(), *this, std::forward(args)...); } +template +void RecordComponent::verifyChunk(Offset const &o, Extent const &e) const +{ + verifyChunk(determineDatatype(), o, e); +} } // namespace openPMD diff --git a/include/openPMD/Series.hpp b/include/openPMD/Series.hpp index abc6bdf0ff..04907eda40 100644 --- a/include/openPMD/Series.hpp +++ b/include/openPMD/Series.hpp @@ -20,6 +20,7 @@ */ #pragma once +#include "openPMD/Error.hpp" #include 
"openPMD/IO/AbstractIOHandler.hpp" #include "openPMD/IO/Access.hpp" #include "openPMD/IO/Format.hpp" @@ -40,10 +41,15 @@ #include // uint64_t #include +#include #include +#include #include #include +#include #include +#include +#include // expose private and protected members for invasive testing #ifndef OPENPMD_private @@ -192,7 +198,40 @@ namespace internal */ std::optional m_parsePreference; + std::optional> + m_deferred_initialization = std::nullopt; + void close(); + +#if openPMD_HAVE_MPI + /* + * @todo Once we have separate MPI headers, move this there. + */ + std::optional m_communicator; +#endif + + struct NoSourceSpecified + {}; + struct SourceSpecifiedViaJSON + { + std::string value; + }; + struct SourceSpecifiedManually + { + std::string value; + }; + + struct RankTableData + { + Attributable m_attributable; + std::variant< + NoSourceSpecified, + SourceSpecifiedViaJSON, + SourceSpecifiedManually> + m_rankTableSource; + std::optional m_bufferedRead; + }; + RankTableData m_rankTable; }; // SeriesData class SeriesInternal; @@ -215,12 +254,25 @@ class Series : public Attributable friend class ReadIterations; friend class SeriesIterator; friend class internal::SeriesData; + friend class internal::AttributableData; friend class WriteIterations; public: explicit Series(); #if openPMD_HAVE_MPI + /** + * @brief Construct a new Series + * + * For further details, refer to the documentation of the non-MPI overload. + * + * @param filepath The file path. + * @param at Access mode. + * @param comm The MPI communicator. + * @param options Advanced backend configuration via JSON. + * May be specified as a JSON-formatted string directly, or as a path + * to a JSON textfile, prepended by an at sign '@'. + */ Series( std::string const &filepath, Access at, @@ -229,13 +281,50 @@ class Series : public Attributable #endif /** - * @brief Construct a new Series - * - * @param filepath The backend will be determined by the filepath extension. + * @brief Construct a new Series. 
+ * + * For details on access modes, JSON/TOML configuration and iteration + * encoding, refer to: + * + * * https://openpmd-api.readthedocs.io/en/latest/usage/workflow.html#access-modes + * * https://openpmd-api.readthedocs.io/en/latest/details/backendconfig.html + * * https://openpmd-api.readthedocs.io/en/latest/usage/concepts.html#iteration-and-series + * + * In case of file-based iteration encoding, the file names for each + * iteration are determined by an expansion pattern that must be specified. + * It takes one out of two possible forms: + * + * 1. Simple form: %T is replaced with the iteration index, e.g. + * `simData_%T.bp` becomes `simData_50.bp`. + * 2. Padded form: e.g. %06T is replaced with the iteration index padded to + * at least six digits. `simData_%06T.bp` becomes `simData_000050.bp`. + * + * The backend is determined: + * + * 1. Explicitly via the JSON/TOML parameter `backend`, e.g. `{"backend": + * "adios2"}`. + * 2. Otherwise implicitly from the filename extension, e.g. + * `simData_%T.h5`. + * + * The filename extension can be replaced with a globbing pattern %E. + * It will be replaced with an automatically determined file name extension: + * + * 1. In CREATE mode: The extension is set to a backend-specific default + * extension. This requires that the backend is specified via JSON/TOML. + * 2. In READ_ONLY, READ_WRITE and READ_LINEAR modes: These modes require + * that files already exist on disk. The disk will be scanned for files + * that match the pattern and the resulting file extension will be used. + * If the result is ambiguous or no such file is found, an error is + * raised. + * 3. In APPEND mode: Like (2.), except if no matching file is found. In + * that case, the procedure of (1.) is used, owing to the fact that + * APPEND mode can be used to create new datasets. + * + * @param filepath The file path. * @param at Access mode. * @param options Advanced backend configuration via JSON. 
- * May be specified as a JSON-formatted string directly, or as a path - * to a JSON textfile, prepended by an at sign '@'. + * May be specified as a JSON/TOML-formatted string directly, or as a + * path to a JSON/TOML textfile, prepended by an at sign '@'. */ Series( std::string const &filepath, @@ -331,6 +420,32 @@ class Series : public Attributable */ Series &setMeshesPath(std::string const &meshesPath); + /** + * @throw no_such_attribute_error If optional attribute is not present. + * @param collective Run this read operation collectively. + There might be an enormous IO overhead if running this + operation non-collectively. + To make this explicit to users, there is no default parameter. + Parameter is ignored if compiling without MPI support, (it is + present for the sake of a consistent API). + * @return Vector with a String per (writing) MPI rank, indicating user- + * defined meta information per rank. Example: host name. + */ +#if openPMD_HAVE_MPI + chunk_assignment::RankMeta rankTable(bool collective); +#else + chunk_assignment::RankMeta rankTable(bool collective = false); +#endif + + /** + * @brief Set the Mpi Ranks Meta Info attribute, i.e. a Vector with + * a String per (writing) MPI rank, indicating user- + * defined meta information per rank. Example: host name. + * + * @return Reference to modified series. + */ + Series &setRankTable(std::string const &myRankInfo); + /** * @throw no_such_attribute_error If optional attribute is not present. * @return String representing the path to particle species, relative(!) to @@ -502,6 +617,7 @@ class Series : public Attributable * @return String of a pattern for data backend. */ std::string backend() const; + std::string backend(); /** Execute all required remaining IO operations to write or read data. 
* @@ -636,7 +752,21 @@ OPENPMD_private void parseJsonOptions(TracingJSON &options, ParsedInput &); bool hasExpansionPattern(std::string filenameWithExtension); bool reparseExpansionPattern(std::string filenameWithExtension); - void init(std::unique_ptr, std::unique_ptr); + template + void init( + std::string const &filepath, + Access at, + std::string const &options, + MPI_Communicator &&...); + template + std::tuple, TracingJSON> initIOHandler( + std::string const &filepath, + std::string const &options, + Access at, + bool resolve_generic_extension, + MPI_Communicator &&...); + void initSeries( + std::unique_ptr, std::unique_ptr); void initDefaults(IterationEncoding, bool initAll = false); /** * @brief Internal call for flushing a Series. @@ -673,6 +803,7 @@ OPENPMD_private bool flushIOHandler = true); void flushMeshesPath(); void flushParticlesPath(); + void flushRankTable(); void readFileBased(); void readOneIterationFileBased(std::string const &filePath); /** @@ -688,7 +819,7 @@ OPENPMD_private * ReadIterations since those methods should be aware when the current step * is broken). */ - std::optional > readGorVBased( + std::optional> readGorVBased( bool do_always_throw_errors, bool init, std::set const &ignoreIterations = {}); @@ -758,8 +889,18 @@ OPENPMD_private * Returns the current content of the /data/snapshot attribute. 
* (We could also add this to the public API some time) */ - std::optional > currentSnapshot() const; + std::optional> currentSnapshot() const; + + AbstractIOHandler *runDeferredInitialization(); + + AbstractIOHandler *IOHandler(); + AbstractIOHandler const *IOHandler() const; }; // Series + +namespace debug +{ + void printDirty(Series const &); +} } // namespace openPMD // Make sure that this one is always included if Series.hpp is included, diff --git a/include/openPMD/auxiliary/JSON_internal.hpp b/include/openPMD/auxiliary/JSON_internal.hpp index 299cd6987c..dc72cffbdc 100644 --- a/include/openPMD/auxiliary/JSON_internal.hpp +++ b/include/openPMD/auxiliary/JSON_internal.hpp @@ -91,6 +91,7 @@ namespace json * @return nlohmann::json const& */ nlohmann::json const &getShadow() const; + nlohmann::json &getShadow(); /** * @brief Invert the "shadow", i.e. a copy of the original JSON value @@ -247,5 +248,11 @@ namespace json */ nlohmann::json & merge(nlohmann::json &defaultVal, nlohmann::json const &overwrite); + + nlohmann::json &filterByTemplate( + nlohmann::json &defaultVal, nlohmann::json const &positiveMask); + + template + std::string format_toml(toml_t &&); } // namespace json } // namespace openPMD diff --git a/include/openPMD/auxiliary/Mpi.hpp b/include/openPMD/auxiliary/Mpi.hpp index 940ec026a3..f8eefe0cc5 100644 --- a/include/openPMD/auxiliary/Mpi.hpp +++ b/include/openPMD/auxiliary/Mpi.hpp @@ -26,6 +26,9 @@ #if openPMD_HAVE_MPI #include + +#include +#include #endif #include @@ -64,5 +67,52 @@ namespace } } // namespace +/** + * Multiple variable-length strings represented in one single buffer + * with a fixed line width. + * Strings smaller than the maximum width are padded with zeros. + * Each line is zero-terminated with at least one zero character. + * The length of char_buffer should be equal to the product of line_length + * and num_lines. 
+ */ +struct StringMatrix +{ + std::vector char_buffer; + size_t line_length = 0; + size_t num_lines = 0; +}; + +/* + * These are mostly internal helper functions, so this defines only those that + * we need. + * Logically, these should be complemented by `collectStringsTo()` and + * `distributeStringsAsMatrixToAllRanks()`, but we don't need them (yet). + */ + +/** + * @brief Collect multiple variable-length strings to one rank in MPI_Gatherv + * fashion. Uses two collective MPI calls, the first to gather the + * different string lengths, the second to gather the actual strings. + * + * @param communicator MPI communicator + * @param destRank Target rank for MPI_Gatherv + * @param thisRankString The current MPI rank's contribution to the data. + * @return StringMatrix See documentation of StringMatrix struct. + */ +StringMatrix collectStringsAsMatrixTo( + MPI_Comm communicator, int destRank, std::string const &thisRankString); + +/** + * @brief Collect multiple variable-length strings to all ranks in + * MPI_Allgatherv fashion. Uses two collective MPI calls, the first to + * gather the different string lengths, the second to gather the actual + * strings. + * + * @param communicator communicator + * @param thisRankString The current MPI rank's contribution to the data. + * @return std::vector All ranks' strings, returned on all ranks. 
+ */ +std::vector distributeStringsToAllRanks( + MPI_Comm communicator, std::string const &thisRankString); #endif } // namespace openPMD::auxiliary diff --git a/include/openPMD/auxiliary/TypeTraits.hpp b/include/openPMD/auxiliary/TypeTraits.hpp index 64365d7bd9..526746de89 100644 --- a/include/openPMD/auxiliary/TypeTraits.hpp +++ b/include/openPMD/auxiliary/TypeTraits.hpp @@ -103,6 +103,27 @@ namespace detail constexpr static bool value = true; using type = T; }; + + template + struct IsChar + { + constexpr static bool value = false; + }; + template <> + struct IsChar + { + constexpr static bool value = true; + }; + template <> + struct IsChar + { + constexpr static bool value = true; + }; + template <> + struct IsChar + { + constexpr static bool value = true; + }; } // namespace detail template @@ -117,6 +138,9 @@ inline constexpr bool IsPointer_v = detail::IsPointer::value; template using IsPointer_t = typename detail::IsPointer::type; +template +inline constexpr bool IsChar_v = detail::IsChar::value; + /** Emulate in the C++ concept ContiguousContainer * * Users can implement this trait for a type to signal it can be used as diff --git a/include/openPMD/backend/Attributable.hpp b/include/openPMD/backend/Attributable.hpp index c7b92b8b44..0f7b722ae5 100644 --- a/include/openPMD/backend/Attributable.hpp +++ b/include/openPMD/backend/Attributable.hpp @@ -53,6 +53,9 @@ class Series; namespace internal { + class IterationData; + class SeriesData; + class AttributableData { friend class openPMD::Attributable; @@ -74,6 +77,42 @@ namespace internal */ Writable m_writable; + template + T asInternalCopyOf() + { + auto *self = dynamic_cast(this); + if (!self) + { + if constexpr (std::is_same_v) + { + throw std::runtime_error( + "[Attributable::retrieveSeries] Error when trying to " + "retrieve the Series object. Note: An instance of the " + "Series object must still exist when flushing. A " + "common cause for this error is using a flush call on " + "a handle (e.g. 
`Iteration::seriesFlush()`) when the " + "original Series object has already gone out of " + "scope."); + } + else + { + throw std::runtime_error( + + "[AttributableData::asInternalCopyOf] Error when " + "trying to retrieve a containing object. Note: An " + "instance of the Series object must still exist when " + "flushing. A common cause for this error is using a " + "flush call on a handle (e.g. " + "`Iteration::seriesFlush()`) when the original Series " + "object has already gone out of scope."); + } + } + T res; + res.setData( + std::shared_ptr(self, [](auto const *) {})); + return res; + } + private: /** * The attributes defined by this Attributable. @@ -83,8 +122,30 @@ namespace internal template class BaseRecordData; + + class RecordComponentData; + + /* + * Internal function to turn a handle into an owning handle that will keep + * not only itself, but the entire Series alive. Works by hiding a copy of + * the Series into the destructor lambda of the internal shared pointer. The + * returned handle is entirely safe to use in just the same ways as a normal + * handle, just the surrounding Series needs not be kept alive any more + * since it is stored within the handle. By storing the Series in the + * handle, not in the actual data, reference cycles are avoided. + * + * Instantiations for T exist for types RecordComponent, + * MeshRecordComponent, Mesh, Record, ParticleSpecies, Iteration. + */ + template + T &makeOwning(T &self, Series); } // namespace internal +namespace debug +{ + void printDirty(Series const &); +} + /** @brief Layer to manage storage of attributes associated with file objects. 
* * Mandatory and user-defined Attributes and their data for every object in the @@ -109,6 +170,10 @@ class Attributable friend class Series; friend class Writable; friend class WriteIterations; + friend class internal::RecordComponentData; + friend void debug::printDirty(Series const &); + template + friend T &internal::makeOwning(T &self, Series); protected: // tag for internal constructor @@ -198,6 +263,8 @@ class Attributable * of parents. This method will walk up the parent list until it reaches * an object that has no parent, which is the Series object, and flush()-es * it. + * If the Attributable is an Iteration or any object contained in an + * Iteration, that Iteration will be flushed regardless of its dirty status. * * @param backendConfig Further backend-specific instructions on how to * implement this flush call. @@ -222,13 +289,16 @@ class Attributable * Indicates where this Attributable may be found within its Series. * Prefixed by the accessed object, e.g., * "iterations", "100", "meshes", "E", "x" - * Notice that RecordComponent::SCALAR is included in this list, too. + * Notice that RecordComponent::SCALAR does not get included in this + * list. */ std::vector group; Access access; /** Reconstructs a path that can be passed to a Series constructor */ std::string filePath() const; + /** Return the path ob the object within the openPMD file */ + std::string openPMDPath() const; }; /** @@ -251,8 +321,13 @@ OPENPMD_protected * Throws an error otherwise, e.g., for Series objects. 
* @{ */ - Iteration const &containingIteration() const; - Iteration &containingIteration(); + [[nodiscard]] auto containingIteration() const + -> std::pair< + std::optional, + internal::SeriesData const *>; + auto containingIteration() -> std::pair< + std::optional, + internal::SeriesData *>; /** @} */ void seriesFlush(internal::FlushParams const &); @@ -375,20 +450,70 @@ OPENPMD_protected bool dirty() const { - return writable().dirty; + return writable().dirtySelf; } - bool &dirty() + /** O(1). + */ + bool dirtyRecursive() const { - return writable().dirty; + return writable().dirtyRecursive; } - bool written() const + void setDirty(bool dirty_in) { - return writable().written; + auto &w = writable(); + w.dirtySelf = dirty_in; + setDirtyRecursive(dirty_in); + } + /* Amortized O(1) if dirty_in is true, else O(1). + * + * Must be used carefully with `dirty_in == false` since it is assumed that + * all children are not dirty. + * + * Invariant of dirtyRecursive: + * this->dirtyRecursive implies parent->dirtyRecursive. + * + * Hence: + * + * * If dirty_in is true: This needs only go up far enough until a parent is + * found that itself is dirtyRecursive. + * * If dirty_in is false: Only sets `this` to `dirtyRecursive == false`. + * The caller must ensure that the invariant holds (e.g. clearing + * everything during flushing or reading logic). + */ + void setDirtyRecursive(bool dirty_in) + { + auto &w = writable(); + w.dirtyRecursive = dirty_in; + if (dirty_in) + { + auto current = w.parent; + while (current && !current->dirtyRecursive) + { + current->dirtyRecursive = true; + current = current->parent; + } + } } - bool &written() + bool written() const { return writable().written; } + enum class EnqueueAsynchronously : bool + { + Yes, + No + }; + /* + * setWritten() will take effect immediately. 
+ * But it might additionally be necessary in some situations to enqueue a + * SET_WRITTEN task to the backend: + * A single flush() operation might encompass different Iterations. In + * file-based Iteration encoding, some objects must be written to every + * single file, thus their `written` flag must be restored to `false` for + * each Iteration. When flushing multiple Iterations at once, this must + * happen as an asynchronous IO task. + */ + void setWritten(bool val, EnqueueAsynchronously); private: /** @@ -414,7 +539,7 @@ inline bool Attributable::setAttribute(std::string const &key, T value) error::throwNoSuchAttribute(out_of_range_msg(key)); } - dirty() = true; + setDirty(true); auto it = attri.m_attributes.lower_bound(key); if (it != attri.m_attributes.end() && !attri.m_attributes.key_comp()(key, it->first)) diff --git a/include/openPMD/backend/Attribute.hpp b/include/openPMD/backend/Attribute.hpp index 83528dc7b6..a183b7818a 100644 --- a/include/openPMD/backend/Attribute.hpp +++ b/include/openPMD/backend/Attribute.hpp @@ -106,28 +106,68 @@ class Attribute : public auxiliary::Variant namespace detail { template - auto doConvert(T *pv) -> std::variant + auto doConvert(T const *pv) -> std::variant { (void)pv; if constexpr (std::is_convertible_v) { return {static_cast(*pv)}; } + else if constexpr ( + std::is_same_v && auxiliary::IsChar_v) + { + if (pv->size() == 1) + { + return static_cast(pv->at(0)); + } + else + { + return { + std::runtime_error("getCast: cast from string to char only " + "possible if string has length 1.")}; + } + } + else if constexpr ( + auxiliary::IsChar_v && std::is_same_v) + { + return std::string(1, *pv); + } else if constexpr (auxiliary::IsVector_v && auxiliary::IsVector_v) { + U res{}; + res.reserve(pv->size()); if constexpr (std::is_convertible_v< typename T::value_type, typename U::value_type>) { - U res{}; - res.reserve(pv->size()); std::copy(pv->begin(), pv->end(), std::back_inserter(res)); return {res}; } else { - return { - 
std::runtime_error("getCast: no vector cast possible.")}; + // try a dynamic conversion recursively + for (auto const &val : *pv) + { + auto conv = doConvert< + typename T::value_type, + typename U::value_type>(&val); + if (auto conv_val = + std::get_if(&conv); + conv_val) + { + res.push_back(std::move(*conv_val)); + } + else + { + auto exception = std::get(conv); + return {std::runtime_error( + std::string( + "getCast: no vector cast possible, recursive " + "error: ") + + exception.what())}; + } + } + return {res}; } } // conversion cast: array to vector @@ -135,19 +175,40 @@ namespace detail // the frontend expects a vector else if constexpr (auxiliary::IsArray_v && auxiliary::IsVector_v) { + U res{}; + res.reserve(pv->size()); if constexpr (std::is_convertible_v< typename T::value_type, typename U::value_type>) { - U res{}; - res.reserve(pv->size()); std::copy(pv->begin(), pv->end(), std::back_inserter(res)); return {res}; } else { - return {std::runtime_error( - "getCast: no array to vector conversion possible.")}; + // try a dynamic conversion recursively + for (auto const &val : *pv) + { + auto conv = doConvert< + typename T::value_type, + typename U::value_type>(&val); + if (auto conv_val = + std::get_if(&conv); + conv_val) + { + res.push_back(std::move(*conv_val)); + } + else + { + auto exception = std::get(conv); + return {std::runtime_error( + std::string( + "getCast: no array to vector conversion " + "possible, recursive error: ") + + exception.what())}; + } + } + return {res}; } } // conversion cast: vector to array @@ -155,11 +216,11 @@ namespace detail // the frontend expects an array else if constexpr (auxiliary::IsVector_v && auxiliary::IsArray_v) { + U res{}; if constexpr (std::is_convertible_v< typename T::value_type, typename U::value_type>) { - U res{}; if (res.size() != pv->size()) { return std::runtime_error( @@ -175,24 +236,60 @@ namespace detail } else { - return {std::runtime_error( - "getCast: no vector to array conversion possible.")}; + 
// try a dynamic conversion recursively + for (size_t i = 0; i <= res.size(); ++i) + { + auto const &val = (*pv)[i]; + auto conv = doConvert< + typename T::value_type, + typename U::value_type>(&val); + if (auto conv_val = + std::get_if(&conv); + conv_val) + { + res[i] = std::move(*conv_val); + } + else + { + auto exception = std::get(conv); + return {std::runtime_error( + std::string( + "getCast: no vector to array conversion " + "possible, recursive error: ") + + exception.what())}; + } + } + return {res}; } } // conversion cast: turn a single value into a 1-element vector else if constexpr (auxiliary::IsVector_v) { + U res{}; + res.reserve(1); if constexpr (std::is_convertible_v) { - U res{}; - res.reserve(1); res.push_back(static_cast(*pv)); return {res}; } else { - return {std::runtime_error( - "getCast: no scalar to vector conversion possible.")}; + // try a dynamic conversion recursively + auto conv = doConvert(pv); + if (auto conv_val = std::get_if(&conv); + conv_val) + { + res.push_back(std::move(*conv_val)); + return {res}; + } + else + { + auto exception = std::get(conv); + return {std::runtime_error( + std::string("getCast: no scalar to vector conversion " + "possible, recursive error: ") + + exception.what())}; + } } } else diff --git a/include/openPMD/backend/BaseRecord.hpp b/include/openPMD/backend/BaseRecord.hpp index 88bd73b247..ba137b10db 100644 --- a/include/openPMD/backend/BaseRecord.hpp +++ b/include/openPMD/backend/BaseRecord.hpp @@ -237,6 +237,8 @@ class BaseRecord friend class internal::BaseRecordData; template friend class internal::ScalarIterator; + template + friend T &internal::makeOwning(T &self, Series); using Data_t = internal::BaseRecordData; @@ -256,6 +258,11 @@ class BaseRecord return *m_baseRecordData; } + inline std::shared_ptr getShared() + { + return m_baseRecordData; + } + BaseRecord(); protected: @@ -494,15 +501,6 @@ class BaseRecord virtual void flush_impl(std::string const &, internal::FlushParams const &) = 0; - /** - * 
@brief Check recursively whether this BaseRecord is dirty. - * It is dirty if any attribute or dataset is read from or written to - * the backend. - * - * @return true If dirty. - * @return false Otherwise. - */ - bool dirtyRecursive() const; void eraseScalar(); }; // BaseRecord @@ -654,7 +652,7 @@ auto BaseRecord::erase(key_type const &key) -> size_type if (keyScalar) { - this->written() = false; + this->setWritten(false, Attributable::EnqueueAsynchronously::No); this->writable().abstractFilePosition.reset(); this->get().m_datasetDefined = false; } @@ -999,25 +997,12 @@ inline void BaseRecord::flush( } this->flush_impl(name, flushParams); - // flush_impl must take care to correctly set the dirty() flag so this - // method doesn't do it -} - -template -inline bool BaseRecord::dirtyRecursive() const -{ - if (this->dirty()) - { - return true; - } - for (auto const &pair : *this) + if (flushParams.flushLevel != FlushLevel::SkeletonOnly) { - if (pair.second.dirtyRecursive()) - { - return true; - } + this->setDirty(false); } - return false; + // flush_impl must take care to correctly set the dirty() flag so this + // method doesn't do it } template diff --git a/include/openPMD/backend/BaseRecordComponent.hpp b/include/openPMD/backend/BaseRecordComponent.hpp index 0288e9bb9a..fe4490830d 100644 --- a/include/openPMD/backend/BaseRecordComponent.hpp +++ b/include/openPMD/backend/BaseRecordComponent.hpp @@ -143,6 +143,8 @@ class BaseRecordComponent : virtual public Attributable */ bool constant() const; + std::optional joinedDimension() const; + /** * Get data chunks that are available to be loaded from the backend. 
* Note that this is backend-dependent information and the returned diff --git a/include/openPMD/backend/Container.hpp b/include/openPMD/backend/Container.hpp index ae18b0182c..58b07bd48a 100644 --- a/include/openPMD/backend/Container.hpp +++ b/include/openPMD/backend/Container.hpp @@ -21,6 +21,7 @@ #pragma once #include "openPMD/Error.hpp" +#include "openPMD/IO/Access.hpp" #include "openPMD/backend/Attributable.hpp" #include @@ -289,7 +290,7 @@ class Container : virtual public Attributable { if (IOHandler()->m_seriesStatus != internal::SeriesStatus::Parsing && - Access::READ_ONLY == IOHandler()->m_frontendAccess) + access::readOnly(IOHandler()->m_frontendAccess)) { auxiliary::OutOfRangeMsg const out_of_range_msg; throw std::out_of_range(out_of_range_msg(key)); @@ -330,7 +331,7 @@ class Container : virtual public Attributable { if (IOHandler()->m_seriesStatus != internal::SeriesStatus::Parsing && - Access::READ_ONLY == IOHandler()->m_frontendAccess) + access::readOnly(IOHandler()->m_frontendAccess)) { auxiliary::OutOfRangeMsg out_of_range_msg; throw std::out_of_range(out_of_range_msg(key)); diff --git a/include/openPMD/backend/MeshRecordComponent.hpp b/include/openPMD/backend/MeshRecordComponent.hpp index 3d10cedacd..d05163d754 100644 --- a/include/openPMD/backend/MeshRecordComponent.hpp +++ b/include/openPMD/backend/MeshRecordComponent.hpp @@ -46,7 +46,7 @@ class MeshRecordComponent : public RecordComponent private: MeshRecordComponent(); MeshRecordComponent(NoInit); - void read() override; + void read(); void flush(std::string const &, internal::FlushParams const &); public: diff --git a/include/openPMD/backend/PatchRecord.hpp b/include/openPMD/backend/PatchRecord.hpp index 84d180bac5..7f488ec92f 100644 --- a/include/openPMD/backend/PatchRecord.hpp +++ b/include/openPMD/backend/PatchRecord.hpp @@ -43,6 +43,6 @@ class PatchRecord : public BaseRecord void flush_impl(std::string const &, internal::FlushParams const &) override; - void read() override; + void read(); 
}; // PatchRecord } // namespace openPMD diff --git a/include/openPMD/backend/PatchRecordComponent.hpp b/include/openPMD/backend/PatchRecordComponent.hpp index 10ca725e33..63875b11e2 100644 --- a/include/openPMD/backend/PatchRecordComponent.hpp +++ b/include/openPMD/backend/PatchRecordComponent.hpp @@ -20,6 +20,8 @@ */ #pragma once +#include "openPMD/Error.hpp" +#include "openPMD/RecordComponent.hpp" #include "openPMD/auxiliary/ShareRawInternal.hpp" #include "openPMD/backend/BaseRecordComponent.hpp" @@ -36,41 +38,11 @@ namespace openPMD { -namespace internal -{ - class PatchRecordComponentData : public BaseRecordComponentData - { - public: - /** - * Chunk reading/writing requests on the contained dataset. - */ - std::queue m_chunks; - - PatchRecordComponentData(PatchRecordComponentData const &) = delete; - PatchRecordComponentData(PatchRecordComponentData &&) = delete; - - PatchRecordComponentData & - operator=(PatchRecordComponentData const &) = delete; - PatchRecordComponentData & - operator=(PatchRecordComponentData &&) = delete; - - PatchRecordComponentData(); - - void reset() override - { - BaseRecordComponentData::reset(); - m_chunks = std::queue(); - } - }; - - template - class BaseRecordData; -} // namespace internal /** * @todo add support for constant patch record components */ -class PatchRecordComponent : public BaseRecordComponent +class PatchRecordComponent : public RecordComponent { template friend class Container; @@ -81,7 +53,6 @@ class PatchRecordComponent : public BaseRecordComponent friend class ParticlePatches; friend class PatchRecord; friend class ParticleSpecies; - friend class internal::PatchRecordComponentData; public: /** @@ -95,7 +66,7 @@ class PatchRecordComponent : public BaseRecordComponent PatchRecordComponent &setUnitSI(double); - virtual PatchRecordComponent &resetDataset(Dataset); + PatchRecordComponent &resetDataset(Dataset) override; uint8_t getDimensionality() const; Extent getExtent() const; @@ -115,52 +86,21 @@ class 
PatchRecordComponent : public BaseRecordComponent template void store(uint64_t idx, T); + template + void store(T); + // clang-format off OPENPMD_private // clang-format on - void flush(std::string const &, internal::FlushParams const &); - virtual void read(); - - /** - * @brief Check recursively whether this RecordComponent is dirty. - * It is dirty if any attribute or dataset is read from or written to - * the backend. - * - * @return true If dirty. - * @return false Otherwise. - */ - bool dirtyRecursive() const; + using RecordComponent::flush; // clang-format off OPENPMD_protected // clang-format on - using Data_t = internal::PatchRecordComponentData; - - std::shared_ptr m_patchRecordComponentData; - PatchRecordComponent(); PatchRecordComponent(NoInit); - - inline Data_t const &get() const - { - // cannot call this in the const overload - // setDatasetDefined(*m_recordComponentData); - return *m_patchRecordComponentData; - } - - inline Data_t &get() - { - setDatasetDefined(*m_patchRecordComponentData); - return *m_patchRecordComponentData; - } - - inline void setData(std::shared_ptr data) - { - m_patchRecordComponentData = std::move(data); - BaseRecordComponent::setData(m_patchRecordComponentData); - } }; // PatchRecordComponent template @@ -203,7 +143,7 @@ inline void PatchRecordComponent::load(std::shared_ptr data) dRead.dtype = getDatatype(); dRead.data = std::static_pointer_cast(data); auto &rc = get(); - rc.m_chunks.push(IOTask(this, dRead)); + rc.push_chunk(IOTask(this, dRead)); } template @@ -242,6 +182,35 @@ inline void PatchRecordComponent::store(uint64_t idx, T data) dWrite.dtype = dtype; dWrite.data = std::make_shared(data); auto &rc = get(); - rc.m_chunks.push(IOTask(this, std::move(dWrite))); + rc.push_chunk(IOTask(this, std::move(dWrite))); +} + +template +inline void PatchRecordComponent::store(T data) +{ + Datatype dtype = determineDatatype(); + if (dtype != getDatatype()) + { + std::ostringstream oss; + oss << "Datatypes of patch data (" << 
dtype << ") and dataset (" + << getDatatype() << ") do not match."; + throw std::runtime_error(oss.str()); + } + + if (!joinedDimension().has_value()) + { + throw error::WrongAPIUsage( + "[PatchRecordComponent::store] API call without explicit " + "specification of index only allowed when a joined dimension is " + "specified."); + } + + Parameter dWrite; + dWrite.offset = {}; + dWrite.extent = {1}; + dWrite.dtype = dtype; + dWrite.data = std::make_shared(data); + auto &rc = get(); + rc.push_chunk(IOTask(this, std::move(dWrite))); } } // namespace openPMD diff --git a/include/openPMD/backend/Writable.hpp b/include/openPMD/backend/Writable.hpp index 28554d0cf9..d0b8b4f3c7 100644 --- a/include/openPMD/backend/Writable.hpp +++ b/include/openPMD/backend/Writable.hpp @@ -44,6 +44,7 @@ template class AbstractIOHandlerImplCommon; template class Span; +class Series; namespace internal { @@ -52,7 +53,12 @@ namespace internal } // namespace internal namespace detail { - struct BufferedActions; + class ADIOS2File; +} + +namespace debug +{ + void printDirty(Series const &); } /** @brief Layer to mirror structure of logical data and persistent data in @@ -83,7 +89,7 @@ class Writable final friend class Record; friend class AbstractIOHandlerImpl; friend class ADIOS2IOHandlerImpl; - friend struct detail::BufferedActions; + friend class detail::ADIOS2File; friend class HDF5IOHandlerImpl; friend class ParallelHDF5IOHandlerImpl; template @@ -94,6 +100,7 @@ class Writable final friend std::string concrete_bp1_file_position(Writable *); template friend class Span; + friend void debug::printDirty(Series const &); private: Writable(internal::AttributableData *); @@ -135,7 +142,25 @@ OPENPMD_private IOHandler = nullptr; internal::AttributableData *attributable = nullptr; Writable *parent = nullptr; - bool dirty = true; + + /** Tracks if there are unwritten changes for this specific Writable. + * + * Manipulate via Attributable::dirty() and Attributable::setDirty(). 
+ */ + bool dirtySelf = true; + /** + * Tracks if there are unwritten changes anywhere in the + * tree whose ancestor this Writable is. + * + * Invariant: this->dirtyRecursive implies parent->dirtyRecursive. + * + * dirtySelf and dirtyRecursive are separated since that allows specifying + * that `this` is not dirty, but some child is. + * + * Manipulate via Attributable::dirtyRecursive() and + * Attributable::setDirtyRecursive(). + */ + bool dirtyRecursive = true; /** * If parent is not null, then this is a key such that: * &(*parent)[key] == this diff --git a/include/openPMD/binding/python/Container.H b/include/openPMD/binding/python/Container.H index 350eaed823..a07847e600 100644 --- a/include/openPMD/binding/python/Container.H +++ b/include/openPMD/binding/python/Container.H @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -118,11 +119,12 @@ Class_ finalize_container(Class_ cl) // keep same policy as Container class: missing keys are created cl.def( "__getitem__", - [](Map &m, KeyType const &k) -> MappedType & { return m[k]; }, + [](Map &m, KeyType const &k) -> MappedType { return m[k]; }, // copy + keepalive // All objects in the openPMD object model are handles, so using a copy // is safer and still performant. - py::return_value_policy::copy); + py::return_value_policy::move, + py::keep_alive<0, 1>()); // Assignment provided only if the type is copyable py::detail::map_assignment(cl); diff --git a/include/openPMD/binding/python/Mpi.hpp b/include/openPMD/binding/python/Mpi.hpp new file mode 100644 index 0000000000..dc110e0ca1 --- /dev/null +++ b/include/openPMD/binding/python/Mpi.hpp @@ -0,0 +1,100 @@ +/* Copyright 2021 Axel Huebl and Franz Poeschel + * + * This file is part of openPMD-api. 
+ * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see . + */ + +#pragma once + +#include "openPMD/config.hpp" + +#if openPMD_HAVE_MPI + +#include "openPMD/binding/python/Common.hpp" + +#include + +/** mpi4py communicator wrapper + * + * refs: + * - https://github.com/mpi4py/mpi4py/blob/3.0.0/src/mpi4py/libmpi.pxd#L35-L36 + * - https://github.com/mpi4py/mpi4py/blob/3.0.0/src/mpi4py/MPI.pxd#L100-L105 + * - installed: include/mpi4py/mpi4py.MPI.h + */ +struct openPMD_PyMPICommObject +{ + PyObject_HEAD MPI_Comm ob_mpi; + unsigned int flags; +}; +using openPMD_PyMPIIntracommObject = openPMD_PyMPICommObject; + +inline std::variant +pythonObjectAsMpiComm(pybind11::object &comm) +{ + namespace py = pybind11; + //! TODO perform mpi4py import test and check min-version + //! careful: double MPI_Init risk? only import mpi4py.MPI? + //! required C-API init? probably just checks: + //! refs: + //! - https://bitbucket.org/mpi4py/mpi4py/src/3.0.0/demo/wrap-c/helloworld.c + //! 
- installed: include/mpi4py/mpi4py.MPI_api.h + // if( import_mpi4py() < 0 ) { here be dragons } + + if (comm.ptr() == Py_None) + return {"MPI communicator cannot be None."}; + if (comm.ptr() == nullptr) + return {"MPI communicator is a nullptr."}; + + // check type string to see if this is mpi4py + // __str__ (pretty) + // __repr__ (unambiguous) + // mpi4py: + // pyMPI: ... (TODO) + py::str const comm_pystr = py::repr(comm); + std::string const comm_str = comm_pystr.cast(); + if (comm_str.substr(0, 12) != std::string(" >( + comm.get_type())) + // TODO add mpi4py version from above import check to error message + return { + "comm has unexpected type layout in " + comm_str + + " (Mismatched MPI at compile vs. runtime? " + "Breaking mpi4py release?)"}; + + // todo other possible implementations: + // - pyMPI (inactive since 2008?): import mpi; mpi.WORLD + + // reimplementation of mpi4py's: + // MPI_Comm* mpiCommPtr = PyMPIComm_Get(comm.ptr()); + MPI_Comm *mpiCommPtr = + &((openPMD_PyMPIIntracommObject *)(comm.ptr()))->ob_mpi; + + if (PyErr_Occurred()) + return {"MPI communicator access error."}; + if (mpiCommPtr == nullptr) + { + return { + "MPI communicator cast failed. " + "(Mismatched MPI at compile vs. runtime?)"}; + } + return {*mpiCommPtr}; +} + +#endif diff --git a/include/openPMD/binding/python/Pickle.hpp b/include/openPMD/binding/python/Pickle.hpp index eabe307af9..3d3b233eb4 100644 --- a/include/openPMD/binding/python/Pickle.hpp +++ b/include/openPMD/binding/python/Pickle.hpp @@ -58,7 +58,7 @@ add_pickle(pybind11::class_ &cl, T_SeriesAccessor &&seriesAccessor) }, // __setstate__ - [&seriesAccessor](py::tuple t) { + [&seriesAccessor](py::tuple const &t) { // our tuple has exactly two elements: filePath & group if (t.size() != 2) throw std::runtime_error("Invalid state!"); @@ -67,12 +67,9 @@ add_pickle(pybind11::class_ &cl, T_SeriesAccessor &&seriesAccessor) std::vector const group = t[1].cast >(); - // Create a new openPMD Series and keep it alive. 
- // This is a big hack for now, but it works for our use - // case, which is spinning up remote serial read series - // for DASK. - static auto series = openPMD::Series(filename, Access::READ_ONLY); - return seriesAccessor(series, group); + openPMD::Series series( + filename, Access::READ_ONLY, "defer_iteration_parsing = true"); + return seriesAccessor(std::move(series), group); })); } } // namespace openPMD diff --git a/pyproject.toml b/pyproject.toml index 0b739cebcc..7bdc279595 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,6 +4,6 @@ requires = [ "wheel", "cmake>=3.15.0,<4.0.0", "packaging>=23", - "pybind11>=2.11.1,<3.0.0" + "pybind11>=2.12.0,<3.0.0" ] build-backend = "setuptools.build_meta" diff --git a/setup.py b/setup.py index 27e4cc4916..35f447ae4f 100644 --- a/setup.py +++ b/setup.py @@ -60,7 +60,6 @@ def build_extension(self, ext): '-DopenPMD_BUILD_TESTING:BOOL=' + BUILD_TESTING, # static/shared libs '-DopenPMD_BUILD_SHARED_LIBS:BOOL=' + BUILD_SHARED_LIBS, - '-DHDF5_USE_STATIC_LIBRARIES:BOOL=' + HDF5_USE_STATIC_LIBRARIES, # Unix: rpath to current dir when packaged # needed for shared (here non-default) builds '-DCMAKE_BUILD_WITH_INSTALL_RPATH:BOOL=ON', @@ -68,6 +67,12 @@ def build_extension(self, ext): # Windows: has no RPath concept, all `.dll`s must be in %PATH% # or same dir as calling executable ] + if HDF5_USE_STATIC_LIBRARIES is not None: + cmake_args.append('-DHDF5_USE_STATIC_LIBRARIES:BOOL=' + + HDF5_USE_STATIC_LIBRARIES) + if ZLIB_USE_STATIC_LIBS is not None: + cmake_args.append('-DZLIB_USE_STATIC_LIBS:BOOL=' + + ZLIB_USE_STATIC_LIBS) if CMAKE_INTERPROCEDURAL_OPTIMIZATION is not None: cmake_args.append('-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=' + CMAKE_INTERPROCEDURAL_OPTIMIZATION) @@ -126,7 +131,8 @@ def build_extension(self, ext): # Work-around for https://github.com/pypa/setuptools/issues/1712 # note: changed default for SHARED, MPI, TESTING and EXAMPLES openPMD_USE_MPI = os.environ.get('openPMD_USE_MPI', 'OFF') -HDF5_USE_STATIC_LIBRARIES 
= os.environ.get('HDF5_USE_STATIC_LIBRARIES', 'OFF') +HDF5_USE_STATIC_LIBRARIES = os.environ.get('HDF5_USE_STATIC_LIBRARIES', None) +ZLIB_USE_STATIC_LIBS = os.environ.get('ZLIB_USE_STATIC_LIBS', None) # deprecated: backwards compatibility to <= 0.13.* BUILD_SHARED_LIBS = os.environ.get('BUILD_SHARED_LIBS', 'OFF') BUILD_TESTING = os.environ.get('BUILD_TESTING', 'OFF') diff --git a/share/openPMD/thirdParty/pybind11/CMakeLists.txt b/share/openPMD/thirdParty/pybind11/CMakeLists.txt index 87ec103468..7db1bf668f 100644 --- a/share/openPMD/thirdParty/pybind11/CMakeLists.txt +++ b/share/openPMD/thirdParty/pybind11/CMakeLists.txt @@ -5,15 +5,25 @@ # All rights reserved. Use of this source code is governed by a # BSD-style license that can be found in the LICENSE file. +# Propagate this policy (FindPythonInterp removal) so it can be detected later +if(NOT CMAKE_VERSION VERSION_LESS "3.27") + cmake_policy(GET CMP0148 _pybind11_cmp0148) +endif() + cmake_minimum_required(VERSION 3.5) -# The `cmake_minimum_required(VERSION 3.5...3.26)` syntax does not work with +# The `cmake_minimum_required(VERSION 3.5...3.27)` syntax does not work with # some versions of VS that have a patched CMake 3.11. 
This forces us to emulate # the behavior using the following workaround: -if(${CMAKE_VERSION} VERSION_LESS 3.26) +if(${CMAKE_VERSION} VERSION_LESS 3.27) cmake_policy(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}) else() - cmake_policy(VERSION 3.26) + cmake_policy(VERSION 3.27) +endif() + +if(_pybind11_cmp0148) + cmake_policy(SET CMP0148 ${_pybind11_cmp0148}) + unset(_pybind11_cmp0148) endif() # Avoid infinite recursion if tests include this as a subdirectory @@ -82,33 +92,58 @@ if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR) set(pybind11_system "") set_property(GLOBAL PROPERTY USE_FOLDERS ON) + if(CMAKE_VERSION VERSION_LESS "3.18") + set(_pybind11_findpython_default OFF) + else() + set(_pybind11_findpython_default ON) + endif() else() set(PYBIND11_MASTER_PROJECT OFF) set(pybind11_system SYSTEM) + set(_pybind11_findpython_default OFF) endif() # Options option(PYBIND11_INSTALL "Install pybind11 header files?" ${PYBIND11_MASTER_PROJECT}) option(PYBIND11_TEST "Build pybind11 test suite?" ${PYBIND11_MASTER_PROJECT}) option(PYBIND11_NOPYTHON "Disable search for Python" OFF) +option(PYBIND11_DISABLE_HANDLE_TYPE_NAME_DEFAULT_IMPLEMENTATION + "To enforce that a handle_type_name<> specialization exists" OFF) option(PYBIND11_SIMPLE_GIL_MANAGEMENT "Use simpler GIL management logic that does not support disassociation" OFF) +option(PYBIND11_NUMPY_1_ONLY + "Disable NumPy 2 support to avoid changes to previous pybind11 versions." 
OFF) set(PYBIND11_INTERNALS_VERSION "" CACHE STRING "Override the ABI version, may be used to enable the unstable ABI.") +if(PYBIND11_DISABLE_HANDLE_TYPE_NAME_DEFAULT_IMPLEMENTATION) + add_compile_definitions(PYBIND11_DISABLE_HANDLE_TYPE_NAME_DEFAULT_IMPLEMENTATION) +endif() if(PYBIND11_SIMPLE_GIL_MANAGEMENT) add_compile_definitions(PYBIND11_SIMPLE_GIL_MANAGEMENT) endif() +if(PYBIND11_NUMPY_1_ONLY) + add_compile_definitions(PYBIND11_NUMPY_1_ONLY) +endif() cmake_dependent_option( USE_PYTHON_INCLUDE_DIR "Install pybind11 headers in Python include directory instead of default installation prefix" OFF "PYBIND11_INSTALL" OFF) -cmake_dependent_option(PYBIND11_FINDPYTHON "Force new FindPython" OFF +cmake_dependent_option(PYBIND11_FINDPYTHON "Force new FindPython" ${_pybind11_findpython_default} "NOT CMAKE_VERSION VERSION_LESS 3.12" OFF) +# Allow PYTHON_EXECUTABLE if in FINDPYTHON mode and building pybind11's tests +# (makes transition easier while we support both modes). +if(PYBIND11_MASTER_PROJECT + AND PYBIND11_FINDPYTHON + AND DEFINED PYTHON_EXECUTABLE + AND NOT DEFINED Python_EXECUTABLE) + set(Python_EXECUTABLE "${PYTHON_EXECUTABLE}") +endif() + # NB: when adding a header don't forget to also add it to setup.py set(PYBIND11_HEADERS include/pybind11/detail/class.h @@ -132,6 +167,7 @@ set(PYBIND11_HEADERS include/pybind11/embed.h include/pybind11/eval.h include/pybind11/gil.h + include/pybind11/gil_safe_call_once.h include/pybind11/iostream.h include/pybind11/functional.h include/pybind11/numpy.h @@ -141,7 +177,8 @@ set(PYBIND11_HEADERS include/pybind11/stl.h include/pybind11/stl_bind.h include/pybind11/stl/filesystem.h - include/pybind11/type_caster_pyobject_ptr.h) + include/pybind11/type_caster_pyobject_ptr.h + include/pybind11/typing.h) # Compare with grep and warn if mismatched if(PYBIND11_MASTER_PROJECT AND NOT CMAKE_VERSION VERSION_LESS 3.12) @@ -277,7 +314,21 @@ if(PYBIND11_INSTALL) # pkg-config support if(NOT prefix_for_pc_file) - set(prefix_for_pc_file 
"${CMAKE_INSTALL_PREFIX}") + if(IS_ABSOLUTE "${CMAKE_INSTALL_DATAROOTDIR}") + set(prefix_for_pc_file "${CMAKE_INSTALL_PREFIX}") + else() + set(pc_datarootdir "${CMAKE_INSTALL_DATAROOTDIR}") + if(CMAKE_VERSION VERSION_LESS 3.20) + set(prefix_for_pc_file "\${pcfiledir}/..") + while(pc_datarootdir) + get_filename_component(pc_datarootdir "${pc_datarootdir}" DIRECTORY) + string(APPEND prefix_for_pc_file "/..") + endwhile() + else() + cmake_path(RELATIVE_PATH CMAKE_INSTALL_PREFIX BASE_DIRECTORY CMAKE_INSTALL_DATAROOTDIR + OUTPUT_VARIABLE prefix_for_pc_file) + endif() + endif() endif() join_paths(includedir_for_pc_file "\${prefix}" "${CMAKE_INSTALL_INCLUDEDIR}") configure_file("${CMAKE_CURRENT_SOURCE_DIR}/tools/pybind11.pc.in" diff --git a/share/openPMD/thirdParty/pybind11/README.rst b/share/openPMD/thirdParty/pybind11/README.rst index 80213a4062..4032f97a57 100644 --- a/share/openPMD/thirdParty/pybind11/README.rst +++ b/share/openPMD/thirdParty/pybind11/README.rst @@ -36,10 +36,10 @@ with everything stripped away that isn't relevant for binding generation. Without comments, the core header files only require ~4K lines of code and depend on Python (3.6+, or PyPy) and the C++ standard library. This compact implementation was possible thanks to -some of the new C++11 language features (specifically: tuples, lambda -functions and variadic templates). Since its creation, this library has -grown beyond Boost.Python in many ways, leading to dramatically simpler -binding code in many common situations. +some C++11 language features (specifically: tuples, lambda functions and +variadic templates). Since its creation, this library has grown beyond +Boost.Python in many ways, leading to dramatically simpler binding code in many +common situations. Tutorial and reference documentation is provided at `pybind11.readthedocs.io `_. 
@@ -71,6 +71,7 @@ pybind11 can map the following core C++ features to Python: - Internal references with correct reference counting - C++ classes with virtual (and pure virtual) methods can be extended in Python +- Integrated NumPy support (NumPy 2 requires pybind11 2.12+) Goodies ------- diff --git a/share/openPMD/thirdParty/pybind11/include/pybind11/cast.h b/share/openPMD/thirdParty/pybind11/include/pybind11/cast.h index db39341180..02d9488dae 100644 --- a/share/openPMD/thirdParty/pybind11/include/pybind11/cast.h +++ b/share/openPMD/thirdParty/pybind11/include/pybind11/cast.h @@ -42,13 +42,15 @@ using make_caster = type_caster>; // Shortcut for calling a caster's `cast_op_type` cast operator for casting a type_caster to a T template typename make_caster::template cast_op_type cast_op(make_caster &caster) { - return caster.operator typename make_caster::template cast_op_type(); + using result_t = typename make_caster::template cast_op_type; // See PR #4893 + return caster.operator result_t(); } template typename make_caster::template cast_op_type::type> cast_op(make_caster &&caster) { - return std::move(caster).operator typename make_caster:: - template cast_op_type::type>(); + using result_t = typename make_caster::template cast_op_type< + typename std::add_rvalue_reference::type>; // See PR #4893 + return std::move(caster).operator result_t(); } template @@ -325,8 +327,9 @@ class type_caster { value = false; return true; } - if (convert || (std::strcmp("numpy.bool_", Py_TYPE(src.ptr())->tp_name) == 0)) { - // (allow non-implicit conversion for numpy booleans) + if (convert || is_numpy_bool(src)) { + // (allow non-implicit conversion for numpy booleans), use strncmp + // since NumPy 1.x had an additional trailing underscore. Py_ssize_t res = -1; if (src.is_none()) { @@ -358,6 +361,15 @@ class type_caster { return handle(src ? 
Py_True : Py_False).inc_ref(); } PYBIND11_TYPE_CASTER(bool, const_name("bool")); + +private: + // Test if an object is a NumPy boolean (without fetching the type). + static inline bool is_numpy_bool(handle object) { + const char *type_name = Py_TYPE(object.ptr())->tp_name; + // Name changed to `numpy.bool` in NumPy 2, `numpy.bool_` is needed for 1.x support + return std::strcmp("numpy.bool", type_name) == 0 + || std::strcmp("numpy.bool_", type_name) == 0; + } }; // Helper class for UTF-{8,16,32} C++ stl strings: @@ -660,8 +672,9 @@ class tuple_caster { return cast(*src, policy, parent); } - static constexpr auto name - = const_name("Tuple[") + concat(make_caster::name...) + const_name("]"); + static constexpr auto name = const_name("tuple[") + + ::pybind11::detail::concat(make_caster::name...) + + const_name("]"); template using cast_op_type = type; @@ -869,10 +882,53 @@ struct is_holder_type template struct is_holder_type> : std::true_type {}; +#ifdef PYBIND11_DISABLE_HANDLE_TYPE_NAME_DEFAULT_IMPLEMENTATION // See PR #4888 + +// This leads to compilation errors if a specialization is missing. 
+template +struct handle_type_name; + +#else + template struct handle_type_name { static constexpr auto name = const_name(); }; + +#endif + +template <> +struct handle_type_name { + static constexpr auto name = const_name("object"); +}; +template <> +struct handle_type_name { + static constexpr auto name = const_name("list"); +}; +template <> +struct handle_type_name { + static constexpr auto name = const_name("dict"); +}; +template <> +struct handle_type_name { + static constexpr auto name = const_name("Union[set, frozenset]"); +}; +template <> +struct handle_type_name { + static constexpr auto name = const_name("set"); +}; +template <> +struct handle_type_name { + static constexpr auto name = const_name("frozenset"); +}; +template <> +struct handle_type_name { + static constexpr auto name = const_name("str"); +}; +template <> +struct handle_type_name { + static constexpr auto name = const_name("tuple"); +}; template <> struct handle_type_name { static constexpr auto name = const_name("bool"); @@ -882,6 +938,10 @@ struct handle_type_name { static constexpr auto name = const_name(PYBIND11_BYTES_NAME); }; template <> +struct handle_type_name { + static constexpr auto name = const_name("Buffer"); +}; +template <> struct handle_type_name { static constexpr auto name = const_name("int"); }; @@ -898,10 +958,50 @@ struct handle_type_name { static constexpr auto name = const_name("float"); }; template <> +struct handle_type_name { + static constexpr auto name = const_name("Callable"); +}; +template <> +struct handle_type_name { + static constexpr auto name = handle_type_name::name; +}; +template <> struct handle_type_name { static constexpr auto name = const_name("None"); }; template <> +struct handle_type_name { + static constexpr auto name = const_name("Sequence"); +}; +template <> +struct handle_type_name { + static constexpr auto name = const_name("bytearray"); +}; +template <> +struct handle_type_name { + static constexpr auto name = const_name("memoryview"); +}; 
+template <> +struct handle_type_name { + static constexpr auto name = const_name("slice"); +}; +template <> +struct handle_type_name { + static constexpr auto name = const_name("type"); +}; +template <> +struct handle_type_name { + static constexpr auto name = const_name("capsule"); +}; +template <> +struct handle_type_name { + static constexpr auto name = const_name("ellipsis"); +}; +template <> +struct handle_type_name { + static constexpr auto name = const_name("weakref"); +}; +template <> struct handle_type_name { static constexpr auto name = const_name("*args"); }; @@ -909,6 +1009,30 @@ template <> struct handle_type_name { static constexpr auto name = const_name("**kwargs"); }; +template <> +struct handle_type_name { + static constexpr auto name = const_name(); +}; +template <> +struct handle_type_name { + static constexpr auto name = const_name(); +}; +template <> +struct handle_type_name { + static constexpr auto name = const_name(); +}; +template <> +struct handle_type_name { + static constexpr auto name = const_name(); +}; +template <> +struct handle_type_name { + static constexpr auto name = const_name(); +}; +template <> +struct handle_type_name { + static constexpr auto name = const_name(); +}; template struct pyobject_caster { @@ -1377,7 +1501,15 @@ inline namespace literals { /** \rst String literal version of `arg` \endrst */ -constexpr arg operator"" _a(const char *name, size_t) { return arg(name); } +constexpr arg +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 5 +operator"" _a // gcc 4.8.5 insists on having a space (hard error). +#else +operator""_a // clang 17 generates a deprecation warning if there is a space. 
+#endif + (const char *name, size_t) { + return arg(name); +} } // namespace literals PYBIND11_NAMESPACE_BEGIN(detail) @@ -1438,7 +1570,8 @@ class argument_loader { static_assert(args_pos == -1 || args_pos == constexpr_first(), "py::args cannot be specified more than once"); - static constexpr auto arg_names = concat(type_descr(make_caster::name)...); + static constexpr auto arg_names + = ::pybind11::detail::concat(type_descr(make_caster::name)...); bool load_args(function_call &call) { return load_impl_sequence(call, indices{}); } diff --git a/share/openPMD/thirdParty/pybind11/include/pybind11/detail/class.h b/share/openPMD/thirdParty/pybind11/include/pybind11/detail/class.h index bc2b40c50a..0b9ea42db6 100644 --- a/share/openPMD/thirdParty/pybind11/include/pybind11/detail/class.h +++ b/share/openPMD/thirdParty/pybind11/include/pybind11/detail/class.h @@ -86,17 +86,16 @@ inline PyTypeObject *make_static_property_type() { type->tp_descr_get = pybind11_static_get; type->tp_descr_set = pybind11_static_set; - if (PyType_Ready(type) < 0) { - pybind11_fail("make_static_property_type(): failure in PyType_Ready()!"); - } - # if PY_VERSION_HEX >= 0x030C0000 - // PRE 3.12 FEATURE FREEZE. PLEASE REVIEW AFTER FREEZE. 
// Since Python-3.12 property-derived types are required to // have dynamic attributes (to set `__doc__`) enable_dynamic_attributes(heap_type); # endif + if (PyType_Ready(type) < 0) { + pybind11_fail("make_static_property_type(): failure in PyType_Ready()!"); + } + setattr((PyObject *) type, "__module__", str("pybind11_builtins")); PYBIND11_SET_OLDPY_QUALNAME(type, name_obj); @@ -189,12 +188,10 @@ extern "C" inline PyObject *pybind11_meta_call(PyObject *type, PyObject *args, P return nullptr; } - // This must be a pybind11 instance - auto *instance = reinterpret_cast(self); - // Ensure that the base __init__ function(s) were called - for (const auto &vh : values_and_holders(instance)) { - if (!vh.holder_constructed()) { + values_and_holders vhs(self); + for (const auto &vh : vhs) { + if (!vh.holder_constructed() && !vhs.is_redundant_value_and_holder(vh)) { PyErr_Format(PyExc_TypeError, "%.200s.__init__() must be called when overriding __init__", get_fully_qualified_tp_name(vh.type->type).c_str()); @@ -375,7 +372,7 @@ extern "C" inline PyObject *pybind11_object_new(PyTypeObject *type, PyObject *, extern "C" inline int pybind11_object_init(PyObject *self, PyObject *, PyObject *) { PyTypeObject *type = Py_TYPE(self); std::string msg = get_fully_qualified_tp_name(type) + ": No constructor defined!"; - PyErr_SetString(PyExc_TypeError, msg.c_str()); + set_error(PyExc_TypeError, msg.c_str()); return -1; } @@ -522,8 +519,12 @@ inline PyObject *make_object_base_type(PyTypeObject *metaclass) { /// dynamic_attr: Allow the garbage collector to traverse the internal instance `__dict__`. 
extern "C" inline int pybind11_traverse(PyObject *self, visitproc visit, void *arg) { +#if PY_VERSION_HEX >= 0x030D0000 + PyObject_VisitManagedDict(self, visit, arg); +#else PyObject *&dict = *_PyObject_GetDictPtr(self); Py_VISIT(dict); +#endif // https://docs.python.org/3/c-api/typeobj.html#c.PyTypeObject.tp_traverse #if PY_VERSION_HEX >= 0x03090000 Py_VISIT(Py_TYPE(self)); @@ -533,8 +534,12 @@ extern "C" inline int pybind11_traverse(PyObject *self, visitproc visit, void *a /// dynamic_attr: Allow the GC to clear the dictionary. extern "C" inline int pybind11_clear(PyObject *self) { +#if PY_VERSION_HEX >= 0x030D0000 + PyObject_ClearManagedDict(self); +#else PyObject *&dict = *_PyObject_GetDictPtr(self); Py_CLEAR(dict); +#endif return 0; } @@ -579,7 +584,7 @@ extern "C" inline int pybind11_getbuffer(PyObject *obj, Py_buffer *view, int fla if (view) { view->obj = nullptr; } - PyErr_SetString(PyExc_BufferError, "pybind11_getbuffer(): Internal error"); + set_error(PyExc_BufferError, "pybind11_getbuffer(): Internal error"); return -1; } std::memset(view, 0, sizeof(Py_buffer)); @@ -587,7 +592,7 @@ extern "C" inline int pybind11_getbuffer(PyObject *obj, Py_buffer *view, int fla if ((flags & PyBUF_WRITABLE) == PyBUF_WRITABLE && info->readonly) { delete info; // view->obj = nullptr; // Was just memset to 0, so not necessary - PyErr_SetString(PyExc_BufferError, "Writable buffer requested for readonly storage"); + set_error(PyExc_BufferError, "Writable buffer requested for readonly storage"); return -1; } view->obj = obj; diff --git a/share/openPMD/thirdParty/pybind11/include/pybind11/detail/common.h b/share/openPMD/thirdParty/pybind11/include/pybind11/detail/common.h index 31a54c773a..454e6061b0 100644 --- a/share/openPMD/thirdParty/pybind11/include/pybind11/detail/common.h +++ b/share/openPMD/thirdParty/pybind11/include/pybind11/detail/common.h @@ -10,12 +10,12 @@ #pragma once #define PYBIND11_VERSION_MAJOR 2 -#define PYBIND11_VERSION_MINOR 11 -#define 
PYBIND11_VERSION_PATCH 1 +#define PYBIND11_VERSION_MINOR 12 +#define PYBIND11_VERSION_PATCH 0 // Similar to Python's convention: https://docs.python.org/3/c-api/apiabiversion.html // Additional convention: 0xD = dev -#define PYBIND11_VERSION_HEX 0x020B0100 +#define PYBIND11_VERSION_HEX 0x020C0000 // Define some generic pybind11 helper macros for warning management. // @@ -118,6 +118,14 @@ # endif #endif +#if defined(PYBIND11_CPP20) +# define PYBIND11_CONSTINIT constinit +# define PYBIND11_DTOR_CONSTEXPR constexpr +#else +# define PYBIND11_CONSTINIT +# define PYBIND11_DTOR_CONSTEXPR +#endif + // Compiler version assertions #if defined(__INTEL_COMPILER) # if __INTEL_COMPILER < 1800 @@ -288,6 +296,10 @@ PYBIND11_WARNING_DISABLE_MSVC(4505) # undef copysign #endif +#if defined(PYBIND11_NUMPY_1_ONLY) +# define PYBIND11_INTERNAL_NUMPY_1_ONLY_DETECTED +#endif + #if defined(PYPY_VERSION) && !defined(PYBIND11_SIMPLE_GIL_MANAGEMENT) # define PYBIND11_SIMPLE_GIL_MANAGEMENT #endif @@ -399,7 +411,7 @@ PYBIND11_WARNING_POP return nullptr; \ } \ catch (const std::exception &e) { \ - PyErr_SetString(PyExc_ImportError, e.what()); \ + ::pybind11::set_error(PyExc_ImportError, e.what()); \ return nullptr; \ } diff --git a/share/openPMD/thirdParty/pybind11/include/pybind11/detail/init.h b/share/openPMD/thirdParty/pybind11/include/pybind11/detail/init.h index e21171688c..4509bd131e 100644 --- a/share/openPMD/thirdParty/pybind11/include/pybind11/detail/init.h +++ b/share/openPMD/thirdParty/pybind11/include/pybind11/detail/init.h @@ -65,7 +65,7 @@ constexpr bool is_alias(void *) { } // Constructs and returns a new object; if the given arguments don't map to a constructor, we fall -// back to brace aggregate initiailization so that for aggregate initialization can be used with +// back to brace aggregate initialization so that for aggregate initialization can be used with // py::init, e.g. `py::init` to initialize a `struct T { int a; int b; }`. 
For // non-aggregate types, we need to use an ordinary T(...) constructor (invoking as `T{...}` usually // works, but will not do the expected thing when `T` has an `initializer_list` constructor). diff --git a/share/openPMD/thirdParty/pybind11/include/pybind11/detail/internals.h b/share/openPMD/thirdParty/pybind11/include/pybind11/detail/internals.h index aaa7f8686e..c1047e4a04 100644 --- a/share/openPMD/thirdParty/pybind11/include/pybind11/detail/internals.h +++ b/share/openPMD/thirdParty/pybind11/include/pybind11/detail/internals.h @@ -34,8 +34,9 @@ /// further ABI-incompatible changes may be made before the ABI is officially /// changed to the new version. #ifndef PYBIND11_INTERNALS_VERSION -# if PY_VERSION_HEX >= 0x030C0000 +# if PY_VERSION_HEX >= 0x030C0000 || defined(_MSC_VER) // Version bump for Python 3.12+, before first 3.12 beta release. +// Version bump for MSVC piggy-backed on PR #4779. See comments there. # define PYBIND11_INTERNALS_VERSION 5 # else # define PYBIND11_INTERNALS_VERSION 4 @@ -66,9 +67,14 @@ inline PyObject *make_object_base_type(PyTypeObject *metaclass); // `Py_LIMITED_API` anyway. # if PYBIND11_INTERNALS_VERSION > 4 # define PYBIND11_TLS_KEY_REF Py_tss_t & -# if defined(__GNUC__) && !defined(__INTEL_COMPILER) -// Clang on macOS warns due to `Py_tss_NEEDS_INIT` not specifying an initializer -// for every field. +# if defined(__clang__) +# define PYBIND11_TLS_KEY_INIT(var) \ + _Pragma("clang diagnostic push") /**/ \ + _Pragma("clang diagnostic ignored \"-Wmissing-field-initializers\"") /**/ \ + Py_tss_t var \ + = Py_tss_NEEDS_INIT; \ + _Pragma("clang diagnostic pop") +# elif defined(__GNUC__) && !defined(__INTEL_COMPILER) # define PYBIND11_TLS_KEY_INIT(var) \ _Pragma("GCC diagnostic push") /**/ \ _Pragma("GCC diagnostic ignored \"-Wmissing-field-initializers\"") /**/ \ @@ -291,9 +297,12 @@ struct type_info { #endif /// On Linux/OSX, changes in __GXX_ABI_VERSION__ indicate ABI incompatibility. 
+/// On MSVC, changes in _MSC_VER may indicate ABI incompatibility (#2898). #ifndef PYBIND11_BUILD_ABI # if defined(__GXX_ABI_VERSION) # define PYBIND11_BUILD_ABI "_cxxabi" PYBIND11_TOSTRING(__GXX_ABI_VERSION) +# elif defined(_MSC_VER) +# define PYBIND11_BUILD_ABI "_mscver" PYBIND11_TOSTRING(_MSC_VER) # else # define PYBIND11_BUILD_ABI "" # endif @@ -352,7 +361,7 @@ inline bool raise_err(PyObject *exc_type, const char *msg) { raise_from(exc_type, msg); return true; } - PyErr_SetString(exc_type, msg); + set_error(exc_type, msg); return false; } @@ -447,6 +456,7 @@ inline object get_python_state_dict() { #endif if (!state_dict) { raise_from(PyExc_SystemError, "pybind11::detail::get_python_state_dict() FAILED"); + throw error_already_set(); } return state_dict; } @@ -459,6 +469,7 @@ inline internals **get_internals_pp_from_capsule(handle obj) { void *raw_ptr = PyCapsule_GetPointer(obj.ptr(), /*name=*/nullptr); if (raw_ptr == nullptr) { raise_from(PyExc_SystemError, "pybind11::detail::get_internals_pp_from_capsule() FAILED"); + throw error_already_set(); } return static_cast(raw_ptr); } diff --git a/share/openPMD/thirdParty/pybind11/include/pybind11/detail/type_caster_base.h b/share/openPMD/thirdParty/pybind11/include/pybind11/detail/type_caster_base.h index 16387506cf..518d3107ba 100644 --- a/share/openPMD/thirdParty/pybind11/include/pybind11/detail/type_caster_base.h +++ b/share/openPMD/thirdParty/pybind11/include/pybind11/detail/type_caster_base.h @@ -102,8 +102,22 @@ class loader_life_support { inline std::pair all_type_info_get_cache(PyTypeObject *type); +// Band-aid workaround to fix a subtle but serious bug in a minimalistic fashion. See PR #4762. 
+inline void all_type_info_add_base_most_derived_first(std::vector &bases, + type_info *addl_base) { + for (auto it = bases.begin(); it != bases.end(); it++) { + type_info *existing_base = *it; + if (PyType_IsSubtype(addl_base->type, existing_base->type) != 0) { + bases.insert(it, addl_base); + return; + } + } + bases.push_back(addl_base); +} + // Populates a just-created cache entry. PYBIND11_NOINLINE void all_type_info_populate(PyTypeObject *t, std::vector &bases) { + assert(bases.empty()); std::vector check; for (handle parent : reinterpret_borrow(t->tp_bases)) { check.push_back((PyTypeObject *) parent.ptr()); @@ -136,7 +150,7 @@ PYBIND11_NOINLINE void all_type_info_populate(PyTypeObject *t, std::vectortp_bases) { @@ -322,18 +336,29 @@ struct values_and_holders { explicit values_and_holders(instance *inst) : inst{inst}, tinfo(all_type_info(Py_TYPE(inst))) {} + explicit values_and_holders(PyObject *obj) + : inst{nullptr}, tinfo(all_type_info(Py_TYPE(obj))) { + if (!tinfo.empty()) { + inst = reinterpret_cast(obj); + } + } + struct iterator { private: instance *inst = nullptr; const type_vec *types = nullptr; value_and_holder curr; friend struct values_and_holders; - iterator(instance *inst, const type_vec *tinfo) - : inst{inst}, types{tinfo}, - curr(inst /* instance */, - types->empty() ? nullptr : (*types)[0] /* type info */, - 0, /* vpos: (non-simple types only): the first vptr comes first */ - 0 /* index */) {} + iterator(instance *inst, const type_vec *tinfo) : inst{inst}, types{tinfo} { + if (inst != nullptr) { + assert(!types->empty()); + curr = value_and_holder( + inst /* instance */, + (*types)[0] /* type info */, + 0, /* vpos: (non-simple types only): the first vptr comes first */ + 0 /* index */); + } + } // Past-the-end iterator: explicit iterator(size_t end) : curr(end) {} @@ -364,6 +389,16 @@ struct values_and_holders { } size_t size() { return tinfo.size(); } + + // Band-aid workaround to fix a subtle but serious bug in a minimalistic fashion. 
See PR #4762. + bool is_redundant_value_and_holder(const value_and_holder &vh) { + for (size_t i = 0; i < vh.index; i++) { + if (PyType_IsSubtype(tinfo[i]->type, tinfo[vh.index]->type) != 0) { + return true; + } + } + return false; + } }; /** @@ -486,8 +521,10 @@ PYBIND11_NOINLINE handle get_object_handle(const void *ptr, const detail::type_i inline PyThreadState *get_thread_state_unchecked() { #if defined(PYPY_VERSION) return PyThreadState_GET(); -#else +#elif PY_VERSION_HEX < 0x030D0000 return _PyThreadState_UncheckedGet(); +#else + return PyThreadState_GetUnchecked(); #endif } @@ -786,7 +823,7 @@ class type_caster_generic { std::string tname = rtti_type ? rtti_type->name() : cast_type.name(); detail::clean_type_id(tname); std::string msg = "Unregistered type : " + tname; - PyErr_SetString(PyExc_TypeError, msg.c_str()); + set_error(PyExc_TypeError, msg.c_str()); return {nullptr, nullptr}; } @@ -1164,13 +1201,17 @@ class type_caster_base : public type_caster_generic { static Constructor make_move_constructor(...) { return nullptr; } }; +inline std::string quote_cpp_type_name(const std::string &cpp_type_name) { + return cpp_type_name; // No-op for now. See PR #4888 +} + PYBIND11_NOINLINE std::string type_info_description(const std::type_info &ti) { if (auto *type_data = get_type_info(ti)) { handle th((PyObject *) type_data->type); return th.attr("__module__").cast() + '.' 
+ th.attr("__qualname__").cast(); } - return clean_type_id(ti.name()); + return quote_cpp_type_name(clean_type_id(ti.name())); } PYBIND11_NAMESPACE_END(detail) diff --git a/share/openPMD/thirdParty/pybind11/include/pybind11/eigen/tensor.h b/share/openPMD/thirdParty/pybind11/include/pybind11/eigen/tensor.h index 25d12baca1..d4ed6c0ca8 100644 --- a/share/openPMD/thirdParty/pybind11/include/pybind11/eigen/tensor.h +++ b/share/openPMD/thirdParty/pybind11/include/pybind11/eigen/tensor.h @@ -70,7 +70,7 @@ struct eigen_tensor_helper struct helper> { - static constexpr auto value = concat(const_name(((void) Is, "?"))...); + static constexpr auto value = ::pybind11::detail::concat(const_name(((void) Is, "?"))...); }; static constexpr auto dimensions_descriptor @@ -104,7 +104,8 @@ struct eigen_tensor_helper< return get_shape() == shape; } - static constexpr auto dimensions_descriptor = concat(const_name()...); + static constexpr auto dimensions_descriptor + = ::pybind11::detail::concat(const_name()...); template static Type *alloc(Args &&...args) { diff --git a/share/openPMD/thirdParty/pybind11/include/pybind11/functional.h b/share/openPMD/thirdParty/pybind11/include/pybind11/functional.h index 87ec4d10cb..6856119cde 100644 --- a/share/openPMD/thirdParty/pybind11/include/pybind11/functional.h +++ b/share/openPMD/thirdParty/pybind11/include/pybind11/functional.h @@ -128,7 +128,8 @@ struct type_caster> { } PYBIND11_TYPE_CASTER(type, - const_name("Callable[[") + concat(make_caster::name...) + const_name("Callable[[") + + ::pybind11::detail::concat(make_caster::name...) 
+ const_name("], ") + make_caster::name + const_name("]")); }; diff --git a/share/openPMD/thirdParty/pybind11/include/pybind11/gil.h b/share/openPMD/thirdParty/pybind11/include/pybind11/gil.h index 570a5581d5..da22f48d7e 100644 --- a/share/openPMD/thirdParty/pybind11/include/pybind11/gil.h +++ b/share/openPMD/thirdParty/pybind11/include/pybind11/gil.h @@ -11,6 +11,8 @@ #include "detail/common.h" +#include + #if defined(WITH_THREAD) && !defined(PYBIND11_SIMPLE_GIL_MANAGEMENT) # include "detail/internals.h" #endif @@ -137,7 +139,9 @@ class gil_scoped_acquire { class gil_scoped_release { public: + // PRECONDITION: The GIL must be held when this constructor is called. explicit gil_scoped_release(bool disassoc = false) : disassoc(disassoc) { + assert(PyGILState_Check()); // `get_internals()` must be called here unconditionally in order to initialize // `internals.tstate` for subsequent `gil_scoped_acquire` calls. Otherwise, an // initialization race could occur as multiple threads try `gil_scoped_acquire`. @@ -201,7 +205,11 @@ class gil_scoped_release { PyThreadState *state; public: - gil_scoped_release() : state{PyEval_SaveThread()} {} + // PRECONDITION: The GIL must be held when this constructor is called. + gil_scoped_release() { + assert(PyGILState_Check()); + state = PyEval_SaveThread(); + } gil_scoped_release(const gil_scoped_release &) = delete; gil_scoped_release &operator=(const gil_scoped_release &) = delete; ~gil_scoped_release() { PyEval_RestoreThread(state); } diff --git a/share/openPMD/thirdParty/pybind11/include/pybind11/gil_safe_call_once.h b/share/openPMD/thirdParty/pybind11/include/pybind11/gil_safe_call_once.h new file mode 100644 index 0000000000..eaf84d16e8 --- /dev/null +++ b/share/openPMD/thirdParty/pybind11/include/pybind11/gil_safe_call_once.h @@ -0,0 +1,91 @@ +// Copyright (c) 2023 The pybind Community. 
+ +#pragma once + +#include "detail/common.h" +#include "gil.h" + +#include +#include + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +// Use the `gil_safe_call_once_and_store` class below instead of the naive +// +// static auto imported_obj = py::module_::import("module_name"); // BAD, DO NOT USE! +// +// which has two serious issues: +// +// 1. Py_DECREF() calls potentially after the Python interpreter was finalized already, and +// 2. deadlocks in multi-threaded processes (because of missing lock ordering). +// +// The following alternative avoids both problems: +// +// PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store storage; +// auto &imported_obj = storage // Do NOT make this `static`! +// .call_once_and_store_result([]() { +// return py::module_::import("module_name"); +// }) +// .get_stored(); +// +// The parameter of `call_once_and_store_result()` must be callable. It can make +// CPython API calls, and in particular, it can temporarily release the GIL. +// +// `T` can be any C++ type, it does not have to involve CPython API types. +// +// The behavior with regard to signals, e.g. `SIGINT` (`KeyboardInterrupt`), +// is not ideal. If the main thread is the one to actually run the `Callable`, +// then a `KeyboardInterrupt` will interrupt it if it is running normal Python +// code. The situation is different if a non-main thread runs the +// `Callable`, and then the main thread starts waiting for it to complete: +// a `KeyboardInterrupt` will not interrupt the non-main thread, but it will +// get processed only when it is the main thread's turn again and it is running +// normal Python code. However, this will be unnoticeable for quick call-once +// functions, which is usually the case. +template +class gil_safe_call_once_and_store { +public: + // PRECONDITION: The GIL must be held when `call_once_and_store_result()` is called. 
+ template + gil_safe_call_once_and_store &call_once_and_store_result(Callable &&fn) { + if (!is_initialized_) { // This read is guarded by the GIL. + // Multiple threads may enter here, because the GIL is released in the next line and + // CPython API calls in the `fn()` call below may release and reacquire the GIL. + gil_scoped_release gil_rel; // Needed to establish lock ordering. + std::call_once(once_flag_, [&] { + // Only one thread will ever enter here. + gil_scoped_acquire gil_acq; + ::new (storage_) T(fn()); // fn may release, but will reacquire, the GIL. + is_initialized_ = true; // This write is guarded by the GIL. + }); + // All threads will observe `is_initialized_` as true here. + } + // Intentionally not returning `T &` to ensure the calling code is self-documenting. + return *this; + } + + // This must only be called after `call_once_and_store_result()` was called. + T &get_stored() { + assert(is_initialized_); + PYBIND11_WARNING_PUSH +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 5 + // Needed for gcc 4.8.5 + PYBIND11_WARNING_DISABLE_GCC("-Wstrict-aliasing") +#endif + return *reinterpret_cast(storage_); + PYBIND11_WARNING_POP + } + + constexpr gil_safe_call_once_and_store() = default; + PYBIND11_DTOR_CONSTEXPR ~gil_safe_call_once_and_store() = default; + +private: + alignas(T) char storage_[sizeof(T)] = {}; + std::once_flag once_flag_ = {}; + bool is_initialized_ = false; + // The `is_initialized_`-`storage_` pair is very similar to `std::optional`, + // but the latter does not have the triviality properties of former, + // therefore `std::optional` is not a viable alternative here. 
+}; + +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/share/openPMD/thirdParty/pybind11/include/pybind11/numpy.h b/share/openPMD/thirdParty/pybind11/include/pybind11/numpy.h index 36077ec04d..03abc8e778 100644 --- a/share/openPMD/thirdParty/pybind11/include/pybind11/numpy.h +++ b/share/openPMD/thirdParty/pybind11/include/pybind11/numpy.h @@ -10,7 +10,10 @@ #pragma once #include "pybind11.h" +#include "detail/common.h" #include "complex.h" +#include "gil_safe_call_once.h" +#include "pytypes.h" #include #include @@ -26,10 +29,15 @@ #include #include +#if defined(PYBIND11_NUMPY_1_ONLY) && !defined(PYBIND11_INTERNAL_NUMPY_1_ONLY_DETECTED) +# error PYBIND11_NUMPY_1_ONLY must be defined before any pybind11 header is included. +#endif + /* This will be true on all flat address space platforms and allows us to reduce the whole npy_intp / ssize_t / Py_intptr_t business down to just ssize_t for all size and dimension types (e.g. shape, strides, indexing), instead of inflicting this - upon the library user. */ + upon the library user. + Note that NumPy 2 now uses ssize_t for `npy_intp` to simplify this. 
*/ static_assert(sizeof(::pybind11::ssize_t) == sizeof(Py_intptr_t), "ssize_t != Py_intptr_t"); static_assert(std::is_signed::value, "Py_intptr_t must be signed"); // We now can reinterpret_cast between py::ssize_t and Py_intptr_t (MSVC + PyPy cares) @@ -38,10 +46,16 @@ PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) PYBIND11_WARNING_DISABLE_MSVC(4127) +class dtype; // Forward declaration class array; // Forward declaration PYBIND11_NAMESPACE_BEGIN(detail) +template <> +struct handle_type_name { + static constexpr auto name = const_name("numpy.dtype"); +}; + template <> struct handle_type_name { static constexpr auto name = const_name("numpy.ndarray"); @@ -50,7 +64,8 @@ struct handle_type_name { template struct npy_format_descriptor; -struct PyArrayDescr_Proxy { +/* NumPy 1 proxy (always includes legacy fields) */ +struct PyArrayDescr1_Proxy { PyObject_HEAD PyObject *typeobj; char kind; @@ -65,6 +80,43 @@ struct PyArrayDescr_Proxy { PyObject *names; }; +#ifndef PYBIND11_NUMPY_1_ONLY +struct PyArrayDescr_Proxy { + PyObject_HEAD + PyObject *typeobj; + char kind; + char type; + char byteorder; + char _former_flags; + int type_num; + /* Additional fields are NumPy version specific. 
*/ +}; +#else +/* NumPy 1.x only, we can expose all fields */ +using PyArrayDescr_Proxy = PyArrayDescr1_Proxy; +#endif + +/* NumPy 2 proxy, including legacy fields */ +struct PyArrayDescr2_Proxy { + PyObject_HEAD + PyObject *typeobj; + char kind; + char type; + char byteorder; + char _former_flags; + int type_num; + std::uint64_t flags; + ssize_t elsize; + ssize_t alignment; + PyObject *metadata; + Py_hash_t hash; + void *reserved_null[2]; + /* The following fields only exist if 0 <= type_num < 2056 */ + char *subarray; + PyObject *fields; + PyObject *names; +}; + struct PyArray_Proxy { PyObject_HEAD char *data; @@ -120,6 +172,28 @@ inline numpy_internals &get_numpy_internals() { return *ptr; } +PYBIND11_NOINLINE module_ import_numpy_core_submodule(const char *submodule_name) { + module_ numpy = module_::import("numpy"); + str version_string = numpy.attr("__version__"); + + module_ numpy_lib = module_::import("numpy.lib"); + object numpy_version = numpy_lib.attr("NumpyVersion")(version_string); + int major_version = numpy_version.attr("major").cast(); + +#ifdef PYBIND11_NUMPY_1_ONLY + if (major_version >= 2) { + throw std::runtime_error( + "This extension was built with PYBIND11_NUMPY_1_ONLY defined, " + "but NumPy 2 is used in this process. For NumPy2 compatibility, " + "this extension needs to be rebuilt without the PYBIND11_NUMPY_1_ONLY define."); + } +#endif + /* `numpy.core` was renamed to `numpy._core` in NumPy 2.0 as it officially + became a private module. */ + std::string numpy_core_path = major_version >= 2 ? "numpy._core" : "numpy.core"; + return module_::import((numpy_core_path + "." 
+ submodule_name).c_str()); +} + template struct same_size { template @@ -186,14 +260,16 @@ struct npy_api { NPY_ULONG_, NPY_ULONGLONG_, NPY_UINT_), }; + unsigned int PyArray_RUNTIME_VERSION_; + struct PyArray_Dims { Py_intptr_t *ptr; int len; }; static npy_api &get() { - static npy_api api = lookup(); - return api; + PYBIND11_CONSTINIT static gil_safe_call_once_and_store storage; + return storage.call_once_and_store_result(lookup).get_stored(); } bool PyArray_Check_(PyObject *obj) const { @@ -224,6 +300,7 @@ struct npy_api { PyObject *(*PyArray_FromAny_)(PyObject *, PyObject *, int, int, int, PyObject *); int (*PyArray_DescrConverter_)(PyObject *, PyObject **); bool (*PyArray_EquivTypes_)(PyObject *, PyObject *); +#ifdef PYBIND11_NUMPY_1_ONLY int (*PyArray_GetArrayParamsFromObject_)(PyObject *, PyObject *, unsigned char, @@ -232,6 +309,7 @@ struct npy_api { Py_intptr_t *, PyObject **, PyObject *); +#endif PyObject *(*PyArray_Squeeze_)(PyObject *); // Unused. Not removed because that affects ABI of the class. int (*PyArray_SetBaseObject_)(PyObject *, PyObject *); @@ -249,7 +327,8 @@ struct npy_api { API_PyArray_DescrFromScalar = 57, API_PyArray_FromAny = 69, API_PyArray_Resize = 80, - API_PyArray_CopyInto = 82, + // CopyInto was slot 82 and 50 was effectively an alias. NumPy 2 removed 82. 
+ API_PyArray_CopyInto = 50, API_PyArray_NewCopy = 85, API_PyArray_NewFromDescr = 94, API_PyArray_DescrNewFromType = 96, @@ -258,18 +337,25 @@ struct npy_api { API_PyArray_View = 137, API_PyArray_DescrConverter = 174, API_PyArray_EquivTypes = 182, +#ifdef PYBIND11_NUMPY_1_ONLY API_PyArray_GetArrayParamsFromObject = 278, +#endif API_PyArray_SetBaseObject = 282 }; static npy_api lookup() { - module_ m = module_::import("numpy.core.multiarray"); + module_ m = detail::import_numpy_core_submodule("multiarray"); auto c = m.attr("_ARRAY_API"); void **api_ptr = (void **) PyCapsule_GetPointer(c.ptr(), nullptr); + if (api_ptr == nullptr) { + raise_from(PyExc_SystemError, "FAILURE obtaining numpy _ARRAY_API pointer."); + throw error_already_set(); + } npy_api api; #define DECL_NPY_API(Func) api.Func##_ = (decltype(api.Func##_)) api_ptr[API_##Func]; DECL_NPY_API(PyArray_GetNDArrayCFeatureVersion); - if (api.PyArray_GetNDArrayCFeatureVersion_() < 0x7) { + api.PyArray_RUNTIME_VERSION_ = api.PyArray_GetNDArrayCFeatureVersion_(); + if (api.PyArray_RUNTIME_VERSION_ < 0x7) { pybind11_fail("pybind11 numpy support requires numpy >= 1.7.0"); } DECL_NPY_API(PyArray_Type); @@ -288,7 +374,9 @@ struct npy_api { DECL_NPY_API(PyArray_View); DECL_NPY_API(PyArray_DescrConverter); DECL_NPY_API(PyArray_EquivTypes); +#ifdef PYBIND11_NUMPY_1_ONLY DECL_NPY_API(PyArray_GetArrayParamsFromObject); +#endif DECL_NPY_API(PyArray_SetBaseObject); #undef DECL_NPY_API @@ -310,6 +398,14 @@ inline const PyArrayDescr_Proxy *array_descriptor_proxy(const PyObject *ptr) { return reinterpret_cast(ptr); } +inline const PyArrayDescr1_Proxy *array_descriptor1_proxy(const PyObject *ptr) { + return reinterpret_cast(ptr); +} + +inline const PyArrayDescr2_Proxy *array_descriptor2_proxy(const PyObject *ptr) { + return reinterpret_cast(ptr); +} + inline bool check_flags(const void *ptr, int flag) { return (flag == (array_proxy(ptr)->flags & flag)); } @@ -350,7 +446,7 @@ struct array_info> { } static constexpr auto extents = 
const_name::is_array>( - concat(const_name(), array_info::extents), const_name()); + ::pybind11::detail::concat(const_name(), array_info::extents), const_name()); }; // For numpy we have special handling for arrays of characters, so we don't include // the size in the array extents. @@ -589,10 +685,32 @@ class dtype : public object { } /// Size of the data type in bytes. +#ifdef PYBIND11_NUMPY_1_ONLY ssize_t itemsize() const { return detail::array_descriptor_proxy(m_ptr)->elsize; } +#else + ssize_t itemsize() const { + if (detail::npy_api::get().PyArray_RUNTIME_VERSION_ < 0x12) { + return detail::array_descriptor1_proxy(m_ptr)->elsize; + } + return detail::array_descriptor2_proxy(m_ptr)->elsize; + } +#endif /// Returns true for structured data types. +#ifdef PYBIND11_NUMPY_1_ONLY bool has_fields() const { return detail::array_descriptor_proxy(m_ptr)->names != nullptr; } +#else + bool has_fields() const { + if (detail::npy_api::get().PyArray_RUNTIME_VERSION_ < 0x12) { + return detail::array_descriptor1_proxy(m_ptr)->names != nullptr; + } + const auto *proxy = detail::array_descriptor2_proxy(m_ptr); + if (proxy->type_num < 0 || proxy->type_num >= 2056) { + return false; + } + return proxy->names != nullptr; + } +#endif /// Single-character code for dtype's kind. /// For example, floating point types are 'f' and integral types are 'i'. 
@@ -618,20 +736,39 @@ class dtype : public object { /// Single character for byteorder char byteorder() const { return detail::array_descriptor_proxy(m_ptr)->byteorder; } - /// Alignment of the data type +/// Alignment of the data type +#ifdef PYBIND11_NUMPY_1_ONLY int alignment() const { return detail::array_descriptor_proxy(m_ptr)->alignment; } +#else + ssize_t alignment() const { + if (detail::npy_api::get().PyArray_RUNTIME_VERSION_ < 0x12) { + return detail::array_descriptor1_proxy(m_ptr)->alignment; + } + return detail::array_descriptor2_proxy(m_ptr)->alignment; + } +#endif - /// Flags for the array descriptor +/// Flags for the array descriptor +#ifdef PYBIND11_NUMPY_1_ONLY char flags() const { return detail::array_descriptor_proxy(m_ptr)->flags; } +#else + std::uint64_t flags() const { + if (detail::npy_api::get().PyArray_RUNTIME_VERSION_ < 0x12) { + return (unsigned char) detail::array_descriptor1_proxy(m_ptr)->flags; + } + return detail::array_descriptor2_proxy(m_ptr)->flags; + } +#endif private: - static object _dtype_from_pep3118() { - static PyObject *obj = module_::import("numpy.core._internal") - .attr("_dtype_from_pep3118") - .cast() - .release() - .ptr(); - return reinterpret_borrow(obj); + static object &_dtype_from_pep3118() { + PYBIND11_CONSTINIT static gil_safe_call_once_and_store storage; + return storage + .call_once_and_store_result([]() { + return detail::import_numpy_core_submodule("_internal") + .attr("_dtype_from_pep3118"); + }) + .get_stored(); } dtype strip_padding(ssize_t itemsize) { @@ -788,9 +925,7 @@ class array : public buffer { } /// Byte size of a single element - ssize_t itemsize() const { - return detail::array_descriptor_proxy(detail::array_proxy(m_ptr)->descr)->elsize; - } + ssize_t itemsize() const { return dtype().itemsize(); } /// Total number of bytes ssize_t nbytes() const { return size() * itemsize(); } @@ -1008,7 +1143,7 @@ class array : public buffer { /// Create array from any object -- always returns a new reference 
static PyObject *raw_array(PyObject *ptr, int ExtraFlags = 0) { if (ptr == nullptr) { - PyErr_SetString(PyExc_ValueError, "cannot create a pybind11::array from a nullptr"); + set_error(PyExc_ValueError, "cannot create a pybind11::array from a nullptr"); return nullptr; } return detail::npy_api::get().PyArray_FromAny_( @@ -1155,7 +1290,7 @@ class array_t : public array { /// Create array from any object -- always returns a new reference static PyObject *raw_array_t(PyObject *ptr) { if (ptr == nullptr) { - PyErr_SetString(PyExc_ValueError, "cannot create a pybind11::array_t from a nullptr"); + set_error(PyExc_ValueError, "cannot create a pybind11::array_t from a nullptr"); return nullptr; } return detail::npy_api::get().PyArray_FromAny_(ptr, diff --git a/share/openPMD/thirdParty/pybind11/include/pybind11/pybind11.h b/share/openPMD/thirdParty/pybind11/include/pybind11/pybind11.h index 3bce1a01ba..429d2138d1 100644 --- a/share/openPMD/thirdParty/pybind11/include/pybind11/pybind11.h +++ b/share/openPMD/thirdParty/pybind11/include/pybind11/pybind11.h @@ -14,7 +14,9 @@ #include "detail/init.h" #include "attr.h" #include "gil.h" +#include "gil_safe_call_once.h" #include "options.h" +#include "typing.h" #include #include @@ -52,6 +54,47 @@ PYBIND11_WARNING_DISABLE_MSVC(4127) PYBIND11_NAMESPACE_BEGIN(detail) +inline std::string replace_newlines_and_squash(const char *text) { + const char *whitespaces = " \t\n\r\f\v"; + std::string result(text); + bool previous_is_whitespace = false; + + if (result.size() >= 2) { + // Do not modify string representations + char first_char = result[0]; + char last_char = result[result.size() - 1]; + if (first_char == last_char && first_char == '\'') { + return result; + } + } + result.clear(); + + // Replace characters in whitespaces array with spaces and squash consecutive spaces + while (*text != '\0') { + if (std::strchr(whitespaces, *text)) { + if (!previous_is_whitespace) { + result += ' '; + previous_is_whitespace = true; + } + } else { 
+ result += *text; + previous_is_whitespace = false; + } + ++text; + } + + // Strip leading and trailing whitespaces + const size_t str_begin = result.find_first_not_of(whitespaces); + if (str_begin == std::string::npos) { + return ""; + } + + const size_t str_end = result.find_last_not_of(whitespaces); + const size_t str_range = str_end - str_begin + 1; + + return result.substr(str_begin, str_range); +} + // Apply all the extensions translators from a list // Return true if one of the translators completed without raising an exception // itself. Return of false indicates that if there are other translators @@ -424,7 +467,7 @@ class cpp_function : public function { // Write default value if available. if (!is_starred && arg_index < rec->args.size() && rec->args[arg_index].descr) { signature += " = "; - signature += rec->args[arg_index].descr; + signature += detail::replace_newlines_and_squash(rec->args[arg_index].descr); } // Separator for positional-only arguments (placed after the // argument, rather than before like * @@ -449,9 +492,7 @@ class cpp_function : public function { signature += rec->scope.attr("__module__").cast() + "." 
+ rec->scope.attr("__qualname__").cast(); } else { - std::string tname(t->name()); - detail::clean_type_id(tname); - signature += tname; + signature += detail::quote_cpp_type_name(detail::clean_type_id(t->name())); } } else { signature += c; @@ -680,7 +721,7 @@ class cpp_function : public function { /* Iterator over the list of potentially admissible overloads */ const function_record *overloads = reinterpret_cast( PyCapsule_GetPointer(self, get_function_record_capsule_name())), - *it = overloads; + *current_overload = overloads; assert(overloads != nullptr); /* Need to know how many arguments + keyword arguments there are to pick the right @@ -694,9 +735,8 @@ class cpp_function : public function { if (overloads->is_constructor) { if (!parent || !PyObject_TypeCheck(parent.ptr(), (PyTypeObject *) overloads->scope.ptr())) { - PyErr_SetString( - PyExc_TypeError, - "__init__(self, ...) called with invalid or missing `self` argument"); + set_error(PyExc_TypeError, + "__init__(self, ...) called with invalid or missing `self` argument"); return nullptr; } @@ -719,9 +759,10 @@ class cpp_function : public function { std::vector second_pass; // However, if there are no overloads, we can just skip the no-convert pass entirely - const bool overloaded = it != nullptr && it->next != nullptr; + const bool overloaded + = current_overload != nullptr && current_overload->next != nullptr; - for (; it != nullptr; it = it->next) { + for (; current_overload != nullptr; current_overload = current_overload->next) { /* For each overload: 1. Copy all positional arguments we were given, also checking to make sure that @@ -742,7 +783,7 @@ class cpp_function : public function { a result other than PYBIND11_TRY_NEXT_OVERLOAD. 
*/ - const function_record &func = *it; + const function_record &func = *current_overload; size_t num_args = func.nargs; // Number of positional arguments that we need if (func.has_args) { --num_args; // (but don't count py::args @@ -980,10 +1021,10 @@ class cpp_function : public function { } if (result.ptr() != PYBIND11_TRY_NEXT_OVERLOAD) { - // The error reporting logic below expects 'it' to be valid, as it would be - // if we'd encountered this failure in the first-pass loop. + // The error reporting logic below expects 'current_overload' to be valid, + // as it would be if we'd encountered this failure in the first-pass loop. if (!result) { - it = &call.func; + current_overload = &call.func; } break; } @@ -1007,7 +1048,7 @@ class cpp_function : public function { A translator may choose to do one of the following: - - catch the exception and call PyErr_SetString or PyErr_SetObject + - catch the exception and call py::set_error() to set a standard (or custom) Python exception, or - do nothing and let the exception fall through to the next translator, or - delegate translation to the next translator by throwing a new type of exception. @@ -1023,8 +1064,7 @@ class cpp_function : public function { return nullptr; } - PyErr_SetString(PyExc_SystemError, - "Exception escaped from default exception translator!"); + set_error(PyExc_SystemError, "Exception escaped from default exception translator!"); return nullptr; } @@ -1102,7 +1142,7 @@ class cpp_function : public function { } msg += "kwargs: "; bool first = true; - for (auto kwarg : kwargs) { + for (const auto &kwarg : kwargs) { if (first) { first = false; } else { @@ -1125,20 +1165,21 @@ class cpp_function : public function { raise_from(PyExc_TypeError, msg.c_str()); return nullptr; } - PyErr_SetString(PyExc_TypeError, msg.c_str()); + set_error(PyExc_TypeError, msg.c_str()); return nullptr; } if (!result) { std::string msg = "Unable to convert function return value to a " "Python type! 
The signature was\n\t"; - msg += it->signature; + assert(current_overload != nullptr); + msg += current_overload->signature; append_note_if_missing_header_is_suspected(msg); // Attach additional error info to the exception if supported if (PyErr_Occurred()) { raise_from(PyExc_TypeError, msg.c_str()); return nullptr; } - PyErr_SetString(PyExc_TypeError, msg.c_str()); + set_error(PyExc_TypeError, msg.c_str()); return nullptr; } if (overloads->is_constructor && !self_value_and_holder.holder_constructed()) { @@ -1149,6 +1190,15 @@ class cpp_function : public function { } }; +PYBIND11_NAMESPACE_BEGIN(detail) + +template <> +struct handle_type_name { + static constexpr auto name = const_name("Callable"); +}; + +PYBIND11_NAMESPACE_END(detail) + /// Wrapper for Python extension modules class module_ : public object { public: @@ -1276,6 +1326,15 @@ class module_ : public object { } }; +PYBIND11_NAMESPACE_BEGIN(detail) + +template <> +struct handle_type_name { + static constexpr auto name = const_name("module"); +}; + +PYBIND11_NAMESPACE_END(detail) + // When inside a namespace (or anywhere as long as it's not the first item on a line), // C++20 allows "module" to be used. This is provided for backward compatibility, and for // simplicity, if someone wants to use py::module for example, that is perfectly safe. @@ -1977,7 +2036,7 @@ struct enum_base { object type_name = type::handle_of(arg).attr("__name__"); return pybind11::str("{}.{}").format(std::move(type_name), enum_name(arg)); }, - name("name"), + name("__str__"), is_method(m_base)); if (options::show_enum_members_docstring()) { @@ -2395,7 +2454,7 @@ iterator make_iterator_impl(Iterator first, Sentinel last, Extra &&...extra) { Policy); } - return cast(state{first, last, true}); + return cast(state{std::forward(first), std::forward(last), true}); } PYBIND11_NAMESPACE_END(detail) @@ -2406,13 +2465,15 @@ template ::result_type, typename... 
Extra> -iterator make_iterator(Iterator first, Sentinel last, Extra &&...extra) { +typing::Iterator make_iterator(Iterator first, Sentinel last, Extra &&...extra) { return detail::make_iterator_impl, Policy, Iterator, Sentinel, ValueType, - Extra...>(first, last, std::forward(extra)...); + Extra...>(std::forward(first), + std::forward(last), + std::forward(extra)...); } /// Makes a python iterator over the keys (`.first`) of a iterator over pairs from a @@ -2422,13 +2483,15 @@ template ::result_type, typename... Extra> -iterator make_key_iterator(Iterator first, Sentinel last, Extra &&...extra) { +typing::Iterator make_key_iterator(Iterator first, Sentinel last, Extra &&...extra) { return detail::make_iterator_impl, Policy, Iterator, Sentinel, KeyType, - Extra...>(first, last, std::forward(extra)...); + Extra...>(std::forward(first), + std::forward(last), + std::forward(extra)...); } /// Makes a python iterator over the values (`.second`) of a iterator over pairs from a @@ -2438,21 +2501,25 @@ template ::result_type, typename... Extra> -iterator make_value_iterator(Iterator first, Sentinel last, Extra &&...extra) { +typing::Iterator make_value_iterator(Iterator first, Sentinel last, Extra &&...extra) { return detail::make_iterator_impl, Policy, Iterator, Sentinel, ValueType, - Extra...>(first, last, std::forward(extra)...); + Extra...>(std::forward(first), + std::forward(last), + std::forward(extra)...); } /// Makes an iterator over values of an stl container or other container supporting /// `std::begin()`/`std::end()` template ()))>::result_type, typename... Extra> -iterator make_iterator(Type &value, Extra &&...extra) { +typing::Iterator make_iterator(Type &value, Extra &&...extra) { return make_iterator( std::begin(value), std::end(value), std::forward(extra)...); } @@ -2461,8 +2528,10 @@ iterator make_iterator(Type &value, Extra &&...extra) { /// `std::begin()`/`std::end()` template ()))>::result_type, typename... 
Extra> -iterator make_key_iterator(Type &value, Extra &&...extra) { +typing::Iterator make_key_iterator(Type &value, Extra &&...extra) { return make_key_iterator( std::begin(value), std::end(value), std::forward(extra)...); } @@ -2471,8 +2540,10 @@ iterator make_key_iterator(Type &value, Extra &&...extra) { /// `std::begin()`/`std::end()` template ()))>::result_type, typename... Extra> -iterator make_value_iterator(Type &value, Extra &&...extra) { +typing::Iterator make_value_iterator(Type &value, Extra &&...extra) { return make_value_iterator( std::begin(value), std::end(value), std::forward(extra)...); } @@ -2528,7 +2599,7 @@ inline void register_local_exception_translator(ExceptionTranslator &&translator /** * Wrapper to generate a new Python exception type. * - * This should only be used with PyErr_SetString for now. + * This should only be used with py::set_error() for now. * It is not (yet) possible to use as a py::base. * Template type argument is reserved for future use. */ @@ -2549,27 +2620,25 @@ class exception : public object { } // Sets the current python exception to this exception object with the given message - void operator()(const char *message) { PyErr_SetString(m_ptr, message); } + PYBIND11_DEPRECATED("Please use py::set_error() instead " + "(https://github.com/pybind/pybind11/pull/4772)") + void operator()(const char *message) const { set_error(*this, message); } }; PYBIND11_NAMESPACE_BEGIN(detail) -// Returns a reference to a function-local static exception object used in the simple -// register_exception approach below. (It would be simpler to have the static local variable -// directly in register_exception, but that makes clang <3.5 segfault - issue #1349). 
-template -exception &get_exception_object() { - static exception ex; - return ex; -} + +template <> +struct handle_type_name> { + static constexpr auto name = const_name("Exception"); +}; // Helper function for register_exception and register_local_exception template exception & register_exception_impl(handle scope, const char *name, handle base, bool isLocal) { - auto &ex = detail::get_exception_object(); - if (!ex) { - ex = exception(scope, name, base); - } + PYBIND11_CONSTINIT static gil_safe_call_once_and_store> exc_storage; + exc_storage.call_once_and_store_result( + [&]() { return exception(scope, name, base); }); auto register_func = isLocal ? ®ister_local_exception_translator : ®ister_exception_translator; @@ -2581,10 +2650,10 @@ register_exception_impl(handle scope, const char *name, handle base, bool isLoca try { std::rethrow_exception(p); } catch (const CppException &e) { - detail::get_exception_object()(e.what()); + set_error(exc_storage.get_stored(), e.what()); } }); - return ex; + return exc_storage.get_stored(); } PYBIND11_NAMESPACE_END(detail) @@ -2703,7 +2772,11 @@ get_type_override(const void *this_ptr, const type_info *this_type, const char * if ((std::string) str(f_code->co_name) == name && f_code->co_argcount > 0) { PyObject *locals = PyEval_GetLocals(); if (locals != nullptr) { +# if PY_VERSION_HEX >= 0x030b0000 + PyObject *co_varnames = PyCode_GetVarnames(f_code); +# else PyObject *co_varnames = PyObject_GetAttrString((PyObject *) f_code, "co_varnames"); +# endif PyObject *self_arg = PyTuple_GET_ITEM(co_varnames, 0); Py_DECREF(co_varnames); PyObject *self_caller = dict_getitem(locals, self_arg); diff --git a/share/openPMD/thirdParty/pybind11/include/pybind11/pytypes.h b/share/openPMD/thirdParty/pybind11/include/pybind11/pytypes.h index 64aad63476..d5f6af8e02 100644 --- a/share/openPMD/thirdParty/pybind11/include/pybind11/pytypes.h +++ b/share/openPMD/thirdParty/pybind11/include/pybind11/pytypes.h @@ -59,6 +59,7 @@ struct sequence_item; 
struct list_item; struct tuple_item; } // namespace accessor_policies +// PLEASE KEEP handle_type_name SPECIALIZATIONS IN SYNC. using obj_attr_accessor = accessor; using str_attr_accessor = accessor; using item_accessor = accessor; @@ -305,19 +306,19 @@ class handle : public detail::object_api { "https://pybind11.readthedocs.io/en/stable/advanced/" "misc.html#common-sources-of-global-interpreter-lock-errors for debugging advice.\n" "If you are convinced there is no bug in your code, you can #define " - "PYBIND11_NO_ASSERT_GIL_HELD_INCREF_DECREF" + "PYBIND11_NO_ASSERT_GIL_HELD_INCREF_DECREF " "to disable this check. In that case you have to ensure this #define is consistently " "used for all translation units linked into a given pybind11 extension, otherwise " "there will be ODR violations.", function_name.c_str()); - fflush(stderr); if (Py_TYPE(m_ptr)->tp_name != nullptr) { fprintf(stderr, - "The failing %s call was triggered on a %s object.\n", + " The failing %s call was triggered on a %s object.", function_name.c_str(), Py_TYPE(m_ptr)->tp_name); - fflush(stderr); } + fprintf(stderr, "\n"); + fflush(stderr); throw std::runtime_error(function_name + " PyGILState_Check() failure."); } #endif @@ -334,6 +335,14 @@ class handle : public detail::object_api { #endif }; +inline void set_error(const handle &type, const char *message) { + PyErr_SetString(type.ptr(), message); +} + +inline void set_error(const handle &type, const handle &value) { + PyErr_SetObject(type.ptr(), value.ptr()); +} + /** \rst Holds a reference to a Python object (with reference counting) @@ -1612,7 +1621,15 @@ inline namespace literals { /** \rst String literal version of `str` \endrst */ -inline str operator"" _s(const char *s, size_t size) { return {s, size}; } +inline str +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 5 +operator"" _s // gcc 4.8.5 insists on having a space (hard error). +#else +operator""_s // clang 17 generates a deprecation warning if there is a space. 
+#endif + (const char *s, size_t size) { + return {s, size}; +} } // namespace literals /// \addtogroup pytypes diff --git a/share/openPMD/thirdParty/pybind11/include/pybind11/stl.h b/share/openPMD/thirdParty/pybind11/include/pybind11/stl.h index f39f44f7c9..71bc5902ef 100644 --- a/share/openPMD/thirdParty/pybind11/include/pybind11/stl.h +++ b/share/openPMD/thirdParty/pybind11/include/pybind11/stl.h @@ -100,7 +100,7 @@ struct set_caster { return s.release(); } - PYBIND11_TYPE_CASTER(type, const_name("Set[") + key_conv::name + const_name("]")); + PYBIND11_TYPE_CASTER(type, const_name("set[") + key_conv::name + const_name("]")); }; template @@ -157,7 +157,7 @@ struct map_caster { } PYBIND11_TYPE_CASTER(Type, - const_name("Dict[") + key_conv::name + const_name(", ") + value_conv::name + const_name("dict[") + key_conv::name + const_name(", ") + value_conv::name + const_name("]")); }; @@ -172,7 +172,7 @@ struct list_caster { auto s = reinterpret_borrow(src); value.clear(); reserve_maybe(s, &value); - for (auto it : s) { + for (const auto &it : s) { value_conv conv; if (!conv.load(it, convert)) { return false; @@ -208,7 +208,7 @@ struct list_caster { return l.release(); } - PYBIND11_TYPE_CASTER(Type, const_name("List[") + value_conv::name + const_name("]")); + PYBIND11_TYPE_CASTER(Type, const_name("list[") + value_conv::name + const_name("]")); }; template @@ -247,7 +247,7 @@ struct array_caster { return false; } size_t ctr = 0; - for (auto it : l) { + for (const auto &it : l) { value_conv conv; if (!conv.load(it, convert)) { return false; @@ -274,7 +274,7 @@ struct array_caster { PYBIND11_TYPE_CASTER(ArrayType, const_name(const_name(""), const_name("Annotated[")) - + const_name("List[") + value_conv::name + const_name("]") + + const_name("list[") + value_conv::name + const_name("]") + const_name(const_name(""), const_name(", FixedSize(") + const_name() + const_name(")]"))); @@ -421,7 +421,8 @@ struct variant_caster> { using Type = V; PYBIND11_TYPE_CASTER(Type, - 
const_name("Union[") + detail::concat(make_caster::name...) + const_name("Union[") + + ::pybind11::detail::concat(make_caster::name...) + const_name("]")); }; diff --git a/share/openPMD/thirdParty/pybind11/include/pybind11/stl_bind.h b/share/openPMD/thirdParty/pybind11/include/pybind11/stl_bind.h index 49f1b77821..a226cbc0e8 100644 --- a/share/openPMD/thirdParty/pybind11/include/pybind11/stl_bind.h +++ b/share/openPMD/thirdParty/pybind11/include/pybind11/stl_bind.h @@ -525,7 +525,7 @@ class_ bind_vector(handle scope, std::string const &name, A [](const Vector &v) -> bool { return !v.empty(); }, "Check whether the list is nonempty"); - cl.def("__len__", &Vector::size); + cl.def("__len__", [](const Vector &vec) { return vec.size(); }); #if 0 // C++ style functions deprecated, leaving it here as an example @@ -645,49 +645,50 @@ auto map_if_insertion_operator(Class_ &cl, std::string const &name) "Return the canonical string representation of this map."); } -template struct keys_view { virtual size_t len() = 0; virtual iterator iter() = 0; - virtual bool contains(const KeyType &k) = 0; - virtual bool contains(const object &k) = 0; + virtual bool contains(const handle &k) = 0; virtual ~keys_view() = default; }; -template struct values_view { virtual size_t len() = 0; virtual iterator iter() = 0; virtual ~values_view() = default; }; -template struct items_view { virtual size_t len() = 0; virtual iterator iter() = 0; virtual ~items_view() = default; }; -template -struct KeysViewImpl : public KeysView { +template +struct KeysViewImpl : public detail::keys_view { explicit KeysViewImpl(Map &map) : map(map) {} size_t len() override { return map.size(); } iterator iter() override { return make_key_iterator(map.begin(), map.end()); } - bool contains(const typename Map::key_type &k) override { return map.find(k) != map.end(); } - bool contains(const object &) override { return false; } + bool contains(const handle &k) override { + try { + return map.find(k.template cast()) != 
map.end(); + } catch (const cast_error &) { + return false; + } + } Map ↦ }; -template -struct ValuesViewImpl : public ValuesView { +template +struct ValuesViewImpl : public detail::values_view { explicit ValuesViewImpl(Map &map) : map(map) {} size_t len() override { return map.size(); } iterator iter() override { return make_value_iterator(map.begin(), map.end()); } Map ↦ }; -template -struct ItemsViewImpl : public ItemsView { +template +struct ItemsViewImpl : public detail::items_view { explicit ItemsViewImpl(Map &map) : map(map) {} size_t len() override { return map.size(); } iterator iter() override { return make_iterator(map.begin(), map.end()); } @@ -700,11 +701,9 @@ template , typename... class_ bind_map(handle scope, const std::string &name, Args &&...args) { using KeyType = typename Map::key_type; using MappedType = typename Map::mapped_type; - using StrippedKeyType = detail::remove_cvref_t; - using StrippedMappedType = detail::remove_cvref_t; - using KeysView = detail::keys_view; - using ValuesView = detail::values_view; - using ItemsView = detail::items_view; + using KeysView = detail::keys_view; + using ValuesView = detail::values_view; + using ItemsView = detail::items_view; using Class_ = class_; // If either type is a non-module-local bound type then make the map binding non-local as well; @@ -718,39 +717,20 @@ class_ bind_map(handle scope, const std::string &name, Args && } Class_ cl(scope, name.c_str(), pybind11::module_local(local), std::forward(args)...); - static constexpr auto key_type_descr = detail::make_caster::name; - static constexpr auto mapped_type_descr = detail::make_caster::name; - std::string key_type_name(key_type_descr.text), mapped_type_name(mapped_type_descr.text); - // If key type isn't properly wrapped, fall back to C++ names - if (key_type_name == "%") { - key_type_name = detail::type_info_description(typeid(KeyType)); - } - // Similarly for value type: - if (mapped_type_name == "%") { - mapped_type_name = 
detail::type_info_description(typeid(MappedType)); - } - - // Wrap KeysView[KeyType] if it wasn't already wrapped + // Wrap KeysView if it wasn't already wrapped if (!detail::get_type_info(typeid(KeysView))) { - class_ keys_view( - scope, ("KeysView[" + key_type_name + "]").c_str(), pybind11::module_local(local)); + class_ keys_view(scope, "KeysView", pybind11::module_local(local)); keys_view.def("__len__", &KeysView::len); keys_view.def("__iter__", &KeysView::iter, keep_alive<0, 1>() /* Essential: keep view alive while iterator exists */ ); - keys_view.def("__contains__", - static_cast(&KeysView::contains)); - // Fallback for when the object is not of the key type - keys_view.def("__contains__", - static_cast(&KeysView::contains)); + keys_view.def("__contains__", &KeysView::contains); } // Similarly for ValuesView: if (!detail::get_type_info(typeid(ValuesView))) { - class_ values_view(scope, - ("ValuesView[" + mapped_type_name + "]").c_str(), - pybind11::module_local(local)); + class_ values_view(scope, "ValuesView", pybind11::module_local(local)); values_view.def("__len__", &ValuesView::len); values_view.def("__iter__", &ValuesView::iter, @@ -759,10 +739,7 @@ class_ bind_map(handle scope, const std::string &name, Args && } // Similarly for ItemsView: if (!detail::get_type_info(typeid(ItemsView))) { - class_ items_view( - scope, - ("ItemsView[" + key_type_name + ", ").append(mapped_type_name + "]").c_str(), - pybind11::module_local(local)); + class_ items_view(scope, "ItemsView", pybind11::module_local(local)); items_view.def("__len__", &ItemsView::len); items_view.def("__iter__", &ItemsView::iter, @@ -788,25 +765,19 @@ class_ bind_map(handle scope, const std::string &name, Args && cl.def( "keys", - [](Map &m) { - return std::unique_ptr(new detail::KeysViewImpl(m)); - }, + [](Map &m) { return std::unique_ptr(new detail::KeysViewImpl(m)); }, keep_alive<0, 1>() /* Essential: keep map alive while view exists */ ); cl.def( "values", - [](Map &m) { - return 
std::unique_ptr(new detail::ValuesViewImpl(m)); - }, + [](Map &m) { return std::unique_ptr(new detail::ValuesViewImpl(m)); }, keep_alive<0, 1>() /* Essential: keep map alive while view exists */ ); cl.def( "items", - [](Map &m) { - return std::unique_ptr(new detail::ItemsViewImpl(m)); - }, + [](Map &m) { return std::unique_ptr(new detail::ItemsViewImpl(m)); }, keep_alive<0, 1>() /* Essential: keep map alive while view exists */ ); @@ -843,7 +814,8 @@ class_ bind_map(handle scope, const std::string &name, Args && m.erase(it); }); - cl.def("__len__", &Map::size); + // Always use a lambda in case of `using` declaration + cl.def("__len__", [](const Map &m) { return m.size(); }); return cl; } diff --git a/share/openPMD/thirdParty/pybind11/include/pybind11/typing.h b/share/openPMD/thirdParty/pybind11/include/pybind11/typing.h new file mode 100644 index 0000000000..bc275fc50b --- /dev/null +++ b/share/openPMD/thirdParty/pybind11/include/pybind11/typing.h @@ -0,0 +1,125 @@ +/* + pybind11/typing.h: Convenience wrapper classes for basic Python types + with more explicit annotations. + + Copyright (c) 2023 Dustin Spicuzza + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "detail/common.h" +#include "cast.h" +#include "pytypes.h" + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(typing) + +/* + The following types can be used to direct pybind11-generated docstrings + to have have more explicit types (e.g., `list[str]` instead of `list`). + Just use these in place of existing types. + + There is no additional enforcement of types at runtime. 
+*/ + +template +class Tuple : public tuple { + using tuple::tuple; +}; + +template +class Dict : public dict { + using dict::dict; +}; + +template +class List : public list { + using list::list; +}; + +template +class Set : public set { + using set::set; +}; + +template +class Iterable : public iterable { + using iterable::iterable; +}; + +template +class Iterator : public iterator { + using iterator::iterator; +}; + +template +class Callable; + +template +class Callable : public function { + using function::function; +}; + +PYBIND11_NAMESPACE_END(typing) + +PYBIND11_NAMESPACE_BEGIN(detail) + +template +struct handle_type_name> { + static constexpr auto name = const_name("tuple[") + + ::pybind11::detail::concat(make_caster::name...) + + const_name("]"); +}; + +template <> +struct handle_type_name> { + // PEP 484 specifies this syntax for an empty tuple + static constexpr auto name = const_name("tuple[()]"); +}; + +template +struct handle_type_name> { + // PEP 484 specifies this syntax for a variable-length tuple + static constexpr auto name + = const_name("tuple[") + make_caster::name + const_name(", ...]"); +}; + +template +struct handle_type_name> { + static constexpr auto name = const_name("dict[") + make_caster::name + const_name(", ") + + make_caster::name + const_name("]"); +}; + +template +struct handle_type_name> { + static constexpr auto name = const_name("list[") + make_caster::name + const_name("]"); +}; + +template +struct handle_type_name> { + static constexpr auto name = const_name("set[") + make_caster::name + const_name("]"); +}; + +template +struct handle_type_name> { + static constexpr auto name = const_name("Iterable[") + make_caster::name + const_name("]"); +}; + +template +struct handle_type_name> { + static constexpr auto name = const_name("Iterator[") + make_caster::name + const_name("]"); +}; + +template +struct handle_type_name> { + using retval_type = conditional_t::value, void_type, Return>; + static constexpr auto name + = 
const_name("Callable[[") + ::pybind11::detail::concat(make_caster::name...) + + const_name("], ") + make_caster::name + const_name("]"); +}; + +PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/share/openPMD/thirdParty/pybind11/pybind11/_version.py b/share/openPMD/thirdParty/pybind11/pybind11/_version.py index 9280fa054e..ab5a7bf5f5 100644 --- a/share/openPMD/thirdParty/pybind11/pybind11/_version.py +++ b/share/openPMD/thirdParty/pybind11/pybind11/_version.py @@ -8,5 +8,5 @@ def _to_int(s: str) -> Union[int, str]: return s -__version__ = "2.11.1" +__version__ = "2.12.0" version_info = tuple(_to_int(s) for s in __version__.split(".")) diff --git a/share/openPMD/thirdParty/pybind11/pybind11/setup_helpers.py b/share/openPMD/thirdParty/pybind11/pybind11/setup_helpers.py index aeeee9dcfa..3b16dca886 100644 --- a/share/openPMD/thirdParty/pybind11/pybind11/setup_helpers.py +++ b/share/openPMD/thirdParty/pybind11/pybind11/setup_helpers.py @@ -66,7 +66,9 @@ from setuptools import Extension as _Extension from setuptools.command.build_ext import build_ext as _build_ext except ImportError: - from distutils.command.build_ext import build_ext as _build_ext # type: ignore[assignment] + from distutils.command.build_ext import ( # type: ignore[assignment] + build_ext as _build_ext, + ) from distutils.extension import Extension as _Extension # type: ignore[assignment] import distutils.ccompiler diff --git a/share/openPMD/thirdParty/pybind11/tools/FindPythonLibsNew.cmake b/share/openPMD/thirdParty/pybind11/tools/FindPythonLibsNew.cmake index ce558d4ece..8275b9d5aa 100644 --- a/share/openPMD/thirdParty/pybind11/tools/FindPythonLibsNew.cmake +++ b/share/openPMD/thirdParty/pybind11/tools/FindPythonLibsNew.cmake @@ -95,6 +95,22 @@ if(NOT PythonLibsNew_FIND_VERSION) set(PythonLibsNew_FIND_VERSION "3.6") endif() +if(NOT CMAKE_VERSION VERSION_LESS "3.27") + cmake_policy(GET CMP0148 _pybind11_cmp0148) + if(NOT _pybind11_cmp0148) + message( + 
AUTHOR_WARNING + "Policy CMP0148 is not set: The FindPythonInterp and FindPythonLibs " + "modules are removed. Run \"cmake --help-policy CMP0148\" for policy " + "details. Use the cmake_policy command to set the policy and suppress " + "this warning, or preferably upgrade to using FindPython, either by " + "calling it explicitly before pybind11, or by setting " + "PYBIND11_FINDPYTHON ON before pybind11.") + endif() + cmake_policy(SET CMP0148 OLD) + unset(_pybind11_cmp0148) +endif() + find_package(PythonInterp ${PythonLibsNew_FIND_VERSION} ${_pythonlibs_required} ${_pythonlibs_quiet}) @@ -172,13 +188,20 @@ _pybind11_get_if_undef(_PYTHON_VALUES 0 _PYTHON_VERSION_LIST) _pybind11_get_if_undef(_PYTHON_VALUES 1 PYTHON_PREFIX) _pybind11_get_if_undef(_PYTHON_VALUES 2 PYTHON_INCLUDE_DIR) _pybind11_get_if_undef(_PYTHON_VALUES 3 PYTHON_SITE_PACKAGES) -_pybind11_get_if_undef(_PYTHON_VALUES 4 PYTHON_MODULE_EXTENSION) _pybind11_get_if_undef(_PYTHON_VALUES 5 PYTHON_IS_DEBUG) _pybind11_get_if_undef(_PYTHON_VALUES 6 PYTHON_SIZEOF_VOID_P) _pybind11_get_if_undef(_PYTHON_VALUES 7 PYTHON_LIBRARY_SUFFIX) _pybind11_get_if_undef(_PYTHON_VALUES 8 PYTHON_LIBDIR) _pybind11_get_if_undef(_PYTHON_VALUES 9 PYTHON_MULTIARCH) +list(GET _PYTHON_VALUES 4 _PYTHON_MODULE_EXT_SUFFIX) +if(PYBIND11_PYTHONLIBS_OVERWRITE OR NOT DEFINED PYTHON_MODULE_DEBUG_POSTFIX) + get_filename_component(PYTHON_MODULE_DEBUG_POSTFIX "${_PYTHON_MODULE_EXT_SUFFIX}" NAME_WE) +endif() +if(PYBIND11_PYTHONLIBS_OVERWRITE OR NOT DEFINED PYTHON_MODULE_EXTENSION) + get_filename_component(PYTHON_MODULE_EXTENSION "${_PYTHON_MODULE_EXT_SUFFIX}" EXT) +endif() + # Make sure the Python has the same pointer-size as the chosen compiler # Skip if CMAKE_SIZEOF_VOID_P is not defined # This should be skipped for (non-Apple) cross-compiles (like EMSCRIPTEN) diff --git a/share/openPMD/thirdParty/pybind11/tools/make_changelog.py b/share/openPMD/thirdParty/pybind11/tools/make_changelog.py index b5bd832940..89cf664835 100755 --- 
a/share/openPMD/thirdParty/pybind11/tools/make_changelog.py +++ b/share/openPMD/thirdParty/pybind11/tools/make_changelog.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +from __future__ import annotations import re @@ -29,6 +30,18 @@ ) issues = (issue for page in issues_pages for issue in page) missing = [] +cats_descr = { + "feat": "New Features", + "fix": "Bug fixes", + "fix(types)": "", + "fix(cmake)": "", + "docs": "Documentation", + "tests": "Tests", + "ci": "CI", + "chore": "Other", + "unknown": "Uncategorised", +} +cats: dict[str, list[str]] = {c: [] for c in cats_descr} for issue in issues: changelog = ENTRY.findall(issue.body or "") @@ -36,14 +49,29 @@ missing.append(issue) else: (msg,) = changelog + if msg.startswith("- "): + msg = msg[2:] if not msg.startswith("* "): msg = "* " + msg if not msg.endswith("."): msg += "." msg += f"\n `#{issue.number} <{issue.html_url}>`_" + for cat in cats: + if issue.title.lower().startswith(f"{cat}:"): + cats[cat].append(msg) + break + else: + cats["unknown"].append(msg) - print(Syntax(msg, "rst", theme="ansi_light", word_wrap=True)) +for cat, msgs in cats.items(): + if msgs: + desc = cats_descr[cat] + print(f"[bold]{desc}:" if desc else f".. 
{cat}") + print() + for msg in msgs: + print(Syntax(msg, "rst", theme="ansi_light", word_wrap=True)) + print() print() if missing: diff --git a/share/openPMD/thirdParty/pybind11/tools/pybind11Common.cmake b/share/openPMD/thirdParty/pybind11/tools/pybind11Common.cmake index 308d1b70d1..57721aeb16 100644 --- a/share/openPMD/thirdParty/pybind11/tools/pybind11Common.cmake +++ b/share/openPMD/thirdParty/pybind11/tools/pybind11Common.cmake @@ -173,12 +173,16 @@ endif() # Check to see which Python mode we are in, new, old, or no python if(PYBIND11_NOPYTHON) set(_pybind11_nopython ON) + # We won't use new FindPython if PYBIND11_FINDPYTHON is defined and falselike + # Otherwise, we use if FindPythonLibs is missing or if FindPython was already used elseif( - _pybind11_missing_old_python STREQUAL "NEW" - OR PYBIND11_FINDPYTHON - OR Python_FOUND - OR Python2_FOUND - OR Python3_FOUND) + (NOT DEFINED PYBIND11_FINDPYTHON OR PYBIND11_FINDPYTHON) + AND (_pybind11_missing_old_python STREQUAL "NEW" + OR PYBIND11_FINDPYTHON + OR Python_FOUND + OR Python3_FOUND + )) + # New mode include("${CMAKE_CURRENT_LIST_DIR}/pybind11NewTools.cmake") @@ -218,8 +222,15 @@ if(NOT _pybind11_nopython) execute_process( COMMAND - ${${_Python}_EXECUTABLE} -c - "from pkg_resources import get_distribution; print(get_distribution('${PYPI_NAME}').version)" + ${${_Python}_EXECUTABLE} -c " +try: + from importlib.metadata import version +except ImportError: + from pkg_resources import get_distribution + def version(s): + return get_distribution(s).version +print(version('${PYPI_NAME}')) + " RESULT_VARIABLE RESULT_PRESENT OUTPUT_VARIABLE PKG_VERSION ERROR_QUIET) @@ -300,21 +311,24 @@ function(_pybind11_generate_lto target prefer_thin_lto) set(cxx_append ";-fno-fat-lto-objects") endif() - if(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le" OR CMAKE_SYSTEM_PROCESSOR MATCHES "mips64") - set(NO_FLTO_ARCH TRUE) + if(prefer_thin_lto) + set(thin "=thin") else() - set(NO_FLTO_ARCH FALSE) + set(thin "") endif() - 
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" - AND prefer_thin_lto - AND NOT NO_FLTO_ARCH) + if(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le" OR CMAKE_SYSTEM_PROCESSOR MATCHES "mips64") + # Do nothing + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES emscripten) + # This compile is very costly when cross-compiling, so set this without checking + set(PYBIND11_LTO_CXX_FLAGS "-flto${thin}${cxx_append}") + set(PYBIND11_LTO_LINKER_FLAGS "-flto${thin}${linker_append}") + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") _pybind11_return_if_cxx_and_linker_flags_work( - HAS_FLTO_THIN "-flto=thin${cxx_append}" "-flto=thin${linker_append}" + HAS_FLTO_THIN "-flto${thin}${cxx_append}" "-flto=${thin}${linker_append}" PYBIND11_LTO_CXX_FLAGS PYBIND11_LTO_LINKER_FLAGS) endif() - - if(NOT HAS_FLTO_THIN AND NOT NO_FLTO_ARCH) + if(NOT HAS_FLTO_THIN) _pybind11_return_if_cxx_and_linker_flags_work( HAS_FLTO "-flto${cxx_append}" "-flto${linker_append}" PYBIND11_LTO_CXX_FLAGS PYBIND11_LTO_LINKER_FLAGS) diff --git a/share/openPMD/thirdParty/pybind11/tools/pybind11Config.cmake.in b/share/openPMD/thirdParty/pybind11/tools/pybind11Config.cmake.in index 5734f437b3..304f1d9077 100644 --- a/share/openPMD/thirdParty/pybind11/tools/pybind11Config.cmake.in +++ b/share/openPMD/thirdParty/pybind11/tools/pybind11Config.cmake.in @@ -149,7 +149,7 @@ default is ``MODULE``. There are several options: ``OPT_SIZE`` Optimize for size, even if the ``CMAKE_BUILD_TYPE`` is not ``MinSizeRel``. ``THIN_LTO`` - Use thin TLO instead of regular if there's a choice (pybind11's selection + Use thin LTO instead of regular if there's a choice (pybind11's selection is disabled if ``CMAKE_INTERPROCEDURAL_OPTIMIZATIONS`` is set). ``WITHOUT_SOABI`` Disable the SOABI component (``PYBIND11_NEWPYTHON`` mode only). 
diff --git a/share/openPMD/thirdParty/pybind11/tools/pybind11NewTools.cmake b/share/openPMD/thirdParty/pybind11/tools/pybind11NewTools.cmake index 7d7424a790..9fe2eb08dc 100644 --- a/share/openPMD/thirdParty/pybind11/tools/pybind11NewTools.cmake +++ b/share/openPMD/thirdParty/pybind11/tools/pybind11NewTools.cmake @@ -32,17 +32,54 @@ if(NOT Python_FOUND AND NOT Python3_FOUND) set(Python_ROOT_DIR "$ENV{pythonLocation}") endif() - find_package(Python 3.6 REQUIRED COMPONENTS Interpreter Development ${_pybind11_quiet}) + # Development.Module support (required for manylinux) started in 3.18 + if(CMAKE_VERSION VERSION_LESS 3.18) + set(_pybind11_dev_component Development) + else() + set(_pybind11_dev_component Development.Module OPTIONAL_COMPONENTS Development.Embed) + endif() + + # Callers need to be able to access Python_EXECUTABLE + set(_pybind11_global_keyword "") + if(NOT is_config AND NOT DEFINED Python_ARTIFACTS_INTERACTIVE) + set(Python_ARTIFACTS_INTERACTIVE TRUE) + if(NOT CMAKE_VERSION VERSION_LESS 3.24) + set(_pybind11_global_keyword "GLOBAL") + endif() + endif() + + find_package(Python 3.6 REQUIRED COMPONENTS Interpreter ${_pybind11_dev_component} + ${_pybind11_quiet} ${_pybind11_global_keyword}) # If we are in submodule mode, export the Python targets to global targets. # If this behavior is not desired, FindPython _before_ pybind11. 
- if(NOT is_config) - set_property(TARGET Python::Python PROPERTY IMPORTED_GLOBAL TRUE) + if(NOT is_config + AND Python_ARTIFACTS_INTERACTIVE + AND _pybind11_global_keyword STREQUAL "") + if(TARGET Python::Python) + set_property(TARGET Python::Python PROPERTY IMPORTED_GLOBAL TRUE) + endif() set_property(TARGET Python::Interpreter PROPERTY IMPORTED_GLOBAL TRUE) if(TARGET Python::Module) set_property(TARGET Python::Module PROPERTY IMPORTED_GLOBAL TRUE) endif() endif() + + # Explicitly export version for callers (including our own functions) + if(NOT is_config AND Python_ARTIFACTS_INTERACTIVE) + set(Python_VERSION + "${Python_VERSION}" + CACHE INTERNAL "") + set(Python_VERSION_MAJOR + "${Python_VERSION_MAJOR}" + CACHE INTERNAL "") + set(Python_VERSION_MINOR + "${Python_VERSION_MINOR}" + CACHE INTERNAL "") + set(Python_VERSION_PATCH + "${Python_VERSION_PATCH}" + CACHE INTERNAL "") + endif() endif() if(Python_FOUND) @@ -73,15 +110,17 @@ if(NOT DEFINED ${_Python}_EXECUTABLE) endif() -if(NOT ${_Python}_EXECUTABLE STREQUAL PYBIND11_PYTHON_EXECUTABLE_LAST) +if(DEFINED PYBIND11_PYTHON_EXECUTABLE_LAST AND NOT ${_Python}_EXECUTABLE STREQUAL + PYBIND11_PYTHON_EXECUTABLE_LAST) # Detect changes to the Python version/binary in subsequent CMake runs, and refresh config if needed unset(PYTHON_IS_DEBUG CACHE) unset(PYTHON_MODULE_EXTENSION CACHE) - set(PYBIND11_PYTHON_EXECUTABLE_LAST - "${${_Python}_EXECUTABLE}" - CACHE INTERNAL "Python executable during the last CMake run") endif() +set(PYBIND11_PYTHON_EXECUTABLE_LAST + "${${_Python}_EXECUTABLE}" + CACHE INTERNAL "Python executable during the last CMake run") + if(NOT DEFINED PYTHON_IS_DEBUG) # Debug check - see https://stackoverflow.com/questions/646518/python-how-to-detect-debug-Interpreter execute_process( @@ -95,25 +134,36 @@ endif() # Get the suffix - SO is deprecated, should use EXT_SUFFIX, but this is # required for PyPy3 (as of 7.3.1) -if(NOT DEFINED PYTHON_MODULE_EXTENSION) +if(NOT DEFINED PYTHON_MODULE_EXTENSION OR NOT 
DEFINED PYTHON_MODULE_DEBUG_POSTFIX) execute_process( COMMAND "${${_Python}_EXECUTABLE}" "-c" "import sys, importlib; s = importlib.import_module('distutils.sysconfig' if sys.version_info < (3, 10) else 'sysconfig'); print(s.get_config_var('EXT_SUFFIX') or s.get_config_var('SO'))" - OUTPUT_VARIABLE _PYTHON_MODULE_EXTENSION - ERROR_VARIABLE _PYTHON_MODULE_EXTENSION_ERR + OUTPUT_VARIABLE _PYTHON_MODULE_EXT_SUFFIX + ERROR_VARIABLE _PYTHON_MODULE_EXT_SUFFIX_ERR OUTPUT_STRIP_TRAILING_WHITESPACE) - if(_PYTHON_MODULE_EXTENSION STREQUAL "") + if(_PYTHON_MODULE_EXT_SUFFIX STREQUAL "") message( FATAL_ERROR "pybind11 could not query the module file extension, likely the 'distutils'" - "package is not installed. Full error message:\n${_PYTHON_MODULE_EXTENSION_ERR}") + "package is not installed. Full error message:\n${_PYTHON_MODULE_EXT_SUFFIX_ERR}" + ) endif() # This needs to be available for the pybind11_extension function - set(PYTHON_MODULE_EXTENSION - "${_PYTHON_MODULE_EXTENSION}" - CACHE INTERNAL "") + if(NOT DEFINED PYTHON_MODULE_DEBUG_POSTFIX) + get_filename_component(_PYTHON_MODULE_DEBUG_POSTFIX "${_PYTHON_MODULE_EXT_SUFFIX}" NAME_WE) + set(PYTHON_MODULE_DEBUG_POSTFIX + "${_PYTHON_MODULE_DEBUG_POSTFIX}" + CACHE INTERNAL "") + endif() + + if(NOT DEFINED PYTHON_MODULE_EXTENSION) + get_filename_component(_PYTHON_MODULE_EXTENSION "${_PYTHON_MODULE_EXT_SUFFIX}" EXT) + set(PYTHON_MODULE_EXTENSION + "${_PYTHON_MODULE_EXTENSION}" + CACHE INTERNAL "") + endif() endif() # Python debug libraries expose slightly different objects before 3.8 @@ -233,11 +283,13 @@ function(pybind11_add_module target_name) endif() endif() - # Use case-insensitive comparison to match the result of $ - string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) - if(NOT MSVC AND NOT "${uppercase_CMAKE_BUILD_TYPE}" MATCHES DEBUG|RELWITHDEBINFO) - # Strip unnecessary sections of the binary on Linux/macOS - pybind11_strip(${target_name}) + if(DEFINED CMAKE_BUILD_TYPE) # see 
https://github.com/pybind/pybind11/issues/4454 + # Use case-insensitive comparison to match the result of $ + string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) + if(NOT MSVC AND NOT "${uppercase_CMAKE_BUILD_TYPE}" MATCHES DEBUG|RELWITHDEBINFO) + # Strip unnecessary sections of the binary on Linux/macOS + pybind11_strip(${target_name}) + endif() endif() if(MSVC) @@ -251,6 +303,9 @@ endfunction() function(pybind11_extension name) # The extension is precomputed - set_target_properties(${name} PROPERTIES PREFIX "" SUFFIX "${PYTHON_MODULE_EXTENSION}") - + set_target_properties( + ${name} + PROPERTIES PREFIX "" + DEBUG_POSTFIX "${PYTHON_MODULE_DEBUG_POSTFIX}" + SUFFIX "${PYTHON_MODULE_EXTENSION}") endfunction() diff --git a/share/openPMD/thirdParty/pybind11/tools/pybind11Tools.cmake b/share/openPMD/thirdParty/pybind11/tools/pybind11Tools.cmake index 48050966a4..045e5f1e7a 100644 --- a/share/openPMD/thirdParty/pybind11/tools/pybind11Tools.cmake +++ b/share/openPMD/thirdParty/pybind11/tools/pybind11Tools.cmake @@ -43,7 +43,7 @@ endif() # A user can set versions manually too set(Python_ADDITIONAL_VERSIONS - "3.12;3.11;3.10;3.9;3.8" + "3.12;3.11;3.10;3.9;3.8;3.7;3.6" CACHE INTERNAL "") list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}") @@ -65,6 +65,7 @@ _pybind11_promote_to_cache(PYTHON_INCLUDE_DIRS) _pybind11_promote_to_cache(PYTHON_LIBRARIES) _pybind11_promote_to_cache(PYTHON_MODULE_PREFIX) _pybind11_promote_to_cache(PYTHON_MODULE_EXTENSION) +_pybind11_promote_to_cache(PYTHON_MODULE_DEBUG_POSTFIX) _pybind11_promote_to_cache(PYTHON_VERSION_MAJOR) _pybind11_promote_to_cache(PYTHON_VERSION_MINOR) _pybind11_promote_to_cache(PYTHON_VERSION) @@ -148,8 +149,11 @@ endif() function(pybind11_extension name) # The prefix and extension are provided by FindPythonLibsNew.cmake - set_target_properties(${name} PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}" - SUFFIX "${PYTHON_MODULE_EXTENSION}") + set_target_properties( + ${name} + PROPERTIES PREFIX 
"${PYTHON_MODULE_PREFIX}" + DEBUG_POSTFIX "${PYTHON_MODULE_DEBUG_POSTFIX}" + SUFFIX "${PYTHON_MODULE_EXTENSION}") endfunction() # Build a Python extension module: @@ -212,10 +216,12 @@ function(pybind11_add_module target_name) endif() endif() - # Use case-insensitive comparison to match the result of $ - string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) - if(NOT MSVC AND NOT "${uppercase_CMAKE_BUILD_TYPE}" MATCHES DEBUG|RELWITHDEBINFO) - pybind11_strip(${target_name}) + if(DEFINED CMAKE_BUILD_TYPE) # see https://github.com/pybind/pybind11/issues/4454 + # Use case-insensitive comparison to match the result of $ + string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) + if(NOT MSVC AND NOT "${uppercase_CMAKE_BUILD_TYPE}" MATCHES DEBUG|RELWITHDEBINFO) + pybind11_strip(${target_name}) + endif() endif() if(MSVC) diff --git a/src/ChunkInfo.cpp b/src/ChunkInfo.cpp index 3c01b7b681..5acb1ea07e 100644 --- a/src/ChunkInfo.cpp +++ b/src/ChunkInfo.cpp @@ -19,9 +19,22 @@ * If not, see . */ #include "openPMD/ChunkInfo.hpp" +#include "openPMD/ChunkInfo_internal.hpp" + +#include "openPMD/auxiliary/Mpi.hpp" #include +#ifdef _WIN32 +#define openPMD_POSIX_AVAILABLE false +#else +#define openPMD_POSIX_AVAILABLE true +#endif + +#if openPMD_POSIX_AVAILABLE +#include +#endif + namespace openPMD { ChunkInfo::ChunkInfo(Offset offset_in, Extent extent_in) @@ -48,4 +61,108 @@ bool WrittenChunkInfo::operator==(WrittenChunkInfo const &other) const return this->sourceID == other.sourceID && this->ChunkInfo::operator==(other); } + +namespace host_info +{ + constexpr size_t MAX_HOSTNAME_LENGTH = 256; + + Method methodFromStringDescription( + std::string const &descr, [[maybe_unused]] bool consider_mpi) + { + static std::map const map{ + {"posix_hostname", Method::POSIX_HOSTNAME}, +#if openPMD_HAVE_MPI + {"hostname", + consider_mpi ? 
Method::MPI_PROCESSOR_NAME + : Method::POSIX_HOSTNAME}, +#else + {"hostname", Method::POSIX_HOSTNAME}, +#endif + {"mpi_processor_name", Method::MPI_PROCESSOR_NAME}}; + return map.at(descr); + } + + bool methodAvailable(Method method) + { + switch (method) + { + + case Method::POSIX_HOSTNAME: + return openPMD_POSIX_AVAILABLE; + case Method::MPI_PROCESSOR_NAME: + return openPMD_HAVE_MPI == 1; + } + throw std::runtime_error("Unreachable!"); + } + + std::string byMethod(Method method) + { + static std::map const map{ +#if openPMD_POSIX_AVAILABLE + {Method::POSIX_HOSTNAME, &posix_hostname}, +#endif +#if openPMD_HAVE_MPI + {Method::MPI_PROCESSOR_NAME, &mpi_processor_name}, +#endif + }; + try + { + return (*map.at(method))(); + } + catch (std::out_of_range const &) + { + throw std::runtime_error( + "[hostname::byMethod] Specified method is not available."); + } + } + +#if openPMD_HAVE_MPI + chunk_assignment::RankMeta byMethodCollective(MPI_Comm comm, Method method) + { + auto myHostname = byMethod(method); + chunk_assignment::RankMeta res; + auto allHostnames = + auxiliary::distributeStringsToAllRanks(comm, myHostname); + for (size_t i = 0; i < allHostnames.size(); ++i) + { + res[i] = allHostnames[i]; + } + return res; + } + + std::string mpi_processor_name() + { + std::string res; + res.resize(MPI_MAX_PROCESSOR_NAME); + int string_len; + if (MPI_Get_processor_name(res.data(), &string_len) != 0) + { + throw std::runtime_error( + "[mpi_processor_name] Could not inquire processor name."); + } + // MPI_Get_processor_name returns the string length without null + // terminator and std::string::resize() does not use null terminator + // either. So, no +-1 necessary. 
+ res.resize(string_len); + res.shrink_to_fit(); + return res; + } +#endif + +#if openPMD_POSIX_AVAILABLE + std::string posix_hostname() + { + char hostname[MAX_HOSTNAME_LENGTH]; + if (gethostname(hostname, MAX_HOSTNAME_LENGTH)) + { + throw std::runtime_error( + "[posix_hostname] Could not inquire hostname."); + } + std::string res(hostname); + return res; + } +#endif +} // namespace host_info } // namespace openPMD + +#undef openPMD_POSIX_AVAILABLE diff --git a/src/Dataset.cpp b/src/Dataset.cpp index 662bd2d29f..c1546e9ef0 100644 --- a/src/Dataset.cpp +++ b/src/Dataset.cpp @@ -19,6 +19,7 @@ * If not, see . */ #include "openPMD/Dataset.hpp" +#include "openPMD/Error.hpp" #include #include @@ -30,6 +31,9 @@ Dataset::Dataset(Datatype d, Extent e, std::string options_in) { // avoid initialization order issues rank = static_cast(extent.size()); + // Call this in order to have early error message in case of wrong + // specification of joined dimensions + joinedDimension(); } Dataset::Dataset(Extent e) : Dataset(Datatype::UNDEFINED, std::move(e)) @@ -49,4 +53,41 @@ Dataset &Dataset::extend(Extent newExtents) extent = newExtents; return *this; } + +bool Dataset::empty() const +{ + auto jd = joinedDimension(); + for (size_t i = 0; i < extent.size(); ++i) + { + if (extent[i] == 0 && (!jd.has_value() || jd.value() != i)) + { + return true; + } + } + return false; +} + +std::optional Dataset::joinedDimension() const +{ + std::optional res; + for (size_t i = 0; i < extent.size(); ++i) + { + if (extent[i] == JOINED_DIMENSION) + { + if (res.has_value()) + { + throw error::WrongAPIUsage( + "Must specify JOINED_DIMENSION at most once (found at " + "indices " + + std::to_string(res.value()) + " and " + std::to_string(i) + + ")"); + } + else + { + res = i; + } + } + } + return res; +} } // namespace openPMD diff --git a/src/Format.cpp b/src/Format.cpp index 8a6ead832a..452ed77bd3 100644 --- a/src/Format.cpp +++ b/src/Format.cpp @@ -45,6 +45,8 @@ Format determineFormat(std::string 
const &filename) return Format::JSON; if (auxiliary::ends_with(filename, ".toml")) return Format::TOML; + if (auxiliary::ends_with(filename, ".%E")) + return Format::GENERIC; // Format might still be specified via JSON return Format::DUMMY; @@ -70,6 +72,8 @@ std::string suffix(Format f) return ".json"; case Format::TOML: return ".toml"; + case Format::GENERIC: + return ".%E"; default: return ""; } diff --git a/src/IO/ADIOS/ADIOS2Auxiliary.cpp b/src/IO/ADIOS/ADIOS2Auxiliary.cpp index d4c08408ce..95029c009e 100644 --- a/src/IO/ADIOS/ADIOS2Auxiliary.cpp +++ b/src/IO/ADIOS/ADIOS2Auxiliary.cpp @@ -28,6 +28,45 @@ #include +namespace openPMD::adios_defs +{ +FlushTarget flushTargetFromString(std::string const &str) +{ + if (str == "buffer") + { + return FlushTarget::Buffer; + } + else if (str == "disk") + { + return FlushTarget::Disk; + } + else if (str == "buffer_override") + { + return FlushTarget::Buffer_Override; + } + else if (str == "disk_override") + { + return FlushTarget::Disk_Override; + } + else if (str == "new_step") + { + return FlushTarget::NewStep; + } + else if (str == "new_step_override") + { + return FlushTarget::NewStep_Override; + } + else + { + throw error::BackendConfigSchema( + {"adios2", "engine", adios_defaults::str_flushtarget}, + "Flush target must be either 'disk', 'buffer' or 'new_step', but " + "was " + + str + "."); + } +} +} // namespace openPMD::adios_defs + namespace openPMD::detail { template diff --git a/src/IO/ADIOS/ADIOS2File.cpp b/src/IO/ADIOS/ADIOS2File.cpp new file mode 100644 index 0000000000..ee0c1a9062 --- /dev/null +++ b/src/IO/ADIOS/ADIOS2File.cpp @@ -0,0 +1,1355 @@ +/* Copyright 2017-2021 Franz Poeschel, Fabian Koller and Axel Huebl + * + * This file is part of openPMD-api. 
+ * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see . + */ + +#include "openPMD/IO/ADIOS/ADIOS2File.hpp" +#include "openPMD/Error.hpp" +#include "openPMD/IO/ADIOS/ADIOS2IOHandler.hpp" +#include "openPMD/IO/AbstractIOHandler.hpp" +#include "openPMD/auxiliary/Environment.hpp" +#include "openPMD/auxiliary/StringManip.hpp" + +#include + +#if openPMD_USE_VERIFY +#define VERIFY(CONDITION, TEXT) \ + { \ + if (!(CONDITION)) \ + throw std::runtime_error((TEXT)); \ + } +#else +#define VERIFY(CONDITION, TEXT) \ + do \ + { \ + (void)sizeof(CONDITION); \ + } while (0); +#endif + +#define VERIFY_ALWAYS(CONDITION, TEXT) \ + { \ + if (!(CONDITION)) \ + throw std::runtime_error((TEXT)); \ + } + +#if openPMD_HAVE_ADIOS2 +namespace openPMD::detail +{ +template +void DatasetReader::call( + ADIOS2IOHandlerImpl *impl, + detail::BufferedGet &bp, + adios2::IO &IO, + adios2::Engine &engine, + std::string const &fileName) +{ + adios2::Variable var = + impl->verifyDataset(bp.param.offset, bp.param.extent, IO, bp.name); + if (!var) + { + throw std::runtime_error( + "[ADIOS2] Failed retrieving ADIOS2 Variable with name '" + bp.name + + "' from file " + fileName + "."); + } + auto ptr = std::static_pointer_cast(bp.param.data).get(); + engine.Get(var, ptr); +} + +template +inline constexpr 
bool always_false_v = false; + +template +void WriteDataset::call(ADIOS2File &ba, detail::BufferedPut &bp) +{ + VERIFY_ALWAYS( + access::write(ba.m_impl->m_handler->m_backendAccess), + "[ADIOS2] Cannot write data in read-only mode."); + + std::visit( + [&](auto &&arg) { + using ptr_type = std::decay_t; + if constexpr (std::is_same_v>) + { + auto ptr = static_cast(arg.get()); + + adios2::Variable var = ba.m_impl->verifyDataset( + bp.param.offset, bp.param.extent, ba.m_IO, bp.name); + + ba.getEngine().Put(var, ptr); + } + else if constexpr (std::is_same_v< + ptr_type, + UniquePtrWithLambda>) + { + BufferedUniquePtrPut bput; + bput.name = std::move(bp.name); + bput.offset = std::move(bp.param.offset); + bput.extent = std::move(bp.param.extent); + /* + * Note: Moving is required here since it's a unique_ptr. + * std::forward<>() would theoretically work, but it + * requires the type parameter and we don't have that + * inside the lambda. + * (ptr_type does not work for this case). + */ + // clang-format off + bput.data = std::move(arg); // NOLINT(bugprone-move-forwarding-reference) + // clang-format on + bput.dtype = bp.param.dtype; + ba.m_uniquePtrPuts.push_back(std::move(bput)); + } + else + { + static_assert( + always_false_v, "Unhandled std::variant branch"); + } + }, + bp.param.data.m_buffer); +} + +template +void WriteDataset::call(Params &&...) 
+{ + throw std::runtime_error("[ADIOS2] WRITE_DATASET: Invalid datatype."); +} + +void BufferedGet::run(ADIOS2File &ba) +{ + switchAdios2VariableType( + param.dtype, ba.m_impl, *this, ba.m_IO, ba.getEngine(), ba.m_file); +} + +void BufferedPut::run(ADIOS2File &ba) +{ + switchAdios2VariableType(param.dtype, ba, *this); +} + +struct RunUniquePtrPut +{ + template + static void call(BufferedUniquePtrPut &bufferedPut, ADIOS2File &ba) + { + auto ptr = static_cast(bufferedPut.data.get()); + adios2::Variable var = ba.m_impl->verifyDataset( + bufferedPut.offset, bufferedPut.extent, ba.m_IO, bufferedPut.name); + ba.getEngine().Put(var, ptr); + } + + static constexpr char const *errorMsg = "RunUniquePtrPut"; +}; + +void BufferedUniquePtrPut::run(ADIOS2File &ba) +{ + switchAdios2VariableType(dtype, *this, ba); +} + +ADIOS2File::ADIOS2File(ADIOS2IOHandlerImpl &impl, InvalidatableFile file) + : m_file(impl.fullPath(std::move(file))) + , m_ADIOS(impl.m_ADIOS) + , m_impl(&impl) +{ + // Declaring these members in the constructor body to avoid + // initialization order hazards. 
Need the IO_ prefix since in some + // situation there seems to be trouble with number-only IO names + m_mode = impl.adios2AccessMode(m_file); + create_IO(); + if (!m_IO) + { + throw std::runtime_error( + "[ADIOS2] Internal error: Failed declaring ADIOS2 IO object " + "for file " + + m_file); + } + else + { + configure_IO(); + } +} + +auto ADIOS2File::useGroupTable() const -> UseGroupTable +{ + return m_impl->useGroupTable(); +} + +void ADIOS2File::create_IO() +{ + m_IOName = std::to_string(m_impl->nameCounter++); + m_IO = m_impl->m_ADIOS.DeclareIO("IO_" + m_IOName); +} + +ADIOS2File::~ADIOS2File() +{ + finalize(); +} + +void ADIOS2File::finalize() +{ + if (finalized) + { + return; + } + // if write accessing, ensure that the engine is opened + // and that all datasets are written + // (attributes and unique_ptr datasets are written upon closing a step + // or a file which users might never do) + bool needToWrite = !m_uniquePtrPuts.empty(); + if ((needToWrite || !m_engine) && writeOnly(m_mode)) + { + getEngine(); + for (auto &entry : m_uniquePtrPuts) + { + entry.run(*this); + } + } + if (m_engine) + { + auto &engine = m_engine.value(); + // might have been closed previously + if (engine) + { + if (streamStatus == StreamStatus::DuringStep) + { + engine.EndStep(); + } + engine.Close(); + m_ADIOS.RemoveIO(m_IOName); + } + } + finalized = true; +} + +namespace +{ + constexpr char const *alwaysSupportsUpfrontParsing[] = {"bp3", "hdf5"}; + constexpr char const *supportsUpfrontParsingInRandomAccessMode[] = { + "bp4", "bp5", "file", "filestream"}; + constexpr char const *nonPersistentEngines[] = { + "sst", "insitumpi", "inline", "staging", "nullcore", "ssc"}; + + bool supportedEngine(std::string const &engineType) + { + auto is_in_list = [&engineType](auto &list) { + for (auto const &e : list) + { + if (engineType == e) + { + return true; + } + } + return false; + }; + return is_in_list(alwaysSupportsUpfrontParsing) || + is_in_list(supportsUpfrontParsingInRandomAccessMode) 
|| + is_in_list(nonPersistentEngines); + } + + bool supportsUpfrontParsing(Access access, std::string const &engineType) + { + for (auto const &e : alwaysSupportsUpfrontParsing) + { + if (e == engineType) + { + return true; + } + } + if (access != Access::READ_LINEAR) + { + for (auto const &e : supportsUpfrontParsingInRandomAccessMode) + { + if (e == engineType) + { + return true; + } + } + } + return false; + } + + enum class PerstepParsing + { + Supported, + Unsupported, + Required + }; + + PerstepParsing + supportsPerstepParsing(Access access, std::string const &engineType) + { + // required in all streaming engines + for (auto const &e : nonPersistentEngines) + { + if (engineType == e) + { + return PerstepParsing::Required; + } + } + // supported in file engines in READ_LINEAR mode + if (access != Access::READ_RANDOM_ACCESS) + { + return PerstepParsing::Supported; + } + + return PerstepParsing::Unsupported; + } + + bool nonpersistentEngine(std::string const &engineType) + { + for (auto &e : nonPersistentEngines) + { + if (e == engineType) + { + return true; + } + } + return false; + } +} // namespace + +size_t ADIOS2File::currentStep() +{ + if (nonpersistentEngine(m_impl->m_engineType)) + { + return m_currentStep; + } + else + { + return getEngine().CurrentStep(); + } +} + +void ADIOS2File::configure_IO_Read() +{ + bool upfrontParsing = supportsUpfrontParsing( + m_impl->m_handler->m_backendAccess, m_impl->m_engineType); + PerstepParsing perstepParsing = supportsPerstepParsing( + m_impl->m_handler->m_backendAccess, m_impl->m_engineType); + + switch (m_impl->m_handler->m_backendAccess) + { + case Access::READ_LINEAR: + switch (perstepParsing) + { + case PerstepParsing::Supported: + case PerstepParsing::Required: + // all is fine, we can go forward with READ_LINEAR mode + /* + * We don't know yet if per-step parsing will be fine since the + * engine is not opened yet. 
+ * In non-persistent (streaming) engines, per-step parsing is + * always fine and always required. + */ + streamStatus = nonpersistentEngine(m_impl->m_engineType) + ? StreamStatus::OutsideOfStep + : StreamStatus::Undecided; + parsePreference = ParsePreference::PerStep; + m_IO.SetParameter("StreamReader", "On"); + break; + case PerstepParsing::Unsupported: + throw error::Internal( + "Internal control flow error: Per-Step parsing cannot be " + "unsupported when access type is READ_LINEAR"); + break; + } + break; + case Access::READ_ONLY: + case Access::READ_WRITE: + /* + * Prefer up-front parsing, but try to fallback to per-step parsing + * if possible. + */ + if (upfrontParsing == nonpersistentEngine(m_impl->m_engineType)) + { + throw error::Internal( + "Internal control flow error: With access types " + "READ_ONLY/READ_WRITE, support for upfront parsing is " + "equivalent to the chosen engine being file-based."); + } + if (upfrontParsing) + { + streamStatus = StreamStatus::ReadWithoutStream; + parsePreference = ParsePreference::UpFront; + } + else + { + /* + * Scenario: A step-only workflow was used (i.e. a streaming + * engine), but Access::READ_ONLY was specified. + * Fall back to streaming read mode. 
+ */ + m_mode = adios2::Mode::Read; + parsePreference = ParsePreference::PerStep; + streamStatus = StreamStatus::OutsideOfStep; + } + break; + default: + VERIFY_ALWAYS( + access::writeOnly(m_impl->m_handler->m_backendAccess), + "Internal control flow error: Must set parse preference for " + "any read mode."); + } +} + +void ADIOS2File::configure_IO_Write() +{ + optimizeAttributesStreaming = + // Also, it should only be done when truly streaming, not + // when using a disk-based engine that behaves like a + // streaming engine (otherwise attributes might vanish) + nonpersistentEngine(m_impl->m_engineType); + + streamStatus = StreamStatus::OutsideOfStep; +} + +void ADIOS2File::configure_IO() +{ + // step/variable-based iteration encoding requires use of group tables + // but the group table feature is available only in ADIOS2 >= v2.9 + // use old layout to support at least one single iteration otherwise + // these properties are inferred from the opened dataset in read mode + if (writeOnly(m_mode)) + { + +#if openPMD_HAS_ADIOS_2_9 + if (!m_impl->m_useGroupTable.has_value()) + { + switch (m_impl->m_handler->m_encoding) + { + /* + * For variable-based encoding, this does not matter as it is + * new and requires >= v2.9 features anyway. + */ + case IterationEncoding::variableBased: + m_impl->m_useGroupTable = UseGroupTable::Yes; + break; + case IterationEncoding::groupBased: + case IterationEncoding::fileBased: + m_impl->m_useGroupTable = UseGroupTable::No; + break; + } + } + + if (m_impl->m_modifiableAttributes == + ADIOS2IOHandlerImpl::ModifiableAttributes::Unspecified) + { + m_impl->m_modifiableAttributes = m_impl->m_handler->m_encoding == + IterationEncoding::variableBased + ? 
ADIOS2IOHandlerImpl::ModifiableAttributes::Yes + : ADIOS2IOHandlerImpl::ModifiableAttributes::No; + } +#else + if (!m_impl->m_useGroupTable.has_value()) + { + m_impl->m_useGroupTable = UseGroupTable::No; + } + + m_impl->m_modifiableAttributes = + ADIOS2IOHandlerImpl::ModifiableAttributes::No; +#endif + } + + // set engine type + { + m_IO.SetEngine(m_impl->realEngineType()); + } + + if (!supportedEngine(m_impl->m_engineType)) + { + std::stringstream sstream; + sstream << "User-selected ADIOS2 engine '" << m_impl->m_engineType + << "' is not recognized by the openPMD-api. Select one of: '"; + bool first_entry = true; + auto add_entries = [&first_entry, &sstream](auto &list) { + for (auto const &e : list) + { + if (first_entry) + { + sstream << e; + first_entry = false; + } + else + { + sstream << ", " << e; + } + } + }; + add_entries(alwaysSupportsUpfrontParsing); + add_entries(supportsUpfrontParsingInRandomAccessMode); + add_entries(nonPersistentEngines); + sstream << "'." << std::endl; + throw error::WrongAPIUsage(sstream.str()); + } + + // set engine parameters + std::set alreadyConfigured; + bool wasTheFlushTargetSpecifiedViaJSON = false; + auto engineConfig = m_impl->config(adios_defaults::str_engine); + if (!engineConfig.json().is_null()) + { + auto params = m_impl->config(adios_defaults::str_params, engineConfig); + params.declareFullyRead(); + if (params.json().is_object()) + { + for (auto it = params.json().begin(); it != params.json().end(); + it++) + { + auto maybeString = json::asStringDynamic(it.value()); + if (maybeString.has_value()) + { + m_IO.SetParameter(it.key(), std::move(maybeString.value())); + } + else + { + throw error::BackendConfigSchema( + {"adios2", "engine", "parameters", it.key()}, + "Must be convertible to string type."); + } + alreadyConfigured.emplace( + auxiliary::lowerCase(std::string(it.key()))); + } + } + auto _useAdiosSteps = + m_impl->config(adios_defaults::str_usesteps, engineConfig); + if (!_useAdiosSteps.json().is_null() && 
writeOnly(m_mode)) + { + std::cerr << "[ADIOS2 backend] WARNING: Parameter " + "`adios2.engine.usesteps` is deprecated since use " + "of steps is now always enabled." + << std::endl; + } + + if (engineConfig.json().contains(adios_defaults::str_flushtarget)) + { + auto target = json::asLowerCaseStringDynamic( + engineConfig[adios_defaults::str_flushtarget].json()); + if (!target.has_value()) + { + throw error::BackendConfigSchema( + {"adios2", "engine", adios_defaults::str_flushtarget}, + "Flush target must be either 'disk' or 'buffer', but " + "was non-literal type."); + } + m_impl->m_flushTarget = + adios_defs::flushTargetFromString(target.value()); + wasTheFlushTargetSpecifiedViaJSON = true; + } + } + + auto shadow = m_impl->m_config.invertShadow(); + if (shadow.size() > 0) + { + switch (m_impl->m_config.originallySpecifiedAs) + { + case json::SupportedLanguages::JSON: + std::cerr << "Warning: parts of the backend configuration for " + "ADIOS2 remain unused:\n" + << shadow << std::endl; + break; + case json::SupportedLanguages::TOML: { + auto asToml = json::jsonToToml(shadow); + std::cerr << "Warning: parts of the backend configuration for " + "ADIOS2 remain unused:\n" + << json::format_toml(asToml) << std::endl; + break; + } + } + } + + switch (m_impl->m_handler->m_backendAccess) + { + case Access::READ_LINEAR: + case Access::READ_ONLY: + configure_IO_Read(); + break; + case Access::READ_WRITE: + if (readOnly(m_mode)) + { + configure_IO_Read(); + } + else + { + configure_IO_Write(); + } + break; + case Access::APPEND: + case Access::CREATE: + configure_IO_Write(); + break; + } + + auto notYetConfigured = [&alreadyConfigured](std::string const ¶m) { + auto it = + alreadyConfigured.find(auxiliary::lowerCase(std::string(param))); + return it == alreadyConfigured.end(); + }; + + // read parameters from environment + if (notYetConfigured("CollectiveMetadata")) + { + if (1 == auxiliary::getEnvNum("OPENPMD_ADIOS2_HAVE_METADATA_FILE", 1)) + { + 
m_IO.SetParameter("CollectiveMetadata", "On"); + } + else + { + m_IO.SetParameter("CollectiveMetadata", "Off"); + } + } + if (notYetConfigured("Profile")) + { + if (1 == auxiliary::getEnvNum("OPENPMD_ADIOS2_HAVE_PROFILING", 1) && + notYetConfigured("Profile")) + { + m_IO.SetParameter("Profile", "On"); + } + else + { + m_IO.SetParameter("Profile", "Off"); + } + } + if (notYetConfigured("AsyncWrite")) + { + if (1 == auxiliary::getEnvNum("OPENPMD_ADIOS2_ASYNC_WRITE", 0) && + notYetConfigured("AsyncWrite")) + { + m_IO.SetParameter("AsyncWrite", "On"); + if (!wasTheFlushTargetSpecifiedViaJSON) + { + m_impl->m_flushTarget = FlushTarget::Buffer; + } + } + else + { + m_IO.SetParameter("AsyncWrite", "Off"); + } + } + +#if openPMD_HAVE_MPI + { + auto num_substreams = + auxiliary::getEnvNum("OPENPMD_ADIOS2_NUM_SUBSTREAMS", 0); + if (notYetConfigured("SubStreams") && 0 != num_substreams) + { + m_IO.SetParameter("SubStreams", std::to_string(num_substreams)); + } + + // BP5 parameters + auto numAgg = auxiliary::getEnvNum("OPENPMD_ADIOS2_BP5_NumAgg", 0); + auto numSubFiles = + auxiliary::getEnvNum("OPENPMD_ADIOS2_BP5_NumSubFiles", 0); + auto AggTypeStr = + auxiliary::getEnvString("OPENPMD_ADIOS2_BP5_TypeAgg", ""); + auto MaxShmMB = auxiliary::getEnvNum("OPENPMD_ADIOS2_BP5_MaxShmMB", 0); + auto BufferChunkMB = + auxiliary::getEnvNum("OPENPMD_ADIOS2_BP5_BufferChunkMB", 0); + + if (notYetConfigured("NumAggregators") && (numAgg > 0)) + m_IO.SetParameter("NumAggregators", std::to_string(numAgg)); + if (notYetConfigured("NumSubFiles") && (numSubFiles > 0)) + m_IO.SetParameter("NumSubFiles", std::to_string(numSubFiles)); + if (notYetConfigured("AggregationType") && (AggTypeStr.size() > 0)) + m_IO.SetParameter("AggregationType", AggTypeStr); + if (notYetConfigured("BufferChunkSize") && (BufferChunkMB > 0)) + m_IO.SetParameter( + "BufferChunkSize", + std::to_string((uint64_t)BufferChunkMB * (uint64_t)1048576)); + if (notYetConfigured("MaxShmSize") && (MaxShmMB > 0)) + m_IO.SetParameter( + 
"MaxShmSize", + std::to_string((uint64_t)MaxShmMB * (uint64_t)1048576)); + } +#endif + if (notYetConfigured("StatsLevel")) + { + /* + * Switch those off by default since they are expensive to compute + * and to enable it, set the JSON option "StatsLevel" or the + * environment variable "OPENPMD_ADIOS2_STATS_LEVEL" be positive. + * The ADIOS2 default was "1" (on). + */ + auto stats_level = + auxiliary::getEnvNum("OPENPMD_ADIOS2_STATS_LEVEL", 0); + m_IO.SetParameter("StatsLevel", std::to_string(stats_level)); + } + if (m_impl->realEngineType() == "sst" && notYetConfigured("QueueLimit")) + { + /* + * By default, the SST engine of ADIOS2 does not set a limit on its + * internal queue length. + * If the reading end is slower than the writing end, this will + * lead to a congestion in the queue and hence an increasing + * memory usage while the writing code goes forward. + * We could set a default queue limit of 1, thus forcing the + * two codes to proceed entirely in lock-step. + * We prefer a default queue limit of 2, which is still lower than + * the default infinity, but allows writer and reader to process + * data asynchronously as long as neither code fails to keep up the + * rhythm. The writer can produce the next iteration while the + * reader still deals with the old one. + * Thus, a limit of 2 is a good balance between 1 and infinity, + * keeping pipeline parallelism a default without running the risk + * of using unbound memory. 
+ */ + m_IO.SetParameter("QueueLimit", "2"); + } + + // We need to open the engine now already to inquire configuration + // options stored in there + getEngine(); +} + +auto ADIOS2File::detectGroupTable() -> UseGroupTable +{ + auto const &attributes = availableAttributes(); + auto lower_bound = + attributes.lower_bound(adios_defaults::str_activeTablePrefix); + if (lower_bound != attributes.end() && + auxiliary::starts_with( + lower_bound->first, adios_defaults::str_activeTablePrefix)) + { + return UseGroupTable::Yes; + } + else + { + return UseGroupTable::No; + } +} + +adios2::Engine &ADIOS2File::getEngine() +{ + if (!m_engine) + { + auto tempMode = m_mode; + switch (m_mode) + { + case adios2::Mode::Append: +#ifdef _WIN32 + /* + * On Windows, ADIOS2 v2.8. Append mode only works with existing + * files. So, we first check for file existence and switch to + * create mode if it does not exist. + * + * See issue: https://github.com/ornladios/ADIOS2/issues/3358 + */ + tempMode = m_impl->checkFile(m_file) ? adios2::Mode::Append + : adios2::Mode::Write; + [[fallthrough]]; +#endif + case adios2::Mode::Write: { + // usesSteps attribute only written upon ::advance() + // this makes sure that the attribute is only put in case + // the streaming API was used. + m_engine = + std::make_optional(adios2::Engine(m_IO.Open(m_file, tempMode))); + m_engine->BeginStep(); + streamStatus = StreamStatus::DuringStep; + break; + } +#if openPMD_HAS_ADIOS_2_8 + case adios2::Mode::ReadRandomAccess: +#endif + case adios2::Mode::Read: { + m_engine = + std::make_optional(adios2::Engine(m_IO.Open(m_file, m_mode))); + /* + * First round: detect use of group table + */ + bool openedANewStep = false; + { + if (!supportsUpfrontParsing( + m_impl->m_handler->m_backendAccess, + m_impl->m_engineType)) + { + /* + * In BP5 with Linear read mode, we now need to + * tentatively open the first IO step. + * Otherwise we don't see the group table attributes. + * This branch is also taken by Streaming engines. 
+ */ + if (m_engine->BeginStep() != adios2::StepStatus::OK) + { + throw std::runtime_error( + "[ADIOS2] Unexpected step status when " + "opening file/stream."); + } + openedANewStep = true; + } + + if (m_impl->m_useGroupTable.has_value()) + { + switch (m_impl->m_useGroupTable.value()) + { + case UseGroupTable::Yes: { + auto detectedGroupTable = detectGroupTable(); + if (detectedGroupTable == UseGroupTable::No) + { + std::cerr + << "[Warning] User requested use of group " + "table when reading from ADIOS2 " + "dataset, but no group table has been " + "found. Will ignore." + << std::endl; + m_impl->m_useGroupTable = UseGroupTable::No; + } + } + case UseGroupTable::No: + break; + } + } + else + { + m_impl->m_useGroupTable = detectGroupTable(); + } + }; + + /* + * Second round: Decide the streamStatus. + */ + switch (streamStatus) + { + case StreamStatus::Undecided: { + auto attr = m_IO.InquireAttribute( + adios_defaults::str_usesstepsAttribute); + if (attr && attr.Data()[0] == 1) + { + if (parsePreference == ParsePreference::UpFront) + { + if (openedANewStep) + { + throw error::Internal( + "Logic error in ADIOS2 backend! No need to " + "indiscriminately open a step before doing " + "anything in an engine that supports " + "up-front parsing."); + } + streamStatus = StreamStatus::ReadWithoutStream; + } + else + { + // If the iteration encoding is group-based and + // no group table is used, we're now at a dead-end. + // Step-by-Step parsing is unreliable in that mode + // since groups might be reported that are not + // there. + // But we were only able to find this out by opening + // the ADIOS2 file with an access mode that was + // possibly wrong, so we would have to close and + // reopen here. + // Since group-based encoding is a bag of trouble in + // ADIOS2 anyway, we just don't support this + // particular use case. + // This failure will only arise when the following + // conditions are met: + // + // 1) group-based encoding + // 2) no group table (i.e. 
old "ADIOS2 schema") + // 3) LINEAR access mode + // + // This is a relatively lenient restriction compared + // to forbidding group-based encoding in ADIOS2 + // altogether. + if (m_impl->m_useGroupTable.value() == + UseGroupTable::No && + m_IO.InquireAttribute( + adios_defaults::str_groupBasedWarning)) + { + throw error::OperationUnsupportedInBackend( + "ADIOS2", + "Trying to open a group-based ADIOS2 file " + "that does not have a group table with " + "LINEAR access type. That combination is " + "very buggy, so please use " + "READ_ONLY/READ_RANDOM_ACCESS instead."); + } + if (!openedANewStep && + m_engine.value().BeginStep() != + adios2::StepStatus::OK) + { + throw std::runtime_error( + "[ADIOS2] Unexpected step status when " + "opening file/stream."); + } + streamStatus = StreamStatus::DuringStep; + } + } + else + { + /* + * If openedANewStep is true, then the file consists + * of one large step, we just leave it open. + */ + streamStatus = StreamStatus::ReadWithoutStream; + } + break; + } + case StreamStatus::ReadWithoutStream: + // using random-access mode + break; + case StreamStatus::DuringStep: + throw error::Internal( + "[ADIOS2] Control flow error: stream status cannot be " + "DuringStep before opening the engine."); + case StreamStatus::OutsideOfStep: + if (openedANewStep) + { + streamStatus = StreamStatus::DuringStep; + } + else + { + throw error::Internal( + "Control flow error: Step should have been opened " + "before this point."); + } + break; + default: + throw std::runtime_error("[ADIOS2] Control flow error!"); + } + break; + } + default: + throw std::runtime_error("[ADIOS2] Invalid ADIOS access mode"); + } + + if (!m_engine.value()) + { + throw std::runtime_error("[ADIOS2] Failed opening Engine."); + } + } + return m_engine.value(); +} + +void ADIOS2File::flush_impl( + ADIOS2FlushParams flushParams, + std::function const &performPutGets, + bool writeLatePuts, + bool flushUnconditionally) +{ + auto level = flushParams.level; + if (streamStatus 
== StreamStatus::StreamOver) + { + if (flushUnconditionally) + { + throw std::runtime_error( + "[ADIOS2] Cannot access engine since stream is over."); + } + return; + } + auto &eng = getEngine(); + /* + * Only open a new step if it is necessary. + */ + if (streamStatus == StreamStatus::OutsideOfStep) + { + if (m_buffer.empty() && (!writeLatePuts || m_uniquePtrPuts.empty())) + { + if (flushUnconditionally) + { + performPutGets(*this, eng); + } + return; + } + } + for (auto &ba : m_buffer) + { + ba->run(*this); + } + + if (!initializedDefaults) + { + // Currently only schema 0 supported + if (m_impl->m_writeAttributesFromThisRank) + { + m_IO.DefineAttribute(adios_defaults::str_adios2Schema, 0); + } + initializedDefaults = true; + } + + if (writeLatePuts) + { + for (auto &entry : m_uniquePtrPuts) + { + entry.run(*this); + } + } + + if (readOnly(m_mode)) + { + level = FlushLevel::UserFlush; + } + + switch (level) + { + case FlushLevel::UserFlush: + performPutGets(*this, eng); + m_updateSpans.clear(); + m_buffer.clear(); + m_alreadyEnqueued.clear(); + if (writeLatePuts) + { + m_uniquePtrPuts.clear(); + } + + break; + + case FlushLevel::InternalFlush: + case FlushLevel::SkeletonOnly: + case FlushLevel::CreateOrOpenFiles: + /* + * Tasks have been given to ADIOS2, but we don't flush them + * yet. So, move everything to m_alreadyEnqueued to avoid + * use-after-free. 
+ */ + for (auto &task : m_buffer) + { + m_alreadyEnqueued.emplace_back(std::move(task)); + } + if (writeLatePuts) + { + throw error::Internal( + "ADIOS2 backend: Flush of late writes was requested at the " + "wrong time."); + } + m_buffer.clear(); + break; + } +} + +void ADIOS2File::flush_impl(ADIOS2FlushParams flushParams, bool writeLatePuts) +{ + auto decideFlushAPICall = [this, flushTarget = flushParams.flushTarget]( + adios2::Engine &engine) { +#if ADIOS2_VERSION_MAJOR * 1000000000 + ADIOS2_VERSION_MINOR * 100000000 + \ + ADIOS2_VERSION_PATCH * 1000000 + ADIOS2_VERSION_TWEAK >= \ + 2701001223 + enum class CleanedFlushTarget + { + Buffer, + Disk, + Step + }; + + CleanedFlushTarget target{}; + switch (flushTarget) + { + case FlushTarget::Disk: + case FlushTarget::Disk_Override: + if (m_impl->realEngineType() == "bp5" || + /* this second check should be sufficient, but we leave the + first check in as a safeguard against renamings in + ADIOS2. Also do a lowerCase transform since the docstring + of `Engine::Type()` claims that the return value is in + lowercase, but for BP5 this does not seem true. */ + auxiliary::lowerCase(engine.Type()) == "bp5writer") + { + target = CleanedFlushTarget::Disk; + } + else + { + target = CleanedFlushTarget::Buffer; + } + break; + case FlushTarget::Buffer: + case FlushTarget::Buffer_Override: + target = CleanedFlushTarget::Buffer; + break; + case FlushTarget::NewStep: + case FlushTarget::NewStep_Override: + target = CleanedFlushTarget::Step; + break; + } + + switch (target) + { + case CleanedFlushTarget::Disk: + /* + * Draining the uniquePtrPuts now to use this chance to free the + * memory. 
+ */ + for (auto &entry : m_uniquePtrPuts) + { + entry.run(*this); + } + engine.PerformDataWrite(); + m_uniquePtrPuts.clear(); + m_updateSpans.clear(); + break; + case CleanedFlushTarget::Buffer: + engine.PerformPuts(); + break; + case CleanedFlushTarget::Step: + if (streamStatus != StreamStatus::DuringStep) + { + throw error::OperationUnsupportedInBackend( + "ADIOS2", + "Trying to flush to a new step while no step is active"); + } + /* + * Draining the uniquePtrPuts now to use this chance to free the + * memory. + */ + for (auto &entry : m_uniquePtrPuts) + { + entry.run(*this); + } + engine.EndStep(); + engine.BeginStep(); + // ++m_currentStep; // think we should keep this as the logical step + m_uniquePtrPuts.clear(); + uncommittedAttributes.clear(); + m_updateSpans.clear(); + break; + } +#else + (void)this; + (void)flushTarget; + engine.PerformPuts(); +#endif + }; + + flush_impl( + flushParams, + [decideFlushAPICall = std::move(decideFlushAPICall)]( + ADIOS2File &ba, adios2::Engine &eng) { + if (writeOnly(ba.m_mode)) + { + decideFlushAPICall(eng); + } + else + { + eng.PerformGets(); + } + }, + writeLatePuts, + /* flushUnconditionally = */ false); +} + +AdvanceStatus ADIOS2File::advance(AdvanceMode mode) +{ + if (streamStatus == StreamStatus::Undecided) + { + throw error::Internal( + "[ADIOS2File::advance()] StreamStatus Undecided before " + "beginning/ending a step?"); + } + // sic! no else + if (streamStatus == StreamStatus::ReadWithoutStream) + { + flush( + ADIOS2FlushParams{FlushLevel::UserFlush}, + /* writeLatePuts = */ false); + return AdvanceStatus::RANDOMACCESS; + } + + switch (mode) + { + case AdvanceMode::ENDSTEP: { + /* + * Advance mode write: + * Close the current step, defer opening the new step + * until one is actually needed: + * (1) The engine is accessed in ADIOS2File::flush + * (2) A new step is opened before the currently active step + * has seen an access. See the following lines: open the + * step just to skip it again. 
+ */ + if (streamStatus == StreamStatus::OutsideOfStep) + { + if (getEngine().BeginStep() != adios2::StepStatus::OK) + { + throw std::runtime_error( + "[ADIOS2] Trying to close a step that cannot be " + "opened."); + } + } + + if (writeOnly(m_mode) && m_impl->m_writeAttributesFromThisRank && + !m_IO.InquireAttribute( + adios_defaults::str_usesstepsAttribute)) + { + m_IO.DefineAttribute( + adios_defaults::str_usesstepsAttribute, 1); + } + + flush( + ADIOS2FlushParams{FlushLevel::UserFlush}, + [](ADIOS2File &, adios2::Engine &eng) { eng.EndStep(); }, + /* writeLatePuts = */ true, + /* flushUnconditionally = */ true); + uncommittedAttributes.clear(); + m_updateSpans.clear(); + streamStatus = StreamStatus::OutsideOfStep; + ++m_currentStep; + return AdvanceStatus::OK; + } + case AdvanceMode::BEGINSTEP: { + adios2::StepStatus adiosStatus{}; + + if (streamStatus != StreamStatus::DuringStep) + { + adiosStatus = getEngine().BeginStep(); + } + else + { + adiosStatus = adios2::StepStatus::OK; + } + AdvanceStatus res = AdvanceStatus::OK; + switch (adiosStatus) + { + case adios2::StepStatus::EndOfStream: + streamStatus = StreamStatus::StreamOver; + res = AdvanceStatus::OVER; + break; + case adios2::StepStatus::OK: + streamStatus = StreamStatus::DuringStep; + res = AdvanceStatus::OK; + break; + case adios2::StepStatus::NotReady: + case adios2::StepStatus::OtherError: + throw std::runtime_error("[ADIOS2] Unexpected step status."); + } + invalidateAttributesMap(); + invalidateVariablesMap(); + m_pathsMarkedAsActive.clear(); + return res; + } + } + throw std::runtime_error( + "Internal error: Advance mode should be explicitly" + " chosen by the front-end."); +} + +void ADIOS2File::drop() +{ + assert(m_buffer.empty()); +} + +static std::vector availableAttributesOrVariablesPrefixed( + std::string const &prefix, + ADIOS2File::AttributeMap_t const &(ADIOS2File::*getBasicMap)(), + ADIOS2File &ba) +{ + std::string var = auxiliary::ends_with(prefix, '/') ? 
prefix : prefix + '/'; + ADIOS2File::AttributeMap_t const &attributes = (ba.*getBasicMap)(); + std::vector ret; + for (auto it = attributes.lower_bound(prefix); it != attributes.end(); ++it) + { + if (auxiliary::starts_with(it->first, var)) + { + ret.emplace_back(auxiliary::replace_first(it->first, var, "")); + } + else + { + break; + } + } + return ret; +} + +std::vector +ADIOS2File::availableAttributesPrefixed(std::string const &prefix) +{ + return availableAttributesOrVariablesPrefixed( + prefix, &ADIOS2File::availableAttributes, *this); +} + +std::vector +ADIOS2File::availableVariablesPrefixed(std::string const &prefix) +{ + return availableAttributesOrVariablesPrefixed( + prefix, &ADIOS2File::availableVariables, *this); +} + +void ADIOS2File::invalidateAttributesMap() +{ + m_availableAttributes = std::optional(); +} + +ADIOS2File::AttributeMap_t const &ADIOS2File::availableAttributes() +{ + if (m_availableAttributes) + { + return m_availableAttributes.value(); + } + else + { + m_availableAttributes = std::make_optional(m_IO.AvailableAttributes()); + return m_availableAttributes.value(); + } +} + +void ADIOS2File::invalidateVariablesMap() +{ + m_availableVariables = std::optional(); +} + +ADIOS2File::AttributeMap_t const &ADIOS2File::availableVariables() +{ + if (m_availableVariables) + { + return m_availableVariables.value(); + } + else + { + m_availableVariables = std::make_optional(m_IO.AvailableVariables()); + return m_availableVariables.value(); + } +} + +void ADIOS2File::markActive(Writable *writable) +{ + switch (useGroupTable()) + { + case UseGroupTable::No: + break; + case UseGroupTable::Yes: +#if openPMD_HAS_ADIOS_2_9 + { + if (writeOnly(m_mode) && m_impl->m_writeAttributesFromThisRank) + { + auto currentStepBuffered = currentStep(); + do + { + using attr_t = unsigned long long; + auto filePos = m_impl->setAndGetFilePosition( + writable, /* write = */ false); + auto fullPath = + adios_defaults::str_activeTablePrefix + filePos->location; + 
m_IO.DefineAttribute( + fullPath, + currentStepBuffered, + /* variableName = */ "", + /* separator = */ "/", + /* allowModification = */ true); + m_pathsMarkedAsActive.emplace(writable); + writable = writable->parent; + } while (writable && + m_pathsMarkedAsActive.find(writable) == + m_pathsMarkedAsActive.end()); + } + } +#else + (void)writable; + throw error::OperationUnsupportedInBackend( + m_impl->m_handler->backendName(), + "Group table feature requires ADIOS2 >= v2.9."); +#endif + break; + } +} +} // namespace openPMD::detail +#endif diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index 708fbbdef0..bdbd43325a 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -20,6 +20,7 @@ */ #include "openPMD/IO/ADIOS/ADIOS2IOHandler.hpp" +#include "openPMD/IO/ADIOS/ADIOS2File.hpp" #include "openPMD/Datatype.hpp" #include "openPMD/Error.hpp" @@ -29,6 +30,7 @@ #include "openPMD/IterationEncoding.hpp" #include "openPMD/auxiliary/Environment.hpp" #include "openPMD/auxiliary/Filesystem.hpp" +#include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/auxiliary/Mpi.hpp" #include "openPMD/auxiliary/StringManip.hpp" #include "openPMD/auxiliary/TypeTraits.hpp" @@ -39,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -67,6 +70,18 @@ namespace openPMD #if openPMD_HAVE_ADIOS2 +std::optional joinedDimension(adios2::Dims const &dims) +{ + for (size_t i = 0; i < dims.size(); ++i) + { + if (dims[i] == adios2::JoinedDim) + { + return i; + } + } + return std::nullopt; +} + #if openPMD_HAVE_MPI ADIOS2IOHandlerImpl::ADIOS2IOHandlerImpl( @@ -143,7 +158,7 @@ ADIOS2IOHandlerImpl::~ADIOS2IOHandlerImpl() * This means that destruction order is nondeterministic. * Let's determinize it (necessary if computing in parallel). 
*/ - using file_t = std::unique_ptr; + using file_t = std::unique_ptr; std::vector sorted; sorted.reserve(m_fileData.size()); for (auto &pair : m_fileData) @@ -177,14 +192,17 @@ template void ADIOS2IOHandlerImpl::init( json::TracingJSON cfg, Callback &&callbackWriteAttributesFromRank) { + if (auto unsupported_engine_cfg = + auxiliary::getEnvString("OPENPMD_ADIOS2_PRETEND_ENGINE", ""); + !unsupported_engine_cfg.empty()) + { + auxiliary::lowerCase(unsupported_engine_cfg); + pretendEngine(std::move(unsupported_engine_cfg)); + } // allow overriding through environment variable - m_engineType = + realEngineType() = auxiliary::getEnvString("OPENPMD_ADIOS2_ENGINE", m_engineType); - std::transform( - m_engineType.begin(), - m_engineType.end(), - m_engineType.begin(), - [](unsigned char c) { return std::tolower(c); }); + auxiliary::lowerCase(realEngineType()); // environment-variable based configuration if (int groupTableViaEnv = @@ -227,11 +245,11 @@ void ADIOS2IOHandlerImpl::init( m_config["attribute_writing_ranks"].json()); } - auto engineConfig = config(ADIOS2Defaults::str_engine); + auto engineConfig = config(adios_defaults::str_engine); if (!engineConfig.json().is_null()) { auto engineTypeConfig = - config(ADIOS2Defaults::str_type, engineConfig).json(); + config(adios_defaults::str_type, engineConfig).json(); if (!engineTypeConfig.is_null()) { // convert to string @@ -240,7 +258,7 @@ void ADIOS2IOHandlerImpl::init( if (maybeEngine.has_value()) { // override engine type by JSON/TOML configuration - m_engineType = std::move(maybeEngine.value()); + realEngineType() = std::move(maybeEngine.value()); } else { @@ -249,6 +267,24 @@ void ADIOS2IOHandlerImpl::init( "Must be convertible to string type."); } } + + if (engineConfig.json().contains( + adios_defaults::str_treat_unsupported_engine_like)) + { + auto maybeEngine = json::asLowerCaseStringDynamic( + engineConfig + [adios_defaults::str_treat_unsupported_engine_like] + .json()); + if (!maybeEngine.has_value()) + { + throw 
error::BackendConfigSchema( + {"adios2", + adios_defaults::str_engine, + adios_defaults::str_treat_unsupported_engine_like}, + "Must be convertible to string type."); + } + pretendEngine(std::move(*maybeEngine)); + } } auto operators = getOperators(); if (operators) @@ -341,17 +377,38 @@ std::string ADIOS2IOHandlerImpl::fileSuffix(bool verbose) const { // SST engine adds its suffix unconditionally // so we don't add it +#if openPMD_HAVE_ADIOS2_BP5 && openPMD_HAS_ADIOS_2_9 + constexpr char const *const default_file_ending = ".bp5"; +#else + constexpr char const *const default_file_ending = ".bp4"; +#endif + + if (m_realEngineType.has_value()) + { + // unknown engine type, use whatever ending the user specified + return m_userSpecifiedExtension; + } + static std::map const endings{ - {"sst", {{"", ""}, {".sst", ""}}}, - {"staging", {{"", ""}, {".sst", ""}}}, - {"filestream", {{".bp", ".bp"}, {".bp4", ".bp4"}, {".bp5", ".bp5"}}}, - {"bp4", {{".bp4", ".bp4"}, {".bp", ".bp"}}}, - {"bp5", {{".bp5", ".bp5"}, {".bp", ".bp"}}}, - {"bp3", {{".bp", ".bp"}}}, - {"file", {{".bp", ".bp"}, {".bp4", ".bp4"}, {".bp5", ".bp5"}}}, - {"hdf5", {{".h5", ".h5"}}}, - {"nullcore", {{".nullcore", ".nullcore"}, {".bp", ".bp"}}}, - {"ssc", {{".ssc", ".ssc"}}}}; + {"sst", {{"", ""}, {".sst", ""}, {".%E", ""}}}, + {"staging", {{"", ""}, {".sst", ""}, {".%E", ""}}}, + {"filestream", + {{".bp", ".bp"}, + {".bp4", ".bp4"}, + {".bp5", ".bp5"}, + {".%E", default_file_ending}}}, + {"bp4", {{".bp4", ".bp4"}, {".bp", ".bp"}, {".%E", ".bp4"}}}, + {"bp5", {{".bp5", ".bp5"}, {".bp", ".bp"}, {".%E", ".bp5"}}}, + {"bp3", {{".bp", ".bp"}, {".%E", ".bp"}}}, + {"file", + {{".bp", ".bp"}, + {".bp4", ".bp4"}, + {".bp5", ".bp5"}, + {".%E", default_file_ending}}}, + {"hdf5", {{".h5", ".h5"}, {".%E", ".h5"}}}, + {"nullcore", + {{".nullcore", ".nullcore"}, {".bp", ".bp"}, {".%E", ".nullcore"}}}, + {"ssc", {{".ssc", ".ssc"}, {".%E", ".ssc"}}}}; if (auto engine = endings.find(m_engineType); engine != endings.end()) { 
@@ -421,34 +478,7 @@ std::string ADIOS2IOHandlerImpl::fileSuffix(bool verbose) const } } -using FlushTarget = ADIOS2IOHandlerImpl::FlushTarget; -static FlushTarget flushTargetFromString(std::string const &str) -{ - if (str == "buffer") - { - return FlushTarget::Buffer; - } - else if (str == "disk") - { - return FlushTarget::Disk; - } - else if (str == "buffer_override") - { - return FlushTarget::Buffer_Override; - } - else if (str == "disk_override") - { - return FlushTarget::Disk_Override; - } - else - { - throw error::BackendConfigSchema( - {"adios2", "engine", ADIOS2Defaults::str_flushtarget}, - "Flush target must be either 'disk' or 'buffer', but " - "was " + - str + "."); - } -} +using FlushTarget = adios_defs::FlushTarget; static FlushTarget & overrideFlushTarget(FlushTarget &inplace, FlushTarget new_val) @@ -458,9 +488,11 @@ overrideFlushTarget(FlushTarget &inplace, FlushTarget new_val) { case FlushTarget::Buffer: case FlushTarget::Disk: + case FlushTarget::NewStep: return true; case FlushTarget::Buffer_Override: case FlushTarget::Disk_Override: + case FlushTarget::NewStep_Override: return false; } return true; @@ -486,7 +518,7 @@ ADIOS2IOHandlerImpl::flush(internal::ParsedFlushParams &flushParams) { auto res = AbstractIOHandlerImpl::flush(); - detail::BufferedActions::ADIOS2FlushParams adios2FlushParams{ + detail::ADIOS2File::ADIOS2FlushParams adios2FlushParams{ flushParams.flushLevel, m_flushTarget}; if (flushParams.backendConfig.json().contains("adios2")) { @@ -494,20 +526,20 @@ ADIOS2IOHandlerImpl::flush(internal::ParsedFlushParams &flushParams) if (adios2Config.json().contains("engine")) { auto engineConfig = adios2Config["engine"]; - if (engineConfig.json().contains(ADIOS2Defaults::str_flushtarget)) + if (engineConfig.json().contains(adios_defaults::str_flushtarget)) { auto target = json::asLowerCaseStringDynamic( - engineConfig[ADIOS2Defaults::str_flushtarget].json()); + engineConfig[adios_defaults::str_flushtarget].json()); if (!target.has_value()) { 
throw error::BackendConfigSchema( - {"adios2", "engine", ADIOS2Defaults::str_flushtarget}, + {"adios2", "engine", adios_defaults::str_flushtarget}, "Flush target must be either 'disk' or 'buffer', but " "was non-literal type."); } overrideFlushTarget( adios2FlushParams.flushTarget, - flushTargetFromString(target.value())); + adios_defs::flushTargetFromString(target.value())); } } @@ -524,24 +556,26 @@ ADIOS2IOHandlerImpl::flush(internal::ParsedFlushParams &flushParams) auto asToml = json::jsonToToml(shadow); std::cerr << "Warning: parts of the backend configuration for " "ADIOS2 remain unused:\n" - << asToml << std::endl; + << json::format_toml(asToml) << std::endl; break; } } } } - for (auto &p : m_fileData) + for (auto const &file : m_dirty) { - if (m_dirty.find(p.first) != m_dirty.end()) + auto file_data = m_fileData.find(file); + if (file_data == m_fileData.end()) { - p.second->flush(adios2FlushParams, /* writeLatePuts = */ false); - } - else - { - p.second->drop(); + throw error::Internal( + "[ADIOS2 backend] No associated data found for file'" + *file + + "'."); } + file_data->second->flush( + adios2FlushParams, /* writeLatePuts = */ false); } + m_dirty.clear(); return res; } @@ -630,7 +664,7 @@ void ADIOS2IOHandlerImpl::createFile( printedWarningsAlready.noGroupBased = true; } fileData.m_IO.DefineAttribute( - ADIOS2Defaults::str_groupBasedWarning, + adios_defaults::str_groupBasedWarning, std::string("Consider using file-based or variable-based " "encoding instead in ADIOS2.")); } @@ -649,7 +683,7 @@ void ADIOS2IOHandlerImpl::checkFile( bool ADIOS2IOHandlerImpl::checkFile(std::string fullFilePath) const { - if (m_engineType == "bp3") + if (realEngineType() == "bp3") { if (!auxiliary::ends_with(fullFilePath, ".bp")) { @@ -659,7 +693,7 @@ bool ADIOS2IOHandlerImpl::checkFile(std::string fullFilePath) const fullFilePath += ".bp"; } } - else if (m_engineType == "sst") + else if (realEngineType() == "sst") { /* * SST will add this ending indiscriminately @@ -734,6 
+768,13 @@ void ADIOS2IOHandlerImpl::createDataset( "[ADIOS2] Creating a dataset in a file opened as read " "only is not possible."); } +#if !openPMD_HAS_ADIOS_2_9 + if (parameters.joinedDimension.has_value()) + { + error::throwOperationUnsupportedInBackend( + "ADIOS1", "Joined Arrays require ADIOS2 >= v2.9"); + } +#endif if (!writable->written) { /* Sanitize name */ @@ -741,6 +782,7 @@ void ADIOS2IOHandlerImpl::createDataset( auto const file = refreshFileFromParent(writable, /* preferParentFile = */ true); + writable->abstractFilePosition.reset(); auto filePos = setAndGetFilePosition(writable, name); filePos->gd = GroupOrDataset::DATASET; auto const varName = nameOfVariable(writable); @@ -767,8 +809,11 @@ void ADIOS2IOHandlerImpl::createDataset( varName + "' remain unused:\n"); // cast from openPMD::Extent to adios2::Dims - adios2::Dims const shape( - parameters.extent.begin(), parameters.extent.end()); + adios2::Dims shape(parameters.extent.begin(), parameters.extent.end()); + if (auto jd = parameters.joinedDimension; jd.has_value()) + { + shape[jd.value()] = adios2::JoinedDim; + } auto &fileData = getFileData(file, IfFileNotOpen::ThrowError); @@ -883,6 +928,7 @@ void ADIOS2IOHandlerImpl::openFile( // lazy opening is deathly in parallel situations auto &fileData = getFileData(file, IfFileNotOpen::OpenImplicitly); *parameters.out_parsePreference = fileData.parsePreference; + m_dirty.emplace(std::move(file)); } void ADIOS2IOHandlerImpl::closeFile( @@ -902,9 +948,7 @@ void ADIOS2IOHandlerImpl::closeFile( */ it->second->flush( FlushLevel::UserFlush, - [](detail::BufferedActions &ba, adios2::Engine &) { - ba.finalize(); - }, + [](detail::ADIOS2File &ba, adios2::Engine &) { ba.finalize(); }, /* writeLatePuts = */ true, /* flushUnconditionally = */ false); m_fileData.erase(it); @@ -993,7 +1037,7 @@ void ADIOS2IOHandlerImpl::writeDataset( "[ADIOS2] Cannot write data in read-only mode."); setAndGetFilePosition(writable); auto file = refreshFileFromParent(writable, /* 
preferParentFile = */ false); - detail::BufferedActions &ba = getFileData(file, IfFileNotOpen::ThrowError); + detail::ADIOS2File &ba = getFileData(file, IfFileNotOpen::ThrowError); detail::BufferedPut bp; bp.name = nameOfVariable(writable); bp.param = std::move(parameters); @@ -1044,7 +1088,7 @@ void ADIOS2IOHandlerImpl::readDataset( { setAndGetFilePosition(writable); auto file = refreshFileFromParent(writable, /* preferParentFile = */ false); - detail::BufferedActions &ba = getFileData(file, IfFileNotOpen::ThrowError); + detail::ADIOS2File &ba = getFileData(file, IfFileNotOpen::ThrowError); detail::BufferedGet bg; bg.name = nameOfVariable(writable); bg.param = parameters; @@ -1060,7 +1104,7 @@ namespace detail static void call( ADIOS2IOHandlerImpl *impl, Parameter ¶ms, - detail::BufferedActions &ba, + detail::ADIOS2File &ba, std::string const &varName) { auto &IO = ba.m_IO; @@ -1127,7 +1171,7 @@ void ADIOS2IOHandlerImpl::getBufferView( begin(optInEngines), end(optInEngines), [this](std::string const &engine) { - return engine == this->m_engineType; + return engine == this->realEngineType(); })) { parameters.out->backendManagedBuffer = false; @@ -1135,7 +1179,7 @@ void ADIOS2IOHandlerImpl::getBufferView( } setAndGetFilePosition(writable); auto file = refreshFileFromParent(writable, /* preferParentFile = */ false); - detail::BufferedActions &ba = getFileData(file, IfFileNotOpen::ThrowError); + detail::ADIOS2File &ba = getFileData(file, IfFileNotOpen::ThrowError); std::string name = nameOfVariable(writable); switch (m_useSpanBasedPutByDefault) @@ -1188,7 +1232,7 @@ void ADIOS2IOHandlerImpl::readAttribute( { auto file = refreshFileFromParent(writable, /* preferParentFile = */ false); auto pos = setAndGetFilePosition(writable); - detail::BufferedActions &ba = getFileData(file, IfFileNotOpen::ThrowError); + detail::ADIOS2File &ba = getFileData(file, IfFileNotOpen::ThrowError); auto name = nameOfAttribute(writable, parameters.name); auto type = 
detail::attributeInfo(ba.m_IO, name, /* verbose = */ true); @@ -1275,11 +1319,11 @@ void ADIOS2IOHandlerImpl::listPaths( } case UseGroupTable::Yes: { { - auto tablePrefix = ADIOS2Defaults::str_activeTablePrefix + myName; + auto tablePrefix = adios_defaults::str_activeTablePrefix + myName; std::vector attrs = fileData.availableAttributesPrefixed(tablePrefix); if (fileData.streamStatus == - detail::BufferedActions::StreamStatus::DuringStep) + detail::ADIOS2File::StreamStatus::DuringStep) { auto currentStep = fileData.currentStep(); for (auto const &attrName : attrs) @@ -1451,12 +1495,12 @@ void ADIOS2IOHandlerImpl::availableChunks( { setAndGetFilePosition(writable); auto file = refreshFileFromParent(writable, /* preferParentFile = */ false); - detail::BufferedActions &ba = getFileData(file, IfFileNotOpen::ThrowError); + detail::ADIOS2File &ba = getFileData(file, IfFileNotOpen::ThrowError); std::string varName = nameOfVariable(writable); auto engine = ba.getEngine(); // make sure that data are present auto datatype = detail::fromADIOS2Type(ba.m_IO.VariableType(varName)); - bool allSteps = ba.streamStatus == - detail::BufferedActions::StreamStatus::ReadWithoutStream; + bool allSteps = ba.m_mode != adios2::Mode::Read && + ba.streamStatus == detail::ADIOS2File::StreamStatus::ReadWithoutStream; switchAdios2VariableType( datatype, parameters, @@ -1472,8 +1516,55 @@ void ADIOS2IOHandlerImpl::deregister( m_files.erase(writable); } +void ADIOS2IOHandlerImpl::touch( + Writable *writable, Parameter const &) +{ + auto file = refreshFileFromParent(writable, /* preferParentFile = */ false); + m_dirty.emplace(std::move(file)); +} + adios2::Mode ADIOS2IOHandlerImpl::adios2AccessMode(std::string const &fullPath) { + if (m_config.json().contains("engine") && + m_config["engine"].json().contains("access_mode")) + { + auto const &access_mode_json = m_config["engine"]["access_mode"].json(); + auto maybe_access_mode_string = + json::asLowerCaseStringDynamic(access_mode_json); + if 
(!maybe_access_mode_string.has_value()) + { + throw error::BackendConfigSchema( + {"adios2", "engine", "access_mode"}, "Must be of string type."); + } + auto access_mode_string = *maybe_access_mode_string; + using pair_t = std::pair; + constexpr std::array modeNames{ + pair_t{"write", adios2::Mode::Write}, + pair_t{"read", adios2::Mode::Read}, + pair_t{"append", adios2::Mode::Append} +#if openPMD_HAS_ADIOS_2_8 + , + pair_t{"readrandomaccess", adios2::Mode::ReadRandomAccess} +#endif + }; + for (auto const &[name, mode] : modeNames) + { + if (name == access_mode_string) + { + return mode; + } + } + std::stringstream error; + error << "Unsupported value '" << access_mode_string + << "'. Must be one of:"; + for (auto const &pair : modeNames) + { + error << " '" << pair.first << "'"; + } + error << '.'; + throw error::BackendConfigSchema( + {"adios2", "engine", "access_mode"}, error.str()); + } switch (m_handler->m_backendAccess) { case Access::CREATE: @@ -1588,7 +1679,7 @@ GroupOrDataset ADIOS2IOHandlerImpl::groupOrDataset(Writable *writable) return setAndGetFilePosition(writable)->gd; } -detail::BufferedActions &ADIOS2IOHandlerImpl::getFileData( +detail::ADIOS2File &ADIOS2IOHandlerImpl::getFileData( InvalidatableFile const &file, IfFileNotOpen flag) { VERIFY_ALWAYS( @@ -1603,7 +1694,7 @@ detail::BufferedActions &ADIOS2IOHandlerImpl::getFileData( case IfFileNotOpen::OpenImplicitly: { auto res = m_fileData.emplace( - file, std::make_unique(*this, file)); + file, std::make_unique(*this, file)); return *res.first->second; } case IfFileNotOpen::ThrowError: @@ -1628,75 +1719,8 @@ void ADIOS2IOHandlerImpl::dropFileData(InvalidatableFile const &file) } } -template -adios2::Variable ADIOS2IOHandlerImpl::verifyDataset( - Offset const &offset, - Extent const &extent, - adios2::IO &IO, - std::string const &varName) -{ - { - auto requiredType = adios2::GetType(); - auto actualType = IO.VariableType(varName); - std::stringstream errorMessage; - errorMessage - << "[ADIOS2] Trying to 
access a dataset with wrong type (trying to " - "access dataset with type " - << determineDatatype() << ", but has type " - << detail::fromADIOS2Type(actualType, false) << ")"; - VERIFY_ALWAYS(requiredType == actualType, errorMessage.str()); - } - adios2::Variable var = IO.InquireVariable(varName); - VERIFY_ALWAYS( - var.operator bool(), - "[ADIOS2] Internal error: Failed opening ADIOS2 variable.") - // TODO leave this check to ADIOS? - adios2::Dims shape = var.Shape(); - auto actualDim = shape.size(); - { - auto requiredDim = extent.size(); - VERIFY_ALWAYS( - requiredDim == actualDim, - "[ADIOS2] Trying to access a dataset with wrong dimensionality " - "(trying to access dataset with dimensionality " + - std::to_string(requiredDim) + ", but has dimensionality " + - std::to_string(actualDim) + ")") - } - for (unsigned int i = 0; i < actualDim; i++) - { - VERIFY_ALWAYS( - offset[i] + extent[i] <= shape[i], - "[ADIOS2] Dataset access out of bounds.") - } - - var.SetSelection( - {adios2::Dims(offset.begin(), offset.end()), - adios2::Dims(extent.begin(), extent.end())}); - return var; -} - namespace detail { - template - void DatasetReader::call( - ADIOS2IOHandlerImpl *impl, - detail::BufferedGet &bp, - adios2::IO &IO, - adios2::Engine &engine, - std::string const &fileName) - { - adios2::Variable var = impl->verifyDataset( - bp.param.offset, bp.param.extent, IO, bp.name); - if (!var) - { - throw std::runtime_error( - "[ADIOS2] Failed retrieving ADIOS2 Variable with name '" + - bp.name + "' from file " + fileName + "."); - } - auto ptr = std::static_pointer_cast(bp.param.data).get(); - engine.Get(var, ptr); - } - template Datatype AttributeReader::call( ADIOS2IOHandlerImpl &impl, @@ -1723,7 +1747,7 @@ namespace detail } std::string metaAttr; - metaAttr = ADIOS2Defaults::str_isBoolean + name; + metaAttr = adios_defaults::str_isBoolean + name; /* * In verbose mode, attributeInfo will yield a warning if not * finding the requested attribute. 
Since we expect the attribute @@ -1957,7 +1981,7 @@ namespace detail else if constexpr (std::is_same_v) { IO.DefineAttribute( - ADIOS2Defaults::str_isBoolean + fullName, 1); + adios_defaults::str_isBoolean + fullName, 1); auto representation = bool_repr::toRep(value); defineAttribute(representation); } @@ -2012,64 +2036,6 @@ namespace detail template inline constexpr bool always_false_v = false; - template - void WriteDataset::call(BufferedActions &ba, detail::BufferedPut &bp) - { - VERIFY_ALWAYS( - access::write(ba.m_impl->m_handler->m_backendAccess), - "[ADIOS2] Cannot write data in read-only mode."); - - std::visit( - [&](auto &&arg) { - using ptr_type = std::decay_t; - if constexpr (std::is_same_v< - ptr_type, - std::shared_ptr>) - { - auto ptr = static_cast(arg.get()); - - adios2::Variable var = ba.m_impl->verifyDataset( - bp.param.offset, bp.param.extent, ba.m_IO, bp.name); - - ba.getEngine().Put(var, ptr); - } - else if constexpr (std::is_same_v< - ptr_type, - UniquePtrWithLambda>) - { - BufferedUniquePtrPut bput; - bput.name = std::move(bp.name); - bput.offset = std::move(bp.param.offset); - bput.extent = std::move(bp.param.extent); - /* - * Note: Moving is required here since it's a unique_ptr. - * std::forward<>() would theoretically work, but it - * requires the type parameter and we don't have that - * inside the lambda. - * (ptr_type does not work for this case). - */ - // clang-format off - bput.data = std::move(arg); // NOLINT(bugprone-move-forwarding-reference) - // clang-format on - bput.dtype = bp.param.dtype; - ba.m_uniquePtrPuts.push_back(std::move(bput)); - } - else - { - static_assert( - always_false_v, - "Unhandled std::variant branch"); - } - }, - bp.param.data.m_buffer); - } - - template - void WriteDataset::call(Params &&...) 
- { - throw std::runtime_error("[ADIOS2] WRITE_DATASET: Invalid datatype."); - } - template void VariableDefiner::call( adios2::IO &IO, @@ -2172,1241 +2138,6 @@ namespace detail { // variable has not been found, so we don't fill in any blocks } - - void BufferedGet::run(BufferedActions &ba) - { - switchAdios2VariableType( - param.dtype, ba.m_impl, *this, ba.m_IO, ba.getEngine(), ba.m_file); - } - - void BufferedPut::run(BufferedActions &ba) - { - switchAdios2VariableType(param.dtype, ba, *this); - } - - struct RunUniquePtrPut - { - template - static void call(BufferedUniquePtrPut &bufferedPut, BufferedActions &ba) - { - auto ptr = static_cast(bufferedPut.data.get()); - adios2::Variable var = ba.m_impl->verifyDataset( - bufferedPut.offset, - bufferedPut.extent, - ba.m_IO, - bufferedPut.name); - ba.getEngine().Put(var, ptr); - } - - static constexpr char const *errorMsg = "RunUniquePtrPut"; - }; - - void BufferedUniquePtrPut::run(BufferedActions &ba) - { - switchAdios2VariableType(dtype, *this, ba); - } - - BufferedActions::BufferedActions( - ADIOS2IOHandlerImpl &impl, InvalidatableFile file) - : m_file(impl.fullPath(std::move(file))) - , m_ADIOS(impl.m_ADIOS) - , m_impl(&impl) - , m_engineType(impl.m_engineType) - { - // Declaring these members in the constructor body to avoid - // initialization order hazards. 
Need the IO_ prefix since in some - // situation there seems to be trouble with number-only IO names - m_mode = impl.adios2AccessMode(m_file); - create_IO(); - if (!m_IO) - { - throw std::runtime_error( - "[ADIOS2] Internal error: Failed declaring ADIOS2 IO object " - "for file " + - m_file); - } - else - { - configure_IO(impl); - } - } - - void BufferedActions::create_IO() - { - m_IOName = std::to_string(m_impl->nameCounter++); - m_IO = m_impl->m_ADIOS.DeclareIO("IO_" + m_IOName); - } - - BufferedActions::~BufferedActions() - { - finalize(); - } - - void BufferedActions::finalize() - { - if (finalized) - { - return; - } - // if write accessing, ensure that the engine is opened - // and that all datasets are written - // (attributes and unique_ptr datasets are written upon closing a step - // or a file which users might never do) - bool needToWrite = !m_uniquePtrPuts.empty(); - if ((needToWrite || !m_engine) && writeOnly(m_mode)) - { - getEngine(); - for (auto &entry : m_uniquePtrPuts) - { - entry.run(*this); - } - } - if (m_engine) - { - auto &engine = m_engine.value(); - // might have been closed previously - if (engine) - { - if (streamStatus == StreamStatus::DuringStep) - { - engine.EndStep(); - } - engine.Close(); - m_ADIOS.RemoveIO(m_IOName); - } - } - finalized = true; - } - - namespace - { - constexpr char const *alwaysSupportsUpfrontParsing[] = {"bp3", "hdf5"}; - constexpr char const *supportsUpfrontParsingInRandomAccessMode[] = { - "bp4", "bp5", "file", "filestream"}; - constexpr char const *nonPersistentEngines[] = { - "sst", "insitumpi", "inline", "staging", "nullcore", "ssc"}; - - bool supportedEngine(std::string const &engineType) - { - auto is_in_list = [&engineType](auto &list) { - for (auto const &e : list) - { - if (engineType == e) - { - return true; - } - } - return false; - }; - return is_in_list(alwaysSupportsUpfrontParsing) || - is_in_list(supportsUpfrontParsingInRandomAccessMode) || - is_in_list(nonPersistentEngines); - } - - bool - 
supportsUpfrontParsing(Access access, std::string const &engineType) - { - for (auto const &e : alwaysSupportsUpfrontParsing) - { - if (e == engineType) - { - return true; - } - } - if (access != Access::READ_LINEAR) - { - for (auto const &e : supportsUpfrontParsingInRandomAccessMode) - { - if (e == engineType) - { - return true; - } - } - } - return false; - } - - enum class PerstepParsing - { - Supported, - Unsupported, - Required - }; - - PerstepParsing - supportsPerstepParsing(Access access, std::string const &engineType) - { - // required in all streaming engines - for (auto const &e : nonPersistentEngines) - { - if (engineType == e) - { - return PerstepParsing::Required; - } - } - // supported in file engines in READ_LINEAR mode - if (access != Access::READ_RANDOM_ACCESS) - { - return PerstepParsing::Supported; - } - - return PerstepParsing::Unsupported; - } - - bool nonpersistentEngine(std::string const &engineType) - { - for (auto &e : nonPersistentEngines) - { - if (e == engineType) - { - return true; - } - } - return false; - } - } // namespace - - size_t BufferedActions::currentStep() - { - if (nonpersistentEngine(m_engineType)) - { - return m_currentStep; - } - else - { - return getEngine().CurrentStep(); - } - } - - void BufferedActions::configure_IO_Read() - { - bool upfrontParsing = supportsUpfrontParsing( - m_impl->m_handler->m_backendAccess, m_engineType); - PerstepParsing perstepParsing = supportsPerstepParsing( - m_impl->m_handler->m_backendAccess, m_engineType); - - switch (m_impl->m_handler->m_backendAccess) - { - case Access::READ_LINEAR: - switch (perstepParsing) - { - case PerstepParsing::Supported: - case PerstepParsing::Required: - // all is fine, we can go forward with READ_LINEAR mode - /* - * We don't know yet if per-step parsing will be fine since the - * engine is not opened yet. - * In non-persistent (streaming) engines, per-step parsing is - * always fine and always required. 
- */ - streamStatus = nonpersistentEngine(m_engineType) - ? StreamStatus::OutsideOfStep - : StreamStatus::Undecided; - parsePreference = ParsePreference::PerStep; - m_IO.SetParameter("StreamReader", "On"); - break; - case PerstepParsing::Unsupported: - throw error::Internal( - "Internal control flow error: Per-Step parsing cannot be " - "unsupported when access type is READ_LINEAR"); - break; - } - break; - case Access::READ_ONLY: - case Access::READ_WRITE: - /* - * Prefer up-front parsing, but try to fallback to per-step parsing - * if possible. - */ - if (upfrontParsing == nonpersistentEngine(m_engineType)) - { - throw error::Internal( - "Internal control flow error: With access types " - "READ_ONLY/READ_WRITE, support for upfront parsing is " - "equivalent to the chosen engine being file-based."); - } - if (upfrontParsing) - { - streamStatus = StreamStatus::ReadWithoutStream; - parsePreference = ParsePreference::UpFront; - } - else - { - /* - * Scenario: A step-only workflow was used (i.e. a streaming - * engine), but Access::READ_ONLY was specified. - * Fall back to streaming read mode. 
- */ - m_mode = adios2::Mode::Read; - parsePreference = ParsePreference::PerStep; - streamStatus = StreamStatus::OutsideOfStep; - } - break; - default: - VERIFY_ALWAYS( - access::writeOnly(m_impl->m_handler->m_backendAccess), - "Internal control flow error: Must set parse preference for " - "any read mode."); - } - } - - void BufferedActions::configure_IO_Write() - { - optimizeAttributesStreaming = - // Also, it should only be done when truly streaming, not - // when using a disk-based engine that behaves like a - // streaming engine (otherwise attributes might vanish) - nonpersistentEngine(m_engineType); - - streamStatus = StreamStatus::OutsideOfStep; - } - - void BufferedActions::configure_IO(ADIOS2IOHandlerImpl &impl) - { - // step/variable-based iteration encoding requires use of group tables - // but the group table feature is available only in ADIOS2 >= v2.9 - // use old layout to support at least one single iteration otherwise - // these properties are inferred from the opened dataset in read mode - if (writeOnly(m_mode)) - { - -#if openPMD_HAS_ADIOS_2_9 - if (!m_impl->m_useGroupTable.has_value()) - { - switch (m_impl->m_handler->m_encoding) - { - /* - * For variable-based encoding, this does not matter as it is - * new and requires >= v2.9 features anyway. - */ - case IterationEncoding::variableBased: - m_impl->m_useGroupTable = UseGroupTable::Yes; - break; - case IterationEncoding::groupBased: - case IterationEncoding::fileBased: - m_impl->m_useGroupTable = UseGroupTable::No; - break; - } - } - - if (m_impl->m_modifiableAttributes == - ADIOS2IOHandlerImpl::ModifiableAttributes::Unspecified) - { - m_impl->m_modifiableAttributes = - m_impl->m_handler->m_encoding == - IterationEncoding::variableBased - ? 
ADIOS2IOHandlerImpl::ModifiableAttributes::Yes - : ADIOS2IOHandlerImpl::ModifiableAttributes::No; - } -#else - if (!m_impl->m_useGroupTable.has_value()) - { - m_impl->m_useGroupTable = UseGroupTable::No; - } - - m_impl->m_modifiableAttributes = - ADIOS2IOHandlerImpl::ModifiableAttributes::No; -#endif - } - - // set engine type - { - m_IO.SetEngine(m_engineType); - } - - if (!supportedEngine(m_engineType)) - { - std::stringstream sstream; - sstream - << "User-selected ADIOS2 engine '" << m_engineType - << "' is not recognized by the openPMD-api. Select one of: '"; - bool first_entry = true; - auto add_entries = [&first_entry, &sstream](auto &list) { - for (auto const &e : list) - { - if (first_entry) - { - sstream << e; - first_entry = false; - } - else - { - sstream << ", " << e; - } - } - }; - add_entries(alwaysSupportsUpfrontParsing); - add_entries(supportsUpfrontParsingInRandomAccessMode); - add_entries(nonPersistentEngines); - sstream << "'." << std::endl; - throw error::WrongAPIUsage(sstream.str()); - } - - // set engine parameters - std::set alreadyConfigured; - bool wasTheFlushTargetSpecifiedViaJSON = false; - auto engineConfig = impl.config(ADIOS2Defaults::str_engine); - if (!engineConfig.json().is_null()) - { - auto params = impl.config(ADIOS2Defaults::str_params, engineConfig); - params.declareFullyRead(); - if (params.json().is_object()) - { - for (auto it = params.json().begin(); it != params.json().end(); - it++) - { - auto maybeString = json::asStringDynamic(it.value()); - if (maybeString.has_value()) - { - m_IO.SetParameter( - it.key(), std::move(maybeString.value())); - } - else - { - throw error::BackendConfigSchema( - {"adios2", "engine", "parameters", it.key()}, - "Must be convertible to string type."); - } - alreadyConfigured.emplace( - auxiliary::lowerCase(std::string(it.key()))); - } - } - auto _useAdiosSteps = - impl.config(ADIOS2Defaults::str_usesteps, engineConfig); - if (!_useAdiosSteps.json().is_null() && writeOnly(m_mode)) - { - 
std::cerr << "[ADIOS2 backend] WARNING: Parameter " - "`adios2.engine.usesteps` is deprecated since use " - "of steps is now always enabled." - << std::endl; - } - - if (engineConfig.json().contains(ADIOS2Defaults::str_flushtarget)) - { - auto target = json::asLowerCaseStringDynamic( - engineConfig[ADIOS2Defaults::str_flushtarget].json()); - if (!target.has_value()) - { - throw error::BackendConfigSchema( - {"adios2", "engine", ADIOS2Defaults::str_flushtarget}, - "Flush target must be either 'disk' or 'buffer', but " - "was non-literal type."); - } - m_impl->m_flushTarget = flushTargetFromString(target.value()); - wasTheFlushTargetSpecifiedViaJSON = true; - } - } - - auto shadow = impl.m_config.invertShadow(); - if (shadow.size() > 0) - { - switch (impl.m_config.originallySpecifiedAs) - { - case json::SupportedLanguages::JSON: - std::cerr << "Warning: parts of the backend configuration for " - "ADIOS2 remain unused:\n" - << shadow << std::endl; - break; - case json::SupportedLanguages::TOML: { - auto asToml = json::jsonToToml(shadow); - std::cerr << "Warning: parts of the backend configuration for " - "ADIOS2 remain unused:\n" - << asToml << std::endl; - break; - } - } - } - - switch (m_impl->m_handler->m_backendAccess) - { - case Access::READ_LINEAR: - case Access::READ_ONLY: - configure_IO_Read(); - break; - case Access::READ_WRITE: - if (readOnly(m_mode)) - { - configure_IO_Read(); - } - else - { - configure_IO_Write(); - } - break; - case Access::APPEND: - case Access::CREATE: - configure_IO_Write(); - break; - } - - auto notYetConfigured = [&alreadyConfigured](std::string const ¶m) { - auto it = alreadyConfigured.find( - auxiliary::lowerCase(std::string(param))); - return it == alreadyConfigured.end(); - }; - - // read parameters from environment - if (notYetConfigured("CollectiveMetadata")) - { - if (1 == - auxiliary::getEnvNum("OPENPMD_ADIOS2_HAVE_METADATA_FILE", 1)) - { - m_IO.SetParameter("CollectiveMetadata", "On"); - } - else - { - 
m_IO.SetParameter("CollectiveMetadata", "Off"); - } - } - if (notYetConfigured("Profile")) - { - if (1 == auxiliary::getEnvNum("OPENPMD_ADIOS2_HAVE_PROFILING", 1) && - notYetConfigured("Profile")) - { - m_IO.SetParameter("Profile", "On"); - } - else - { - m_IO.SetParameter("Profile", "Off"); - } - } - if (notYetConfigured("AsyncWrite")) - { - if (1 == auxiliary::getEnvNum("OPENPMD_ADIOS2_ASYNC_WRITE", 0) && - notYetConfigured("AsyncWrite")) - { - m_IO.SetParameter("AsyncWrite", "On"); - if (!wasTheFlushTargetSpecifiedViaJSON) - { - m_impl->m_flushTarget = FlushTarget::Buffer; - } - } - else - { - m_IO.SetParameter("AsyncWrite", "Off"); - } - } - -#if openPMD_HAVE_MPI - { - auto num_substreams = - auxiliary::getEnvNum("OPENPMD_ADIOS2_NUM_SUBSTREAMS", 0); - if (notYetConfigured("SubStreams") && 0 != num_substreams) - { - m_IO.SetParameter("SubStreams", std::to_string(num_substreams)); - } - - // BP5 parameters - auto numAgg = auxiliary::getEnvNum("OPENPMD_ADIOS2_BP5_NumAgg", 0); - auto numSubFiles = - auxiliary::getEnvNum("OPENPMD_ADIOS2_BP5_NumSubFiles", 0); - auto AggTypeStr = - auxiliary::getEnvString("OPENPMD_ADIOS2_BP5_TypeAgg", ""); - auto MaxShmMB = - auxiliary::getEnvNum("OPENPMD_ADIOS2_BP5_MaxShmMB", 0); - auto BufferChunkMB = - auxiliary::getEnvNum("OPENPMD_ADIOS2_BP5_BufferChunkMB", 0); - - if (notYetConfigured("NumAggregators") && (numAgg > 0)) - m_IO.SetParameter("NumAggregators", std::to_string(numAgg)); - if (notYetConfigured("NumSubFiles") && (numSubFiles > 0)) - m_IO.SetParameter("NumSubFiles", std::to_string(numSubFiles)); - if (notYetConfigured("AggregationType") && (AggTypeStr.size() > 0)) - m_IO.SetParameter("AggregationType", AggTypeStr); - if (notYetConfigured("BufferChunkSize") && (BufferChunkMB > 0)) - m_IO.SetParameter( - "BufferChunkSize", - std::to_string( - (uint64_t)BufferChunkMB * (uint64_t)1048576)); - if (notYetConfigured("MaxShmSize") && (MaxShmMB > 0)) - m_IO.SetParameter( - "MaxShmSize", - std::to_string((uint64_t)MaxShmMB * 
(uint64_t)1048576)); - } -#endif - if (notYetConfigured("StatsLevel")) - { - /* - * Switch those off by default since they are expensive to compute - * and to enable it, set the JSON option "StatsLevel" or the - * environment variable "OPENPMD_ADIOS2_STATS_LEVEL" be positive. - * The ADIOS2 default was "1" (on). - */ - auto stats_level = - auxiliary::getEnvNum("OPENPMD_ADIOS2_STATS_LEVEL", 0); - m_IO.SetParameter("StatsLevel", std::to_string(stats_level)); - } - if (m_engineType == "sst" && notYetConfigured("QueueLimit")) - { - /* - * By default, the SST engine of ADIOS2 does not set a limit on its - * internal queue length. - * If the reading end is slower than the writing end, this will - * lead to a congestion in the queue and hence an increasing - * memory usage while the writing code goes forward. - * We could set a default queue limit of 1, thus forcing the - * two codes to proceed entirely in lock-step. - * We prefer a default queue limit of 2, which is still lower than - * the default infinity, but allows writer and reader to process - * data asynchronously as long as neither code fails to keep up the - * rhythm. The writer can produce the next iteration while the - * reader still deals with the old one. - * Thus, a limit of 2 is a good balance between 1 and infinity, - * keeping pipeline parallelism a default without running the risk - * of using unbound memory. 
- */ - m_IO.SetParameter("QueueLimit", "2"); - } - - // We need to open the engine now already to inquire configuration - // options stored in there - getEngine(); - } - - UseGroupTable BufferedActions::detectGroupTable() - { - auto const &attributes = availableAttributes(); - auto lower_bound = - attributes.lower_bound(ADIOS2Defaults::str_activeTablePrefix); - if (lower_bound != attributes.end() && - auxiliary::starts_with( - lower_bound->first, ADIOS2Defaults::str_activeTablePrefix)) - { - return UseGroupTable::Yes; - } - else - { - return UseGroupTable::No; - } - } - - adios2::Engine &BufferedActions::getEngine() - { - if (!m_engine) - { - auto tempMode = m_mode; - switch (m_mode) - { - case adios2::Mode::Append: -#ifdef _WIN32 - /* - * On Windows, ADIOS2 v2.8. Append mode only works with existing - * files. So, we first check for file existence and switch to - * create mode if it does not exist. - * - * See issue: https://github.com/ornladios/ADIOS2/issues/3358 - */ - tempMode = m_impl->checkFile(m_file) ? adios2::Mode::Append - : adios2::Mode::Write; - [[fallthrough]]; -#endif - case adios2::Mode::Write: { - // usesSteps attribute only written upon ::advance() - // this makes sure that the attribute is only put in case - // the streaming API was used. - m_engine = std::make_optional( - adios2::Engine(m_IO.Open(m_file, tempMode))); - m_engine->BeginStep(); - streamStatus = StreamStatus::DuringStep; - break; - } -#if openPMD_HAS_ADIOS_2_8 - case adios2::Mode::ReadRandomAccess: -#endif - case adios2::Mode::Read: { - m_engine = std::make_optional( - adios2::Engine(m_IO.Open(m_file, m_mode))); - /* - * First round: detect use of group table - */ - bool openedANewStep = false; - { - if (!supportsUpfrontParsing( - m_impl->m_handler->m_backendAccess, m_engineType)) - { - /* - * In BP5 with Linear read mode, we now need to - * tentatively open the first IO step. - * Otherwise we don't see the group table attributes. - * This branch is also taken by Streaming engines. 
- */ - if (m_engine->BeginStep() != adios2::StepStatus::OK) - { - throw std::runtime_error( - "[ADIOS2] Unexpected step status when " - "opening file/stream."); - } - openedANewStep = true; - } - - if (m_impl->m_useGroupTable.has_value()) - { - switch (m_impl->m_useGroupTable.value()) - { - case UseGroupTable::Yes: { - auto detectedGroupTable = detectGroupTable(); - if (detectedGroupTable == UseGroupTable::No) - { - std::cerr - << "[Warning] User requested use of group " - "table when reading from ADIOS2 " - "dataset, but no group table has been " - "found. Will ignore." - << std::endl; - m_impl->m_useGroupTable = UseGroupTable::No; - } - } - case openPMD::UseGroupTable::No: - break; - } - } - else - { - m_impl->m_useGroupTable = detectGroupTable(); - } - }; - - /* - * Second round: Decide the streamStatus. - */ - switch (streamStatus) - { - case StreamStatus::Undecided: { - auto attr = m_IO.InquireAttribute( - ADIOS2Defaults::str_usesstepsAttribute); - if (attr && attr.Data()[0] == 1) - { - if (parsePreference == ParsePreference::UpFront) - { - if (openedANewStep) - { - throw error::Internal( - "Logic error in ADIOS2 backend! No need to " - "indiscriminately open a step before doing " - "anything in an engine that supports " - "up-front parsing."); - } - streamStatus = StreamStatus::ReadWithoutStream; - } - else - { - // If the iteration encoding is group-based and - // no group table is used, we're now at a dead-end. - // Step-by-Step parsing is unreliable in that mode - // since groups might be reported that are not - // there. - // But we were only able to find this out by opening - // the ADIOS2 file with an access mode that was - // possibly wrong, so we would have to close and - // reopen here. - // Since group-based encoding is a bag of trouble in - // ADIOS2 anyway, we just don't support this - // particular use case. - // This failure will only arise when the following - // conditions are met: - // - // 1) group-based encoding - // 2) no group table (i.e. 
old "ADIOS2 schema") - // 3) LINEAR access mode - // - // This is a relatively lenient restriction compared - // to forbidding group-based encoding in ADIOS2 - // altogether. - if (m_impl->m_useGroupTable.value() == - UseGroupTable::No && - m_IO.InquireAttribute( - ADIOS2Defaults::str_groupBasedWarning)) - { - throw error::OperationUnsupportedInBackend( - "ADIOS2", - "Trying to open a group-based ADIOS2 file " - "that does not have a group table with " - "LINEAR access type. That combination is " - "very buggy, so please use " - "READ_ONLY/READ_RANDOM_ACCESS instead."); - } - if (!openedANewStep && - m_engine.value().BeginStep() != - adios2::StepStatus::OK) - { - throw std::runtime_error( - "[ADIOS2] Unexpected step status when " - "opening file/stream."); - } - streamStatus = StreamStatus::DuringStep; - } - } - else - { - /* - * If openedANewStep is true, then the file consists - * of one large step, we just leave it open. - */ - streamStatus = StreamStatus::ReadWithoutStream; - } - break; - } - case StreamStatus::ReadWithoutStream: - // using random-access mode - break; - case StreamStatus::DuringStep: - throw error::Internal( - "[ADIOS2] Control flow error: stream status cannot be " - "DuringStep before opening the engine."); - case StreamStatus::OutsideOfStep: - if (openedANewStep) - { - streamStatus = StreamStatus::DuringStep; - } - else - { - throw error::Internal( - "Control flow error: Step should have been opened " - "before this point."); - } - break; - default: - throw std::runtime_error("[ADIOS2] Control flow error!"); - } - break; - } - default: - throw std::runtime_error("[ADIOS2] Invalid ADIOS access mode"); - } - - if (!m_engine.value()) - { - throw std::runtime_error("[ADIOS2] Failed opening Engine."); - } - } - return m_engine.value(); - } - - template - void BufferedActions::enqueue(BA &&ba) - { - enqueue(std::forward(ba), m_buffer); - } - - template - void BufferedActions::enqueue(BA &&ba, decltype(m_buffer) &buffer) - { - using _BA = typename 
std::remove_reference::type; - buffer.emplace_back( - std::unique_ptr(new _BA(std::forward(ba)))); - } - - template - void BufferedActions::flush(Args &&...args) - { - try - { - flush_impl(std::forward(args)...); - } - catch (error::ReadError const &) - { - /* - * We need to take actions out of the buffer, since an exception - * should reset everything from the current IOHandler->flush() call. - * However, we cannot simply clear the buffer, since tasks may have - * been enqueued to ADIOS2 already and we cannot undo that. - * So, we need to keep the memory alive for the benefit of ADIOS2. - * Luckily, we have m_alreadyEnqueued for exactly that purpose. - */ - for (auto &task : m_buffer) - { - m_alreadyEnqueued.emplace_back(std::move(task)); - } - m_buffer.clear(); - throw; - } - } - - template - void BufferedActions::flush_impl( - ADIOS2FlushParams flushParams, - F &&performPutGets, - bool writeLatePuts, - bool flushUnconditionally) - { - auto level = flushParams.level; - if (streamStatus == StreamStatus::StreamOver) - { - if (flushUnconditionally) - { - throw std::runtime_error( - "[ADIOS2] Cannot access engine since stream is over."); - } - return; - } - auto &eng = getEngine(); - /* - * Only open a new step if it is necessary. 
- */ - if (streamStatus == StreamStatus::OutsideOfStep) - { - if (m_buffer.empty() && (!writeLatePuts || m_uniquePtrPuts.empty())) - { - if (flushUnconditionally) - { - performPutGets(*this, eng); - } - return; - } - } - for (auto &ba : m_buffer) - { - ba->run(*this); - } - - if (!initializedDefaults) - { - // Currently only schema 0 supported - if (m_impl->m_writeAttributesFromThisRank) - { - m_IO.DefineAttribute( - ADIOS2Defaults::str_adios2Schema, 0); - } - initializedDefaults = true; - } - - if (writeLatePuts) - { - for (auto &entry : m_uniquePtrPuts) - { - entry.run(*this); - } - } - - if (readOnly(m_mode)) - { - level = FlushLevel::UserFlush; - } - - switch (level) - { - case FlushLevel::UserFlush: - performPutGets(*this, eng); - m_updateSpans.clear(); - m_buffer.clear(); - m_alreadyEnqueued.clear(); - if (writeLatePuts) - { - m_uniquePtrPuts.clear(); - } - - break; - - case FlushLevel::InternalFlush: - case FlushLevel::SkeletonOnly: - case FlushLevel::CreateOrOpenFiles: - /* - * Tasks have been given to ADIOS2, but we don't flush them - * yet. So, move everything to m_alreadyEnqueued to avoid - * use-after-free. 
- */ - for (auto &task : m_buffer) - { - m_alreadyEnqueued.emplace_back(std::move(task)); - } - if (writeLatePuts) - { - throw error::Internal( - "ADIOS2 backend: Flush of late writes was requested at the " - "wrong time."); - } - m_buffer.clear(); - break; - } - } - - void BufferedActions::flush_impl( - ADIOS2FlushParams flushParams, bool writeLatePuts) - { - auto decideFlushAPICall = [this, flushTarget = flushParams.flushTarget]( - adios2::Engine &engine) { -#if ADIOS2_VERSION_MAJOR * 1000000000 + ADIOS2_VERSION_MINOR * 100000000 + \ - ADIOS2_VERSION_PATCH * 1000000 + ADIOS2_VERSION_TWEAK >= \ - 2701001223 - bool performDataWrite{}; - switch (flushTarget) - { - case FlushTarget::Disk: - case FlushTarget::Disk_Override: - performDataWrite = true; - break; - case FlushTarget::Buffer: - case FlushTarget::Buffer_Override: - performDataWrite = false; - break; - } - performDataWrite = performDataWrite && m_engineType == "bp5"; - - if (performDataWrite) - { - /* - * Deliberately don't write buffered attributes now since - * readers won't be able to see them before EndStep anyway, - * so there's no use. In fact, writing them now is harmful - * because they can't be overwritten after this anymore in the - * current step. - * Draining the uniquePtrPuts now is good however, since we - * should use this chance to free the memory. 
- */ - for (auto &entry : m_uniquePtrPuts) - { - entry.run(*this); - } - engine.PerformDataWrite(); - m_uniquePtrPuts.clear(); - } - else - { - engine.PerformPuts(); - } -#else - (void)this; - (void)flushTarget; - engine.PerformPuts(); -#endif - }; - - flush_impl( - flushParams, - [decideFlushAPICall = std::move(decideFlushAPICall)]( - BufferedActions &ba, adios2::Engine &eng) { - if (writeOnly(ba.m_mode)) - { - decideFlushAPICall(eng); - } - else - { - eng.PerformGets(); - } - }, - writeLatePuts, - /* flushUnconditionally = */ false); - } - - AdvanceStatus BufferedActions::advance(AdvanceMode mode) - { - if (streamStatus == StreamStatus::Undecided) - { - throw error::Internal( - "[BufferedActions::advance()] StreamStatus Undecided before " - "beginning/ending a step?"); - } - // sic! no else - if (streamStatus == StreamStatus::ReadWithoutStream) - { - flush( - ADIOS2FlushParams{FlushLevel::UserFlush}, - /* writeLatePuts = */ false); - return AdvanceStatus::RANDOMACCESS; - } - - switch (mode) - { - case AdvanceMode::ENDSTEP: { - /* - * Advance mode write: - * Close the current step, defer opening the new step - * until one is actually needed: - * (1) The engine is accessed in BufferedActions::flush - * (2) A new step is opened before the currently active step - * has seen an access. See the following lines: open the - * step just to skip it again. 
- */ - if (streamStatus == StreamStatus::OutsideOfStep) - { - if (getEngine().BeginStep() != adios2::StepStatus::OK) - { - throw std::runtime_error( - "[ADIOS2] Trying to close a step that cannot be " - "opened."); - } - } - - if (writeOnly(m_mode) && m_impl->m_writeAttributesFromThisRank && - !m_IO.InquireAttribute( - ADIOS2Defaults::str_usesstepsAttribute)) - { - m_IO.DefineAttribute( - ADIOS2Defaults::str_usesstepsAttribute, 1); - } - - flush( - ADIOS2FlushParams{FlushLevel::UserFlush}, - [](BufferedActions &, adios2::Engine &eng) { eng.EndStep(); }, - /* writeLatePuts = */ true, - /* flushUnconditionally = */ true); - uncommittedAttributes.clear(); - m_updateSpans.clear(); - streamStatus = StreamStatus::OutsideOfStep; - ++m_currentStep; - return AdvanceStatus::OK; - } - case AdvanceMode::BEGINSTEP: { - adios2::StepStatus adiosStatus{}; - - if (streamStatus != StreamStatus::DuringStep) - { - adiosStatus = getEngine().BeginStep(); - } - else - { - adiosStatus = adios2::StepStatus::OK; - } - AdvanceStatus res = AdvanceStatus::OK; - switch (adiosStatus) - { - case adios2::StepStatus::EndOfStream: - streamStatus = StreamStatus::StreamOver; - res = AdvanceStatus::OVER; - break; - case adios2::StepStatus::OK: - streamStatus = StreamStatus::DuringStep; - res = AdvanceStatus::OK; - break; - case adios2::StepStatus::NotReady: - case adios2::StepStatus::OtherError: - throw std::runtime_error("[ADIOS2] Unexpected step status."); - } - invalidateAttributesMap(); - invalidateVariablesMap(); - m_pathsMarkedAsActive.clear(); - return res; - } - } - throw std::runtime_error( - "Internal error: Advance mode should be explicitly" - " chosen by the front-end."); - } - - void BufferedActions::drop() - { - m_buffer.clear(); - } - - static std::vector availableAttributesOrVariablesPrefixed( - std::string const &prefix, - BufferedActions::AttributeMap_t const &( - BufferedActions::*getBasicMap)(), - BufferedActions &ba) - { - std::string var = - auxiliary::ends_with(prefix, '/') ? 
prefix : prefix + '/'; - BufferedActions::AttributeMap_t const &attributes = (ba.*getBasicMap)(); - std::vector ret; - for (auto it = attributes.lower_bound(prefix); it != attributes.end(); - ++it) - { - if (auxiliary::starts_with(it->first, var)) - { - ret.emplace_back(auxiliary::replace_first(it->first, var, "")); - } - else - { - break; - } - } - return ret; - } - - std::vector - BufferedActions::availableAttributesPrefixed(std::string const &prefix) - { - return availableAttributesOrVariablesPrefixed( - prefix, &BufferedActions::availableAttributes, *this); - } - - std::vector - BufferedActions::availableVariablesPrefixed(std::string const &prefix) - { - return availableAttributesOrVariablesPrefixed( - prefix, &BufferedActions::availableVariables, *this); - } - - void BufferedActions::invalidateAttributesMap() - { - m_availableAttributes = std::optional(); - } - - BufferedActions::AttributeMap_t const & - BufferedActions::availableAttributes() - { - if (m_availableAttributes) - { - return m_availableAttributes.value(); - } - else - { - m_availableAttributes = - std::make_optional(m_IO.AvailableAttributes()); - return m_availableAttributes.value(); - } - } - - void BufferedActions::invalidateVariablesMap() - { - m_availableVariables = std::optional(); - } - - BufferedActions::AttributeMap_t const &BufferedActions::availableVariables() - { - if (m_availableVariables) - { - return m_availableVariables.value(); - } - else - { - m_availableVariables = - std::make_optional(m_IO.AvailableVariables()); - return m_availableVariables.value(); - } - } - - void BufferedActions::markActive(Writable *writable) - { - switch (useGroupTable()) - { - case UseGroupTable::No: - break; - case UseGroupTable::Yes: -#if openPMD_HAS_ADIOS_2_9 - { - if (writeOnly(m_mode) && m_impl->m_writeAttributesFromThisRank) - { - auto currentStepBuffered = currentStep(); - do - { - using attr_t = unsigned long long; - auto filePos = m_impl->setAndGetFilePosition( - writable, /* write = */ false); - 
auto fullPath = ADIOS2Defaults::str_activeTablePrefix + - filePos->location; - m_IO.DefineAttribute( - fullPath, - currentStepBuffered, - /* variableName = */ "", - /* separator = */ "/", - /* allowModification = */ true); - m_pathsMarkedAsActive.emplace(writable); - writable = writable->parent; - } while (writable && - m_pathsMarkedAsActive.find(writable) == - m_pathsMarkedAsActive.end()); - } - } -#else - (void)writable; - throw error::OperationUnsupportedInBackend( - m_impl->m_handler->backendName(), - "Group table feature requires ADIOS2 >= v2.9."); -#endif - break; - } - } } // namespace detail #if openPMD_HAVE_MPI diff --git a/src/IO/AbstractIOHandlerHelper.cpp b/src/IO/AbstractIOHandlerHelper.cpp index 699dfd3619..8576343e5d 100644 --- a/src/IO/AbstractIOHandlerHelper.cpp +++ b/src/IO/AbstractIOHandlerHelper.cpp @@ -125,8 +125,23 @@ std::unique_ptr createIOHandler( "ssc", std::move(originalExtension)); case Format::JSON: - throw error::WrongAPIUsage( - "JSON backend not available in parallel openPMD."); + return constructIOHandler( + "JSON", + path, + access, + comm, + std::move(options), + JSONIOHandlerImpl::FileFormat::Json, + std::move(originalExtension)); + case Format::TOML: + return constructIOHandler( + "JSON", + path, + access, + comm, + std::move(options), + JSONIOHandlerImpl::FileFormat::Toml, + std::move(originalExtension)); default: throw error::WrongAPIUsage( "Unknown file format! Did you specify a file ending? 
Specified " diff --git a/src/IO/AbstractIOHandlerImpl.cpp b/src/IO/AbstractIOHandlerImpl.cpp index bbab360b4d..8993816f48 100644 --- a/src/IO/AbstractIOHandlerImpl.cpp +++ b/src/IO/AbstractIOHandlerImpl.cpp @@ -25,7 +25,9 @@ #include "openPMD/backend/Writable.hpp" #include +#include #include +#include namespace openPMD { @@ -38,12 +40,67 @@ AbstractIOHandlerImpl::AbstractIOHandlerImpl(AbstractIOHandler *handler) } } +namespace +{ + template + auto vec_as_string(Vec const &vec) -> std::string + { + if (vec.empty()) + { + return "[]"; + } + else + { + std::stringstream res; + res << '['; + auto it = vec.begin(); + res << *it++; + auto end = vec.end(); + for (; it != end; ++it) + { + res << ", " << *it; + } + res << ']'; + return res.str(); + } + } + + template + struct self_or_invoked + { + using type = T; + }; + + template + struct self_or_invoked>> + { + using type = std::invoke_result_t; + }; + + template + using self_or_invoked_t = typename self_or_invoked::type; + + template + auto + undefer_string(DeferredString &&str) -> self_or_invoked_t + { + if constexpr (std::is_invocable_v) + { + return str(); + } + else + { + return std::forward(str); + } + } +} // namespace + template void AbstractIOHandlerImpl::writeToStderr([[maybe_unused]] Args &&...args) const { if (m_verboseIOTasks) { - (std::cerr << ... << args) << std::endl; + (std::cerr << ... 
<< undefer_string(args)) << std::endl; } } @@ -108,7 +165,9 @@ std::future AbstractIOHandlerImpl::flush() "->", i.writable, "] CREATE_DATASET: ", - parameter.name); + parameter.name, + ", extent=", + [&parameter]() { return vec_as_string(parameter.extent); }); createDataset(i.writable, parameter); break; } @@ -362,6 +421,22 @@ std::future AbstractIOHandlerImpl::flush() deregister(i.writable, parameter); break; } + case O::TOUCH: { + auto &parameter = + deref_dynamic_cast>(i.parameter.get()); + writeToStderr( + "[", i.writable->parent, "->", i.writable, "] TOUCH"); + touch(i.writable, parameter); + break; + } + case O::SET_WRITTEN: { + auto &parameter = deref_dynamic_cast>( + i.parameter.get()); + writeToStderr( + "[", i.writable->parent, "->", i.writable, "] SET_WRITTEN"); + setWritten(i.writable, parameter); + break; + } } } catch (...) @@ -409,4 +484,10 @@ std::future AbstractIOHandlerImpl::flush() } return std::future(); } + +void AbstractIOHandlerImpl::setWritten( + Writable *w, Parameter const &param) +{ + w->written = param.target_status; +} } // namespace openPMD diff --git a/src/IO/DummyIOHandler.cpp b/src/IO/DummyIOHandler.cpp index 6bc6ec4d64..7882c9d5e3 100644 --- a/src/IO/DummyIOHandler.cpp +++ b/src/IO/DummyIOHandler.cpp @@ -36,4 +36,9 @@ std::future DummyIOHandler::flush(internal::ParsedFlushParams &) { return std::future(); } + +std::string DummyIOHandler::backendName() const +{ + return "Dummy"; +} } // namespace openPMD diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index e2fa63d9b3..32a9e80d74 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -19,8 +19,16 @@ * If not, see .
*/ #include "openPMD/IO/HDF5/HDF5IOHandler.hpp" +#include "openPMD/IO/AbstractIOHandler.hpp" +#include "openPMD/IO/AbstractIOHandlerImpl.hpp" +#include "openPMD/IO/FlushParametersInternal.hpp" #include "openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp" #include "openPMD/auxiliary/Environment.hpp" +#include "openPMD/auxiliary/JSON_internal.hpp" +#include "openPMD/auxiliary/Variant.hpp" +#include +#include +#include #if openPMD_HAVE_HDF5 #include "openPMD/Datatype.hpp" @@ -34,6 +42,7 @@ #include "openPMD/auxiliary/TypeTraits.hpp" #include "openPMD/backend/Attribute.hpp" +#include #include #endif @@ -66,7 +75,9 @@ namespace openPMD #endif HDF5IOHandlerImpl::HDF5IOHandlerImpl( - AbstractIOHandler *handler, json::TracingJSON config) + AbstractIOHandler *handler, + json::TracingJSON config, + bool do_warn_unused_params) : AbstractIOHandlerImpl(handler) , m_datasetTransferProperty{H5P_DEFAULT} , m_fileAccessProperty{H5P_DEFAULT} @@ -132,58 +143,63 @@ HDF5IOHandlerImpl::HDF5IOHandlerImpl( m_H5T_LONG_DOUBLE_80_LE >= 0, "[HDF5] Internal error: Failed to create 128-bit complex long double"); - m_chunks = auxiliary::getEnvString("OPENPMD_HDF5_CHUNKS", "auto"); // JSON option can overwrite env option: if (config.json().contains("hdf5")) { m_config = config["hdf5"]; - // check for global dataset configs - if (m_config.json().contains("dataset")) { - auto datasetConfig = m_config["dataset"]; - if (datasetConfig.json().contains("chunks")) + constexpr char const *const init_json_shadow_str = R"( { - auto maybeChunks = json::asLowerCaseStringDynamic( - datasetConfig["chunks"].json()); - if (maybeChunks.has_value()) - { - m_chunks = std::move(maybeChunks.value()); - } - else - { - throw error::BackendConfigSchema( - {"hdf5", "dataset", "chunks"}, - "Must be convertible to string type."); - } - } - } - if (m_chunks != "auto" && m_chunks != "none") - { - std::cerr << "Warning: HDF5 chunking option set to an invalid " - "value '" - << m_chunks << "'. Reset to 'auto'." 
<< std::endl; - m_chunks = "auto"; + "dataset": { + "chunks": null + }, + "independent_stores": null + })"; + constexpr char const *const dataset_cfg_mask = R"( + { + "dataset": { + "chunks": null + } + })"; + constexpr char const *const flush_cfg_mask = R"( + { + "independent_stores": null + })"; + m_global_dataset_config = m_config.json(); + json::filterByTemplate( + m_global_dataset_config, + nlohmann::json::parse(dataset_cfg_mask)); + m_global_flush_config = m_config.json(); + json::filterByTemplate( + m_global_flush_config, nlohmann::json::parse(flush_cfg_mask)); + auto init_json_shadow = nlohmann::json::parse(init_json_shadow_str); + json::merge(m_config.getShadow(), init_json_shadow); } // unused params - auto shadow = m_config.invertShadow(); - if (shadow.size() > 0) + if (do_warn_unused_params) { - switch (m_config.originallySpecifiedAs) + auto shadow = m_config.invertShadow(); + if (shadow.size() > 0) { - case json::SupportedLanguages::JSON: - std::cerr << "Warning: parts of the backend configuration for " - "HDF5 remain unused:\n" - << shadow << std::endl; - break; - case json::SupportedLanguages::TOML: { - auto asToml = json::jsonToToml(shadow); - std::cerr << "Warning: parts of the backend configuration for " - "HDF5 remain unused:\n" - << asToml << std::endl; - break; - } + switch (m_config.originallySpecifiedAs) + { + case json::SupportedLanguages::JSON: + std::cerr + << "Warning: parts of the backend configuration for " + "HDF5 remain unused:\n" + << shadow << std::endl; + break; + case json::SupportedLanguages::TOML: { + auto asToml = json::jsonToToml(shadow); + std::cerr + << "Warning: parts of the backend configuration for " + "HDF5 remain unused:\n" + << json::format_toml(asToml) << std::endl; + break; + } + } } } } @@ -452,6 +468,12 @@ void HDF5IOHandlerImpl::createDataset( "[HDF5] Creating a dataset in a file opened as read only is not " "possible."); + if (parameters.joinedDimension.has_value()) + { + 
error::throwOperationUnsupportedInBackend( + "HDF5", "Joined Arrays currently only supported in ADIOS2"); + } + if (!writable->written) { /* Sanitize name */ @@ -461,8 +483,40 @@ void HDF5IOHandlerImpl::createDataset( if (auxiliary::ends_with(name, '/')) name = auxiliary::replace_last(name, "/", ""); - json::TracingJSON config = - json::parseOptions(parameters.options, /* considerFiles = */ false); + std::vector dims; + std::uint64_t num_elements = 1u; + for (auto const &val : parameters.extent) + { + dims.push_back(static_cast(val)); + num_elements *= val; + } + + Datatype d = parameters.dtype; + if (d == Datatype::UNDEFINED) + { + // TODO handle unknown dtype + std::cerr << "[HDF5] Datatype::UNDEFINED caught during dataset " + "creation (serial HDF5)" + << std::endl; + d = Datatype::BOOL; + } + + json::TracingJSON config = [&]() { + auto parsed_config = json::parseOptions( + parameters.options, /* considerFiles = */ false); + if (auto hdf5_config_it = parsed_config.config.find("hdf5"); + hdf5_config_it != parsed_config.config.end()) + { + auto copy = m_global_dataset_config; + json::merge(copy, hdf5_config_it.value()); + hdf5_config_it.value() = std::move(copy); + } + else + { + parsed_config.config["hdf5"] = m_global_dataset_config; + } + return parsed_config; + }(); // general bool is_resizable_dataset = false; @@ -471,17 +525,90 @@ void HDF5IOHandlerImpl::createDataset( is_resizable_dataset = config["resizable"].json().get(); } + using chunking_t = std::vector; + using compute_chunking_t = + std::variant; + + bool chunking_config_from_json = false; + auto throw_chunking_error = [&chunking_config_from_json]() { + if (chunking_config_from_json) + { + throw error::BackendConfigSchema( + {"hdf5", "dataset", "chunks"}, + R"(Must be "auto", "none", or an array of integers.)"); + } + else + { + throw error::WrongAPIUsage( + "Environment variable OPENPMD_HDF5_CHUNKS accepts values " + "'auto' and 'none'."); + } + }; + + compute_chunking_t compute_chunking =
auxiliary::getEnvString("OPENPMD_HDF5_CHUNKS", "auto"); + // HDF5 specific if (config.json().contains("hdf5") && config["hdf5"].json().contains("dataset")) { json::TracingJSON datasetConfig{config["hdf5"]["dataset"]}; - /* - * @todo Read more options from config here. - */ - (void)datasetConfig; + if (datasetConfig.json().contains("chunks")) + { + chunking_config_from_json = true; + + auto chunks_json = datasetConfig["chunks"]; + if (chunks_json.json().is_string()) + { + + compute_chunking = + json::asLowerCaseStringDynamic(chunks_json.json()) + .value(); + } + else if (chunks_json.json().is_array()) + { + try + { + compute_chunking = + chunks_json.json().get>(); + } + catch (nlohmann::json::type_error const &) + { + throw_chunking_error(); + } + } + else + { + throw_chunking_error(); + } + } } + std::optional chunking = std::visit( + auxiliary::overloaded{ + [&](chunking_t &&explicitly_specified) + -> std::optional { + return std::move(explicitly_specified); + }, + [&](std::string const &method_name) + -> std::optional { + if (method_name == "auto") + { + + return getOptimalChunkDims(dims, toBytes(d)); + } + else if (method_name == "none") + { + return std::nullopt; + } + else + { + throw_chunking_error(); + throw std::runtime_error("Unreachable!"); + } + }}, + std::move(compute_chunking)); + parameters.warnUnusedParameters( config, "hdf5", @@ -496,9 +623,19 @@ void HDF5IOHandlerImpl::createDataset( } #endif + writable->abstractFilePosition.reset(); /* Open H5Object to write into */ - auto res = getFile(writable); - File file = res ? 
res.value() : getFile(writable->parent).value(); + File file{}; + if (auto opt = getFile(writable->parent); opt.has_value()) + { + file = opt.value(); + } + else + { + throw error::Internal( + "[HDF5] CREATE_DATASET task must have a parent with an " + "associated file."); + } hid_t node_id = H5Gopen(file.id, concrete_h5_file_position(writable).c_str(), gapl); VERIFY( @@ -536,26 +673,6 @@ void HDF5IOHandlerImpl::createDataset( // else: link_id == 0: Link does not exist, nothing to do } - Datatype d = parameters.dtype; - if (d == Datatype::UNDEFINED) - { - // TODO handle unknown dtype - std::cerr << "[HDF5] Datatype::UNDEFINED caught during dataset " - "creation (serial HDF5)" - << std::endl; - d = Datatype::BOOL; - } - - Attribute a(0); - a.dtype = d; - std::vector dims; - std::uint64_t num_elements = 1u; - for (auto const &val : parameters.extent) - { - dims.push_back(static_cast(val)); - num_elements *= val; - } - std::vector max_dims(dims.begin(), dims.end()); if (is_resizable_dataset) max_dims.assign(dims.size(), H5F_UNLIMITED); @@ -572,24 +689,46 @@ void HDF5IOHandlerImpl::createDataset( H5Pset_fill_time(datasetCreationProperty, H5D_FILL_TIME_NEVER); - if (num_elements != 0u && m_chunks != "none") + if (num_elements != 0u && chunking.has_value()) { - //! @todo add per dataset chunk control from JSON config - - // get chunking dimensions - std::vector chunk_dims = - getOptimalChunkDims(dims, toBytes(d)); - - //! 
@todo allow overwrite with user-provided chunk size - // for( auto const& val : parameters.chunkSize ) - // chunk_dims.push_back(static_cast< hsize_t >(val)); - - herr_t status = H5Pset_chunk( - datasetCreationProperty, chunk_dims.size(), chunk_dims.data()); - VERIFY( - status == 0, - "[HDF5] Internal error: Failed to set chunk size during " - "dataset creation"); + if (chunking->size() != parameters.extent.size()) + { + std::string chunking_printed = [&]() { + if (chunking->empty()) + { + return std::string("[]"); + } + else + { + std::stringstream s; + auto it = chunking->begin(); + auto end = chunking->end(); + s << '[' << *it++; + for (; it != end; ++it) + { + s << ", " << *it; + } + s << ']'; + return s.str(); + } + }(); + std::cerr << "[HDF5] Chunking for dataset '" << name + << "' was specified as " << chunking_printed + << ", but dataset has dimensionality " + << parameters.extent.size() << ". Will ignore." + << std::endl; + } + else + { + herr_t status = H5Pset_chunk( + datasetCreationProperty, + chunking->size(), + chunking->data()); + VERIFY( + status == 0, + "[HDF5] Internal error: Failed to set chunk size during " + "dataset creation"); + } } std::string const &compression = ""; // @todo read from JSON @@ -625,6 +764,8 @@ void HDF5IOHandlerImpl::createDataset( {typeid(std::complex).name(), m_H5T_CDOUBLE}, {typeid(std::complex).name(), m_H5T_CLONG_DOUBLE}, }); + Attribute a(0); + a.dtype = d; hid_t datatype = getH5DataType(a); VERIFY( datatype >= 0, @@ -1660,17 +1801,17 @@ void HDF5IOHandlerImpl::writeAttribute( break; } case DT::CFLOAT: { - std::complex f = att.get >(); + std::complex f = att.get>(); status = H5Awrite(attribute_id, dataType, &f); break; } case DT::CDOUBLE: { - std::complex d = att.get >(); + std::complex d = att.get>(); status = H5Awrite(attribute_id, dataType, &d); break; } case DT::CLONG_DOUBLE: { - std::complex d = att.get >(); + std::complex d = att.get>(); status = H5Awrite(attribute_id, dataType, &d); break; } @@ -1680,94 
+1821,90 @@ void HDF5IOHandlerImpl::writeAttribute( break; case DT::VEC_CHAR: status = H5Awrite( - attribute_id, dataType, att.get >().data()); + attribute_id, dataType, att.get>().data()); break; case DT::VEC_SHORT: status = H5Awrite( - attribute_id, dataType, att.get >().data()); + attribute_id, dataType, att.get>().data()); break; case DT::VEC_INT: status = H5Awrite( - attribute_id, dataType, att.get >().data()); + attribute_id, dataType, att.get>().data()); break; case DT::VEC_LONG: status = H5Awrite( - attribute_id, dataType, att.get >().data()); + attribute_id, dataType, att.get>().data()); break; case DT::VEC_LONGLONG: status = H5Awrite( - attribute_id, dataType, att.get >().data()); + attribute_id, dataType, att.get>().data()); break; case DT::VEC_UCHAR: status = H5Awrite( attribute_id, dataType, - att.get >().data()); + att.get>().data()); break; case DT::VEC_SCHAR: status = H5Awrite( - attribute_id, - dataType, - att.get >().data()); + attribute_id, dataType, att.get>().data()); break; case DT::VEC_USHORT: status = H5Awrite( attribute_id, dataType, - att.get >().data()); + att.get>().data()); break; case DT::VEC_UINT: status = H5Awrite( attribute_id, dataType, - att.get >().data()); + att.get>().data()); break; case DT::VEC_ULONG: status = H5Awrite( attribute_id, dataType, - att.get >().data()); + att.get>().data()); break; case DT::VEC_ULONGLONG: status = H5Awrite( attribute_id, dataType, - att.get >().data()); + att.get>().data()); break; case DT::VEC_FLOAT: status = H5Awrite( - attribute_id, dataType, att.get >().data()); + attribute_id, dataType, att.get>().data()); break; case DT::VEC_DOUBLE: status = H5Awrite( - attribute_id, dataType, att.get >().data()); + attribute_id, dataType, att.get>().data()); break; case DT::VEC_LONG_DOUBLE: status = H5Awrite( - attribute_id, - dataType, - att.get >().data()); + attribute_id, dataType, att.get>().data()); break; case DT::VEC_CFLOAT: status = H5Awrite( attribute_id, dataType, - att.get > >().data()); + 
att.get>>().data()); break; case DT::VEC_CDOUBLE: status = H5Awrite( attribute_id, dataType, - att.get > >().data()); + att.get>>().data()); break; case DT::VEC_CLONG_DOUBLE: status = H5Awrite( attribute_id, dataType, - att.get > >().data()); + att.get>>().data()); break; case DT::VEC_STRING: { - auto vs = att.get >(); + auto vs = att.get>(); size_t max_len = 0; for (std::string const &s : vs) max_len = std::max(max_len, s.size() + 1); @@ -1779,7 +1916,7 @@ void HDF5IOHandlerImpl::writeAttribute( } case DT::ARR_DBL_7: status = H5Awrite( - attribute_id, dataType, att.get >().data()); + attribute_id, dataType, att.get>().data()); break; case DT::BOOL: { bool b = att.get(); @@ -2385,19 +2522,19 @@ void HDF5IOHandlerImpl::readAttribute( } else if (H5Tequal(attr_type, m_H5T_CFLOAT)) { - std::vector > vcf(dims[0], 0); + std::vector> vcf(dims[0], 0); status = H5Aread(attr_id, attr_type, vcf.data()); a = Attribute(vcf); } else if (H5Tequal(attr_type, m_H5T_CDOUBLE)) { - std::vector > vcd(dims[0], 0); + std::vector> vcd(dims[0], 0); status = H5Aread(attr_id, attr_type, vcd.data()); a = Attribute(vcd); } else if (H5Tequal(attr_type, m_H5T_CLONG_DOUBLE)) { - std::vector > vcld(dims[0], 0); + std::vector> vcld(dims[0], 0); status = H5Aread(attr_id, attr_type, vcld.data()); a = Attribute(vcld); } @@ -2418,7 +2555,7 @@ void HDF5IOHandlerImpl::readAttribute( tmpBuffer, nullptr, H5P_DEFAULT); - std::vector > vcld{ + std::vector> vcld{ tmpBuffer, tmpBuffer + dims[0]}; delete[] tmpBuffer; a = Attribute(std::move(vcld)); @@ -2780,6 +2917,11 @@ void HDF5IOHandlerImpl::deregister( m_fileNames.erase(writable); } +void HDF5IOHandlerImpl::touch(Writable *, Parameter const &) +{ + // no-op +} + std::optional HDF5IOHandlerImpl::getFile(Writable *writable) { @@ -2798,6 +2940,37 @@ HDF5IOHandlerImpl::getFile(Writable *writable) res.id = it2->second; return std::make_optional(std::move(res)); } + +std::future HDF5IOHandlerImpl::flush(internal::ParsedFlushParams ¶ms) +{ + auto res = 
AbstractIOHandlerImpl::flush(); + + if (params.backendConfig.json().contains("hdf5")) + { + auto hdf5_config = params.backendConfig["hdf5"]; + + if (auto shadow = hdf5_config.invertShadow(); shadow.size() > 0) + { + switch (hdf5_config.originallySpecifiedAs) + { + case json::SupportedLanguages::JSON: + std::cerr << "Warning: parts of the backend configuration for " + "HDF5 remain unused:\n" + << shadow << std::endl; + break; + case json::SupportedLanguages::TOML: { + auto asToml = json::jsonToToml(shadow); + std::cerr << "Warning: parts of the backend configuration for " + "HDF5 remain unused:\n" + << json::format_toml(asToml) << std::endl; + break; + } + } + } + } + + return res; +} #endif #if openPMD_HAVE_HDF5 @@ -2809,9 +2982,9 @@ HDF5IOHandler::HDF5IOHandler( HDF5IOHandler::~HDF5IOHandler() = default; -std::future HDF5IOHandler::flush(internal::ParsedFlushParams &) +std::future HDF5IOHandler::flush(internal::ParsedFlushParams ¶ms) { - return m_impl->flush(); + return m_impl->flush(params); } #else diff --git a/src/IO/HDF5/ParallelHDF5IOHandler.cpp b/src/IO/HDF5/ParallelHDF5IOHandler.cpp index 47a7764480..00d5741457 100644 --- a/src/IO/HDF5/ParallelHDF5IOHandler.cpp +++ b/src/IO/HDF5/ParallelHDF5IOHandler.cpp @@ -19,8 +19,19 @@ * If not, see . 
*/ #include "openPMD/IO/HDF5/ParallelHDF5IOHandler.hpp" +#include "openPMD/Error.hpp" +#include "openPMD/IO/FlushParametersInternal.hpp" +#include "openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp" #include "openPMD/IO/HDF5/ParallelHDF5IOHandlerImpl.hpp" #include "openPMD/auxiliary/Environment.hpp" +#include "openPMD/auxiliary/JSON_internal.hpp" +#include "openPMD/auxiliary/StringManip.hpp" +#include "openPMD/auxiliary/Variant.hpp" +#include + +#ifdef H5_HAVE_SUBFILING_VFD +#include +#endif #if openPMD_HAVE_MPI #include @@ -54,14 +65,26 @@ ParallelHDF5IOHandler::ParallelHDF5IOHandler( ParallelHDF5IOHandler::~ParallelHDF5IOHandler() = default; -std::future ParallelHDF5IOHandler::flush(internal::ParsedFlushParams &) +std::future +ParallelHDF5IOHandler::flush(internal::ParsedFlushParams ¶ms) { - return m_impl->flush(); + if (auto hdf5_config_it = params.backendConfig.json().find("hdf5"); + hdf5_config_it != params.backendConfig.json().end()) + { + auto copied_global_cfg = m_impl->m_global_flush_config; + json::merge(copied_global_cfg, hdf5_config_it.value()); + hdf5_config_it.value() = std::move(copied_global_cfg); + } + else + { + params.backendConfig["hdf5"].json() = m_impl->m_global_flush_config; + } + return m_impl->flush(params); } ParallelHDF5IOHandlerImpl::ParallelHDF5IOHandlerImpl( AbstractIOHandler *handler, MPI_Comm comm, json::TracingJSON config) - : HDF5IOHandlerImpl{handler, std::move(config)} + : HDF5IOHandlerImpl{handler, std::move(config), /* do_warn_unused_params = */ false} , m_mpiComm{comm} , m_mpiInfo{MPI_INFO_NULL} /* MPI 3.0+: MPI_INFO_ENV */ { @@ -112,14 +135,14 @@ ParallelHDF5IOHandlerImpl::ParallelHDF5IOHandlerImpl( } H5FD_mpio_xfer_t xfer_mode = H5FD_MPIO_COLLECTIVE; - auto const hdf5_collective = + auto const hdf5_independent = auxiliary::getEnvString("OPENPMD_HDF5_INDEPENDENT", "ON"); - if (hdf5_collective == "ON") + if (hdf5_independent == "ON") xfer_mode = H5FD_MPIO_INDEPENDENT; else { VERIFY( - hdf5_collective == "OFF", + hdf5_independent == "OFF", 
"[HDF5] Internal error: OPENPMD_HDF5_INDEPENDENT property must be " "either ON or OFF"); } @@ -164,6 +187,173 @@ ParallelHDF5IOHandlerImpl::ParallelHDF5IOHandlerImpl( VERIFY( status >= 0, "[HDF5] Internal error: Failed to set HDF5 file access property"); + + if (!m_config.json().is_null() && m_config.json().contains("vfd")) + { + auto vfd_json_config = m_config["vfd"]; + if (!vfd_json_config.json().contains("type")) + { + throw error::BackendConfigSchema( + {"hdf5", "vfd"}, + "VFD configuration requires specifying the VFD type."); + } + std::string user_specified_type; + if (auto value = + json::asLowerCaseStringDynamic(vfd_json_config["type"].json()); + value.has_value()) + { + user_specified_type = *value; + } + else + { + throw error::BackendConfigSchema( + {"hdf5", "vfd", "type"}, "VFD type must be given as a string."); + } + + if (user_specified_type == "default") + { /* no-op */ + } + else if (user_specified_type == "subfiling") + { +#ifdef H5_HAVE_SUBFILING_VFD + int thread_level = 0; + MPI_Query_thread(&thread_level); + if (thread_level >= MPI_THREAD_MULTIPLE) + { + H5FD_subfiling_config_t vfd_config; + // query default subfiling parameters + H5Pget_fapl_subfiling(m_fileAccessProperty, &vfd_config); + + auto int_accessor = + [&vfd_json_config]( + std::string const &key) -> std::optional { + if (!vfd_json_config.json().contains(key)) + { + return std::nullopt; + } + auto const &val = vfd_json_config[key].json(); + if (val.is_number_integer()) + { + return val.get(); + } + else + { + throw error::BackendConfigSchema( + {"hdf5", "vfd", key}, + "Excpecting value of type integer."); + } + }; + auto string_accessor = + [&vfd_json_config]( + std::string const &key) -> std::optional { + if (!vfd_json_config.json().contains(key)) + { + return std::nullopt; + } + auto const &val = vfd_json_config[key].json(); + if (auto str_val = json::asLowerCaseStringDynamic(val); + str_val.has_value()) + { + return *str_val; + } + else + { + throw error::BackendConfigSchema( + 
{"hdf5", "vfd", key}, + "Excpecting value of type string."); + } + }; + + auto set_param = [](std::string const &key, + auto *target, + auto const &accessor) { + if (auto val = accessor(key); val.has_value()) + { + *target = static_cast< + std::remove_reference_t>(*val); + } + }; + + set_param( + "stripe_size", + &vfd_config.shared_cfg.stripe_size, + int_accessor); + set_param( + "stripe_count", + &vfd_config.shared_cfg.stripe_count, + int_accessor); + std::optional ioc_selection_raw; + set_param("ioc_selection", &ioc_selection_raw, string_accessor); + + std::map const + ioc_selection_map{ + {"one_per_node", SELECT_IOC_ONE_PER_NODE}, + {"every_nth_rank", SELECT_IOC_EVERY_NTH_RANK}, + {"with_config", SELECT_IOC_WITH_CONFIG}, + {"total", SELECT_IOC_TOTAL}}; + if (ioc_selection_raw.has_value()) + { + if (auto ioc_selection = + ioc_selection_map.find(*ioc_selection_raw); + ioc_selection != ioc_selection_map.end()) + { + vfd_config.shared_cfg.ioc_selection = + ioc_selection->second; + } + else + { + throw error::BackendConfigSchema( + {"hdf5", "vfd", "ioc_selection"}, + "Unexpected value: '" + *ioc_selection_raw + "'."); + } + } + + // ... and set them + H5Pset_fapl_subfiling(m_fileAccessProperty, &vfd_config); + } + else + { + std::cerr << "[HDF5 Backend] The requested subfiling VFD of " + "HDF5 requires the use of threaded MPI." + << std::endl; + } +#else + std::cerr + << "[HDF5 Backend] No support for the requested subfiling VFD " + "found in the installed version of HDF5. Will continue with " + "default settings. Tip: Configure a recent version of HDF5 " + "with '-DHDF5_ENABLE_SUBFILING_VFD=ON'." 
+ << std::endl; +#endif + } + else + { + throw error::BackendConfigSchema( + {"hdf5", "vfd", "type"}, + "Unknown value: '" + user_specified_type + "'."); + } + } + + // unused params + auto shadow = m_config.invertShadow(); + if (shadow.size() > 0) + { + switch (m_config.originallySpecifiedAs) + { + case json::SupportedLanguages::JSON: + std::cerr << "Warning: parts of the backend configuration for " + "HDF5 remain unused:\n" + << shadow << std::endl; + break; + case json::SupportedLanguages::TOML: { + auto asToml = json::jsonToToml(shadow); + std::cerr << "Warning: parts of the backend configuration for " + "HDF5 remain unused:\n" + << json::format_toml(asToml) << std::endl; + break; + } + } + } } ParallelHDF5IOHandlerImpl::~ParallelHDF5IOHandlerImpl() @@ -179,6 +369,55 @@ ParallelHDF5IOHandlerImpl::~ParallelHDF5IOHandlerImpl() m_openFileIDs.erase(file); } } + +std::future +ParallelHDF5IOHandlerImpl::flush(internal::ParsedFlushParams ¶ms) +{ + std::optional old_value; + if (params.backendConfig.json().contains("hdf5")) + { + auto hdf5_config = params.backendConfig["hdf5"]; + + if (hdf5_config.json().contains("independent_stores")) + { + auto independent_stores_json = hdf5_config["independent_stores"]; + if (!independent_stores_json.json().is_boolean()) + { + throw error::BackendConfigSchema( + {"hdf5", "independent_stores"}, "Requires boolean value."); + } + bool independent_stores = + independent_stores_json.json().get(); + old_value = std::make_optional(); + herr_t status = + H5Pget_dxpl_mpio(m_datasetTransferProperty, &*old_value); + VERIFY( + status >= 0, + "[HDF5] Internal error: Failed to query the global data " + "transfer mode before flushing."); + H5FD_mpio_xfer_t new_value = independent_stores + ? 
H5FD_MPIO_INDEPENDENT + : H5FD_MPIO_COLLECTIVE; + status = H5Pset_dxpl_mpio(m_datasetTransferProperty, new_value); + VERIFY( + status >= 0, + "[HDF5] Internal error: Failed to set the local data " + "transfer mode before flushing."); + } + } + auto res = HDF5IOHandlerImpl::flush(params); + + if (old_value.has_value()) + { + herr_t status = H5Pset_dxpl_mpio(m_datasetTransferProperty, *old_value); + VERIFY( + status >= 0, + "[HDF5] Internal error: Failed to reset the global data " + "transfer mode after flushing."); + } + + return res; +} #else #if openPMD_HAVE_MPI diff --git a/src/IO/IOTask.cpp b/src/IO/IOTask.cpp index e6ff0be887..47b0bea4ca 100644 --- a/src/IO/IOTask.cpp +++ b/src/IO/IOTask.cpp @@ -66,7 +66,8 @@ void Parameter::warnUnusedParameters< break; case json::SupportedLanguages::TOML: { auto asToml = json::jsonToToml(shadow); - std::cerr << warningMessage << asToml << std::endl; + std::cerr << warningMessage << json::format_toml(asToml) + << std::endl; break; } } @@ -154,4 +155,10 @@ namespace internal } } } // namespace internal + +IOTask::IOTask(IOTask const &) = default; +IOTask::IOTask(IOTask &&) noexcept = default; + +IOTask &IOTask::operator=(IOTask const &) = default; +IOTask &IOTask::operator=(IOTask &&) noexcept = default; } // namespace openPMD diff --git a/src/IO/JSON/JSONIOHandler.cpp b/src/IO/JSON/JSONIOHandler.cpp index 041b236340..d2a6217eb5 100644 --- a/src/IO/JSON/JSONIOHandler.cpp +++ b/src/IO/JSON/JSONIOHandler.cpp @@ -26,15 +26,29 @@ namespace openPMD JSONIOHandler::~JSONIOHandler() = default; JSONIOHandler::JSONIOHandler( - std::string const &path, + std::string path, Access at, openPMD::json::TracingJSON jsonCfg, JSONIOHandlerImpl::FileFormat format, std::string originalExtension) - : AbstractIOHandler{path, at} + : AbstractIOHandler{std::move(path), at} , m_impl{this, std::move(jsonCfg), format, std::move(originalExtension)} {} +#if openPMD_HAVE_MPI +JSONIOHandler::JSONIOHandler( + std::string path, + Access at, + MPI_Comm comm, + 
openPMD::json::TracingJSON jsonCfg, + JSONIOHandlerImpl::FileFormat format, + std::string originalExtension) + : AbstractIOHandler{std::move(path), at} + , m_impl{JSONIOHandlerImpl{ + this, comm, std::move(jsonCfg), format, std::move(originalExtension)}} +{} +#endif + std::future JSONIOHandler::flush(internal::ParsedFlushParams &) { return m_impl.flush(); diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index a4e1bb39ab..e06aa36ed8 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -23,12 +23,17 @@ #include "openPMD/Datatype.hpp" #include "openPMD/DatatypeHelpers.hpp" #include "openPMD/Error.hpp" +#include "openPMD/IO/AbstractIOHandler.hpp" +#include "openPMD/IO/AbstractIOHandlerImpl.hpp" #include "openPMD/auxiliary/Filesystem.hpp" +#include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/auxiliary/Memory.hpp" #include "openPMD/auxiliary/StringManip.hpp" #include "openPMD/auxiliary/TypeTraits.hpp" #include "openPMD/backend/Writable.hpp" +#include +#include #include #include @@ -133,6 +138,21 @@ JSONIOHandlerImpl::JSONIOHandlerImpl( , m_originalExtension{std::move(originalExtension)} {} +#if openPMD_HAVE_MPI +JSONIOHandlerImpl::JSONIOHandlerImpl( + AbstractIOHandler *handler, + MPI_Comm comm, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + [[maybe_unused]] openPMD::json::TracingJSON config, + FileFormat format, + std::string originalExtension) + : AbstractIOHandlerImpl(handler) + , m_communicator{comm} + , m_fileFormat{format} + , m_originalExtension{std::move(originalExtension)} +{} +#endif + JSONIOHandlerImpl::~JSONIOHandlerImpl() = default; std::future JSONIOHandlerImpl::flush() @@ -260,12 +280,19 @@ void JSONIOHandlerImpl::createDataset( "[JSON] Creating a dataset in a file opened as read only is not " "possible."); } + if (parameter.joinedDimension.has_value()) + { + error::throwOperationUnsupportedInBackend( + "ADIOS1", "Joined Arrays currently only supported in 
ADIOS2"); + } + if (!writable->written) { /* Sanitize name */ std::string name = removeSlashes(parameter.name); auto file = refreshFileFromParent(writable); + writable->abstractFilePosition.reset(); setAndGetFilePosition(writable); auto &jsonVal = obtainJsonContents(writable); // be sure to have a JSON object, not a list @@ -612,7 +639,11 @@ void JSONIOHandlerImpl::closeFile( auto fileIterator = m_files.find(writable); if (fileIterator != m_files.end()) { - putJsonContents(fileIterator->second); + auto it = putJsonContents(fileIterator->second); + if (it != m_jsonVals.end()) + { + m_jsonVals.erase(it); + } m_dirty.erase(fileIterator->second); // do not invalidate the file // it still exists, it is just not open @@ -1009,6 +1040,13 @@ void JSONIOHandlerImpl::deregister( m_files.erase(writable); } +void JSONIOHandlerImpl::touch( + Writable *writable, Parameter const &) +{ + auto file = refreshFileFromParent(writable); + m_dirty.emplace(std::move(file)); +} + auto JSONIOHandlerImpl::getFilehandle(File const &fileName, Access access) -> std::tuple, std::istream *, std::ostream *> { @@ -1244,20 +1282,64 @@ JSONIOHandlerImpl::obtainJsonContents(File const &file) return it->second; } // read from file - auto [fh, fh_with_precision, _] = getFilehandle(file, Access::READ_ONLY); - (void)_; - std::shared_ptr res = std::make_shared(); - switch (m_fileFormat) + auto serialImplementation = [&file, this]() { + auto [fh, fh_with_precision, _] = + getFilehandle(file, Access::READ_ONLY); + (void)_; + std::shared_ptr res = + std::make_shared(); + switch (m_fileFormat) + { + case FileFormat::Json: + *fh_with_precision >> *res; + break; + case FileFormat::Toml: + *res = openPMD::json::tomlToJson( + toml::parse(*fh_with_precision, *file)); + break; + } + VERIFY(fh->good(), "[JSON] Failed reading from a file."); + return res; + }; +#if openPMD_HAVE_MPI + auto parallelImplementation = [&file, this](MPI_Comm comm) { + auto path = fullPath(*file); + std::string collectivelyReadRawData = + 
auxiliary::collective_file_read(path, comm); + std::shared_ptr res = + std::make_shared(); + switch (m_fileFormat) + { + case FileFormat::Json: + *res = nlohmann::json::parse(collectivelyReadRawData); + break; + case FileFormat::Toml: + std::istringstream istream( + collectivelyReadRawData.c_str(), + std::ios_base::binary | std::ios_base::in); + auto as_toml = toml::parse( + istream >> std::setprecision( + std::numeric_limits::digits10 + 1), + *file); + *res = openPMD::json::tomlToJson(as_toml); + break; + } + return res; + }; + std::shared_ptr res; + if (m_communicator.has_value()) { - case FileFormat::Json: - *fh_with_precision >> *res; - break; - case FileFormat::Toml: - *res = - openPMD::json::tomlToJson(toml::parse(*fh_with_precision, *file)); - break; + res = parallelImplementation(m_communicator.value()); + } + else + { + res = serialImplementation(); } - VERIFY(fh->good(), "[JSON] Failed reading from a file."); + +#else + auto res = serialImplementation(); +#endif + m_jsonVals.emplace(file, res); return res; } @@ -1269,21 +1351,26 @@ nlohmann::json &JSONIOHandlerImpl::obtainJsonContents(Writable *writable) return (*obtainJsonContents(file))[filePosition->id]; } -void JSONIOHandlerImpl::putJsonContents( +auto JSONIOHandlerImpl::putJsonContents( File const &filename, bool unsetDirty // = true -) + ) -> decltype(m_jsonVals)::iterator { VERIFY_ALWAYS( filename.valid(), "[JSON] File has been overwritten/deleted before writing"); auto it = m_jsonVals.find(filename); - if (it != m_jsonVals.end()) + if (it == m_jsonVals.end()) { + return it; + } + + (*it->second)["platform_byte_widths"] = platformSpecifics(); + + auto writeSingleFile = [this, &it](std::string const &writeThisFile) { auto [fh, _, fh_with_precision] = - getFilehandle(filename, Access::CREATE); + getFilehandle(File(writeThisFile), Access::CREATE); (void)_; - (*it->second)["platform_byte_widths"] = platformSpecifics(); switch (m_fileFormat) { @@ -1291,18 +1378,115 @@ void 
JSONIOHandlerImpl::putJsonContents( *fh_with_precision << *it->second << std::endl; break; case FileFormat::Toml: - *fh_with_precision << openPMD::json::jsonToToml(*it->second) + *fh_with_precision << openPMD::json::format_toml( + openPMD::json::jsonToToml(*it->second)) << std::endl; break; } VERIFY(fh->good(), "[JSON] Failed writing data to disk.") - m_jsonVals.erase(it); - if (unsetDirty) + }; + + auto serialImplementation = [&filename, &writeSingleFile]() { + writeSingleFile(*filename); + }; + +#if openPMD_HAVE_MPI + auto num_digits = [](unsigned n) -> unsigned { + constexpr auto max = std::numeric_limits::max(); + unsigned base_10 = 1; + unsigned res = 1; + while (base_10 < max) { - m_dirty.erase(filename); + base_10 *= 10; + if (n / base_10 == 0) + { + return res; + } + ++res; } + return res; + }; + + auto parallelImplementation = + [this, &filename, &writeSingleFile, &num_digits](MPI_Comm comm) { + auto path = fullPath(*filename); + auto dirpath = path + ".parallel"; + if (!auxiliary::create_directories(dirpath)) + { + throw std::runtime_error( + "Failed creating directory '" + dirpath + + "' for parallel JSON output"); + } + int rank = 0, size = 0; + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &size); + std::stringstream subfilePath; + // writeSingleFile will prepend the base dir + subfilePath << *filename << ".parallel/mpi_rank_" + << std::setw(num_digits(size - 1)) << std::setfill('0') + << rank << [&]() { + switch (m_fileFormat) + { + case FileFormat::Json: + return ".json"; + case FileFormat::Toml: + return ".toml"; + } + throw std::runtime_error("Unreachable!"); + }(); + writeSingleFile(subfilePath.str()); + if (rank == 0) + { + constexpr char const *readme_msg = R"( +This folder has been created by a parallel instance of the JSON backend in +openPMD. There is one JSON file for each parallel writer MPI rank. +The parallel JSON backend performs no metadata or data aggregation at all. 
+ +This functionality is intended mainly for debugging and prototyping workflows. +There is no support in the openPMD-api for reading this folder as a single +dataset. For reading purposes, either pick a single .json file and read that, or +merge the .json files somehow (no tooling provided for this (yet)). +)"; + std::fstream readme_file; + readme_file.open( + dirpath + "/README.txt", + std::ios_base::out | std::ios_base::trunc); + readme_file << readme_msg + 1; + readme_file.close(); + if (!readme_file.good() && + !filename.fileState->printedReadmeWarningAlready) + { + std::cerr + << "[Warning] Something went wrong in trying to create " + "README file at '" + << dirpath + << "/README.txt'. Will ignore and continue. The README " + "message would have been:\n----------\n" + << readme_msg + 1 << "----------" << std::endl; + filename.fileState->printedReadmeWarningAlready = true; + } + } + }; + + std::shared_ptr res; + if (m_communicator.has_value()) + { + parallelImplementation(m_communicator.value()); + } + else + { + serialImplementation(); + } + +#else + serialImplementation(); +#endif + if (unsetDirty) + { + m_dirty.erase(filename); } + return it; } std::shared_ptr JSONIOHandlerImpl::setAndGetFilePosition( diff --git a/src/Iteration.cpp b/src/Iteration.cpp index 593e38066f..366fea0de1 100644 --- a/src/Iteration.cpp +++ b/src/Iteration.cpp @@ -21,6 +21,8 @@ #include "openPMD/Iteration.hpp" #include "openPMD/Dataset.hpp" #include "openPMD/Datatype.hpp" +#include "openPMD/IO/AbstractIOHandler.hpp" +#include "openPMD/IO/IOTask.hpp" #include "openPMD/Series.hpp" #include "openPMD/auxiliary/DerefDynamicCast.hpp" #include "openPMD/auxiliary/Filesystem.hpp" @@ -208,6 +210,16 @@ void Iteration::flushFileBased( fCreate.name = filename; IOHandler()->enqueue(IOTask(&s.writable(), fCreate)); + /* + * If it was written before, then in the context of another iteration. 
+ */ + auto &attr = s.get().m_rankTable.m_attributable; + attr.setWritten(false, Attributable::EnqueueAsynchronously::Yes); + s.get() + .m_rankTable.m_attributable.get() + .m_writable.abstractFilePosition.reset(); + s.flushRankTable(); + /* create basePath */ Parameter pCreate; pCreate.path = auxiliary::replace_first(s.basePath(), "%T/", ""); @@ -305,6 +317,8 @@ void Iteration::flushVariableBased( void Iteration::flush(internal::FlushParams const &flushParams) { + Parameter touch; + IOHandler()->enqueue(IOTask(&writable(), touch)); if (access::readOnly(IOHandler()->m_frontendAccess)) { for (auto &m : meshes) @@ -331,7 +345,7 @@ void Iteration::flush(internal::FlushParams const &flushParams) } else { - meshes.dirty() = false; + meshes.setDirty(false); } if (!particles.empty() || s.containsAttribute("particlesPath")) @@ -347,11 +361,17 @@ void Iteration::flush(internal::FlushParams const &flushParams) } else { - particles.dirty() = false; + particles.setDirty(false); } flushAttributes(flushParams); } + if (flushParams.flushLevel != FlushLevel::SkeletonOnly) + { + setDirty(false); + meshes.setDirty(false); + particles.setDirty(false); + } } void Iteration::deferParseAccess(DeferredParseAccess dr) @@ -509,13 +529,9 @@ void Iteration::read_impl(std::string const &groupPath) << " and will skip them due to read error:\n" << err.what() << std::endl; meshes = {}; - meshes.dirty() = false; } } - else - { - meshes.dirty() = false; - } + meshes.setDirty(false); if (hasParticles) { @@ -529,13 +545,9 @@ void Iteration::read_impl(std::string const &groupPath) << " and will skip them due to read error:\n" << err.what() << std::endl; particles = {}; - particles.dirty() = false; } } - else - { - particles.dirty() = false; - } + particles.setDirty(false); readAttributes(ReadMode::FullyReread); #ifdef openPMD_USE_INVASIVE_TESTS @@ -619,9 +631,9 @@ void Iteration::readMeshes(std::string const &meshesPath) MeshRecordComponent &mrc = m; IOHandler()->enqueue(IOTask(&mrc, dOpen)); 
IOHandler()->flush(internal::defaultFlushParams); - mrc.written() = false; + mrc.setWritten(false, Attributable::EnqueueAsynchronously::No); mrc.resetDataset(Dataset(*dOpen.dtype, *dOpen.extent)); - mrc.written() = true; + mrc.setWritten(true, Attributable::EnqueueAsynchronously::No); try { m.read(); @@ -743,7 +755,8 @@ auto Iteration::beginStep( access::read(series.IOHandler()->m_frontendAccess)) { bool previous = series.iterations.written(); - series.iterations.written() = false; + series.iterations.setWritten( + false, Attributable::EnqueueAsynchronously::Yes); auto oldStatus = IOHandl->m_seriesStatus; IOHandl->m_seriesStatus = internal::SeriesStatus::Parsing; try @@ -759,7 +772,8 @@ auto Iteration::beginStep( throw; } IOHandl->m_seriesStatus = oldStatus; - series.iterations.written() = previous; + series.iterations.setWritten( + previous, Attributable::EnqueueAsynchronously::Yes); } res.stepStatus = status; @@ -822,33 +836,6 @@ void Iteration::setStepStatus(StepStatus status) } } -bool Iteration::dirtyRecursive() const -{ - if (dirty()) - { - return true; - } - if (particles.dirty() || meshes.dirty()) - { - return true; - } - for (auto const &pair : particles) - { - if (pair.second.dirtyRecursive()) - { - return true; - } - } - for (auto const &pair : meshes) - { - if (pair.second.dirtyRecursive()) - { - return true; - } - } - return false; -} - void Iteration::linkHierarchy(Writable &w) { Attributable::linkHierarchy(w); diff --git a/src/Mesh.cpp b/src/Mesh.cpp index fa32d24374..f977bbe905 100644 --- a/src/Mesh.cpp +++ b/src/Mesh.cpp @@ -139,7 +139,7 @@ Mesh &Mesh::setDataOrder(Mesh::DataOrder dor) std::vector Mesh::axisLabels() const { - return getAttribute("axisLabels").get >(); + return getAttribute("axisLabels").get>(); } Mesh &Mesh::setAxisLabels(std::vector const &als) @@ -165,7 +165,7 @@ template Mesh &Mesh::setGridSpacing(std::vector const &gs); std::vector Mesh::gridGlobalOffset() const { - return getAttribute("gridGlobalOffset").get >(); + return 
getAttribute("gridGlobalOffset").get>(); } Mesh &Mesh::setGridGlobalOffset(std::vector const &ggo) @@ -331,9 +331,9 @@ void Mesh::read() aRead.name = "axisLabels"; IOHandler()->enqueue(IOTask(this, aRead)); IOHandler()->flush(internal::defaultFlushParams); - if (*aRead.dtype == DT::VEC_STRING || *aRead.dtype == DT::STRING) - setAxisLabels( - Attribute(*aRead.resource).get >()); + Attribute a = Attribute(*aRead.resource); + if (auto val = a.getOptional>(); val.has_value()) + setAxisLabels(*val); else throw error::ReadError( error::AffectedObject::Attribute, @@ -346,16 +346,16 @@ void Mesh::read() aRead.name = "gridSpacing"; IOHandler()->enqueue(IOTask(this, aRead)); IOHandler()->flush(internal::defaultFlushParams); - Attribute a = Attribute(*aRead.resource); + a = Attribute(*aRead.resource); if (*aRead.dtype == DT::VEC_FLOAT || *aRead.dtype == DT::FLOAT) - setGridSpacing(a.get >()); + setGridSpacing(a.get>()); else if (*aRead.dtype == DT::VEC_DOUBLE || *aRead.dtype == DT::DOUBLE) - setGridSpacing(a.get >()); + setGridSpacing(a.get>()); else if ( *aRead.dtype == DT::VEC_LONG_DOUBLE || *aRead.dtype == DT::LONG_DOUBLE) - setGridSpacing(a.get >()); + setGridSpacing(a.get>()); // conversion cast if a backend reports an integer type - else if (auto val = a.getOptional >(); val.has_value()) + else if (auto val = a.getOptional>(); val.has_value()) setGridSpacing(val.value()); else throw error::ReadError( @@ -370,7 +370,7 @@ void Mesh::read() IOHandler()->enqueue(IOTask(this, aRead)); IOHandler()->flush(internal::defaultFlushParams); if (auto val = - Attribute(*aRead.resource).getOptional >(); + Attribute(*aRead.resource).getOptional>(); val.has_value()) setGridGlobalOffset(val.value()); else @@ -438,9 +438,9 @@ void Mesh::read() dOpen.name = component; IOHandler()->enqueue(IOTask(&rc, dOpen)); IOHandler()->flush(internal::defaultFlushParams); - rc.written() = false; + rc.setWritten(false, Attributable::EnqueueAsynchronously::No); rc.resetDataset(Dataset(*dOpen.dtype, 
*dOpen.extent)); - rc.written() = true; + rc.setWritten(true, Attributable::EnqueueAsynchronously::No); try { rc.read(); diff --git a/src/ParticlePatches.cpp b/src/ParticlePatches.cpp index 5d84b6cd32..491add8be7 100644 --- a/src/ParticlePatches.cpp +++ b/src/ParticlePatches.cpp @@ -93,14 +93,14 @@ void ParticlePatches::read() datatypeToString(*dOpen.dtype) + ")"); /* allow all attributes to be set */ - prc.written() = false; + prc.setWritten(false, Attributable::EnqueueAsynchronously::No); prc.resetDataset(Dataset(*dOpen.dtype, *dOpen.extent)); - prc.written() = true; + prc.setWritten(true, Attributable::EnqueueAsynchronously::No); - pr.dirty() = false; + pr.setDirty(false); try { - prc.PatchRecordComponent::read(); + prc.PatchRecordComponent::read(/* require_unit_si = */ false); } catch (error::ReadError const &err) { @@ -111,5 +111,6 @@ void ParticlePatches::read() Container::container().erase(component_name); } } + setDirty(false); } } // namespace openPMD diff --git a/src/ParticleSpecies.cpp b/src/ParticleSpecies.cpp index 7f57450acf..4006cc82ba 100644 --- a/src/ParticleSpecies.cpp +++ b/src/ParticleSpecies.cpp @@ -104,6 +104,7 @@ void ParticleSpecies::read() auto &container = particlePatches.container(); container.erase("numParticles"); container.erase("numParticlesOffset"); + particlePatches.setDirty(false); } /* obtain all scalar records */ @@ -123,9 +124,9 @@ void ParticleSpecies::read() RecordComponent &rc = r; IOHandler()->enqueue(IOTask(&rc, dOpen)); IOHandler()->flush(internal::defaultFlushParams); - rc.written() = false; + rc.setWritten(false, Attributable::EnqueueAsynchronously::No); rc.resetDataset(Dataset(*dOpen.dtype, *dOpen.extent)); - rc.written() = true; + rc.setWritten(true, Attributable::EnqueueAsynchronously::No); r.read(); } catch (error::ReadError const &err) @@ -147,10 +148,7 @@ namespace { bool flushParticlePatches(ParticlePatches const &particlePatches) { - return particlePatches.find("numParticles") != particlePatches.end() && - 
particlePatches.find("numParticlesOffset") != - particlePatches.end() && - particlePatches.size() >= 3; + return !particlePatches.empty(); } } // namespace @@ -163,6 +161,10 @@ void ParticleSpecies::flush( record.second.flush(record.first, flushParams); for (auto &patch : particlePatches) patch.second.flush(patch.first, flushParams); + if (flushParams.flushLevel != FlushLevel::SkeletonOnly) + { + particlePatches.setDirty(false); + } } else { @@ -184,32 +186,14 @@ void ParticleSpecies::flush( for (auto &patch : particlePatches) patch.second.flush(patch.first, flushParams); } - } -} - -bool ParticleSpecies::dirtyRecursive() const -{ - if (dirty()) - { - return true; - } - for (auto const &pair : *this) - { - if (pair.second.dirtyRecursive()) + else { - return true; + particlePatches.setDirty(false); } } - if (flushParticlePatches(particlePatches)) + if (flushParams.flushLevel != FlushLevel::SkeletonOnly) { - for (auto const &pair : particlePatches) - { - if (pair.second.dirtyRecursive()) - { - return true; - } - } + setDirty(false); } - return false; } } // namespace openPMD diff --git a/src/Record.cpp b/src/Record.cpp index 939930d12c..3bcac4d7e1 100644 --- a/src/Record.cpp +++ b/src/Record.cpp @@ -104,7 +104,7 @@ void Record::read() /* using operator[] will incorrectly update parent */ try { - T_RecordComponent::read(); + T_RecordComponent::read(/* require_unit_si = */ true); } catch (error::ReadError const &err) { @@ -128,7 +128,7 @@ void Record::read() rc.get().m_isConstant = true; try { - rc.read(); + rc.read(/* require_unit_si = */ true); } catch (error::ReadError const &err) { @@ -150,12 +150,12 @@ void Record::read() dOpen.name = component; IOHandler()->enqueue(IOTask(&rc, dOpen)); IOHandler()->flush(internal::defaultFlushParams); - rc.written() = false; + rc.setWritten(false, Attributable::EnqueueAsynchronously::No); rc.resetDataset(Dataset(*dOpen.dtype, *dOpen.extent)); - rc.written() = true; + rc.setWritten(true, Attributable::EnqueueAsynchronously::No); 
try { - rc.read(); + rc.read(/* require_unit_si = */ true); } catch (error::ReadError const &err) { diff --git a/src/RecordComponent.cpp b/src/RecordComponent.cpp index fed6fe60d4..0387268514 100644 --- a/src/RecordComponent.cpp +++ b/src/RecordComponent.cpp @@ -25,6 +25,7 @@ #include "openPMD/IO/Format.hpp" #include "openPMD/Series.hpp" #include "openPMD/auxiliary/Memory.hpp" +#include "openPMD/backend/Attributable.hpp" #include "openPMD/backend/BaseRecord.hpp" #include @@ -39,6 +40,21 @@ namespace openPMD namespace internal { RecordComponentData::RecordComponentData() = default; + auto RecordComponentData::push_chunk(IOTask &&task) -> void + { + Attributable a; + a.setData(std::shared_ptr{this, [](auto const &) {}}); +// this check can be too costly in some setups +#if 0 + if (a.containingIteration().closed()) + { + throw error::WrongAPIUsage( + "Cannot write/read chunks to/from closed Iterations."); + } +#endif + a.setDirtyRecursive(true); + m_chunks.push(std::move(task)); + } } // namespace internal RecordComponent::RecordComponent() : BaseRecordComponent(NoInit()) @@ -95,10 +111,7 @@ RecordComponent &RecordComponent::resetDataset(Dataset d) } // if( d.extent.empty() ) // throw std::runtime_error("Dataset extent must be at least 1D."); - if (std::any_of( - d.extent.begin(), d.extent.end(), [](Extent::value_type const &i) { - return i == 0u; - })) + if (d.empty()) return makeEmpty(std::move(d)); rc.m_isEmpty = false; @@ -111,7 +124,7 @@ RecordComponent &RecordComponent::resetDataset(Dataset d) rc.m_dataset = std::move(d); } - dirty() = true; + setDirty(true); return *this; } @@ -204,7 +217,7 @@ RecordComponent &RecordComponent::makeEmpty(Dataset d) throw std::runtime_error("Dataset extent must be at least 1D."); rc.m_isEmpty = true; - dirty() = true; + setDirty(true); if (!written()) { switchType >( @@ -299,6 +312,7 @@ void RecordComponent::flush( dCreate.extent = getExtent(); dCreate.dtype = getDatatype(); dCreate.options = rc.m_dataset.value().options; + 
dCreate.joinedDimension = joinedDimension(); IOHandler()->enqueue(IOTask(this, dCreate)); } } @@ -338,11 +352,15 @@ void RecordComponent::flush( flushAttributes(flushParams); } + if (flushParams.flushLevel != FlushLevel::SkeletonOnly) + { + setDirty(false); + } } -void RecordComponent::read() +void RecordComponent::read(bool require_unit_si) { - readBase(); + readBase(require_unit_si); } namespace @@ -367,7 +385,7 @@ namespace }; } // namespace -void RecordComponent::readBase() +void RecordComponent::readBase(bool require_unit_si) { using DT = Datatype; // auto & rc = get(); @@ -381,9 +399,9 @@ void RecordComponent::readBase() Attribute a(*aRead.resource); DT dtype = *aRead.dtype; - written() = false; + setWritten(false, Attributable::EnqueueAsynchronously::No); switchNonVectorType(dtype, *this, a); - written() = true; + setWritten(true, Attributable::EnqueueAsynchronously::No); aRead.name = "shape"; IOHandler()->enqueue(IOTask(this, aRead)); @@ -408,40 +426,56 @@ void RecordComponent::readBase() oss.str()); } - written() = false; + setWritten(false, Attributable::EnqueueAsynchronously::No); resetDataset(Dataset(dtype, e)); - written() = true; + setWritten(true, Attributable::EnqueueAsynchronously::No); } - aRead.name = "unitSI"; - IOHandler()->enqueue(IOTask(this, aRead)); - IOHandler()->flush(internal::defaultFlushParams); - if (auto val = Attribute(*aRead.resource).getOptional(); - val.has_value()) - setUnitSI(val.value()); - else - throw error::ReadError( - error::AffectedObject::Attribute, - error::Reason::UnexpectedContent, - {}, - "Unexpected Attribute datatype for 'unitSI' (expected double, " - "found " + - datatypeToString(Attribute(*aRead.resource).dtype) + ")"); - readAttributes(ReadMode::FullyReread); -} -bool RecordComponent::dirtyRecursive() const -{ - if (this->dirty()) + if (require_unit_si) { - return true; + if (!containsAttribute("unitSI")) + { + throw error::ReadError( + error::AffectedObject::Attribute, + error::Reason::NotFound, + {}, + 
"Attribute unitSI required for record components, not found in " + "'" + + myPath().openPMDPath() + "'."); + } + if (!getAttribute("unitSI").getOptional().has_value()) + { + throw error::ReadError( + error::AffectedObject::Attribute, + error::Reason::UnexpectedContent, + {}, + "Unexpected Attribute datatype for 'unitSI' (expected double, " + "found " + + datatypeToString(Attribute(*aRead.resource).dtype) + + ") in '" + myPath().openPMDPath() + "'."); + } } - return !get().m_chunks.empty(); } void RecordComponent::storeChunk( auxiliary::WriteBuffer buffer, Datatype dtype, Offset o, Extent e) +{ + verifyChunk(dtype, o, e); + + Parameter dWrite; + dWrite.offset = std::move(o); + dWrite.extent = std::move(e); + dWrite.dtype = dtype; + /* std::static_pointer_cast correctly reference-counts the pointer */ + dWrite.data = std::move(buffer); + auto &rc = get(); + rc.push_chunk(IOTask(this, std::move(dWrite))); +} + +void RecordComponent::verifyChunk( + Datatype dtype, Offset const &o, Extent const &e) const { if (constant()) throw std::runtime_error( @@ -457,32 +491,59 @@ void RecordComponent::storeChunk( throw std::runtime_error(oss.str()); } uint8_t dim = getDimensionality(); - if (e.size() != dim || o.size() != dim) - { - std::ostringstream oss; - oss << "Dimensionality of chunk (" - << "offset=" << o.size() << "D, " - << "extent=" << e.size() << "D) " - << "and record component (" << int(dim) << "D) " - << "do not match."; - throw std::runtime_error(oss.str()); - } Extent dse = getExtent(); - for (uint8_t i = 0; i < dim; ++i) - if (dse[i] < o[i] + e[i]) - throw std::runtime_error( - "Chunk does not reside inside dataset (Dimension on index " + - std::to_string(i) + ". 
DS: " + std::to_string(dse[i]) + - " - Chunk: " + std::to_string(o[i] + e[i]) + ")"); - Parameter dWrite; - dWrite.offset = o; - dWrite.extent = e; - dWrite.dtype = dtype; - /* std::static_pointer_cast correctly reference-counts the pointer */ - dWrite.data = std::move(buffer); - auto &rc = get(); - rc.m_chunks.push(IOTask(this, std::move(dWrite))); + if (auto jd = joinedDimension(); jd.has_value()) + { + if (o.size() != 0) + { + std::ostringstream oss; + oss << "Joined array: Must specify an empty offset (given: " + << "offset=" << o.size() << "D, " + << "extent=" << e.size() << "D)."; + throw std::runtime_error(oss.str()); + } + if (e.size() != dim) + { + std::ostringstream oss; + oss << "Joined array: Dimensionalities of chunk extent and dataset " + "extent must be equivalent (given: " + << "offset=" << o.size() << "D, " + << "extent=" << e.size() << "D)."; + throw std::runtime_error(oss.str()); + } + for (size_t i = 0; i < dim; ++i) + { + if (i != jd.value() && e[i] != dse[i]) + { + throw std::runtime_error( + "Joined array: Chunk extent on non-joined dimensions must " + "be equivalent to dataset extents (Dimension on index " + + std::to_string(i) + ". DS: " + std::to_string(dse[i]) + + " - Chunk: " + std::to_string(o[i] + e[i]) + ")"); + } + } + } + else + { + if (e.size() != dim || o.size() != dim) + { + std::ostringstream oss; + oss << "Dimensionality of chunk (" + << "offset=" << o.size() << "D, " + << "extent=" << e.size() << "D) " + << "and record component (" << int(dim) << "D) " + << "do not match."; + throw std::runtime_error(oss.str()); + } + for (uint8_t i = 0; i < dim; ++i) + if (dse[i] < o[i] + e[i]) + throw std::runtime_error( + "Chunk does not reside inside dataset (Dimension on " + "index " + + std::to_string(i) + ". 
DS: " + std::to_string(dse[i]) + + " - Chunk: " + std::to_string(o[i] + e[i]) + ")"); + } } namespace diff --git a/src/Series.cpp b/src/Series.cpp index 5d698ffbcf..d587575b44 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -19,24 +19,37 @@ * If not, see . */ #include "openPMD/Series.hpp" +#include "openPMD/ChunkInfo.hpp" +#include "openPMD/ChunkInfo_internal.hpp" #include "openPMD/Error.hpp" #include "openPMD/IO/AbstractIOHandler.hpp" #include "openPMD/IO/AbstractIOHandlerHelper.hpp" +#include "openPMD/IO/Access.hpp" +#include "openPMD/IO/DummyIOHandler.hpp" #include "openPMD/IO/Format.hpp" +#include "openPMD/IO/IOTask.hpp" #include "openPMD/IterationEncoding.hpp" #include "openPMD/ReadIterations.hpp" +#include "openPMD/ThrowError.hpp" #include "openPMD/auxiliary/Date.hpp" #include "openPMD/auxiliary/Filesystem.hpp" #include "openPMD/auxiliary/JSON_internal.hpp" +#include "openPMD/auxiliary/Mpi.hpp" #include "openPMD/auxiliary/StringManip.hpp" +#include "openPMD/auxiliary/Variant.hpp" +#include "openPMD/backend/Attributable.hpp" #include "openPMD/version.hpp" +#include #include #include #include #include +#include +#include #include #include +#include #include #include #include @@ -71,9 +84,11 @@ namespace struct Match { bool isContained{}; //! pattern match successful - int padding{}; //! number of zeros used for padding of iteration - Series::IterationIndex_t - iteration{}; //! iteration found in regex pattern (default: 0) + int padding{}; //! number of zeros used for padding of iteration, zero + //! if no padding + Series::IterationIndex_t iteration = + 0; //! 
iteration found in regex pattern (default: 0) + std::optional extension; // support for std::tie operator std::tuple() @@ -102,7 +117,7 @@ namespace std::string const &prefix, int padding, std::string const &postfix, - std::string const &extension); + std::optional const &extension); } // namespace struct Series::ParsedInput @@ -113,7 +128,7 @@ struct Series::ParsedInput IterationEncoding iterationEncoding; std::string filenamePrefix; std::string filenamePostfix; - std::string filenameExtension; + std::optional filenameExtension; int filenamePadding = -1; }; // ParsedInput @@ -177,10 +192,274 @@ Series &Series::setMeshesPath(std::string const &mp) setAttribute("meshesPath", mp); else setAttribute("meshesPath", mp + "/"); - dirty() = true; + setDirty(true); return *this; } +#if openPMD_HAVE_MPI +chunk_assignment::RankMeta Series::rankTable(bool collective) +#else +chunk_assignment::RankMeta Series::rankTable([[maybe_unused]] bool collective) +#endif +{ + auto &series = get(); + auto &rankTable = series.m_rankTable; + if (rankTable.m_bufferedRead.has_value()) + { + return *rankTable.m_bufferedRead; + } + if (iterationEncoding() == IterationEncoding::fileBased) + { + std::cerr << "[Series] Use rank table in file-based iteration encoding " + "at your own risk. Make sure to have an iteration open " + "before calling this." 
+ << std::endl; + if (iterations.empty()) + { + return {}; + } +#if 0 + Parameter openFile; + openFile.name = iterationFilename(iterations.begin()->first); + // @todo: check if the series currently has an open file, check if + // collective is true + IOHandler()->enqueue(IOTask(this, openFile)); +#endif + } + Parameter listDatasets; + IOHandler()->enqueue(IOTask(this, listDatasets)); + IOHandler()->flush(internal::defaultFlushParams); + if (std::none_of( + listDatasets.datasets->begin(), + listDatasets.datasets->end(), + [](std::string const &str) { return str == "rankTable"; })) + { + rankTable.m_bufferedRead = chunk_assignment::RankMeta{}; + return {}; + } + Parameter openDataset; + openDataset.name = "rankTable"; + IOHandler()->enqueue(IOTask(&rankTable.m_attributable, openDataset)); + + IOHandler()->flush(internal::defaultFlushParams); + if (openDataset.extent->size() != 2) + { + // @todo use better error type + throw std::runtime_error("[Series] rankTable must be 2D."); + } + if (*openDataset.dtype != Datatype::CHAR && + *openDataset.dtype != Datatype::UCHAR && + *openDataset.dtype != Datatype::SCHAR) + { + // @todo use better error type + throw std::runtime_error("[Series] rankTable must have char type."); + } + + auto writerRanks = (*openDataset.extent)[0]; + auto lineWidth = (*openDataset.extent)[1]; + + if (lineWidth < 1) + { + // Check this because our indexing logic later relies on this + // @todo use better error type + throw std::runtime_error("[Series] rankTable lines must not be empty."); + } + + std::shared_ptr get{ + new char[writerRanks * lineWidth], + [](char const *ptr) { delete[] ptr; }}; + + auto doReadDataset = [&openDataset, this, &get, &rankTable]() { + Parameter readDataset; + // read the whole thing + readDataset.offset.resize(2); + readDataset.extent = *openDataset.extent; + // @todo better cross-platform support by switching over + // *openDataset.dtype + readDataset.dtype = Datatype::CHAR; + readDataset.data = get; + + 
IOHandler()->enqueue(IOTask(&rankTable.m_attributable, readDataset)); + IOHandler()->flush(internal::defaultFlushParams); + }; + +#if openPMD_HAVE_MPI + if (collective && series.m_communicator.has_value()) + { + auto comm = series.m_communicator.value(); + int rank{0}, size{1}; + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &size); + if (rank == 0) + { + doReadDataset(); + } + MPI_Bcast(get.get(), writerRanks * lineWidth, MPI_CHAR, 0, comm); + } + else + { + doReadDataset(); + } +#else + doReadDataset(); +#endif + +#if 0 + if (iterationEncoding() == IterationEncoding::fileBased) + { + // @todo only do this if the file was previously not open + auto &it = iterations.begin()->second; + Parameter closeFile; + IOHandler()->enqueue(IOTask(this, closeFile)); + it.get().m_closed = internal::CloseStatus::ClosedTemporarily; + IOHandler()->flush(internal::defaultFlushParams); + } +#endif + + chunk_assignment::RankMeta res; + for (size_t i = 0; i < writerRanks; ++i) + { + if (get.get()[(i + 1) * lineWidth - 1] != 0) + { + throw std::runtime_error( + "[Series] rankTable lines must be null-terminated strings."); + } + // Use C-String constructor for std::string in the following line + // std::string::string(char const*); + res[i] = get.get() + i * lineWidth; + } + rankTable.m_bufferedRead = res; + return res; +} + +Series &Series::setRankTable(const std::string &myRankInfo) +{ + get().m_rankTable.m_rankTableSource = + internal::SeriesData::SourceSpecifiedManually{myRankInfo}; + return *this; +} + +void Series::flushRankTable() +{ + auto &series = get(); + auto &rankTable = series.m_rankTable; + auto maybeMyRankInfo = std::visit( + auxiliary::overloaded{ + [](internal::SeriesData::NoSourceSpecified &) + -> std::optional { return std::nullopt; }, + [&series](internal::SeriesData::SourceSpecifiedViaJSON &viaJson) + -> std::optional { + host_info::Method method; + try + { +#if openPMD_HAVE_MPI + bool consider_mpi = series.m_communicator.has_value(); +#else + (void)series; + 
bool consider_mpi = false; +#endif + method = host_info::methodFromStringDescription( + viaJson.value, consider_mpi); + } + catch (std::out_of_range const &) + { + throw error::WrongAPIUsage( + "[Series] Wrong value for JSON option 'rank_table': '" + + viaJson.value + "'."); + } + return host_info::byMethod(method); + }, + [](internal::SeriesData::SourceSpecifiedManually &manually) + -> std::optional { return manually.value; }}, + rankTable.m_rankTableSource); + if (!maybeMyRankInfo.has_value()) + { + return; + } + + auto myRankInfo = std::move(*maybeMyRankInfo); + + unsigned long long mySize = myRankInfo.size() + 1; // null character + int rank{0}, size{1}; + unsigned long long maxSize = mySize; + + auto createRankTable = [&size, &maxSize, &rankTable, this]() { + if (rankTable.m_attributable.written()) + { + return; + } + Parameter param; + param.name = "rankTable"; + param.dtype = Datatype::CHAR; + param.extent = {uint64_t(size), uint64_t(maxSize)}; + IOHandler()->enqueue( + IOTask(&rankTable.m_attributable, std::move(param))); + }; + + auto writeDataset = [&rank, &maxSize, this, &rankTable]( + std::shared_ptr put, size_t num_lines = 1) { + Parameter chunk; + chunk.dtype = Datatype::CHAR; + chunk.offset = {uint64_t(rank), 0}; + chunk.extent = {num_lines, maxSize}; + chunk.data = std::move(put); + IOHandler()->enqueue( + IOTask(&rankTable.m_attributable, std::move(chunk))); + }; + +#if openPMD_HAVE_MPI + if (series.m_communicator.has_value()) + { + auto comm = *series.m_communicator; + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &size); + // todo char portability + auto [charBuffer, lineLength, numLines] = + auxiliary::collectStringsAsMatrixTo(comm, 0, myRankInfo); + (void)numLines; // it's the MPI size + maxSize = lineLength; + + if (backend() == "MPI_HDF5") + { + MPI_Bcast(&maxSize, 1, MPI_UNSIGNED_LONG_LONG, 0, comm); + } + if (rank == 0 || backend() == "MPI_HDF5") + { + createRankTable(); + } + + if (rank == 0) + { + auto asRawPtr = new 
std::vector(std::move(charBuffer)); + std::shared_ptr put{ + asRawPtr->data(), + /* + * A nicer solution would be to std::move() the vector into the + * closure and let RAII deal with it. But clang6 doesn't + * correctly implement C++17 closure move initialization, so + * we go the extra mile and use raw pointers. + * > [m_charBuffer = std::move(charBuffer)](char *){ + * > // no-op + * > } + */ + [asRawPtr](char *) { delete asRawPtr; }}; + writeDataset(std::move(put), /* num_lines = */ size); + } + return; + } +#endif + // sic! no else + // if the Series was initialized without a communicator, then this code will + // run as well + createRankTable(); + + std::shared_ptr put{ + new char[maxSize]{}, [](char const *ptr) { delete[] ptr; }}; + std::copy_n(myRankInfo.c_str(), mySize, put.get()); + + writeDataset(std::move(put)); +} + std::string Series::particlesPath() const { return getAttribute("particlesPath").get(); @@ -203,7 +482,7 @@ Series &Series::setParticlesPath(std::string const &pp) setAttribute("particlesPath", pp); else setAttribute("particlesPath", pp + "/"); - dirty() = true; + setDirty(true); return *this; } @@ -284,6 +563,10 @@ IterationEncoding Series::iterationEncoding() const Series &Series::setIterationEncoding(IterationEncoding ie) { auto &series = get(); + if (series.m_deferred_initialization) + { + runDeferredInitialization(); + } if (written()) throw std::runtime_error( "A files iterationEncoding can not (yet) be changed after it has " @@ -359,6 +642,10 @@ std::string Series::name() const Series &Series::setName(std::string const &n) { auto &series = get(); + if (series.m_deferred_initialization) + { + runDeferredInitialization(); + } if (written()) throw std::runtime_error( "A files name can not (yet) be changed after it has been written."); @@ -386,7 +673,7 @@ Series &Series::setName(std::string const &n) } series.m_name = n; - dirty() = true; + setDirty(true); return *this; } @@ -395,6 +682,12 @@ std::string Series::backend() const return 
IOHandler()->backendName(); } +std::string Series::backend() +{ + /* this activates the non-const call to IOHandler() */ + return IOHandler()->backendName(); +} + void Series::flush(std::string backendConfig) { auto &series = get(); @@ -476,6 +769,11 @@ std::unique_ptr Series::parseInput(std::string filepath) std::tie(input->name, input->filenameExtension) = cleanFilename(input->name, suffix(input->format)).decompose(); + if (input->filenameExtension == ".%E") + { + input->filenameExtension = std::nullopt; + } + return input; } @@ -512,21 +810,16 @@ namespace std::string const &directory, MappingFunction &&mappingFunction) { - bool isContained; - int padding; - Series::IterationIndex_t iterationIndex; std::set paddings; if (auxiliary::directory_exists(directory)) { for (auto const &entry : auxiliary::list_directory(directory)) { - std::tie(isContained, padding, iterationIndex) = - isPartOfSeries(entry); - if (isContained) + Match match = isPartOfSeries(entry); + if (match.isContained) { - paddings.insert(padding); - // no std::forward as this is called repeatedly - mappingFunction(iterationIndex, entry); + paddings.insert(match.padding); + mappingFunction(entry, std::move(match)); } } } @@ -542,26 +835,264 @@ namespace std::function const &isPartOfSeries, std::string const &directory) { - return autoDetectPadding( - isPartOfSeries, - directory, - [](Series::IterationIndex_t index, std::string const &filename) { - (void)index; - (void)filename; - }); + return autoDetectPadding(isPartOfSeries, directory, [](auto &&...) 
{}); } } // namespace +template void Series::init( + std::string const &filepath, + Access at, + std::string const &options, + // Either an MPI_Comm or none, the template works for both options + MPI_Communicator &&...comm) +{ + auto init_directly = [this, &comm..., at, &filepath]( + std::unique_ptr parsed_input, + json::TracingJSON tracing_json) { + auto io_handler = createIOHandler( + parsed_input->path, + at, + parsed_input->format, + parsed_input->filenameExtension.value_or(std::string()), + comm..., + tracing_json, + filepath); + initSeries(std::move(io_handler), std::move(parsed_input)); + json::warnGlobalUnusedOptions(tracing_json); + }; + + auto init_deferred = [this, at, &filepath, &options, &comm...]( + std::string const &parsed_directory) { + // Set a temporary IOHandler so that API calls which require a present + // IOHandler don't fail + writable().IOHandler = + std::make_shared>>( + std::make_unique(parsed_directory, at)); + auto &series = get(); + series.iterations.linkHierarchy(writable()); + series.m_rankTable.m_attributable.linkHierarchy(writable()); + series.m_deferred_initialization = + [called_this_already = false, filepath, options, at, comm...]( + Series &s) mutable { + if (called_this_already) + { + throw std::runtime_error("Must be called one time only"); + } + else + { + called_this_already = true; + } + + auto [parsed_input, tracing_json] = + s.initIOHandler( + filepath, + options, + at, + true, + std::forward(comm)...); + + auto io_handler = createIOHandler( + parsed_input->path, + at, + parsed_input->format, + parsed_input->filenameExtension.value_or(std::string()), + comm..., + tracing_json, + filepath); + auto res = io_handler.get(); + s.initSeries(std::move(io_handler), std::move(parsed_input)); + json::warnGlobalUnusedOptions(tracing_json); + return res; + }; + }; + + switch (at) + { + case Access::CREATE: + case Access::READ_WRITE: + case Access::READ_ONLY: { + auto [parsed_input, tracing_json] = initIOHandler( + filepath, + 
options, + at, + true, + std::forward(comm)...); + init_directly(std::move(parsed_input), std::move(tracing_json)); + } + break; + case Access::READ_LINEAR: + case Access::APPEND: { + auto [first_parsed_input, first_tracing_json] = + initIOHandler( + filepath, + options, + at, + false, + std::forward(comm)...); + if (first_parsed_input->filenameExtension.has_value()) + { + init_directly( + std::move(first_parsed_input), std::move(first_tracing_json)); + } + else + { + /* + * Since we are still in the constructor, we want to avoid I/O + * accesses to resolve the file extension at the moment. + * -> Defer the proper initialization of the IO handler up to the + * point when we actually need it. + */ + init_deferred(first_parsed_input->path); + } + } + break; + } +} + +template +auto Series::initIOHandler( + std::string const &filepath, + std::string const &options, + Access at, + bool resolve_generic_extension, + MPI_Communicator &&...comm) + -> std::tuple, TracingJSON> +{ + auto &series = get(); + + json::TracingJSON optionsJson = json::parseOptions( + options, + std::forward(comm)..., + /* considerFiles = */ true); + auto input = parseInput(filepath); + if (resolve_generic_extension && input->format == Format::GENERIC && + at != Access::CREATE) + { + auto isPartOfSeries = + input->iterationEncoding == IterationEncoding::fileBased + ? 
matcher( + input->filenamePrefix, + input->filenamePadding, + input->filenamePostfix, + std::nullopt) + : matcher(input->name, -1, "", std::nullopt); + std::optional extension; + std::set additional_extensions; + autoDetectPadding( + isPartOfSeries, + input->path, + [&extension, + &additional_extensions](std::string const &, Match const &match) { + auto const &ext = match.extension.value(); + if (extension.has_value() && *extension != ext) + { + additional_extensions.emplace(ext); + } + else + { + extension = ext; + } + }); + if (extension.has_value()) + { + if (!additional_extensions.empty()) + { + std::stringstream error; + error << "Found ambiguous filename extensions on disk: "; + auto it = additional_extensions.begin(); + auto end = additional_extensions.end(); + error << '\'' << *it++ << '\''; + for (; it != end; ++it) + { + error << ", '" << *it << '\''; + } + error << " and '" + *extension + "'."; + throw error::ReadError( + error::AffectedObject::File, + error::Reason::Other, + std::nullopt, + error.str()); + } + input->filenameExtension = *extension; + input->format = determineFormat(*extension); + } + else if (access::read(at)) + { + throw error::ReadError( + error::AffectedObject::File, + error::Reason::NotFound, + std::nullopt, + "No file found that matches given pattern '" + filepath + "'."); + } + } + + // default options + series.m_parseLazily = at == Access::READ_LINEAR; + + // now check for user-specified options + parseJsonOptions(optionsJson, *input); + + if (resolve_generic_extension && !input->filenameExtension.has_value()) + { + if (input->format == /* still */ Format::GENERIC) + { + throw error::WrongAPIUsage( + "Unable to automatically determine filename extension. 
Please " + "specify in some way."); + } + else if (input->format == Format::ADIOS2_BP) + { + // Since ADIOS2 has multiple extensions depending on the engine, + // we need to pass this job on to the backend + input->filenameExtension = ".%E"; + } + else + { + input->filenameExtension = suffix(input->format); + } + } + return std::make_tuple(std::move(input), std::move(optionsJson)); +} + +void Series::initSeries( std::unique_ptr ioHandler, std::unique_ptr input) { auto &series = get(); - writable().IOHandler = - std::make_shared>>( - std::move(ioHandler)); - series.iterations.linkHierarchy(writable()); + auto &writable = series.m_writable; + + /* + * In Access modes READ_LINEAR and APPEND, the Series constructor might have + * emplaced a temporary IOHandler. Check if this is the case. + */ + if (writable.IOHandler) + { + if (writable.IOHandler->has_value()) + { + /* + * A temporary IOHandler has been used. In this case, copy the + * values from that IOHandler over into the real one. + */ + ioHandler->operator=(***writable.IOHandler); + *writable.IOHandler = std::move(ioHandler); + } + else + { + throw error::Internal( + "Control flow error. 
This should not happen."); + } + } + else + { + writable.IOHandler = + std::make_shared>>( + std::move(ioHandler)); + } + + series.iterations.linkHierarchy(writable); series.iterations.writable().ownKeyWithinParent = "iterations"; + series.m_rankTable.m_attributable.linkHierarchy(writable); series.m_name = input->name; @@ -570,7 +1101,7 @@ void Series::init( series.m_filenamePrefix = input->filenamePrefix; series.m_filenamePostfix = input->filenamePostfix; series.m_filenamePadding = input->filenamePadding; - series.m_filenameExtension = input->filenameExtension; + series.m_filenameExtension = input->filenameExtension.value(); if (series.m_iterationEncoding == IterationEncoding::fileBased && !series.m_filenamePrefix.empty() && @@ -613,12 +1144,12 @@ Given file pattern: ')END" { /* Access::READ_WRITE can be used to create a new Series * allow setting attributes in that case */ - written() = false; + setWritten(false, Attributable::EnqueueAsynchronously::No); initDefaults(input->iterationEncoding); setIterationEncoding(input->iterationEncoding); - written() = true; + setWritten(true, Attributable::EnqueueAsynchronously::No); } } catch (...) @@ -647,6 +1178,11 @@ Given file pattern: ')END" series.m_filenamePrefix, series.m_filenamePadding, series.m_filenamePostfix, + /* + * This might still be ".%E" if the backend is ADIOS2 and no + * files are yet on disk. + * In that case, this will just not find anything. + */ series.m_filenameExtension), IOHandler()->directory); switch (padding) @@ -656,8 +1192,10 @@ Given file pattern: ')END" "Cannot write to a series with inconsistent iteration padding. " "Please specify '%0T' or open as read-only."); case -1: - std::cerr << "No matching iterations found: " << name() - << std::endl; + /* + * No matching iterations found. No problem, Append mode is also + * fine for creating new datasets. + */ break; default: series.m_filenamePadding = padding; @@ -737,7 +1275,12 @@ std::future Series::flush_impl( } catch (...) 
{ - IOHandler()->m_lastFlushSuccessful = false; + auto handler = IOHandler(); + handler->m_lastFlushSuccessful = false; + while (!handler->m_work.empty()) + { + handler->m_work.pop(); + } throw; } } @@ -784,12 +1327,12 @@ void Series::flushFileBased( it->second.get().m_closed = internal::CloseStatus::ClosedInBackend; } + } - // Phase 3 - if (flushIOHandler) - { - IOHandler()->flush(flushParams); - } + // Phase 3 + if (flushIOHandler) + { + IOHandler()->flush(flushParams); } break; case Access::READ_WRITE: @@ -807,12 +1350,14 @@ void Series::flushFileBased( * emulate the file belonging to each iteration as not yet * written, even if the iteration itself is already written * (to ensure that the Series gets reassociated with the - * current iteration) + * current iteration by the backend) */ - written() = false; - series.iterations.written() = false; + this->setWritten( + false, Attributable::EnqueueAsynchronously::Yes); + series.iterations.setWritten( + false, Attributable::EnqueueAsynchronously::Yes); - dirty() |= it->second.dirty(); + setDirty(dirty() || it->second.dirty()); std::string filename = iterationFilename(it->first); if (!it->second.written()) @@ -842,18 +1387,18 @@ void Series::flushFileBased( it->second.get().m_closed = internal::CloseStatus::ClosedInBackend; } - - // Phase 3 - if (flushIOHandler) - { - IOHandler()->flush(flushParams); - } /* reset the dirty bit for every iteration (i.e. 
file) * otherwise only the first iteration will have updates attributes */ - dirty() = allDirty; + setDirty(allDirty); + } + setDirty(false); + + // Phase 3 + if (flushIOHandler) + { + IOHandler()->flush(flushParams); } - dirty() = false; break; } } @@ -866,6 +1411,7 @@ void Series::flushGorVBased( bool flushIOHandler) { auto &series = get(); + if (access::readOnly(IOHandler()->m_frontendAccess)) { for (auto it = begin; it != end; ++it) @@ -893,12 +1439,14 @@ void Series::flushGorVBased( it->second.get().m_closed = internal::CloseStatus::ClosedInBackend; } + } - // Phase 3 - if (flushIOHandler) - { - IOHandler()->flush(flushParams); - } + // Phase 3 + Parameter touch; + IOHandler()->enqueue(IOTask(&writable(), touch)); + if (flushIOHandler) + { + IOHandler()->flush(flushParams); } } else @@ -925,6 +1473,8 @@ void Series::flushGorVBased( Parameter fCreate; fCreate.name = series.m_name; IOHandler()->enqueue(IOTask(this, fCreate)); + + flushRankTable(); } series.iterations.flush( @@ -971,6 +1521,8 @@ void Series::flushGorVBased( } flushAttributes(flushParams); + Parameter touch; + IOHandler()->enqueue(IOTask(&writable(), touch)); if (flushIOHandler) { IOHandler()->flush(flushParams); @@ -1004,6 +1556,13 @@ void Series::readFileBased() Parameter fOpen; Parameter aRead; + // Tell the backend that we are parsing file-based iteration encoding. + // This especially means that READ_RANDOM_ACCESS will be used instead of + // READ_LINEAR, as READ_LINEAR is implemented in the frontend for file-based + // encoding. Don't set the iteration encoding in the frontend yet, will be + // set after reading the iteration encoding attribute from the opened file. 
+ IOHandler()->setIterationEncoding(IterationEncoding::fileBased); + if (!auxiliary::directory_exists(IOHandler()->directory)) throw error::ReadError( error::AffectedObject::File, @@ -1021,7 +1580,8 @@ void Series::readFileBased() isPartOfSeries, IOHandler()->directory, // foreach found file with `filename` and `index`: - [&series](IterationIndex_t index, std::string const &filename) { + [&series](std::string const &filename, Match const &match) { + auto index = match.iteration; Iteration &i = series.iterations[index]; i.deferParseAccess( {std::to_string(index), @@ -1253,9 +1813,9 @@ void Series::readOneIterationFileBased(std::string const &filePath) IOHandler()->flush(internal::defaultFlushParams); if (*aRead.dtype == DT::STRING) { - written() = false; + setWritten(false, Attributable::EnqueueAsynchronously::No); setIterationFormat(Attribute(*aRead.resource).get()); - written() = true; + setWritten(true, Attributable::EnqueueAsynchronously::No); } else throw error::ReadError( @@ -1404,9 +1964,9 @@ creating new iterations. 
IOHandler()->flush(internal::defaultFlushParams); if (*aRead.dtype == DT::STRING) { - written() = false; + setWritten(false, Attributable::EnqueueAsynchronously::No); setIterationFormat(Attribute(*aRead.resource).get()); - written() = true; + setWritten(true, Attributable::EnqueueAsynchronously::No); } else throw error::ReadError( @@ -1671,12 +2231,14 @@ void Series::readBase() { /* allow setting the meshes path after completed IO */ for (auto &it : series.iterations) - it.second.meshes.written() = false; + it.second.meshes.setWritten( + false, Attributable::EnqueueAsynchronously::No); setMeshesPath(val.value()); for (auto &it : series.iterations) - it.second.meshes.written() = true; + it.second.meshes.setWritten( + true, Attributable::EnqueueAsynchronously::No); } else throw error::ReadError( @@ -1701,12 +2263,14 @@ void Series::readBase() { /* allow setting the meshes path after completed IO */ for (auto &it : series.iterations) - it.second.particles.written() = false; + it.second.particles.setWritten( + false, Attributable::EnqueueAsynchronously::No); setParticlesPath(val.value()); for (auto &it : series.iterations) - it.second.particles.written() = true; + it.second.particles.setWritten( + true, Attributable::EnqueueAsynchronously::No); } else throw error::ReadError( @@ -2147,7 +2711,7 @@ namespace * The string is converted to lower case. 
*/ template - void getJsonOptionLowerCase( + bool getJsonOptionLowerCase( json::TracingJSON &config, std::string const &key, Dest &dest) { if (config.json().contains(key)) @@ -2163,6 +2727,11 @@ namespace throw error::BackendConfigSchema( {key}, "Must be convertible to string type."); } + return true; + } + else + { + return false; } } } // namespace @@ -2173,6 +2742,11 @@ void Series::parseJsonOptions(TracingJSON &options, ParsedInput &input) auto &series = get(); getJsonOption( options, "defer_iteration_parsing", series.m_parseLazily); + internal::SeriesData::SourceSpecifiedViaJSON rankTableSource; + if (getJsonOptionLowerCase(options, "rank_table", rankTableSource.value)) + { + series.m_rankTable.m_rankTableSource = std::move(rankTableSource); + } // backend key { std::map const backendDescriptors{ @@ -2201,6 +2775,7 @@ void Series::parseJsonOptions(TracingJSON &options, ParsedInput &input) } else if ( input.format != Format::DUMMY && + input.format != Format::GENERIC && suffix(input.format) != suffix(it->second)) { std::cerr << "[Warning] Supplied filename extension '" @@ -2323,21 +2898,10 @@ Series::Series( std::string const &options) : Attributable(NoInit()) { - setData(std::make_shared()); - json::TracingJSON optionsJson = - json::parseOptions(options, comm, /* considerFiles = */ true); - auto input = parseInput(filepath); - parseJsonOptions(optionsJson, *input); - auto handler = createIOHandler( - input->path, - at, - input->format, - input->filenameExtension, - comm, - optionsJson, - filepath); - init(std::move(handler), std::move(input)); - json::warnGlobalUnusedOptions(optionsJson); + auto data = std::make_shared(); + data->m_communicator = comm; + setData(std::move(data)); + init(filepath, at, options, comm); } #endif @@ -2346,19 +2910,7 @@ Series::Series( : Attributable(NoInit()) { setData(std::make_shared()); - json::TracingJSON optionsJson = - json::parseOptions(options, /* considerFiles = */ true); - auto input = parseInput(filepath); - 
parseJsonOptions(optionsJson, *input); - auto handler = createIOHandler( - input->path, - at, - input->format, - input->filenameExtension, - optionsJson, - filepath); - init(std::move(handler), std::move(input)); - json::warnGlobalUnusedOptions(optionsJson); + init(filepath, at, options); } Series::operator bool() const @@ -2388,6 +2940,10 @@ WriteIterations Series::writeIterations() { series.m_writeIterations = WriteIterations(this->iterations); } + if (series.m_deferred_initialization.has_value()) + { + runDeferredInitialization(); + } return series.m_writeIterations.value(); } @@ -2444,6 +3000,37 @@ auto Series::currentSnapshot() const } } +AbstractIOHandler *Series::runDeferredInitialization() +{ + auto &series = get(); + if (series.m_deferred_initialization.has_value()) + { + auto functor = std::move(*m_series->m_deferred_initialization); + m_series->m_deferred_initialization = std::nullopt; + return functor(*this); + } + else + { + return nullptr; + } +} + +AbstractIOHandler *Series::IOHandler() +{ + auto res = Attributable::IOHandler(); + if (res && // res->backendName() == "Dummy" && + m_series->m_deferred_initialization.has_value()) + { + res = runDeferredInitialization(); + } + return res; +} +AbstractIOHandler const *Series::IOHandler() const +{ + auto res = Attributable::IOHandler(); + return res; +} + namespace { CleanedFilename cleanFilename( @@ -2461,31 +3048,58 @@ namespace } } - std::function - buildMatcher(std::string const ®exPattern, int padding) + std::function buildMatcher( + std::string const ®exPattern, + int padding, + std::optional index_of_extension) { - std::regex pattern(regexPattern); - - return [pattern, padding](std::string const &filename) -> Match { + return [index_of_extension, + pattern = std::regex(regexPattern), + padding](std::string const &filename) -> Match { std::smatch regexMatches; bool match = std::regex_match(filename, regexMatches, pattern); int processedPadding = - padding != 0 ? padding : (match ? 
regexMatches[1].length() : 0); + padding != 0 ? padding : (match ? regexMatches[2].length() : 0); return { match, processedPadding, - match ? std::stoull(regexMatches[1]) : 0}; + padding < 0 ? padding + : match ? std::stoull(regexMatches[2]) + : 0, + index_of_extension.has_value() + ? std::make_optional( + regexMatches[*index_of_extension]) + : std::nullopt}; }; } + namespace + { + auto sanitize_regex(std::string const &input) -> std::string + { + // need to escape special characters reserved for regexes, see + // https://stackoverflow.com/questions/40195412/c11-regex-search-for-exact-string-escape + // https://regex101.com/r/GDPK7E/3 + std::regex specialChars{R"([-[\]{}()*+?.,\^$|#\s\\])"}; + // `$&` is the matched substring, see + // https://en.cppreference.com/w/cpp/regex/regex_replace + return std::regex_replace(input, specialChars, R"(\$&)"); + } + } // namespace + std::function matcher( std::string const &prefix, int padding, std::string const &postfix, - std::string const &filenameSuffix) + std::optional const &filenameSuffix) { - std::string nameReg = "^" + prefix; - if (padding != 0) + std::string nameReg = "^(" + sanitize_regex(prefix) + ")"; + size_t index_of_extension = 0; + if (padding < 0) + { + index_of_extension = 3; + } + else if (padding > 0) { // The part after the question mark: // The number must be at least `padding` digits long @@ -2495,15 +3109,108 @@ namespace // iteration number via std::stoull(regexMatches[1]) nameReg += "(([1-9][[:digit:]]*)?([[:digit:]]"; nameReg += "{" + std::to_string(padding) + "}))"; + index_of_extension = 6; } else { // No padding specified, any number of digits is ok. nameReg += "([[:digit:]]"; nameReg += "+)"; + index_of_extension = 4; } - nameReg += postfix + filenameSuffix + "$"; - return buildMatcher(nameReg, padding); + nameReg += "(" + sanitize_regex(postfix) + ")" + + filenameSuffix.value_or("(\\.[[:alnum:]]+)") + "$"; + return buildMatcher( + nameReg, + padding, + !filenameSuffix.has_value() + ? 
std::make_optional(index_of_extension) + : std::nullopt); } } // namespace + +namespace debug +{ + void printDirty(Series const &series) + { + auto print = [](Attributable const &attr) { + size_t indent = 0; + { + auto current = attr.parent(); + while (current) + { + ++indent; + current = current->parent; + } + } + auto make_indent = [&]() { + for (size_t i = 0; i < indent; ++i) + { + std::cout << "\t"; + } + }; + make_indent(); + auto const &w = attr.writable(); + std::cout << w.ownKeyWithinParent << '\n'; + make_indent(); + std::cout << "Self: " << w.dirtySelf + << "\tRec: " << w.dirtyRecursive << '\n'; + std::cout << std::endl; + }; + print(series); + print(series.iterations); + for (auto const &[it_name, it] : series.iterations) + { + (void)it_name; + print(it); + print(it.meshes); + for (auto const &[mesh_name, mesh] : it.meshes) + { + (void)mesh_name; + print(mesh); + if (!mesh.scalar()) + { + for (auto const &[comp_name, comp] : mesh) + { + (void)comp_name; + print(comp); + } + } + } + print(it.particles); + for (auto const &[species_name, species] : it.particles) + { + (void)species_name; + print(species); + print(species.particlePatches); + for (auto const &[patch_name, patch] : species.particlePatches) + { + (void)patch_name; + print(patch); + if (!patch.scalar()) + { + for (auto const &[component_name, component] : patch) + { + (void)component_name; + print(component); + } + } + } + for (auto const &[record_name, record] : species) + { + (void)record_name; + print(record); + if (!record.scalar()) + { + for (auto const &[comp_name, comp] : record) + { + (void)comp_name; + print(comp); + } + } + } + } + } + } +} // namespace debug } // namespace openPMD diff --git a/src/auxiliary/Filesystem.cpp b/src/auxiliary/Filesystem.cpp index cce80b9d17..564d266ee3 100644 --- a/src/auxiliary/Filesystem.cpp +++ b/src/auxiliary/Filesystem.cpp @@ -195,7 +195,8 @@ std::string collective_file_read(std::string const &path, MPI_Comm comm) if (!handle.good()) { throw 
std::runtime_error( - "Failed reading JSON config from file " + path + "."); + "[collective_file_read] Failed acessing file '" + path + + "' on MPI rank 0."); } stringLength = res.size() + 1; } diff --git a/src/auxiliary/JSON.cpp b/src/auxiliary/JSON.cpp index dd0825c33c..7c96221026 100644 --- a/src/auxiliary/JSON.cpp +++ b/src/auxiliary/JSON.cpp @@ -25,7 +25,10 @@ #include "openPMD/Error.hpp" #include "openPMD/auxiliary/Filesystem.hpp" #include "openPMD/auxiliary/StringManip.hpp" +#include "openPMD/auxiliary/Variant.hpp" +#include +#include #include #include @@ -62,6 +65,11 @@ nlohmann::json const &TracingJSON::getShadow() const return *m_positionInShadow; } +nlohmann::json &TracingJSON::getShadow() +{ + return *m_positionInShadow; +} + nlohmann::json TracingJSON::invertShadow() const { nlohmann::json inverted = *m_positionInOriginal; @@ -79,7 +87,13 @@ void TracingJSON::invertShadow( std::vector toRemove; for (auto it = shadow.begin(); it != shadow.end(); ++it) { - nlohmann::json &partialResult = result[it.key()]; + auto partialResultIterator = result.find(it.key()); + if (partialResultIterator == result.end()) + { + // The shadow contained a key that was not in the original dataset + continue; + } + nlohmann::json &partialResult = partialResultIterator.value(); if (partialResult.is_object()) { invertShadow(partialResult, it.value()); @@ -195,10 +209,6 @@ namespace } return result; } - else if (val.is_uninitialized()) - { - return nlohmann::json(); // null - } // @todo maybe generalize error type throw error::BackendConfigSchema( @@ -281,9 +291,22 @@ namespace { ParsedConfig parseInlineOptions(std::string const &options) { + // speed up default options + ParsedConfig res; + if (options.empty()) + { + res.originallySpecifiedAs = SupportedLanguages::TOML; + res.config = nlohmann::json::object(); + return res; + } + else if (options == "{}") + { + res.originallySpecifiedAs = SupportedLanguages::JSON; + res.config = nlohmann::json::object(); + return res; + } 
std::string trimmed = auxiliary::trim(options, [](char c) { return std::isspace(c); }); - ParsedConfig res; if (trimmed.empty()) { return res; @@ -530,7 +553,7 @@ void warnGlobalUnusedOptions(TracingJSON const &config) std::cerr << "[Series] The following parts of the global TOML config " "remains unused:\n" - << asToml << std::endl; + << json::format_toml(asToml) << std::endl; } } } @@ -541,19 +564,19 @@ merge(nlohmann::json &defaultVal, nlohmann::json const &overwrite) { if (defaultVal.is_object() && overwrite.is_object()) { - std::vector prunedKeys; + std::queue prunedKeys; for (auto it = overwrite.begin(); it != overwrite.end(); ++it) { auto &valueInDefault = defaultVal[it.key()]; merge(valueInDefault, it.value()); if (valueInDefault.is_null()) { - prunedKeys.emplace_back(it.key()); + prunedKeys.push(it.key()); } } - for (auto const &key : prunedKeys) + for (; !prunedKeys.empty(); prunedKeys.pop()) { - defaultVal.erase(key); + defaultVal.erase(prunedKeys.front()); } } else @@ -586,10 +609,89 @@ std::string merge(std::string const &defaultValue, std::string const &overwrite) case SupportedLanguages::TOML: { auto asToml = json::jsonToToml(res); std::stringstream sstream; - sstream << asToml; + sstream << json::format_toml(asToml); return sstream.str(); } } throw std::runtime_error("Unreachable!"); } + +nlohmann::json & +filterByTemplate(nlohmann::json &defaultVal, nlohmann::json const &positiveMask) +{ + if (defaultVal.is_object() && positiveMask.is_object()) + { + std::queue prunedKeys; + for (auto left_it = defaultVal.begin(); left_it != defaultVal.end(); + ++left_it) + { + if (auto right_it = positiveMask.find(left_it.key()); + right_it != positiveMask.end()) + { + // value is covered by mask, keep it + filterByTemplate(left_it.value(), right_it.value()); + } + else + { + prunedKeys.push(left_it.key()); + } + } + for (; !prunedKeys.empty(); prunedKeys.pop()) + { + defaultVal.erase(prunedKeys.front()); + } + } // else noop + return defaultVal; +} + +constexpr 
int toml_precision = std::numeric_limits::digits10 + 1; + +#if TOML11_VERSION_MAJOR < 4 +template +std ::string format_toml(toml_t &&val) +{ + std::stringstream res; + res << std::setprecision(toml_precision) << std::forward(val); + return res.str(); +} + +#else + +namespace +{ + auto set_precision(toml::value &) -> void; + auto set_precision(toml::value &val) -> void + { + if (val.is_table()) + { + for (auto &pair : val.as_table()) + { + set_precision(pair.second); + } + } + else if (val.is_array()) + { + for (auto &entry : val.as_array()) + { + set_precision(entry); + } + } + else if (val.is_floating()) + { + val.as_floating_fmt().prec = toml_precision; + } + } +} // namespace + +template +std::string format_toml(toml_t &&val) +{ + set_precision(val); + return toml::format(std::forward(val)); +} + +#endif + +template std::string format_toml(toml::value &&); +template std::string format_toml(toml::value &); } // namespace openPMD::json diff --git a/src/auxiliary/Mpi.cpp b/src/auxiliary/Mpi.cpp new file mode 100644 index 0000000000..1873237cb6 --- /dev/null +++ b/src/auxiliary/Mpi.cpp @@ -0,0 +1,113 @@ +#include "openPMD/auxiliary/Mpi.hpp" + +#include +#include + +#if openPMD_HAVE_MPI + +namespace openPMD::auxiliary +{ +StringMatrix collectStringsAsMatrixTo( + MPI_Comm communicator, int destRank, std::string const &thisRankString) +{ + int rank, size; + MPI_Comm_rank(communicator, &rank); + MPI_Comm_size(communicator, &size); + int sendLength = thisRankString.size() + 1; + std::vector recvcounts; + + if (rank == destRank) + { + recvcounts.resize(size); + } + + MPI_Gather( + &sendLength, + 1, + MPI_INT, + recvcounts.data(), + 1, + MPI_INT, + destRank, + MPI_COMM_WORLD); + int maxLength = std::accumulate( + recvcounts.begin(), recvcounts.end(), 0, [](int a, int b) { + return std::max(a, b); + }); + + StringMatrix res; + std::vector displs; + if (rank == destRank) + { + res.line_length = maxLength; + res.num_lines = size; + res.char_buffer.resize(maxLength * 
res.num_lines); + displs.reserve(size); + for (int i = 0; i < size; ++i) + { + displs.emplace_back(i * maxLength); + } + } + + MPI_Gatherv( + thisRankString.c_str(), + sendLength, + MPI_CHAR, + res.char_buffer.data(), + recvcounts.data(), + displs.data(), + MPI_CHAR, + destRank, + MPI_COMM_WORLD); + + return res; +} + +std::vector distributeStringsToAllRanks( + MPI_Comm communicator, std::string const &thisRankString) +{ + int rank, size; + MPI_Comm_rank(communicator, &rank); + MPI_Comm_size(communicator, &size); + int sendLength = thisRankString.size() + 1; + + int *sizesBuffer = new int[size]; + int *displs = new int[size]; + + MPI_Allgather( + &sendLength, 1, MPI_INT, sizesBuffer, 1, MPI_INT, MPI_COMM_WORLD); + + char *namesBuffer; + { + size_t sum = 0; + for (int i = 0; i < size; ++i) + { + displs[i] = sum; + sum += sizesBuffer[i]; + } + namesBuffer = new char[sum]; + } + + MPI_Allgatherv( + thisRankString.c_str(), + sendLength, + MPI_CHAR, + namesBuffer, + sizesBuffer, + displs, + MPI_CHAR, + MPI_COMM_WORLD); + + std::vector hostnames(size); + for (int i = 0; i < size; ++i) + { + hostnames[i] = std::string(namesBuffer + displs[i]); + } + + delete[] sizesBuffer; + delete[] displs; + delete[] namesBuffer; + return hostnames; +} +} // namespace openPMD::auxiliary +#endif diff --git a/src/backend/Attributable.cpp b/src/backend/Attributable.cpp index 7eaf47dd07..d5ff005389 100644 --- a/src/backend/Attributable.cpp +++ b/src/backend/Attributable.cpp @@ -20,14 +20,19 @@ */ #include "openPMD/backend/Attributable.hpp" #include "openPMD/Iteration.hpp" +#include "openPMD/ParticleSpecies.hpp" +#include "openPMD/RecordComponent.hpp" #include "openPMD/Series.hpp" #include "openPMD/auxiliary/DerefDynamicCast.hpp" #include "openPMD/auxiliary/StringManip.hpp" +#include "openPMD/backend/Attribute.hpp" #include #include #include #include +#include +#include namespace openPMD { @@ -125,61 +130,63 @@ Series Attributable::retrieveSeries() const { findSeries = findSeries->parent; } 
- auto seriesData = &auxiliary::deref_dynamic_cast( - findSeries->attributable); - Series res; - res.setData( - std::shared_ptr{seriesData, [](auto const *) {}}); - return res; + return findSeries->attributable->asInternalCopyOf(); } -Iteration const &Attributable::containingIteration() const +auto Attributable::containingIteration() const + -> std::pair< + std::optional, + internal::SeriesData const *> { - std::vector searchQueue; - searchQueue.reserve(7); + constexpr size_t search_queue_size = 3; + Writable const *search_queue[search_queue_size]{nullptr}; + size_t search_queue_idx = 0; Writable const *findSeries = &writable(); - while (findSeries) + while (true) { - searchQueue.push_back(findSeries); + search_queue[search_queue_idx] = findSeries; // we don't need to push the last Writable since it's the Series anyway findSeries = findSeries->parent; + if (!findSeries) + { + break; + } + else + { + search_queue_idx = (search_queue_idx + 1) % search_queue_size; + } } // End of the queue: // Iteration -> Series.iterations -> Series - if (searchQueue.size() < 3) + auto *series = &auxiliary::deref_dynamic_cast( + search_queue[search_queue_idx]->attributable); + auto maybe_iteration = search_queue + [(search_queue_idx + (search_queue_size - 2)) % search_queue_size]; + if (maybe_iteration) { - throw std::runtime_error( - "containingIteration(): Must be called for an object contained in " - "an iteration."); + auto *iteration = + &auxiliary::deref_dynamic_cast( + maybe_iteration->attributable); + return std::make_pair(std::make_optional(iteration), series); } - auto end = searchQueue.rbegin(); - internal::AttributableData const *attr = (*(end + 2))->attributable; - if (attr == nullptr) - throw std::runtime_error( - "containingIteration(): attributable must not be a nullptr."); - /* - * We now know the unique instance of Attributable that corresponds with - * the iteration. 
- * Since the class Iteration itself still follows the old class design, - * we will have to take a detour via Series. - */ - auto &series = auxiliary::deref_dynamic_cast( - (*searchQueue.rbegin())->attributable); - for (auto const &pair : series.iterations) + else { - if (&static_cast(pair.second).get() == attr) - { - return pair.second; - } + return std::make_pair(std::nullopt, series); } - throw std::runtime_error( - "Containing iteration not found in containing Series."); } -Iteration &Attributable::containingIteration() +auto Attributable::containingIteration() + -> std:: + pair, internal::SeriesData *> { - return const_cast( - static_cast(this)->containingIteration()); + auto const_res = + static_cast(this)->containingIteration(); + return std::make_pair( + const_res.first.has_value() + ? std::make_optional( + const_cast(*const_res.first)) + : std::nullopt, + const_cast(const_res.second)); } std::string Attributable::MyPath::filePath() const @@ -187,6 +194,26 @@ std::string Attributable::MyPath::filePath() const return directory + seriesName + seriesExtension; } +std::string Attributable::MyPath::openPMDPath() const +{ + if (group.empty()) + { + return std::string(); + } + else + { + std::stringstream res; + auto it = group.begin(); + auto end = group.end(); + res << *it++; + for (; it != end; ++it) + { + res << '/' << *it; + } + return res.str(); + } +} + auto Attributable::myPath() const -> MyPath { MyPath res; @@ -240,8 +267,11 @@ void Attributable::flushAttributes(internal::FlushParams const &flushParams) aWrite.dtype = getAttribute(att_name).dtype; IOHandler()->enqueue(IOTask(this, aWrite)); } - - dirty() = false; + } + // Do this outside the if branch to also setDirty to dirtyRecursive + if (flushParams.flushLevel != FlushLevel::SkeletonOnly) + { + setDirty(false); } } @@ -451,7 +481,24 @@ void Attributable::readAttributes(ReadMode mode) } } - dirty() = false; + setDirty(false); +} + +void Attributable::setWritten(bool val, EnqueueAsynchronously ea) +{ 
+ switch (ea) + { + + case EnqueueAsynchronously::Yes: { + Parameter param; + param.target_status = val; + IOHandler()->enqueue(IOTask(this, param)); + } + break; + case EnqueueAsynchronously::No: + break; + } + writable().written = val; } void Attributable::linkHierarchy(Writable &w) @@ -459,5 +506,55 @@ void Attributable::linkHierarchy(Writable &w) auto handler = w.IOHandler; writable().IOHandler = handler; writable().parent = &w; + setDirty(true); } + +namespace internal +{ + template + T &makeOwning(T &self, Series s) + { + /* + * `self` is a handle object such as RecordComponent or Mesh (see + * instantiations below). + * These objects don't normally keep alive the Series, i.e. as soon as + * the Series is destroyed, the handle becomes invalid. + * This function modifies the handle such that it actually keeps the + * Series alive and behaves otherwise identically. + * First, get the internal shared pointer of the handle. + */ + std::shared_ptr data_ptr = self.T::getShared(); + auto raw_ptr = data_ptr.get(); + /* + * Now, create a new shared pointer pointing to the same address as the + * actual pointer and replace the old internal shared pointer by the new + * one. + */ + self.setData(std::shared_ptr{ + raw_ptr, + /* + * Here comes the main trick. + * The new shared pointer stores (and thus keeps alive) two items + * via lambda capture in its destructor: + * 1. The old shared pointer. + * 2. The Series. + * It's important to notice that these two items are only stored + * within the newly created handle, and not internally within the + * actual openPMD object model. This means that no reference cycles + * can occur. 
+ */ + [s_lambda = std::move(s), + data_ptr_lambda = std::move(data_ptr)](auto const *) { + /* no-op, the lambda captures simply go out of scope */ + }}); + return self; + } + + template RecordComponent &makeOwning(RecordComponent &, Series); + template MeshRecordComponent &makeOwning(MeshRecordComponent &, Series); + template Mesh &makeOwning(Mesh &, Series); + template Record &makeOwning(Record &, Series); + template ParticleSpecies &makeOwning(ParticleSpecies &, Series); + template Iteration &makeOwning(Iteration &, Series); +} // namespace internal } // namespace openPMD diff --git a/src/backend/BaseRecordComponent.cpp b/src/backend/BaseRecordComponent.cpp index 96b38beed5..3f0f1b35c0 100644 --- a/src/backend/BaseRecordComponent.cpp +++ b/src/backend/BaseRecordComponent.cpp @@ -19,6 +19,7 @@ * If not, see . */ #include "openPMD/backend/BaseRecordComponent.hpp" +#include "openPMD/Error.hpp" #include "openPMD/Iteration.hpp" namespace openPMD @@ -65,6 +66,19 @@ bool BaseRecordComponent::constant() const return get().m_isConstant; } +std::optional BaseRecordComponent::joinedDimension() const +{ + auto &rc = get(); + if (rc.m_dataset.has_value()) + { + return rc.m_dataset.value().joinedDimension(); + } + else + { + return false; + } +} + ChunkTable BaseRecordComponent::availableChunks() { auto &rc = get(); @@ -77,7 +91,17 @@ ChunkTable BaseRecordComponent::availableChunks() Offset offset(rc.m_dataset.value().extent.size(), 0); return ChunkTable{{std::move(offset), rc.m_dataset.value().extent}}; } - containingIteration().open(); + if (auto iteration_data = containingIteration().first; + iteration_data.has_value()) + { + (*iteration_data)->asInternalCopyOf().open(); + } + else + { + throw error::Internal( + "Containing Iteration of BaseRecordComponent could not be " + "retrieved."); + } Parameter param; IOTask task(this, param); IOHandler()->enqueue(task); diff --git a/src/backend/MeshRecordComponent.cpp b/src/backend/MeshRecordComponent.cpp index 
2be3879ea9..ed50080757 100644 --- a/src/backend/MeshRecordComponent.cpp +++ b/src/backend/MeshRecordComponent.cpp @@ -64,7 +64,7 @@ void MeshRecordComponent::read() "of any floating point type, found " + datatypeToString(Attribute(*aRead.resource).dtype) + ")"); - readBase(); + readBase(/* require_unit_si = */ true); } void MeshRecordComponent::flush( diff --git a/src/backend/PatchRecord.cpp b/src/backend/PatchRecord.cpp index 25d72d62b6..5d2b38d50f 100644 --- a/src/backend/PatchRecord.cpp +++ b/src/backend/PatchRecord.cpp @@ -52,9 +52,9 @@ void PatchRecord::flush_impl( } else T_RecordComponent::flush(path, flushParams); - if (flushParams.flushLevel == FlushLevel::UserFlush) + if (flushParams.flushLevel != FlushLevel::SkeletonOnly) { - this->dirty() = false; + setDirty(false); } } @@ -90,12 +90,12 @@ void PatchRecord::read() IOHandler()->enqueue(IOTask(&prc, dOpen)); IOHandler()->flush(internal::defaultFlushParams); /* allow all attributes to be set */ - prc.written() = false; + prc.setWritten(false, Attributable::EnqueueAsynchronously::No); prc.resetDataset(Dataset(*dOpen.dtype, *dOpen.extent)); - prc.written() = true; + prc.setWritten(true, Attributable::EnqueueAsynchronously::No); try { - prc.read(); + prc.read(/* require_unit_si = */ false); } catch (error::ReadError const &err) { @@ -106,6 +106,6 @@ void PatchRecord::read() this->container().erase(component_name); } } - dirty() = false; + setDirty(false); } } // namespace openPMD diff --git a/src/backend/PatchRecordComponent.cpp b/src/backend/PatchRecordComponent.cpp index 3277bac550..af19923fad 100644 --- a/src/backend/PatchRecordComponent.cpp +++ b/src/backend/PatchRecordComponent.cpp @@ -19,6 +19,7 @@ * If not, see . 
*/ #include "openPMD/backend/PatchRecordComponent.hpp" +#include "openPMD/RecordComponent.hpp" #include "openPMD/auxiliary/Memory.hpp" #include "openPMD/backend/BaseRecord.hpp" @@ -26,10 +27,6 @@ namespace openPMD { -namespace internal -{ - PatchRecordComponentData::PatchRecordComponentData() = default; -} // namespace internal PatchRecordComponent &PatchRecordComponent::setUnitSI(double usi) { @@ -45,15 +42,12 @@ PatchRecordComponent &PatchRecordComponent::resetDataset(Dataset d) "written."); if (d.extent.empty()) throw std::runtime_error("Dataset extent must be at least 1D."); - if (std::any_of( - d.extent.begin(), d.extent.end(), [](Extent::value_type const &i) { - return i == 0u; - })) + if (d.empty()) throw std::runtime_error( "Dataset extent must not be zero in any dimension."); - get().m_dataset = d; - dirty() = true; + get().m_dataset = std::move(d); + setDirty(true); return *this; } @@ -77,111 +71,17 @@ Extent PatchRecordComponent::getExtent() const PatchRecordComponent::PatchRecordComponent( BaseRecord const &baseRecord) - : BaseRecordComponent(NoInit()) + : RecordComponent(NoInit()) { - setData(baseRecord.m_patchRecordComponentData); + static_cast(*this).operator=(baseRecord); } -PatchRecordComponent::PatchRecordComponent() : BaseRecordComponent(NoInit()) +PatchRecordComponent::PatchRecordComponent() : RecordComponent(NoInit()) { setData(std::make_shared()); setUnitSI(1); } -PatchRecordComponent::PatchRecordComponent(NoInit) - : BaseRecordComponent(NoInit()) +PatchRecordComponent::PatchRecordComponent(NoInit) : RecordComponent(NoInit()) {} - -void PatchRecordComponent::flush( - std::string const &name, internal::FlushParams const &flushParams) -{ - auto &rc = get(); - if (access::readOnly(IOHandler()->m_frontendAccess)) - { - while (!rc.m_chunks.empty()) - { - IOHandler()->enqueue(rc.m_chunks.front()); - rc.m_chunks.pop(); - } - } - else - { - if (!rc.m_dataset.has_value()) - { - // The check for !written() is technically not needed, just - // defensive 
programming against internal bugs that go on us. - if (!written() && rc.m_chunks.empty()) - { - // No data written yet, just accessed the object so far without - // doing anything - // Just do nothing and skip this record component. - return; - } - else - { - throw error::WrongAPIUsage( - "[PatchRecordComponent] Must specify dataset type and " - "extent before flushing (see " - "RecordComponent::resetDataset())."); - } - } - if (!containsAttribute("unitSI")) - { - setUnitSI(1); - } - if (!written()) - { - Parameter dCreate; - dCreate.name = name; - dCreate.extent = getExtent(); - dCreate.dtype = getDatatype(); - dCreate.options = rc.m_dataset.value().options; - IOHandler()->enqueue(IOTask(this, dCreate)); - } - - while (!rc.m_chunks.empty()) - { - IOHandler()->enqueue(rc.m_chunks.front()); - rc.m_chunks.pop(); - } - - flushAttributes(flushParams); - } -} - -void PatchRecordComponent::read() -{ - readAttributes(ReadMode::FullyReread); // this will set dirty() = false - - if (containsAttribute("unitSI")) - { - /* - * No need to call setUnitSI - * If it's in the attributes map, then it's already set - * Just verify that it has the right type (getOptional<>() does - * conversions if possible, so this check is non-intrusive) - */ - if (auto val = getAttribute("unitSI").getOptional(); - !val.has_value()) - { - throw error::ReadError( - error::AffectedObject::Attribute, - error::Reason::UnexpectedContent, - {}, - "Unexpected Attribute datatype for 'unitSI' (expected double, " - "found " + - datatypeToString(getAttribute("unitSI").dtype) + ")"); - } - } -} - -bool PatchRecordComponent::dirtyRecursive() const -{ - if (this->dirty()) - { - return true; - } - auto &rc = get(); - return !rc.m_chunks.empty(); -} } // namespace openPMD diff --git a/src/backend/Writable.cpp b/src/backend/Writable.cpp index bd68941345..0e399a3a81 100644 --- a/src/backend/Writable.cpp +++ b/src/backend/Writable.cpp @@ -51,7 +51,14 @@ void Writable::seriesFlush(internal::FlushParams const 
&flushParams) { Attributable impl; impl.setData({attributable, [](auto const *) {}}); - auto series = impl.retrieveSeries(); + auto [iteration_internal, series_internal] = impl.containingIteration(); + if (iteration_internal) + { + (*iteration_internal) + ->asInternalCopyOf() + .setDirtyRecursive(true); + } + auto series = series_internal->asInternalCopyOf(); series.flush_impl( series.iterations.begin(), series.iterations.end(), flushParams); } diff --git a/src/binding/python/Attributable.cpp b/src/binding/python/Attributable.cpp index b3ce7885ea..2383ceab50 100644 --- a/src/binding/python/Attributable.cpp +++ b/src/binding/python/Attributable.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -336,6 +337,16 @@ struct char_to_explicit_char template std::optional tryCast(py::object const &obj) { + // Do a check to avoid throwing exceptions + if constexpr (std::is_default_constructible_v) + { + TargetType val{}; + auto python_val = py::cast(std::move(val)); + if (!py::isinstance(obj, python_val.get_type())) + { + return std::nullopt; + } + } try { return obj.cast(); @@ -358,17 +369,25 @@ bool setAttributeFromObject_char( std::is_same_v, typename char_to_explicit_char<>::type, Char_t>; - using ListChar = std::vector; using ListString = std::vector; + /* + * We cannot distinguish strings with length 1 from chars at this place, + * so avoid this cast. If the attribute is actually a char, skipping this + * branch means that it might be upcasted to string. + */ +#if 0 + using ListChar = std::vector; if (auto casted_char = tryCast(obj); casted_char.has_value()) { return attr.setAttribute(key, *casted_char); - } + } else +#endif + // This must come after tryCast // because tryCast implicitly covers chars as well. 
- else if (auto casted_string = tryCast(obj); - casted_string.has_value()) + if (auto casted_string = tryCast(obj); + casted_string.has_value()) { return attr.setAttribute(key, std::move(*casted_string)); } @@ -386,11 +405,20 @@ bool setAttributeFromObject_char( // NOW: List casts. // All list casts must come after all scalar casts, // because list casts implicitly cover scalars too. + + /* + * We cannot distinguish strings with length 1 from chars at this place, + * so avoid this cast. If the attribute is actually a vector of char, + * skipping this branch means that it might be upcasted to a vector of + * string. + */ +#if 0 else if (auto list_of_char = tryCast(obj); list_of_char.has_value()) { return attr.setAttribute(key, std::move(*list_of_char)); } +#endif // this must come after tryCast>, // because tryCast> implicitly covers chars as well else if (auto list_of_string = tryCast(obj); diff --git a/src/binding/python/ChunkInfo.cpp b/src/binding/python/ChunkInfo.cpp index 86bcb0128a..a392cdd3e2 100644 --- a/src/binding/python/ChunkInfo.cpp +++ b/src/binding/python/ChunkInfo.cpp @@ -19,6 +19,7 @@ * If not, see . 
*/ #include "openPMD/ChunkInfo.hpp" +#include "openPMD/binding/python/Mpi.hpp" #include "openPMD/binding/python/Common.hpp" @@ -73,4 +74,30 @@ void init_Chunk(py::module &m) return WrittenChunkInfo(offset, extent, sourceID); })); + + py::enum_(m, "HostInfo") + .value("POSIX_HOSTNAME", host_info::Method::POSIX_HOSTNAME) + .value("MPI_PROCESSOR_NAME", host_info::Method::MPI_PROCESSOR_NAME) +#if openPMD_HAVE_MPI + .def( + "get_collective", + [](host_info::Method const &self, py::object &comm) { + auto variant = pythonObjectAsMpiComm(comm); + if (auto errorMsg = std::get_if(&variant)) + { + throw std::runtime_error("[Series] " + *errorMsg); + } + else + { + return host_info::byMethodCollective( + std::get(variant), self); + } + }) +#endif + .def( + "get", + [](host_info::Method const &self) { + return host_info::byMethod(self); + }) + .def("available", &host_info::methodAvailable); } diff --git a/src/binding/python/Dataset.cpp b/src/binding/python/Dataset.cpp index 656cd59ea8..70d85721f2 100644 --- a/src/binding/python/Dataset.cpp +++ b/src/binding/python/Dataset.cpp @@ -27,58 +27,66 @@ void init_Dataset(py::module &m) { - py::class_(m, "Dataset") + auto pyDataset = + py::class_(m, "Dataset") + .def( + py::init(), + py::arg("dtype"), + py::arg("extent")) + .def(py::init(), py::arg("extent")) + .def( + py::init([](py::dtype dt, Extent const &e) { + auto const d = dtype_from_numpy(std::move(dt)); + return new Dataset{d, e}; + }), + py::arg("dtype"), + py::arg("extent")) + .def( + py::init(), + py::arg("dtype"), + py::arg("extent"), + py::arg("options")) + .def( + py::init([](py::dtype dt, Extent e, std::string options) { + auto const d = dtype_from_numpy(std::move(dt)); + return new Dataset{d, std::move(e), std::move(options)}; + }), + py::arg("dtype"), + py::arg("extent"), + py::arg("options")) - .def(py::init(), py::arg("dtype"), py::arg("extent")) - .def(py::init(), py::arg("extent")) - .def( - py::init([](py::dtype dt, Extent const &e) { - auto const d = 
dtype_from_numpy(std::move(dt)); - return new Dataset{d, e}; - }), - py::arg("dtype"), - py::arg("extent")) - .def( - py::init(), - py::arg("dtype"), - py::arg("extent"), - py::arg("options")) - .def( - py::init([](py::dtype dt, Extent const &e, std::string options) { - auto const d = dtype_from_numpy(std::move(dt)); - return new Dataset{d, e, std::move(options)}; - }), - py::arg("dtype"), - py::arg("extent"), - py::arg("options")) - - .def( - "__repr__", - [](const Dataset &d) { - std::stringstream stream; - stream << ""; - } - else - { - auto begin = d.extent.begin(); - stream << '[' << *begin++; - for (; begin != d.extent.end(); ++begin) + .def( + "__repr__", + [](const Dataset &d) { + std::stringstream stream; + stream << ""; + } + else { - stream << ", " << *begin; + auto begin = d.extent.begin(); + stream << '[' << *begin++; + for (; begin != d.extent.end(); ++begin) + { + stream << ", " << *begin; + } + stream << "]>"; } - stream << "]>"; - } - return stream.str(); - }) + return stream.str(); + }) - .def_readonly("extent", &Dataset::extent) - .def("extend", &Dataset::extend) - .def_readonly("rank", &Dataset::rank) - .def_property_readonly( - "dtype", [](const Dataset &d) { return dtype_to_numpy(d.dtype); }) - .def_readwrite("options", &Dataset::options); + .def_property_readonly( + "joined_dimension", &Dataset::joinedDimension) + .def_readonly("extent", &Dataset::extent) + .def("extend", &Dataset::extend) + .def_readonly("rank", &Dataset::rank) + .def_property_readonly( + "dtype", + [](const Dataset &d) { return dtype_to_numpy(d.dtype); }) + .def_readwrite("options", &Dataset::options); + pyDataset.attr("JOINED_DIMENSION") = + py::int_(uint64_t(Dataset::JOINED_DIMENSION)); } diff --git a/src/binding/python/Iteration.cpp b/src/binding/python/Iteration.cpp index df017114e6..cd5fecacb0 100644 --- a/src/binding/python/Iteration.cpp +++ b/src/binding/python/Iteration.cpp @@ -23,6 +23,7 @@ #include "openPMD/backend/Attributable.hpp" #include 
"openPMD/binding/python/Common.hpp" #include "openPMD/binding/python/Container.H" +#include "openPMD/binding/python/Pickle.hpp" #include #include @@ -33,6 +34,13 @@ void init_Iteration(py::module &m) auto py_it_cont = declare_container( m, "Iteration_Container"); + // `clang-format on/off` doesn't help here. + // Writing this without a macro would lead to a huge diff due to + // clang-format. +#define OPENPMD_AVOID_CLANG_FORMAT auto cl = + OPENPMD_AVOID_CLANG_FORMAT +#undef OPENPMD_AVOID_CLANG_FORMAT + py::class_(m, "Iteration") .def(py::init()) @@ -99,5 +107,12 @@ void init_Iteration(py::module &m) // garbage collection: return value must be freed before Iteration py::keep_alive<1, 0>()); + add_pickle( + cl, [](openPMD::Series series, std::vector const &group) { + uint64_t const n_it = std::stoull(group.at(1)); + auto res = series.iterations[n_it]; + return internal::makeOwning(res, std::move(series)); + }); + finalize_container(py_it_cont); } diff --git a/src/binding/python/Mesh.cpp b/src/binding/python/Mesh.cpp index 55c6fd13a4..9d53e15591 100644 --- a/src/binding/python/Mesh.cpp +++ b/src/binding/python/Mesh.cpp @@ -115,9 +115,10 @@ void init_Mesh(py::module &m) .def("set_grid_global_offset", &Mesh::setGridGlobalOffset) .def("set_grid_unit_SI", &Mesh::setGridUnitSI); add_pickle( - cl, [](openPMD::Series &series, std::vector const &group) { + cl, [](openPMD::Series series, std::vector const &group) { uint64_t const n_it = std::stoull(group.at(1)); - return series.iterations[n_it].meshes[group.at(3)]; + auto res = series.iterations[n_it].open().meshes[group.at(3)]; + return internal::makeOwning(res, std::move(series)); }); finalize_container(py_m_cont); diff --git a/src/binding/python/MeshRecordComponent.cpp b/src/binding/python/MeshRecordComponent.cpp index 1a43f0e289..52055973b4 100644 --- a/src/binding/python/MeshRecordComponent.cpp +++ b/src/binding/python/MeshRecordComponent.cpp @@ -82,9 +82,15 @@ void init_MeshRecordComponent(py::module &m) "Relative 
position of the component on an element " "(node/cell/voxel) of the mesh"); add_pickle( - cl, [](openPMD::Series &series, std::vector const &group) { + cl, [](openPMD::Series series, std::vector const &group) { uint64_t const n_it = std::stoull(group.at(1)); - return series.iterations[n_it].meshes[group.at(3)][group.at(4)]; + auto res = + series.iterations[n_it] + .open() + .meshes[group.at(3)] + [group.size() < 5 ? MeshRecordComponent::SCALAR + : group.at(4)]; + return internal::makeOwning(res, std::move(series)); }); finalize_container(py_mrc_cnt); diff --git a/src/binding/python/ParticleSpecies.cpp b/src/binding/python/ParticleSpecies.cpp index 55fe0aaef0..c224800b98 100644 --- a/src/binding/python/ParticleSpecies.cpp +++ b/src/binding/python/ParticleSpecies.cpp @@ -55,9 +55,11 @@ void init_ParticleSpecies(py::module &m) // garbage collection: return value must be freed before Series py::keep_alive<1, 0>()); add_pickle( - cl, [](openPMD::Series &series, std::vector const &group) { + cl, [](openPMD::Series series, std::vector const &group) { uint64_t const n_it = std::stoull(group.at(1)); - return series.iterations[n_it].particles[group.at(3)]; + ParticleSpecies res = + series.iterations[n_it].open().particles[group.at(3)]; + return internal::makeOwning(res, std::move(series)); }); finalize_container(py_ps_cnt); diff --git a/src/binding/python/PatchRecordComponent.cpp b/src/binding/python/PatchRecordComponent.cpp index 8ad51d6e4f..311272f5b6 100644 --- a/src/binding/python/PatchRecordComponent.cpp +++ b/src/binding/python/PatchRecordComponent.cpp @@ -49,7 +49,7 @@ void init_PatchRecordComponent(py::module &m) declare_container( m, "Patch_Record_Component_Container"); - py::class_( + py::class_( m, "Patch_Record_Component") .def_property( "unit_SI", @@ -189,12 +189,14 @@ void init_PatchRecordComponent(py::module &m) // allowed python intrinsics, after (!) 
buffer matching .def( "store", - &PatchRecordComponent::store, + py::overload_cast( + &PatchRecordComponent::store), py::arg("idx"), py::arg("data")) .def( "store", - &PatchRecordComponent::store, + py::overload_cast( + &PatchRecordComponent::store), py::arg("idx"), py::arg("data")) diff --git a/src/binding/python/Record.cpp b/src/binding/python/Record.cpp index 9cad75d03a..b4f732a83d 100644 --- a/src/binding/python/Record.cpp +++ b/src/binding/python/Record.cpp @@ -72,9 +72,11 @@ void init_Record(py::module &m) .def("set_time_offset", &Record::setTimeOffset) .def("set_time_offset", &Record::setTimeOffset); add_pickle( - cl, [](openPMD::Series &series, std::vector const &group) { + cl, [](openPMD::Series series, std::vector const &group) { uint64_t const n_it = std::stoull(group.at(1)); - return series.iterations[n_it].particles[group.at(3)][group.at(4)]; + auto res = series.iterations[n_it].open().particles[group.at(3)] + [group.at(4)]; + return internal::makeOwning(res, std::move(series)); }); finalize_container(py_r_cnt); diff --git a/src/binding/python/RecordComponent.cpp b/src/binding/python/RecordComponent.cpp index 37ad9a7cff..5645053f0e 100644 --- a/src/binding/python/RecordComponent.cpp +++ b/src/binding/python/RecordComponent.cpp @@ -18,12 +18,17 @@ * and the GNU Lesser General Public License along with openPMD-api. * If not, see . 
*/ +#include +#include #include #include #include +#include "openPMD/Dataset.hpp" +#include "openPMD/Datatype.hpp" #include "openPMD/DatatypeHelpers.hpp" #include "openPMD/Error.hpp" +#include "openPMD/RecordComponent.hpp" #include "openPMD/Series.hpp" #include "openPMD/backend/BaseRecordComponent.hpp" @@ -40,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -111,14 +117,48 @@ inline std::tuple> parseTupleSlices( py::slice slice = py::cast(slices[i]); size_t start, stop, step, slicelength; + auto mocked_extent = full_extent.at(curAxis); + // py::ssize_t is a signed type, so we will need to use another + // magic number for JOINED_DIMENSION in this computation, since the + // C++ API's JOINED_DIMENSION would be interpreted as a negative + // index + bool undo_mocked_extent = false; + constexpr auto PYTHON_JOINED_DIMENSION = + std::numeric_limits::max() - 1; + if (mocked_extent == Dataset::JOINED_DIMENSION) + { + undo_mocked_extent = true; + mocked_extent = PYTHON_JOINED_DIMENSION; + } if (!slice.compute( - full_extent.at(curAxis), - &start, - &stop, - &step, - &slicelength)) + mocked_extent, &start, &stop, &step, &slicelength)) throw py::error_already_set(); + if (undo_mocked_extent) + { + // do the same calculation again, but with another global extent + // (that is not smaller than the previous in order to avoid + // cutting off the range) + // this is to avoid the unlikely case + // that the mocked alternative value is actually the intended + // one + size_t start2, stop2, step2, slicelength2; + if (!slice.compute( + mocked_extent + 1, + &start2, + &stop2, + &step2, + &slicelength2)) + throw py::error_already_set(); + if (slicelength == slicelength2) + { + // slicelength was given as an absolute value and + // accidentally hit our mocked value + // --> keep that value + undo_mocked_extent = false; + } + } + // TODO PySlice_AdjustIndices: Python 3.6.1+ // Adjust start/end slice indices assuming a sequence of the // specified length. 
Out of bounds indices are clipped in a @@ -132,7 +172,10 @@ inline std::tuple> parseTupleSlices( // verified for size later in C++ API offset.at(curAxis) = start; - extent.at(curAxis) = slicelength; // stop - start; + extent.at(curAxis) = + undo_mocked_extent && slicelength == PYTHON_JOINED_DIMENSION + ? Dataset::JOINED_DIMENSION + : slicelength; // stop - start; continue; } @@ -187,6 +230,59 @@ inline std::tuple> parseTupleSlices( return std::make_tuple(offset, extent, flatten); } +inline std::tuple> parseJoinedTupleSlices( + uint8_t const ndim, + Extent const &full_extent, + py::tuple const &slices, + size_t joined_dim, + py::array const &a) +{ + + std::vector flatten; + Offset offset; + Extent extent; + std::tie(offset, extent, flatten) = + parseTupleSlices(ndim, full_extent, slices); + for (size_t i = 0; i < ndim; ++i) + { + if (offset.at(i) != 0) + { + throw std::runtime_error( + "Joined array: Cannot use non-zero offset in store_chunk " + "(offset[" + + std::to_string(i) + "] = " + std::to_string(offset[i]) + ")."); + } + if (flatten.at(i)) + { + throw std::runtime_error( + "Flattened slices unimplemented for joined arrays."); + } + + if (i == joined_dim) + { + if (extent.at(i) == 0 || extent.at(i) == Dataset::JOINED_DIMENSION) + { + extent[i] = a.shape()[i]; + } + } + else + { + if (extent.at(i) != full_extent.at(i)) + { + throw std::runtime_error( + "Joined array: Must use full extent in store_chunk for " + "non-joined dimension " + "(local_extent[" + + std::to_string(i) + "] = " + std::to_string(extent[i]) + + " != global_extent[" + std::to_string(i) + + "] = " + std::to_string(full_extent[i]) + ")."); + } + } + } + offset.clear(); + return std::make_tuple(offset, extent, flatten); +} + /** Check an array is a contiguous buffer * * Required are contiguous buffers for store and load @@ -215,6 +311,75 @@ inline void check_buffer_is_contiguous(py::array &a) // loop over the input data strides in store/load calls } +namespace +{ +struct 
StoreChunkFromPythonArray +{ + template + static void call( + RecordComponent &r, + py::array &a, + Offset const &offset, + Extent const &extent) + { + // here, we increase a reference on the user-passed data so that + // temporary and lost-scope variables stay alive until we flush + // note: this does not yet prevent the user, as in C++, to build + // a race condition by manipulating the data that was passed + a.inc_ref(); + void *data = a.mutable_data(); + std::shared_ptr shared((T *)data, [a](T *) { a.dec_ref(); }); + r.storeChunk(std::move(shared), offset, extent); + } + + static constexpr char const *errorMsg = "store_chunk()"; +}; +struct LoadChunkIntoPythonArray +{ + template + static void call( + RecordComponent &r, + py::array &a, + Offset const &offset, + Extent const &extent) + { + // here, we increase a reference on the user-passed data so that + // temporary and lost-scope variables stay alive until we flush + // note: this does not yet prevent the user, as in C++, to build + // a race condition by manipulating the data that was passed + a.inc_ref(); + void *data = a.mutable_data(); + std::shared_ptr shared((T *)data, [a](T *) { a.dec_ref(); }); + r.loadChunk(std::move(shared), offset, extent); + } + + static constexpr char const *errorMsg = "load_chunk()"; +}; +struct LoadChunkIntoPythonBuffer +{ + template + static void call( + RecordComponent &r, + py::buffer &buffer, + py::buffer_info const &buffer_info, + Offset const &offset, + Extent const &extent) + { + // here, we increase a reference on the user-passed data so that + // temporary and lost-scope variables stay alive until we flush + // note: this does not yet prevent the user, as in C++, to build + // a race condition by manipulating the data that was passed + buffer.inc_ref(); + void *data = buffer_info.ptr; + std::shared_ptr shared( + (T *)data, [buffer](T *) { buffer.dec_ref(); }); + r.loadChunk(std::move(shared), offset, extent); + } + + static constexpr char const *errorMsg = 
"load_chunk()"; +}; +} // namespace + /** Store Chunk * * Called with offset and extent that are already in the record component's @@ -241,7 +406,7 @@ inline void store_chunk( size_t const numFlattenDims = std::count(flatten.begin(), flatten.end(), true); auto const r_extent = r.getExtent(); - auto const s_extent(extent); // selected extent in r + auto const &s_extent(extent); // selected extent in r std::vector r_shape(r_extent.size() - numFlattenDims); std::vector s_shape(s_extent.size() - numFlattenDims); auto maskIt = flatten.begin(); @@ -265,85 +430,59 @@ inline void store_chunk( "in record component (") + std::to_string(r_shape.size()) + std::string("D)")); - for (auto d = 0; d < a.ndim(); ++d) + if (auto joined_dim = r.joinedDimension(); joined_dim.has_value()) { - // selection causes overflow of r - if (offset.at(d) + extent.at(d) > r_shape.at(d)) - throw py::index_error( - std::string("slice ") + std::to_string(offset.at(d)) + - std::string(":") + std::to_string(extent.at(d)) + - std::string(" is out of bounds for axis ") + std::to_string(d) + - std::string(" with size ") + std::to_string(r_shape.at(d))); - // underflow of selection in r for given a - if (s_shape.at(d) != std::uint64_t(a.shape()[d])) - throw py::index_error( - std::string("size of chunk (") + std::to_string(a.shape()[d]) + - std::string(") for axis ") + std::to_string(d) + - std::string(" does not match selection ") + - std::string("size in record component (") + - std::to_string(s_extent.at(d)) + std::string(")")); + for (py::ssize_t d = 0; d < a.ndim(); ++d) + { + // selection causes overflow of r + if (d != py::ssize_t(*joined_dim) && extent.at(d) != r_shape.at(d)) + throw py::index_error( + std::string("selection for axis ") + std::to_string(d) + + " of record component with joined dimension " + + std::to_string(*joined_dim) + + " must be equivalent to its global extent " + + std::to_string(extent.at(d)) + ", but was " + + std::to_string(r_shape.at(d)) + "."); + // underflow of 
selection in r for given a + if (s_shape.at(d) != std::uint64_t(a.shape()[d])) + throw py::index_error( + std::string("size of chunk (") + + std::to_string(a.shape()[d]) + std::string(") for axis ") + + std::to_string(d) + + std::string(" does not match selection ") + + std::string("size in record component (") + + std::to_string(s_extent.at(d)) + std::string(")")); + } + } + else + { + for (auto d = 0; d < a.ndim(); ++d) + { + // selection causes overflow of r + if (offset.at(d) + extent.at(d) > r_shape.at(d)) + throw py::index_error( + std::string("slice ") + std::to_string(offset.at(d)) + + std::string(":") + std::to_string(extent.at(d)) + + std::string(" is out of bounds for axis ") + + std::to_string(d) + std::string(" with size ") + + std::to_string(r_shape.at(d))); + // underflow of selection in r for given a + if (s_shape.at(d) != std::uint64_t(a.shape()[d])) + throw py::index_error( + std::string("size of chunk (") + + std::to_string(a.shape()[d]) + std::string(") for axis ") + + std::to_string(d) + + std::string(" does not match selection ") + + std::string("size in record component (") + + std::to_string(s_extent.at(d)) + std::string(")")); + } } check_buffer_is_contiguous(a); - // here, we increase a reference on the user-passed data so that - // temporary and lost-scope variables stay alive until we flush - // note: this does not yet prevent the user, as in C++, to build - // a race condition by manipulating the data they passed - auto store_data = [&r, &a, &offset, &extent](auto cxxtype) { - using CXXType = decltype(cxxtype); - a.inc_ref(); - void *data = a.mutable_data(); - std::shared_ptr shared( - (CXXType *)data, [a](CXXType *) { a.dec_ref(); }); - r.storeChunk(std::move(shared), offset, extent); - }; - - // store - auto const dtype = dtype_from_numpy(a.dtype()); - if (dtype == Datatype::CHAR) - store_data(char()); - else if (dtype == Datatype::UCHAR) - store_data((unsigned char)0); - else if (dtype == Datatype::SHORT) - store_data(short()); - 
else if (dtype == Datatype::INT) - store_data(int()); - else if (dtype == Datatype::LONG) - store_data(long()); - else if (dtype == Datatype::LONGLONG) - store_data((long long)0); - else if (dtype == Datatype::USHORT) - store_data((unsigned short)0); - else if (dtype == Datatype::UINT) - store_data((unsigned int)0); - else if (dtype == Datatype::ULONG) - store_data((unsigned long)0); - else if (dtype == Datatype::ULONGLONG) - store_data((unsigned long long)0); - else if (dtype == Datatype::LONG_DOUBLE) - store_data((long double)0); - else if (dtype == Datatype::DOUBLE) - store_data(double()); - else if (dtype == Datatype::FLOAT) - store_data(float()); - else if (dtype == Datatype::CLONG_DOUBLE) - store_data(std::complex()); - else if (dtype == Datatype::CDOUBLE) - store_data(std::complex()); - else if (dtype == Datatype::CFLOAT) - store_data(std::complex()); - /* @todo - .value("STRING", Datatype::STRING) - .value("VEC_STRING", Datatype::VEC_STRING) - .value("ARR_DBL_7", Datatype::ARR_DBL_7) - */ - else if (dtype == Datatype::BOOL) - store_data(bool()); - else - throw std::runtime_error( - std::string("Datatype '") + std::string(py::str(a.dtype())) + - std::string("' not known in 'storeChunk'!")); + // dtype_from_numpy(a.dtype()) + switchDatasetType( + r.getDatatype(), r, a, offset, extent); } /** Store Chunk @@ -359,8 +498,17 @@ store_chunk(RecordComponent &r, py::array &a, py::tuple const &slices) Offset offset; Extent extent; std::vector flatten; - std::tie(offset, extent, flatten) = - parseTupleSlices(ndim, full_extent, slices); + if (auto joined_dimension = r.joinedDimension(); + joined_dimension.has_value()) + { + std::tie(offset, extent, flatten) = parseJoinedTupleSlices( + ndim, full_extent, slices, *joined_dimension, a); + } + else + { + std::tie(offset, extent, flatten) = + parseTupleSlices(ndim, full_extent, slices); + } store_chunk(r, a, offset, extent, flatten); } @@ -550,60 +698,8 @@ void load_chunk( } } - // here, we increase a reference on the 
user-passed data so that - // temporary and lost-scope variables stay alive until we flush - // note: this does not yet prevent the user, as in C++, to build - // a race condition by manipulating the data they passed - auto load_data = - [&r, &buffer, &buffer_info, &offset, &extent](auto cxxtype) { - using CXXType = decltype(cxxtype); - buffer.inc_ref(); - // buffer_info.inc_ref(); - void *data = buffer_info.ptr; - std::shared_ptr shared( - (CXXType *)data, [buffer](CXXType *) { buffer.dec_ref(); }); - r.loadChunk(std::move(shared), offset, extent); - }; - - if (r.getDatatype() == Datatype::CHAR) - load_data((char)0); - else if (r.getDatatype() == Datatype::UCHAR) - load_data((unsigned char)0); - else if (r.getDatatype() == Datatype::SCHAR) - load_data((signed char)0); - else if (r.getDatatype() == Datatype::SHORT) - load_data((short)0); - else if (r.getDatatype() == Datatype::INT) - load_data((int)0); - else if (r.getDatatype() == Datatype::LONG) - load_data((long)0); - else if (r.getDatatype() == Datatype::LONGLONG) - load_data((long long)0); - else if (r.getDatatype() == Datatype::USHORT) - load_data((unsigned short)0); - else if (r.getDatatype() == Datatype::UINT) - load_data((unsigned int)0); - else if (r.getDatatype() == Datatype::ULONG) - load_data((unsigned long)0); - else if (r.getDatatype() == Datatype::ULONGLONG) - load_data((unsigned long long)0); - else if (r.getDatatype() == Datatype::LONG_DOUBLE) - load_data((long double)0); - else if (r.getDatatype() == Datatype::DOUBLE) - load_data((double)0); - else if (r.getDatatype() == Datatype::FLOAT) - load_data((float)0); - else if (r.getDatatype() == Datatype::CLONG_DOUBLE) - load_data((std::complex)0); - else if (r.getDatatype() == Datatype::CDOUBLE) - load_data((std::complex)0); - else if (r.getDatatype() == Datatype::CFLOAT) - load_data((std::complex)0); - else if (r.getDatatype() == Datatype::BOOL) - load_data((bool)0); - else - throw std::runtime_error( - std::string("Datatype not known in 
'loadChunk'!")); + switchNonVectorType( + r.getDatatype(), r, buffer, buffer_info, offset, extent); } /** Load Chunk @@ -660,58 +756,8 @@ inline void load_chunk( check_buffer_is_contiguous(a); - // here, we increase a reference on the user-passed data so that - // temporary and lost-scope variables stay alive until we flush - // note: this does not yet prevent the user, as in C++, to build - // a race condition by manipulating the data they passed - auto load_data = [&r, &a, &offset, &extent](auto cxxtype) { - using CXXType = decltype(cxxtype); - a.inc_ref(); - void *data = a.mutable_data(); - std::shared_ptr shared( - (CXXType *)data, [a](CXXType *) { a.dec_ref(); }); - r.loadChunk(std::move(shared), offset, extent); - }; - - if (r.getDatatype() == Datatype::CHAR) - load_data(char()); - else if (r.getDatatype() == Datatype::UCHAR) - load_data((unsigned char)0); - else if (r.getDatatype() == Datatype::SCHAR) - load_data((signed char)0); - else if (r.getDatatype() == Datatype::SHORT) - load_data(short()); - else if (r.getDatatype() == Datatype::INT) - load_data(int()); - else if (r.getDatatype() == Datatype::LONG) - load_data(long()); - else if (r.getDatatype() == Datatype::LONGLONG) - load_data((long long)0); - else if (r.getDatatype() == Datatype::USHORT) - load_data((unsigned short)0); - else if (r.getDatatype() == Datatype::UINT) - load_data((unsigned int)0); - else if (r.getDatatype() == Datatype::ULONG) - load_data((unsigned long)0); - else if (r.getDatatype() == Datatype::ULONGLONG) - load_data((unsigned long long)0); - else if (r.getDatatype() == Datatype::LONG_DOUBLE) - load_data((long double)0); - else if (r.getDatatype() == Datatype::DOUBLE) - load_data(double()); - else if (r.getDatatype() == Datatype::FLOAT) - load_data(float()); - else if (r.getDatatype() == Datatype::CLONG_DOUBLE) - load_data(std::complex()); - else if (r.getDatatype() == Datatype::CDOUBLE) - load_data(std::complex()); - else if (r.getDatatype() == Datatype::CFLOAT) - 
load_data(std::complex()); - else if (r.getDatatype() == Datatype::BOOL) - load_data(bool()); - else - throw std::runtime_error( - std::string("Datatype not known in 'load_chunk'!")); + switchDatasetType( + r.getDatatype(), r, a, offset, extent); } /** Load Chunk @@ -1076,10 +1122,14 @@ void init_RecordComponent(py::module &m) .def("set_unit_SI", &RecordComponent::setUnitSI) // deprecated ; add_pickle( - cl, [](openPMD::Series &series, std::vector const &group) { + cl, [](openPMD::Series series, std::vector const &group) { uint64_t const n_it = std::stoull(group.at(1)); - return series.iterations[n_it] - .particles[group.at(3)][group.at(4)][group.at(5)]; + auto res = series.iterations[n_it] + .open() + .particles[group.at(3)][group.at(4)] + [group.size() < 6 ? RecordComponent::SCALAR + : group.at(5)]; + return internal::makeOwning(res, std::move(series)); }); addRecordComponentSetGet(cl); diff --git a/src/binding/python/Series.cpp b/src/binding/python/Series.cpp index ed2a4180a8..37de823f2a 100644 --- a/src/binding/python/Series.cpp +++ b/src/binding/python/Series.cpp @@ -22,6 +22,7 @@ #include "openPMD/IO/Access.hpp" #include "openPMD/IterationEncoding.hpp" #include "openPMD/auxiliary/JSON.hpp" +#include "openPMD/binding/python/Pickle.hpp" #include "openPMD/config.hpp" #include "openPMD/binding/python/Common.hpp" @@ -29,28 +30,13 @@ #if openPMD_HAVE_MPI // re-implemented signatures: // include +#include "openPMD/binding/python/Mpi.hpp" #include #endif #include #include -#if openPMD_HAVE_MPI -/** mpi4py communicator wrapper - * - * refs: - * - https://github.com/mpi4py/mpi4py/blob/3.0.0/src/mpi4py/libmpi.pxd#L35-L36 - * - https://github.com/mpi4py/mpi4py/blob/3.0.0/src/mpi4py/MPI.pxd#L100-L105 - * - installed: include/mpi4py/mpi4py.MPI.h - */ -struct openPMD_PyMPICommObject -{ - PyObject_HEAD MPI_Comm ob_mpi; - unsigned int flags; -}; -using openPMD_PyMPIIntracommObject = openPMD_PyMPICommObject; -#endif - struct SeriesIteratorPythonAdaptor : SeriesIterator { 
SeriesIteratorPythonAdaptor(SeriesIterator it) @@ -165,6 +151,13 @@ not possible once it has been closed. // keep handle alive while iterator exists py::keep_alive<0, 1>()); + // `clang-format on/off` doesn't help here. + // Writing this without a macro would lead to a huge diff due to + // clang-format. +#define OPENPMD_AVOID_CLANG_FORMAT auto cl = + OPENPMD_AVOID_CLANG_FORMAT +#undef OPENPMD_AVOID_CLANG_FORMAT + py::class_(m, "Series") .def( @@ -176,77 +169,87 @@ not possible once it has been closed. }), py::arg("filepath"), py::arg("access"), - py::arg("options") = "{}") + py::arg("options") = "{}", + R"END( +Construct a new Series. Parameters: + +* filepath: The file path. +* at: Access mode. +* options: Advanced backend configuration via JSON. + May be specified as a JSON-formatted string directly, or as a path + to a JSON textfile, prepended by an at sign '@'. + +For details on access modes, JSON/TOML configuration and iteration encoding, +refer to: + +* https://openpmd-api.readthedocs.io/en/latest/usage/workflow.html#access-modes +* https://openpmd-api.readthedocs.io/en/latest/details/backendconfig.html +* https://openpmd-api.readthedocs.io/en/latest/usage/concepts.html#iteration-and-series + +In case of file-based iteration encoding, the file names for each +iteration are determined by an expansion pattern that must be specified. +It takes one out of two possible forms: + +1. Simple form: %T is replaced with the iteration index, e.g. + `simData_%T.bp` becomes `simData_50.bp`. +2. Padded form: e.g. %06T is replaced with the iteration index padded to + at least six digits. `simData_%06T.bp` becomes `simData_000050.bp`. + +The backend is determined: + +1. Explicitly via the JSON/TOML parameter `backend`, e.g. `{"backend": + "adios2"}`. +2. Otherwise implicitly from the filename extension, e.g. + `simData_%T.h5`. + +The filename extension can be replaced with a globbing pattern %E. +It will be replaced with an automatically determined file name extension: + +1. 
In CREATE mode: The extension is set to a backend-specific default + extension. This requires that the backend is specified via JSON/TOML. +2. In READ_ONLY, READ_WRITE and READ_LINEAR modes: These modes require + that files already exist on disk. The disk will be scanned for files + that match the pattern and the resulting file extension will be used. + If the result is ambiguous or no such file is found, an error is + raised. +3. In APPEND mode: Like (2.), except if no matching file is found. In + that case, the procedure of (1.) is used, owing to the fact that + APPEND mode can be used to create new datasets. + )END") #if openPMD_HAVE_MPI .def( py::init([](std::string const &filepath, Access at, py::object &comm, std::string const &options) { - //! TODO perform mpi4py import test and check min-version - //! careful: double MPI_Init risk? only import mpi4py.MPI? - //! required C-API init? probably just checks: - //! refs: - //! - - //! https://bitbucket.org/mpi4py/mpi4py/src/3.0.0/demo/wrap-c/helloworld.c - //! - installed: include/mpi4py/mpi4py.MPI_api.h - // if( import_mpi4py() < 0 ) { here be dragons } - - if (comm.ptr() == Py_None) - throw std::runtime_error( - "Series: MPI communicator cannot be None."); - if (comm.ptr() == nullptr) - throw std::runtime_error( - "Series: MPI communicator is a nullptr."); - - // check type string to see if this is mpi4py - // __str__ (pretty) - // __repr__ (unambiguous) - // mpi4py: - // pyMPI: ... (TODO) - py::str const comm_pystr = py::repr(comm); - std::string const comm_str = comm_pystr.cast(); - if (comm_str.substr(0, 12) != std::string(" >( - comm.get_type())) - // TODO add mpi4py version from above import check to error - // message - throw std::runtime_error( - "Series: comm has unexpected type layout in " + - comm_str + - " (Mismatched MPI at compile vs. runtime? 
" - "Breaking mpi4py release?)"); - - // todo other possible implementations: - // - pyMPI (inactive since 2008?): import mpi; mpi.WORLD - - // reimplementation of mpi4py's: - // MPI_Comm* mpiCommPtr = PyMPIComm_Get(comm.ptr()); - MPI_Comm *mpiCommPtr = - &((openPMD_PyMPIIntracommObject *)(comm.ptr()))->ob_mpi; - - if (PyErr_Occurred()) - throw std::runtime_error( - "Series: MPI communicator access error."); - if (mpiCommPtr == nullptr) + auto variant = pythonObjectAsMpiComm(comm); + if (auto errorMsg = std::get_if(&variant)) { - throw std::runtime_error( - "Series: MPI communicator cast failed. " - "(Mismatched MPI at compile vs. runtime?)"); + throw std::runtime_error("[Series] " + *errorMsg); + } + else + { + py::gil_scoped_release release; + return new Series( + filepath, at, std::get(variant), options); } - - py::gil_scoped_release release; - return new Series(filepath, at, *mpiCommPtr, options); }), py::arg("filepath"), py::arg("access"), py::arg("mpi_communicator"), - py::arg("options") = "{}") + py::arg("options") = "{}", + R"END( +Construct a new Series. Parameters: + +* filepath: The file path. +* at: Access mode. +* options: Advanced backend configuration via JSON. + May be specified as a JSON-formatted string directly, or as a path + to a JSON textfile, prepended by an at sign '@'. +* mpi_communicator: The MPI communicator + +For further details, refer to the non-MPI overload. + )END") #endif .def("__bool__", &Series::operator bool) .def( @@ -280,6 +283,8 @@ this method. .def_property("base_path", &Series::basePath, &Series::setBasePath) .def_property( "meshes_path", &Series::meshesPath, &Series::setMeshesPath) + .def("get_rank_table", &Series::rankTable, py::arg("collective")) + .def("set_rank_table", &Series::setRankTable, py::arg("my_rank_info")) .def_property( "particles_path", &Series::particlesPath, &Series::setParticlesPath) .def_property("author", &Series::author, &Series::setAuthor) @@ -317,7 +322,8 @@ this method. 
.def_property("name", &Series::name, &Series::setName) .def("flush", &Series::flush, py::arg("backend_config") = "{}") - .def_property_readonly("backend", &Series::backend) + .def_property_readonly( + "backend", static_cast(&Series::backend)) // TODO remove in future versions (deprecated) .def("set_openPMD", &Series::setOpenPMD) @@ -396,6 +402,11 @@ this method twice. Look for the WriteIterations class for further documentation. )END"); + add_pickle( + cl, [](openPMD::Series series, std::vector const &) { + return series; + }); + m.def( "merge_json", &json::merge, diff --git a/src/binding/python/openpmd_api/DataFrame.py b/src/binding/python/openpmd_api/DataFrame.py index 1248136a5a..55db5d1769 100644 --- a/src/binding/python/openpmd_api/DataFrame.py +++ b/src/binding/python/openpmd_api/DataFrame.py @@ -74,3 +74,111 @@ def particles_to_dataframe(particle_species, slice=None): df.index.name = "row" return df + + +def iterations_to_dataframe(series, species_name): + """ + Load all iterations of a particle species into a Pandas DataFrame. + + Parameters + ---------- + series : openpmd_api.Series + A Series class in openPMD-api. + species_name : string + The name of a particle species. + + Returns + ------- + pandas.DataFrame + A pandas dataframe with particles as index and openPMD record + components of the particle_species as columns. Particles might be + repeated over multiple iterations and an "iteration" column is + added. + + Raises + ------ + ImportError + Raises an exception if pandas is not installed + + See Also + -------- + pandas.DataFrame : the central dataframe object created here + """ + # import pandas here for a lazy import + try: + import pandas as pd + except ImportError: + raise ImportError("pandas NOT found. 
Install pandas for DataFrame " + "support.") + + df = pd.concat( + ( + series.iterations[i] + .particles[species_name] + .to_df() + .assign(iteration=i) + for i in series.iterations + ), + axis=0, + ignore_index=True, + ) + + return df + + +def iterations_to_cudf(series, species_name): + """ + Load all iterations of a particle species into a cuDF DataFrame. + + Parameters + ---------- + series : openpmd_api.Series + A Series class in openPMD-api. + species_name : string + The name of a particle species. + + Returns + ------- + cudf.DataFrame + A cuDF (RAPIDS) dataframe with particles as index and openPMD record + components of the particle_species as columns. Particles might be + repeated over multiple iterations and an "iteration" column is + added. + + Raises + ------ + ImportError + Raises an exception if cuDF (RAPIDS) is not installed + + See Also + -------- + cudf.DataFrame : the central dataframe object created here + """ + # import pandas here for a lazy import + try: + import pandas # noqa + except ImportError: + raise ImportError("pandas NOT found. Install pandas for DataFrame " + "support.") + # import cudf here for a lazy import + try: + import cudf + except ImportError: + raise ImportError("cudf NOT found. Install RAPIDS for CUDA DataFrame " + "support.") + + cdf = cudf.concat( + ( + cudf.from_pandas( + series.iterations[i] + .particles[species_name] + .to_df() + .assign(iteration=i) + ) + for i in series.iterations + ), + axis=0, + ignore_index=True, + ) + + return cdf diff --git a/src/binding/python/openpmd_api/__init__.py b/src/binding/python/openpmd_api/__init__.py index e1bb49ef7e..09f21026f9 100644 --- a/src/binding/python/openpmd_api/__init__.py +++ b/src/binding/python/openpmd_api/__init__.py @@ -1,7 +1,8 @@ from . 
import openpmd_api_cxx as cxx from .DaskArray import record_component_to_daskarray from .DaskDataFrame import particles_to_daskdataframe -from .DataFrame import particles_to_dataframe +from .DataFrame import (iterations_to_cudf, iterations_to_dataframe, + particles_to_dataframe) from .openpmd_api_cxx import * # noqa __version__ = cxx.__version__ @@ -13,6 +14,8 @@ ParticleSpecies.to_df = particles_to_dataframe # noqa ParticleSpecies.to_dask = particles_to_daskdataframe # noqa Record_Component.to_dask_array = record_component_to_daskarray # noqa +Series.to_df = iterations_to_dataframe # noqa +Series.to_cudf = iterations_to_cudf # noqa # TODO remove in future versions (deprecated) Access_Type = Access # noqa diff --git a/src/binding/python/openpmd_api/pipe/__main__.py b/src/binding/python/openpmd_api/pipe/__main__.py index 32a1df6513..7bd5305c56 100644 --- a/src/binding/python/openpmd_api/pipe/__main__.py +++ b/src/binding/python/openpmd_api/pipe/__main__.py @@ -166,28 +166,6 @@ def __init__(self, source, dynamicView, offset, extent): self.extent = extent -class particle_patch_load: - """ - A deferred load/store operation for a particle patch. - Our particle-patch API requires that users pass a concrete value for - storing, even if the actual write operation occurs much later at - series.flush(). - So, unlike other record components, we cannot call .store_chunk() with - a buffer that has not yet been filled, but must wait until the point where - we actual have the data at hand already. - In short: calling .store() must be deferred, until the data has been fully - read from the sink. - This class stores the needed parameters to .store(). - """ - def __init__(self, data, dest): - self.data = data - self.dest = dest - - def run(self): - for index, item in enumerate(self.data): - self.dest.store(index, item) - - class pipe: """ Represents the configuration of one "pipe" pass. 
@@ -292,7 +270,6 @@ def __copy(self, src, dest, current_path="/data/"): print("\t {0}".format(r)) out_iteration = write_iterations[in_iteration.iteration_index] sys.stdout.flush() - self.__particle_patches = [] self.__copy( in_iteration, out_iteration, current_path + str(in_iteration.iteration_index) + "/") @@ -301,10 +278,7 @@ def __copy(self, src, dest, current_path="/data/"): deferred.dynamicView.current_buffer(), deferred.offset, deferred.extent) in_iteration.close() - for patch_load in self.__particle_patches: - patch_load.run() out_iteration.close() - self.__particle_patches.clear() self.loads.clear() sys.stdout.flush() elif isinstance(src, io.Record_Component) and (not is_container @@ -333,12 +307,6 @@ def __copy(self, src, dest, current_path="/data/"): self.loads.append( deferred_load(src, span, local_chunk.offset, local_chunk.extent)) - elif isinstance(src, io.Patch_Record_Component) and (not is_container - or src.scalar): - dest.reset_dataset(io.Dataset(src.dtype, src.shape)) - if self.comm.rank == 0: - self.__particle_patches.append( - particle_patch_load(src.load(), dest)) elif isinstance(src, io.Iteration): self.__copy(src.meshes, dest.meshes, current_path + "meshes/") self.__copy(src.particles, dest.particles, diff --git a/src/config.cpp b/src/config.cpp index 89a824500c..d25c926fa3 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -22,6 +22,7 @@ #include "openPMD/version.hpp" #if openPMD_HAVE_ADIOS2 +#include "openPMD/IO/ADIOS/macros.hpp" #include #endif #include @@ -60,7 +61,7 @@ std::vector openPMD::getFileExtensions() // BP4 is always available in ADIOS2 fext.emplace_back("bp4"); #endif -#ifdef ADIOS2_HAVE_BP5 +#if openPMD_HAVE_ADIOS2_BP5 fext.emplace_back("bp5"); #endif #ifdef ADIOS2_HAVE_SST diff --git a/test/CoreTest.cpp b/test/CoreTest.cpp index 084d118578..17739e0b28 100644 --- a/test/CoreTest.cpp +++ b/test/CoreTest.cpp @@ -7,6 +7,7 @@ #endif #include "openPMD/openPMD.hpp" +#include "openPMD/IO/ADIOS/macros.hpp" #include 
"openPMD/auxiliary/Filesystem.hpp" #include "openPMD/auxiliary/JSON.hpp" #include "openPMD/auxiliary/UniquePtr.hpp" @@ -417,7 +418,7 @@ TEST_CASE("particleSpecies_modification_test", "[core]") species["positionOffset"][RecordComponent::SCALAR].resetDataset(dset); REQUIRE(1 == species.count("positionOffset")); auto &patches = species.particlePatches; - REQUIRE(2 == patches.size()); + REQUIRE(0 == patches.size()); REQUIRE(0 == patches.numAttributes()); auto &offset = patches["offset"]; REQUIRE(0 == offset.size()); @@ -719,10 +720,10 @@ TEST_CASE("structure_test", "[core]") .parent() == getWritable(&o.iterations[1].particles["P"])); REQUIRE( - 1 == + 0 == o.iterations[1].particles["P"].particlePatches.count("numParticles")); REQUIRE( - 1 == + 0 == o.iterations[1].particles["P"].particlePatches.count( "numParticlesOffset")); @@ -1180,6 +1181,98 @@ TEST_CASE("backend_via_json", "[core]") "../samples/optionsViaJsonPseudoFilebased%T.json")); } +TEST_CASE("wildcard_extension", "[core]") +{ +#if openPMD_HAVE_ADIOS2 +#if openPMD_HAVE_ADIOS2_BP5 && openPMD_HAS_ADIOS_2_9 + constexpr char const *const default_file_ending = "bp5"; +#else + constexpr char const *const default_file_ending = "bp4"; +#endif +#endif + auxiliary::remove_directory("../samples/wildcard"); + auto run_test = [current_test = size_t(0)]( + std::string const &write_config, + std::string const &expected_extension) mutable { + for (auto [write_access, infix] : + {std::make_pair(Access::CREATE, ""), + std::make_pair(Access::CREATE, "_%T"), + std::make_pair(Access::APPEND, ""), + std::make_pair(Access::APPEND, "_%T")}) + { + std::string const wildcard_name = "../samples/wildcard/wildcard_" + + std::to_string(current_test) + std::string(infix) + ".%E"; + std::string const explicit_name = "../samples/wildcard/wildcard_" + + std::to_string(current_test) + std::string(infix) + '.' 
+ + expected_extension; + ++current_test; + + Series series(wildcard_name, write_access, write_config); + series.setAttribute("test_index", 0); + series.setAttribute("json_config", write_config); + series.setAttribute("expected_extension", expected_extension); + if (*infix) + { + series.iterations[0]; + } + series.close(); + + Series read(wildcard_name, Access::READ_ONLY); + REQUIRE(read.getAttribute("test_index").get() == 0); + read.close(); + + read = Series(explicit_name, Access::READ_ONLY); + REQUIRE(read.getAttribute("test_index").get() == 0); + read.close(); + } + }; +#if openPMD_HAVE_ADIOS2 +#if openPMD_HAVE_ADIOS2_BP5 + run_test( + R"({"adios2": {"engine": {"type": "bp5"}}, "backend": "adios2"})", + "bp5"); +#endif + run_test( + R"({"adios2": {"engine": {"type": "bp4"}}, "backend": "adios2"})", + "bp4"); + run_test(R"({"backend": "adios2"})", default_file_ending); +#endif +#if openPMD_HAVE_HDF5 + run_test(R"({"backend": "hdf5"})", "h5"); +#endif + run_test(R"({"backend": "json"})", "json"); + + for (auto &name : + {"../samples/wildcard/colliding.%E", + "../samples/wildcard/colliding_%T.%E"}) + { + REQUIRE_THROWS_AS( + [&name]() { + Series read_nonexisting(name, Access::READ_ONLY); + (void)read_nonexisting; + }(), + error::ReadError); + Series write_json(name, Access::CREATE, R"({"backend": "json"})"); + write_json.iterations[0]; + write_json.close(); + REQUIRE_NOTHROW([&name]() { + Series read_existing(name, Access::READ_ONLY); + (void)read_existing; + }()); +#if openPMD_HAVE_ADIOS2 + Series write_adios(name, Access::CREATE, R"({"backend": "adios2"})"); + write_adios.iterations[0]; + write_adios.close(); + REQUIRE_THROWS_AS( + [&name]() { + Series read_colliding(name, Access::READ_ONLY); + (void)read_colliding; + }(), + error::ReadError); +#endif + } +} + TEST_CASE("custom_geometries", "[core]") { std::vector sampleData(10, 0); diff --git a/test/JSONTest.cpp b/test/JSONTest.cpp index 46b2459e52..161f1fa3a3 100644 --- a/test/JSONTest.cpp +++ 
b/test/JSONTest.cpp @@ -210,7 +210,7 @@ right = "val" raw, std::ios_base::binary | std::ios_base::in); toml::value tomlVal = toml::parse(istream); std::stringstream sstream; - sstream << tomlVal; + sstream << toml::format(tomlVal); return sort_lines(sstream.str()); }(); diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index a82a300b0e..c6d90d773e 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -2,6 +2,7 @@ * To guarantee a correct call to Init, launch the tests manually. */ #include "openPMD/IO/ADIOS/macros.hpp" +#include "openPMD/IO/Access.hpp" #include "openPMD/auxiliary/Environment.hpp" #include "openPMD/auxiliary/Filesystem.hpp" #include "openPMD/openPMD.hpp" @@ -301,8 +302,11 @@ TEST_CASE("hdf5_write_test", "[parallel][hdf5]") MPI_Comm_rank(MPI_COMM_WORLD, &mpi_r); auto mpi_size = static_cast(mpi_s); auto mpi_rank = static_cast(mpi_r); - Series o = - Series("../samples/parallel_write.h5", Access::CREATE, MPI_COMM_WORLD); + Series o = Series( + "../samples/parallel_write.h5", + Access::CREATE, + MPI_COMM_WORLD, + "hdf5.independent_stores = false"); o.setAuthor("Parallel HDF5"); ParticleSpecies &e = o.iterations[1].particles["e"]; @@ -315,10 +319,14 @@ TEST_CASE("hdf5_write_test", "[parallel][hdf5]") std::shared_ptr position_local(new double); *position_local = position_global[mpi_rank]; - e["position"]["x"].resetDataset( - Dataset(determineDatatype(position_local), {mpi_size})); + e["position"]["x"].resetDataset(Dataset( + determineDatatype(position_local), + {mpi_size}, + "hdf5.dataset.chunks = [1]")); e["position"]["x"].storeChunk(position_local, {mpi_rank}, {1}); + o.flush("hdf5.independent_stores = true"); + std::vector positionOffset_global(mpi_size); uint64_t posOff{0}; std::generate( @@ -328,11 +336,20 @@ TEST_CASE("hdf5_write_test", "[parallel][hdf5]") std::shared_ptr positionOffset_local(new uint64_t); *positionOffset_local = positionOffset_global[mpi_rank]; - e["positionOffset"]["x"].resetDataset( - 
Dataset(determineDatatype(positionOffset_local), {mpi_size})); + e["positionOffset"]["x"].resetDataset(Dataset( + determineDatatype(positionOffset_local), + {mpi_size}, + "hdf5.dataset.chunks = [" + std::to_string(mpi_size) + "]")); e["positionOffset"]["x"].storeChunk(positionOffset_local, {mpi_rank}, {1}); - o.flush(); + // Test that chunking settings are not carried over to other datasets. + // Just declare a dataset smaller than the previously chunks size to trigger + // a failure in case the chunking is erroneously carried over. + e["positionOffset"]["y"].resetDataset({Datatype::FLOAT, {1}}); + e["positionOffset"]["y"].storeChunk( + std::make_unique(3.141592654), {0}, {1}); + + o.flush("hdf5.independent_stores = false"); } TEST_CASE("hdf5_write_test_zero_extent", "[parallel][hdf5]") @@ -407,7 +424,8 @@ void available_chunks_test(std::string const &file_ending) << R"END( } } - } + }, + "rank_table": "hostname" } )END"; @@ -515,8 +533,11 @@ TEST_CASE("extend_dataset", "[parallel]") #if openPMD_HAVE_ADIOS2 && openPMD_HAVE_MPI TEST_CASE("adios_write_test", "[parallel][adios]") { - Series o = - Series("../samples/parallel_write.bp", Access::CREATE, MPI_COMM_WORLD); + Series o = Series( + "../samples/parallel_write.bp", + Access::CREATE, + MPI_COMM_WORLD, + R"(rank_table= "hostname")"); int size{-1}; int rank{-1}; @@ -554,6 +575,48 @@ TEST_CASE("adios_write_test", "[parallel][adios]") e["positionOffset"]["x"].storeChunk(positionOffset_local, {mpi_rank}, {1}); o.flush(); + o.close(); + + chunk_assignment::RankMeta compare; + { + auto hostname = + host_info::byMethod(host_info::Method::MPI_PROCESSOR_NAME); + for (int i = 0; i < size; ++i) + { + compare[i] = hostname; + } + } + + { + Series i( + "../samples/parallel_write.bp", + Access::READ_LINEAR, + MPI_COMM_WORLD); + i.parseBase(); + REQUIRE(i.rankTable(/* collective = */ true) == compare); + } + { + Series i( + "../samples/parallel_write.bp", + Access::READ_LINEAR, + MPI_COMM_WORLD); + i.parseBase(); + 
REQUIRE(i.rankTable(/* collective = */ false) == compare); + } + { + Series i( + "../samples/parallel_write.bp", + Access::READ_RANDOM_ACCESS, + MPI_COMM_WORLD); + REQUIRE(i.rankTable(/* collective = */ true) == compare); + } + { + Series i( + "../samples/parallel_write.bp", + Access::READ_RANDOM_ACCESS, + MPI_COMM_WORLD); + REQUIRE(i.rankTable(/* collective = */ false) == compare); + } } TEST_CASE("adios_write_test_zero_extent", "[parallel][adios]") @@ -706,7 +769,8 @@ void close_iteration_test(std::string const &file_ending) std::vector data{2, 4, 6, 8}; // { // we do *not* need these parentheses - Series write(name, Access::CREATE, MPI_COMM_WORLD); + Series write( + name, Access::CREATE, MPI_COMM_WORLD, R"(rank_table= "hostname")"); { Iteration it0 = write.iterations[0]; auto E_x = it0.meshes["E"]["x"]; @@ -755,6 +819,42 @@ void close_iteration_test(std::string const &file_ending) auto read_again = E_x_read.loadChunk({0, 0}, {mpi_size, 4}); REQUIRE_THROWS(read.flush()); } + + chunk_assignment::RankMeta compare; + { + auto hostname = + host_info::byMethod(host_info::Method::MPI_PROCESSOR_NAME); + for (unsigned i = 0; i < mpi_size; ++i) + { + compare[i] = hostname; + } + } + + for (auto const &filename : + {"../samples/close_iterations_parallel_%T.", + "../samples/close_iterations_parallel_0.", + "../samples/close_iterations_parallel_1."}) + { + for (auto const &[at, read_collectively] : + {std::make_pair(Access::READ_LINEAR, true), + std::make_pair(Access::READ_LINEAR, false), + std::make_pair(Access::READ_RANDOM_ACCESS, true), + std::make_pair(Access::READ_RANDOM_ACCESS, false)}) + { + std::cout << filename << file_ending << "\t" + << (at == Access::READ_LINEAR ? 
"linear" : "random") + << "\t" << read_collectively << std::endl; + Series i(filename + file_ending, at, MPI_COMM_WORLD); + if (at == Access::READ_LINEAR) + { + i.parseBase(); + } + // Need this in file-based iteration encoding + i.iterations.begin()->second.open(); + REQUIRE( + i.rankTable(/* collective = */ read_collectively) == compare); + } + } } TEST_CASE("close_iteration_test", "[parallel]") @@ -836,7 +936,10 @@ void file_based_write_read(std::string const &file_ending) }); auto dataset = io::Dataset( - io::determineDatatype(), {global_Nx, global_Nz}); + io::determineDatatype(), + {global_Nx, global_Nz}, + "hdf5.dataset.chunks = [" + std::to_string(global_Nx) + ", " + + std::to_string(local_Nz) + "]"); E_x.resetDataset(dataset); Offset chunk_offset = {0, local_Nz * mpi_rank}; @@ -933,10 +1036,16 @@ void hipace_like_write(std::string const &file_ending) int const last_step = 100; int const my_first_step = i_mpi_rank * int(local_Nz); int const all_last_step = last_step + (i_mpi_size - 1) * int(local_Nz); + + bool participate_in_barrier = true; for (int first_rank_step = 0; first_rank_step < all_last_step; ++first_rank_step) { - MPI_Barrier(MPI_COMM_WORLD); + if (participate_in_barrier) + { + MPI_Barrier(MPI_COMM_WORLD); + } + participate_in_barrier = true; // first_rank_step: this step will "lead" the opening of an output step // step on the local rank @@ -975,16 +1084,25 @@ void hipace_like_write(std::string const &file_ending) // has this ranks started computations yet? if (step < 0) + { + participate_in_barrier = false; continue; + } // has this ranks stopped computations? if (step > last_step) + { + participate_in_barrier = false; continue; + } // does this rank contribute to with output currently? 
bool const rank_in_output_step = std::find(iterations.begin(), iterations.end(), step) != iterations.end(); if (!rank_in_output_step) + { + participate_in_barrier = false; continue; + } // now we write (parallel, independent I/O) auto it = series.iterations[step]; @@ -1037,6 +1155,41 @@ TEST_CASE("hipace_like_write", "[parallel]") } #endif +#if openPMD_HAVE_ADIOS2 && openPMD_HAS_ADIOS_2_9 && openPMD_HAVE_MPI +TEST_CASE("independent_write_with_collective_flush", "[parallel]") +{ + Series write( + "../samples/independent_write_with_collective_flush.bp5", + Access::CREATE, + MPI_COMM_WORLD, + "adios2.engine.preferred_flush_target = \"buffer\""); + int size, rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + auto iteration = write.iterations[0]; + auto E_x = iteration.meshes["E"]["x"]; + E_x.resetDataset({Datatype::DOUBLE, {10}}); + write.flush(); + if (rank == 1) + { + E_x.storeChunk( + std::unique_ptr{new double[10]{4.2}}, {0}, {10}); + } + /* + * Now, the iteration is dirty only on rank 1. But the following flush must + * run collectively anyway. The test has been designed in such a way that + * the PerformDataWrite() call required by the disk flush target will + * conflict with the default buffer target that will run in the destructor, + * unless the flush in the next line really is collective. 
+ */ + std::cout << "ENTER" << std::endl; + MPI_Barrier(MPI_COMM_WORLD); + iteration.seriesFlush("adios2.engine.preferred_flush_target = \"disk\""); + MPI_Barrier(MPI_COMM_WORLD); + std::cout << "LEAVE" << std::endl; +} +#endif + #if openPMD_HAVE_ADIOS2 && openPMD_HAVE_MPI void adios2_streaming(bool variableBasedLayout) @@ -1778,4 +1931,275 @@ TEST_CASE("unavailable_backend", "[core][parallel]") } #endif } + +void joined_dim(std::string const &ext) +{ + using type = float; + using patchType = uint64_t; + constexpr size_t patches_per_rank = 5; + constexpr size_t length_of_patch = 10; + + int size{-1}; + int rank{-1}; + MPI_Comm_size(MPI_COMM_WORLD, &size); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + { + Series s( + "../samples/joinedDimParallel." + ext, + Access::CREATE, + MPI_COMM_WORLD); + std::vector> writeFrom(patches_per_rank); + + auto it = s.writeIterations()[100]; + + Dataset numParticlesDS( + determineDatatype(), {Dataset::JOINED_DIMENSION}); + auto numParticles = + it.particles["e"] + .particlePatches["numParticles"][RecordComponent::SCALAR]; + auto numParticlesOffset = + it.particles["e"] + .particlePatches["numParticlesOffset"][RecordComponent::SCALAR]; + numParticles.resetDataset(numParticlesDS); + numParticlesOffset.resetDataset(numParticlesDS); + + auto patchOffset = it.particles["e"].particlePatches["offset"]["x"]; + auto patchExtent = it.particles["e"].particlePatches["extent"]["x"]; + Dataset particlePatchesDS( + determineDatatype(), {Dataset::JOINED_DIMENSION}); + patchOffset.resetDataset(particlePatchesDS); + patchExtent.resetDataset(particlePatchesDS); + + float start_value = rank * patches_per_rank * length_of_patch; + for (size_t i = 0; i < 5; ++i) + { + writeFrom[i] = UniquePtrWithLambda( + new type[length_of_patch], + [](auto const *ptr) { delete[] ptr; }); + std::iota( + writeFrom[i].get(), + writeFrom[i].get() + 10, + start_value + length_of_patch * i); + patchOffset.store(start_value + length_of_patch * i); + } + + auto epx = 
it.particles["e"]["position"]["x"]; + Dataset ds(determineDatatype(), {Dataset::JOINED_DIMENSION}); + epx.resetDataset(ds); + + size_t counter = 0; + for (auto &chunk : writeFrom) + { + epx.storeChunk(std::move(chunk), {}, {length_of_patch}); + numParticles.store(length_of_patch); + /* + * For the sake of the test case, we know that the + * numParticlesOffset has this value. In general, the purpose of the + * joined array is that we don't need to know these values, so the + * specification of particle patches is somewhat difficult. + */ + numParticlesOffset.store( + start_value + counter++ * length_of_patch); + patchExtent.store(10); + } + writeFrom.clear(); + it.close(); + s.close(); + } + + { + Series s( + "../samples/joinedDimParallel." + ext, + Access::READ_ONLY, + MPI_COMM_WORLD); + auto it = s.iterations[100]; + auto e = it.particles["e"]; + + auto particleData = e["position"]["x"].loadChunk(); + auto numParticles = + e.particlePatches["numParticles"][RecordComponent::SCALAR] + .load(); + auto numParticlesOffset = + e.particlePatches["numParticlesOffset"][RecordComponent::SCALAR] + .load(); + auto patchOffset = e.particlePatches["offset"]["x"].load(); + auto patchExtent = e.particlePatches["extent"]["x"].load(); + + it.close(); + + // check validity of particle patches + auto numPatches = + e.particlePatches["numParticlesOffset"][RecordComponent::SCALAR] + .getExtent()[0]; + REQUIRE( + e.particlePatches["numParticles"][RecordComponent::SCALAR] + .getExtent()[0] == numPatches); + for (size_t i = 0; i < numPatches; ++i) + { + for (size_t j = 0; j < numParticles.get()[i]; ++j) + { + REQUIRE( + patchOffset.get()[i] <= + particleData.get()[numParticlesOffset.get()[i] + j]); + REQUIRE( + particleData.get()[numParticlesOffset.get()[i] + j] < + patchOffset.get()[i] + patchExtent.get()[i]); + } + } + + /* + * Check that joined array joins early writes before later writes from + * the same rank + */ + for (size_t i = 0; i < size * length_of_patch * patches_per_rank; 
++i) + { + REQUIRE(float(i) == particleData.get()[i]); + } + for (size_t i = 0; i < size * patches_per_rank; ++i) + { + REQUIRE(length_of_patch * i == numParticlesOffset.get()[i]); + REQUIRE(type(length_of_patch * i) == patchOffset.get()[i]); + } + } +} + +TEST_CASE("joined_dim", "[parallel]") +{ +#if 100000000 * ADIOS2_VERSION_MAJOR + 1000000 * ADIOS2_VERSION_MINOR + \ + 10000 * ADIOS2_VERSION_PATCH + 100 * ADIOS2_VERSION_TWEAK >= \ + 209000000 + constexpr char const *supportsJoinedDims[] = {"bp", "bp4", "bp5"}; +#else + // no zero-size arrays + std::vector supportsJoinedDims; +#endif + for (auto const &t : testedFileExtensions()) + { + for (auto const supported : supportsJoinedDims) + { + if (t == supported) + { + joined_dim(t); + break; + } + } + } +} + +#if openPMD_HAVE_ADIOS2_BP5 +// Parallel version of the same test from SerialIOTest.cpp +TEST_CASE("adios2_flush_via_step") +{ + int size_i(0), rank_i(0); + MPI_Comm_rank(MPI_COMM_WORLD, &rank_i); + MPI_Comm_size(MPI_COMM_WORLD, &size_i); + Extent::value_type const size(size_i), rank(rank_i); + + Series write( + "../samples/adios2_flush_via_step_parallel/simData_%T.bp5", + Access::CREATE, + MPI_COMM_WORLD, + R"(adios2.engine.parameters.FlattenSteps = "on")"); + std::vector data(10); + for (Iteration::IterationIndex_t i = 0; i < 5; ++i) + { + Iteration it = write.writeIterations()[i]; + auto E_x = it.meshes["E"]["x"]; + E_x.resetDataset({Datatype::FLOAT, {size, 10, 10}}); + for (Extent::value_type j = 0; j < 10; ++j) + { + std::iota( + data.begin(), data.end(), i * 100 * size + rank * 100 + j * 10); + E_x.storeChunk(data, {rank, j, 0}, {1, 1, 10}); + write.flush(R"(adios2.engine.preferred_flush_target = "new_step")"); + } + it.close(); + } + +#if openPMD_HAS_ADIOS_2_10_1 + for (auto access : {Access::READ_RANDOM_ACCESS, Access::READ_LINEAR}) + { + Series read( + "../samples/adios2_flush_via_step_parallel/simData_%T.%E", + access, + MPI_COMM_WORLD); + std::vector load_data(100 * size); + data.resize(100 * size); + 
for (auto iteration : read.readIterations()) + { + std::iota( + data.begin(), + data.end(), + iteration.iterationIndex * size * 100); + iteration.meshes["E"]["x"].loadChunkRaw( + load_data.data(), {0, 0, 0}, {size, 10, 10}); + iteration.close(); + REQUIRE(load_data == data); + } + } +#endif + + /* + * Now emulate restarting from a checkpoint after a crash and continuing to + * write to the output Series. The semantics of openPMD::Access::APPEND + * don't fully fit here since that mode is for adding new Iterations to an + * existing Series. What we truly want to do is to continue writing to an + * Iteration without replacing it with a new one. So we must use the option + * adios2.engine.access_mode = "append" to tell the ADIOS2 backend that new + * steps should be added to an existing Iteration file. + */ + + write = Series( + "../samples/adios2_flush_via_step_parallel/simData_%T.bp5", + Access::APPEND, + MPI_COMM_WORLD, + R"( + [adios2.engine] + access_mode = "append" + parameters.FlattenSteps = "on" + )"); + for (Iteration::IterationIndex_t i = 0; i < 5; ++i) + { + Iteration it = write.writeIterations()[i]; + auto E_x = it.meshes["E"]["y"]; + E_x.resetDataset({Datatype::FLOAT, {size, 10, 10}}); + for (Extent::value_type j = 0; j < 10; ++j) + { + std::iota( + data.begin(), data.end(), i * 100 * size + rank * 100 + j * 10); + E_x.storeChunk(data, {rank, j, 0}, {1, 1, 10}); + write.flush(R"(adios2.engine.preferred_flush_target = "new_step")"); + } + it.close(); + } + +#if openPMD_HAS_ADIOS_2_10_1 + for (auto access : {Access::READ_RANDOM_ACCESS, Access::READ_LINEAR}) + { + Series read( + "../samples/adios2_flush_via_step_parallel/simData_%T.%E", + access, + MPI_COMM_WORLD); + std::vector load_data(100 * size); + data.resize(100 * size); + for (auto iteration : read.readIterations()) + { + std::iota( + data.begin(), + data.end(), + iteration.iterationIndex * size * 100); + iteration.meshes["E"]["x"].loadChunkRaw( + load_data.data(), {0, 0, 0}, {size, 10, 10}); + 
iteration.meshes["E"]["y"].loadChunkRaw( + load_data.data(), {0, 0, 0}, {size, 10, 10}); + iteration.close(); + REQUIRE(load_data == data); + REQUIRE(load_data == data); + } + } +#endif +} +#endif + #endif // openPMD_HAVE_ADIOS2 && openPMD_HAVE_MPI diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 7f126e104f..7323a32582 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -1,4 +1,5 @@ // expose private and protected members for invasive testing +#include "openPMD/ChunkInfo_internal.hpp" #include "openPMD/Datatype.hpp" #include "openPMD/IO/Access.hpp" #if openPMD_USE_INVASIVE_TESTS @@ -40,6 +41,12 @@ #include #endif +#ifdef _WIN32 +#include +// windows.h defines this macro and it breaks any function with the same name +#undef max +#endif + using namespace openPMD; struct BackendSelection @@ -1555,7 +1562,17 @@ struct ReadFromAnyType inline void write_test(const std::string &backend) { - Series o = Series("../samples/serial_write." + backend, Access::CREATE); +#ifdef _WIN32 + std::string jsonCfg = "{}"; +#else + std::string jsonCfg = R"({"rank_table": "posix_hostname"})"; + chunk_assignment::RankMeta compare{ + {0, + host_info::byMethod( + host_info::methodFromStringDescription("posix_hostname", false))}}; +#endif + Series o = + Series("../samples/serial_write." 
+ backend, Access::CREATE, jsonCfg); ParticleSpecies &e_1 = o.iterations[1].particles["e"]; @@ -1666,6 +1683,10 @@ inline void write_test(const std::string &backend) << '\'' << std::endl; }, variantTypeDataset); + +#ifndef _WIN32 + REQUIRE(read.rankTable(/* collective = */ false) == compare); +#endif } TEST_CASE("write_test", "[serial]") @@ -1816,13 +1837,19 @@ fileBased_add_EDpic(ParticleSpecies &e, uint64_t const num_particles) inline void fileBased_write_test(const std::string &backend) { +#ifdef _WIN32 + std::string jsonCfg = "{}"; +#else + std::string jsonCfg = R"({"rank_table": "posix_hostname"})"; +#endif if (auxiliary::directory_exists("../samples/subdir")) auxiliary::remove_directory("../samples/subdir"); { Series o = Series( "../samples/subdir/serial_fileBased_write%03T." + backend, - Access::CREATE); + Access::CREATE, + jsonCfg); ParticleSpecies &e_1 = o.iterations[1].particles["e"]; @@ -1941,7 +1968,8 @@ inline void fileBased_write_test(const std::string &backend) { Series o = Series( "../samples/subdir/serial_fileBased_write%T." + backend, - Access::READ_ONLY); + Access::READ_ONLY, + jsonCfg); REQUIRE(o.iterations.size() == 5); REQUIRE(o.iterations.count(1) == 1); @@ -2018,7 +2046,8 @@ inline void fileBased_write_test(const std::string &backend) // padding Series o = Series( "../samples/subdir/serial_fileBased_write%T." + backend, - Access::READ_WRITE); + Access::READ_WRITE, + jsonCfg); REQUIRE(o.iterations.size() == 5); o.iterations[6]; @@ -2042,6 +2071,7 @@ inline void fileBased_write_test(const std::string &backend) .makeConstant(1.0); o.iterations[overlong_it].setTime(static_cast(overlong_it)); + o.flush(); REQUIRE(o.iterations.size() == 7); } REQUIRE( @@ -2059,7 +2089,8 @@ inline void fileBased_write_test(const std::string &backend) { Series o = Series( "../samples/subdir/serial_fileBased_write%01T." 
+ backend, - Access::READ_WRITE); + Access::READ_WRITE, + jsonCfg); REQUIRE(o.iterations.size() == 1); /* @@ -2152,6 +2183,44 @@ inline void fileBased_write_test(const std::string &backend) Access::READ_ONLY}; helper::listSeries(list); } + +#ifdef __unix__ + /* + * Check that the ranktable was written correctly to every iteration file. + */ + { + int dirfd = open("../samples/subdir/", O_RDONLY); + if (dirfd < 0) + { + throw std::system_error( + std::error_code(errno, std::system_category())); + } + DIR *directory = fdopendir(dirfd); + if (!directory) + { + close(dirfd); + throw std::system_error( + std::error_code(errno, std::system_category())); + } + chunk_assignment::RankMeta compare{{0, host_info::posix_hostname()}}; + dirent *entry; + while ((entry = readdir(directory)) != nullptr) + { + if (strcmp(entry->d_name, ".") == 0 || + strcmp(entry->d_name, "..") == 0 || + !auxiliary::ends_with(entry->d_name, "." + backend)) + { + continue; + } + std::string fullPath = + std::string("../samples/subdir/") + entry->d_name; + Series single_file(fullPath, Access::READ_ONLY); + REQUIRE(single_file.rankTable(/* collective = */ false) == compare); + } + closedir(directory); + close(dirfd); + } +#endif // defined(__unix__) } TEST_CASE("fileBased_write_test", "[serial]") @@ -4075,7 +4144,7 @@ TEST_CASE("git_adios2_early_chunk_query", "[serial][adios2]") /* * Require __unix__ since we need all that filestat stuff for this test. 
*/ -#if defined(__unix__) && defined(ADIOS2_HAVE_BP5) +#if defined(__unix__) && openPMD_HAVE_ADIOS2_BP5 enum class FlushDuringStep { @@ -4217,11 +4286,11 @@ void adios2_bp5_flush(std::string const &cfg, FlushDuringStep flushDuringStep) REQUIRE(currentSize <= 4096); } - bool has_been_deleted = false; + auto has_been_deleted = std::make_shared(false); UniquePtrWithLambda copied_as_unique( - new int[size], [&has_been_deleted](int const *ptr) { + new int[size], [has_been_deleted](int const *ptr) { delete[] ptr; - has_been_deleted = true; + *has_been_deleted = true; }); std::copy_n(data.data(), size, copied_as_unique.get()); { @@ -4239,13 +4308,13 @@ void adios2_bp5_flush(std::string const &cfg, FlushDuringStep flushDuringStep) { // should now be roughly within 1% of 16Mb REQUIRE(std::abs(1 - double(currentSize) / (16 * size)) <= 0.01); - REQUIRE(has_been_deleted); + REQUIRE(*has_been_deleted); } else { // should be roughly zero REQUIRE(currentSize <= 4096); - REQUIRE(!has_been_deleted); + REQUIRE(!*has_been_deleted); } } auto currentSize = getsize(); @@ -4346,6 +4415,118 @@ BufferChunkSize = 2147483646 # 2^31 - 2 )"; adios2_bp5_flush(cfg5, /* flushDuringStep = */ FlushDuringStep::Always); + +#if openPMD_HAVE_ADIOS2_BP5 + std::string cfg6 = R"( +[adios2] + +[adios2.engine] +preferred_flush_target = "disk" + +[adios2.engine.parameters] +AggregationType = "TwoLevelShm" +MaxShmSize = 3221225472 +NumSubFiles = 1 +NumAggregators = 1 +BufferChunkSize = 2147483646 # 2^31 - 2 +)"; + + adios2_bp5_flush( + cfg6, /* flushDuringStep = */ FlushDuringStep::Default_Yes); +#endif +} +#endif + +#if openPMD_HAVE_ADIOS2_BP5 +TEST_CASE("adios2_flush_via_step") +{ + Series write( + "../samples/adios2_flush_via_step/simData_%T.bp5", + Access::CREATE, + R"(adios2.engine.parameters.FlattenSteps = "on")"); + std::vector data(10); + for (Iteration::IterationIndex_t i = 0; i < 5; ++i) + { + Iteration it = write.writeIterations()[i]; + auto E_x = it.meshes["E"]["x"]; + 
E_x.resetDataset({Datatype::FLOAT, {10, 10}}); + for (Extent::value_type j = 0; j < 10; ++j) + { + std::iota(data.begin(), data.end(), i * 100 + j * 10); + E_x.storeChunk(data, {j, 0}, {1, 10}); + write.flush(R"(adios2.engine.preferred_flush_target = "new_step")"); + } + it.close(); + } + +#if openPMD_HAS_ADIOS_2_10_1 + for (auto access : {Access::READ_RANDOM_ACCESS, Access::READ_LINEAR}) + { + Series read("../samples/adios2_flush_via_step/simData_%T.%E", access); + std::vector load_data(100); + data.resize(100); + for (auto iteration : read.readIterations()) + { + std::iota(data.begin(), data.end(), iteration.iterationIndex * 100); + iteration.meshes["E"]["x"].loadChunkRaw( + load_data.data(), {0, 0}, {10, 10}); + iteration.close(); + REQUIRE(load_data == data); + } + } +#endif + + /* + * Now emulate restarting from a checkpoint after a crash and continuing to + * write to the output Series. The semantics of openPMD::Access::APPEND + * don't fully fit here since that mode is for adding new Iterations to an + * existing Series. What we truly want to do is to continue writing to an + * Iteration without replacing it with a new one. So we must use the option + * adios2.engine.access_mode = "append" to tell the ADIOS2 backend that new + * steps should be added to an existing Iteration file. 
+ */ + + write = Series( + "../samples/adios2_flush_via_step/simData_%T.bp5", + Access::APPEND, + R"( + [adios2.engine] + access_mode = "append" + parameters.FlattenSteps = "on" + )"); + for (Iteration::IterationIndex_t i = 0; i < 5; ++i) + { + Iteration it = write.writeIterations()[i]; + auto E_x = it.meshes["E"]["y"]; + E_x.resetDataset({Datatype::FLOAT, {10, 10}}); + for (Extent::value_type j = 0; j < 10; ++j) + { + std::iota(data.begin(), data.end(), i * 100 + j * 10); + E_x.storeChunk(data, {j, 0}, {1, 10}); + write.flush(R"(adios2.engine.preferred_flush_target = "new_step")"); + } + it.close(); + } + +#if openPMD_HAS_ADIOS_2_10_1 + for (auto access : {Access::READ_RANDOM_ACCESS, Access::READ_LINEAR}) + { + Series read("../samples/adios2_flush_via_step/simData_%T.%E", access); + std::vector load_data(100); + data.resize(100); + for (auto iteration : read.readIterations()) + { + std::iota(data.begin(), data.end(), iteration.iterationIndex * 100); + iteration.meshes["E"]["x"].loadChunkRaw( + load_data.data(), {0, 0}, {10, 10}); + iteration.meshes["E"]["y"].loadChunkRaw( + load_data.data(), {0, 0}, {10, 10}); + iteration.close(); + REQUIRE(load_data == data); + REQUIRE(load_data == data); + } + } +#endif } #endif @@ -4406,7 +4587,7 @@ TEST_CASE("adios2_engines_and_file_endings") groupbased_test_explicit_backend( "", true, "bp4", "", "adios2.engine.type = \"bp4\""); -#ifdef ADIOS2_HAVE_BP5 +#if openPMD_HAVE_ADIOS2_BP5 // BP5 tests groupbased_test_explicit_backend(".bp5", true, "bp5", ""); groupbased_test_explicit_backend( @@ -4436,6 +4617,7 @@ TEST_CASE("adios2_engines_and_file_endings") filesystemExt.empty() ? 
name : basename + filesystemExt; { Series write(name, Access::CREATE, jsonCfg); + write.close(); } if (directory) { @@ -4471,7 +4653,7 @@ TEST_CASE("adios2_engines_and_file_endings") REQUIRE_THROWS(groupbased_test_no_explicit_backend( "", true, "bp4", "", "adios2.engine.type = \"bp4\"")); -#ifdef ADIOS2_HAVE_BP5 +#if openPMD_HAVE_ADIOS2_BP5 // BP5 tests groupbased_test_no_explicit_backend(".bp5", true, "bp5", ""); groupbased_test_no_explicit_backend( @@ -4550,7 +4732,7 @@ TEST_CASE("adios2_engines_and_file_endings") filebased_test_explicit_backend( "", true, "bp4", "", "adios2.engine.type = \"bp4\""); -#ifdef ADIOS2_HAVE_BP5 +#if openPMD_HAVE_ADIOS2_BP5 // BP5 tests filebased_test_explicit_backend(".bp5", true, "bp5", ""); filebased_test_explicit_backend( @@ -4626,7 +4808,7 @@ TEST_CASE("adios2_engines_and_file_endings") REQUIRE_THROWS(filebased_test_no_explicit_backend( "", true, "bp4", "", "adios2.engine.type = \"bp4\"")); -#ifdef ADIOS2_HAVE_BP5 +#if openPMD_HAVE_ADIOS2_BP5 // BP5 tests filebased_test_no_explicit_backend(".bp5", true, "bp5", ""); filebased_test_no_explicit_backend( @@ -5007,8 +5189,16 @@ TEST_CASE("serial_iterator", "[serial][adios2]") { for (auto const &t : testedFileExtensions()) { +#ifdef _WIN32 serial_iterator("../samples/serial_iterator_filebased_%T." + t); serial_iterator("../samples/serial_iterator_groupbased." + t); +#else + // Add some regex characters into the file names to see that we can deal + // with that. Don't do that on Windows because Windows does not like + // those characters within file paths. + serial_iterator("../samples/serial_iterator_filebased_+?_%T." + t); + serial_iterator("../samples/serial_iterator_groupbased_+?." 
+ t); +#endif } } @@ -5020,9 +5210,6 @@ void variableBasedSingleIteration(std::string const &file) file, Access::CREATE, R"({"iteration_encoding": "variable_based"})"); - REQUIRE( - writeSeries.iterationEncoding() == - IterationEncoding::variableBased); auto iterations = writeSeries.writeIterations(); auto iteration = iterations[0]; auto E_x = iteration.meshes["E"]["x"]; @@ -5031,6 +5218,9 @@ void variableBasedSingleIteration(std::string const &file) std::iota(data.begin(), data.end(), 0); E_x.storeChunk(data, {0}, {1000}); writeSeries.flush(); + REQUIRE( + writeSeries.iterationEncoding() == + IterationEncoding::variableBased); } { @@ -5096,6 +5286,39 @@ bool areEqual(T a, T b) } // namespace epsilon #if openPMD_HAVE_ADIOS2 + +#define openPMD_VERBOSE_CHUNKS 0 + +#if openPMD_VERBOSE_CHUNKS +static std::string format_chunk(ChunkInfo const &chunk_info) +{ + std::stringstream result; + auto print_vector = [&result](auto const &vec) { + if (vec.empty()) + { + result << "[]"; + } + else + { + auto it = vec.begin(); + result << '[' << *it++; + auto end = vec.end(); + for (; it != end; ++it) + { + result << ',' << *it; + } + result << ']'; + } + }; + result << '('; + print_vector(chunk_info.offset); + result << '|'; + print_vector(chunk_info.extent); + result << ')'; + return result.str(); +} +#endif + TEST_CASE("git_adios2_sample_test", "[serial][adios2]") { using namespace epsilon; @@ -5105,11 +5328,73 @@ TEST_CASE("git_adios2_sample_test", "[serial][adios2]") std::string const samplePath = "../samples/git-sample/3d-bp4/example-3d-bp4.bp"; + std::string const samplePathFilebased = + "../samples/git-sample/3d-bp4/example-3d-bp4_%T.bp"; if (!auxiliary::directory_exists(samplePath)) { std::cerr << "git sample '" << samplePath << "' not accessible \n"; return; } + + /* + * This checks a regression introduced by + * https://github.com/openPMD/openPMD-api/pull/1498 and fixed by + * https://github.com/openPMD/openPMD-api/pull/1586 + */ + for (auto const &[filepath, access] : + 
{std::make_pair(samplePath, Access::READ_ONLY), + std::make_pair(samplePathFilebased, Access::READ_ONLY), + std::make_pair(samplePath, Access::READ_LINEAR), + std::make_pair(samplePathFilebased, Access::READ_LINEAR)}) + { + Series read(filepath, access); + + // false positive by clang-tidy? + // NOLINTNEXTLINE(performance-for-range-copy) + for (auto iteration : read.readIterations()) + { + for (auto &mesh : iteration.meshes) + { + for (auto &component : mesh.second) + { +#if openPMD_VERBOSE_CHUNKS + std::cout << "Chunks for '" + << component.second.myPath().openPMDPath() + << "':" << std::endl; + for (auto const &chunk : component.second.availableChunks()) + { + std::cout << "\t" << format_chunk(chunk) << std::endl; + } +#else + component.second.availableChunks(); +#endif + } + } + for (auto &particle_species : iteration.particles) + { + for (auto &record : particle_species.second) + { + for (auto &component : record.second) + { +#if openPMD_VERBOSE_CHUNKS + std::cout << "Chunks for '" + << component.second.myPath().openPMDPath() + << "':" << std::endl; + for (auto const &chunk : + component.second.availableChunks()) + { + std::cout << "\t" << format_chunk(chunk) + << std::endl; + } +#else + component.second.availableChunks(); +#endif + } + } + } + } + } + Series o(samplePath, Access::READ_ONLY, R"({"backend": "adios2"})"); REQUIRE(o.openPMD() == "1.1.0"); REQUIRE(o.openPMDextension() == 0); @@ -5374,6 +5659,7 @@ void adios2_group_table( write.close(); Series read("../samples/group_table.bp", Access::READ_LINEAR, jsonRead); + // NOLINTNEXTLINE(performance-for-range-copy) for (auto iteration : read.readIterations()) { switch (iteration.iterationIndex) @@ -6021,7 +6307,7 @@ TEST_CASE("iterate_nonstreaming_series", "[serial][adios2]") backend.extension, false, backend.jsonBaseConfig()); -#if openPMD_HAVE_ADIOS2 && defined(ADIOS2_HAVE_BP5) +#if openPMD_HAVE_ADIOS2 && openPMD_HAVE_ADIOS2_BP5 if (backend.extension == "bp") { iterate_nonstreaming_series( @@ -6047,7 
+6333,7 @@ TEST_CASE("iterate_nonstreaming_series", "[serial][adios2]") #endif } -#if openPMD_HAVE_ADIOS2 && defined(ADIOS2_HAVE_BP5) +#if openPMD_HAVE_ADIOS2 && openPMD_HAVE_ADIOS2_BP5 void adios2_bp5_no_steps(bool usesteps) { std::string const config = R"END( @@ -6420,7 +6706,7 @@ TEST_CASE("deferred_parsing", "[serial]") } #if openPMD_HAS_ADIOS_2_9 -void chaotic_stream(std::string filename, bool variableBased) +void chaotic_stream(std::string const &filename, bool variableBased) { /* * We will write iterations in the following order. @@ -7276,3 +7562,147 @@ TEST_CASE("groupbased_read_write", "[serial]") groupbased_read_write("toml"); } } + +void joined_dim(std::string const &ext) +{ + using type = float; + using patchType = uint64_t; + constexpr size_t patches_per_rank = 5; + constexpr size_t length_of_patch = 10; + + { + Series s("../samples/joinedDimParallel." + ext, Access::CREATE); + std::vector> writeFrom(patches_per_rank); + + auto it = s.writeIterations()[100]; + + Dataset numParticlesDS( + determineDatatype(), {Dataset::JOINED_DIMENSION}); + auto numParticles = + it.particles["e"] + .particlePatches["numParticles"][RecordComponent::SCALAR]; + auto numParticlesOffset = + it.particles["e"] + .particlePatches["numParticlesOffset"][RecordComponent::SCALAR]; + numParticles.resetDataset(numParticlesDS); + numParticlesOffset.resetDataset(numParticlesDS); + + auto patchOffset = it.particles["e"].particlePatches["offset"]["x"]; + auto patchExtent = it.particles["e"].particlePatches["extent"]["x"]; + Dataset particlePatchesDS( + determineDatatype(), {Dataset::JOINED_DIMENSION}); + patchOffset.resetDataset(particlePatchesDS); + patchExtent.resetDataset(particlePatchesDS); + + for (size_t i = 0; i < 5; ++i) + { + writeFrom[i] = UniquePtrWithLambda( + new type[length_of_patch], + [](auto const *ptr) { delete[] ptr; }); + std::iota( + writeFrom[i].get(), + writeFrom[i].get() + 10, + length_of_patch * i); + patchOffset.store(length_of_patch * i); + } + + auto epx = 
it.particles["e"]["position"]["x"]; + Dataset ds(determineDatatype(), {Dataset::JOINED_DIMENSION}); + epx.resetDataset(ds); + + size_t counter = 0; + for (auto &chunk : writeFrom) + { + epx.storeChunk(std::move(chunk), {}, {length_of_patch}); + numParticles.store(length_of_patch); + /* + * For the sake of the test case, we know that the + * numParticlesOffset has this value. In general, the purpose of the + * joined array is that we don't need to know these values, so the + * specification of particle patches is somewhat difficult. + */ + numParticlesOffset.store(counter++ * length_of_patch); + patchExtent.store(10); + } + writeFrom.clear(); + it.close(); + s.close(); + } + + { + Series s("../samples/joinedDimParallel." + ext, Access::READ_ONLY); + auto it = s.iterations[100]; + auto e = it.particles["e"]; + + auto particleData = e["position"]["x"].loadChunk(); + auto numParticles = + e.particlePatches["numParticles"][RecordComponent::SCALAR] + .load(); + auto numParticlesOffset = + e.particlePatches["numParticlesOffset"][RecordComponent::SCALAR] + .load(); + auto patchOffset = e.particlePatches["offset"]["x"].load(); + auto patchExtent = e.particlePatches["extent"]["x"].load(); + + it.close(); + + // check validity of particle patches + auto numPatches = + e.particlePatches["numParticlesOffset"][RecordComponent::SCALAR] + .getExtent()[0]; + REQUIRE( + e.particlePatches["numParticles"][RecordComponent::SCALAR] + .getExtent()[0] == numPatches); + for (size_t i = 0; i < numPatches; ++i) + { + for (size_t j = 0; j < numParticles.get()[i]; ++j) + { + REQUIRE( + patchOffset.get()[i] <= + particleData.get()[numParticlesOffset.get()[i] + j]); + REQUIRE( + particleData.get()[numParticlesOffset.get()[i] + j] < + patchOffset.get()[i] + patchExtent.get()[i]); + } + } + + /* + * Check that: + * 1. Joined array joins writes from lower ranks before higher ranks + * 2. 
Joined array joins early writes before later writes from the same + * rank + */ + for (size_t i = 0; i < length_of_patch * patches_per_rank; ++i) + { + REQUIRE(float(i) == particleData.get()[i]); + } + for (size_t i = 0; i < patches_per_rank; ++i) + { + REQUIRE(length_of_patch * i == numParticlesOffset.get()[i]); + REQUIRE(type(length_of_patch * i) == patchOffset.get()[i]); + } + } +} + +TEST_CASE("joined_dim", "[serial]") +{ +#if 100000000 * ADIOS2_VERSION_MAJOR + 1000000 * ADIOS2_VERSION_MINOR + \ + 10000 * ADIOS2_VERSION_PATCH + 100 * ADIOS2_VERSION_TWEAK >= \ + 209000000 + constexpr char const *supportsJoinedDims[] = {"bp", "bp4", "bp5"}; +#else + // no zero-size arrays + std::vector supportsJoinedDims; +#endif + for (auto const &t : testedFileExtensions()) + { + for (auto const supported : supportsJoinedDims) + { + if (t == supported) + { + joined_dim(t); + break; + } + } + } +} diff --git a/test/python/unittest/API/APITest.py b/test/python/unittest/API/APITest.py index 6ff987f657..59e6b5c97e 100644 --- a/test/python/unittest/API/APITest.py +++ b/test/python/unittest/API/APITest.py @@ -971,6 +971,8 @@ def testPickle(self): series.flush() # Pickle + pickled_s = pickle.dumps(series) + pickled_i = pickle.dumps(i) pickled_E = pickle.dumps(E) pickled_E_x = pickle.dumps(E_x) pickled_electrons = pickle.dumps(electrons) @@ -980,6 +982,7 @@ def testPickle(self): pickled_w = pickle.dumps(w) print(f"This is my pickled object:\n{pickled_E_x}\n") + series.close() del E del E_x del electrons @@ -987,9 +990,12 @@ def testPickle(self): del pos del pos_y del w + del i del series # Unpickling the object + series = pickle.loads(pickled_s) + i = pickle.loads(pickled_i) E = pickle.loads(pickled_E) E_x = pickle.loads(pickled_E_x) electrons = pickle.loads(pickled_electrons) @@ -1000,6 +1006,8 @@ def testPickle(self): print( f"This is E_x.position of the unpickled object:\n{E_x.position}\n") + self.assertIsInstance(series, io.Series) + self.assertIsInstance(i, io.Iteration) 
self.assertIsInstance(E, io.Mesh) self.assertIsInstance(E_x, io.Mesh_Record_Component) self.assertIsInstance(electrons, io.ParticleSpecies)