From f9927b45aa7c269e46ceb0db6caa465cc8768c29 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Tue, 28 Jan 2025 18:05:58 -0800 Subject: [PATCH] `iMultiFab` Unit Tests --- tests/conftest.py | 42 ++++ tests/test_imultifab.py | 486 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 528 insertions(+) create mode 100644 tests/test_imultifab.py diff --git a/tests/conftest.py b/tests/conftest.py index d963af22..1c6a6776 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -138,3 +138,45 @@ def __exit__(self, exc_type, exc_value, traceback): with MfabDeviceContextManager() as mfab: yield mfab + + +@pytest.fixture(scope="function", params=list(itertools.product([1, 3], [0, 1]))) +def imfab(boxarr, distmap, request): + class iMfabContextManager: + def __enter__(self): + num_components = request.param[0] + num_ghost = request.param[1] + self.imfab = amr.iMultiFab(boxarr, distmap, num_components, num_ghost) + self.imfab.set_val(0, 0, num_components) + return self.imfab + + def __exit__(self, exc_type, exc_value, traceback): + self.imfab.clear() + del self.imfab + + with iMfabContextManager() as imfab: + yield imfab + + +@pytest.fixture(scope="function", params=list(itertools.product([1, 3], [0, 1]))) +def imfab_device(boxarr, distmap, request): + class iMfabDeviceContextManager: + def __enter__(self): + num_components = request.param[0] + num_ghost = request.param[1] + self.imfab = amr.iMultiFab( + boxarr, + distmap, + num_components, + num_ghost, + amr.MFInfo().set_arena(amr.The_Device_Arena()), + ) + self.imfab.set_val(0, 0, num_components) + return self.imfab + + def __exit__(self, exc_type, exc_value, traceback): + self.imfab.clear() + del self.imfab + + with iMfabDeviceContextManager() as imfab: + yield imfab diff --git a/tests/test_imultifab.py b/tests/test_imultifab.py new file mode 100644 index 00000000..da0b94af --- /dev/null +++ b/tests/test_imultifab.py @@ -0,0 +1,486 @@ +# -*- coding: utf-8 -*- + +import math + +import numpy as np +import pytest + +import amrex.space3d as amr + + +def test_imfab_numpy(imfab): + # finest active MR level, get from a + # simulation's AmrMesh object, e.g.: + # finest_level = sim.finest_level + finest_level = 0 # no MR + + # iterate over mesh-refinement levels + for lev in range(finest_level + 1): + # get an existing MultiFab, e.g., + # from a simulation: + # imfab = sim.get_field(lev=lev) + # Config = sim.extension.Config + + # grow (aka guard/ghost/halo) regions + ngv = imfab.n_grow_vect + + # get every local block of the field + for mfi in imfab: + # global index box w/ guards + bx = mfi.tilebox().grow(ngv) + print(bx) + + # numpy/cupy representation: non- + # copying view, w/ guard/ghost + field = imfab.array(mfi).to_xp() + + # notes on indexing in field: + # - numpy uses locally zero-based indexing + # - layout is F_CONTIGUOUS by default, just like AMReX + + field[()] = 42 + + # finest active MR level, get from a + # simulation's AmrMesh object, e.g.: + # finest_level = sim.finest_level + finest_level = 0 # no MR + + # iterate over mesh-refinement levels + for lev in range(finest_level + 1): + # get an existing MultiFab, e.g., + # from a simulation: + # imfab = sim.get_field(lev=lev) + # Config = sim.extension.Config + + field_list = imfab.to_xp() + + for field in field_list: + field[()] = 42 + + # finest active MR level, get from a + # simulation's AmrMesh object, e.g.: + # finest_level = sim.finest_level + finest_level = 0 # no MR + + # iterate over mesh-refinement levels + for lev in range(finest_level + 1): + # get an existing MultiFab, e.g., + # from a simulation: + # imfab = sim.get_field(lev=lev) + # Config = sim.extension.Config + + # Using global indexing + # Set all valid cells (and internal ghost cells) + imfab[...] = 42 + + # Set a range of cells. Indices are in Fortran order. + # First dimension, sets from first lower guard cell to first upper guard cell. + # - Imaginary indices refer to the guard cells, negative lower, positive upper. + # Second dimension, sets all valid cells. + # Third dimension, sets all valid and ghost cells + # - The empty tuple is used to specify the range to include all valid and ghost cells. + # Components dimension, sets first component. + imfab[-1j:2j, :, (), 0] = 42 + + # Get a range of cells + # Get the data along the valid cells in the first dimension (gathering data across blocks + # and processors), at the first upper guard cell in the second dimensionn, and cell 2 of + # the third (with 2 being relative to 0 which is the lower end of the valid cells of the full domain). + # Note that in an MPI context, this is a global operation, so caution is required when + # scaling to large numbers of processors. + if imfab.n_grow_vect.max > 0: + mfslice = imfab[:, 1j, 2] + # The assignment is to the last valid cell of the second dimension. + imfab[:, -1, 2] = 2 * mfslice + + +@pytest.mark.skipif(amr.Config.have_gpu, reason="This test only runs on CPU") +def test_imfab_loop_slow(imfab): + ngv = imfab.n_grow_vect + print(f"\n imfab={imfab}, imfab.n_grow_vect={ngv}") + + for mfi in imfab: + bx = mfi.tilebox().grow(ngv) + marr = imfab.array(mfi) + + # print(imfab.num_comp) + # print(imfab.size) + # print(marr.size) + # print(marr.nComp) + + # index by index assignment + # notes: + # - this is AMReX Array4, F-order indices + # - even though we iterate by fastest varying index, + # such loops are naturally very slow in Python + three_comps = imfab.num_comp == 3 + if three_comps: + for i, j, k in bx: + # print(i,j,k) + marr[i, j, k, 0] = 10 * i + marr[i, j, k, 1] = 10 * j + marr[i, j, k, 2] = 10 * k + else: + for i, j, k in bx: + # print(i,j,k) + marr[i, j, k] = 10 * i + + # numpy representation: non-copying view, zero-indexed, + # includes the guard/ghost region + marr_np = marr.to_numpy() + + # check the values at start/end are the same: first component + assert marr_np[0, 0, 0, 0] == marr[bx.small_end] + assert marr_np[-1, -1, -1, 0] == marr[bx.big_end] + # same check, but for all components + for n in range(imfab.num_comp): + small_end_comp = list(bx.small_end) + [n] + big_end_comp = list(bx.big_end) + [n] + assert marr_np[0, 0, 0, n] == marr[small_end_comp] + assert marr_np[-1, -1, -1, n] == marr[big_end_comp] + + # all components and all indices set at once to 42 + marr_np[()] = 42 + + # values in start & end still match? + assert marr_np[0, 0, 0, 0] == marr[bx.small_end] + assert marr_np[-1, -1, -1, -1] == marr[bx.big_end] + + # all values for all indices match between multifab & numpy view? + for n in range(imfab.num_comp): + for i, j, k in bx: + assert marr[i, j, k, n] == 42 + + +def test_imfab_loop(imfab): + ngv = imfab.n_grow_vect + print(f"\n imfab={imfab}, imfab.n_grow_vect={ngv}") + + for mfi in imfab: + bx = mfi.tilebox().grow(ngv) + marr = imfab.array(mfi) + + # note: offset from index space in numpy + # in numpy, we start indices from zero, not small_end + + # numpy/cupy representation: non-copying view, including the + # guard/ghost region + marr_xp = marr.to_xp() + + marr_xp[()] = 10 # TODO: fill with index value or so as in test_imfab_loop_slow + + def iv2s(iv, comp): + return tuple(iv) + (comp,) + + # check the values at start/end are the same: first component + for n in range(imfab.num_comp): + assert marr_xp[0, 0, 0, n] == 10 + assert marr_xp[-1, -1, -1, n] == marr_xp[iv2s(bx.big_end - bx.small_end, n)] + + # now we do some faster assignments, using range based access + # This should fail as out-of-bounds, but does not. + # Does NumPy/CuPy not check array access for non-owned views? + # marr_xp[24:200, :, :, :] = 42. + + # all components and all indices set at once to 42 + marr_xp[()] = 42 + + # values in start & end still match? + for n in range(imfab.num_comp): + assert marr_xp[0, 0, 0, n] == 42 + assert marr_xp[-1, -1, -1, n] == marr_xp[iv2s(bx.big_end - bx.small_end, n)] + + +def test_imfab_simple(imfab): + assert imfab.is_all_cell_centered + # assert(all(not imfab.is_nodal(i) for i in [-1, 0, 1, 2])) # -1?? + assert all(not imfab.is_nodal(i) for i in [0, 1, 2]) + + for i in range(imfab.num_comp): + imfab.set_val(-10 * (i + 1), i, 1) + imfab.abs(0, imfab.num_comp) + for i in range(imfab.num_comp): + assert imfab.max(i) == (10 * (i + 1)) # Assert: None == 10 for i=0 + assert imfab.min(i) == (10 * (i + 1)) + + imfab.plus(20, 0, imfab.num_comp) + for i in range(imfab.num_comp): + np.testing.assert_allclose(imfab.max(i), 20 + (10 * (i + 1))) + np.testing.assert_allclose(imfab.min(i), 20 + (10 * (i + 1))) + + imfab.mult(10, 0, imfab.num_comp) + for i in range(imfab.num_comp): + np.testing.assert_allclose(imfab.max(i), 10 * (20 + (10 * (i + 1)))) + np.testing.assert_allclose(imfab.min(i), 10 * (20 + (10 * (i + 1)))) + + imfab.negate(0, imfab.num_comp) + for i in range(imfab.num_comp): + np.testing.assert_allclose(imfab.max(i), -10 * (20 + (10 * (i + 1)))) + np.testing.assert_allclose(imfab.min(i), -10 * (20 + (10 * (i + 1)))) + + +@pytest.mark.parametrize("nghost", [0, 1]) +def test_imfab_ops(boxarr, distmap, nghost): + src = amr.MultiFab(boxarr, distmap, 3, nghost) + dst = amr.MultiFab(boxarr, distmap, 1, nghost) + + src.set_val(10, 0, 1) + src.set_val(20, 1, 1) + src.set_val(30, 2, 1) + dst.set_val(0, 0, 1) + + dst.add(src, 2, 0, 1, nghost) + dst.subtract(src, 1, 0, 1, nghost) + dst.multiply(src, 0, 0, 1, nghost) + dst.divide(src, 1, 0, 1, nghost) + + print(dst.min(0)) + np.testing.assert_allclose(dst.min(0), 5) + np.testing.assert_allclose(dst.max(0), 5) + + +def test_imfab_mfiter(imfab): + assert iter(imfab).is_valid + assert iter(imfab).length == 8 + + cnt = 0 + for _mfi in imfab: + cnt += 1 + + assert iter(imfab).length == cnt + + +@pytest.mark.skipif( + amr.Config.gpu_backend != "CUDA", reason="Requires AMReX_GPU_BACKEND=CUDA" +) +def test_imfab_ops_cuda_numba(imfab_device): + # https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html + from numba import cuda + + ngv = imfab_device.n_grow_vect + + # assign 3: define kernel + @cuda.jit + def set_to_three(array): + i, j, k = cuda.grid(3) + if i < array.shape[0] and j < array.shape[1] and k < array.shape[2]: + array[i, j, k] = 3 + + # assign 3: loop through boxes and launch kernels + for mfi in imfab_device: + bx = mfi.tilebox().grow(ngv) # noqa + marr = imfab_device.array(mfi) + marr_numba = cuda.as_cuda_array(marr) + + # kernel launch + threadsperblock = (4, 4, 4) + blockspergrid = tuple( + [math.ceil(s / b) for s, b in zip(marr_numba.shape, threadsperblock)] + ) + set_to_three[blockspergrid, threadsperblock](marr_numba) + + # Check results + shape = 32**3 * 8 + sum_threes = imfab_device.sum_unique(comp=0, local=False) + assert sum_threes == shape * 3 + + +@pytest.mark.skipif( + amr.Config.gpu_backend != "CUDA", reason="Requires AMReX_GPU_BACKEND=CUDA" +) +def test_imfab_ops_cuda_cupy(imfab_device): + # https://docs.cupy.dev/en/stable/user_guide/interoperability.html + import cupy as cp + import cupyx.profiler + + # AMReX -> cupy + ngv = imfab_device.n_grow_vect + print(f"\n imfab_device={imfab_device}, imfab_device.n_grow_vect={ngv}") + + # assign 3 + with cupyx.profiler.time_range("assign 3 [()]", color_id=0): + for mfi in imfab_device: + bx = mfi.tilebox().grow(ngv) # noqa + marr_cupy = imfab_device.array(mfi).to_cupy(order="C") + # print(marr_cupy.shape) # 1, 32, 32, 32 + # print(marr_cupy.dtype) # float64 + # performance: + # https://github.com/AMReX-Codes/pyamrex/issues/55#issuecomment-1579610074 + + # write and read into the marr_cupy + marr_cupy[()] = 3 + + # verify result with a .sum_unique + with cupyx.profiler.time_range("verify 3", color_id=0): + shape = 32**3 * 8 + # print(imfab_device.shape) + sum_threes = imfab_device.sum_unique(comp=0, local=False) + assert sum_threes == shape * 3 + + # assign 2 + with cupyx.profiler.time_range("assign 2 (set_val)", color_id=1): + imfab_device.set_val(2) + with cupyx.profiler.time_range("verify 2", color_id=1): + sum_twos = imfab_device.sum_unique(comp=0, local=False) + assert sum_twos == shape * 2 + + # assign 5 + with cupyx.profiler.time_range("assign 5 (ones-like)", color_id=2): + + def set_to_five(mm): + xp = cp.get_array_module(mm) + assert xp.__name__ == "cupy" + mm = xp.ones_like(mm) * 10 + mm /= 2 + return mm + + for mfi in imfab_device: + bx = mfi.tilebox().grow(ngv) # noqa + marr_cupy = imfab_device.array(mfi).to_cupy(order="F") + # print(marr_cupy.shape) # 32, 32, 32, 1 + # print(marr_cupy.dtype) # float64 + # performance: + # https://github.com/AMReX-Codes/pyamrex/issues/55#issuecomment-1579610074 + + # write and read into the marr_cupy + fives_cp = set_to_five(marr_cupy) + marr_cupy[()] = 0 + marr_cupy += fives_cp + + # verify + with cupyx.profiler.time_range("verify 5", color_id=2): + sum = imfab_device.sum_unique(comp=0, local=False) + assert sum == shape * 5 + + # assign 7 + with cupyx.profiler.time_range("assign 7 (fuse)", color_id=3): + + @cp.fuse(kernel_name="set_to_seven") + def set_to_seven(x): + x[...] = 7 + + for mfi in imfab_device: + bx = mfi.tilebox().grow(ngv) # noqa + marr_cupy = imfab_device.array(mfi).to_cupy(order="C") + + # write and read into the marr_cupy + set_to_seven(marr_cupy) + + # verify + with cupyx.profiler.time_range("verify 7", color_id=3): + sum = imfab_device.sum_unique(comp=0, local=False) + assert sum == shape * 7 + + # TODO: @jit.rawkernel() + + +@pytest.mark.skipif( + amr.Config.gpu_backend != "CUDA", reason="Requires AMReX_GPU_BACKEND=CUDA" +) +def test_imfab_ops_cuda_pytorch(imfab_device): + # https://docs.cupy.dev/en/stable/user_guide/interoperability.html#pytorch + import torch + + # assign 3: loop through boxes and launch kernel + for mfi in imfab_device: + marr = imfab_device.array(mfi) + marr_torch = torch.as_tensor(marr, device="cuda") + marr_torch[:, :, :] = 3 + + # Check results + shape = 32**3 * 8 + sum_threes = imfab_device.sum_unique(comp=0, local=False) + assert sum_threes == shape * 3 + + +@pytest.mark.skipif( + amr.Config.gpu_backend != "CUDA", reason="Requires AMReX_GPU_BACKEND=CUDA" +) +def test_imfab_ops_cuda_cuml(imfab_device): + pass + # https://github.com/rapidsai/cuml + # https://github.com/rapidsai/cudf + # maybe better for particles as a dataframe test + # import cudf + # import cuml + + # AMReX -> RAPIDSAI cuML + # arr_cuml = ... + # assert(arr_cuml.__cuda_array_interface__['data'][0] == arr.__cuda_array_interface__['data'][0]) + # TODO + + +@pytest.mark.skipif( + amr.Config.gpu_backend != "CUDA", reason="Requires AMReX_GPU_BACKEND=CUDA" +) +def test_imfab_dtoh_copy(imfab_device): + class MfabPinnedContextManager: + def __enter__(self): + self.imfab = amr.MultiFab( + imfab_device.box_array(), + imfab_device.dm(), + imfab_device.n_comp, + imfab_device.n_grow_vect, + amr.MFInfo().set_arena(amr.The_Pinned_Arena()), + ) + return self.imfab + + def __exit__(self, exc_type, exc_value, traceback): + self.imfab.clear() + del self.imfab + + with MfabPinnedContextManager() as imfab_host: + imfab_host.set_val(42) + + amr.dtoh_memcpy(imfab_host, imfab_device) + + # assert all are 0 on host + host_min = imfab_host.min(0) + host_max = imfab_host.max(0) + assert host_min == host_max + assert host_max == 0 + + dev_val = 11 + imfab_host.set_val(dev_val) + amr.htod_memcpy(imfab_device, imfab_host) + + # assert all are 11 on device + for n in range(imfab_device.n_comp): + assert imfab_device.min(comp=n) == dev_val + assert imfab_device.max(comp=n) == dev_val + + # numpy bindings (w/ copy) + local_boxes_host = imfab_device.to_numpy(copy=True) + assert max([np.max(box) for box in local_boxes_host]) == dev_val + del local_boxes_host + + # numpy bindings (w/ copy) + for mfi in imfab_device: + marr = imfab_device.array(mfi).to_numpy(copy=True) + assert np.min(marr) >= dev_val + assert np.max(marr) <= dev_val + + # cupy bindings (w/o copy) + import cupy as cp + + local_boxes_device = imfab_device.to_cupy() + assert max([cp.max(box) for box in local_boxes_device]) == dev_val + + +def test_imfab_copy(imfab): + # write to imfab + imfab.set_val(42) + for i in range(imfab.num_comp): + np.testing.assert_allclose(imfab.max(i), 42) + + # copy + new_imfab = imfab.copy() + + # write to old imfab + imfab.set_val(1) + for i in range(imfab.num_comp): + np.testing.assert_allclose(imfab.max(i), 1) + + # check new imfab is the original data + for i in range(new_imfab.num_comp): + np.testing.assert_allclose(new_imfab.max(i), 42)