From a21499b4c2c0db79485ce98b4c0eba71cac93d7f Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Wed, 28 Aug 2024 12:51:37 -0400 Subject: [PATCH 1/6] add updated Frontier scaling with ROCm 6.1.3 (#2948) --- .../frontier/frontier-scaling-2024-08-21.txt | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 Exec/science/flame_wave/scaling/frontier/frontier-scaling-2024-08-21.txt diff --git a/Exec/science/flame_wave/scaling/frontier/frontier-scaling-2024-08-21.txt b/Exec/science/flame_wave/scaling/frontier/frontier-scaling-2024-08-21.txt new file mode 100644 index 0000000000..660b7af7b9 --- /dev/null +++ b/Exec/science/flame_wave/scaling/frontier/frontier-scaling-2024-08-21.txt @@ -0,0 +1,39 @@ +# new scaling numbers for the 3D XRB +# using the same inputs.He.25cm.static.1000Hz as previously +# modules: +# +# module load PrgEnv-gnu +# module load cray-mpich/8.1.28 +# module load craype-accel-amd-gfx90a +# module load amd-mixed/6.1.3 +# module unload darshan-runtime +# +# build info: +# +# EOS: /ccs/home/zingale/Microphysics/EOS/helmholtz +# NETWORK: /ccs/home/zingale/Microphysics/networks/iso7 +# CONDUCTIVITY: /ccs/home/zingale/Microphysics/conductivity/stellar +# INTEGRATOR: VODE +# SCREENING: screen5 +# +# Castro git describe: 24.08-3-g15327db6b +# AMReX git describe: 24.08-25-g6dcaa1223 +# Microphysics git describe: 24.08-2-g8ce3375a + +# nodes rocm mag_grid_size avg time / std dev +# step + 48 6.1.3 128 59.0711 0.2525 + 64 6.1.3 128 42.6938 0.285659 + 128 6.1.3 128 24.5353 1.36496 + 256 6.1.3 128 13.3647 0.108731 + 512 6.1.3 128 7.88166 0.0856889 +1024 6.1.3 128 5.54221 0.0979851 +2048 6.1.3 128 4.55679 0.0528629 + + +# (note that the 2048 run uses a blocking factor of 16) + +# in contrast to the previous run, we've disabled all inlining with +# ROCm to get around some compiler bugs, so that might explain some +# slight slowdown here. 
+ From b3696adfdbbb2f3cce176e9dfec8bb5f3d859105 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Wed, 28 Aug 2024 12:52:07 -0400 Subject: [PATCH 2/6] update the plotting script for bubble_convergence (#2947) this now gives more reasonable defaults --- .../analysis/slice_multi.py | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/Exec/reacting_tests/bubble_convergence/analysis/slice_multi.py b/Exec/reacting_tests/bubble_convergence/analysis/slice_multi.py index d05bd7fba9..2f8c1e16b8 100755 --- a/Exec/reacting_tests/bubble_convergence/analysis/slice_multi.py +++ b/Exec/reacting_tests/bubble_convergence/analysis/slice_multi.py @@ -1,5 +1,8 @@ #!/usr/bin/env python3 +import matplotlib +matplotlib.use('agg') + import os import sys import yt @@ -26,35 +29,37 @@ fig = plt.figure() fig.set_size_inches(12.0, 9.0) -grid = ImageGrid(fig, 111, nrows_ncols=(2, 2), axes_pad=0.75, cbar_pad="2%", +grid = ImageGrid(fig, 111, nrows_ncols=(2, 2), + axes_pad=0.75, cbar_pad="2%", label_mode="L", cbar_mode="each") -fields = ["Temp", "magvel", "X(C12)", "rho_enuc"] +fields = ["Temp", "magvel", "X(C12)", "enuc"] for i, f in enumerate(fields): - sp = yt.SlicePlot(ds, "z", f, center=[xctr, yctr, 0.0], width=[L_x, L_y, 0.0], fontsize="12") + sp = yt.SlicePlot(ds, "z", f, center=[xctr, yctr, 0.0*cm], + width=[L_x, L_y, 0.0*cm], fontsize="12") sp.set_buff_size((2000,2000)) if f == "X(C12)": sp.set_log(f, True) - sp.set_cmap(f, "plasma") - sp.set_zlim(f, 1.e-8, 2.e-4) + sp.set_cmap(f, "magma") + sp.set_zlim(f, 1.e-8, 1.e-4) elif f == "magvel": sp.set_log(f, False) #sp.set_zlim(f, 1.e-3, 2.5e-2) - sp.set_cmap(f, "magma") + sp.set_cmap(f, "cividis") elif f == "Temp": - sp.set_log(f, False) - #sp.set_zlim(f, 1.e-3, 2.5e-2) - - elif f == "rho_enuc": sp.set_log(f, True) + sp.set_zlim(f, 5.e7, 2.e8) + + elif f == "enuc": + sp.set_log(f, True, linthresh=1.e11) + sp.set_zlim(f, 1.e11, 1.e14) sp.set_cmap(f, "plasma") - #sp.set_zlim(f, 1.e-3, 2.5e-2) 
sp.set_axes_unit("cm") @@ -71,5 +76,4 @@ fig.set_size_inches(8.0, 8.0) plt.tight_layout() -plt.savefig("{}_slice.pdf".format(os.path.basename(plotfile))) - +plt.savefig("{}_slice.png".format(os.path.basename(plotfile))) From 0896c6fb310ffeec970c907fa43bcf2cdda59596 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Wed, 28 Aug 2024 12:53:29 -0400 Subject: [PATCH 3/6] update the convergence details for hse_convergence (#2946) the best results now come from use_pslope and reflecting BCs this is because of work we've done since the README was last updated. --- Exec/gravity_tests/hse_convergence/README.md | 31 ++++++++------ .../hse_convergence/convergence_plm.sh | 41 +++---------------- .../hse_convergence/convergence_ppm.sh | 12 +++--- 3 files changed, 31 insertions(+), 53 deletions(-) diff --git a/Exec/gravity_tests/hse_convergence/README.md b/Exec/gravity_tests/hse_convergence/README.md index fa639f4708..fab88d7bd7 100644 --- a/Exec/gravity_tests/hse_convergence/README.md +++ b/Exec/gravity_tests/hse_convergence/README.md @@ -7,29 +7,34 @@ in the plotfiles. To run this problem, use one of the convergence scripts: - * ``convergence_plm.sh`` : + * `convergence_plm.sh` : - this runs CTU + PLM using the default HSE BCs and default - use_pslope, then with reflect BCs, then without use_pslope, and - finally runs with reflect instead of HSE BCs. + this runs CTU + PLM using: + 1. the default HSE BCs and `use_pslope` + 2. the HSE BCs with reflection and `use_pslope` + 3. reflect BCs instead of HSE BCs without `use_pslope` + 4. reflect BCs with `use_pslope` - These tests show that the best results come from HSE BCs + reflect vel + These tests show that the best results (by far) come from + `use_pslope=1` and reflecting BCs * convergence_ppm.sh : this runs CTU + PPM in a similar set of configurations as PLM above - (with one additional one: grav_source_type = 4) + 1. the default HSE BCs + 2. HSE BCs with reflection + 3. reflecting BCs + 4. 
reflecting BCs with `use_pslope` + - These tests show that the best results come from HSE BCs + reflect vel + These tests show that the best results (by far) come from + reflecting BCs with `use_pslope=1`, just like the PLM case. * convergence_sdc.sh : - this uses the TRUE_SDC integration, first with SDC-2 + PLM and reflecting BCs, - the SDC-2 + PPM and reflecting BCs, then the same but HSE BCs, and finally - SDC-4 + reflect + this uses the TRUE_SDC integration, first with SDC-2 + PLM and + reflecting BCs, then SDC-2 + PPM and reflecting BCs, then the same + but HSE BCs, and finally SDC-4 + reflect These tests show that the PLM + reflect (which uses the well-balanced use_pslope) and the SDC-4 + reflect give the lowest - errors and expected (or better) convergence: - - + errors and expected (or better) convergence. diff --git a/Exec/gravity_tests/hse_convergence/convergence_plm.sh b/Exec/gravity_tests/hse_convergence/convergence_plm.sh index 285cfed67d..3d12427ad5 100755 --- a/Exec/gravity_tests/hse_convergence/convergence_plm.sh +++ b/Exec/gravity_tests/hse_convergence/convergence_plm.sh @@ -58,43 +58,15 @@ pfile=`ls -t | grep -i hse_512_plt | head -1` fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel >> ${ofile} -## plm + hse reflect + no pslope - -ofile=plm-hsereflect-nopslope.converge.out - -RUNPARAMS=" -castro.ppm_type=0 -castro.use_pslope=0 -castro.hse_interp_temp=1 -castro.hse_reflect_vels=1 -""" - -${EXEC} inputs.ppm.64 ${RUNPARAMS} >& 64.out -pfile=`ls -t | grep -i hse_64_plt | head -1` -fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel > ${ofile} - -${EXEC} inputs.ppm.128 ${RUNPARAMS} >& 128.out -pfile=`ls -t | grep -i hse_128_plt | head -1` -fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel >> ${ofile} - -${EXEC} inputs.ppm.256 ${RUNPARAMS} >& 256.out -pfile=`ls -t | grep -i hse_256_plt | head -1` -fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel >> ${ofile} - -${EXEC} inputs.ppm.512 ${RUNPARAMS} >& 512.out -pfile=`ls -t | grep -i hse_512_plt | head 
-1` -fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel >> ${ofile} - - -## plm + reflect +## plm + reflect + nopslope -ofile=plm-reflect.converge.out +ofile=plm-reflect-nopslope.converge.out RUNPARAMS=" castro.ppm_type=0 -castro.use_pslope=1 castro.lo_bc=3 castro.hi_bc=3 +castro.use_pslope=0 """ ${EXEC} inputs.ppm.64 ${RUNPARAMS} >& 64.out @@ -114,16 +86,15 @@ pfile=`ls -t | grep -i hse_512_plt | head -1` fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel >> ${ofile} +## plm + reflect + pslope -## plm + reflect + nopslope - -ofile=plm-reflect-nopslope.converge.out +ofile=plm-reflect-pslope.converge.out RUNPARAMS=" castro.ppm_type=0 castro.lo_bc=3 castro.hi_bc=3 -castro.use_pslope=0 +castro.use_pslope=1 """ ${EXEC} inputs.ppm.64 ${RUNPARAMS} >& 64.out diff --git a/Exec/gravity_tests/hse_convergence/convergence_ppm.sh b/Exec/gravity_tests/hse_convergence/convergence_ppm.sh index ff6e2c7620..27b4bb86ef 100755 --- a/Exec/gravity_tests/hse_convergence/convergence_ppm.sh +++ b/Exec/gravity_tests/hse_convergence/convergence_ppm.sh @@ -50,12 +50,13 @@ pfile=`ls -t | grep -i hse_512_plt | head -1` fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel >> ${ofile} -## ppm + grav_source_type = 4 +## ppm + reflect -ofile=ppm-grav4.converge.out +ofile=ppm-reflect.converge.out RUNPARAMS=" -castro.grav_source_type=4 +castro.lo_bc=3 +castro.hi_bc=3 """ ${EXEC} inputs.ppm.64 ${RUNPARAMS} >& 64.out @@ -75,13 +76,14 @@ pfile=`ls -t | grep -i hse_512_plt | head -1` fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel >> ${ofile} -## ppm + reflect +## ppm + reflect + pslope -ofile=ppm-reflect.converge.out +ofile=ppm-reflect-pslope.converge.out RUNPARAMS=" castro.lo_bc=3 castro.hi_bc=3 +castro.use_pslope=1 """ ${EXEC} inputs.ppm.64 ${RUNPARAMS} >& 64.out From 2b3c8537aca60efea5c8e17329dc968bff366b86 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Wed, 28 Aug 2024 14:10:10 -0400 Subject: [PATCH 4/6] fix some clang-tidy issues for wdmerger (#2949) --- Exec/science/wdmerger/Prob.cpp | 
2 +- Exec/science/wdmerger/Problem.H | 3 +++ Exec/science/wdmerger/problem_checkpoint.H | 2 +- Exec/science/wdmerger/problem_restart.H | 2 +- Exec/science/wdmerger/wdmerger_util.cpp | 2 +- Source/gravity/Gravity.cpp | 12 ++++++------ Source/gravity/binary.H | 2 +- 7 files changed, 14 insertions(+), 11 deletions(-) diff --git a/Exec/science/wdmerger/Prob.cpp b/Exec/science/wdmerger/Prob.cpp index ac6a41eceb..ba28a87a18 100644 --- a/Exec/science/wdmerger/Prob.cpp +++ b/Exec/science/wdmerger/Prob.cpp @@ -604,7 +604,7 @@ Castro::update_relaxation(Real time, Real dt) { const Real ldt = new_time - old_time; - force[lev].reset(new MultiFab(getLevel(lev).grids, getLevel(lev).dmap, NUM_STATE, 0)); + force[lev] = std::make_unique(getLevel(lev).grids, getLevel(lev).dmap, NUM_STATE, 0); force[lev]->setVal(0.0); MultiFab& S_new = getLevel(lev).get_new_data(State_Type); diff --git a/Exec/science/wdmerger/Problem.H b/Exec/science/wdmerger/Problem.H index db48b224e7..8696516ab6 100644 --- a/Exec/science/wdmerger/Problem.H +++ b/Exec/science/wdmerger/Problem.H @@ -27,11 +27,13 @@ void volInBoundary (amrex::Real time, amrex::Real& vol_p, amrex::Real& vol_s, am // Computes standard dot product of two three-vectors. +static amrex::Real dot_product(const amrex::Real a[], const amrex::Real b[]); // Computes norm of a three vector. +static amrex::Real norm(const amrex::Real a[]); // Problem post-initialization routine. @@ -48,6 +50,7 @@ void problem_post_timestep(); // Write out the git hashes for the various parts of the code. +static void writeGitHashes(std::ostream& log); // Update relaxation process. 
diff --git a/Exec/science/wdmerger/problem_checkpoint.H b/Exec/science/wdmerger/problem_checkpoint.H index 1ddee5e049..9ee3b16846 100644 --- a/Exec/science/wdmerger/problem_checkpoint.H +++ b/Exec/science/wdmerger/problem_checkpoint.H @@ -7,7 +7,7 @@ #include AMREX_INLINE -void problem_checkpoint (std::string checkpoint_dir) +void problem_checkpoint (const std::string& checkpoint_dir) { std::ofstream com; com.open(checkpoint_dir + "/COM", std::ios::out); diff --git a/Exec/science/wdmerger/problem_restart.H b/Exec/science/wdmerger/problem_restart.H index 2246732b64..3acbe99165 100644 --- a/Exec/science/wdmerger/problem_restart.H +++ b/Exec/science/wdmerger/problem_restart.H @@ -5,7 +5,7 @@ #include AMREX_INLINE -void problem_restart (std::string checkpoint_dir) +void problem_restart (const std::string& checkpoint_dir) { std::ifstream com; com.open(checkpoint_dir + "/COM", std::ios::in); diff --git a/Exec/science/wdmerger/wdmerger_util.cpp b/Exec/science/wdmerger/wdmerger_util.cpp index 65b09f89e5..6cb481b5e4 100644 --- a/Exec/science/wdmerger/wdmerger_util.cpp +++ b/Exec/science/wdmerger/wdmerger_util.cpp @@ -658,7 +658,7 @@ void binary_setup () Real v_P_r, v_S_r, v_P_phi, v_S_phi; - kepler_third_law(problem::radius_P, problem::mass_P, problem::radius_S, problem::mass_S, + kepler_third_law(problem::radius_P, problem::mass_P, problem::radius_S, problem::mass_S, // NOLINT(readability-suspicious-call-argument) castro::rotational_period, problem::orbital_eccentricity, problem::orbital_angle, problem::a, problem::r_P_initial, problem::r_S_initial, v_P_r, v_S_r, v_P_phi, v_S_phi); diff --git a/Source/gravity/Gravity.cpp b/Source/gravity/Gravity.cpp index 18eebefbfc..c5f7bac162 100644 --- a/Source/gravity/Gravity.cpp +++ b/Source/gravity/Gravity.cpp @@ -2777,12 +2777,12 @@ Gravity::fill_direct_sum_BCs(int crse_level, int fine_level, const Vector::max()); BL_ASSERT(nPtsYZ <= std::numeric_limits::max()); - ParallelDescriptor::ReduceRealSum(bcXYLo.dataPtr(), nPtsXY); - 
ParallelDescriptor::ReduceRealSum(bcXYHi.dataPtr(), nPtsXY); - ParallelDescriptor::ReduceRealSum(bcXZLo.dataPtr(), nPtsXZ); - ParallelDescriptor::ReduceRealSum(bcXZHi.dataPtr(), nPtsXZ); - ParallelDescriptor::ReduceRealSum(bcYZLo.dataPtr(), nPtsYZ); - ParallelDescriptor::ReduceRealSum(bcYZHi.dataPtr(), nPtsYZ); + ParallelDescriptor::ReduceRealSum(bcXYLo.dataPtr(), static_cast(nPtsXY)); + ParallelDescriptor::ReduceRealSum(bcXYHi.dataPtr(), static_cast(nPtsXY)); + ParallelDescriptor::ReduceRealSum(bcXZLo.dataPtr(), static_cast(nPtsXZ)); + ParallelDescriptor::ReduceRealSum(bcXZHi.dataPtr(), static_cast(nPtsXZ)); + ParallelDescriptor::ReduceRealSum(bcYZLo.dataPtr(), static_cast(nPtsYZ)); + ParallelDescriptor::ReduceRealSum(bcYZHi.dataPtr(), static_cast(nPtsYZ)); #ifdef _OPENMP #pragma omp parallel diff --git a/Source/gravity/binary.H b/Source/gravity/binary.H index 684d875139..1d2438b7d4 100644 --- a/Source/gravity/binary.H +++ b/Source/gravity/binary.H @@ -67,7 +67,7 @@ void lagrange_iterate (Real& r, Real mass_1, Real mass_2, Real r1, Real r2, Real const Real tolerance = 1.0e-8_rt; const int max_iters = 200; - Real rm, rp; + Real rm{}, rp{}; if (r_min == 0.0_rt && r_max == 0.0_rt) { amrex::Abort("Lagrange point iteration must have at least one non-zero bound provided."); From 75330a94ae73cb9c21d6bc3a9baf02c205a9500e Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Sun, 1 Sep 2024 11:53:25 -0400 Subject: [PATCH 5/6] update to 24.09 --- external/Microphysics | 2 +- external/amrex | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/Microphysics b/external/Microphysics index 14b8b0e317..3961b439ba 160000 --- a/external/Microphysics +++ b/external/Microphysics @@ -1 +1 @@ -Subproject commit 14b8b0e3173041968943d4bbac2c4803a33abceb +Subproject commit 3961b439ba5c8193975018300e4c23a09b533206 diff --git a/external/amrex b/external/amrex index ac5dde35b6..74127d6d8f 160000 --- a/external/amrex +++ b/external/amrex @@ -1 +1 @@ -Subproject commit 
ac5dde35b6c10f5d91e289edeff218bde84878a4 +Subproject commit 74127d6d8fa83f922069a25e7ef9f153aa73f68c From a79e5b01110021ccda6b71e20007af1aeafc126e Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Sun, 1 Sep 2024 11:53:42 -0400 Subject: [PATCH 6/6] update for 24.09 --- CHANGES.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 7b7e78a0da..665eff135e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,20 @@ +# 24.09 + + * Code clean-ups / clang-tidy (#2942, #2949) + + * update the `hse_convergence` readme to reflect current convergence + (#2946) + + * update the `bubble_convergence` plotting script (#2947) + + * new Frontier scaling numbers (#2948) + + * more GPU error printing (#3944) + + * science problem updates: `flame_wave` (#2943) + + * documentation updates (#2939) + # 24.08 * lazy QueueReduction has been enabled for the timing diagnostics