From a21499b4c2c0db79485ce98b4c0eba71cac93d7f Mon Sep 17 00:00:00 2001
From: Michael Zingale <michael.zingale@stonybrook.edu>
Date: Wed, 28 Aug 2024 12:51:37 -0400
Subject: [PATCH 1/6] add updated Frontier scaling with ROCm 6.1.3 (#2948)

---
 .../frontier/frontier-scaling-2024-08-21.txt  | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 Exec/science/flame_wave/scaling/frontier/frontier-scaling-2024-08-21.txt

diff --git a/Exec/science/flame_wave/scaling/frontier/frontier-scaling-2024-08-21.txt b/Exec/science/flame_wave/scaling/frontier/frontier-scaling-2024-08-21.txt
new file mode 100644
index 0000000000..660b7af7b9
--- /dev/null
+++ b/Exec/science/flame_wave/scaling/frontier/frontier-scaling-2024-08-21.txt
@@ -0,0 +1,39 @@
+# new scaling numbers for the 3D XRB
+# using the same inputs.He.25cm.static.1000Hz as previously
+# modules:
+#
+#   module load PrgEnv-gnu
+#   module load cray-mpich/8.1.28
+#   module load craype-accel-amd-gfx90a
+#   module load amd-mixed/6.1.3
+#   module unload darshan-runtime
+#
+# build info:
+#
+# EOS: /ccs/home/zingale/Microphysics/EOS/helmholtz
+# NETWORK: /ccs/home/zingale/Microphysics/networks/iso7
+# CONDUCTIVITY: /ccs/home/zingale/Microphysics/conductivity/stellar
+# INTEGRATOR: VODE
+# SCREENING: screen5
+#
+# Castro       git describe: 24.08-3-g15327db6b
+# AMReX        git describe: 24.08-25-g6dcaa1223
+# Microphysics git describe: 24.08-2-g8ce3375a
+
+# nodes  rocm      mag_grid_size   avg time /   std dev
+#                          step
+  48      6.1.3          128           59.0711   0.2525
+  64      6.1.3          128           42.6938 0.285659
+ 128      6.1.3          128           24.5353 1.36496
+ 256      6.1.3          128           13.3647 0.108731
+ 512      6.1.3          128            7.88166 0.0856889
+1024      6.1.3          128            5.54221 0.0979851
+2048      6.1.3          128            4.55679 0.0528629
+
+
+# (note that the 2048 run uses a blocking factor of 16)
+
+# in contrast to the previous run, we've disabled all inlining with
+# ROCm to get around some compiler bugs, so that might explain some
+# slight slowdown here.
+

From b3696adfdbbb2f3cce176e9dfec8bb5f3d859105 Mon Sep 17 00:00:00 2001
From: Michael Zingale <michael.zingale@stonybrook.edu>
Date: Wed, 28 Aug 2024 12:52:07 -0400
Subject: [PATCH 2/6] update the plotting script for bubble_convergence (#2947)

this now gives more reasonable defaults
---
 .../analysis/slice_multi.py                   | 30 +++++++++++--------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/Exec/reacting_tests/bubble_convergence/analysis/slice_multi.py b/Exec/reacting_tests/bubble_convergence/analysis/slice_multi.py
index d05bd7fba9..2f8c1e16b8 100755
--- a/Exec/reacting_tests/bubble_convergence/analysis/slice_multi.py
+++ b/Exec/reacting_tests/bubble_convergence/analysis/slice_multi.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+import matplotlib
+matplotlib.use('agg')
+
 import os
 import sys
 import yt
@@ -26,35 +29,37 @@
 fig = plt.figure()
 fig.set_size_inches(12.0, 9.0)
 
-grid = ImageGrid(fig, 111, nrows_ncols=(2, 2), axes_pad=0.75, cbar_pad="2%",
+grid = ImageGrid(fig, 111, nrows_ncols=(2, 2),
+                 axes_pad=0.75, cbar_pad="2%",
                  label_mode="L", cbar_mode="each")
 
 
-fields = ["Temp", "magvel", "X(C12)", "rho_enuc"]
+fields = ["Temp", "magvel", "X(C12)", "enuc"]
 
 for i, f in enumerate(fields):
 
-    sp = yt.SlicePlot(ds, "z", f, center=[xctr, yctr, 0.0], width=[L_x, L_y, 0.0], fontsize="12")
+    sp = yt.SlicePlot(ds, "z", f, center=[xctr, yctr, 0.0*cm],
+                      width=[L_x, L_y, 0.0*cm], fontsize="12")
     sp.set_buff_size((2000,2000))
 
     if f == "X(C12)":
         sp.set_log(f, True)
-        sp.set_cmap(f, "plasma")
-        sp.set_zlim(f, 1.e-8, 2.e-4)
+        sp.set_cmap(f, "magma")
+        sp.set_zlim(f, 1.e-8, 1.e-4)
 
     elif f == "magvel":
         sp.set_log(f, False)
         #sp.set_zlim(f, 1.e-3, 2.5e-2)
-        sp.set_cmap(f, "magma")
+        sp.set_cmap(f, "cividis")
 
     elif f == "Temp":
-        sp.set_log(f, False)
-        #sp.set_zlim(f, 1.e-3, 2.5e-2)
-
-    elif f == "rho_enuc":
         sp.set_log(f, True)
+        sp.set_zlim(f, 5.e7, 2.e8)
+
+    elif f == "enuc":
+        sp.set_log(f, True, linthresh=1.e11)
+        sp.set_zlim(f, 1.e11, 1.e14)
         sp.set_cmap(f, "plasma")
-        #sp.set_zlim(f, 1.e-3, 2.5e-2)
 
     sp.set_axes_unit("cm")
 
@@ -71,5 +76,4 @@
 
 fig.set_size_inches(8.0, 8.0)
 plt.tight_layout()
-plt.savefig("{}_slice.pdf".format(os.path.basename(plotfile)))
-
+plt.savefig("{}_slice.png".format(os.path.basename(plotfile)))

From 0896c6fb310ffeec970c907fa43bcf2cdda59596 Mon Sep 17 00:00:00 2001
From: Michael Zingale <michael.zingale@stonybrook.edu>
Date: Wed, 28 Aug 2024 12:53:29 -0400
Subject: [PATCH 3/6] update the convergence details for hse_convergence
 (#2946)

the best results now come from use_pslope and reflecting BCs
this is because of work we've done since the README was last
updated.
---
 Exec/gravity_tests/hse_convergence/README.md  | 31 ++++++++------
 .../hse_convergence/convergence_plm.sh        | 41 +++----------------
 .../hse_convergence/convergence_ppm.sh        | 12 +++---
 3 files changed, 31 insertions(+), 53 deletions(-)

diff --git a/Exec/gravity_tests/hse_convergence/README.md b/Exec/gravity_tests/hse_convergence/README.md
index fa639f4708..fab88d7bd7 100644
--- a/Exec/gravity_tests/hse_convergence/README.md
+++ b/Exec/gravity_tests/hse_convergence/README.md
@@ -7,29 +7,34 @@ in the plotfiles.
 
 To run this problem, use one of the convergence scripts:
 
-  * ``convergence_plm.sh`` :
+  * `convergence_plm.sh` :
 
-    this runs CTU + PLM using the default HSE BCs and default
-    use_pslope, then with reflect BCs, then without use_pslope, and
-    finally runs with reflect instead of HSE BCs.
+    this runs CTU + PLM using:
+    1. the default HSE BCs and `use_pslope`
+    2. the HSE BCs with reflection and `use_pslope`
+    3. reflect BCs instead of HSE BCs without `use_pslope`
+    4. reflect BCs with `use_pslope`
 
-    These tests show that the best results come from HSE BCs + reflect vel
+    These tests show that the best results (by far) come from
+    `use_pslope=1` and reflecting BCs.
 
   * convergence_ppm.sh :
 
     this runs CTU + PPM in a similar set of configurations as PLM above
-    (with one additional one: grav_source_type = 4)
+    1. the default HSE BCs
+    2. HSE BCs with reflection
+    3. reflecting BCs
+    4. reflecting BCs with `use_pslope`
 
-    These tests show that the best results come from HSE BCs + reflect vel
+    These tests show that the best results (by far) come from
+    reflecting BCs with `use_pslope=1`, just like the PLM case.
 
   * convergence_sdc.sh :
 
-    this uses the TRUE_SDC integration, first with SDC-2 + PLM  and reflecting BCs,
-    the SDC-2 + PPM and reflecting BCs, then the same but HSE BCs, and finally
-    SDC-4 + reflect
+    this uses the TRUE_SDC integration, first with SDC-2 + PLM and
+    reflecting BCs, then SDC-2 + PPM and reflecting BCs, then the same
+    but HSE BCs, and finally SDC-4 + reflect
 
     These tests show that the PLM + reflect (which uses the
     well-balanced use_pslope) and the SDC-4 + reflect give the lowest
-    errors and expected (or better) convergence:
-
-
+    errors and expected (or better) convergence.
diff --git a/Exec/gravity_tests/hse_convergence/convergence_plm.sh b/Exec/gravity_tests/hse_convergence/convergence_plm.sh
index 285cfed67d..3d12427ad5 100755
--- a/Exec/gravity_tests/hse_convergence/convergence_plm.sh
+++ b/Exec/gravity_tests/hse_convergence/convergence_plm.sh
@@ -58,43 +58,15 @@ pfile=`ls -t | grep -i hse_512_plt | head -1`
 fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel >> ${ofile}
 
 
-## plm + hse reflect + no pslope
-
-ofile=plm-hsereflect-nopslope.converge.out
-
-RUNPARAMS="
-castro.ppm_type=0
-castro.use_pslope=0
-castro.hse_interp_temp=1
-castro.hse_reflect_vels=1
-"""
-
-${EXEC} inputs.ppm.64 ${RUNPARAMS} >& 64.out
-pfile=`ls -t | grep -i hse_64_plt | head -1`
-fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel > ${ofile}
-
-${EXEC} inputs.ppm.128 ${RUNPARAMS} >& 128.out
-pfile=`ls -t | grep -i hse_128_plt | head -1`
-fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel >> ${ofile}
-
-${EXEC} inputs.ppm.256 ${RUNPARAMS} >& 256.out
-pfile=`ls -t | grep -i hse_256_plt | head -1`
-fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel >> ${ofile}
-
-${EXEC} inputs.ppm.512 ${RUNPARAMS} >& 512.out
-pfile=`ls -t | grep -i hse_512_plt | head -1`
-fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel >> ${ofile}
-
-
-## plm + reflect
+## plm + reflect + nopslope
 
-ofile=plm-reflect.converge.out
+ofile=plm-reflect-nopslope.converge.out
 
 RUNPARAMS="
 castro.ppm_type=0
-castro.use_pslope=1
 castro.lo_bc=3
 castro.hi_bc=3
+castro.use_pslope=0
 """
 
 ${EXEC} inputs.ppm.64 ${RUNPARAMS} >& 64.out
@@ -114,16 +86,15 @@ pfile=`ls -t | grep -i hse_512_plt | head -1`
 fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel >> ${ofile}
 
 
+## plm + reflect + pslope
 
-## plm + reflect + nopslope
-
-ofile=plm-reflect-nopslope.converge.out
+ofile=plm-reflect-pslope.converge.out
 
 RUNPARAMS="
 castro.ppm_type=0
 castro.lo_bc=3
 castro.hi_bc=3
-castro.use_pslope=0
+castro.use_pslope=1
 """
 
 ${EXEC} inputs.ppm.64 ${RUNPARAMS} >& 64.out
diff --git a/Exec/gravity_tests/hse_convergence/convergence_ppm.sh b/Exec/gravity_tests/hse_convergence/convergence_ppm.sh
index ff6e2c7620..27b4bb86ef 100755
--- a/Exec/gravity_tests/hse_convergence/convergence_ppm.sh
+++ b/Exec/gravity_tests/hse_convergence/convergence_ppm.sh
@@ -50,12 +50,13 @@ pfile=`ls -t | grep -i hse_512_plt | head -1`
 fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel >> ${ofile}
 
 
-## ppm + grav_source_type = 4
+## ppm + reflect
 
-ofile=ppm-grav4.converge.out
+ofile=ppm-reflect.converge.out
 
 RUNPARAMS="
-castro.grav_source_type=4
+castro.lo_bc=3
+castro.hi_bc=3
 """
 
 ${EXEC} inputs.ppm.64 ${RUNPARAMS} >& 64.out
@@ -75,13 +76,14 @@ pfile=`ls -t | grep -i hse_512_plt | head -1`
 fextrema.gnu.ex -v magvel ${pfile} | grep -i magvel >> ${ofile}
 
 
-## ppm + reflect
+## ppm + reflect + pslope
 
-ofile=ppm-reflect.converge.out
+ofile=ppm-reflect-pslope.converge.out
 
 RUNPARAMS="
 castro.lo_bc=3
 castro.hi_bc=3
+castro.use_pslope=1
 """
 
 ${EXEC} inputs.ppm.64 ${RUNPARAMS} >& 64.out

From 2b3c8537aca60efea5c8e17329dc968bff366b86 Mon Sep 17 00:00:00 2001
From: Michael Zingale <michael.zingale@stonybrook.edu>
Date: Wed, 28 Aug 2024 14:10:10 -0400
Subject: [PATCH 4/6] fix some clang-tidy issues for wdmerger (#2949)

---
 Exec/science/wdmerger/Prob.cpp             |  2 +-
 Exec/science/wdmerger/Problem.H            |  3 +++
 Exec/science/wdmerger/problem_checkpoint.H |  2 +-
 Exec/science/wdmerger/problem_restart.H    |  2 +-
 Exec/science/wdmerger/wdmerger_util.cpp    |  2 +-
 Source/gravity/Gravity.cpp                 | 12 ++++++------
 Source/gravity/binary.H                    |  2 +-
 7 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/Exec/science/wdmerger/Prob.cpp b/Exec/science/wdmerger/Prob.cpp
index ac6a41eceb..ba28a87a18 100644
--- a/Exec/science/wdmerger/Prob.cpp
+++ b/Exec/science/wdmerger/Prob.cpp
@@ -604,7 +604,7 @@ Castro::update_relaxation(Real time, Real dt) {
 
         const Real ldt = new_time - old_time;
 
-        force[lev].reset(new MultiFab(getLevel(lev).grids, getLevel(lev).dmap, NUM_STATE, 0));
+        force[lev] = std::make_unique<MultiFab>(getLevel(lev).grids, getLevel(lev).dmap, NUM_STATE, 0);
         force[lev]->setVal(0.0);
 
         MultiFab& S_new = getLevel(lev).get_new_data(State_Type);
diff --git a/Exec/science/wdmerger/Problem.H b/Exec/science/wdmerger/Problem.H
index db48b224e7..8696516ab6 100644
--- a/Exec/science/wdmerger/Problem.H
+++ b/Exec/science/wdmerger/Problem.H
@@ -27,11 +27,13 @@ void volInBoundary (amrex::Real time, amrex::Real& vol_p, amrex::Real& vol_s, am
 
 // Computes standard dot product of two three-vectors.
 
+static
 amrex::Real dot_product(const amrex::Real a[], const amrex::Real b[]);
 
 
 // Computes norm of a three vector.
 
+static
 amrex::Real norm(const amrex::Real a[]);
 
 // Problem post-initialization routine.
@@ -48,6 +50,7 @@ void problem_post_timestep();
 
 // Write out the git hashes for the various parts of the code.
 
+static
 void writeGitHashes(std::ostream& log);
 
 // Update relaxation process.
diff --git a/Exec/science/wdmerger/problem_checkpoint.H b/Exec/science/wdmerger/problem_checkpoint.H
index 1ddee5e049..9ee3b16846 100644
--- a/Exec/science/wdmerger/problem_checkpoint.H
+++ b/Exec/science/wdmerger/problem_checkpoint.H
@@ -7,7 +7,7 @@
 #include <prob_parameters.H>
 
 AMREX_INLINE
-void problem_checkpoint (std::string checkpoint_dir)
+void problem_checkpoint (const std::string& checkpoint_dir)
 {
     std::ofstream com;
     com.open(checkpoint_dir + "/COM", std::ios::out);
diff --git a/Exec/science/wdmerger/problem_restart.H b/Exec/science/wdmerger/problem_restart.H
index 2246732b64..3acbe99165 100644
--- a/Exec/science/wdmerger/problem_restart.H
+++ b/Exec/science/wdmerger/problem_restart.H
@@ -5,7 +5,7 @@
 #include <fstream>
 
 AMREX_INLINE
-void problem_restart (std::string checkpoint_dir)
+void problem_restart (const std::string& checkpoint_dir)
 {
     std::ifstream com;
     com.open(checkpoint_dir + "/COM", std::ios::in);
diff --git a/Exec/science/wdmerger/wdmerger_util.cpp b/Exec/science/wdmerger/wdmerger_util.cpp
index 65b09f89e5..6cb481b5e4 100644
--- a/Exec/science/wdmerger/wdmerger_util.cpp
+++ b/Exec/science/wdmerger/wdmerger_util.cpp
@@ -658,7 +658,7 @@ void binary_setup ()
 
             Real v_P_r, v_S_r, v_P_phi, v_S_phi;
 
-            kepler_third_law(problem::radius_P, problem::mass_P, problem::radius_S, problem::mass_S,
+            kepler_third_law(problem::radius_P, problem::mass_P, problem::radius_S, problem::mass_S,  // NOLINT(readability-suspicious-call-argument)
                              castro::rotational_period, problem::orbital_eccentricity, problem::orbital_angle,
                              problem::a, problem::r_P_initial, problem::r_S_initial, v_P_r, v_S_r, v_P_phi, v_S_phi);
 
diff --git a/Source/gravity/Gravity.cpp b/Source/gravity/Gravity.cpp
index 18eebefbfc..c5f7bac162 100644
--- a/Source/gravity/Gravity.cpp
+++ b/Source/gravity/Gravity.cpp
@@ -2777,12 +2777,12 @@ Gravity::fill_direct_sum_BCs(int crse_level, int fine_level, const Vector<MultiF
     BL_ASSERT(nPtsXZ <= std::numeric_limits<int>::max());
     BL_ASSERT(nPtsYZ <= std::numeric_limits<int>::max());
 
-    ParallelDescriptor::ReduceRealSum(bcXYLo.dataPtr(), nPtsXY);
-    ParallelDescriptor::ReduceRealSum(bcXYHi.dataPtr(), nPtsXY);
-    ParallelDescriptor::ReduceRealSum(bcXZLo.dataPtr(), nPtsXZ);
-    ParallelDescriptor::ReduceRealSum(bcXZHi.dataPtr(), nPtsXZ);
-    ParallelDescriptor::ReduceRealSum(bcYZLo.dataPtr(), nPtsYZ);
-    ParallelDescriptor::ReduceRealSum(bcYZHi.dataPtr(), nPtsYZ);
+    ParallelDescriptor::ReduceRealSum(bcXYLo.dataPtr(), static_cast<int>(nPtsXY));
+    ParallelDescriptor::ReduceRealSum(bcXYHi.dataPtr(), static_cast<int>(nPtsXY));
+    ParallelDescriptor::ReduceRealSum(bcXZLo.dataPtr(), static_cast<int>(nPtsXZ));
+    ParallelDescriptor::ReduceRealSum(bcXZHi.dataPtr(), static_cast<int>(nPtsXZ));
+    ParallelDescriptor::ReduceRealSum(bcYZLo.dataPtr(), static_cast<int>(nPtsYZ));
+    ParallelDescriptor::ReduceRealSum(bcYZHi.dataPtr(), static_cast<int>(nPtsYZ));
 
 #ifdef _OPENMP
 #pragma omp parallel
diff --git a/Source/gravity/binary.H b/Source/gravity/binary.H
index 684d875139..1d2438b7d4 100644
--- a/Source/gravity/binary.H
+++ b/Source/gravity/binary.H
@@ -67,7 +67,7 @@ void lagrange_iterate (Real& r, Real mass_1, Real mass_2, Real r1, Real r2, Real
     const Real tolerance = 1.0e-8_rt;
     const int max_iters = 200;
 
-    Real rm, rp;
+    Real rm{}, rp{};
 
     if (r_min == 0.0_rt && r_max == 0.0_rt) {
         amrex::Abort("Lagrange point iteration must have at least one non-zero bound provided.");

From 75330a94ae73cb9c21d6bc3a9baf02c205a9500e Mon Sep 17 00:00:00 2001
From: Michael Zingale <michael.zingale@stonybrook.edu>
Date: Sun, 1 Sep 2024 11:53:25 -0400
Subject: [PATCH 5/6] update to 24.09

---
 external/Microphysics | 2 +-
 external/amrex        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/external/Microphysics b/external/Microphysics
index 14b8b0e317..3961b439ba 160000
--- a/external/Microphysics
+++ b/external/Microphysics
@@ -1 +1 @@
-Subproject commit 14b8b0e3173041968943d4bbac2c4803a33abceb
+Subproject commit 3961b439ba5c8193975018300e4c23a09b533206
diff --git a/external/amrex b/external/amrex
index ac5dde35b6..74127d6d8f 160000
--- a/external/amrex
+++ b/external/amrex
@@ -1 +1 @@
-Subproject commit ac5dde35b6c10f5d91e289edeff218bde84878a4
+Subproject commit 74127d6d8fa83f922069a25e7ef9f153aa73f68c

From a79e5b01110021ccda6b71e20007af1aeafc126e Mon Sep 17 00:00:00 2001
From: Michael Zingale <michael.zingale@stonybrook.edu>
Date: Sun, 1 Sep 2024 11:53:42 -0400
Subject: [PATCH 6/6] update for 24.09

---
 CHANGES.md | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/CHANGES.md b/CHANGES.md
index 7b7e78a0da..665eff135e 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,20 @@
+# 24.09
+
+  * Code clean-ups / clang-tidy (#2942, #2949)
+
+  * update the `hse_convergence` readme to reflect current convergence
+    (#2946)
+
+  * update the `bubble_convergence` plotting script (#2947)
+
+  * new Frontier scaling numbers (#2948)
+
+  * more GPU error printing (#2944)
+
+  * science problem updates: `flame_wave` (#2943)
+
+  * documentation updates (#2939)
+
 # 24.08
 
   * lazy QueueReduction has been enabled for the timing diagnostics