AMReX-Astro · zingale · Jul 12, 2024 · Jul 8, 2024 · Jul 9, 2024 · Jul 9, 2024
diff --git a/Exec/science/flame_wave/scaling/frontier/frontier-scaling-2024-07-04-subch_simple.txt b/Exec/science/flame_wave/scaling/frontier/frontier-scaling-2024-07-04-subch_simple.txt
@@ -0,0 +1,33 @@
+# scaling numbers for the 3D XRB -- using subch_simple
+# using the same inputs.He.25cm.static.1000Hz as previously
+# modules:
+#
+#   module load PrgEnv-gnu
+#   module load cray-mpich/8.1.27
+#   module load craype-accel-amd-gfx90a
+#   module load amd-mixed/6.0.0
+#   module unload darshan-runtime
+#
+# build info:
+#
+# EOS: /ccs/home/zingale/Microphysics/EOS/helmholtz
+# NETWORK: /ccs/home/zingale/Microphysics/networks/subch_simple
+# CONDUCTIVITY: /ccs/home/zingale/Microphysics/conductivity/stellar
+# INTEGRATOR: VODE
+# SCREENING: screen5
+#
+# Castro       git describe: 24.07
+# AMReX        git describe: 24.07-3-g7dc2081a9
+# Microphysics git describe: 24.07-1-g0a96241b
+
+
+# nodes  rocm      max_grid_size   avg time /   std dev
+#                          step
+#  48      6.0          128      --- crashes due to not enough GPU memory ---
+  64      6.0          128           167.502   0.419448
+ 128      6.0          128            85.4082  0.29416
+ 256      6.0          128            46.4092  0.876868
+ 512      6.0          128            25.5446  0.123848
+1024      6.0          128            17.3517  0.0857189
+2048      6.0          128            13.564   0.0498023
+
diff --git a/Exec/science/flame_wave/scaling/frontier/frontier-scaling-2024-07-04.txt b/Exec/science/flame_wave/scaling/frontier/frontier-scaling-2024-07-04.txt
@@ -0,0 +1,38 @@
+# new scaling numbers for the 3D XRB
+# using the same inputs.He.25cm.static.1000Hz as previously
+# modules:
+#
+#   module load PrgEnv-gnu
+#   module load cray-mpich/8.1.27
+#   module load craype-accel-amd-gfx90a
+#   module load amd-mixed/6.0.0
+#   module unload darshan-runtime
+#
+# build info:
+#
+# EOS: /ccs/home/zingale/Microphysics/EOS/helmholtz
+# NETWORK: /ccs/home/zingale/Microphysics/networks/iso7
+# CONDUCTIVITY: /ccs/home/zingale/Microphysics/conductivity/stellar
+# INTEGRATOR: VODE
+# SCREENING: screen5
+#
+# Castro       git describe: 24.07
+# AMReX        git describe: 24.07-3-g7dc2081a9
+# Microphysics git describe: 24.07-1-g0a96241b
+
+# nodes  rocm      max_grid_size   avg time /   std dev
+#                          step
+  48      6.0          128           69.3646   0.286297
+  64      6.0          128           50.2029   0.681104
+ 128      6.0          128           28.4001   0.221368
+ 256      6.0          128           15.7771   0.133056
+ 512      6.0          128            9.6077   0.331669
+1024      6.0          128            6.66329  0.103599
+2048      6.0          128            5.15287  0.0542774
+
+# note that the 2048 node run uses a blocking factor of 16
+
+# in contrast to the previous run, we've disabled all inlining with
+# ROCm to get around some compiler bugs, so that might explain some
+# slight slowdown here.
+
diff --git a/Exec/science/flame_wave/scaling/frontier/frontier-scaling-rkc-2024-07-04.txt b/Exec/science/flame_wave/scaling/frontier/frontier-scaling-rkc-2024-07-04.txt
@@ -0,0 +1,39 @@
+# new scaling numbers for the 3D XRB
+# using the same inputs.He.25cm.static.1000Hz as previously
+# modules:
+#
+#   module load PrgEnv-gnu
+#   module load cray-mpich/8.1.27
+#   module load craype-accel-amd-gfx90a
+#   module load amd-mixed/6.0.0
+#   module unload darshan-runtime
+#
+# build info:
+#
+# EOS: /ccs/home/zingale/Microphysics/EOS/helmholtz
+# NETWORK: /ccs/home/zingale/Microphysics/networks/iso7
+# CONDUCTIVITY: /ccs/home/zingale/Microphysics/conductivity/stellar
+# INTEGRATOR: RKC
+# SCREENING: screen5
+#
+# Castro       git describe: 24.07
+# AMReX        git describe: 24.07-3-g7dc2081a9
+# Microphysics git describe: 24.07-1-g0a96241b
+
+# nodes  rocm      max_grid_size   avg time /   std dev
+#                          step
+  48      6.0          128           54.7583   0.287289
+  64      6.0          128           39.4336   0.32115
+ 128      6.0          128           22.8627   0.22725
+ 256      6.0          128           12.9558   0.12379
+ 512      6.0          128            7.9563   0.104311
+1024      6.0          128            5.65801  0.109306
+2048      6.0          128            4.33241  0.0577508
+
+# note that the 2048 node run uses blocking_factor = 16
+
+
+# in contrast to the previous run, we've disabled all inlining with
+# ROCm to get around some compiler bugs, so that might explain some
+# slight slowdown here.
+
diff --git a/Exec/science/flame_wave/scaling/frontier/frontier_flame_wave_scaling.png b/Exec/science/flame_wave/scaling/frontier/frontier_flame_wave_scaling.png
diff --git a/Exec/science/flame_wave/scaling/frontier/frontier_flame_wave_scaling_by_gpus.png b/Exec/science/flame_wave/scaling/frontier/frontier_flame_wave_scaling_by_gpus.png
diff --git a/Exec/science/flame_wave/scaling/frontier/frontier_scaling.py b/Exec/science/flame_wave/scaling/frontier/frontier_scaling.py
@@ -9,13 +9,13 @@
 plt.rc("axes", linewidth=1.5)
 plt.rc("lines", markeredgewidth=1.5)
 
-frontier_data = np.loadtxt("frontier-scaling-2023-04-06.txt")
+frontier_data = np.loadtxt("frontier-scaling-2024-07-04.txt")
 
 frontier_nodes = frontier_data[:, 0]
 frontier_times = frontier_data[:, 3]
 frontier_std = frontier_data[:, 4]
 
-frontier_rkc_data = np.loadtxt("frontier-scaling-rkc-2023-05-31.txt")
+frontier_rkc_data = np.loadtxt("frontier-scaling-rkc-2024-07-04.txt")
 
 frontier_rkc_nodes = frontier_rkc_data[:, 0]
 frontier_rkc_times = frontier_rkc_data[:, 3]
@@ -27,6 +27,12 @@
 summit_times = summit_data[:, 2]
 summit_std = summit_data[:, 3]
 
+frontier_bignet_data = np.loadtxt("frontier-scaling-2024-07-04-subch_simple.txt")
+
+frontier_bignet_nodes = frontier_bignet_data[:, 0]
+frontier_bignet_times = frontier_bignet_data[:, 3]
+frontier_bignet_std = frontier_bignet_data[:, 4]
+
 
 def trend_line(c, t):
     cnew = np.array(sorted(list(set(c))))
@@ -39,43 +45,31 @@ def trend_line(c, t):
 
 fig, ax = plt.subplots(1)
 
-ax.errorbar(frontier_nodes, frontier_times, yerr=frontier_std, ls="None", marker="x", label="Frontier (ROCm 5.3)")
-ax.errorbar(frontier_rkc_nodes, frontier_rkc_times, yerr=frontier_rkc_std, ls="None", marker="x", label="Frontier (RKC integrator)")
-ax.errorbar(summit_nodes, summit_times, yerr=summit_std, ls="None", marker="x", label="Summit (CUDA 11.4)")
+ax.errorbar(frontier_nodes, frontier_times, yerr=frontier_std,
+            ls="None", marker="x", label="Frontier (ROCm 6.0)")
+ax.errorbar(frontier_rkc_nodes, frontier_rkc_times, yerr=frontier_rkc_std,
+            ls="None", marker="x", label="Frontier (ROCm 6.0; RKC integrator)")
+ax.errorbar(summit_nodes, summit_times, yerr=summit_std,
+            ls="None", marker="^", label="Summit (CUDA 11.4)")
+ax.errorbar(frontier_bignet_nodes, frontier_bignet_times, yerr=frontier_bignet_std,
+            ls="None", marker="o", label="Frontier (ROCm 6.0; big network)")
 
 c, t = trend_line(frontier_nodes, frontier_times)
-ax.plot(c, t, alpha=0.5, linestyle=":")
-
-ax.set_ylabel("wallclock time / step")
-ax.set_xlabel("number of nodes")
-
-ax.set_xscale("log")
-ax.set_yscale("log")
-
-ax.legend()
+ax.plot(c, t, alpha=0.5, linestyle=":", color="k")
 
-fig.savefig("frontier_flame_wave_scaling.png")
-
-
-# now by GPUs
-
-fig, ax = plt.subplots(1)
+c, t = trend_line(frontier_bignet_nodes, frontier_bignet_times)
+ax.plot(c, t, alpha=0.5, linestyle=":", color="k")
 
-nfrontier_gpu = 8
-nsummit_gpu = 6
-
-ax.errorbar(frontier_nodes * nfrontier_gpu, frontier_times, yerr=frontier_std, ls="None", marker="x", label="Frontier (ROCm 5.3)")
-ax.errorbar(summit_nodes * nsummit_gpu, summit_times, yerr=summit_std, ls="None", marker="x", label="Summit (CUDA 11.4)")
-
-c, t = trend_line(frontier_nodes * nfrontier_gpu, frontier_times)
-ax.plot(c, t, alpha=0.5, linestyle=":")
 
 ax.set_ylabel("wallclock time / step")
-ax.set_xlabel("number of GPUs")
+ax.set_xlabel("number of nodes")
 
 ax.set_xscale("log")
 ax.set_yscale("log")
 
 ax.legend()
 
-fig.savefig("frontier_flame_wave_scaling_by_gpus.png")
+ax.set_title("3D XRB flame scaling")
+
+fig.tight_layout()
+fig.savefig("frontier_flame_wave_scaling.png")