diff --git a/Exec/science/flame_wave/scaling/frontier/frontier-scaling-2024-07-04-subch_simple.txt b/Exec/science/flame_wave/scaling/frontier/frontier-scaling-2024-07-04-subch_simple.txt new file mode 100644 index 0000000000..4ec22a5c01 --- /dev/null +++ b/Exec/science/flame_wave/scaling/frontier/frontier-scaling-2024-07-04-subch_simple.txt @@ -0,0 +1,33 @@ +# scaling numbers for the 3D XRB -- using subch_simple +# using the same inputs.He.25cm.static.1000Hz as previously +# modules: +# +# module load PrgEnv-gnu +# module load cray-mpich/8.1.27 +# module load craype-accel-amd-gfx90a +# module load amd-mixed/6.0.0 +# module unload darshan-runtime +# +# build info: +# +# EOS: /ccs/home/zingale/Microphysics/EOS/helmholtz +# NETWORK: /ccs/home/zingale/Microphysics/networks/subch_simple +# CONDUCTIVITY: /ccs/home/zingale/Microphysics/conductivity/stellar +# INTEGRATOR: VODE +# SCREENING: screen5 +# +# Castro git describe: 24.07 +# AMReX git describe: 24.07-3-g7dc2081a9 +# Microphysics git describe: 24.07-1-g0a96241b + + +# nodes rocm max_grid_size avg time / std dev +# step +# 48 6.0 128 --- crashes due to not enough GPU memory --- + 64 6.0 128 167.502 0.419448 + 128 6.0 128 85.4082 0.29416 + 256 6.0 128 46.4092 0.876868 + 512 6.0 128 25.5446 0.123848 +1024 6.0 128 17.3517 0.0857189 +2048 6.0 128 13.564 0.0498023 + diff --git a/Exec/science/flame_wave/scaling/frontier/frontier-scaling-2024-07-04.txt b/Exec/science/flame_wave/scaling/frontier/frontier-scaling-2024-07-04.txt new file mode 100644 index 0000000000..9d99d87a71 --- /dev/null +++ b/Exec/science/flame_wave/scaling/frontier/frontier-scaling-2024-07-04.txt @@ -0,0 +1,38 @@ +# new scaling numbers for the 3D XRB +# using the same inputs.He.25cm.static.1000Hz as previously +# modules: +# +# module load PrgEnv-gnu +# module load cray-mpich/8.1.27 +# module load craype-accel-amd-gfx90a +# module load amd-mixed/6.0.0 +# module unload darshan-runtime +# +# build info: +# +# EOS: 
/ccs/home/zingale/Microphysics/EOS/helmholtz +# NETWORK: /ccs/home/zingale/Microphysics/networks/iso7 +# CONDUCTIVITY: /ccs/home/zingale/Microphysics/conductivity/stellar +# INTEGRATOR: VODE +# SCREENING: screen5 +# +# Castro git describe: 24.07 +# AMReX git describe: 24.07-3-g7dc2081a9 +# Microphysics git describe: 24.07-1-g0a96241b + +# nodes rocm max_grid_size avg time / std dev +# step + 48 6.0 128 69.3646 0.286297 + 64 6.0 128 50.2029 0.681104 + 128 6.0 128 28.4001 0.221368 + 256 6.0 128 15.7771 0.133056 + 512 6.0 128 9.6077 0.331669 +1024 6.0 128 6.66329 0.103599 +2048 6.0 128 5.15287 0.0542774 + +# note that the 2048 run uses a blocking factor of 16 + +# in contrast to the previous run, we've disabled all inlining with +# ROCm to get around some compiler bugs, so that might explain some +# slight slowdown here. + diff --git a/Exec/science/flame_wave/scaling/frontier/frontier-scaling-rkc-2024-07-04.txt b/Exec/science/flame_wave/scaling/frontier/frontier-scaling-rkc-2024-07-04.txt new file mode 100644 index 0000000000..ea6da1f3ff --- /dev/null +++ b/Exec/science/flame_wave/scaling/frontier/frontier-scaling-rkc-2024-07-04.txt @@ -0,0 +1,39 @@ +# new scaling numbers for the 3D XRB +# using the same inputs.He.25cm.static.1000Hz as previously +# modules: +# +# module load PrgEnv-gnu +# module load cray-mpich/8.1.27 +# module load craype-accel-amd-gfx90a +# module load amd-mixed/6.0.0 +# module unload darshan-runtime +# +# build info: +# +# EOS: /ccs/home/zingale/Microphysics/EOS/helmholtz +# NETWORK: /ccs/home/zingale/Microphysics/networks/iso7 +# CONDUCTIVITY: /ccs/home/zingale/Microphysics/conductivity/stellar +# INTEGRATOR: RKC +# SCREENING: screen5 +# +# Castro git describe: 24.07 +# AMReX git describe: 24.07-3-g7dc2081a9 +# Microphysics git describe: 24.07-1-g0a96241b + +# nodes rocm max_grid_size avg time / std dev +# step + 48 6.0 128 54.7583 0.287289 + 64 6.0 128 39.4336 0.32115 + 128 6.0 128 22.8627 0.22725 + 256 6.0 128 12.9558 0.12379 + 512 6.0 128 
7.9563 0.104311 +1024 6.0 128 5.65801 0.109306 +2048 6.0 128 4.33241 0.0577508 + +# note that the 2048 node run uses blocking_factor = 16 + + +# in contrast to the previous run, we've disabled all inlining with +# ROCm to get around some compiler bugs, so that might explain some +# slight slowdown here. + diff --git a/Exec/science/flame_wave/scaling/frontier/frontier_flame_wave_scaling.png b/Exec/science/flame_wave/scaling/frontier/frontier_flame_wave_scaling.png index 17d1a8bca0..c9e6ac439d 100644 Binary files a/Exec/science/flame_wave/scaling/frontier/frontier_flame_wave_scaling.png and b/Exec/science/flame_wave/scaling/frontier/frontier_flame_wave_scaling.png differ diff --git a/Exec/science/flame_wave/scaling/frontier/frontier_flame_wave_scaling_by_gpus.png b/Exec/science/flame_wave/scaling/frontier/frontier_flame_wave_scaling_by_gpus.png deleted file mode 100644 index 18bfa6a5a1..0000000000 Binary files a/Exec/science/flame_wave/scaling/frontier/frontier_flame_wave_scaling_by_gpus.png and /dev/null differ diff --git a/Exec/science/flame_wave/scaling/frontier/frontier_scaling.py b/Exec/science/flame_wave/scaling/frontier/frontier_scaling.py index ac56d1de94..3bb2388a45 100644 --- a/Exec/science/flame_wave/scaling/frontier/frontier_scaling.py +++ b/Exec/science/flame_wave/scaling/frontier/frontier_scaling.py @@ -9,13 +9,13 @@ plt.rc("axes", linewidth=1.5) plt.rc("lines", markeredgewidth=1.5) -frontier_data = np.loadtxt("frontier-scaling-2023-04-06.txt") +frontier_data = np.loadtxt("frontier-scaling-2024-07-04.txt") frontier_nodes = frontier_data[:, 0] frontier_times = frontier_data[:, 3] frontier_std = frontier_data[:, 4] -frontier_rkc_data = np.loadtxt("frontier-scaling-rkc-2023-05-31.txt") +frontier_rkc_data = np.loadtxt("frontier-scaling-rkc-2024-07-04.txt") frontier_rkc_nodes = frontier_rkc_data[:, 0] frontier_rkc_times = frontier_rkc_data[:, 3] @@ -27,6 +27,12 @@ summit_times = summit_data[:, 2] summit_std = summit_data[:, 3] +frontier_bignet_data = 
np.loadtxt("frontier-scaling-2024-07-04-subch_simple.txt") + +frontier_bignet_nodes = frontier_bignet_data[:, 0] +frontier_bignet_times = frontier_bignet_data[:, 3] +frontier_bignet_std = frontier_bignet_data[:, 4] + def trend_line(c, t): cnew = np.array(sorted(list(set(c)))) @@ -39,43 +45,31 @@ def trend_line(c, t): fig, ax = plt.subplots(1) -ax.errorbar(frontier_nodes, frontier_times, yerr=frontier_std, ls="None", marker="x", label="Frontier (ROCm 5.3)") -ax.errorbar(frontier_rkc_nodes, frontier_rkc_times, yerr=frontier_rkc_std, ls="None", marker="x", label="Frontier (RKC integrator)") -ax.errorbar(summit_nodes, summit_times, yerr=summit_std, ls="None", marker="x", label="Summit (CUDA 11.4)") +ax.errorbar(frontier_nodes, frontier_times, yerr=frontier_std, + ls="None", marker="x", label="Frontier (ROCm 6.0)") +ax.errorbar(frontier_rkc_nodes, frontier_rkc_times, yerr=frontier_rkc_std, + ls="None", marker="x", label="Frontier (ROCm 6.0; RKC integrator)") +ax.errorbar(summit_nodes, summit_times, yerr=summit_std, + ls="None", marker="^", label="Summit (CUDA 11.4)") +ax.errorbar(frontier_bignet_nodes, frontier_bignet_times, yerr=frontier_bignet_std, + ls="None", marker="o", label="Frontier (ROCm 6.0; big network)") c, t = trend_line(frontier_nodes, frontier_times) -ax.plot(c, t, alpha=0.5, linestyle=":") - -ax.set_ylabel("wallclock time / step") -ax.set_xlabel("number of nodes") - -ax.set_xscale("log") -ax.set_yscale("log") - -ax.legend() +ax.plot(c, t, alpha=0.5, linestyle=":", color="k") -fig.savefig("frontier_flame_wave_scaling.png") - - -# now by GPUs - -fig, ax = plt.subplots(1) +c, t = trend_line(frontier_bignet_nodes, frontier_bignet_times) +ax.plot(c, t, alpha=0.5, linestyle=":", color="k") -nfrontier_gpu = 8 -nsummit_gpu = 6 - -ax.errorbar(frontier_nodes * nfrontier_gpu, frontier_times, yerr=frontier_std, ls="None", marker="x", label="Frontier (ROCm 5.3)") -ax.errorbar(summit_nodes * nsummit_gpu, summit_times, yerr=summit_std, ls="None", marker="x", 
label="Summit (CUDA 11.4)") - -c, t = trend_line(frontier_nodes * nfrontier_gpu, frontier_times) -ax.plot(c, t, alpha=0.5, linestyle=":") ax.set_ylabel("wallclock time / step") -ax.set_xlabel("number of GPUs") +ax.set_xlabel("number of nodes") ax.set_xscale("log") ax.set_yscale("log") ax.legend() -fig.savefig("frontier_flame_wave_scaling_by_gpus.png") +ax.set_title("3D XRB flame scaling") + +fig.tight_layout() +fig.savefig("frontier_flame_wave_scaling.png")