diff --git a/apps/Fluent/Arm/Fluent-Benchmark.OMPI.sbatch b/apps/Fluent/Arm/Fluent-Benchmark.OMPI.sbatch new file mode 100644 index 0000000..cc6e751 --- /dev/null +++ b/apps/Fluent/Arm/Fluent-Benchmark.OMPI.sbatch @@ -0,0 +1,45 @@ +#!/bin/bash +#SBATCH --exclusive +#SBATCH --nodes=2 +#SBATCH --ntasks=384 +#SBATCH --constraint=hpc7a.96xlarge +#SBATCH --partition=hpc7a +#SBATCH --time=12:00:00 + +######################## EFA settings ######################## +module load openmpi5 +module load libfabric-aws +export OPENMPI_ROOT="$(dirname $(dirname $(which mpirun)))" +######################## EFA settings ######################## + +fluentversion=${1:-"v242"} +benchmark_uri=${2:-"s3://YOUR_BUCKET/f1_racecar_140m.tar"} +basedir=${BASE_DIR:-"/fsx"} +export ANSYSLMD_LICENSE_FILE=${ANSYSLMD_LICENSE_FILE:-"1055@XXX.YYY.ZZZ.XYZ"} + +benchmark_file=$(basename ${benchmark_uri}) +APP_BIN_PATH="${basedir}/ansys_inc/${fluentversion}/fluent/bin/fluentbench.pl" +export FLUENT_ARCH=lnarm64 + +TOKEN=$(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600") +workdir="$(readlink -m "${basedir}/${SLURM_JOB_NAME%.*}")/${benchmark_file%.*}/${SLURM_JOB_ID}-${SLURM_JOB_NUM_NODES}x$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/meta-data/instance-type)-${SLURM_NPROCS}-$(date "+%d-%m-%Y-%H-%M")" + +echo "Execution directory is: ${workdir}" +mkdir -p "${workdir}" && cd "${workdir}" + +echo "building the MPI hostfile" +scontrol show nodes $SLURM_NODELIST | grep NodeHostName= | awk '{print $2}' | sed 's/NodeHostName=//' >hostfile + +cp $0 . +aws s3 cp --quiet "${benchmark_uri}" . +tar xf "${benchmark_file}" +ln -s bench/fluent/v6/${benchmark_file%.*}/cas_dat/* . + +echo "Drop caches on all nodes" +mpirun -np $SLURM_JOB_NUM_NODES -ppn 1 /bin/bash -c "sync && echo 3 | sudo tee /proc/sys/vm/drop_caches" + +echo "Enabling Transparent Huge Pages (THP)" +mpirun -np $SLURM_JOB_NUM_NODES -ppn 1 /bin/bash -c "echo always | sudo tee /sys/kernel/mm/transparent_hugepage/enabled" + +echo "Run Fluent Benchmark: ${APP_BIN_PATH} ${benchmark_file%.*} -t${SLURM_NPROCS} -cnf=hostfile -part=4 -platform=intel -nosyslog -noloadchk -ssh -mpi=openmpi -norm" +"${APP_BIN_PATH}" ${benchmark_file%.*} -t${SLURM_NPROCS} -cnf=hostfile -part=4 -platform=openmpi -nosyslog -noloadchk -ssh -mpi=intel -norm | tee output.$SLURM_JOBID.out \ No newline at end of file diff --git a/apps/Fluent/Arm/Fluent-Benchmark.sbatch b/apps/Fluent/Arm/Fluent-Benchmark.sbatch new file mode 100644 index 0000000..ff1dd5b --- /dev/null +++ b/apps/Fluent/Arm/Fluent-Benchmark.sbatch @@ -0,0 +1,50 @@ +#!/bin/bash +#SBATCH --exclusive +#SBATCH --nodes=2 +#SBATCH --ntasks=384 +#SBATCH --constraint=hpc7a.96xlarge +#SBATCH --partition=hpc7a +#SBATCH --time=12:00:00 + +######################## EFA settings ######################## +export I_MPI_OFI_LIBRARY_INTERNAL=0 +module load intelmpi +export I_MPI_DEBUG=5 +export I_MPI_FABRICS=shm:ofi +export I_MPI_OFI_PROVIDER=efa +export I_MPI_MULTIRAIL=1 +module load libfabric-aws +export INTELMPI_ROOT="$(dirname $(dirname $(which mpirun)))" +######################## EFA settings ######################## + +fluentversion=${1:-"v241"} +benchmark_uri=${2:-"s3://YOUR_BUCKET/f1_racecar_140m.tar"} +basedir=${BASE_DIR:-"/fsx"} +export ANSYSLMD_LICENSE_FILE=${ANSYSLMD_LICENSE_FILE:-"1055@XXX.YYY.ZZZ.XYZ"} + +benchmark_file=$(basename ${benchmark_uri}) +APP_BIN_PATH="${basedir}/ansys_inc/${fluentversion}/fluent/bin/fluentbench.pl" +export FLUENT_ARCH=lnamd64 + +TOKEN=$(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600") +workdir="$(readlink -m "${basedir}/${SLURM_JOB_NAME%.*}")/${benchmark_file%.*}/${SLURM_JOB_ID}-${SLURM_JOB_NUM_NODES}x$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/meta-data/instance-type)-${SLURM_NPROCS}-$(date "+%d-%m-%Y-%H-%M")" + +echo "Execution directory is: ${workdir}" +mkdir -p "${workdir}" && cd "${workdir}" + +echo "building the MPI hostfile" +scontrol show nodes $SLURM_NODELIST | grep NodeHostName= | awk '{print $2}' | sed 's/NodeHostName=//' >hostfile + +cp $0 . +aws s3 cp --quiet "${benchmark_uri}" . +tar xf "${benchmark_file}" +ln -s bench/fluent/v6/${benchmark_file%.*}/cas_dat/* . + +echo "Drop caches on all nodes" +mpirun -np $SLURM_JOB_NUM_NODES -ppn 1 /bin/bash -c "sync && echo 3 | sudo tee /proc/sys/vm/drop_caches" + +echo "Enabling Transparent Huge Pages (THP)" +mpirun -np $SLURM_JOB_NUM_NODES -ppn 1 /bin/bash -c "echo always | sudo tee /sys/kernel/mm/transparent_hugepage/enabled" + +echo "Run Fluent Benchmark: ${APP_BIN_PATH} ${benchmark_file%.*} -t${SLURM_NPROCS} -cnf=hostfile -part=4 -platform=intel -nosyslog -noloadchk -ssh -mpi=intel -norm" +"${APP_BIN_PATH}" ${benchmark_file%.*} -t${SLURM_NPROCS} -cnf=hostfile -part=4 -platform=intel -nosyslog -noloadchk -ssh -mpi=intel -norm | tee output.$SLURM_JOBID.out diff --git a/apps/Fluent/Arm/Fluent.sbatch b/apps/Fluent/Arm/Fluent.sbatch new file mode 100644 index 0000000..45ea576 --- /dev/null +++ b/apps/Fluent/Arm/Fluent.sbatch @@ -0,0 +1,52 @@ +#!/bin/bash +#SBATCH --exclusive +#SBATCH --nodes=2 +#SBATCH --ntasks=384 +#SBATCH --constraint=hpc7a.96xlarge +#SBATCH --partition=hpc7a + +######################## EFA settings ######################## +export I_MPI_OFI_LIBRARY_INTERNAL=0 +module load intelmpi +export I_MPI_DEBUG=5 +export I_MPI_FABRICS=shm:ofi +export I_MPI_OFI_PROVIDER=efa +export I_MPI_MULTIRAIL=1 +module load libfabric-aws +export INTELMPI_ROOT="$(dirname $(dirname $(which mpirun)))" +######################## EFA settings ######################## + +fluentversion=${1:-"v241"} +journal_file=${2:-"/fsx/MY_FLUENT_EXAMPLE/MY_JOURNAL_FILE.jou"} +dataset_cas=${3:-"/fsx/MY_FLUENT_EXAMPLE/MY_CAS_FILE.cas.gz"} +dataset_dat=${4:-"/fsx/MY_FLUENT_EXAMPLE/MY_DAT_FILE.dat.gz"} +basedir=${BASE_DIR:-"/fsx"} +fluent_mode=${FLUENT_MODE:-"3ddp"} + +export ANSYSLMD_LICENSE_FILE=${ANSYSLMD_LICENSE_FILE:-"1055@XXX.YYY.ZZZ.XYZ"} + +APP_BIN_PATH="${basedir}/ansys_inc/${fluentversion}/fluent/bin/fluent" +export FLUENT_ARCH=lnamd64 + +TOKEN=$(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600") +workdir="$(readlink -m "${basedir}/${SLURM_JOB_NAME%.*}")/$(basename "${journal_file%.*}")/Run/${SLURM_JOB_ID}-${SLURM_JOB_NUM_NODES}x$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/meta-data/instance-type)-${SLURM_NPROCS}-$(date "+%d-%m-%Y-%H-%M")" + +echo "Execution directory is: ${workdir}" +mkdir -p "${workdir}" && cd "${workdir}" + +echo "building the MPI hostfile" +scontrol show nodes $SLURM_NODELIST | grep NodeHostName= | awk '{print $2}' | sed 's/NodeHostName=//' >hostfile + +cp $0 . +ln -s "${journal_file}" . +ln -s "${dataset_cas}" . +ln -s "${dataset_dat}" . + +echo "Drop caches on all nodes" +mpirun -np $SLURM_JOB_NUM_NODES -ppn 1 /bin/bash -c "sync && echo 3 | sudo tee /proc/sys/vm/drop_caches" + +echo "Enabling Transparent Huge Pages (THP)" +mpirun -np $SLURM_JOB_NUM_NODES -ppn 1 /bin/bash -c "echo always | sudo tee /sys/kernel/mm/transparent_hugepage/enabled" + +echo "Run Fluent: ${APP_BIN_PATH} ${fluent_mode} -g -t${SLURM_NPROCS} -cnf=hostfile -platform=intel -ssh -mpi=intel -i $(basename ${journal_file})" +"${APP_BIN_PATH}" ${fluent_mode} -g -t${SLURM_NPROCS} -cnf=hostfile -platform=intel -ssh -mpi=intel -i $(basename ${journal_file}) | tee output.$SLURM_JOBID.out