# Ignore SIGHUP to survive login node session drops.
# The action must be the empty string: that marks the signal as ignored, and
# an ignored signal stays ignored in the commands this script launches.
# A non-empty action (even a blank one) would only install a handler in this
# shell, leaving child processes with the default "terminate" disposition.
trap '' HUP
77
# Determine compiler flag from directory name.
# The basename of the directory containing this script is taken as the cluster
# name; it selects the value later passed to `./mfc.sh load -c`.
# An unrecognized directory name aborts the script with exit status 1.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cluster_name="$(basename "$SCRIPT_DIR")"
case "$cluster_name" in
    frontier)     compiler_flag="f" ;;
    frontier_amd) compiler_flag="famd" ;;
    *) echo "ERROR: Unknown cluster '$cluster_name'"; exit 1 ;;
esac
16+
# Print the command-line synopsis to stdout.
# Positional arguments: the script to submit, the device (cpu|gpu), the
# interface (none|acc|omp) and an optional shard specifier.
usage() {
    echo "Usage: $0 [script.sh] [cpu|gpu] [none|acc|omp] [shard]"
}
1120
1221if [ ! -z " $1 " ]; then
1625 exit 1
1726fi
1827
# Detect job type from submitted script basename.
# Arguments: $1 - path of the script being submitted
# Outputs:   "bench" when the basename (without .sh) starts with "bench",
#            otherwise "test"
detect_job_type() {
    local name
    name="$(basename -- "$1" .sh)"
    case "$name" in
        bench*) printf '%s\n' "bench" ;;
        *)      printf '%s\n' "test" ;;
    esac
}

script_basename="$(basename -- "$1" .sh)"
job_type="$(detect_job_type "$1")"
34+
1935if [ " $2 " = " cpu" ]; then
2036 sbatch_device_opts=" \
2137#SBATCH -n 32 # Number of cores required"
2743 exit 1
2844fi
2945
# Select SBATCH params based on job type.
# Arguments: $1 - job type ("bench" or anything else, treated as a test job)
# Globals written: sbatch_account, sbatch_time, sbatch_partition, sbatch_extra
# Each value is a complete directive line that starts with "#SBATCH" in
# column 0, because the values are pasted verbatim into the generated batch
# script. sbatch_extra is empty for bench jobs.
select_sbatch_params() {
    if [ "$1" = "bench" ]; then
        sbatch_account="#SBATCH -A ENG160"
        sbatch_time="#SBATCH -t 05:59:00"
        sbatch_partition="#SBATCH -p extended"
        sbatch_extra=""
    else
        sbatch_account="#SBATCH -A CFD154"
        sbatch_time="#SBATCH -t 01:59:00"
        sbatch_partition="#SBATCH -p batch"
        sbatch_extra="#SBATCH --qos=normal"
    fi
}

select_sbatch_params "$job_type"
3058
# Build the "-<shard>" slug component, rewriting the first "/" as "-of-"
# (e.g. "1/2" -> "-1-of-2"). Prints an empty line when no shard is given.
# Arguments: $1 - shard specifier, may be empty
make_shard_suffix() {
    if [ -n "$1" ]; then
        printf '%s\n' "-${1/\//-of-}"
    else
        printf '\n'
    fi
}

# Build the job slug used for the job name and the output file name.
# Arguments: $1 - script path, $2 - device, $3 - interface, $4 - optional shard
# Outputs:   <script basename without .sh, every non-alphanumeric character
#            replaced by "->-<device>-<interface>[-<shard>]
make_job_slug() {
    local stem
    stem="$(basename -- "$1" | sed -e 's/\.sh$//' -e 's/[^a-zA-Z0-9]/-/g')"
    printf '%s\n' "${stem}-$2-$3$(make_shard_suffix "$4")"
}

shard_suffix="$(make_shard_suffix "$4")"
job_slug="$(make_job_slug "$1" "$2" "$3" "$4")"
output_file="${job_slug}.out"
3365
3466submit_output=$( sbatch << EOT
3567#!/bin/bash
3668#SBATCH -J MFC-$job_slug # Job name
37- #SBATCH -A ENG160 # charge account
69+ $sbatch_account
3870#SBATCH -N 1 # Number of nodes required
3971$sbatch_device_opts
40- #SBATCH -t 05:59:00 # Duration of the job (Ex: 15 mins)
72+ $sbatch_time
4173#SBATCH -o$output_file # Combined output and error messages file
42- #SBATCH -p extended # Extended partition for shorter queues
74+ $sbatch_partition
75+ $sbatch_extra
4376
4477set -e
4578set -x
@@ -50,8 +83,10 @@ echo "Running in $(pwd):"
5083job_slug="$job_slug "
5184job_device="$2 "
5285job_interface="$3 "
86+ job_shard="$4 "
87+ job_cluster="$cluster_name "
5388
54- . ./mfc.sh load -c f -m $( [ " $2 " = " gpu" ] && echo " g" || echo " c" )
89+ . ./mfc.sh load -c $compiler_flag -m $( [ " $2 " = " gpu" ] && echo " g" || echo " c" )
5590
5691$sbatch_script_contents
5792
echo "Submitted batch job $job_id"

# Use resilient monitoring instead of sbatch -W.
# SCRIPT_DIR is the directory containing this script; it is only computed here
# if nothing earlier in the script has already set it.
: "${SCRIPT_DIR:=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)}"
bash "$SCRIPT_DIR/../../scripts/monitor_slurm_job.sh" "$job_id" "$output_file"