Skip to content

Commit a9a1f88

Browse files
committed
Added a new, experimental way to launch both the services and the AD tasks together under a single script. This circumvents issues with VNIs on Frontier and simplifies the launch process.
Currently requires SLURM. The core-binding option additionally requires python3 and taskset.
1 parent 65410a2 commit a9a1f88

1 file changed

Lines changed: 85 additions & 0 deletions

File tree

scripts/launch/chimbuko.sh

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
#!/bin/bash
#
# Experimental combined launcher for the Chimbuko services and the AD driver
# tasks. Every task of a SLURM job step runs this same script; the task's
# SLURM_PROCID decides which role it plays.
#
# Usage:
#   chimbuko.sh <app_tasks> [--core_bind <cpus_rank0>:<cpus_rank1>:...]
#
#   app_tasks    Number of application tasks/ranks. Tasks whose SLURM_PROCID is
#                below this value each run one AD driver; the remaining tasks
#                are service tasks.
#   --core_bind  Colon-separated list with one entry per driver rank. Each
#                entry is a comma-separated list of CPU indices that the
#                driver of that rank is bound to via taskset. Requires python3
#                and taskset.
#
# Environment:
#   CHIMBUKO_CONFIG   Path of the configuration script that is sourced first.
#                     Presumably it defines chimbuko_services, EXE_NAME and the
#                     TAU_ADIOS2_* variables used below -- they are not set
#                     anywhere in this script.
#   SLURM_PROCID, SLURM_LOCALID, SLURMD_NODENAME
#                     Read to obtain the task's rank, node-local ID and node name.
#
# Files:
#   chimbuko/vars/chimbuko_ad_cmdline.var  Drivers wait for this file to appear.
#   chimbuko/vars/chimbuko_ad_opts.var     Extra driver options, read once the
#                                          file above exists.
#   services.log, chimbuko/logs/ad.<rank>.log  Copies of the task output.

# Print an error message to stderr and abort the task.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Fail early with a clear message instead of letting `source` run without an
# argument and continuing with an empty configuration.
: "${CHIMBUKO_CONFIG:?CHIMBUKO_CONFIG must be set to the path of the Chimbuko configuration script}"
# shellcheck source=/dev/null
source "${CHIMBUKO_CONFIG}"

#export HG_LOG_LEVEL=debug
#export HG_LOG_SUBSYS=cls
#export FI_LOG_LEVEL=debug

rank=${SLURM_PROCID}

narg=$#
if [[ ${narg} -lt 1 ]]; then
  die "Require at least one argument: the number of application tasks/ranks"
fi

app_tasks=$1
# A non-numeric value would be evaluated as an arithmetic expression by the
# comparison below (e.g. "--core_bind" becomes a pre-decrement), silently
# putting the task in the wrong role.
if [[ ! ${app_tasks} =~ ^[0-9]+$ ]]; then
  die "The number of application tasks/ranks must be a non-negative integer, got \"${app_tasks}\""
fi

echo "rank \"${rank}\" app_tasks \"${app_tasks}\" node \"${SLURMD_NODENAME}\""

if [[ ${rank} -ge ${app_tasks} ]]; then
  # Service task. Only the one with node-local ID 0 launches the services; any
  # other service task falls through and exits without doing anything.
  if [[ ${SLURM_LOCALID} -eq 0 ]]; then
    echo "Launching service task on node ${SLURMD_NODENAME} local ID ${SLURM_LOCALID}"
    echo "Environment:"
    printenv

    # The command is stored as a plain string, so word splitting is intended.
    # Because `exec` is part of a pipeline it only replaces the pipeline's
    # subshell; unless pipefail is enabled the exit status is that of tee.
    # shellcheck disable=SC2086
    exec ${chimbuko_services} 2>&1 | tee services.log
  fi
else
  # AD driver task.
  echo "Driver rank ${rank} waiting for service spin-up"
  while [[ ! -f chimbuko/vars/chimbuko_ad_cmdline.var ]]; do
    sleep 1
  done
  ad_opts=$(< chimbuko/vars/chimbuko_ad_opts.var)
  ad_cmd="driver ${TAU_ADIOS2_ENGINE} ${TAU_ADIOS2_PATH} ${TAU_ADIOS2_FILE_PREFIX}-${EXE_NAME} ${ad_opts} -rank ${rank}"

  args=("$@")
  # Words placed in front of the driver command (e.g. "taskset <mask>").
  launch_prefix=()

  # Index 0 is app_tasks; options start at index 1.
  a=1
  while [[ ${a} -lt ${narg} ]]; do
    arg=${args[a]}
    case "${arg}" in
      --core_bind)
        if (( a + 1 >= narg )); then
          die "Option --core_bind requires a value of the form <cpus_rank0>:<cpus_rank1>:..."
        fi
        bind_spec=${args[a + 1]}

        # Split on ':' and pick the entry belonging to this rank.
        IFS=':' read -r -a bind_sets <<< "${bind_spec}"
        rank_cpus=""
        if (( rank < ${#bind_sets[@]} )); then
          rank_cpus=${bind_sets[rank]}
        fi
        if [[ -z "${rank_cpus}" ]]; then
          die "Could not parse binding for rank ${rank}"
        fi

        # Python is used for the mask because its integers are unbounded,
        # whereas shell arithmetic is limited to 64 bits. The CPU list is
        # passed as an argument rather than pasted into the Python source.
        cpu_mask=$(
          python3 - "${rank_cpus}" <<'EOF'
import sys

# argv[1]: comma-separated list of CPU indices
# Output: hex mask with the corresponding bits set
mask = 0
for cpu in sys.argv[1].split(','):
    mask |= 1 << int(cpu)

print(hex(mask))
EOF
        ) || die "Could not compute a core mask for rank ${rank} from \"${rank_cpus}\""

        echo "Binding rank ${rank} to ${rank_cpus} with core mask ${cpu_mask}"
        launch_prefix=(taskset "${cpu_mask}" "${launch_prefix[@]}")
        a=$(( a + 2 ))
        ;;
      *)
        die "Unrecognized argument \"${arg}\""
        ;;
    esac
  done

  # ad_cmd is a plain string whose options must be split into words. As for
  # the service task, the exit status is that of tee unless pipefail is set.
  # shellcheck disable=SC2086
  exec "${launch_prefix[@]}" ${ad_cmd} 2>&1 | tee "chimbuko/logs/ad.${SLURM_PROCID}.log"
fi

0 commit comments

Comments
 (0)