Skip to content

Commit bfc3de0

Browse files
committed
Updated GPU benchmark_suite apps to use launch scripts
1 parent 4631b7b commit bfc3de0

4 files changed

Lines changed: 244 additions & 226 deletions

File tree

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#Note: This configuration file is sourced into the bash environment for Chimbuko startup scripts, thus the user must follow correct shell conventions
2+
#Please do not remove any of the variables!
3+
4+
#IMPORTANT NOTE: Variables that cannot be left as default are marked as <------------ ***SET ME***
5+
6+
service_node_iface=eth0 #network interface upon which communication to the service node is performed <------------ ***SET ME***
7+
8+
####################################
9+
#Options for visualization module
10+
####################################
11+
use_viz=1 #enable or disable the visualization
12+
viz_root=/opt/chimbuko/viz #the root directory of the visualization module <------------ ***SET ME (if using viz)***
13+
viz_worker_port=6379 #the port on which to run the redis server for the visualization backend
14+
viz_port=5002 #the port on which to run the webserver
15+
export C_FORCE_ROOT=1 #required only for docker runs, allows celery to execute properly as root user <----------------- *** SET ME (if using Docker)
16+
17+
############################################################
18+
#General options for Chimbuko backend (pserver, ad, provdb)
19+
############################################################
20+
backend_root="infer" #The root install directory of the PerformanceAnalysis libraries. If set to "infer" it will be inferred from the path of the executables
21+
chimbuko_services="infer" #The location of the Chimbuko service script. If set to "infer" it will be inferred from backend_root
22+
23+
####################################
24+
#Options for the provenance database
25+
####################################
26+
use_provdb=1 #enable or disable the provDB. If disabled the provenance data will be written as JSON ASCII into the ${provdb_writedir} set below
27+
provdb_extra_args="" #any extra command line arguments to pass
28+
provdb_nshards=4 #number of database shards
29+
provdb_engine="ofi+tcp;ofi_rxm" #the OFI libfabric provider used for the Mochi stack
30+
provdb_port=5000 #the port of the provenance database
31+
provdb_nthreads=4 #number of worker threads; should be >= the number of shards
32+
provdb_writedir=chimbuko/provdb #the directory in which the provenance database is written. Chimbuko creates chimbuko/provdb which can be used as a default
33+
provdb_commit_freq=10000 #frequency (in ms) at which the provenance database is committed to disk. If set to 0 it will commit only at the end
34+
35+
#With "verbs" provider (used for infiniband, iWarp, etc) we need to also specify the domain, which can be found by running fi_info (on a compute node)
36+
provdb_domain=mlx5_0 #only needed for verbs provider <------------ ***SET ME (if using verbs)***
37+
38+
39+
####################################
40+
#Options for the parameter server
41+
####################################
42+
use_pserver=1 #enable or disable the pserver
43+
pserver_extra_args="" #any extra command line arguments to pass
44+
pserver_port=5559 #port for parameter server
45+
pserver_nt=2 #number of worker threads
46+
####################################
47+
#Options for the AD module
48+
####################################
49+
ad_extra_args="-perf_outputpath chimbuko/logs -perf_step 1" #any extra command line arguments to pass. Note: chimbuko/logs is automatically created by services script
50+
ad_win_size=5 #number of events around an anomaly to store; provDB entry size is proportional to this so keep it small!
51+
ad_alg="sstd" #the anomaly detection algorithm. Valid values are "hbos" and "sstd"
52+
ad_outlier_hbos_threshold=0.99 #the percentile of events outside of which are considered anomalies by the HBOS algorithm
53+
ad_outlier_sstd_sigma=12 #number of standard deviations that defines an outlier in the SSTD algorithm
54+
####################################
55+
#Options for TAU
56+
#Note: Only the TAU_ADIOS2_PATH, TAU_ADIOS2_FILE_PREFIX, EXE_NAME and TAU_ADIOS2_ENGINE variables are used by the Chimbuko services script and there only to generate the suggested
57+
# command to launch the AD (output to chimbuko/vars/chimbuko_ad_cmdline.var); they can be overridden by the run script if desired, provided the appropriate modifications
58+
# are made to the AD launch command. The remainder of the variables are used only by TAU and can be freely overridden.
59+
####################################
60+
export TAU_ADIOS2_ENGINE=SST #online communication engine (alternative BP4 although this goes through the disk system and may be slower unless the BPfiles are stored on a burst disk)
61+
export TAU_ADIOS2_ONE_FILE=FALSE #a different connection file for each rank
62+
export TAU_ADIOS2_PERIODIC=1 #enable/disable ADIOS2 periodic output
63+
export TAU_ADIOS2_PERIOD=1000000 #period in us between ADIOS2 io steps
64+
export TAU_THREAD_PER_GPU_STREAM=1 #force GPU streams to appear as different TAU virtual threads
65+
export TAU_THROTTLE=0 #enable/disable throttling of short-running functions
66+
67+
export TAU_MAKEFILE=/opt/tau2/x86_64/lib/Makefile.tau-papi-mpi-pthread-python-cupti-pdt-adios2 #The TAU makefile to use <------------ ***SET ME***
68+
69+
#Note: the following 2 variables are not used by the service script but are included here for use from the user's run script allowing the application to be launched with either "${TAU_EXEC} <app>" or "${TAU_PYTHON} <app>"
70+
#Note: the "binding" -T ... is used by Tau to find the appropriate configuration. It can typically be inferred from the name of the Makefile. If using a non-MPI job the 'mpi' should be changed to 'serial' and a non-MPI build of
71+
# ADIOS2/TAU must exist
72+
#Suggestion: It is useful to test the command without Chimbuko first to ensure TAU picks up the correct binding; this can be done by 'export TAU_ADIOS2_ENGINE=BPFile' and then running the application with Tau but without Chimbuko.
73+
TAU_EXEC="tau_exec -T papi,mpi,pthread,python,cupti,pdt,adios2 -adios2_trace -cupti -um" #how to execute tau_exec; the -T arguments should mirror the makefile name <------------ ***SET ME***
74+
TAU_PYTHON="tau_python -T papi,mpi,pthread,python,cupti,pdt,adios2 -tau-python-interpreter=python3 -adios2_trace -tau-python-args=-u" #how to execute tau_python. Note that passing -u to python forces it to not buffer stdout so we can pipe it
75+
#to tee in realtime <--- SET ME (if !python3)
76+
77+
export EXE_NAME=main #the name of the executable (without path) <------------ ***SET ME***
78+
79+
TAU_ADIOS2_PATH=chimbuko/adios2 #path where the adios2 files are to be stored. Chimbuko services creates the directory chimbuko/adios2 in the working directory and this should be used by default
80+
TAU_ADIOS2_FILE_PREFIX=tau-metrics #the prefix of tau adios2 files; full filename is ${TAU_ADIOS2_FILE_PREFIX}-${EXE_NAME}-${RANK}.bp
81+
82+
83+
84+
85+
86+
87+
88+
89+
90+
91+
92+
93+
94+
###########################################################################
95+
# NON-USER VARIABLES BELOW = DON'T MODIFY THESE!!
96+
###########################################################################
97+
#Extra processing
98+
export TAU_ADIOS2_FILENAME="${TAU_ADIOS2_PATH}/${TAU_ADIOS2_FILE_PREFIX}"
99+
100+
if [[ ${backend_root} == "infer" ]]; then
101+
#Infer the install root as the parent of the directory that contains the provdb_admin executable.
#The helper prints the canonicalized path on stdout and returns non-zero if provdb_admin is not in PATH.
_chimbuko_infer_backend_root() {
  local admin_exe
  admin_exe=$(command -v provdb_admin) || return 1
  #dirname removes only the final path component, so an install path that itself contains
  #the string "provdb_admin" is handled correctly
  readlink -f "$(dirname "${admin_exe}")/.."
}
#If the lookup fails, leave backend_root untouched and report it, rather than silently resolving to "/"
if _chimbuko_inferred_root=$(_chimbuko_infer_backend_root); then
  backend_root="${_chimbuko_inferred_root}"
else
  echo "Could not infer backend_root: provdb_admin was not found in PATH!" >&2
fi
unset _chimbuko_inferred_root
102+
fi
103+
104+
if [[ ${chimbuko_services} == "infer" ]]; then
105+
chimbuko_services="${backend_root}/scripts/launch/run_services.sh"
106+
if [ ! -f "${chimbuko_services}" ]; then
107+
echo "Could not infer service script location: service script does not exist at ${chimbuko_services}!"
108+
exit 1
109+
fi
110+
fi
111+
Lines changed: 12 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#!/bin/bash
22

3+
set -e
4+
35
####################### START OF USER VARIABLES ####################
46
cycles=1000 #Total number of loop iterations
57
freq=100 #How often to introduce anomalies into the loop
@@ -9,127 +11,28 @@ anom_mult=100 #multiplier for anomalies
911
device_max=2 #maximum number of GPUs to run on
1012
####################### END OF USER VARIABLES ####################
1113

12-
export TAU_ROOT=/opt/tau2/x86_64
13-
export TAU_MAKEFILE=$TAU_ROOT/lib/Makefile.tau-papi-mpi-pthread-python-cupti-pdt-adios2
14-
export TAU_PLUGINS_PATH=$TAU_ROOT/lib/shared-papi-mpi-pthread-python-cupti-pdt-adios2
15-
16-
export TAU_ADIOS2_PERIODIC=1
17-
export TAU_ADIOS2_PERIOD=1000000
18-
#export TAU_ADIOS2_ENGINE=BPFile
19-
export TAU_ADIOS2_ENGINE=SST
20-
export TAU_ADIOS2_FILENAME=tau-metrics
21-
export TAU_VERBOSE=1
22-
23-
extra_args=""
24-
ps_extra_args=""
25-
26-
#Instantiate provdb
27-
if (( 1 )); then
28-
rm provdb.*.unqlite provider.address
29-
30-
ip=$(hostname -i)
31-
port=1234
32-
33-
echo "Instantiating provenance database"
34-
provdb_admin ${ip}:${port} &
35-
36-
sleep 1
37-
if ! [[ -f provider.address ]]; then
38-
echo "Provider address file not created after 1 second"
39-
exit 1
40-
fi
41-
42-
prov_add=$(cat provider.address)
43-
extra_args="-provdb_addr ${prov_add}"
44-
ps_extra_args+=" -provdb_addr ${prov_add}"
45-
echo "Enabling provenance database with arg: ${extra_args}"
46-
fi
47-
48-
#Run the viz
49-
using_viz=0
50-
if (( 1 )); then
51-
using_viz=1
52-
ip=$(hostname -i)
53-
run_dir=$(pwd)
54-
echo "Run dir ${run_dir}"
55-
56-
rm -rf viz_data
57-
mkdir viz_data
58-
viz_dir=$(readlink -f viz_data)
59-
provdb_dir=$(pwd)
60-
viz_install=/opt/chimbuko/viz
61-
62-
export SERVER_CONFIG="production"
63-
if [ -z "${CHIMBUKO_VERBOSE}x" ]; then
64-
export SERVER_CONFIG="development"
65-
fi
66-
67-
export DATABASE_URL="sqlite:///${viz_dir}/main.sqlite"
68-
export ANOMALY_STATS_URL="sqlite:///${viz_dir}/anomaly_stats.sqlite"
69-
export ANOMALY_DATA_URL="sqlite:///${viz_dir}/anomaly_data.sqlite"
70-
export FUNC_STATS_URL="sqlite:///${viz_dir}/func_stats.sqlite"
71-
export PROVENANCE_DB="${provdb_dir}/"
72-
export PROVDB_ADDR=$(cat provider.address)
73-
export SHARDED_NUM=1
74-
export C_FORCE_ROOT=1
75-
76-
cd ${viz_install}
77-
78-
echo "run redis ..."
79-
redis-stable/src/redis-server redis-stable/redis.conf 2>&1 | tee ${run_dir}/redis.log &
80-
sleep 5
81-
82-
echo "run celery ..."
83-
python3 manager.py celery --loglevel=info 2>&1 | tee ${run_dir}/celery.log &
84-
sleep 5
14+
rm -rf chimbuko
15+
export CHIMBUKO_CONFIG=chimbuko_config.sh
16+
source ${CHIMBUKO_CONFIG}
8517

86-
echo "create db ..."
87-
python3 manager.py createdb 2>&1 | tee ${run_dir}/create_db.log
88-
sleep 2
89-
90-
echo "run webserver (server config ${SERVER_CONFIG}) with provdb on ${PROVDB_ADDR}... Logging to ${run_dir}/viz.log"
91-
python3 manager.py runserver --host 0.0.0.0 --port 5002 --debug 2>&1 | tee ${run_dir}/viz.log &
92-
sleep 2
93-
94-
cd -
95-
96-
ws_addr="http://${ip}:5002/api/anomalydata"
97-
ps_extra_args+=" -ws_addr ${ws_addr}"
98-
fi
99-
100-
#Instantiate the pserver
10118
if (( 1 )); then
102-
pserver_addr=tcp://${ip}:5559
103-
pserver_nt=1
104-
pserver_logdir="."
105-
echo "Instantiating pserver"
106-
echo "Pserver $pserver_addr"
107-
pserver -nt ${pserver_nt} -logdir ${pserver_logdir} ${ps_extra_args} 2>&1 | tee pserver.log &
108-
extra_args="${extra_args} -pserver_addr ${pserver_addr}"
109-
sleep 2
19+
echo "Running services"
20+
${chimbuko_services} 2>&1 | tee services.log &
21+
echo "Waiting"
22+
#Wait for the services script to write the suggested AD launch command.
#$! is the last process of the services pipeline backgrounded above; if it has exited and the
#file still does not exist, abort instead of polling forever.
#NOTE(review): if the services script leaves children holding the pipe open, the pipeline may
#outlive a failed startup and this check will not trigger - confirm against run_services.sh
services_pid=$!
while [ ! -f chimbuko/vars/chimbuko_ad_cmdline.var ]; do
  if ! kill -0 "${services_pid}" 2>/dev/null; then
    #Re-check the file so that a pipeline which wrote it and then exited is not reported as a failure
    [ -f chimbuko/vars/chimbuko_ad_cmdline.var ] && break
    echo "Chimbuko services exited without writing chimbuko/vars/chimbuko_ad_cmdline.var (see services.log)" >&2
    exit 1
  fi
  sleep 1
done
23+
ad_cmd=$(cat chimbuko/vars/chimbuko_ad_cmdline.var)
11024
fi
11125

112-
113-
#Instantiate the AD
11426
if (( 1 )); then
11527
echo "Instantiating AD"
116-
mpirun --allow-run-as-root -n 1 driver SST . tau-metrics-main -prov_outputpath . ${extra_args} 2>&1 | tee ad.log &
28+
eval "mpirun --allow-run-as-root -n 1 ${ad_cmd} &"
11729
sleep 2
11830
fi
11931

12032
#Run the main program
12133
if (( 1 )); then
12234
echo "Running main"
123-
EXEC_OPTS="-cupti -um -adios2_trace"
124-
EXEC_T="-T papi,mpi,pthread,cupti,pdt,adios2"
125-
mpirun --allow-run-as-root -n 1 tau_exec ${EXEC_OPTS} ${EXEC_T} ./main ${cycles} ${freq} ${startcyc} -device_max ${device_max} -mult ${anom_mult} -base ${base_cycles} 2>&1 | tee main.log
126-
fi
127-
128-
#Shutdown the viz
129-
if (( ${using_viz} == 1 )); then
130-
echo "Shutting down viz"
131-
cd /opt/chimbuko/viz
132-
./webserver/shutdown_webserver.sh
35+
mpirun --allow-run-as-root -n 1 ${TAU_EXEC} ./main ${cycles} ${freq} ${startcyc} -device_max ${device_max} -mult ${anom_mult} -base ${base_cycles} 2>&1 | tee chimbuko/logs/main.log
13336
fi
13437

13538
wait
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#Note: This configuration file is sourced into the bash environment for Chimbuko startup scripts, thus the user must follow correct shell conventions
2+
#Please do not remove any of the variables!
3+
4+
#IMPORTANT NOTE: Variables that cannot be left as default are marked as <------------ ***SET ME***
5+
6+
service_node_iface=eth0 #network interface upon which communication to the service node is performed <------------ ***SET ME***
7+
8+
####################################
9+
#Options for visualization module
10+
####################################
11+
use_viz=0 #enable or disable the visualization
12+
viz_root=/opt/chimbuko/viz #the root directory of the visualization module <------------ ***SET ME (if using viz)***
13+
viz_worker_port=6379 #the port on which to run the redis server for the visualization backend
14+
viz_port=5002 #the port on which to run the webserver
15+
export C_FORCE_ROOT=1 #required only for docker runs, allows celery to execute properly as root user <----------------- *** SET ME (if using Docker)
16+
17+
############################################################
18+
#General options for Chimbuko backend (pserver, ad, provdb)
19+
############################################################
20+
backend_root="infer" #The root install directory of the PerformanceAnalysis libraries. If set to "infer" it will be inferred from the path of the executables
21+
chimbuko_services="infer" #The location of the Chimbuko service script. If set to "infer" it will be inferred from backend_root
22+
23+
####################################
24+
#Options for the provenance database
25+
####################################
26+
use_provdb=1 #enable or disable the provDB. If disabled the provenance data will be written as JSON ASCII into the ${provdb_writedir} set below
27+
provdb_extra_args="" #any extra command line arguments to pass
28+
provdb_nshards=4 #number of database shards
29+
provdb_engine="ofi+tcp;ofi_rxm" #the OFI libfabric provider used for the Mochi stack
30+
provdb_port=5000 #the port of the provenance database
31+
provdb_nthreads=4 #number of worker threads; should be >= the number of shards
32+
provdb_writedir=chimbuko/provdb #the directory in which the provenance database is written. Chimbuko creates chimbuko/provdb which can be used as a default
33+
provdb_commit_freq=10000 #frequency (in ms) at which the provenance database is committed to disk. If set to 0 it will commit only at the end
34+
35+
#With "verbs" provider (used for infiniband, iWarp, etc) we need to also specify the domain, which can be found by running fi_info (on a compute node)
36+
provdb_domain=mlx5_0 #only needed for verbs provider <------------ ***SET ME (if using verbs)***
37+
38+
39+
####################################
40+
#Options for the parameter server
41+
####################################
42+
use_pserver=1 #enable or disable the pserver
43+
pserver_extra_args="" #any extra command line arguments to pass
44+
pserver_port=5559 #port for parameter server
45+
pserver_nt=2 #number of worker threads
46+
####################################
47+
#Options for the AD module
48+
####################################
49+
ad_extra_args="-perf_outputpath chimbuko/logs -perf_step 1" #any extra command line arguments to pass. Note: chimbuko/logs is automatically created by services script
50+
ad_win_size=5 #number of events around an anomaly to store; provDB entry size is proportional to this so keep it small!
51+
ad_alg="sstd" #the anomaly detection algorithm. Valid values are "hbos" and "sstd"
52+
ad_outlier_hbos_threshold=0.99 #the percentile of events outside of which are considered anomalies by the HBOS algorithm
53+
ad_outlier_sstd_sigma=12 #number of standard deviations that defines an outlier in the SSTD algorithm
54+
####################################
55+
#Options for TAU
56+
#Note: Only the TAU_ADIOS2_PATH, TAU_ADIOS2_FILE_PREFIX, EXE_NAME and TAU_ADIOS2_ENGINE variables are used by the Chimbuko services script and there only to generate the suggested
57+
# command to launch the AD (output to chimbuko/vars/chimbuko_ad_cmdline.var); they can be overridden by the run script if desired, provided the appropriate modifications
58+
# are made to the AD launch command. The remainder of the variables are used only by TAU and can be freely overridden.
59+
####################################
60+
export TAU_ADIOS2_ENGINE=SST #online communication engine (alternative BP4 although this goes through the disk system and may be slower unless the BPfiles are stored on a burst disk)
61+
export TAU_ADIOS2_ONE_FILE=FALSE #a different connection file for each rank
62+
export TAU_ADIOS2_PERIODIC=1 #enable/disable ADIOS2 periodic output
63+
export TAU_ADIOS2_PERIOD=1000000 #period in us between ADIOS2 io steps
64+
export TAU_THREAD_PER_GPU_STREAM=1 #force GPU streams to appear as different TAU virtual threads
65+
export TAU_THROTTLE=0 #enable/disable throttling of short-running functions
66+
67+
export TAU_MAKEFILE=/opt/tau2/x86_64/lib/Makefile.tau-papi-mpi-pthread-python-cupti-pdt-adios2 #The TAU makefile to use <------------ ***SET ME***
68+
69+
#Note: the following 2 variables are not used by the service script but are included here for use from the user's run script allowing the application to be launched with either "${TAU_EXEC} <app>" or "${TAU_PYTHON} <app>"
70+
#Note: the "binding" -T ... is used by Tau to find the appropriate configuration. It can typically be inferred from the name of the Makefile. If using a non-MPI job the 'mpi' should be changed to 'serial' and a non-MPI build of
71+
# ADIOS2/TAU must exist
72+
#Suggestion: It is useful to test the command without Chimbuko first to ensure TAU picks up the correct binding; this can be done by 'export TAU_ADIOS2_ENGINE=BPFile' and then running the application with Tau but without Chimbuko.
73+
TAU_EXEC="tau_exec -T papi,mpi,pthread,python,cupti,pdt,adios2 -adios2_trace -cupti -um" #how to execute tau_exec; the -T arguments should mirror the makefile name <------------ ***SET ME***
74+
TAU_PYTHON="tau_python -T papi,mpi,pthread,python,cupti,pdt,adios2 -tau-python-interpreter=python3 -adios2_trace -tau-python-args=-u" #how to execute tau_python. Note that passing -u to python forces it to not buffer stdout so we can pipe it
75+
#to tee in realtime <--- SET ME (if !python3)
76+
77+
export EXE_NAME=main #the name of the executable (without path) <------------ ***SET ME***
78+
79+
TAU_ADIOS2_PATH=chimbuko/adios2 #path where the adios2 files are to be stored. Chimbuko services creates the directory chimbuko/adios2 in the working directory and this should be used by default
80+
TAU_ADIOS2_FILE_PREFIX=tau-metrics #the prefix of tau adios2 files; full filename is ${TAU_ADIOS2_FILE_PREFIX}-${EXE_NAME}-${RANK}.bp
81+
82+
83+
84+
85+
86+
87+
88+
89+
90+
91+
92+
93+
94+
###########################################################################
95+
# NON-USER VARIABLES BELOW = DON'T MODIFY THESE!!
96+
###########################################################################
97+
#Extra processing
98+
export TAU_ADIOS2_FILENAME="${TAU_ADIOS2_PATH}/${TAU_ADIOS2_FILE_PREFIX}"
99+
100+
if [[ ${backend_root} == "infer" ]]; then
101+
#Infer the install root as the parent of the directory that contains the provdb_admin executable.
#The helper prints the canonicalized path on stdout and returns non-zero if provdb_admin is not in PATH.
_chimbuko_infer_backend_root() {
  local admin_exe
  admin_exe=$(command -v provdb_admin) || return 1
  #dirname removes only the final path component, so an install path that itself contains
  #the string "provdb_admin" is handled correctly
  readlink -f "$(dirname "${admin_exe}")/.."
}
#If the lookup fails, leave backend_root untouched and report it, rather than silently resolving to "/"
if _chimbuko_inferred_root=$(_chimbuko_infer_backend_root); then
  backend_root="${_chimbuko_inferred_root}"
else
  echo "Could not infer backend_root: provdb_admin was not found in PATH!" >&2
fi
unset _chimbuko_inferred_root
102+
fi
103+
104+
if [[ ${chimbuko_services} == "infer" ]]; then
105+
chimbuko_services="${backend_root}/scripts/launch/run_services.sh"
106+
if [ ! -f "${chimbuko_services}" ]; then
107+
echo "Could not infer service script location: service script does not exist at ${chimbuko_services}!"
108+
exit 1
109+
fi
110+
fi
111+

0 commit comments

Comments
 (0)