Skip to content

Commit cdb791d

Browse files
committed
Updated Chimbuko+NWChem docker image to use launch scripts
1 parent 9288143 commit cdb791d

3 files changed

Lines changed: 142 additions & 149 deletions

File tree

docker/ubuntu18.04/openmpi4.0.4/Dockerfile.chimbuko.nwchem

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,10 @@ ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/SZ/lib
2121

2222
WORKDIR /
2323

24-
RUN echo "INVALIDATE_1" > /dev/null
24+
RUN echo "INVALIDATE_2" > /dev/null
2525
COPY run_nwchem_chimbuko.sh /
2626
COPY sos_filter.txt /
27+
COPY chimbuko_config.templ /
2728

2829
EXPOSE 5000
2930

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#Note: This configuration file is sourced into the bash environment for Chimbuko startup scripts, thus the user must follow correct shell conventions
2+
#Please do not remove any of the variables!
3+
4+
#IMPORTANT NOTE: Variables that cannot be left as default are marked as <------------ ***SET ME***
5+
6+
service_node_iface=eth0 #network interface upon which communication to the service node is performed <------------ ***SET ME***
7+
8+
####################################
9+
#Options for visualization module
10+
####################################
11+
use_viz=1 #enable or disable the visualization
12+
viz_root=<VIZ_ROOT> #the root directory of the visualization module <------------ ***SET ME (if using viz)***
13+
viz_worker_port=6379 #the port on which to run the redis server for the visualization backend
14+
viz_port=5002 #the port on which to run the webserver
15+
export C_FORCE_ROOT=1 #required only for docker runs, allows celery to execute properly as root user <----------------- *** SET ME (if using Docker)
16+
17+
############################################################
18+
#General options for Chimbuko backend (pserver, ad, provdb)
19+
############################################################
20+
backend_root="infer" #The root install directory of the PerformanceAnalysis libraries. If set to "infer" it will be inferred from the path of the executables
21+
chimbuko_services="infer" #The location of the Chimbuko service script. If set to "infer" it will be inferred from backend_root
22+
23+
####################################
24+
#Options for the provenance database
25+
####################################
26+
use_provdb=1 #enable or disable the provDB. If disabled the provenance data will be written as JSON ASCII into the ${provdb_writedir} set below
27+
provdb_extra_args="" #any extra command line arguments to pass
28+
provdb_nshards=4 #number of database shards
29+
provdb_engine="ofi+tcp;ofi_rxm" #the OFI libfabric provider used for the Mochi stack
30+
provdb_port=5000 #the port of the provenance database
31+
provdb_nthreads=4 #number of worker threads; should be >= the number of shards
32+
provdb_writedir=chimbuko/provdb #the directory in which the provenance database is written. Chimbuko creates chimbuko/provdb which can be used as a default
33+
provdb_commit_freq=10000 #frequency (in ms) at which the provenance database is committed to disk. If set to 0 it will commit only at the end
34+
35+
#With "verbs" provider (used for infiniband, iWarp, etc) we need to also specify the domain, which can be found by running fi_info (on a compute node)
36+
provdb_domain=mlx5_0 #only needed for verbs provider <------------ ***SET ME (if using verbs)***
37+
38+
39+
####################################
40+
#Options for the parameter server
41+
####################################
42+
use_pserver=1 #enable or disable the pserver
43+
pserver_extra_args="" #any extra command line arguments to pass
44+
pserver_port=5559 #port for parameter server
45+
pserver_nt=2 #number of worker threads
46+
####################################
47+
#Options for the AD module
48+
####################################
49+
ad_extra_args="-perf_outputpath chimbuko/logs -perf_step 1 -interval_msec <AD_INTERVAL>" #any extra command line arguments to pass. Note: chimbuko/logs is automatically created by services script
50+
ad_win_size=<AD_WIN_SIZE> #number of events around an anomaly to store; provDB entry size is proportional to this so keep it small!
51+
ad_alg="sstd" #the anomaly detection algorithm. Valid values are "hbos" and "sstd"
52+
ad_outlier_hbos_threshold=0.99 #the percentile of events outside of which are considered anomalies by the HBOS algorithm
53+
ad_outlier_sstd_sigma=<AD_SIGMA> #number of standard deviations that defines an outlier in the SSTD algorithm
54+
####################################
55+
#Options for TAU
56+
#Note: Only the TAU_ADIOS2_PATH, TAU_ADIOS2_FILE_PREFIX, EXE_NAME and TAU_ADIOS2_ENGINE variables are used by the Chimbuko services script and there only to generate the suggested
57+
# command to launch the AD (output to chimbuko/vars/chimbuko_ad_cmdline.var); they can be overridden by the run script if desired providing the appropriate modifications
58+
# are made to the AD launch command. The remainder of the variables are used only by TAU and can be freely overridden.
59+
####################################
60+
export TAU_ADIOS2_ENGINE=<TAU_ADIOS2_ENGINE> #online communication engine (alternative BP4 although this goes through the disk system and may be slower unless the BPfiles are stored on a burst disk)
61+
export TAU_ADIOS2_ONE_FILE=FALSE #a different connection file for each rank
62+
export TAU_ADIOS2_PERIODIC=1 #enable/disable ADIOS2 periodic output
63+
export TAU_ADIOS2_PERIOD=1000000 #period in us between ADIOS2 io steps
64+
export TAU_THREAD_PER_GPU_STREAM=1 #force GPU streams to appear as different TAU virtual threads
65+
export TAU_THROTTLE=0 #enable/disable throttling of short-running functions
66+
67+
export TAU_MAKEFILE=/opt/tau2/x86_64/lib/Makefile.tau-papi-mpi-pthread-pdt-adios2 #The TAU makefile to use <------------ ***SET ME***
68+
69+
#Note: the following 2 variables are not used by the service script but are included here for use from the user's run script allowing the application to be launched with either "${TAU_EXEC} <app>" or "${TAU_PYTHON} <app>"
70+
#Note: the "binding" -T ... is used by Tau to find the appropriate configuration. It can typically be inferred from the name of the Makefile. If using a non-MPI job the 'mpi' should be changed to 'serial' and a non-MPI build of
71+
# ADIOS2/TAU must exist
72+
#Suggestion: It is useful to test the command without Chimbuko first to ensure TAU picks up the correct binding; this can be done by 'export TAU_ADIOS2_ENGINE=BPFile' and then running the application with Tau but without Chimbuko.
73+
TAU_EXEC="tau_exec -T papi,mpi,pthread,pdt,adios2 -adios2_trace" #how to execute tau_exec; the -T arguments should mirror the makefile name <------------ ***SET ME***
74+
TAU_PYTHON="tau_python -T papi,mpi,pthread,pdt,adios2 -tau-python-interpreter=python3 -adios2_trace -tau-python-args=-u" #how to execute tau_python. Note that passing -u to python forces it to not buffer stdout so we can pipe it
75+
#to tee in realtime <--- SET ME (if !python3)
76+
77+
export EXE_NAME=nwchem #the name of the executable (without path) <------------ ***SET ME***
78+
79+
TAU_ADIOS2_PATH=chimbuko/adios2 #path where the adios2 files are to be stored. Chimbuko services creates the directory chimbuko/adios2 in the working directory and this should be used by default
80+
TAU_ADIOS2_FILE_PREFIX=tau-metrics #the prefix of tau adios2 files; full filename is ${TAU_ADIOS2_FILE_PREFIX}-${EXE_NAME}-${RANK}.bp
81+
82+
83+
84+
85+
86+
87+
88+
89+
90+
91+
92+
93+
94+
###########################################################################
95+
# NON-USER VARIABLES BELOW = DON'T MODIFY THESE!!
96+
###########################################################################
97+
#Extra processing
98+
export TAU_ADIOS2_FILENAME="${TAU_ADIOS2_PATH}/${TAU_ADIOS2_FILE_PREFIX}"
99+
100+
if [[ ${backend_root} == "infer" ]]; then
101+
#Infer the install root as the parent of the directory that contains the provdb_admin executable found on PATH.
#Fail loudly if provdb_admin cannot be found rather than silently resolving backend_root to a bogus location.
provdb_admin_path=$( command -v provdb_admin ) || { echo "Could not infer backend_root: provdb_admin was not found in PATH!"; exit 1; }
backend_root=$( readlink -f "$( dirname "${provdb_admin_path}" )/.." )
102+
fi
103+
104+
if [[ ${chimbuko_services} == "infer" ]]; then
105+
chimbuko_services="${backend_root}/scripts/launch/run_services.sh"
106+
if [ ! -f "${chimbuko_services}" ]; then
107+
echo "Could not infer service script location: service script does not exist at ${chimbuko_services}!"
108+
exit 1
109+
fi
110+
fi
111+

docker/ubuntu18.04/openmpi4.0.4/run_nwchem_chimbuko.sh

Lines changed: 29 additions & 148 deletions
Original file line numberDiff line numberDiff line change
@@ -40,57 +40,38 @@ echo "AD WINSZ: ${AD_WINSZ}"
4040
echo "AD INTERVAL: ${AD_INTERVAL} msec"
4141
echo "BATCH DIR: ${BATCH_DIR}"
4242
echo "============================"
43-
sleep 3
43+
sleep 1
4444

4545
# NWChem environments
46-
export NWCHEM_TOP=/Codar/nwchem-1
47-
export NWCHEM_DAT=$NWCHEM_TOP/QA/tests/ethanol
4846
export AD_ROOT=/opt/chimbuko/ad
49-
50-
# Chimbuko environments
5147
export VIZ_ROOT=/opt/chimbuko/viz
5248

53-
# TAU environments
54-
export TAU_ROOT=/opt/tau2/x86_64
55-
export TAU_MAKEFILE=$TAU_ROOT/lib/Makefile.tau-papi-mpi-pthread-pdt-adios2
56-
export TAU_PLUGINS_PATH=$TAU_ROOT/lib/shared-papi-mpi-pthread-pdt-adios2
57-
export TAU_PLUGINS=libTAU-adios2-trace-plugin.so
49+
#Chimbuko and NWChem environments
50+
echo "Loading spack packages. Please be patient!"
51+
source /spack/spack/share/spack/setup-env.sh && spack load py-mochi-sonata
52+
export NWCHEM_TOP=/Codar/nwchem-1
53+
export PATH=${NWCHEM_TOP}/bin/LINUX64/:${PATH}
54+
export PATH=${AD_ROOT}/bin/:${PATH}
55+
export PATH=${VIZ_ROOT}/redis-stable/src/:${PATH}
56+
export LD_LIBRARY_PATH=${AD_ROOT}/lib:${LD_LIBRARY_PATH}
57+
58+
echo "PATH=" ${PATH}
59+
which redis-server
5860

59-
# Create work directory under the given batch directory
6061
mkdir -p $BATCH_DIR
6162
#Abort if BATCH_DIR is unset/empty or cannot be entered: the 'rm -rf *' below must never run in any other directory
cd "${BATCH_DIR:?BATCH_DIR is not set}" || exit 1
62-
rm -rf DB executions logs
63-
mkdir -p logs DB BP executions
63+
rm -rf *
6464

65-
LOG_DIR=${BATCH_DIR}/logs
66-
DB_DIR=${BATCH_DIR}/DB
67-
BP_DIR=${BATCH_DIR}/BP
65+
# #Override config script with user options
66+
#Fill in the placeholders of the config template with the user-supplied options and write the result into the working directory
sed -e "s/<TAU_ADIOS2_ENGINE>/${ADIOS_MODE}/" \
    -e "s/<AD_WIN_SIZE>/${AD_WINSZ}/" \
    -e "s/<AD_SIGMA>/${AD_SIGMA}/" \
    -e "s/<AD_INTERVAL>/${AD_INTERVAL}/" \
    -e "s|<VIZ_ROOT>|${VIZ_ROOT}|" \
    /chimbuko_config.templ > chimbuko_config.sh
67+
export CHIMBUKO_CONFIG=chimbuko_config.sh
68+
source ${CHIMBUKO_CONFIG}
6869

69-
# TAU plug-in environments
70-
BP_PREFIX=tau-metrics-nwchem
71-
export TAU_ADIOS2_PERIODIC=1
72-
export TAU_ADIOS2_PERIOD=1000000
70+
#Tau extra arguments
7371
export TAU_ADIOS2_SELECTION_FILE=${BATCH_DIR}/sos_filter.txt #filter out some irrelevant functions
74-
export TAU_ADIOS2_ENGINE=$ADIOS_MODE
75-
export TAU_ADIOS2_FILENAME=${BP_DIR}/tau-metrics
76-
#export TAU_VERBOSE=1
77-
78-
# visualization server
79-
export DATABASE_URL="sqlite:///${DB_DIR}/main.sqlite"
80-
export ANOMALY_STATS_URL="sqlite:///${DB_DIR}/anomaly_stats.sqlite"
81-
export ANOMALY_DATA_URL="sqlite:///${DB_DIR}/anomaly_data.sqlite"
82-
export FUNC_STATS_URL="sqlite:///${DB_DIR}/func_stats.sqlite"
83-
export SHARDED_NUM=1 #Number of provdb shards
84-
export C_FORCE_ROOT=1
85-
86-
#Chimbuko and NWChem environments
87-
source /spack/spack/share/spack/setup-env.sh && spack load py-mochi-sonata
88-
export PATH=${NWCHEM_TOP}/bin/LINUX64/:${PATH}
89-
export PATH=${AD_ROOT}/bin/:${PATH}
90-
export LD_LIBRARY_PATH=${AD_ROOT}/lib:${LD_LIBRARY_PATH}
9172

9273
# copy binaries and data
93-
#cp $NWCHEM_TOP/bin/LINUX64/nwchem .
74+
export NWCHEM_DAT=$NWCHEM_TOP/QA/tests/ethanol
9475
cp $NWCHEM_DAT/ethanol_md.nw .
9576
cp $NWCHEM_DAT/*.pdb .
9677
cp $NWCHEM_DAT/ethanol_md.rst .
@@ -105,131 +86,31 @@ sed -i '21s|set|#set|' ethanol_md.nw
10586
sed -i '22s|#set|set|' ethanol_md.nw
10687
sed -i "s|data 1000|data ${DATA_STEPS}|" ethanol_md.nw
10788

108-
extra_args=""
109-
ps_extra_args=""
110-
if (( 1 )); then
111-
echo ""
112-
echo "=========================================="
113-
echo "Launch Chimbuko provenance database"
114-
echo "=========================================="
115-
116-
cd ${DB_DIR}
117-
118-
rm -f provdb.*.unqlite
119-
ip=$(hostname -i)
120-
provdb_admin ${ip}:5555 2>&1 | tee ${LOG_DIR}/provdb.log &
121-
provdb_pid=$!
122-
123-
sleep 1
124-
if ! [[ -f provider.address ]]; then
125-
echo "Provider address file not created after 1 second"
126-
exit 1
127-
fi
128-
129-
prov_add=$(cat provider.address)
130-
extra_args="-provdb_addr ${prov_add}"
131-
ps_extra_args="-provdb_addr ${prov_add}"
132-
133-
#For viz
134-
export PROVENANCE_DB="${DB_DIR}/"
135-
export PROVDB_ADDR=${prov_add}
136-
137-
echo "Enabling provenance database with arg: ${extra_args}"
138-
sleep 2
139-
cd ${BATCH_DIR}
140-
fi
141-
142-
using_viz=0
143-
if (( 1 )); then
144-
echo ""
145-
echo "=========================================="
146-
echo "Launch Chimbuko visualization server"
147-
echo "=========================================="
148-
149-
using_viz=1
150-
ip=$(hostname -i)
151-
152-
cd ${VIZ_ROOT}
153-
154-
echo "run redis ..."
155-
redis-stable/src/redis-server redis-stable/redis.conf 2>&1 | tee ${LOG_DIR}/redis.log &
156-
sleep 5
157-
158-
echo "run celery ..."
159-
python3 manager.py celery --loglevel=info 2>&1 | tee ${LOG_DIR}/celery.log &
160-
sleep 5
161-
162-
echo "create db ..."
163-
python3 manager.py createdb 2>&1 | tee ${LOG_DIR}/create_db.log
164-
sleep 2
165-
166-
echo "run webserver (server config ${SERVER_CONFIG}) with provdb on ${PROVDB_ADDR}... Logging to ${LOG_DIR}/viz.log"
167-
python3 manager.py runserver --host 0.0.0.0 --port 5002 --debug 2>&1 | tee ${LOG_DIR}/viz.log &
168-
sleep 2
169-
170-
cd -
171-
172-
ws_addr="http://${ip}:5002/api/anomalydata"
173-
ps_extra_args+=" -ws_addr ${ws_addr}"
174-
fi
175-
17689
if (( 1 )); then
177-
echo ""
178-
echo "=========================================="
179-
echo "Launch Chimbuko parameter server"
180-
echo "=========================================="
181-
ip=$(hostname -i)
182-
pserver_port=9999
183-
pserver_addr=tcp://${ip}:${pserver_port}
184-
185-
pserver -nt 2 -logdir "${LOG_DIR}" -port ${pserver_port} ${ps_extra_args} 2>&1 | tee ${LOG_DIR}/pserver.log &
186-
ps_pid=$!
187-
sleep 2
188-
extra_args+=" -pserver_addr ${pserver_addr}"
90+
echo "Running services"
91+
${chimbuko_services} 2>&1 | tee services.log &
92+
echo "Waiting"
93+
while [ ! -f chimbuko/vars/chimbuko_ad_cmdline.var ]; do sleep 1; done
94+
ad_cmd=$(cat chimbuko/vars/chimbuko_ad_cmdline.var)
18995
fi
19096

191-
19297
if [ "$ADIOS_MODE" == "SST" ]
19398
then
19499
echo "Launch Application with anomaly detectors"
195-
mpirun --allow-run-as-root -n $NMPIS driver $ADIOS_MODE \
196-
${BP_DIR} $BP_PREFIX -prov_outputdir "${BATCH_DIR}/executions" ${extra_args} -outlier_sigma ${AD_SIGMA} -anom_win_size ${AD_WINSZ} 2>&1 | tee ${LOG_DIR}/ad.log &
197-
ad_pid=$!
100+
eval "mpirun --allow-run-as-root -n ${NMPIS} ${ad_cmd} &"
198101
sleep 5
199102

200-
cd $BATCH_DIR
201-
mpirun --allow-run-as-root -n $NMPIS nwchem ethanol_md.nw 2>&1 | tee logs/nwchem.log
202-
203-
wait ${ad_pid}
103+
mpirun --allow-run-as-root -n $NMPIS ${TAU_EXEC} nwchem ethanol_md.nw 2>&1 | tee chimbuko/logs/nwchem.log
204104
else
205105
echo "Use BP mode"
206106
if ! $HAS_BPFILE
207107
then
208108
echo "Run NWChem"
209-
cd $BATCH_DIR
210-
mpirun --allow-run-as-root -n $NMPIS nwchem ethanol_md.nw 2>&1 | tee logs/nwchem.log
109+
#Write the NWChem log next to the other Chimbuko logs, matching the SST branch: chimbuko/logs is created by the services script,
#whereas a top-level logs/ directory is no longer created by this script
mpirun --allow-run-as-root -n $NMPIS ${TAU_EXEC} nwchem ethanol_md.nw 2>&1 | tee chimbuko/logs/nwchem.log
211110
fi
212111
echo "Run anomaly detectors"
213-
cd $WORK_DIR/ad
214-
mpirun --allow-run-as-root -n $NMPIS driver $ADIOS_MODE \
215-
$WORK_DIR/BP $BP_PREFIX -prov_outputdir "${WORK_DIR}/executions" ${extra_args} -outlier_sigma ${AD_SIGMA} -anom_win_size ${AD_WINSZ} -interval_msec ${AD_INTERVAL} 2>&1 | tee ${LOG_DIR}/ad.log
216-
fi
217-
218-
echo "Waiting for services to finish"
219-
wait $ps_pid
220-
wait $provdb_pid
221-
222-
# wait about 10 min. so that users can keep interacting with visualization.
223-
#sleep 600
224-
225-
if (( ${using_viz} == 1 )); then
226-
echo ""
227-
echo "=========================================="
228-
echo "Shutdown Chimbuko visualization server"
229-
echo "=========================================="
230-
231-
cd ${VIZ_ROOT}
232-
./webserver/shutdown_webserver.sh
112+
eval "mpirun --allow-run-as-root -n ${NMPIS} ${ad_cmd} &"
233113
fi
234114

115+
wait
235116
echo "Bye~~!!"

0 commit comments

Comments
 (0)