Skip to content

Commit fe2a6a0

Browse files
committed
Updated NWChem+Chimbuko Dockerfile and scripts
Configuration uses HBOS by default and includes the monitoring plugin. Run scripts now support BP4 tracing. Disable NWChem ADIOS2 output in config.
1 parent fc1bfca commit fe2a6a0

3 files changed

Lines changed: 19 additions & 7 deletions

File tree

docker/ubuntu18.04/openmpi4.0.4/Dockerfile.chimbuko.nwchem

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,11 @@ ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/tau2/x86_64/lib
1919
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/adios2/lib
2020
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/SZ/lib
2121

22+
RUN apt-get update && apt-get install -y gdb emacs-nox vim
23+
2224
WORKDIR /
2325

24-
RUN echo "INVALIDATE_2" > /dev/null
26+
RUN echo "INVALIDATE_3" > /dev/null
2527
COPY run_nwchem_chimbuko.sh /
2628
COPY sos_filter.txt /
2729
COPY chimbuko_config.templ /

docker/ubuntu18.04/openmpi4.0.4/chimbuko_config.templ

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,18 @@ chimbuko_services="infer" #The location of the Chimbuko service script. If set t
2626
use_provdb=1 #enable or disable the provDB. If disabled the provenance data will be written as JSON ASCII into the ${provdb_writedir} set below
2727
provdb_extra_args="" #any extra command line arguments to pass
2828
provdb_nshards=4 #number of database shards
29+
provdb_ninstances=1 #number of database server instances. Shards are distributed over instances
2930
provdb_engine="ofi+tcp;ofi_rxm" #the OFI libfabric provider used for the Mochi stack
3031
provdb_port=5000 #the port of the provenance database
31-
provdb_nthreads=4 #number of worker threads; should be >= the number of shards
3232
provdb_writedir=chimbuko/provdb #the directory in which the provenance database is written. Chimbuko creates chimbuko/provdb which can be used as a default
3333
provdb_commit_freq=10000 #interval in ms at which the provenance database is committed to disk. If set to 0 it will commit only at the end
3434

3535
#With "verbs" provider (used for infiniband, iWarp, etc) we need to also specify the domain, which can be found by running fi_info (on a compute node)
3636
provdb_domain=mlx5_0 #only needed for verbs provider <------------ ***SET ME (if using verbs)***
3737

38+
export FI_UNIVERSE_SIZE=1600 # Defines the expected number of provenance DB clients per instance <------------- ***SET ME (should be larger than the number of clients/instance)***
39+
export FI_MR_CACHE_MAX_COUNT=0 # disable MR cache in libfabric; still problematic as of libfabric 1.10.1
40+
export FI_OFI_RXM_USE_SRX=1 # use shared recv context in RXM; should improve scalability
3841

3942
####################################
4043
#Options for the parameter server
@@ -48,7 +51,7 @@ pserver_nt=2 #number of worker threads
4851
####################################
4952
ad_extra_args="-perf_outputpath chimbuko/logs -perf_step 1 -interval_msec <AD_INTERVAL>" #any extra command line arguments to pass. Note: chimbuko/logs is automatically created by services script
5053
ad_win_size=<AD_WIN_SIZE> #number of events around an anomaly to store; provDB entry size is proportional to this so keep it small!
51-
ad_alg="sstd" #the anomaly detection algorithm. Valid values are "hbos" and "sstd"
54+
ad_alg="hbos" #the anomaly detection algorithm. Valid values are "hbos" and "sstd"
5255
ad_outlier_hbos_threshold=0.99 #the percentile of events outside of which are considered anomalies by the HBOS algorithm
5356
ad_outlier_sstd_sigma=<AD_SIGMA> #number of standard deviations that defines an outlier in the SSTD algorithm
5457
####################################
@@ -64,13 +67,15 @@ export TAU_ADIOS2_PERIOD=1000000 #period in us between ADIOS2 io steps
6467
export TAU_THREAD_PER_GPU_STREAM=1 #force GPU streams to appear as different TAU virtual threads
6568
export TAU_THROTTLE=0 #enable/disable throttling of short-running functions
6669

67-
export TAU_MAKEFILE=/opt/tau2/x86_64/lib/Makefile.tau-papi-mpi-pthread-pdt-adios2 #The TAU makefile to use <------------ ***SET ME***
70+
export TAU_MAKEFILE=/opt/tau2/x86_64/lib/Makefile.tau-papi-mpi-pthread-pdt-adios2 #The TAU makefile to use. If using a TAU installation built by Spack, this variable is already set in the environment and can be commented out here <------------ ***SET ME***
71+
72+
tau_monitoring_conf="default" #Provide a configuration file for the TAU monitoring plugin. It will be copied to the work directory as "tau_monitoring.json" (unless it is already there!). If set to default, Chimbuko will generate one automatically
6873

6974
#Note: the following 2 variables are not used by the service script but are included here for use from the user's run script allowing the application to be launched with either "${TAU_EXEC} <app>" or "${TAU_PYTHON} <app>"
7075
#Note: the "binding" -T ... is used by Tau to find the appropriate configuration. It can typically be inferred from the name of the Makefile. If using a non-MPI job the 'mpi' should be changed to 'serial' and a non-MPI build of
7176
# ADIOS2/TAU must exist
7277
#Suggestion: It is useful to test the command without Chimbuko first to ensure TAU picks up the correct binding; this can be done by 'export TAU_ADIOS2_ENGINE=BPFile' and then running the application with Tau but without Chimbuko.
73-
TAU_EXEC="tau_exec -T papi,mpi,pthread,pdt,adios2 -adios2_trace" #how to execute tau_exec; the -T arguments should mirror the makefile name <------------ ***SET ME***
78+
TAU_EXEC="tau_exec -T papi,mpi,pthread,pdt,adios2 -adios2_trace -monitoring" #how to execute tau_exec; the -T arguments should mirror the makefile name <------------ ***SET ME***
7479
TAU_PYTHON="tau_python -T papi,mpi,pthread,pdt,adios2 -tau-python-interpreter=python3 -adios2_trace -tau-python-args=-u" #how to execute tau_python. Note that passing -u to python forces it to not buffer stdout so we can pipe it
7580
#to tee in realtime <--- SET ME (if !python3)
7681
@@ -98,6 +103,11 @@ TAU_ADIOS2_FILE_PREFIX=tau-metrics #the prefix of tau adios2 files; full filena
98103
export TAU_ADIOS2_FILENAME="${TAU_ADIOS2_PATH}/${TAU_ADIOS2_FILE_PREFIX}"
99104

100105
if [[ ${backend_root} == "infer" ]]; then
106+
if [[ $(which provdb_admin) == "" ]]; then
107+
echo "When inferring the backend root directory, could not find provdb_admin in PATH. Please add your Chimbuko bin directory to PATH"
108+
exit 1
109+
fi
110+
101111
backend_root=$( readlink -f $(which provdb_admin | sed 's/provdb_admin//')/../ )
102112
fi
103113

docker/ubuntu18.04/openmpi4.0.4/run_nwchem_chimbuko.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ sed -i 's/coord 0/coord 1/' ethanol_md.nw
8383
sed -i 's/scoor 0/scoor 1/' ethanol_md.nw
8484
sed -i 's/step 0.001/step 0.001/' ethanol_md.nw
8585
sed -i '21s|set|#set|' ethanol_md.nw
86-
sed -i '22s|#set|set|' ethanol_md.nw
86+
sed -i '17s|#set|set|' ethanol_md.nw
8787
sed -i "s|data 1000|data ${DATA_STEPS}|" ethanol_md.nw
8888

8989
if (( 1 )); then
@@ -94,7 +94,7 @@ if (( 1 )); then
9494
ad_cmd=$(cat chimbuko/vars/chimbuko_ad_cmdline.var)
9595
fi
9696

97-
if [ "$ADIOS_MODE" == "SST" ]
97+
if [[ "$ADIOS_MODE" == "SST" || "$ADIOS_MODE" == "BP4" ]]
9898
then
9999
echo "Launch Application with anomaly detectors"
100100
eval "mpirun --allow-run-as-root -n ${NMPIS} ${ad_cmd} &"

0 commit comments

Comments
 (0)