You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboardExpand all lines: docker/ubuntu18.04/openmpi4.0.4/chimbuko_config.templ
+14-4Lines changed: 14 additions & 4 deletions
Original file line number
Diff line number
Diff line change
@@ -26,15 +26,18 @@ chimbuko_services="infer" #The location of the Chimbuko service script. If set t
26
26
use_provdb=1 #enable or disable the provDB. If disabled, the provenance data will be written as JSON ASCII into the ${provdb_writedir} set below
27
27
provdb_extra_args="" #any extra command line arguments to pass
28
28
provdb_nshards=4 #number of database shards
29
+
provdb_ninstances=1 #number of database server instances. Shards are distributed over instances
29
30
provdb_engine="ofi+tcp;ofi_rxm" #the OFI libfabric provider used for the Mochi stack
30
31
provdb_port=5000 #the port of the provenance database
31
-
provdb_nthreads=4 #number of worker threads; should be >= the number of shards
32
32
provdb_writedir=chimbuko/provdb #the directory in which the provenance database is written. Chimbuko creates chimbuko/provdb which can be used as a default
33
33
provdb_commit_freq=10000 #frequency (in ms) at which the provenance database is committed to disk. If set to 0 it will commit only at the end
34
34
35
35
#With the "verbs" provider (used for InfiniBand, iWARP, etc.) we need to also specify the domain, which can be found by running fi_info (on a compute node)
36
36
provdb_domain=mlx5_0 #only needed for verbs provider <------------ ***SET ME (if using verbs)***
37
37
38
+
export FI_UNIVERSE_SIZE=1600 # Defines the expected number of provenance DB clients per instance <------------- ***SET ME (should be larger than the number of clients per instance)***
39
+
export FI_MR_CACHE_MAX_COUNT=0 # disable MR cache in libfabric; still problematic as of libfabric 1.10.1
40
+
export FI_OFI_RXM_USE_SRX=1 # use shared recv context in RXM; should improve scalability
38
41
39
42
####################################
40
43
#Options for the parameter server
@@ -48,7 +51,7 @@ pserver_nt=2 #number of worker threads
48
51
####################################
49
52
ad_extra_args="-perf_outputpath chimbuko/logs -perf_step 1 -interval_msec <AD_INTERVAL>" #any extra command line arguments to pass. Note: chimbuko/logs is automatically created by services script
50
53
ad_win_size=<AD_WIN_SIZE> #number of events around an anomaly to store; provDB entry size is proportional to this so keep it small!
51
-
ad_alg="sstd" #the anomaly detection algorithm. Valid values are "hbos" and "sstd"
54
+
ad_alg="hbos" #the anomaly detection algorithm. Valid values are "hbos" and "sstd"
52
55
ad_outlier_hbos_threshold=0.99 #the percentile outside of which events are considered anomalies by the HBOS algorithm
53
56
ad_outlier_sstd_sigma=<AD_SIGMA> #number of standard deviations that defines an outlier in the SSTD algorithm
54
57
####################################
@@ -64,13 +67,15 @@ export TAU_ADIOS2_PERIOD=1000000 #period in us between ADIOS2 io steps
64
67
export TAU_THREAD_PER_GPU_STREAM=1 #force GPU streams to appear as different TAU virtual threads
65
68
export TAU_THROTTLE=0 #enable/disable throttling of short-running functions
66
69
67
-
export TAU_MAKEFILE=/opt/tau2/x86_64/lib/Makefile.tau-papi-mpi-pthread-pdt-adios2 #The TAU makefile to use <------------ ***SET ME***
70
+
export TAU_MAKEFILE=/opt/tau2/x86_64/lib/Makefile.tau-papi-mpi-pthread-pdt-adios2 #The TAU makefile to use. If using a TAU installation built by Spack, this variable is already set in the environment and can be commented out here <------------ ***SET ME***
71
+
72
+
tau_monitoring_conf="default" #Provide a configuration file for the TAU monitoring plugin. It will be copied to the work directory as "tau_monitoring.json" (unless it is already there!). If set to default, Chimbuko will generate one automatically
68
73
69
74
#Note: the following 2 variables are not used by the service script but are included here for use from the user's run script allowing the application to be launched with either "${TAU_EXEC} <app>" or "${TAU_PYTHON} <app>"
70
75
#Note: the "binding" -T ... is used by Tau to find the appropriate configuration. It can typically be inferred from the name of the Makefile. If using a non-MPI job the 'mpi' should be changed to 'serial' and a non-MPI build of
71
76
# ADIOS2/TAU must exist
72
77
#Suggestion: It is useful to test the command without Chimbuko first to ensure TAU picks up the correct binding; this can be done by 'export TAU_ADIOS2_ENGINE=BPFile' and then running the application with Tau but without Chimbuko.
73
-
TAU_EXEC="tau_exec -T papi,mpi,pthread,pdt,adios2 -adios2_trace" #how to execute tau_exec; the -T arguments should mirror the makefile name <------------ ***SET ME***
78
+
TAU_EXEC="tau_exec -T papi,mpi,pthread,pdt,adios2 -adios2_trace -monitoring" #how to execute tau_exec; the -T arguments should mirror the makefile name <------------ ***SET ME***
74
79
TAU_PYTHON="tau_python -T papi,mpi,pthread,pdt,adios2 -tau-python-interpreter=python3 -adios2_trace -tau-python-args=-u" #how to execute tau_python. Note that passing -u to python forces it to not buffer stdout so we can pipe it
75
80
#to tee in realtime <--- SET ME (if !python3)
76
81
@@ -98,6 +103,11 @@ TAU_ADIOS2_FILE_PREFIX=tau-metrics #the prefix of tau adios2 files; full filena
0 commit comments