Skip to content

Commit 6a46853

Browse files
committed
Added a benchmark_suite mini-app for testing AMD GPU support
The ProvDB client's wait for the outstanding send queue to drain now outputs a heartbeat log entry every 20s while waiting. The ProvDB admin outputs a heartbeat log entry every 20s if verbose output is enabled. Improved verbose logging for AD->pserver connect/disconnect.
1 parent cc527ba commit 6a46853

7 files changed

Lines changed: 317 additions & 4 deletions

File tree

app/provdb_admin.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -300,9 +300,12 @@ int main(int argc, char** argv) {
300300
//Spin quietly until SIGTERM sent
301301
signal(SIGTERM, termSignalHandler);
302302
progressStream << "ProvDB Admin: main thread waiting for completion" << std::endl;
303+
304+
size_t iter = 0;
303305
while(!stop_wait_loop) { //stop wait loop will be set by SIGTERM handler
304306
tl::thread::sleep(engine, 1000); //Thallium engine sleeps but listens for rpc requests
305-
307+
if(iter % 20 == 0){ verboseStream << "ProvDB Admin heartbeat" << std::endl; }
308+
306309
unsigned long commit_timer_ms = std::chrono::duration_cast<std::chrono::milliseconds>(Clock::now() - commit_timer_start).count();
307310
if(args.db_commit_freq > 0 && commit_timer_ms >= args.db_commit_freq){
308311
verboseStream << "ProvDB Admin: committing database to disk" << std::endl;
@@ -331,7 +334,9 @@ int main(int argc, char** argv) {
331334
#endif
332335
break;
333336
}
334-
}
337+
338+
++iter;
339+
}//wait loop
335340
}//client scope
336341

337342
//If the pserver didn't connect (it is optional), delete the empty database
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Build rules for the benchmark_suite AMD GPU mini-app (main.C compiled as HIP with hipcc).
SHELL := /bin/bash

# MPI compile/link flags are taken from the mpic++ wrapper.
# NOTE(review): -showme:compile / -showme:link look like Open MPI wrapper options - confirm for other MPI stacks.
MPI_INC := $(shell mpic++ -showme:compile)
MPI_LINK := $(shell mpic++ -showme:link)

# Neither of these targets produces a file with its own name ("main_notau" writes ./main),
# so declare them phony to make that explicit and to protect against a stray file of the same name.
.PHONY: main_notau clean

# -optCompInst
# Default target: build ./main without compiler function instrumentation.
main_notau: main.C
	hipcc -x hip main.C -o main -g $(MPI_INC) $(MPI_LINK)

# Build ./main with host-side function instrumentation (-finstrument-functions).
#tau_cxx.sh -x hip main.C -o main -g
main: main.C
	hipcc -x hip main.C -o main -g -Xarch_host -finstrument-functions $(MPI_INC) $(MPI_LINK)

# Remove the binary; -f so that cleaning an already-clean directory is not an error.
clean:
	rm -f main
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
export FI_UNIVERSE_SIZE=10000
2+
export HG_NA_LOG_LEVEL=error
3+
export HG_LOG_LEVEL=debug
4+
#SstVerbose=5 # (1-5, 5 most verbose)
5+
#export MARGO_ENABLE_PROFILING=1
6+
export FI_LOG_LEVEL=Debug
7+
8+
#Note: This configuration file is sourced into the bash environment for Chimbuko startup scripts, thus the user must follow correct shell conventions
9+
#Please do not remove any of the variables!
10+
11+
#IMPORTANT NOTE: Variables that cannot be left as default are marked as <------------ ***SET ME***
12+
13+
service_node_iface=enp71s0 #network interface upon which communication to the service node is performed <------------ ***SET ME***
14+
15+
####################################
16+
#Options for visualization module
17+
####################################
18+
use_viz=0 #enable or disable the visualization
19+
viz_root=/home/ckelly/src/spack/opt/spack/linux-ubuntu20.04-zen2/gcc-9.3.0/chimbuko-visualization2-master-c5vwtxt7gv2jsyrxymg6toveszc6dfbg #the root directory of the visualization module <------------ ***SET ME (if using viz)***
20+
viz_worker_port=6379 #the port on which to run the redis server for the visualization backend
21+
viz_port=5002 #the port on which to run the webserver
22+
#export C_FORCE_ROOT=1 #required only for docker runs, allows celery to execute properly as root user <----------------- *** SET ME (if using Docker)
23+
24+
############################################################
25+
#General options for Chimbuko backend (pserver, ad, provdb)
26+
############################################################
27+
backend_root="infer" #The root install directory of the PerformanceAnalysis libraries. If set to "infer" it will be inferred from the path of the executables
28+
chimbuko_services="infer" #The location of the Chimbuko service script. If set to "infer" it will be inferred from backend_root
29+
30+
####################################
31+
#Options for the provenance database
32+
####################################
33+
use_provdb=0 #enable or disable the provDB. If disabled the provenance data will be written as JSON ASCII into the ${provdb_writedir} set below
34+
provdb_extra_args="" #any extra command line arguments to pass
35+
provdb_nshards=4 #number of database shards
36+
provdb_engine="sockets" #the OFI libfabric provider used for the Mochi stack
37+
provdb_port=5000 #the port of the provenance database
38+
provdb_nthreads=4 #number of worker threads; should be >= the number of shards
39+
provdb_writedir=chimbuko/provdb #the directory in which the provenance database is written. Chimbuko creates chimbuko/provdb which can be used as a default
40+
provdb_commit_freq=10000 #frequency ms at which the provenance database is committed to disk. If set to 0 it will commit only at the end
41+
42+
#With "verbs" provider (used for infiniband, iWarp, etc) we need to also specify the domain, which can be found by running fi_info (on a compute node)
43+
provdb_domain=mlx5_0 #only needed for verbs provider <------------ ***SET ME (if using verbs)***
44+
45+
46+
####################################
47+
#Options for the parameter server
48+
####################################
49+
use_pserver=1 #enable or disable the pserver
50+
pserver_extra_args="" #any extra command line arguments to pass
51+
pserver_port=5559 #port for parameter server
52+
pserver_nt=2 #number of worker threads
53+
####################################
54+
#Options for the AD module
55+
####################################
56+
ad_extra_args="-perf_outputpath chimbuko/logs -perf_step 1" #any extra command line arguments to pass. Note: chimbuko/logs is automatically created by services script
57+
ad_win_size=5 #number of events around an anomaly to store; provDB entry size is proportional to this so keep it small!
58+
ad_alg="hbos" #the anomaly detection algorithm. Valid values are "hbos" and "sstd"
59+
ad_outlier_hbos_threshold=0.99 #the percentile of events outside of which are considered anomalies by the HBOS algorithm
60+
ad_outlier_sstd_sigma=12 #number of standard deviations that defines an outlier in the SSTD algorithm
61+
####################################
62+
#Options for TAU
63+
#Note: Only the TAU_ADIOS2_PATH, TAU_ADIOS2_FILE_PREFIX, EXE_NAME and TAU_ADIOS2_ENGINE variables are used by the Chimbuko services script and there only to generate the suggested
64+
# command to launch the AD (output to chimbuko/vars/chimbuko_ad_cmdline.var); they can be overridden by the run script if desired providing the appropriate modifications
65+
# are made to the AD launch command. The remainder of the variables are used only by TAU and can be freely overridden.
66+
####################################
67+
export TAU_ADIOS2_ENGINE=BP4 #ADIOS2 engine used for the TAU output. BP4 goes through the disk system and may be slower unless the BP files are stored on a burst disk. NOTE(review): the previous comment described this value as the online communication engine with BP4 as the alternative, which presumably referred to a different default engine - confirm
68+
export TAU_ADIOS2_ONE_FILE=FALSE #a different connection file for each rank
69+
export TAU_ADIOS2_PERIODIC=1 #enable/disable ADIOS2 periodic output
70+
export TAU_ADIOS2_PERIOD=1000000 #period in us between ADIOS2 io steps
71+
export TAU_THREAD_PER_GPU_STREAM=1 #force GPU streams to appear as different TAU virtual threads
72+
export TAU_THROTTLE=0 #enable/disable throttling of short-running functions
73+
74+
#export TAU_MAKEFILE=/home/ckelly/install/tau/x86_64/lib/Makefile.tau-rocm-roctracer-clang-papi-mpi-pthread-pdt-adios2 #The TAU makefile to use <------------ ***SET ME***
75+
76+
#Note: the following 2 variables are not used by the service script but are included here for use from the user's run script allowing the application to be launched with either "${TAU_EXEC} <app>" or "${TAU_PYTHON} <app>"
77+
#Note: the "binding" -T ... is used by Tau to find the appropriate configuration. It can typically be inferred from the name of the Makefile. If using a non-MPI job the 'mpi' should be changed to 'serial' and a non-MPI build of
78+
# ADIOS2/TAU must exist
79+
#Suggestion: It is useful to test the command without Chimbuko first to ensure TAU picks up the correct binding; this can be done by 'export TAU_ADIOS2_ENGINE=BPFile' and then running the application with Tau but without Chimbuko.
80+
81+
82+
#-rw-rw-r-- 1 ckelly ckelly 98545 Aug 25 09:21 /home/ckelly/install/tau/x86_64/lib/Makefile.tau-rocm-roctracer-clang-papi-mpi-pthread-pdt-adios2
83+
#-rw-rw-r-- 1 ckelly ckelly 98617 Aug 25 16:04 /home/ckelly/install/tau/x86_64/lib/Makefile.tau-rocm-roctracer-rocprofiler-clang-papi-mpi-pthread-pdt-adios2
84+
#-rw-rw-r-- 1 ckelly ckelly 98561 Aug 25 16:06 /home/ckelly/install/tau/x86_64/lib/Makefile.tau-rocm-rocprofiler-clang-papi-mpi-pthread-pdt-adios2
85+
86+
87+
TAU_EXEC="tau_exec -T rocm,roctracer,clang,papi,mpi,pthread,pdt,adios2 -rocm -adios2_trace" #how to execute tau_exec; the -T arguments should mirror the makefile name <------------ ***SET ME***
88+
TAU_PYTHON="tau_python -T papi,mpi,pthread,pdt,adios2 -tau-python-interpreter=python3 -adios2_trace -tau-python-args=-u" #how to execute tau_python. Note that passing -u to python forces it to not buffer stdout so we can pipe it
89+
#to tee in realtime <--- SET ME (if !python3)
90+
91+
export EXE_NAME=main #the name of the executable (without path) <------------ ***SET ME***
92+
93+
TAU_ADIOS2_PATH=chimbuko/adios2 #path where the adios2 files are to be stored. Chimbuko services creates the directory chimbuko/adios2 in the working directory and this should be used by default
94+
TAU_ADIOS2_FILE_PREFIX=tau-metrics #the prefix of tau adios2 files; full filename is ${TAU_ADIOS2_FILE_PREFIX}-${EXE_NAME}-${RANK}.bp
95+
96+
97+
98+
99+
100+
101+
102+
103+
104+
105+
106+
107+
108+
###########################################################################
109+
# NON-USER VARIABLES BELOW = DON'T MODIFY THESE!!
110+
###########################################################################
111+
#Extra processing
112+
#Path prefix of the TAU ADIOS2 output, built from the directory and file prefix set above
export TAU_ADIOS2_FILENAME="${TAU_ADIOS2_PATH}/${TAU_ADIOS2_FILE_PREFIX}"

#If requested, infer the backend root as the parent of the directory that contains provdb_admin
if [[ ${backend_root} == "infer" ]]; then
    #type -P is a bash builtin returning only the on-disk path found via PATH (no dependency on an external 'which').
    #The lookup sits inside the 'if' condition so that a caller running under 'set -e' still reaches the error message.
    if ! _chimbuko_provdb_admin=$(type -P provdb_admin) || [[ -z "${_chimbuko_provdb_admin}" ]]; then
        echo "When inferring the backend root directory, could not find provdb_admin in PATH. Please add your Chimbuko bin directory to PATH"
        exit 1
    fi

    #dirname strips only the final path component, and the quoting keeps install paths containing whitespace intact
    backend_root=$( readlink -f "$(dirname "${_chimbuko_provdb_admin}")/.." )
    unset _chimbuko_provdb_admin #this file is sourced, so do not leak the temporary into the caller's environment
fi

#If requested, infer the service script location from the backend root and check that it exists
if [[ ${chimbuko_services} == "infer" ]]; then
    chimbuko_services="${backend_root}/scripts/launch/run_services.sh"
    if [ ! -f "${chimbuko_services}" ]; then
        echo "Could not infer service script location: service script does not exist at ${chimbuko_services}!"
        exit 1
    fi
fi
130+
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
#include <iostream>
2+
#include <math.h>
3+
#include <random>
4+
#include <string>
5+
#include <cassert>
6+
#include <mpi.h>
7+
#include <sstream>
8+
#include <hip/hip_runtime.h>
9+
10+
using clock_value_t = long long;
11+
12+
//Device-side busy wait: spin until the difference between the current clock64() reading and the
//reading taken on entry is no longer less than sleep_cycles
__device__ void sleep(clock_value_t sleep_cycles)
{
  const clock_value_t entry_tick = clock64();
  while(clock64() - entry_tick < sleep_cycles){
    //spin
  }
}
19+
//Kernel whose only work is to busy-wait on the device for 'wait' clock cycles
__global__ void the_kernel(clock_value_t wait)
{
  sleep(wait);
}
22+
23+
24+
25+
int main(int argc, char **argv)
26+
{
27+
MPI_Init(&argc, &argv); //tau plugin system is only initialized if MPI_Init called, even if not actually using MPI!
28+
assert(argc>=3);
29+
int cycles = std::stoi(argv[1]);
30+
int freq = std::stoi(argv[2]); //how frequently (in cycles) an outlier is inserted
31+
int ooffset = std::stoi(argv[3]); //how many cycles we wait for the first outlier to be inserted
32+
33+
int device_max = -1;
34+
35+
int which_device =0; //to which device is the outlier inserted
36+
long long base_cycles = 1e8;
37+
int mult = 100;
38+
int arg = 4;
39+
while(arg < argc){
40+
std::string sarg = argv[arg];
41+
if(sarg == "-mult"){
42+
mult = std::stoi(argv[arg+1]);
43+
std::cout << "Set mult to " << mult << std::endl;
44+
arg+=2;
45+
}else if(sarg == "-base"){
46+
std::stringstream ss; ss << argv[arg+1]; ss >> base_cycles;
47+
std::cout << "Set base cycles to " << base_cycles << std::endl;
48+
arg+=2;
49+
}else if(sarg == "-device_max"){
50+
device_max = std::stoi(argv[arg+1]);
51+
std::cout << "Set device max to " << device_max << std::endl;
52+
arg+=2;
53+
}else if(sarg == "-device"){
54+
which_device = std::stoi(argv[arg+1]);
55+
std::cout << "Outliers are being applied to device " << which_device << std::endl;
56+
arg+=2;
57+
}else{
58+
std::cerr << "Unknown arg " << sarg << std::endl;
59+
exit(-1);
60+
}
61+
}
62+
63+
int ndevice = 1;
64+
assert( hipGetDeviceCount(&ndevice) == hipSuccess );
65+
std::cout << "Number of devices " << ndevice << std::endl;
66+
67+
if(device_max != -1 && ndevice > device_max){
68+
ndevice = device_max;
69+
std::cout << "Constraining to " << ndevice << " devices" << std::endl;
70+
}
71+
72+
float *x;
73+
hipMallocManaged(&x, sizeof(float)); ///necessary to stop adios2 hanging!
74+
75+
std::default_random_engine reng(1234);
76+
std::uniform_real_distribution<> uniform_dist(0., 1.0);
77+
78+
for(int c=0;c<cycles;c++){
79+
std::cout << "Running kernel for cycle " << c << std::endl;
80+
for(int d=0;d<ndevice;d++){
81+
//long long wait = base_cycles * uniform_dist(reng);
82+
83+
long long wait = base_cycles;
84+
if(c>=ooffset && (c - ooffset) % freq == 0 && d==which_device)
85+
wait *= mult;
86+
87+
std::cout << "Issuing to device " << d << " a wait of " << wait << " cycles" << std::endl;
88+
hipSetDevice(d);
89+
the_kernel<<<1, 1>>>(wait);
90+
}
91+
92+
for(int d=0;d<ndevice;d++){
93+
std::cout << "Syncing device " << d << std::endl;
94+
hipSetDevice(d);
95+
hipDeviceSynchronize();
96+
}
97+
std::cout << "Sync complete" << std::endl;
98+
}
99+
100+
hipFree(x);
101+
102+
std::cout << "Done" << std::endl;
103+
104+
MPI_Finalize();
105+
106+
return 0;
107+
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#!/bin/bash
#Single-node launcher: starts the Chimbuko services, then the AD, then the benchmark application under TAU.
#Each stage is wrapped in "if (( 1 ))" so that it can be switched off individually by changing the 1 to 0.

set -e

####################### START OF USER VARIABLES ####################
cycles=5000 #Total number of loop iterations
freq=200 #How often to introduce anomalies into the loop
startcyc=200 #which cycle to introduce the first anomaly
base_cycles=10000000 #number of clock cycles in a normal kernel
anom_mult=100 #multiplier for anomalies
device_max=1 #maximum number of GPUs to run on
####################### END OF USER VARIABLES ####################

rm -rf chimbuko
export CHIMBUKO_CONFIG=chimbuko_config.sh
source "${CHIMBUKO_CONFIG}"
export CHIMBUKO_VERBOSE=1

if (( 1 )); then
    echo "Running services"
    ${chimbuko_services} 2>&1 | tee services.log &
    services_pid=$! #PID of the last process of the background pipeline (tee), which exits once its input is closed
    echo "Waiting"
    #Wait for the services to publish the AD command line, but give up if the services pipeline has
    #already exited without producing it; otherwise this loop would spin forever
    while [ ! -f chimbuko/vars/chimbuko_ad_cmdline.var ]; do
        if ! kill -0 "${services_pid}" 2>/dev/null; then
            #Re-check once in case the file was written just before the pipeline exited
            [ -f chimbuko/vars/chimbuko_ad_cmdline.var ] && break
            echo "Chimbuko services exited before writing chimbuko/vars/chimbuko_ad_cmdline.var; see services.log"
            exit 1
        fi
        sleep 1
    done
    ad_cmd=$(cat chimbuko/vars/chimbuko_ad_cmdline.var)
fi

if (( 1 )); then
    echo "Instantiating AD"
    eval "mpirun --allow-run-as-root -n 1 ${ad_cmd} 2>&1 | tee chimbuko/logs/ad.log &"
    sleep 2
fi

#Run the main program
if (( 1 )); then
    echo "Running main"

    #TAU_EXEC is deliberately unquoted: it holds a command plus its arguments and must be word-split
    mpirun --allow-run-as-root -n 1 ${TAU_EXEC} ./main ${cycles} ${freq} ${startcyc} -device_max ${device_max} -mult ${anom_mult} -base ${base_cycles} 2>&1 | tee chimbuko/logs/main.log
fi

#Block until the background jobs (services and AD) have finished
wait

src/ad/ADNetClient.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,16 +277,18 @@ struct ClientActionConnect: public ADThreadNetClient::ClientAction{
277277
ClientActionConnect(int rank, int srank, const std::string &sname): rank(rank), srank(srank), sname(sname){}
278278

279279
//Connect the supplied client to the parameter server (PS) via connect_ps using the stored rank, srank and sname,
//writing a message to verboseStream before and after the call.
//Diff view: the line following the "280-" marker is the removed pre-commit std::cout message; the lines following "+" markers are its replacements.
void perform(ADNetClient &client){
280-
std::cout << "Connecting to client" << std::endl;
280+
verboseStream << "ADThreadNetClient rank " << rank << " connecting to PS" << std::endl;
281281
client.connect_ps(rank, srank, sname);
282+
verboseStream << "ADThreadNetClient rank " << rank << " successfully connected to PS" << std::endl;
282283
}
283284
bool do_delete() const{ return true; }
284285
};
285286

286287
struct ClientActionDisconnect: public ADThreadNetClient::ClientAction{
287288
//Disconnect the supplied client from the parameter server (PS) via disconnect_ps,
//writing a message to verboseStream before and after the call.
//Diff view: the line following the "288-" marker is the removed pre-commit std::cout message; the lines following "+" markers are its replacements.
void perform(ADNetClient &client){
288-
std::cout << "Disconnecting from client" << std::endl;
289+
verboseStream << "ADThreadNetClient disconnecting from PS" << std::endl;
289290
client.disconnect_ps();
291+
verboseStream << "ADThreadNetClient successfully disconnected from PS" << std::endl;
290292
}
291293
bool do_delete() const{ return true; }
292294
bool shutdown_worker() const{ return true; }

src/ad/ADProvenanceDBclient.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,27 @@ sonata::AsyncRequest & AnomalousSendManager::getNewRequest(){
2525
}
2626

2727
void AnomalousSendManager::waitAll(){
28+
//Have another thread produce heartbeat information so we can know if the AD gets stuck waiting to flush
29+
std::atomic<bool> ready(false);
30+
int heartbeat_freq = 20;
31+
std::thread heartbeat([heartbeat_freq, &ready]{
32+
typedef std::chrono::high_resolution_clock Clock;
33+
typedef std::chrono::seconds Sec;
34+
Clock::time_point start = Clock::now();
35+
while(!ready.load(std::memory_order_relaxed)){
36+
int sec = std::chrono::duration_cast<Sec>(Clock::now() - start).count();
37+
if(sec >= heartbeat_freq && sec % heartbeat_freq == 0) //complain every heartbeat_freq seconds
38+
std::cout << "AnomalousSendManager::waitAll still waiting for queue flush after " << sec << "s" << std::endl;
39+
std::this_thread::sleep_for(std::chrono::seconds(1));
40+
}
41+
});
42+
2843
while(!outstanding.empty()){ //flush the queue
2944
outstanding.front().wait();
3045
outstanding.pop();
3146
}
47+
ready.store(true, std::memory_order_relaxed);
48+
heartbeat.join();
3249
}
3350

3451
size_t AnomalousSendManager::getNoutstanding(){

0 commit comments

Comments
 (0)