Skip to content

Commit 5ab15fb

Browse files
authored
Merge pull request #18 from CODARcode/sm_release
HBOS functionality
2 parents 3361b0d + f6ca7ed commit 5ab15fb

50 files changed

Lines changed: 3403 additions & 673 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

app/driver.cpp

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
// #include "chimbuko/AD.hpp"
12
#include "chimbuko/chimbuko.hpp"
23
#include "chimbuko/verbose.hpp"
34
#include "chimbuko/util/string.hpp"
@@ -59,12 +60,15 @@ struct setLoggingHeadRankArg: public optionalCommandLineArgBase<ChimbukoParams>{
5960
}
6061
};
6162

62-
63+
6364

6465
optionalArgsParser & getOptionalArgsParser(){
6566
static bool initialized = false;
6667
static optionalArgsParser p;
6768
if(!initialized){
69+
addOptionalCommandLineArg(p, ad_algorithm, "Set an AD algorithm to use: hbos or sstd.");
70+
addOptionalCommandLineArg(p, hbos_threshold, "Set Threshold for HBOS anomaly detection filter.");
71+
addOptionalCommandLineArg(p, hbos_use_global_threshold, "Set true to use a global threshold in HBOS algorithm. Dafault is true.");
6872
addOptionalCommandLineArg(p, program_idx, "Set the index associated with the instrumented program. Use to label components of a workflow. (default 0)");
6973
addOptionalCommandLineArg(p, outlier_sigma, "Set the number of standard deviations that defines an anomalous event (default 6)");
7074
addOptionalCommandLineArg(p, pserver_addr, "Set the address of the parameter server. If empty (default) the pserver will not be used.");
@@ -73,9 +77,9 @@ optionalArgsParser & getOptionalArgsParser(){
7377
addOptionalCommandLineArg(p, anom_win_size, "When anomaly data are recorded a window of this size (in units of function execution events) around the anomalous event are also recorded (default 10)");
7478
addOptionalCommandLineArg(p, prov_outputpath, "Output provenance data to this directory. Can be used in place of or in conjunction with the provenance database. An empty string \"\" (default) disables this output");
7579
#ifdef ENABLE_PROVDB
76-
addOptionalCommandLineArg(p, provdb_addr, "Address of the provenance database. If empty (default) the provenance DB will not be used.\nHas format \"ofi+tcp;ofi_rxm://${IP_ADDR}:${PORT}\". Should also accept \"tcp://${IP_ADDR}:${PORT}\"");
77-
addOptionalCommandLineArg(p, nprovdb_shards, "Number of provenance database shards. Clients connect to shards round-robin by rank (default 1)");
78-
#endif
80+
addOptionalCommandLineArg(p, provdb_addr, "Address of the provenance database. If empty (default) the provenance DB will not be used.\nHas format \"ofi+tcp;ofi_rxm://${IP_ADDR}:${PORT}\". Should also accept \"tcp://${IP_ADDR}:${PORT}\"");
81+
addOptionalCommandLineArg(p, nprovdb_shards, "Number of provenance database shards. Clients connect to shards round-robin by rank (default 1)");
82+
#endif
7983
#ifdef _PERF_METRIC
8084
addOptionalCommandLineArg(p, perf_outputpath, "Output path for AD performance monitoring data. If an empty string (default) no output is written.");
8185
addOptionalCommandLineArg(p, perf_step, "How frequently (in IO steps) the performance data is dumped (default 10)");
@@ -97,7 +101,7 @@ optionalArgsParser & getOptionalArgsParser(){
97101
};
98102

99103
void printHelp(){
100-
std::cout << "Usage: driver <Trace engine type> <Trace directory> <Trace file prefix> <Options>\n"
104+
std::cout << "Usage: driver <Trace engine type> <Trace directory> <Trace file prefix> <Options>\n"
101105
<< "Where <Trace engine type> : BPFile or SST\n"
102106
<< " <Trace directory> : The directory in which the BPFile or SST file is located\n"
103107
<< " <Trace file prefix> : The prefix of the file (the trace file name without extension e.g. \"tau-metrics-mybinary\" for \"tau-metrics-mybinary.bp\")\n"
@@ -115,14 +119,16 @@ ChimbukoParams getParamsFromCommandLine(int argc, char** argv, const int mpi_wor
115119
// Parse command line arguments (cf chimbuko.hpp for detailed description of parameters)
116120
// -----------------------------------------------------------------------
117121
ChimbukoParams params;
118-
122+
119123
//Parameters for the connection to the instrumented binary trace output
120124
params.trace_engineType = argv[1]; // BPFile or SST
121125
params.trace_data_dir = argv[2]; // *.bp location
122126
std::string bp_prefix = argv[3]; // bp file prefix (e.g. tau-metrics-[nwchem])
123127

124128
//The remainder are optional arguments. Enable using the appropriate command line switch
125-
129+
params.ad_algorithm = "hbos";
130+
params.hbos_threshold = 0.99;
131+
params.hbos_use_global_threshold = true;
126132
params.pserver_addr = ""; //don't use pserver by default
127133
params.hpserver_nthr = 1;
128134
params.outlier_sigma = 6.0; // anomaly detection algorithm parameter
@@ -140,7 +146,7 @@ ChimbukoParams getParamsFromCommandLine(int argc, char** argv, const int mpi_wor
140146
params.rank = -1234; //assign an invalid value as default for use below
141147
params.outlier_statistic = "exclusive_runtime";
142148
params.step_report_freq = 1;
143-
149+
144150
getOptionalArgsParser().parse(params, argc-4, (const char**)(argv+4));
145151

146152
//By default assign the rank index of the trace data as the MPI rank of the AD process
@@ -157,7 +163,7 @@ ChimbukoParams getParamsFromCommandLine(int argc, char** argv, const int mpi_wor
157163
//Thus we need to obtain the input data rank also from the command line and modify the filename accordingly
158164
if(params.override_rank)
159165
params.trace_inputFile = bp_prefix + "-" + std::to_string(overrideRankArg::input_data_rank()) + ".bp";
160-
166+
161167
//If neither the provenance database or the provenance output path are set, default to outputting to pwd
162168
if(params.prov_outputpath.size() == 0
163169
#ifdef ENABLE_PROVDB
@@ -169,21 +175,21 @@ ChimbukoParams getParamsFromCommandLine(int argc, char** argv, const int mpi_wor
169175

170176
return params;
171177
}
172-
173-
178+
179+
174180

175181

176182
int main(int argc, char ** argv){
177183
if(argc == 1 || (argc == 2 && std::string(argv[1]) == "-help") ){
178184
printHelp();
179185
return 0;
180186
}
181-
187+
182188
assert( MPI_Init(&argc, &argv) == MPI_SUCCESS );
183189

184190
int mpi_world_rank, mpi_world_size;
185191
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_world_rank);
186-
MPI_Comm_size(MPI_COMM_WORLD, &mpi_world_size);
192+
MPI_Comm_size(MPI_COMM_WORLD, &mpi_world_size);
187193

188194
//Parse environment variables
189195
if(const char* env_p = std::getenv("CHIMBUKO_VERBOSE"))
@@ -202,7 +208,7 @@ int main(int argc, char ** argv){
202208

203209
bool error = false;
204210

205-
try
211+
try
206212
{
207213
//Instantiate Chimbuko
208214
Chimbuko driver(params);
@@ -214,24 +220,24 @@ int main(int argc, char ** argv){
214220
unsigned long total_n_outliers = 0, n_outliers = 0;
215221
unsigned long total_processing_time = 0, processing_time = 0;
216222
unsigned long long n_func_events = 0, n_comm_events = 0, n_counter_events = 0;
217-
unsigned long long total_n_func_events = 0, total_n_comm_events = 0, total_n_counter_events = 0;
223+
unsigned long long total_n_func_events = 0, total_n_comm_events = 0, total_n_counter_events = 0;
218224
high_resolution_clock::time_point t1, t2;
219225

220226
// -----------------------------------------------------------------------
221227
// Start analysis
222228
// -----------------------------------------------------------------------
223-
headProgressStream(params.rank) << "Driver rank " << params.rank
224-
<< ": analysis start " << (driver.use_ps() ? "with": "without")
229+
headProgressStream(params.rank) << "Driver rank " << params.rank
230+
<< ": analysis start " << (driver.use_ps() ? "with": "without")
225231
<< " pserver" << std::endl;
226232

227233
t1 = high_resolution_clock::now();
228-
driver.run(n_func_events,
234+
driver.run(n_func_events,
229235
n_comm_events,
230236
n_counter_events,
231237
n_outliers,
232238
frames);
233239
t2 = high_resolution_clock::now();
234-
240+
235241
if (params.rank == 0) {
236242
headProgressStream(params.rank) << "Driver rank " << params.rank << ": analysis done!\n";
237243
driver.show_status(true);
@@ -266,7 +272,7 @@ int main(int argc, char ** argv){
266272
total_n_comm_events = global_measures[1];
267273
total_n_counter_events = global_measures[2];
268274
}
269-
275+
270276
headProgressStream(params.rank) << "Driver rank " << params.rank << ": Final report\n"
271277
<< "Avg. num. frames over MPI ranks : " << (double)total_frames/(double)mpi_world_size << "\n"
272278
<< "Avg. processing time over MPI ranks : " << (double)total_processing_time/(double)mpi_world_size << " msec\n"

app/pserver.cpp

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@
1616

1717
#include <chimbuko/param/sstd_param.hpp>
1818
#include <chimbuko/util/commandLineParser.hpp>
19+
#include <chimbuko/util/error.hpp>
1920
#include <fstream>
2021
#include "chimbuko/verbose.hpp"
2122

2223

24+
2325
using namespace chimbuko;
2426

2527
struct pserverArgs{
@@ -40,6 +42,7 @@ struct pserverArgs{
4042
int stat_send_freq;
4143

4244
std::string stat_outputdir;
45+
std::string ad;
4346

4447
#ifdef _USE_ZMQNET
4548
int max_pollcyc_msg;
@@ -50,8 +53,8 @@ struct pserverArgs{
5053
#ifdef ENABLE_PROVDB
5154
std::string provdb_addr;
5255
#endif
53-
54-
pserverArgs(): nt(-1), logdir("."), ws_addr(""), load_params_set(false), save_params_set(false), freeze_params(false), stat_send_freq(1000), stat_outputdir(""), port(5559)
56+
57+
pserverArgs(): ad("hbos"), nt(-1), logdir("."), ws_addr(""), load_params_set(false), save_params_set(false), freeze_params(false), stat_send_freq(1000), stat_outputdir(""), port(5559)
5558
#ifdef _USE_ZMQNET
5659
, max_pollcyc_msg(10), zmq_io_thr(1), autoshutdown(true)
5760
#endif
@@ -64,6 +67,7 @@ struct pserverArgs{
6467
static bool init = false;
6568
static commandLineParser<pserverArgs> p;
6669
if(!init){
70+
addOptionalCommandLineArg(p, ad, "Set AD algorithm to use.");
6771
addOptionalCommandLineArg(p, nt, "Set the number of RPC handler threads (max-2 by default)");
6872
addOptionalCommandLineArg(p, logdir, "Set the output log directory (default: job directory)");
6973
addOptionalCommandLineArg(p, port, "Set the pserver port (default: 5559)");
@@ -79,7 +83,7 @@ struct pserverArgs{
7983
addOptionalCommandLineArg(p, autoshutdown, "If enabled the pserver will automatically shutdown when all clients have disconnected (default: true)");
8084
#endif
8185
#ifdef ENABLE_PROVDB
82-
addOptionalCommandLineArg(p, provdb_addr, "Address of the provenance database. If empty (default) the global function and counter statistics will not be send to the provenance DB.\nHas format \"ofi+tcp;ofi_rxm://${IP_ADDR}:${PORT}\". Should also accept \"tcp://${IP_ADDR}:${PORT}\"");
86+
addOptionalCommandLineArg(p, provdb_addr, "Address of the provenance database. If empty (default) the global function and counter statistics will not be send to the provenance DB.\nHas format \"ofi+tcp;ofi_rxm://${IP_ADDR}:${PORT}\". Should also accept \"tcp://${IP_ADDR}:${PORT}\"");
8387
#endif
8488

8589
init = true;
@@ -107,13 +111,18 @@ int main (int argc, char ** argv){
107111
if(const char* env_p = std::getenv("CHIMBUKO_VERBOSE")){
108112
progressStream << "Pserver: Enabling verbose debug output" << std::endl;
109113
enableVerboseLogging() = true;
110-
}
114+
}
111115

112-
SstdParam param; //global collection of parameters used to identify anomalies
116+
ParamInterface * param = ParamInterface::set_AdParam(args.ad); //"hbos"); //sstd"); //HbosParam param; //global collection of parameters used to identify anomalies
117+
if (param == nullptr) {
118+
fatal_error("INCORRECT algorithm for AdParam: Not Found. Choose sstd or hbos.");
119+
// verboseStream << "INCORRECT algorithm for AdParam: Not Found. Choose sstd or hbos." << std::endl;
120+
// exit(EXIT_FAILURE);
121+
}
113122
GlobalAnomalyStats global_func_stats; //global anomaly statistics
114123
GlobalCounterStats global_counter_stats; //global counter statistics
115124
PSglobalFunctionIndexMap global_func_index_map; //mapping of function name to global index
116-
125+
117126
//Optionally load previously-computed AD algorithm statistics
118127
if(args.load_params_set){
119128
progressStream << "Pserver: Loading parameters from input file " << args.load_params << std::endl;
@@ -122,7 +131,7 @@ int main (int argc, char ** argv){
122131
nlohmann::json in_p;
123132
in >> in_p;
124133
global_func_index_map.deserialize(in_p["func_index_map"]);
125-
param.assign(in_p["alg_params"].dump());
134+
param->assign(in_p["alg_params"].dump()); //param.assign(in_p["alg_params"].dump());
126135
}
127136

128137
#ifdef _USE_MPINET
@@ -172,8 +181,8 @@ int main (int argc, char ** argv){
172181
if(args.stat_outputdir.size()) std::cout << "(dir @ " << args.stat_outputdir << ")";
173182
}
174183

175-
net.add_payload(new NetPayloadUpdateParams(&param, args.freeze_params));
176-
net.add_payload(new NetPayloadGetParams(&param));
184+
net.add_payload(new NetPayloadUpdateParams(param, args.freeze_params)); //new NetPayloadUpdateParams(&param, args.freeze_params));
185+
net.add_payload(new NetPayloadGetParams(param)); //new NetPayloadGetParams(&param));
177186
net.add_payload(new NetPayloadUpdateAnomalyStats(&global_func_stats));
178187
net.add_payload(new NetPayloadUpdateCounterStats(&global_counter_stats));
179188
net.add_payload(new NetPayloadGlobalFunctionIndexMapBatched(&global_func_index_map));
@@ -206,7 +215,7 @@ int main (int argc, char ** argv){
206215
progressStream << "Pserver: sending final statistics to provDB" << std::endl;
207216
provdb_client.sendMultipleData(global_func_stats.collect_func_data(), GlobalProvenanceDataType::FunctionStats);
208217
provdb_client.sendMultipleData(global_counter_stats.get_json_state(), GlobalProvenanceDataType::CounterStats);
209-
progressStream << "Pserver: disconnecting from provDB" << std::endl;
218+
progressStream << "Pserver: disconnecting from provDB" << std::endl;
210219
provdb_client.disconnect();
211220
}
212221
#endif
@@ -218,7 +227,7 @@ int main (int argc, char ** argv){
218227
o.open(args.logdir + "/parameters.txt");
219228
if (o.is_open())
220229
{
221-
param.show(o);
230+
param->show(o); //param.show(o);
222231
o.close();
223232
}
224233
}
@@ -242,12 +251,12 @@ int main (int argc, char ** argv){
242251

243252
//Optionally save the final AD algorithm parameters
244253
if(args.save_params_set){
245-
progressStream << "PServer: Saving parameters to output file " << args.save_params << std::endl;
254+
progressStream << "PServer: Saving parameters to output file " << args.save_params << std::endl;
246255
std::ofstream out(args.save_params);
247256
if(!out.good()) throw std::runtime_error("Could not write anomaly algorithm parameters to the file provided");
248257
nlohmann::json out_p;
249258
out_p["func_index_map"] = global_func_index_map.serialize();
250-
out_p["alg_params"] = nlohmann::json::parse(param.serialize());
259+
out_p["alg_params"] = nlohmann::json::parse(param->serialize()); //param.serialize());
251260
out << out_p;
252261
}
253262

configure.ac

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ if test "x$with_adios2" != "x"; then
9494
AC_MSG_NOTICE([Detected ADIOS 2.6+])
9595
fi
9696
else
97-
AC_MSG_FAILURE(["ADIOS2 install path must be specified with --with-adios2"])
97+
AC_MSG_FAILURE(["ADIOS2 install path must be specified with --with-adios2"])
9898
fi
9999

100100
#Choose network

include/Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
nobase_include_HEADERS = chimbuko/chimbuko.hpp chimbuko/ad/ADOutlier.hpp chimbuko/ad/ADNetClient.hpp chimbuko/ad/ADAnomalyProvenance.hpp chimbuko/ad/ADParser.hpp chimbuko/ad/ADProvenanceDBengine.hpp chimbuko/ad/ADLocalFuncStatistics.hpp chimbuko/ad/ADMetadataParser.hpp chimbuko/ad/ADDefine.hpp chimbuko/ad/ADglobalFunctionIndexMap.hpp chimbuko/ad/ADProvenanceDBclient.hpp chimbuko/ad/ADEvent.hpp chimbuko/ad/ADCounter.hpp chimbuko/ad/ADLocalCounterStatistics.hpp chimbuko/ad/ExecData.hpp chimbuko/ad/AnomalyData.hpp chimbuko/ad/utils.hpp chimbuko/ad/ADio.hpp chimbuko/verbose.hpp chimbuko/pserver/global_anomaly_stats.hpp chimbuko/pserver/PSglobalFunctionIndexMap.hpp chimbuko/pserver/PSstatSender.hpp chimbuko/pserver/global_counter_stats.hpp chimbuko/param/sstd_param.hpp chimbuko/AD.hpp chimbuko/message.hpp chimbuko/net.hpp chimbuko/pserver.hpp chimbuko/param.hpp chimbuko/net/mpi_net.hpp chimbuko/net/zmq_net.hpp chimbuko/net/zmqme_net.hpp chimbuko/util/RunStats.hpp chimbuko/util/ADIOS2parseUtils.hpp chimbuko/util/map.hpp chimbuko/util/string.hpp chimbuko/util/DispatchQueue.hpp chimbuko/util/PerfStats.hpp chimbuko/util/RunMetric.hpp chimbuko/util/Anomalies.hpp chimbuko/util/barrier.hpp chimbuko/util/threadPool.hpp chimbuko/util/hash.hpp chimbuko/util/mtQueue.hpp chimbuko/util/commandLineParser.hpp
1+
nobase_include_HEADERS = chimbuko/chimbuko.hpp chimbuko/ad/ADOutlier.hpp chimbuko/ad/ADNetClient.hpp chimbuko/ad/ADAnomalyProvenance.hpp chimbuko/ad/ADParser.hpp chimbuko/ad/ADProvenanceDBengine.hpp chimbuko/ad/ADLocalFuncStatistics.hpp chimbuko/ad/ADMetadataParser.hpp chimbuko/ad/ADDefine.hpp chimbuko/ad/ADglobalFunctionIndexMap.hpp chimbuko/ad/ADProvenanceDBclient.hpp chimbuko/ad/ADEvent.hpp chimbuko/ad/ADCounter.hpp chimbuko/ad/ADLocalCounterStatistics.hpp chimbuko/ad/ExecData.hpp chimbuko/ad/AnomalyData.hpp chimbuko/ad/utils.hpp chimbuko/ad/ADio.hpp chimbuko/verbose.hpp chimbuko/pserver/global_anomaly_stats.hpp chimbuko/pserver/PSglobalFunctionIndexMap.hpp chimbuko/pserver/PSstatSender.hpp chimbuko/pserver/global_counter_stats.hpp chimbuko/param/sstd_param.hpp chimbuko/param/hbos_param.hpp chimbuko/AD.hpp chimbuko/message.hpp chimbuko/net.hpp chimbuko/pserver.hpp chimbuko/param.hpp chimbuko/net/mpi_net.hpp chimbuko/net/zmq_net.hpp chimbuko/net/zmqme_net.hpp chimbuko/util/RunStats.hpp chimbuko/util/ADIOS2parseUtils.hpp chimbuko/util/map.hpp chimbuko/util/string.hpp chimbuko/util/DispatchQueue.hpp chimbuko/util/PerfStats.hpp chimbuko/util/RunMetric.hpp chimbuko/util/Anomalies.hpp chimbuko/util/barrier.hpp chimbuko/util/threadPool.hpp chimbuko/util/hash.hpp chimbuko/util/mtQueue.hpp chimbuko/util/commandLineParser.hpp

include/chimbuko/ad/ADAnomalyProvenance.hpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,21 +26,21 @@ namespace chimbuko{
2626
* @param io_step Index of io step
2727
* @param io_step_tstart Timestamp of beginning of io frame
2828
* @param io_step_tend Timestamp of end of io frame
29-
*/
29+
*/
3030
ADAnomalyProvenance(const ExecData_t &call,
3131
const ADEvent &event_man, //for stack trace
3232
const ParamInterface &algo_params, //algorithm parameters
3333
const ADCounter &counters, //for counters
3434
const ADMetadataParser &metadata, //for env information including GPU context/device/stream
3535
const int window_size,
36-
const int io_step,
36+
const int io_step,
3737
const unsigned long io_step_tstart, const unsigned long io_step_tend);
3838

3939
/**
4040
* @brief Serialize anomaly data into JSON construct
4141
*/
4242
nlohmann::json get_json() const;
43-
43+
4444
/**
4545
* @brief Extract the json provDB entries for the anomalies and normal events from an Anomalies collection
4646
*
@@ -69,7 +69,7 @@ namespace chimbuko{
6969
const unsigned int anom_win_size,
7070
const ParamInterface &algo_params,
7171
const ADEvent &event_man,
72-
const ADCounter &counters,
72+
const ADCounter &counters,
7373
const ADMetadataParser &metadata);
7474

7575
/**
@@ -98,20 +98,20 @@ namespace chimbuko{
9898
const unsigned int anom_win_size,
9999
const ParamInterface &algo_params,
100100
const ADEvent &event_man,
101-
const ADCounter &counters,
101+
const ADCounter &counters,
102102
const ADMetadataParser &metadata){
103103
PerfStats perf;
104104
getProvenanceEntries(anom_event_entries, normal_event_entries,
105105
normal_event_manager, perf, anomalies, step, first_event_ts, last_event_ts,
106106
anom_win_size, algo_params, event_man, counters, metadata);
107-
}
107+
}
108108

109109
private:
110110
/**
111111
* @brief Get the call stack
112112
*/
113113
void getStackInformation(const ExecData_t &call, const ADEvent &event_man);
114-
114+
115115
/**
116116
* @brief Get counters in execution window
117117
*/

0 commit comments

Comments
 (0)