Skip to content

Commit 0b63ba6

Browse files
committed
Added an optionalCommandLineArgBase-derived class to AlgoParams, allowing it to be constructed automatically by giving a filename through the usual cmdline parser tools.
AD algorithms now have a function to return their name/type as a string. Moved the algorithm parameters into an AlgoParams instance inside the Chimbuko base class params struct. Passing of algorithm parameters to the Chimbuko driver is now performed by passing a single argument: the filename of the AlgoParams file. At the moment this JSON file is built by run_services to avoid changing the usage pattern.
1 parent 23b1934 commit 0b63ba6

9 files changed

Lines changed: 93 additions & 42 deletions

File tree

include/chimbuko/core/ad/ADOutlier.hpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "chimbuko/core/ad/ADNetClient.hpp"
1212
#include "chimbuko/core/util/PerfStats.hpp"
1313
#include "chimbuko/core/ad/ADDataInterface.hpp"
14+
#include "chimbuko/core/util/commandLineParser.hpp"
1415

1516
namespace chimbuko {
1617
/**
@@ -57,6 +58,31 @@ namespace chimbuko {
5758
* @brief Equivalence operator
5859
*/
5960
bool operator==(const AlgoParams &r) const;
61+
62+
/**
63+
* @brief Parser object that reads the data from a json file with provided filename
64+
*/
65+
class cmdlineParser: public optionalCommandLineArgBase{
66+
private:
67+
std::string m_arg; /**< The argument, format "-a" */
68+
std::string m_help_str; /**< The help string */
69+
AlgoParams &member;
70+
public:
71+
cmdlineParser(AlgoParams &member, const std::string &arg, const std::string &help_str): m_arg(arg), m_help_str(help_str), member(member){}
72+
73+
/**
74+
* @brief If the first string matches the internal arg string (eg "-help"), a number of strings are consumed from the array 'vals' and that number returned.
75+
* A value of -1 indicates the argument did not match.
76+
*
77+
* @param vals An array of strings
78+
* @param vals_size The length of the string array
79+
*/
80+
int parse(const std::string &arg, const char** vals, const int vals_size) override;
81+
/**
82+
* @brief Print the help string for this argument to the ostream
83+
*/
84+
void help(std::ostream &os) const override;
85+
};
6086
};
6187

6288

@@ -120,6 +146,10 @@ namespace chimbuko {
120146
*/
121147
void setGlobalModelSyncFrequency(int to){ m_global_model_sync_freq = to; }
122148

149+
/**
150+
* @brief Return the algorithm name
151+
*/
152+
virtual std::string getAlgorithmName() const = 0;
123153
protected:
124154
/** @brief Synchronize the input model with the global model
125155
*
@@ -176,6 +206,11 @@ namespace chimbuko {
176206

177207
void run(ADDataInterface &data, int step=0) override;
178208

209+
/**
210+
* @brief Return the algorithm name
211+
*/
212+
std::string getAlgorithmName() const override{ return "sstd"; }
213+
179214
protected:
180215

181216
/**
@@ -234,6 +269,11 @@ namespace chimbuko {
234269

235270
void run(ADDataInterface &data, int step=0) override;
236271

272+
/**
273+
* @brief Return the algorithm name
274+
*/
275+
std::string getAlgorithmName() const override{ return "hbos"; }
276+
237277
/**
238278
* @brief Override the default threshold for a particular function
239279
* @param func The function name
@@ -307,6 +347,11 @@ namespace chimbuko {
307347

308348
void run(ADDataInterface &data, int step=0) override;
309349

350+
/**
351+
* @brief Return the algorithm name
352+
*/
353+
std::string getAlgorithmName() const override{ return "copod"; }
354+
310355
/**
311356
* @brief Override the default threshold for a particular function
312357
* @param func The function name

include/chimbuko/core/chimbuko.hpp

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,9 @@ namespace chimbuko {
2424
int prov_record_stopstep; /**< If != -1, the IO step on which to stop recording provenance information for anomalies */
2525
int prov_io_freq; /**< The frequency, in steps, at which provenance data is written/sent to the provDB. For steps between it is buffered.*/
2626

27-
2827
//Parameters associated with the outlier detection algorithm
29-
std::string ad_algorithm; /**< Algorithm for Anomaly Detection. Set in config file*/
30-
31-
double outlier_sigma; /**< The number of sigma (standard deviations) away from the mean runtime for an event to be considered anomalous */
32-
33-
double hbos_threshold; /**< Threshold used by HBOS algorithm to filter outliers. Set in config file*/
34-
bool hbos_use_global_threshold; /**< Global threshold flag in HBOS*/
35-
int hbos_max_bins; /**< Maximum number of bins to use in HBOS algorithm histograms*/
36-
28+
ADOutlier::AlgoParams algo_params; /**< The algorithm parameters */
29+
3730
int global_model_sync_freq; /**< How often (in steps) the global model is updated (default 1)*/
3831

3932
//Parameters associated with communicating with the parameter server

scripts/launch/run_services.sh

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,17 @@ echo "==========================================="
7878
echo "Starting Chimbuko services with module: ${module}"
7979
echo "==========================================="
8080

81+
#Generate the AlgoParams JSON script from the inputs
82+
algo_params_file=${var_dir}/algo_params.json
83+
cat <<EOF > ${algo_params_file}
84+
{
85+
"algorithm" : "${ad_alg}",
86+
"glob_thres" : true,
87+
"hbos_max_bins" : 200,
88+
"hbos_thres" : ${ad_outlier_hbos_threshold},
89+
"sstd_sigma" : ${ad_outlier_sstd_sigma}
90+
}
91+
EOF
8192

8293
#Provenance database
8394
extra_args=${ad_extra_args}
@@ -445,14 +456,11 @@ fi
445456
#echo "Chimbuko Services: Processes are: " $(ps)
446457

447458
#Check that the variables passed to the AD from the config file are defined
448-
testit=${ad_outlier_sstd_sigma}
449459
testit=${ad_win_size}
450-
testit=${ad_alg}
451-
testit=${ad_outlier_hbos_threshold}
452460

453461
############################################
454462
#Generate the command to launch the AD module
455-
ad_opts="${extra_args} -err_outputpath ${log_dir} -outlier_sigma ${ad_outlier_sstd_sigma} -anom_win_size ${ad_win_size} -ad_algorithm ${ad_alg} -hbos_threshold ${ad_outlier_hbos_threshold}"
463+
ad_opts="${extra_args} -err_outputpath ${log_dir} -algo_params_file ${algo_params_file} -anom_win_size ${ad_win_size}"
456464

457465
if [[ "$(declare -p EXE_NAME)" =~ "declare -a" ]]; then
458466
echo "The user has specified a workflow comprising multiple components. Chimbuko will generate separate files containing pre-generated launch commands."

src/core/ad/ADOutlier.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,25 @@ nlohmann::json ADOutlier::AlgoParams::getJson() const{
6161
#undef JSON_SET
6262
}
6363

64+
int ADOutlier::AlgoParams::cmdlineParser::parse(const std::string &arg, const char** vals, const int vals_size){
65+
std::cout << "TEST ARG " << arg << "==" << m_arg << std::endl;
66+
if(arg == m_arg){
67+
std::cout << "TEST ARG FOUND ARG " << arg << std::endl;
68+
if(vals_size < 1) return -1;
69+
70+
try{
71+
member.loadJsonFile(vals[0]);
72+
}catch(const std::exception &exc){
73+
return -1;
74+
}
75+
return 1;
76+
}
77+
return -1;
78+
}
79+
void ADOutlier::AlgoParams::cmdlineParser::help(std::ostream &os) const{
80+
os << m_arg << " : " << m_help_str;
81+
}
82+
6483

6584

6685
/* ---------------------------------------------------------------------------

src/core/ad/ADcmdLineArgs.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,11 @@
11
#include<chimbuko/core/ad/ADcmdLineArgs.hpp>
2+
#include<chimbuko/core/ad/ADOutlier.hpp>
23
#include<chimbuko/core/verbose.hpp>
34

45
using namespace chimbuko;
56

67
void chimbuko::setupBaseOptionalArgs(commandLineParser &parser, ChimbukoBaseParams &into){
7-
addOptionalCommandLineArgWithDefault(parser, into, ad_algorithm, "hbos", "Set an AD algorithm to use: hbos or sstd (default \"hbos\").");
8-
addOptionalCommandLineArgWithDefault(parser, into, hbos_threshold, 0.99, "Set Threshold for HBOS anomaly detection filter (default 0.99).");
9-
addOptionalCommandLineArgWithDefault(parser, into, hbos_use_global_threshold, true, "Set true to use a global threshold in HBOS algorithm (default true).");
10-
addOptionalCommandLineArgWithDefault(parser, into, hbos_max_bins, 200, "Set the maximum number of bins for histograms in the HBOS algorithm (default 200).");
118
addOptionalCommandLineArgOptArgWithDefault(parser, into, ana_obj_idx, program_idx, 0, "Set the index associated with the instrumented program. Use to label components of a workflow. (default 0)");
12-
addOptionalCommandLineArgWithDefault(parser, into, outlier_sigma, 6.0, "Set the number of standard deviations that defines an anomalous event (default 6)");
139
addOptionalCommandLineArgWithDefault(parser, into, net_recv_timeout, 30000, "Timeout (in ms) for blocking receives on client from parameter server (default 30000)");
1410
addOptionalCommandLineArgWithDefault(parser, into, pserver_addr, "", "Set the address of the parameter server. If empty (default) the pserver will not be used.");
1511
addOptionalCommandLineArgWithDefault(parser, into, hpserver_nthr, 1, "Set the number of threads used by the hierarchical PS. This parameter is used to compute a port offset for the particular endpoint that this AD rank connects to (default 1)");
@@ -39,4 +35,6 @@ void chimbuko::setupBaseOptionalArgs(commandLineParser &parser, ChimbukoBasePara
3935
addOptionalCommandLineArgWithDefault(parser, into, prov_io_freq, 1, "Set the frequency in steps at which provenance data is writen/sent to the provDB (default 1)");
4036
addOptionalCommandLineArgWithDefault(parser, into, analysis_step_freq, 1, "Set the frequency in IO steps between analyzing the data. Data will be accumulated over intermediate steps. (default 1)");
4137
parser.addOptionalArg(progressHeadRank()=0, "-logging_head_rank", "Set the head rank upon which progress logging will be output (default 0)");
38+
39+
parser.addOptionalArg(new ADOutlier::AlgoParams::cmdlineParser(into.algo_params, "-algo_params_file", "Set the filename containing the algorithm name and hyperparameters."));
4240
}

src/core/chimbuko.cpp

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,8 @@ using namespace chimbuko;
1212
ChimbukoBaseParams::ChimbukoBaseParams(): rank(-1234), //not set!
1313
ana_obj_idx(0),
1414
verbose(true),
15-
outlier_sigma(6.),
1615
net_recv_timeout(30000),
1716
pserver_addr(""), hpserver_nthr(1),
18-
ad_algorithm("hbos"),
19-
hbos_threshold(0.99),
20-
hbos_use_global_threshold(true),
21-
hbos_max_bins(200),
2217
#ifdef ENABLE_PROVDB
2318
provdb_addr_dir(""), nprovdb_shards(1), nprovdb_instances(1), provdb_mercury_auth_key(""),
2419
#endif
@@ -38,15 +33,12 @@ ChimbukoBaseParams::ChimbukoBaseParams(): rank(-1234), //not set!
3833

3934

4035
void ChimbukoBaseParams::print() const{
41-
std::cout << "AD Algorithm: " << ad_algorithm
36+
std::cout << "AD Algorithm: " << algo_params.algorithm
4237
<< "\nAnalysis Objective Idx: " << ana_obj_idx
4338
<< "\nRank : " << rank
4439
#ifdef _USE_ZMQNET
4540
<< "\nPS Addr : " << pserver_addr
4641
#endif
47-
<< "\nSigma : " << outlier_sigma
48-
<< "\nHBOS/COPOD Threshold: " << hbos_threshold
49-
<< "\nUsing Global threshold: " << hbos_use_global_threshold
5042
<< "\nInterval : " << interval_msec << " msec"
5143
<< "\nNetClient Receive Timeout : " << net_recv_timeout << "msec"
5244
<< "\nPerf. metric outpath : " << perf_outputpath
@@ -143,15 +135,10 @@ void ChimbukoBase::init_net_client(){
143135

144136

145137
void ChimbukoBase::init_outlier(){
146-
ADOutlier::AlgoParams params;
147-
params.algorithm = m_base_params.ad_algorithm;
148-
params.hbos_thres = m_base_params.hbos_threshold;
149-
params.glob_thres = m_base_params.hbos_use_global_threshold;
150-
params.sstd_sigma = m_base_params.outlier_sigma;
151-
params.hbos_max_bins = m_base_params.hbos_max_bins;
138+
headProgressStream(m_base_params.rank) << "driver rank " << m_base_params.rank << " initializing outlier algorith with params:\n" << m_base_params.algo_params.getJson().dump(4) << std::endl;
152139
//params.func_threshold_file = m_base_params.func_threshold_file;
153140

154-
m_outlier = ADOutlier::set_algorithm(m_base_params.rank, params);
141+
m_outlier = ADOutlier::set_algorithm(m_base_params.rank, m_base_params.algo_params);
155142
if(m_net_client) m_outlier->linkNetworkClient(m_net_client);
156143
m_outlier->linkPerf(&m_perf);
157144
m_outlier->setGlobalModelSyncFrequency(m_base_params.global_model_sync_freq);

src/modules/performance_analysis/chimbuko.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -448,7 +448,7 @@ bool Chimbuko::readStep(std::unique_ptr<ADDataInterface> &iface){
448448
else{ fatal_error("Invalid statistic"); }
449449

450450
ADExecDataInterface *data_iface = new ADExecDataInterface(m_event->getExecDataMap(), stat);
451-
if( (this->getBaseParams().ad_algorithm == "sst" || this->getBaseParams().ad_algorithm == "SST") && std::getenv("CHIMBUKO_DISABLE_CUDA_JIT_WORKAROUND") == nullptr )
451+
if( this->getAD().getAlgorithmName() == "sstd" && std::getenv("CHIMBUKO_DISABLE_CUDA_JIT_WORKAROUND") == nullptr )
452452
data_iface->setIgnoreFirstFunctionCall(&m_func_seen);
453453
iface.reset(data_iface);
454454
}

test/mainADwithProvDB.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ TEST(ADTestWithProvDB, BpfileTest)
2828

2929
params.base_params.prov_outputpath = ""; //don't output
3030

31-
params.base_params.outlier_sigma = 6.0;
31+
params.base_params.algo_params.sstd_sigma = 6.0;
3232
params.base_params.only_one_frame = true; //just analyze first IO frame
33-
params.base_params.ad_algorithm = "sstd";
33+
params.base_params.algo_params.algorithm = "sstd";
3434

3535
params.base_params.provdb_addr_dir = addr_file_dir;
3636
params.base_params.nprovdb_shards = nshards;

test/test_ad.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,13 @@ TEST_F(ADTest, BpfileTest)
7676

7777
params.base_params.prov_outputpath = "./temp"; //have to output somewhere if provdb not in use
7878

79-
params.base_params.outlier_sigma = 6.0;
79+
params.base_params.algo_params.sstd_sigma = 6.0;
80+
params.base_params.algo_params.algorithm = "sstd";
8081
params.base_params.only_one_frame = true; //just analyze first IO frame
8182

8283
params.anom_win_size = 0;
8384

84-
params.base_params.ad_algorithm = "sstd";
85+
8586

8687
Chimbuko driver;
8788
int step;
@@ -249,10 +250,10 @@ TEST_F(ADTest, BpfileWithNetTest)
249250
params.base_params.perf_step = 1;
250251
#endif
251252

252-
params.base_params.only_one_frame = true;
253-
params.base_params.outlier_sigma = 6.0;
253+
params.base_params.algo_params.sstd_sigma = 6.0;
254+
params.base_params.algo_params.algorithm = "sstd";
254255

255-
params.base_params.ad_algorithm = "sstd";
256+
params.base_params.only_one_frame = true;
256257

257258
Chimbuko driver;
258259

0 commit comments

Comments
 (0)