Skip to content

Commit 2ccf3c4

Browse files
committed
For the new non-unit frequency options for PS stats send, AD model update and provDB data send, the sending is now staggered by the rank index to make the network node less "spiky"
1 parent 22c59d7 commit 2ccf3c4

6 files changed

Lines changed: 27 additions & 23 deletions

File tree

include/chimbuko/ad/ADOutlier.hpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,17 +51,17 @@ namespace chimbuko {
5151
* @brief Construct a new ADOutlier object
5252
*
5353
*/
54-
ADOutlier(OutlierStatistic stat = ExclusiveRuntime);
54+
ADOutlier(int rank, OutlierStatistic stat = ExclusiveRuntime);
5555
/**
5656
* @brief Destroy the ADOutlier object
5757
*
5858
*/
5959
virtual ~ADOutlier();
6060

6161
/**
62-
* @brief Fatory method to select AD algorithm at runtime
62+
* @brief Factory method to select AD algorithm at runtime
6363
*/
64-
static ADOutlier *set_algorithm(const std::string & algorithm, const AlgoParams &params);
64+
static ADOutlier *set_algorithm(int rank, const std::string & algorithm, const AlgoParams &params);
6565

6666
/**
6767
* @brief check if the parameter server is in use
@@ -170,6 +170,7 @@ namespace chimbuko {
170170
int m_sync_call_count; /**< count of calls to sync_param */
171171
int m_global_model_sync_freq; /**< how often the local model is pushed and synchronized with the globel model (default 1)*/
172172

173+
int m_rank; /**< rank index*/
173174
PerfStats *m_perf;
174175
private:
175176
OutlierStatistic m_statistic; /** Which statistic to use for outlier detection */
@@ -187,7 +188,7 @@ namespace chimbuko {
187188
* @brief Construct a new ADOutlierSSTD object
188189
*
189190
*/
190-
ADOutlierSSTD(OutlierStatistic stat = ExclusiveRuntime, double sigma = 6.0);
191+
ADOutlierSSTD(int rank, OutlierStatistic stat = ExclusiveRuntime, double sigma = 6.0);
191192
/**
192193
* @brief Destroy the ADOutlierSSTD object
193194
*
@@ -239,13 +240,14 @@ namespace chimbuko {
239240

240241
/**
241242
* @brief Construct a new ADOutlierHBOS object
243+
* @param rank Rank of AD instance
242244
* @param stat Which statistic to use
243245
* @param threshold The threshold defining an outlier
244246
* @param use_global_threshold The threshold is maintained as part of the global model
245247
* @param maxbins The maximum number of bins in the histograms
246248
*
247249
*/
248-
ADOutlierHBOS(OutlierStatistic stat = ExclusiveRuntime, double threshold = 0.99, bool use_global_threshold = true, int maxbins = 200);
250+
ADOutlierHBOS(int rank, OutlierStatistic stat = ExclusiveRuntime, double threshold = 0.99, bool use_global_threshold = true, int maxbins = 200);
249251

250252
/**
251253
* @brief Destroy the ADOutlierHBOS object
@@ -337,7 +339,7 @@ namespace chimbuko {
337339
* @brief Construct a new ADOutlierCOPOD object
338340
*
339341
*/
340-
ADOutlierCOPOD(OutlierStatistic stat = ExclusiveRuntime, double threshold = 0.99, bool use_global_threshold = true);
342+
ADOutlierCOPOD(int rank, OutlierStatistic stat = ExclusiveRuntime, double threshold = 0.99, bool use_global_threshold = true);
341343

342344
/**
343345
* @brief Destroy the ADOutlierCOPOD object

sim/src/ad.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ void ADsim::init(int window_size, int pid, int rid, unsigned long program_start,
7575
params.glob_thres = p.glob_thres;
7676
params.sstd_sigma = p.sstd_sigma;
7777

78-
m_outlier = ADOutlier::set_algorithm(p.algorithm, params);
78+
m_outlier = ADOutlier::set_algorithm(rid, p.algorithm, params);
7979
getPserver(); //force construction of pserver
8080
m_net_client = new ADThreadNetClient(true); //use local comms
8181
m_net_client->connect_ps(m_rid);

src/ad/ADOutlier.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ ADOutlier::AlgoParams::AlgoParams(): stat(ADOutlier::ExclusiveRuntime), sstd_sig
1919
/* ---------------------------------------------------------------------------
2020
* Implementation of ADOutlier class
2121
* --------------------------------------------------------------------------- */
22-
ADOutlier::ADOutlier(OutlierStatistic stat)
23-
: m_execDataMap(nullptr), m_param(nullptr), m_local_param(nullptr), m_use_ps(false), m_perf(nullptr), m_statistic(stat), m_sync_call_count(0), m_global_model_sync_freq(1)
22+
ADOutlier::ADOutlier(int rank, OutlierStatistic stat)
23+
: m_execDataMap(nullptr), m_param(nullptr), m_local_param(nullptr), m_use_ps(false), m_perf(nullptr), m_statistic(stat), m_sync_call_count(0), m_global_model_sync_freq(1), m_rank(rank)
2424
{
2525
}
2626

@@ -49,17 +49,17 @@ void loadPerFunctionThresholds(ADOutlierType* into, const std::string &filename)
4949
}
5050

5151

52-
ADOutlier *ADOutlier::set_algorithm(const std::string & algorithm, const AlgoParams &params) {
52+
ADOutlier *ADOutlier::set_algorithm(int rank, const std::string & algorithm, const AlgoParams &params) {
5353
if (algorithm == "sstd" || algorithm == "SSTD") {
54-
return new ADOutlierSSTD(params.stat, params.sstd_sigma);
54+
return new ADOutlierSSTD(rank, params.stat, params.sstd_sigma);
5555
}
5656
else if (algorithm == "hbos" || algorithm == "HBOS") {
57-
ADOutlierHBOS* alg = new ADOutlierHBOS(params.stat, params.hbos_thres, params.glob_thres, params.hbos_max_bins);
57+
ADOutlierHBOS* alg = new ADOutlierHBOS(rank, params.stat, params.hbos_thres, params.glob_thres, params.hbos_max_bins);
5858
loadPerFunctionThresholds(alg,params.func_threshold_file);
5959
return alg;
6060
}
6161
else if (algorithm == "copod" || algorithm == "COPOD") {
62-
ADOutlierCOPOD* alg = new ADOutlierCOPOD(params.stat, params.hbos_thres);
62+
ADOutlierCOPOD* alg = new ADOutlierCOPOD(rank, params.stat, params.hbos_thres);
6363
loadPerFunctionThresholds(alg,params.func_threshold_file);
6464
return alg;
6565
}
@@ -111,8 +111,8 @@ void ADOutlier::updateGlobalModel()
111111
PerfTimer timer;
112112
timer.start();
113113

114-
if ( m_sync_call_count % m_global_model_sync_freq == 0 ){
115-
verboseStream << "ADOutlier performing synchronization of local and global model" << std::endl;
114+
if ( m_sync_call_count == 0 || (m_sync_call_count + m_rank) % m_global_model_sync_freq == 0 ){ //apart from on first step, stagger updates over ranks by rank index
115+
verboseStream << "ADOutlier rank " << m_rank << " performing synchronization of local and global model on call count " << m_sync_call_count << std::endl;
116116
PerfTimer utimer;
117117
utimer.start();
118118

@@ -144,7 +144,7 @@ void ADOutlier::setIgnoreFunction(const std::string &func){
144144
/* ---------------------------------------------------------------------------
145145
* Implementation of ADOutlierSSTD class
146146
* --------------------------------------------------------------------------- */
147-
ADOutlierSSTD::ADOutlierSSTD(OutlierStatistic stat, double sigma) : ADOutlier(stat), m_sigma(sigma) {
147+
ADOutlierSSTD::ADOutlierSSTD(int rank, OutlierStatistic stat, double sigma) : ADOutlier(rank, stat), m_sigma(sigma) {
148148
m_param = new SstdParam();
149149
m_local_param = new SstdParam();
150150
}
@@ -281,7 +281,7 @@ unsigned long ADOutlierSSTD::compute_outliers(Anomalies &outliers,
281281
/* ---------------------------------------------------------------------------
282282
* Implementation of ADOutlierHBOS class
283283
* --------------------------------------------------------------------------- */
284-
ADOutlierHBOS::ADOutlierHBOS(OutlierStatistic stat, double threshold, bool use_global_threshold, int maxbins) : ADOutlier(stat), m_alpha(78.88e-32), m_threshold(threshold), m_use_global_threshold(use_global_threshold), m_maxbins(maxbins) {
284+
ADOutlierHBOS::ADOutlierHBOS(int rank, OutlierStatistic stat, double threshold, bool use_global_threshold, int maxbins) : ADOutlier(rank, stat), m_alpha(78.88e-32), m_threshold(threshold), m_use_global_threshold(use_global_threshold), m_maxbins(maxbins) {
285285
m_param = new HbosParam();
286286
m_local_param = new HbosParam();
287287
}
@@ -531,7 +531,7 @@ unsigned long ADOutlierHBOS::compute_outliers(Anomalies &outliers,
531531
/* ---------------------------------------------------------------------------
532532
* Implementation of ADOutlierCOPOD class
533533
* --------------------------------------------------------------------------- */
534-
ADOutlierCOPOD::ADOutlierCOPOD(OutlierStatistic stat, double threshold, bool use_global_threshold) : ADOutlier(stat), m_alpha(78.88e-32), m_threshold(threshold), m_use_global_threshold(use_global_threshold) {
534+
ADOutlierCOPOD::ADOutlierCOPOD(int rank, OutlierStatistic stat, double threshold, bool use_global_threshold) : ADOutlier(rank, stat), m_alpha(78.88e-32), m_threshold(threshold), m_use_global_threshold(use_global_threshold) {
535535
m_param = new CopodParam();
536536
m_local_param = new CopodParam();
537537
}

src/chimbuko.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ void Chimbuko::init_outlier(){
248248
else if(m_params.outlier_statistic == "inclusive_runtime") params.stat = ADOutlier::InclusiveRuntime;
249249
else{ fatal_error("Invalid statistic"); }
250250

251-
m_outlier = ADOutlier::set_algorithm(m_params.ad_algorithm, params);
251+
m_outlier = ADOutlier::set_algorithm(m_params.rank, m_params.ad_algorithm, params);
252252
m_outlier->linkExecDataMap(m_event->getExecDataMap()); //link the map of function index to completed calls such that they can be tagged as outliers if appropriate
253253
if(m_net_client) m_outlier->linkNetworkClient(m_net_client);
254254
m_outlier->linkPerf(&m_perf);
@@ -534,9 +534,10 @@ void Chimbuko::sendProvenance(const int step, bool force){
534534
#endif
535535
)
536536
&&
537-
( step % m_params.prov_io_freq == 0 || force )
537+
( (step + m_params.rank) % m_params.prov_io_freq == 0 || force ) //stagger sends over ranks by offsetting by rank index
538538
){
539539
//Get the provenance data
540+
verboseStream << "Chimbuko rank " << m_params.rank << " performing send of provenance data on step " << step << std::endl;
540541

541542
PerfTimer timer;
542543
//Write and send provenance data
@@ -633,8 +634,9 @@ void Chimbuko::gatherPSdata(const Anomalies &anomalies,
633634
}
634635

635636
void Chimbuko::sendPSdata(const int step, bool force){
636-
if(m_net_client && m_net_client->use_ps() && m_funcstats_buf.size() && (step % m_params.ps_send_stats_freq == 0 || force) ){
637+
if(m_net_client && m_net_client->use_ps() && m_funcstats_buf.size() && ( (step + m_params.rank) % m_params.ps_send_stats_freq == 0 || force) ){ //stagger sends by offsetting by rank
637638
//Send the data in a single communication
639+
verboseStream << "Chimbuko rank " << m_params.rank << " performing send of PS stats data on step " << step << std::endl;
638640
PerfTimer timer;
639641
timer.start();
640642
ADcombinedPSdataArray comb_stats(m_funcstats_buf, m_countstats_buf, m_anom_metrics_buf, &m_perf);

test/unit_tests/ad/COPODOutlier.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ TEST(CopodADOutlierTest, TestFunctionThresholdOverride){
264264
int func_id2 = 202;
265265

266266
double default_threshold = 0.99;
267-
ADOutlierCOPOD ad(ADOutlier::ExclusiveRuntime, default_threshold);
267+
ADOutlierCOPOD ad(0, ADOutlier::ExclusiveRuntime, default_threshold);
268268
ad.overrideFuncThreshold("my_func",0.77);
269269

270270
EXPECT_EQ(ad.getFunctionThreshold("my_func"), 0.77);

test/unit_tests/ad/HBOSOutlier.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ TEST(HBOSADOutlierTest, TestFunctionThresholdOverride){
287287
int func_id2 = 202;
288288

289289
double default_threshold = 0.99;
290-
ADOutlierHBOS ad(ADOutlier::ExclusiveRuntime, default_threshold);
290+
ADOutlierHBOS ad(0, ADOutlier::ExclusiveRuntime, default_threshold);
291291
ad.overrideFuncThreshold("my_func",0.77);
292292

293293
EXPECT_EQ(ad.getFunctionThreshold("my_func"), 0.77);

0 commit comments

Comments
 (0)