Skip to content

Commit 53d4af0

Browse files
committed
std<=100
1 parent a7c0af4 commit 53d4af0

4 files changed

Lines changed: 35 additions & 73 deletions

File tree

src/ad/ADOutlier.cpp

Lines changed: 32 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -297,28 +297,18 @@ unsigned long ADOutlierHBOS::compute_outliers(Anomalies &outliers,
297297

298298
HbosParam& param = *(HbosParam*)m_param;
299299

300-
//Display global bin_edges
301-
//std::vector<double> tmp_b_edges = param[func_id].bin_edges();
302-
//std::cout << "global bin_edges in compute_outliers: Size: " << tmp_b_edges.size() << std::endl;
303-
//for(int i=0; i<tmp_b_edges.size(); i++){
304-
// std::cout << tmp_b_edges.at(i) << std::endl;
305-
//}
306-
307-
//if (param[func_id].count() < 2){
308-
// VERBOSE(std::cout << "Less than 2 events in stats associated with that func, stats not complete" << std::endl);
309-
// return 0;
310-
//}
300+
311301
unsigned long n_outliers = 0;
312302

313303
//probability of runtime counts
314304
std::vector<double> prob_counts = std::vector<double>(param[func_id].counts().size(), 0.0);
315305
double tot_runtimes = std::accumulate(param[func_id].counts().begin(), param[func_id].counts().end(), 0.0);
316-
//std::cout << "Count and its Probability for func_id: " << std::to_string(func_id) << std::endl;
306+
317307
for(int i=0; i < param[func_id].counts().size(); i++){
318308
int count = param[func_id].counts().at(i);
319309
double p = count / tot_runtimes;
320310
prob_counts.at(i) += p;
321-
//std::cout << "Count: " << count << ", Probability: " << prob_counts.at(i) << std::endl;
311+
322312
}
323313

324314
//Create HBOS score vector
@@ -340,14 +330,15 @@ unsigned long ADOutlierHBOS::compute_outliers(Anomalies &outliers,
340330
}
341331
}
342332
std::cout << std::endl;
343-
// std::cout << "out_score_i size: " << out_scores_i.size() << std::endl;
344-
// std::cout << "min_score = " << min_score << std::endl;
345-
// std::cout << "max_score = " << max_score << std::endl;
333+
std::cout << "out_score_i size: " << out_scores_i.size() << std::endl;
334+
std::cout << "min_score = " << min_score << std::endl;
335+
std::cout << "max_score = " << max_score << std::endl;
336+
346337
if (out_scores_i.size() == 0) return 0;
347338

348339
//compute threshold
349-
//std::cout << "Global threshold before comparison with local threshold = " << param[func_id].get_threshold() << std::endl;
350-
double l_threshold = min_score + (m_threshold * (max_score - min_score));//m_threshold * max_score;
340+
std::cout << "Global threshold before comparison with local threshold = " << param[func_id].get_threshold() << std::endl;
341+
double l_threshold = min_score + (m_threshold * (max_score - min_score));
351342
if(m_use_global_threshold) {
352343
if(l_threshold < param[func_id].get_threshold()) {
353344
l_threshold = param[func_id].get_threshold();
@@ -357,47 +348,30 @@ unsigned long ADOutlierHBOS::compute_outliers(Anomalies &outliers,
357348
}
358349
}
359350

360-
//std::cout << "local threshold = " << l_threshold << " updated global_threshold = " << param[func_id].get_threshold() << std::endl;
361-
// For each datapoint get its corresponding bin index
362-
//std::vector<int> bin_inds = ADOutlierHBOS::np_digitize(param[func_id].runtimes, param[func_id].bin_edges);
363-
//if (bin_inds.size() < param[func_id].runtimes.size()) {
364-
// VERBOSE(std::cout << "INCORRECT bin_inds.size() < param[func_id].runtimes.size()\t: " << bin_inds.size() << " < " << param[func_id].runtimes.size() << std::endl);
365-
// return 0;
366-
//}
367-
368351
//Compute HBOS based score for each datapoint
369352
const double bin_width = param[func_id].bin_edges().at(1) - param[func_id].bin_edges().at(0);
370353
const int num_bins = param[func_id].counts().size();
371-
// std::cout << "Bin width: " << bin_width << std::endl;
372-
// std::cout << "Bin edges: " << std::endl;
373-
//for (int i=0; i< param[func_id].bin_edges().size(); i++){
374-
//std::cout << param[func_id].bin_edges().at(i) << std::endl;
375-
//}
354+
std::cout << "Bin width: " << bin_width << std::endl;
376355

377356
int top_out = 0;
378357
for (auto itt : data) {
379358
if (itt->get_label() == 0) {
380359

381360
const double runtime_i = this->getStatisticValue(*itt); //runtimes.push_back(this->getStatisticValue(*itt));
382361
double ad_score;
383-
// auto bin_it = std::upper_bound(param[func_id].bin_edges().begin(), param[func_id].bin_edges().end(), runtime_i);
384-
// if(bin_it == param[func_id].bin_edges().end()) {// Not in histogram
385-
// ad_score = max_score;
386-
// }
387-
// else{ //Found in Histogram
388-
// const int index = std::distance(param[func_id].bin_edges().begin(), bin_it);
389-
// ad_score = out_scores_i.at(index);
390-
// }
362+
391363
const int bin_ind = ADOutlierHBOS::np_digitize_get_bin_inds(runtime_i, param[func_id].bin_edges());
392364
std::cout << "bin_ind: " << bin_ind << " for runtime_i: " << runtime_i << std::endl;
393-
// If the sample does not belong to any bins
394-
// bin_ind == 0 (fall outside since it is too small)
365+
/**
366+
* If the sample does not belong to any bins
367+
* bin_ind == 0 (the sample falls outside the histogram on the left because it is smaller than the first bin edge)
368+
*/
395369
if( bin_ind == 0){
396-
double first_bin_edge = param[func_id].bin_edges().at(0);
397-
double dist = first_bin_edge - runtime_i;
370+
const double first_bin_edge = param[func_id].bin_edges().at(0);
371+
const double dist = first_bin_edge - runtime_i;
398372
if( dist <= (bin_width * 0.05) ){
399373
std::cout << runtime_i << " is on left of histogram but NOT outlier" << std::endl;
400-
if(param[func_id].counts().at(0) == 0) { // Ignore zero counts
374+
if(param[func_id].counts().at(0) == 0) { /**< Ignore zero counts */
401375

402376
ad_score = l_threshold - 1;
403377
std::cout << "corrected ad_score: " << ad_score << std::endl;
@@ -407,44 +381,46 @@ unsigned long ADOutlierHBOS::compute_outliers(Anomalies &outliers,
407381
}
408382
}
409383
else{
410-
std::cout << runtime_i << " is small and outlier" << std::endl;
384+
std::cout << runtime_i << " is on left of histogram and an outlier" << std::endl;
411385
ad_score = max_score;
412386
}
413-
//std::cout << "bin_index=0: Anomaly score of " << runtime_i << " = " << ad_score <<std::endl;
387+
414388
}
415-
// If the sample does not belong to any bins
389+
/**
390+
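* If the sample does not belong to any bins: bin_ind == num_bins + 1 (the sample falls outside the histogram on the right because it is larger than the last bin edge)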
391+
*/
416392
else if(bin_ind == num_bins + 1){
417-
int last_idx = param[func_id].bin_edges().size() - 1;
418-
double last_bin_edge = param[func_id].bin_edges().at(last_idx);
419-
double dist = runtime_i - last_bin_edge;
393+
const int last_idx = param[func_id].bin_edges().size() - 1;
394+
const double last_bin_edge = param[func_id].bin_edges().at(last_idx);
395+
const double dist = runtime_i - last_bin_edge;
420396

421397
if (dist <= (bin_width * 0.05)) {
422-
if(param[func_id].counts().at(bin_ind) == 0) { //- 1) == 0) { // Ignore zero counts
398+
if(param[func_id].counts().at(bin_ind) == 0) { /**< Ignore zero counts */
423399

424400
ad_score = l_threshold - 1;
425401
std::cout << "corrected ad_score: " << ad_score << std::endl;
426402
}
427403
else {
428-
std::cout << runtime_i << " is farther but NOT outlier" << std::endl;
404+
std::cout << runtime_i << " is on right of histogram but NOT outlier" << std::endl;
429405
ad_score = out_scores_i.at(num_bins - 1);
430406
}
431407
}
432408
else{
433-
std::cout << runtime_i << " is farther and outlier" << std::endl;
409+
std::cout << runtime_i << " is on right of histogram and an outlier" << std::endl;
434410
ad_score = max_score;
435411
}
436412

437413
}
438414
else {
439415

440-
if(param[func_id].counts().at(bin_ind) == 0) { // Ignore zero counts
416+
if(param[func_id].counts().at(bin_ind) == 0) { /**< Ignore zero counts */
441417

442418
ad_score = l_threshold - 1;
443419
std::cout << "corrected ad_score: " << ad_score << std::endl;
444420
}
445421
else {
446-
std::cout << runtime_i << " can be an outlier" << std::endl;
447-
ad_score = out_scores_i.at( bin_ind - 1); //bin_ind - 1);
422+
std::cout << runtime_i << " maybe be an outlier" << std::endl;
423+
ad_score = out_scores_i.at( bin_ind - 1);
448424
}
449425

450426
}

src/param/hbos_param.cpp

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -200,19 +200,7 @@ using namespace chimbuko;
200200
combined.add2counts(id, inc);
201201
}
202202
}
203-
// for (int i = 0; i < l.bin_edges().size() - 1; i++) {
204-
//
205-
// auto index_it = std::lower_bound(combined.bin_edges().begin(), combined.bin_edges().end(), l.bin_edges().at(i));
206-
// if (index_it != combined.bin_edges().end()){
207-
// const int id = std::distance(combined.bin_edges().begin(), index_it); // - 1;
208-
// const int inc = l.counts().at(i);
209-
// std::cout << "In l " << "id: " << id << ", inc: " << inc << std::endl;
210-
// //if(id < 0) {id = 0;}
211-
// if (id >= 0 && id < combined.counts().size())
212-
// combined.add2counts(id, inc);
213-
// }
214-
//
215-
// }
203+
216204
return combined;
217205
}
218206
else{ // bin_width is > 0
@@ -353,7 +341,7 @@ using namespace chimbuko;
353341
std::cout << "Final Variance in _scott_binWidth: " << var << std::endl;
354342
std = sqrt(var);
355343
std::cout << "STD in _scott_binWidth: " << std << std::endl;
356-
if (std <= 10.0) {return 0;}
344+
if (std <= 100.0) {return 0;}
357345

358346
return ((3.5 * std ) / pow(size, 1/3));
359347

test/unit_tests/ad/HBOSOutlierTestBPFile.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,7 @@ TEST(HBOSADOutlierBPFileWithoutPServer, Works) {
183183
unsigned long n_outliers = 0, n_tot_events = 0; //n_executions = 0,
184184
std::set<unsigned long> n_functions;
185185

186-
ASSERT_EQ(step, -1);
187-
186+
188187
unsigned long first_event_ts, last_event_ts;
189188

190189
int io_steps = 0;

test/unit_tests/ad/SSTDOutlierTestBPFile.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,6 @@ TEST(SSTDADOutlierBPFileWithoutPServer, Works) {
184184
unsigned long n_outliers = 0, n_tot_events = 0; //n_executions = 0,
185185
std::set<unsigned long> n_functions;
186186

187-
ASSERT_EQ(step, -1);
188187

189188
unsigned long first_event_ts, last_event_ts;
190189

0 commit comments

Comments
 (0)