@@ -247,6 +247,7 @@ Anomalies ADOutlierHBOS::run(int step) {
247247 HbosParam& g = *(HbosParam*)m_param;
248248 for (auto it : *m_execDataMap) { // loop over functions (key is function index)
249249 unsigned long func_id = it.first ;
250+ Histogram &hist = param[func_id];
250251 std::vector<double > runtimes;
251252 for (auto itt : it.second ) { // loop over events for that function
252253 if (itt->get_label () == 0 ) {
@@ -267,17 +268,17 @@ Anomalies ADOutlierHBOS::run(int step) {
267268 if (runtimes.size () > 0 ) {
268269 if (!g.find (func_id)) { // If func_id does not exist
269270
270- const int r = param[func_id] .create_histogram (runtimes);
271+ const int r = hist .create_histogram (runtimes);
271272 if (r < 0 ) {
272273 recoverable_error (std::string (" AD: Func_ID does not exist" ));
273274 continue ;
274275 }
275276 }
276277 else { // merge with existing func_id, not overwrite
277278
278- const int r = param[func_id] .merge_histograms (g[func_id], runtimes);
279+ const int r = hist .merge_histograms (g[func_id], runtimes);
279280 if (r < 0 ) {
280- recoverable_error ( std::string ( " AD: Merging error received " )) ;
281+ verboseStream << " AD: Merging reset " << std::endl ;
281282 continue ;
282283 }
283284 }
@@ -315,16 +316,16 @@ unsigned long ADOutlierHBOS::compute_outliers(Anomalies &outliers,
315316 verboseStream << " Finding outliers in events for func " << func_id << std::endl;
316317
317318 HbosParam& param = *(HbosParam*)m_param;
318-
319+ Histogram &hist = param[func_id];
319320
320321 unsigned long n_outliers = 0 ;
321322
322323 // probability of runtime counts
323- std::vector<double > prob_counts = std::vector<double >(param[func_id] .counts ().size (), 0.0 );
324- double tot_runtimes = std::accumulate (param[func_id] .counts ().begin (), param[func_id] .counts ().end (), 0.0 );
324+ std::vector<double > prob_counts = std::vector<double >(hist .counts ().size (), 0.0 );
325+ double tot_runtimes = std::accumulate (hist .counts ().begin (), hist .counts ().end (), 0.0 );
325326
326- for (int i=0 ; i < param[func_id] .counts ().size (); i++){
327- int count = param[func_id] .counts ().at (i);
327+ for (int i=0 ; i < hist .counts ().size (); i++){
328+ int count = hist .counts ().at (i);
328329 double p = count / tot_runtimes;
329330 prob_counts.at (i) += p;
330331
@@ -338,7 +339,7 @@ unsigned long ADOutlierHBOS::compute_outliers(Anomalies &outliers,
338339 for (int i=0 ; i < prob_counts.size (); i++){
339340 double l = -1 * log2 (prob_counts.at (i) + m_alpha);
340341 out_scores_i.push_back (l);
341- verboseStream << " Count: " << param[func_id] .counts ().at (i) << " , Probability: " << prob_counts.at (i) << " , score: " << l << std::endl;
342+ verboseStream << " Count: " << hist .counts ().at (i) << " , Probability: " << prob_counts.at (i) << " , score: " << l << std::endl;
342343 if (prob_counts.at (i) > 0 ) {
343344 if (l < min_score){
344345 min_score = l;
@@ -356,20 +357,19 @@ unsigned long ADOutlierHBOS::compute_outliers(Anomalies &outliers,
356357 if (out_scores_i.size () <= 0 ) {return 0 ;}
357358
358359 // compute threshold
359- verboseStream << " Global threshold before comparison with local threshold = " << param[func_id] .get_threshold () << std::endl;
360+ verboseStream << " Global threshold before comparison with local threshold = " << hist .get_threshold () << std::endl;
360361 double l_threshold = min_score + (m_threshold * (max_score - min_score));
361362 if (m_use_global_threshold) {
362- if (l_threshold < param[func_id] .get_threshold ()) {
363- l_threshold = param[func_id] .get_threshold ();
363+ if (l_threshold < hist .get_threshold ()) {
364+ l_threshold = hist .get_threshold ();
364365 } else {
365- param[func_id].set_glob_threshold (l_threshold); // .get_histogram().glob_threshold = l_threshold;
366- // std::pair<size_t, size_t> msgsz_thres_update = sync_param(&param);
366+ hist.set_glob_threshold (l_threshold);
367367 }
368368 }
369369
370370 // Compute HBOS based score for each datapoint
371- const double bin_width = param[func_id] .bin_edges ().at (1 ) - param[func_id] .bin_edges ().at (0 );
372- const int num_bins = param[func_id] .counts ().size ();
371+ const double bin_width = hist .bin_edges ().at (1 ) - hist .bin_edges ().at (0 );
372+ const int num_bins = hist .counts ().size ();
373373 verboseStream << " Bin width: " << bin_width << std::endl;
374374
375375 int top_out = 0 ;
@@ -379,20 +379,20 @@ unsigned long ADOutlierHBOS::compute_outliers(Anomalies &outliers,
379379 const double runtime_i = this ->getStatisticValue (*itt); // runtimes.push_back(this->getStatisticValue(*itt));
380380 double ad_score;
381381
382- const int bin_ind = ADOutlierHBOS::np_digitize_get_bin_inds (runtime_i, param[func_id] .bin_edges ());
383- verboseStream << " bin_ind: " << bin_ind << " for runtime_i: " << runtime_i << " , where bin_edges Size:" << param[func_id] .bin_edges ().size () << " & num_bins: " << num_bins << std::endl;
382+ const int bin_ind = ADOutlierHBOS::np_digitize_get_bin_inds (runtime_i, hist .bin_edges ());
383+ verboseStream << " bin_ind: " << bin_ind << " for runtime_i: " << runtime_i << " , where bin_edges Size:" << hist .bin_edges ().size () << " & num_bins: " << num_bins << std::endl;
384384 /* *
385- * If the sample does not belong to any bins
385+ * Sample (datapoint) can be in either first bin or does not belong to any bins
386386 * bin_ind == 0 (fall outside since it is too small)
387387 */
388388 if ( bin_ind == 0 ){
389- const double first_bin_edge = param[func_id] .bin_edges ().at (0 );
389+ const double first_bin_edge = hist .bin_edges ().at (0 );
390390 const double dist = first_bin_edge - runtime_i;
391391 verboseStream << " First_bin_edge: " << first_bin_edge << std::endl;
392392 if ( dist <= (bin_width * 0.05 ) ){
393- verboseStream << runtime_i << " is on left of histogram but NOT outlier" << std::endl;
394- if (param[func_id] .counts ().size () < 1 ) {return 0 ;}
395- if (param[func_id] .counts ().at (0 ) == 0 ) { /* *< Ignore zero counts */
393+ verboseStream << runtime_i << " is in first bin of Histogram but NOT outlier" << std::endl;
394+ if (hist .counts ().size () < 1 ) {return 0 ;}
395+ if (hist .counts ().at (0 ) == 0 ) { /* *< Ignore zero counts */
396396
397397 ad_score = l_threshold - 1 ;
398398 verboseStream << " corrected ad_score: " << ad_score << std::endl;
@@ -403,7 +403,7 @@ unsigned long ADOutlierHBOS::compute_outliers(Anomalies &outliers,
403403 }
404404 }
405405 else {
406- verboseStream << runtime_i << " is on left of histogram and an outlier" << std::endl;
406+ verboseStream << runtime_i << " is NOT in first bin of Histogram and it IS an outlier" << std::endl;
407407 ad_score = max_score;
408408 verboseStream << " ad_score(max_score): " << ad_score << std::endl;
409409 }
@@ -413,12 +413,12 @@ unsigned long ADOutlierHBOS::compute_outliers(Anomalies &outliers,
413413 * If the sample does not belong to any bins
414414 */
415415 else if (bin_ind == num_bins + 1 ){
416- const int last_idx = param[func_id] .bin_edges ().size () - 1 ;
417- const double last_bin_edge = param[func_id] .bin_edges ().at (last_idx);
416+ const int last_idx = hist .bin_edges ().size () - 1 ;
417+ const double last_bin_edge = hist .bin_edges ().at (last_idx);
418418 const double dist = runtime_i - last_bin_edge;
419419 verboseStream << " last_indx: " << last_idx << " , last_bin_edge: " << last_bin_edge << std::endl;
420420 if (dist <= (bin_width * 0.05 )) {
421- if (param[func_id] .counts ().at (num_bins - 1 ) == 0 ) { // bin_ind) == 0) { /**< Ignore zero counts */
421+ if (hist .counts ().at (num_bins - 1 ) == 0 ) { /* *< Ignore zero counts */
422422
423423 ad_score = l_threshold - 1 ;
424424 verboseStream << " corrected ad_score: " << ad_score << std::endl;
@@ -438,7 +438,7 @@ unsigned long ADOutlierHBOS::compute_outliers(Anomalies &outliers,
438438 }
439439 else {
440440
441- if (param[func_id] .counts ().at (bin_ind) == 0 ) { /* *< Ignore zero counts */
441+ if (hist .counts ().at (bin_ind) == 0 ) { /* *< Ignore zero counts */
442442
443443 ad_score = l_threshold - 1 ;
444444 verboseStream << " corrected ad_score: " << ad_score << std::endl;
@@ -533,6 +533,7 @@ Anomalies ADOutlierCOPOD::run(int step) {
533533 CopodParam& g = *(CopodParam*)m_param;
534534 for (auto it : *m_execDataMap) { // loop over functions (key is function index)
535535 unsigned long func_id = it.first ;
536+ Histogram &hist = param[func_id];
536537 std::vector<double > runtimes;
537538 for (auto itt : it.second ) { // loop over events for that function
538539 if (itt->get_label () == 0 ) {
@@ -552,17 +553,17 @@ Anomalies ADOutlierCOPOD::run(int step) {
552553 }
553554 if (runtimes.size () > 0 ) {
554555 if (!g.find (func_id)) { // If func_id does not exist
555- const int r = param[func_id] .create_histogram (runtimes);
556+ const int r = hist .create_histogram (runtimes);
556557 if (r < 0 ) {
557558 recoverable_error (std::string (" AD: Func_ID does not exist " ));
558559 continue ;
559560 }
560561 }
561562 else { // merge with existing func_id, not overwrite
562563
563- const int r = param[func_id] .merge_histograms (g[func_id], runtimes);
564+ const int r = hist .merge_histograms (g[func_id], runtimes);
564565 if (r < 0 ) {
565- recoverable_error ( std::string ( " AD: Merging error received " )) ;
566+ verboseStream << " AD: Merging reset " << std::endl ;
566567 continue ;
567568 }
568569 }
@@ -619,28 +620,28 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
619620 verboseStream << " data Size: " << data.size () << std::endl;
620621
621622 CopodParam& param = *(CopodParam*)m_param;
622-
623+ Histogram &hist = param[func_id];
623624
624625 unsigned long n_outliers = 0 ;
625626
626627 // probability of runtime counts
627628 // std::vector<double> prob_counts = std::vector<double>(param[func_id].counts().size(), 0.0);
628- double tot_runtimes = std::accumulate (param[func_id] .counts ().begin (), param[func_id] .counts ().end (), 0.0 );
629+ double tot_runtimes = std::accumulate (hist .counts ().begin (), hist .counts ().end (), 0.0 );
629630
630631 if (tot_runtimes <= 0 ) {
631632 return n_outliers;
632633 }
633634 std::vector<double > recon_p_runtimes = std::vector<double >(tot_runtimes, 0.0 );
634635 std::vector<double > recon_n_runtimes = std::vector<double >(tot_runtimes, 0.0 );
635636 int recon_idx = 0 ;
636- verboseStream << " Unwrapping Merged Histogram. Size: " << param[func_id] .counts ().size () << std::endl;
637- for (int i=0 ; i < param[func_id] .counts ().size (); i++){
638- int count = param[func_id] .counts ().at (i);
639- verboseStream << " Count: " << count << " , Value: " << param[func_id] .bin_edges ().at (i) << std::endl;
637+ verboseStream << " Unwrapping Merged Histogram. Size: " << hist .counts ().size () << std::endl;
638+ for (int i=0 ; i < hist .counts ().size (); i++){
639+ int count = hist .counts ().at (i);
640+ verboseStream << " Count: " << count << " , Value: " << hist .bin_edges ().at (i) << std::endl;
640641 for (int j=0 ; j<count; j++){
641642
642- recon_p_runtimes.at (recon_idx) = param[func_id] .bin_edges ().at (i);
643- recon_n_runtimes.at (recon_idx) = -1 * param[func_id] .bin_edges ().at (i);
643+ recon_p_runtimes.at (recon_idx) = hist .bin_edges ().at (i);
644+ recon_n_runtimes.at (recon_idx) = -1 * hist .bin_edges ().at (i);
644645 verboseStream << " recon_idx: " << recon_idx << std::endl;
645646 verboseStream << " recon_p_runtimes.at(recon_idx): " << recon_p_runtimes.at (recon_idx) << " , recon_n_runtimes.at(recon_idx): " << recon_n_runtimes.at (recon_idx) << std::endl;
646647 recon_idx++;
@@ -678,9 +679,6 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
678679 }
679680
680681
681- // for(int i=0; i<mean_pn_ecdf.size(); i++)
682- // verboseStream << "mean_pn_ecdf at " << i << ": " << mean_pn_ecdf.at(i) << std::endl;
683-
684682 // Create COPOD score vector
685683 std::vector<double > out_scores_i = std::vector<double >(final_comp.size (), 0.0 );
686684 verboseStream << " m_alpha: " << m_alpha << std::endl;
@@ -710,14 +708,14 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
710708 if (out_scores_i.size () <= 0 ) {return 0 ;}
711709
712710 // compute threshold
713- verboseStream << " Global threshold before comparison with local threshold = " << param[func_id] .get_threshold () << std::endl;
711+ verboseStream << " Global threshold before comparison with local threshold = " << hist .get_threshold () << std::endl;
714712 double l_threshold = (max_score < 0 ) ? (-1 * m_threshold * (max_score - min_score)) : min_score + (m_threshold * (max_score - min_score));
715713 verboseStream << " l_threshold computed: " << l_threshold << std::endl;
716714 if (m_use_global_threshold) {
717- if (l_threshold < param[func_id] .get_threshold () && param[func_id] .get_threshold () > (-1 * log2 (1.00001 ))) {
718- l_threshold = param[func_id] .get_threshold ();
715+ if (l_threshold < hist .get_threshold () && hist .get_threshold () > (-1 * log2 (1.00001 ))) {
716+ l_threshold = hist .get_threshold ();
719717 } else {
720- param[func_id] .set_glob_threshold (l_threshold); // .get_histogram().glob_threshold = l_threshold;
718+ hist .set_glob_threshold (l_threshold); // .get_histogram().glob_threshold = l_threshold;
719717 // std::pair<size_t, size_t> msgsz_thres_update = sync_param(&param);
722720 }
723721 }
@@ -743,7 +741,7 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
743741 running_idx++;
744742 }
745743 else {
746- recoverable_error ( " AD: COPOD: runtime Index" ) ;
744+ verboseStream << " AD: COPOD: runtime Index" << std::endl ;
747745 continue ;
748746 }
749747
0 commit comments