@@ -297,28 +297,18 @@ unsigned long ADOutlierHBOS::compute_outliers(Anomalies &outliers,
297297
298298 HbosParam& param = *(HbosParam*)m_param;
299299
300- // Display global bin_edges
301- // std::vector<double> tmp_b_edges = param[func_id].bin_edges();
302- // std::cout << "global bin_edges in compute_outliers: Size: " << tmp_b_edges.size() << std::endl;
303- // for(int i=0; i<tmp_b_edges.size(); i++){
304- // std::cout << tmp_b_edges.at(i) << std::endl;
305- // }
306-
307- // if (param[func_id].count() < 2){
308- // VERBOSE(std::cout << "Less than 2 events in stats associated with that func, stats not complete" << std::endl);
309- // return 0;
310- // }
300+
311301 unsigned long n_outliers = 0 ;
312302
313303 // probability of runtime counts
314304 std::vector<double > prob_counts = std::vector<double >(param[func_id].counts ().size (), 0.0 );
315305 double tot_runtimes = std::accumulate (param[func_id].counts ().begin (), param[func_id].counts ().end (), 0.0 );
316- // std::cout << "Count and its Probability for func_id: " << std::to_string(func_id) << std::endl;
306+
317307 for (int i=0 ; i < param[func_id].counts ().size (); i++){
318308 int count = param[func_id].counts ().at (i);
319309 double p = count / tot_runtimes;
320310 prob_counts.at (i) += p;
321- // std::cout << "Count: " << count << ", Probability: " << prob_counts.at(i) << std::endl;
311+
322312 }
323313
324314 // Create HBOS score vector
@@ -340,14 +330,15 @@ unsigned long ADOutlierHBOS::compute_outliers(Anomalies &outliers,
340330 }
341331 }
342332 std::cout << std::endl;
343- // std::cout << "out_score_i size: " << out_scores_i.size() << std::endl;
344- // std::cout << "min_score = " << min_score << std::endl;
345- // std::cout << "max_score = " << max_score << std::endl;
333+ std::cout << " out_score_i size: " << out_scores_i.size () << std::endl;
334+ std::cout << " min_score = " << min_score << std::endl;
335+ std::cout << " max_score = " << max_score << std::endl;
336+
346337 if (out_scores_i.size () == 0 ) return 0 ;
347338
348339 // compute threshold
349- // std::cout << "Global threshold before comparison with local threshold = " << param[func_id].get_threshold() << std::endl;
350- double l_threshold = min_score + (m_threshold * (max_score - min_score));// m_threshold * max_score;
340+ std::cout << " Global threshold before comparison with local threshold = " << param[func_id].get_threshold () << std::endl;
341+ double l_threshold = min_score + (m_threshold * (max_score - min_score));
351342 if (m_use_global_threshold) {
352343 if (l_threshold < param[func_id].get_threshold ()) {
353344 l_threshold = param[func_id].get_threshold ();
@@ -357,47 +348,30 @@ unsigned long ADOutlierHBOS::compute_outliers(Anomalies &outliers,
357348 }
358349 }
359350
360- // std::cout << "local threshold = " << l_threshold << " updated global_threshold = " << param[func_id].get_threshold() << std::endl;
361- // For each datapoint get its corresponding bin index
362- // std::vector<int> bin_inds = ADOutlierHBOS::np_digitize(param[func_id].runtimes, param[func_id].bin_edges);
363- // if (bin_inds.size() < param[func_id].runtimes.size()) {
364- // VERBOSE(std::cout << "INCORRECT bin_inds.size() < param[func_id].runtimes.size()\t: " << bin_inds.size() << " < " << param[func_id].runtimes.size() << std::endl);
365- // return 0;
366- // }
367-
368351 // Compute HBOS based score for each datapoint
369352 const double bin_width = param[func_id].bin_edges ().at (1 ) - param[func_id].bin_edges ().at (0 );
370353 const int num_bins = param[func_id].counts ().size ();
371- // std::cout << "Bin width: " << bin_width << std::endl;
372- // std::cout << "Bin edges: " << std::endl;
373- // for (int i=0; i< param[func_id].bin_edges().size(); i++){
374- // std::cout << param[func_id].bin_edges().at(i) << std::endl;
375- // }
354+ std::cout << " Bin width: " << bin_width << std::endl;
376355
377356 int top_out = 0 ;
378357 for (auto itt : data) {
379358 if (itt->get_label () == 0 ) {
380359
381360 const double runtime_i = this ->getStatisticValue (*itt); // runtimes.push_back(this->getStatisticValue(*itt));
382361 double ad_score;
383- // auto bin_it = std::upper_bound(param[func_id].bin_edges().begin(), param[func_id].bin_edges().end(), runtime_i);
384- // if(bin_it == param[func_id].bin_edges().end()) {// Not in histogram
385- // ad_score = max_score;
386- // }
387- // else{ //Found in Histogram
388- // const int index = std::distance(param[func_id].bin_edges().begin(), bin_it);
389- // ad_score = out_scores_i.at(index);
390- // }
362+
391363 const int bin_ind = ADOutlierHBOS::np_digitize_get_bin_inds (runtime_i, param[func_id].bin_edges ());
392364 std::cout << " bin_ind: " << bin_ind << " for runtime_i: " << runtime_i << std::endl;
393- // If the sample does not belong to any bins
394- // bin_ind == 0 (fall outside since it is too small)
365+ /* *
366+ * If the sample does not belong to any bins
367+ * bin_ind == 0 (fall outside since it is too small)
368+ */
395369 if ( bin_ind == 0 ){
396- double first_bin_edge = param[func_id].bin_edges ().at (0 );
397- double dist = first_bin_edge - runtime_i;
370+ const double first_bin_edge = param[func_id].bin_edges ().at (0 );
371+ const double dist = first_bin_edge - runtime_i;
398372 if ( dist <= (bin_width * 0.05 ) ){
399373 std::cout << runtime_i << " is on left of histogram but NOT outlier" << std::endl;
400- if (param[func_id].counts ().at (0 ) == 0 ) { // Ignore zero counts
374+ if (param[func_id].counts ().at (0 ) == 0 ) { /* *< Ignore zero counts */
401375
402376 ad_score = l_threshold - 1 ;
403377 std::cout << " corrected ad_score: " << ad_score << std::endl;
@@ -407,44 +381,46 @@ unsigned long ADOutlierHBOS::compute_outliers(Anomalies &outliers,
407381 }
408382 }
409383 else {
410- std::cout << runtime_i << " is small and outlier" << std::endl;
384+ std::cout << runtime_i << " is on left of histogram and an outlier" << std::endl;
411385 ad_score = max_score;
412386 }
413- // std::cout << "bin_index=0: Anomaly score of " << runtime_i << " = " << ad_score <<std::endl;
387+
414388 }
415- // If the sample does not belong to any bins
389+ /* *
390+ * If the sample does not belong to any bins
391+ */
416392 else if (bin_ind == num_bins + 1 ){
417- int last_idx = param[func_id].bin_edges ().size () - 1 ;
418- double last_bin_edge = param[func_id].bin_edges ().at (last_idx);
419- double dist = runtime_i - last_bin_edge;
393+ const int last_idx = param[func_id].bin_edges ().size () - 1 ;
394+ const double last_bin_edge = param[func_id].bin_edges ().at (last_idx);
395+ const double dist = runtime_i - last_bin_edge;
420396
421397 if (dist <= (bin_width * 0.05 )) {
422- if (param[func_id].counts ().at (bin_ind) == 0 ) { // - 1) == 0) { // Ignore zero counts
398+ if (param[func_id].counts ().at (bin_ind) == 0 ) { /* *< Ignore zero counts */
423399
424400 ad_score = l_threshold - 1 ;
425401 std::cout << " corrected ad_score: " << ad_score << std::endl;
426402 }
427403 else {
428- std::cout << runtime_i << " is farther but NOT outlier" << std::endl;
404+ std::cout << runtime_i << " is on right of histogram but NOT outlier" << std::endl;
429405 ad_score = out_scores_i.at (num_bins - 1 );
430406 }
431407 }
432408 else {
433- std::cout << runtime_i << " is farther and outlier" << std::endl;
409+ std::cout << runtime_i << " is on right of histogram and an outlier" << std::endl;
434410 ad_score = max_score;
435411 }
436412
437413 }
438414 else {
439415
440- if (param[func_id].counts ().at (bin_ind) == 0 ) { // Ignore zero counts
416+ if (param[func_id].counts ().at (bin_ind) == 0 ) { /* *< Ignore zero counts */
441417
442418 ad_score = l_threshold - 1 ;
443419 std::cout << " corrected ad_score: " << ad_score << std::endl;
444420 }
445421 else {
446- std::cout << runtime_i << " can be an outlier" << std::endl;
447- ad_score = out_scores_i.at ( bin_ind - 1 ); // bin_ind - 1);
422+ std::cout << runtime_i << " maybe be an outlier" << std::endl;
423+ ad_score = out_scores_i.at ( bin_ind - 1 );
448424 }
449425
450426 }
0 commit comments