@@ -266,16 +266,26 @@ Anomalies ADOutlierHBOS::run(int step) {
266266 }
267267 if (runtimes.size () > 0 ) {
268268 if (!g.find (func_id)) { // If func_id does not exist
269- const int r = param[func_id].create_histogram (runtimes);
270- if (r < 0 ) {return outliers;}
269+
270+ const int r = param[func_id].create_histogram (runtimes);
271+ if (r < 0 ) {
272+ recoverable_error (std::string (" AD: Func_ID does not exist" ));
273+ continue ;
274+ }
271275 }
272276 else { // merge with exisiting func_id, not overwrite
273277
274278 const int r = param[func_id].merge_histograms (g[func_id], runtimes);
275- if (r < 0 ) {return outliers;}
279+ if (r < 0 ) {
280+ recoverable_error (std::string (" AD: Merging error received " ));
281+ continue ;
282+ }
276283 }
277284 }
278- else { return outliers;}
285+ else {
286+ // recoverable_error(std::string("AD: Zero function runtimes "));
287+ continue ;
288+ }
279289 }
280290
281291 // Update temp runstats to include information collected previously (synchronizes with the parameter server if connected)
@@ -543,15 +553,42 @@ Anomalies ADOutlierCOPOD::run(int step) {
543553 if (runtimes.size () > 0 ) {
544554 if (!g.find (func_id)) { // If func_id does not exist
545555 const int r = param[func_id].create_histogram (runtimes);
546- if (r < 0 ) {return outliers;}
556+ if (r < 0 ) {
557+ recoverable_error (std::string (" AD: Func_ID does not exist " ));
558+ continue ;
559+ }
547560 }
548561 else { // merge with exisiting func_id, not overwrite
549562
550563 const int r = param[func_id].merge_histograms (g[func_id], runtimes);
551- if (r < 0 ) {return outliers;}
564+ if (r < 0 ) {
565+ recoverable_error (std::string (" AD: Merging error received " ));
566+ continue ;
567+ }
552568 }
553569 }
554- else { return outliers;}
570+ else {
571+ // /recoverable_error(std::string("AD: Zero function runtimes "));
572+ continue ;
573+ }
574+ verboseStream << " Size of runtimes: " << runtimes.size () << " , func_id: " << func_id << std::endl;
575+
576+ // calculate skewness of runtimes for func_id
577+ const double mu = std::accumulate (runtimes.begin (), runtimes.end (), 0.0 ) / runtimes.size ();
578+
579+ std::vector<double > diff = std::vector<double >(runtimes.size ());
580+ std::transform (runtimes.begin (), runtimes.end (), diff.begin (), [mu](double x) {return x - mu;});
581+ const double sq_sum = std::inner_product (diff.begin (), diff.end (), diff.begin (), 0.0 );
582+ const double stdev = std::sqrt (sq_sum / runtimes.size ());
583+
584+ double summ = 0 ;
585+ for (int i=0 ; i<runtimes.size (); i++) {
586+ summ += std::pow ((runtimes.at (i) - mu), 3 );
587+ }
588+
589+ const double abs_skewness = summ / ((runtimes.size () - 1 ) * std::pow (stdev,3 ));
590+ m_skewness[func_id] = (abs_skewness < 0 ) ? -1 : (abs_skewness > 0 ) ? 1 : 0 ;
591+
555592 }
556593
557594 // Update temp runstats to include information collected previously (synchronizes with the parameter server if connected)
@@ -579,6 +616,7 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
579616 std::vector<CallListIterator_t>& data){
580617
581618 verboseStream << " Finding outliers in events for func " << func_id << std::endl;
619+ verboseStream << " data Size: " << data.size () << std::endl;
582620
583621 CopodParam& param = *(CopodParam*)m_param;
584622
@@ -589,47 +627,72 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
589627 // std::vector<double> prob_counts = std::vector<double>(param[func_id].counts().size(), 0.0);
590628 double tot_runtimes = std::accumulate (param[func_id].counts ().begin (), param[func_id].counts ().end (), 0.0 );
591629
630+ if (tot_runtimes <= 0 ) {
631+ return n_outliers;
632+ }
592633 std::vector<double > recon_p_runtimes = std::vector<double >(tot_runtimes, 0.0 );
593634 std::vector<double > recon_n_runtimes = std::vector<double >(tot_runtimes, 0.0 );
594635 int recon_idx = 0 ;
595- // verboseStream << "Unwrapping Merged Histogram. Size: " << param[func_id].counts().size() << std::endl;
636+ verboseStream << " Unwrapping Merged Histogram. Size: " << param[func_id].counts ().size () << std::endl;
596637 for (int i=0 ; i < param[func_id].counts ().size (); i++){
597638 int count = param[func_id].counts ().at (i);
598- // verboseStream << "Count: " << count << ", Value: " << param[func_id].bin_edges().at(i) << std::endl;
639+ verboseStream << " Count: " << count << " , Value: " << param[func_id].bin_edges ().at (i) << std::endl;
599640 for (int j=0 ; j<count; j++){
600641
601642 recon_p_runtimes.at (recon_idx) = param[func_id].bin_edges ().at (i);
602643 recon_n_runtimes.at (recon_idx) = -1 * param[func_id].bin_edges ().at (i);
603- // verboseStream << "recon_p_runtimes.at( recon_idx) : " << recon_p_runtimes.at( recon_idx) << ", recon_n_runtimes.at(recon_idx): " << recon_n_runtimes.at(recon_idx) << std::endl;
604- // verboseStream << "recon_idx: " << recon_idx << std::endl;
644+ verboseStream << " recon_idx: " << recon_idx << std::endl;
645+ verboseStream << " recon_p_runtimes.at( recon_idx) : " << recon_p_runtimes. at ( recon_idx) << " , recon_n_runtimes.at(recon_idx): " << recon_n_runtimes. at (recon_idx) << std::endl;
605646 recon_idx++;
606647 }
607648 }
608649
650+
609651 std::vector<double > func_p_ecdf = empiricalCDF (recon_p_runtimes, true );
610652 std::vector<double > func_n_ecdf = empiricalCDF (recon_n_runtimes, true );
653+
654+ verboseStream << " Size of empiricalCDF(recon_p_runtimes): " << func_p_ecdf.size () << std::endl;
655+ verboseStream << " Size of empiricalCDF(recon_n_runtimes): " << func_n_ecdf.size () << std::endl;
611656
612657 std::vector<double > mean_pn_ecdf = std::vector<double >(func_p_ecdf.size (), 0.0 );
658+ verboseStream << " Size of mean_pn_ecdf: " << mean_pn_ecdf.size () << " , func_id: " << func_id << std::endl;
659+
613660 for (int i=0 ; i < mean_pn_ecdf.size (); i++){
614- mean_pn_ecdf.at (i) = (func_p_ecdf.at (i) + func_n_ecdf.at (i)) / 2 ;
661+ mean_pn_ecdf.at (i) = (func_p_ecdf.at (i) + func_n_ecdf.at (i)) / 2.0 ;
662+ verboseStream << " mean_pn_ecdf.at(i): " << mean_pn_ecdf.at (i) << " , func_p_ecdf.at(i): " << func_p_ecdf.at (i) << " , func_n_ecdf.at(i): " << func_n_ecdf.at (i) << std::endl;
615663 }
616664
665+ // use skewness
666+ std::vector<double > skewness_arr = std::vector<double >(func_p_ecdf.size (), 0.0 );
667+ const int p_sign = (m_skewness[func_id] - 1 ) < 0 ? -1 : (m_skewness[func_id] - 1 ) > 0 ? 1 : 0 ;
668+ const int n_sign = (m_skewness[func_id] + 1 ) < 0 ? -1 : (m_skewness[func_id] + 1 ) > 0 ? 1 : 0 ;
669+
670+ for (int i = 0 ; i< func_p_ecdf.size (); i++) {
671+ skewness_arr.at (i) = (func_p_ecdf.at (i) * -1 * p_sign) + (func_n_ecdf.at (i) * n_sign);
672+ verboseStream << " skewness_arr.at(" << i << " ): " << skewness_arr.at (i) << std::endl;
673+ }
617674
618- // for(int i=0; i < param[func_id].counts().size(); i++){
619- // int count = param[func_id].counts().at(i);
620- // double p = count / tot_runtimes;
621- // prob_counts.at(i) += p;
622- // }
675+ std::vector<double > final_comp = std::vector<double >(skewness_arr.size (), 0.0 );
676+ for (int i = 0 ; i < skewness_arr.size (); i++) {
677+ final_comp.at (i) = std::max (skewness_arr.at (i), mean_pn_ecdf.at (i));
678+ }
679+
680+
681+ // for(int i=0; i<mean_pn_ecdf.size(); i++)
682+ // verboseStream << "mean_pn_ecdf at " << i << ": " << mean_pn_ecdf.at(i) << std::endl;
623683
624684 // Create COPOD score vector
625- std::vector<double > out_scores_i;
685+ std::vector<double > out_scores_i = std::vector<double >(final_comp.size (), 0.0 );
686+ verboseStream << " m_alpha: " << m_alpha << std::endl;
687+
626688 double min_score = -1 * log2 (0.0 + m_alpha);
627- double max_score = -1 * log2 (1.0 + m_alpha);
689+ double max_score = log2 (1.0 + m_alpha) - min_score;
690+ verboseStream << " Initializaing min_score: " << min_score << " , max_score: " << max_score <<std::endl;
628691 verboseStream << " out_scores_i: " << std::endl;
629- for (int i=0 ; i < mean_pn_ecdf .size (); i++){
630- double l = -1 * log2 (mean_pn_ecdf .at (i) + m_alpha);
631- out_scores_i.push_back (l) ;
632- // verboseStream << "Count: " << param[func_id].counts().at(i) << ", Probability : " << prob_counts .at(i) << ", score: "<< l << std::endl;
692+ for (int i=0 ; i < final_comp .size (); i++){
693+ double l = -1 * log2 (final_comp .at (i) + m_alpha);
694+ out_scores_i.at (i) = l ;
695+ verboseStream << " Final_comp at " << i << " : " << final_comp .at (i) << " , score: " << l << std::endl;
633696 // if(prob_counts.at(i) > 0) {
634697 if (l < min_score){
635698 min_score = l;
@@ -648,9 +711,10 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
648711
649712 // compute threshold
650713 verboseStream << " Global threshold before comparison with local threshold = " << param[func_id].get_threshold () << std::endl;
651- double l_threshold = min_score + (m_threshold * (max_score - min_score));
714+ double l_threshold = (max_score < 0 ) ? (-1 * m_threshold * (max_score - min_score)) : min_score + (m_threshold * (max_score - min_score));
715+ verboseStream << " l_threshold computed: " << l_threshold << std::endl;
652716 if (m_use_global_threshold) {
653- if (l_threshold < param[func_id].get_threshold ()) {
717+ if (l_threshold < param[func_id].get_threshold () && param[func_id]. get_threshold () > (- 1 * log2 ( 1.00001 )) ) {
654718 l_threshold = param[func_id].get_threshold ();
655719 } else {
656720 param[func_id].set_glob_threshold (l_threshold); // .get_histogram().glob_threshold = l_threshold;
@@ -659,9 +723,6 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
659723 }
660724
661725 // Compute COPOD based score for each datapoint
662- // const double bin_width = param[func_id].bin_edges().at(1) - param[func_id].bin_edges().at(0);
663- // const int num_bins = param[func_id].counts().size();
664- // verboseStream << "Bin width: " << bin_width << std::endl;
665726
666727 int top_out = 0 ;
667728 int running_idx = 0 ;
@@ -670,12 +731,21 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
670731
671732 const double runtime_i = this ->getStatisticValue (*itt); // runtimes.push_back(this->getStatisticValue(*itt));
672733 double ad_score;
673-
674- // verboseStream << "mean_pn_ecdf.at(running_idx++): " << mean_pn_ecdf.at(running_idx) << std::endl;
675- if (mean_pn_ecdf.at (running_idx++) < 0.99 )
676- ad_score = l_threshold + 1 ;
677- else
678- ad_score = l_threshold - 1 ;
734+
735+ if (running_idx < final_comp.size ()) {
736+ verboseStream << " final_comp.at(" << running_idx << " ): " << final_comp.at (running_idx) << " , func_id: " << func_id << " , runtime: " << runtime_i << std::endl;
737+
738+ if (out_scores_i.at (running_idx) > l_threshold) // (final_comp.at(running_idx) > 0) // < 0.9)
739+ ad_score = l_threshold + 1 ;
740+ else
741+ ad_score = l_threshold - 1 ;
742+
743+ running_idx++;
744+ }
745+ else {
746+ recoverable_error (" AD: COPOD: runtime Index" );
747+ continue ;
748+ }
679749
680750 itt->set_outlier_score (ad_score);
681751 verboseStream << " ad_score: " << ad_score << " , l_threshold: " << l_threshold << std::endl;
0 commit comments