@@ -571,6 +571,24 @@ Anomalies ADOutlierCOPOD::run(int step) {
571571 recoverable_error (std::string (" AD: Zero function runtimes " ));
572572 continue ;
573573 }
574+ verboseStream << " Size of runtimes: " << runtimes.size () << " , func_id: " << func_id << std::endl;
575+
576+ // calculate the skewness of runtimes for func_id; only its sign (-1, 0 or 1) is stored in m_skewness
577+ const double mu = std::accumulate (runtimes.begin (), runtimes.end (), 0.0 ) / runtimes.size ();
578+
579+ std::vector<double > diff = std::vector<double >(runtimes.size ());
580+ std::transform (runtimes.begin (), runtimes.end (), diff.begin (), [mu](double x) {return x - mu;});
581+ const double sq_sum = std::inner_product (diff.begin (), diff.end (), diff.begin (), 0.0 );
582+ const double stdev = std::sqrt (sq_sum / runtimes.size ());
583+
584+ double summ = 0 ;
585+ for (int i=0 ; i<runtimes.size (); i++) {
586+ summ += std::pow ((runtimes.at (i) - mu), 3 );
587+ }
588+
589+ const double abs_skewness = summ / ((runtimes.size () - 1 ) * std::pow (stdev,3 ));
590+ m_skewness[func_id] = (abs_skewness < 0 ) ? -1 : (abs_skewness > 0 ) ? 1 : 0 ;
591+
574592 }
575593
576594 // Update temp runstats to include information collected previously (synchronizes with the parameter server if connected)
@@ -637,34 +655,44 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
637655 verboseStream << " Size of empiricalCDF(recon_n_runtimes): " << func_n_ecdf.size () << std::endl;
638656
639657 std::vector<double > mean_pn_ecdf = std::vector<double >(func_p_ecdf.size (), 0.0 );
640- verboseStream << " Size of mean_pn_ecdf: " << mean_pn_ecdf.size () << std::endl;
658+ verboseStream << " Size of mean_pn_ecdf: " << mean_pn_ecdf.size () << " , func_id: " << func_id << std::endl;
641659
642660 for (int i=0 ; i < mean_pn_ecdf.size (); i++){
643661 mean_pn_ecdf.at (i) = (func_p_ecdf.at (i) + func_n_ecdf.at (i)) / 2.0 ;
644662 verboseStream << " mean_pn_ecdf.at(i): " << mean_pn_ecdf.at (i) << " , func_p_ecdf.at(i): " << func_p_ecdf.at (i) << " , func_n_ecdf.at(i): " << func_n_ecdf.at (i) << std::endl;
645663 }
646664
665+ // skewness correction: m_skewness == -1 selects func_p_ecdf, +1 selects func_n_ecdf, 0 adds the two
666+ std::vector<double > skewness_arr = std::vector<double >(func_p_ecdf.size (), 0.0 );
667+ const int p_sign = (m_skewness[func_id] - 1 ) < 0 ? -1 : (m_skewness[func_id] - 1 ) > 0 ? 1 : 0 ;
668+ const int n_sign = (m_skewness[func_id] + 1 ) < 0 ? -1 : (m_skewness[func_id] + 1 ) > 0 ? 1 : 0 ;
669+
670+ for (int i = 0 ; i< func_p_ecdf.size (); i++) {
671+ skewness_arr.at (i) = (func_p_ecdf.at (i) * -1 * p_sign) + (func_n_ecdf.at (i) * n_sign);
672+ verboseStream << " skewness_arr.at(" << i << " ): " << skewness_arr.at (i) << std::endl;
673+ }
674+
675+ std::vector<double > final_comp = std::vector<double >(skewness_arr.size (), 0.0 );
676+ for (int i = 0 ; i < skewness_arr.size (); i++) {
677+ final_comp.at (i) = std::max (skewness_arr.at (i), mean_pn_ecdf.at (i));
678+ }
647679
648- // for(int i=0; i < param[func_id].counts().size(); i++){
649- // int count = param[func_id].counts().at(i);
650- // double p = count / tot_runtimes;
651- // prob_counts.at(i) += p;
652- // }
653680
654- for (int i=0 ; i<mean_pn_ecdf.size (); i++)
655- verboseStream << " mean_pn_ecdf at " << i << " : " << mean_pn_ecdf.at (i) << std::endl;
681+ // for(int i=0; i<mean_pn_ecdf.size(); i++)
682+ // verboseStream << "mean_pn_ecdf at " << i << ": " << mean_pn_ecdf.at(i) << std::endl;
656683
657684 // Create COPOD score vector
658- std::vector<double > out_scores_i = std::vector<double >(mean_pn_ecdf .size (), 0.0 );
685+ std::vector<double > out_scores_i = std::vector<double >(final_comp .size (), 0.0 );
659686 verboseStream << " m_alpha: " << m_alpha << std::endl;
660687
661688 double min_score = -1 * log2 (0.0 + m_alpha);
662- double max_score = -1 * log2 (1.0 + m_alpha);
689+ double max_score = log2 (1.0 + m_alpha) - min_score;
690+ verboseStream << " Initializaing min_score: " << min_score << " , max_score: " << max_score <<std::endl;
663691 verboseStream << " out_scores_i: " << std::endl;
664- for (int i=0 ; i < mean_pn_ecdf .size (); i++){
665- double l = -1 * log2 (mean_pn_ecdf .at (i) + m_alpha);
692+ for (int i=0 ; i < final_comp .size (); i++){
693+ double l = -1 * log2 (final_comp .at (i) + m_alpha);
666694 out_scores_i.at (i) = l;
667- verboseStream << " Mean_ecdf at " << i << " : " << mean_pn_ecdf .at (i) << " , score: " << l << std::endl;
695+ verboseStream << " Final_comp at " << i << " : " << final_comp .at (i) << " , score: " << l << std::endl;
668696 // if(prob_counts.at(i) > 0) {
669697 if (l < min_score){
670698 min_score = l;
@@ -683,9 +711,10 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
683711
684712 // compute threshold
685713 verboseStream << " Global threshold before comparison with local threshold = " << param[func_id].get_threshold () << std::endl;
686- double l_threshold = min_score + (m_threshold * (max_score - min_score));
714+ double l_threshold = (max_score < 0 ) ? (-1 * m_threshold * (max_score - min_score)) : min_score + (m_threshold * (max_score - min_score));
715+ verboseStream << " l_threshold computed: " << l_threshold << std::endl;
687716 if (m_use_global_threshold) {
688- if (l_threshold < param[func_id].get_threshold ()) {
717+ if (l_threshold < param[func_id].get_threshold () && param[func_id]. get_threshold () > (- 1 * log2 ( 1.00001 )) ) {
689718 l_threshold = param[func_id].get_threshold ();
690719 } else {
691720 param[func_id].set_glob_threshold (l_threshold); // .get_histogram().glob_threshold = l_threshold;
@@ -694,31 +723,29 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
694723 }
695724
696725 // Compute COPOD based score for each datapoint
697- // const double bin_width = param[func_id].bin_edges().at(1) - param[func_id].bin_edges().at(0);
698- // const int num_bins = param[func_id].counts().size();
699- // verboseStream << "Bin width: " << bin_width << std::endl;
700726
701727 int top_out = 0 ;
702- // int running_idx = 0;
728+ int running_idx = 0 ;
703729 for (auto itt : data) {
704730 if (itt->get_label () == 0 ) {
705731
706732 const double runtime_i = this ->getStatisticValue (*itt); // runtimes.push_back(this->getStatisticValue(*itt));
707733 double ad_score;
708- int running_idx = 0 ;
709-
710- // find bin index of data(runtime_i) in merged histogram
711- for (int i=1 ; i < param[func_id].bin_edges ().size (); i++) {
712- if (runtime_i < param[func_id].bin_edges ().at (i)) {
713- running_idx = i-1 ;
714- break ;
715- }
734+
735+ if (running_idx < final_comp.size ()) {
736+ verboseStream << " final_comp.at(" << running_idx << " ): " << final_comp.at (running_idx) << " , func_id: " << func_id << " , runtime: " << runtime_i << std::endl;
737+
738+ if (out_scores_i.at (running_idx) > l_threshold) // (final_comp.at(running_idx) > 0) // < 0.9)
739+ ad_score = l_threshold + 1 ;
740+ else
741+ ad_score = l_threshold - 1 ;
742+
743+ running_idx++;
744+ }
745+ else {
746+ recoverable_error (" AD: COPOD: runtime Index" );
747+ continue ;
716748 }
717- verboseStream << " mean_pn_ecdf.at(running_idx): " << mean_pn_ecdf.at (running_idx) << std::endl;
718- if (mean_pn_ecdf.at (running_idx) < 0.99 )
719- ad_score = l_threshold + 1 ;
720- else
721- ad_score = l_threshold - 1 ;
722749
723750 itt->set_outlier_score (ad_score);
724751 verboseStream << " ad_score: " << ad_score << " , l_threshold: " << l_threshold << std::endl;
0 commit comments