Skip to content

Commit 18312d6

Browse files
author
Sandeep Mittal
committed
Handling cases when a function has zero events (runtimes) in HBOS and COPOD
1 parent afc55bd commit 18312d6

1 file changed

Lines changed: 62 additions & 19 deletions

File tree

src/ad/ADOutlier.cpp

Lines changed: 62 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -266,16 +266,26 @@ Anomalies ADOutlierHBOS::run(int step) {
266266
}
267267
if (runtimes.size() > 0) {
268268
if (!g.find(func_id)) { // If func_id does not exist
269-
const int r = param[func_id].create_histogram(runtimes);
270-
if (r < 0) {continue;}
269+
270+
const int r = param[func_id].create_histogram(runtimes);
271+
if (r < 0) {
272+
recoverable_error(std::string("AD: Func_ID does not exist"));
273+
continue;
274+
}
271275
}
272276
else { //merge with existing func_id, not overwrite
273277

274278
const int r = param[func_id].merge_histograms(g[func_id], runtimes);
275-
if (r < 0) {continue;}
279+
if (r < 0) {
280+
recoverable_error(std::string("AD: Merging error received "));
281+
continue;
282+
}
276283
}
277284
}
278-
else { continue;}
285+
else {
286+
recoverable_error(std::string("AD: Zero function runtimes "));
287+
continue;
288+
}
279289
}
280290

281291
//Update temp runstats to include information collected previously (synchronizes with the parameter server if connected)
@@ -543,15 +553,24 @@ Anomalies ADOutlierCOPOD::run(int step) {
543553
if (runtimes.size() > 0) {
544554
if (!g.find(func_id)) { // If func_id does not exist
545555
const int r = param[func_id].create_histogram(runtimes);
546-
if (r < 0) {continue;}
556+
if (r < 0) {
557+
recoverable_error(std::string("AD: Func_ID does not exist "));
558+
continue;
559+
}
547560
}
548561
else { //merge with existing func_id, not overwrite
549562

550563
const int r = param[func_id].merge_histograms(g[func_id], runtimes);
551-
if (r < 0) {continue;}
564+
if (r < 0) {
565+
recoverable_error(std::string("AD: Merging error received "));
566+
continue;
567+
}
552568
}
553569
}
554-
else { continue;}
570+
else {
571+
recoverable_error(std::string("AD: Zero function runtimes "));
572+
continue;
573+
}
555574
}
556575

557576
//Update temp runstats to include information collected previously (synchronizes with the parameter server if connected)
@@ -579,6 +598,7 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
579598
std::vector<CallListIterator_t>& data){
580599

581600
verboseStream << "Finding outliers in events for func " << func_id << std::endl;
601+
verboseStream << "data Size: " << data.size() << std::endl;
582602

583603
CopodParam& param = *(CopodParam*)m_param;
584604

@@ -589,29 +609,39 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
589609
//std::vector<double> prob_counts = std::vector<double>(param[func_id].counts().size(), 0.0);
590610
double tot_runtimes = std::accumulate(param[func_id].counts().begin(), param[func_id].counts().end(), 0.0);
591611

612+
if (tot_runtimes <= 0 ) {
613+
return n_outliers;
614+
}
592615
std::vector<double> recon_p_runtimes = std::vector<double>(tot_runtimes, 0.0);
593616
std::vector<double> recon_n_runtimes = std::vector<double>(tot_runtimes, 0.0);
594617
int recon_idx = 0;
595-
//verboseStream << "Unwrapping Merged Histogram. Size: " << param[func_id].counts().size() << std::endl;
618+
verboseStream << "Unwrapping Merged Histogram. Size: " << param[func_id].counts().size() << std::endl;
596619
for(int i=0; i < param[func_id].counts().size(); i++){
597620
int count = param[func_id].counts().at(i);
598-
//verboseStream << "Count: " << count << ", Value: " << param[func_id].bin_edges().at(i) << std::endl;
621+
verboseStream << "Count: " << count << ", Value: " << param[func_id].bin_edges().at(i) << std::endl;
599622
for(int j=0; j<count; j++){
600623

601624
recon_p_runtimes.at(recon_idx) = param[func_id].bin_edges().at(i);
602625
recon_n_runtimes.at(recon_idx) = -1 * param[func_id].bin_edges().at(i);
603-
//verboseStream << "recon_p_runtimes.at(recon_idx): " << recon_p_runtimes.at(recon_idx) << ", recon_n_runtimes.at(recon_idx): " << recon_n_runtimes.at(recon_idx) << std::endl;
604-
//verboseStream << "recon_idx: " << recon_idx << std::endl;
626+
verboseStream << "recon_idx: " << recon_idx << std::endl;
627+
verboseStream << "recon_p_runtimes.at(recon_idx): " << recon_p_runtimes.at(recon_idx) << ", recon_n_runtimes.at(recon_idx): " << recon_n_runtimes.at(recon_idx) << std::endl;
605628
recon_idx++;
606629
}
607630
}
608631

632+
609633
std::vector<double> func_p_ecdf = empiricalCDF(recon_p_runtimes, true);
610634
std::vector<double> func_n_ecdf = empiricalCDF(recon_n_runtimes, true);
635+
636+
verboseStream << "Size of empiricalCDF(recon_p_runtimes): " << func_p_ecdf.size() << std::endl;
637+
verboseStream << "Size of empiricalCDF(recon_n_runtimes): " << func_n_ecdf.size() << std::endl;
611638

612639
std::vector<double> mean_pn_ecdf = std::vector<double>(func_p_ecdf.size(), 0.0);
640+
verboseStream << "Size of mean_pn_ecdf: " << mean_pn_ecdf.size() << std::endl;
641+
613642
for(int i=0; i < mean_pn_ecdf.size(); i++){
614-
mean_pn_ecdf.at(i) = (func_p_ecdf.at(i) + func_n_ecdf.at(i)) / 2;
643+
mean_pn_ecdf.at(i) = (func_p_ecdf.at(i) + func_n_ecdf.at(i)) / 2.0;
644+
verboseStream << "mean_pn_ecdf.at(i): " << mean_pn_ecdf.at(i) << ", func_p_ecdf.at(i): " << func_p_ecdf.at(i) << ", func_n_ecdf.at(i): " << func_n_ecdf.at(i) << std::endl;
615645
}
616646

617647

@@ -621,15 +651,20 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
621651
// prob_counts.at(i) += p;
622652
//}
623653

654+
for(int i=0; i<mean_pn_ecdf.size(); i++)
655+
verboseStream << "mean_pn_ecdf at " << i << ": " << mean_pn_ecdf.at(i) << std::endl;
656+
624657
//Create COPOD score vector
625-
std::vector<double> out_scores_i;
658+
std::vector<double> out_scores_i = std::vector<double>(mean_pn_ecdf.size(), 0.0);
659+
verboseStream << "m_alpha: " << m_alpha << std::endl;
660+
626661
double min_score = -1 * log2(0.0 + m_alpha);
627662
double max_score = -1 * log2(1.0 + m_alpha);
628663
verboseStream << "out_scores_i: " << std::endl;
629664
for(int i=0; i < mean_pn_ecdf.size(); i++){
630665
double l = -1 * log2(mean_pn_ecdf.at(i) + m_alpha);
631-
out_scores_i.push_back(l);
632-
//verboseStream << "Count: " << param[func_id].counts().at(i) << ", Probability: " << prob_counts.at(i) << ", score: "<< l << std::endl;
666+
out_scores_i.at(i) = l;
667+
verboseStream << "Mean_ecdf at " << i << ": " << mean_pn_ecdf.at(i) << ", score: "<< l << std::endl;
633668
//if(prob_counts.at(i) > 0) {
634669
if(l < min_score){
635670
min_score = l;
@@ -664,15 +699,23 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
664699
//verboseStream << "Bin width: " << bin_width << std::endl;
665700

666701
int top_out = 0;
667-
int running_idx = 0;
702+
//int running_idx = 0;
668703
for (auto itt : data) {
669704
if (itt->get_label() == 0) {
670705

671706
const double runtime_i = this->getStatisticValue(*itt); //runtimes.push_back(this->getStatisticValue(*itt));
672707
double ad_score;
673-
674-
//verboseStream << "mean_pn_ecdf.at(running_idx++): " << mean_pn_ecdf.at(running_idx) << std::endl;
675-
if (mean_pn_ecdf.at(running_idx++) < 0.99)
708+
int running_idx = 0;
709+
710+
//find bin index of data(runtime_i) in merged histogram
711+
for (int i=1; i < param[func_id].bin_edges().size(); i++) {
712+
if (runtime_i < param[func_id].bin_edges().at(i)) {
713+
running_idx = i-1;
714+
break;
715+
}
716+
}
717+
verboseStream << "mean_pn_ecdf.at(running_idx): " << mean_pn_ecdf.at(running_idx) << std::endl;
718+
if (mean_pn_ecdf.at(running_idx) < 0.99)
676719
ad_score = l_threshold + 1;
677720
else
678721
ad_score = l_threshold - 1;

0 commit comments

Comments
 (0)