Skip to content

Commit 9026128

Browse files
author
sandeepmittal
committed
ecdf update
1 parent 5117138 commit 9026128

2 files changed

Lines changed: 24 additions & 2 deletions

File tree

include/chimbuko/ad/ADOutlier.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,11 @@ namespace chimbuko {
301301
*/
302302
int np_digitize_get_bin_inds(const double& X, const std::vector<double>& bin_edges);
303303

304+
/**
305+
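* @brief Computes the empirical CDF of the input vector of function runtimes
* @param runtimes Function runtime samples from which the empirical CDF is built
* @param sorted Forwarded to the Boost empirical CDF constructor; presumably true means runtimes is already sorted in ascending order (TODO confirm against the Boost documentation)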
306+
*/
307+
auto empiricalCDF(const std::vector<double>& runtimes, const bool sorted=true);
308+
304309
private:
305310
double m_alpha; /**< Used to prevent log2 overflow */
306311
double m_threshold; /**< Threshold used to filter anomalies in HBOS*/

src/ad/ADOutlier.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <mpi.h>
99
#include <nlohmann/json.hpp>
1010
#include <boost/math/distributions/normal.hpp>
11+
#include <boost/math/distributions/empirical_cumulative_distribution_function.hpp>
1112

1213
using namespace chimbuko;
1314

@@ -35,7 +36,7 @@ ADOutlier *ADOutlier::set_algorithm(OutlierStatistic stat, const std::string & a
3536
return new ADOutlierHBOS(stat, hbos_thres, glob_thres);
3637
}
3738
else if (algorithm == "copod" || algorithm == "COPOD") {
38-
return new ADOutlierCOPOD(stat, hbos_thres);
39+
return new ADOutlierCOPOD(stat, hbos_thres);
3940
}
4041
else {
4142
return nullptr;
@@ -589,11 +590,21 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
589590
std::vector<double> prob_counts = std::vector<double>(param[func_id].counts().size(), 0.0);
590591
double tot_runtimes = std::accumulate(param[func_id].counts().begin(), param[func_id].counts().end(), 0.0);
591592

593+
std::vector<double> recon_runtimes = std::vector<double>(tot_runtimes, 0.0);
594+
int recon_idx = 0;
595+
for(int i=0; i < param[func_id].counts().size(); i++){
596+
int count = param[func_id].counts().at(i);
597+
for(int j=0; j<count; j++){
598+
recon_runtimes.at(recon_idx++) = param[func_id].bin_edges.at(i);
599+
}
600+
}
601+
602+
auto func_ecdf = empiricalCDF(recon_runtimes, true);
603+
592604
for(int i=0; i < param[func_id].counts().size(); i++){
593605
int count = param[func_id].counts().at(i);
594606
double p = count / tot_runtimes;
595607
prob_counts.at(i) += p;
596-
597608
}
598609

599610
//Create COPOD score vector
@@ -767,3 +778,9 @@ int ADOutlierCOPOD::np_digitize_get_bin_inds(const double& X, const std::vector<
767778

768779
return ret_val;
769780
}
781+
782+
auto ADOutlierCOPOD::empiricalCDF(const std::vector<double>& runtimes, const bool sorted) {
783+
784+
return boost::math::empirical_cumulative_distribution_function(std::move(runtimes), sorted);
785+
786+
}

0 commit comments

Comments
 (0)