Skip to content

Commit 2cff21c

Browse files
author
Sandeep Mittal
committed
removed duplicates
1 parent 8553939 commit 2cff21c

3 files changed

Lines changed: 2 additions & 308 deletions

File tree

include/chimbuko/param/copod_param.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#pragma once
22
#include <chimbuko_config.h>
33
#include "chimbuko/param.hpp"
4+
#include "chimbuko/param/hbos_param.hpp"
45
#include "chimbuko/util/RunStats.hpp"
56
#include <unordered_map>
67
#include <nlohmann/json.hpp>

src/ad/ADOutlier.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -747,7 +747,7 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
747747
}
748748

749749

750-
int ADOutlierHBOS::np_digitize_get_bin_inds(const double& X, const std::vector<double>& bin_edges) {
750+
int ADOutlierCOPOD::np_digitize_get_bin_inds(const double& X, const std::vector<double>& bin_edges) {
751751

752752

753753
if(bin_edges.size() < 2){ // If only one bin exists in the Histogram

src/param/copod_param.cpp

Lines changed: 0 additions & 307 deletions
Original file line numberDiff line numberDiff line change
@@ -137,310 +137,3 @@ using namespace chimbuko;
137137
}
138138

139139

140-
/**
141-
* @brief Histogram Class Implementation
142-
*/
143-
144-
145-
// Default constructor: the initial state is whatever clear() establishes.
Histogram::Histogram()
{
  clear();
}
146-
// Destructor: performs no explicit work.
Histogram::~Histogram()
{
}
147-
148-
/**
149-
* @brief Merge Histogram
150-
*/
151-
Histogram chimbuko::operator+(const Histogram& g, const Histogram& l) {
152-
Histogram combined;
153-
double min_runtime, max_runtime;
154-
//verboseStream << "Bin_Edges Size of Global Histogram: " << std::to_string(g.bin_edges().size()) << ", Bin_Edges Size of Local Histogram: " << std::to_string(l.bin_edges().size()) << std::endl;
155-
verboseStream << "Counts Size of Global Histogram: " << std::to_string(g.counts().size()) << ", Counts Size of Local Histogram: " << std::to_string(l.counts().size()) << std::endl;
156-
157-
if (g.counts().size() <= 0) {
158-
verboseStream << "Global Histogram is empty" << std::endl;
159-
combined = l;
160-
return combined;
161-
}
162-
else if (l.counts().size() <= 0) {
163-
verboseStream << "Local Histogram is empty" << std::endl;
164-
combined = g;
165-
return combined;
166-
}
167-
else {
168-
double bin_width;
169-
if(g.counts().size() > 0 && g.bin_edges().size() > 1 && l.counts().size() > 0 && l.bin_edges().size() > 1){ /**< If g and l are non-empty Histograms*/
170-
bin_width = Histogram::_scott_binWidth(g.counts(), g.bin_edges(), l.counts(), l.bin_edges()); /**< Compute bin width for merged histogram*/
171-
172-
verboseStream << "BIN WIDTH while merging: " << bin_width << std::endl;
173-
if (bin_width < 0){
174-
verboseStream << "Incorrect Bin Width Computed" << std::endl;
175-
exit(1);
176-
}
177-
}
178-
else{
179-
verboseStream << "INCORRECT histograms" << std::endl;
180-
exit(1);
181-
}
182-
183-
/**
184-
* Compute most minimum bin edges and most maximum bin edges from two histograms (g & l)
185-
*/
186-
min_runtime = MIN(l.bin_edges().at(0), g.bin_edges().at(0));
187-
max_runtime = MAX(l.bin_edges().at(l.bin_edges().size()-1), g.bin_edges().at(g.bin_edges().size()-1));
188-
189-
std::vector<double> comb_binedges;
190-
std::vector<int> comb_counts;
191-
192-
if (bin_width == 0){
193-
verboseStream << "BINWIDTH is Zero" << std::endl;
194-
combined = g;
195-
196-
for (int i = 0; i < l.bin_edges().size() -1; i++) {
197-
198-
auto index_it = std::lower_bound(combined.bin_edges().begin(), combined.bin_edges().end(), l.bin_edges().at(i));
199-
if (index_it != combined.bin_edges().end()){
200-
const int id = std::distance(combined.bin_edges().begin(), index_it) - 1;
201-
const int inc = l.counts().at(i);
202-
verboseStream << "In l " << "id: " << id << ", inc: " << inc << std::endl;
203-
if (id >= 0 && id < combined.counts().size())
204-
combined.add2counts(id, inc);
205-
}
206-
}
207-
208-
return combined;
209-
}
210-
else{ // bin_width is > 0
211-
verboseStream << "BindWidth is > 0 here: " << std::endl;
212-
213-
verboseStream << "min_runtime:" << min_runtime << std::endl;
214-
verboseStream << "max_runtime:" << max_runtime << std::endl;
215-
if (max_runtime < min_runtime){
216-
verboseStream << "Incorrect boundary for runtime" << std::endl;
217-
exit(1);
218-
}
219-
220-
double edge_val=min_runtime;
221-
222-
if (min_runtime == max_runtime) {
223-
comb_binedges.resize(2);
224-
225-
comb_binedges[0] = edge_val;
226-
comb_binedges[1] = edge_val + bin_width;
227-
}
228-
else{
229-
/*
230-
comb_binedges.resize(floor((max_runtime - min_runtime)/bin_width) + 2);
231-
for (int i = 0; i < comb_binedges.size(); i++) {
232-
comb_binedges[i] = edge_val;
233-
edge_val += bin_width;
234-
}
235-
*/
236-
for(edge_val = min_runtime; edge_val < max_runtime;) {
237-
comb_binedges.push_back(edge_val);
238-
edge_val += bin_width;
239-
}
240-
}
241-
}
242-
243-
comb_counts = std::vector<int>(comb_binedges.size() - 1, 0);
244-
245-
for (int i = 0; i < g.bin_edges().size() -1; i++) {
246-
247-
auto index_it = std::lower_bound(comb_binedges.begin(), comb_binedges.end(), g.bin_edges().at(i));
248-
if (index_it != comb_binedges.end()){
249-
const int id = std::distance(comb_binedges.begin(), index_it) - 1;
250-
const int inc = g.counts().at(i);
251-
verboseStream << "In g " << "id: " << id << ", inc: " << inc << std::endl;
252-
if (id >= 0 && id < comb_counts.size())
253-
comb_counts[id] += inc;
254-
}
255-
}
256-
257-
for (int i = 0; i < l.bin_edges().size() -1; i++) {
258-
259-
auto index_it = std::lower_bound(comb_binedges.begin(), comb_binedges.end(), l.bin_edges().at(i));
260-
if (index_it != comb_binedges.end()){
261-
const int id = std::distance(comb_binedges.begin(), index_it) - 1;
262-
const int inc = l.counts().at(i);
263-
verboseStream << "In l " << "id: " << id << ", inc: " << inc << std::endl;
264-
if (id >= 0 && id < comb_counts.size())
265-
comb_counts[id] += inc;
266-
}
267-
}
268-
269-
double new_threshold;
270-
if(l.get_threshold() > g.get_threshold())
271-
new_threshold = l.get_threshold();
272-
else
273-
new_threshold = g.get_threshold();
274-
275-
276-
combined = g;
277-
278-
combined.set_glob_threshold(new_threshold);
279-
combined.set_counts(comb_counts);
280-
combined.set_bin_edges(comb_binedges);
281-
return combined;
282-
}
283-
284-
285-
}
286-
287-
/**
 * @brief Merge another histogram into this one
 *
 * Delegates to the binary operator+ and then copies the merged result over *this.
 *
 * @param h histogram to merge in
 * @return reference to this histogram after the merge
 */
Histogram& Histogram::operator+=(const Histogram& h)
{
  const Histogram merged = *this + h;
  *this = merged;
  return *this;
}
297-
298-
double Histogram::_scott_binWidth(const std::vector<int> & global_counts, const std::vector<double> & global_edges, const std::vector<int> & local_counts, const std::vector<double> & local_edges){
299-
double sum = 0.0;
300-
verboseStream << "Size of Vector global_counts: " << global_counts.size() << std::endl;
301-
verboseStream << "Size of Vector local_counts: " << local_counts.size() << std::endl;
302-
303-
int size = 0;
304-
for(int i = 0; i < global_counts.size(); i++) {
305-
int count = global_counts[i];
306-
if (count < 0)
307-
count = -1 * count;
308-
if (count != 0){
309-
verboseStream << std::to_string(count) << ", ";
310-
}
311-
size += count;
312-
sum += (count * global_edges.at(i));
313-
}
314-
verboseStream << std::endl;
315-
verboseStream << "Size in _scott_binWidth: " << size << std::endl;
316-
verboseStream << "Global sum in _scott_binWidth: " << sum << std::endl;
317-
318-
for(int i = 0; i < local_counts.size(); i++) {
319-
int count = local_counts[i];
320-
if (count < 0)
321-
count = -1 * count;
322-
if (count != 0){
323-
verboseStream << std::to_string(count) << ", ";
324-
}
325-
size += count;
326-
sum += (count * local_edges.at(i));
327-
}
328-
verboseStream << std::endl;
329-
verboseStream << "total Size in _scott_binWidth: " << size << std::endl;
330-
verboseStream << "total sum in _scott_binWidth: " << sum << std::endl;
331-
332-
const double mean = sum / size;
333-
verboseStream << "mean in _xcott_binWidth: " << mean << std::endl;
334-
335-
double var = 0.0, std=0.0;
336-
for (int i=0;i<global_counts.size();i++){
337-
var += global_counts.at(i) * pow((global_edges.at(i) - mean), 2);
338-
}
339-
verboseStream << "Global var in _scott_binWidth: " << var << std::endl;
340-
for (int i=0;i<local_counts.size();i++){
341-
var += local_counts.at(i) * pow((local_edges.at(i) - mean), 2);
342-
}
343-
verboseStream << "total var in _scott_binWidth: " << var << std::endl;
344-
345-
var = var / size;
346-
verboseStream << "Final Variance in _scott_binWidth: " << var << std::endl;
347-
std = sqrt(var);
348-
verboseStream << "STD in merging _scott_binWidth: " << std << std::endl;
349-
if (std <= 100.0) {return 0;}
350-
351-
return ((3.5 * std ) / pow(size, 1/3));
352-
353-
}
354-
355-
/**
 * @brief Bin width for a set of values as per Scott's rule = 3.5*std*n^(-1/3)
 *
 * Uses the population standard deviation (divides by n).
 *
 * @param vals the data values
 * @return the bin width; 0 when vals is empty or all values are identical
 */
double Histogram::_scott_binWidth(const std::vector<double> & vals){
  // No data: avoid 0/0 and report "no usable width".
  if (vals.empty()) {
    return 0;
  }

  const double n = static_cast<double>(vals.size());
  const double mean = std::accumulate(vals.begin(), vals.end(), 0.0) / n;

  double var = 0.0;
  for (const double v : vals) {
    const double dev = v - mean;
    var += dev * dev;
  }
  var = var / n;

  const double stddev = sqrt(var);
  verboseStream << "STD in _scott_binWidth: " << stddev << std::endl;

  // The exponent must be a floating-point 1/3; the integer expression 1/3 is 0.
  return ((3.5 * stddev) / pow(n, 1.0 / 3.0));
}
371-
372-
/**
 * @brief Overwrite this histogram's stored data
 *
 * Copies the threshold, the bin counts and the bin edges from d, field by field.
 *
 * @param d source data
 */
void Histogram::set_hist_data(const Histogram::Data& d)
{
  auto& stored = m_histogram;

  stored.glob_threshold = d.glob_threshold;
  stored.counts = d.counts;
  stored.bin_edges = d.bin_edges;
}
378-
379-
//void Histogram::push (double x)
380-
//{
381-
// m_histogram.runtimes.push_back(x);
382-
//}
383-
384-
int Histogram::create_histogram(const std::vector<double>& r_times)
385-
{
386-
std::vector<double> runtimes = r_times;
387-
const double bin_width = Histogram::_scott_binWidth(runtimes);
388-
if (bin_width <= 0) {return -1;}
389-
std::sort(runtimes.begin(), runtimes.end());
390-
const int h = runtimes.size() - 1;
391-
392-
if (m_histogram.bin_edges.size() > 0) m_histogram.bin_edges.clear();
393-
394-
m_histogram.bin_edges.push_back(runtimes.at(0));
395-
396-
double prev = m_histogram.bin_edges.at(0);
397-
while(prev < runtimes.at(h)){
398-
m_histogram.bin_edges.push_back(prev + bin_width);
399-
prev += bin_width;
400-
}
401-
//std::cout << "Number of bins: " << m_histogram.bin_edges.size()-1 << std::endl;
402-
403-
if (m_histogram.counts.size() > 0) m_histogram.counts.clear();
404-
m_histogram.counts = std::vector<int>(m_histogram.bin_edges.size()-1, 0);
405-
for ( int i=0; i < runtimes.size(); i++) {
406-
for ( int j=1; j < m_histogram.bin_edges.size(); j++) {
407-
if ( runtimes.at(i) < m_histogram.bin_edges.at(j) ) {
408-
m_histogram.counts[j-1] += 1;
409-
break;
410-
}
411-
}
412-
}
413-
//std::cout << "Size of counts: " << m_histogram.counts.size() << std::endl;
414-
415-
//m_histogram.runtimes.clear();
416-
const double min_threshold = -1 * log2(1.00001);
417-
if (!(m_histogram.glob_threshold > min_threshold)) {
418-
m_histogram.glob_threshold = min_threshold;
419-
}
420-
this->set_hist_data(Histogram::Data( m_histogram.glob_threshold, m_histogram.counts, m_histogram.bin_edges ));
421-
return 0;
422-
}
423-
424-
int Histogram::merge_histograms(const Histogram& g, const std::vector<double>& runtimes)
425-
{
426-
427-
std::vector<double> r_times = runtimes;
428-
429-
for (int i = 0; i < g.bin_edges().size() - 1; i++) {
430-
for(int j = 0; j < g.counts().at(i); j++){
431-
r_times.push_back(g.bin_edges().at(i));
432-
}
433-
}
434-
435-
m_histogram.glob_threshold = g.get_threshold();
436-
//verboseStream << "glob_threshold in merge_histograms = " << m_histogram.glob_threshold << std::endl;
437-
return this->create_histogram(r_times);
438-
//this->set_hist_data(Histogram::Data( m_histogram.glob_threshold, m_histogram.counts, m_histogram.bin_edges ));
439-
440-
}
441-
442-
/**
 * @brief Serialize the histogram to JSON
 * @return an object with the keys "Histogram Bin Counts" and "Histogram Bin Edges"
 */
nlohmann::json Histogram::get_json() const {
  nlohmann::json result;
  result["Histogram Bin Counts"] = m_histogram.counts;
  result["Histogram Bin Edges"] = m_histogram.bin_edges;
  return result;
}

0 commit comments

Comments
 (0)