|
9 | 9 |
|
10 | 10 | namespace chimbuko { |
11 | 11 |
|
| 12 | + |
12 | 13 | /** |
13 | | - * @brief Histogram Implementation |
14 | | - */ |
15 | | - class Histogram { |
16 | | - |
17 | | - public: |
18 | | - |
19 | | - /** |
20 | | - * @brief Construct a Histogram object |
21 | | - */ |
22 | | - Histogram(); |
23 | | - |
24 | | - /** |
25 | | - * @brief Destroy Histogram object |
26 | | - */ |
27 | | - ~Histogram(); |
28 | | - |
29 | | - /** |
30 | | - * @brief Data structure that stores Histogram data (global threshold, bin counts, bin edges) |
31 | | - */ |
32 | | - struct Data { |
33 | | - |
34 | | - double glob_threshold; /**< global threshold used to filter anomalies*/ |
35 | | - std::vector<int> counts; /**< Bin counts in Histogram*/ |
36 | | - std::vector<double> bin_edges; /**< Bin edges in Histogram*/ |
37 | | - |
38 | | - /** |
39 | | - * @brief Initialize histogram data |
40 | | - */ |
41 | | - Data(){ |
42 | | - |
43 | | - /** |
44 | | - * @brief Resets histogram data during initialization |
45 | | - */ |
46 | | - clear(); |
47 | | - } |
48 | | - |
49 | | - /** |
50 | | - * @brief Initialize histogram data with existing histogram data |
51 | | - * @param g_threshold: Global Threshold |
52 | | - * @param h_counts: a vector<int> of histogram bin counts |
53 | | - * @param h_bin_edges: a vector<double> of histogram bin edges |
54 | | - */ |
55 | | - Data(const double& g_threshold, const std::vector<int>& h_counts, const std::vector<double>& h_bin_edges ) { |
56 | | - glob_threshold = g_threshold; |
57 | | - counts = h_counts; |
58 | | - bin_edges = h_bin_edges; |
59 | | - } |
60 | | - |
61 | | - void clear() { |
62 | | - glob_threshold = -1 * log2(1.00001); |
63 | | - counts.clear(); |
64 | | - bin_edges.clear(); |
65 | | - } |
66 | | - |
67 | | - /** |
68 | | - * @brief Serialize using cereal |
69 | | - */ |
70 | | - template<class Archive> |
71 | | - void serialize(Archive & archive){ |
72 | | - archive(glob_threshold, counts, bin_edges); |
73 | | - } |
74 | | - |
75 | | - |
76 | | - |
77 | | - }; |
78 | | - |
79 | | - |
80 | | - void clear() {m_histogram.clear();} |
81 | | - |
82 | | - void push (double x); |
83 | | - |
84 | | - /** |
85 | | - * @brief returns reference to current histogram Data |
86 | | - * @return Data: Histogram data (bin counts, bin edges) |
87 | | - */ |
88 | | - const Data &get_histogram() const{ return m_histogram; } |
89 | | - |
90 | | - /** |
91 | | - * @brief Set the internal variables from an instance of Histogram Data |
92 | | - * @param d: Histogram Data (bin counts, bin edges) |
93 | | - */ |
94 | | - void set_hist_data(const Data& d); |
95 | | - |
96 | | - /** |
97 | | - * @brief Create an instance of this class from a Histogram Data instance |
98 | | - * @param d: Histogram Data (bin counts, bin edges) |
99 | | - * @return Instance of Histogram |
100 | | - */ |
101 | | - static Histogram from_hist_data(const Data& d) { |
102 | | - Histogram histdata; |
103 | | - histdata.set_hist_data(d); |
104 | | - return histdata; |
105 | | - } |
106 | | - |
107 | | - /** |
108 | | - * @brief Create new histogram locally for AD module's batch data instances |
109 | | - * @param r_times: a vector<double> of function run times |
110 | | - * @return returns 0 if success, else -1 |
111 | | - */ |
112 | | - int create_histogram(const std::vector<double>& r_times); |
113 | | - |
114 | | - /** |
115 | | - * @brief merges a Histogram with function runtimes |
116 | | - * @param g: Histogram to merge |
117 | | - * @param runtimes: Function runtimes |
118 | | - * @return 0 if successful, -1 if failed |
119 | | - */ |
120 | | - int merge_histograms(const Histogram& g, const std::vector<double>& runtimes); |
121 | | - |
122 | | - /** |
123 | | - * @brief Combine two Histogram instances such that the resulting statistics are the union of the two Histograms |
124 | | - * @param g: Histogram to merge into |
125 | | - * @param l: Histogram to merge |
126 | | - * @return result: Merged Histogram |
127 | | - */ |
128 | | - friend Histogram operator+(const Histogram& g, const Histogram& l); |
129 | | - |
130 | | - /** |
131 | | - * @brief Combine two Histogram instances such that the resulting statistics are the union of the two Histograms |
132 | | - * @param h: Histogram to merge |
133 | | - * @return result: Merged Histogram |
134 | | - */ |
135 | | - Histogram & operator+=(const Histogram& h); |
136 | | - |
137 | | - |
138 | | - /** |
139 | | - * @brief set global threshold for anomaly filtering |
140 | | - */ |
141 | | - void set_glob_threshold(const double& l) { m_histogram.glob_threshold = l;} |
142 | | - |
143 | | - /* |
144 | | - * @brief set bin counts in Histogram |
145 | | - * @param c: vector of bin counts |
146 | | - */ |
147 | | - void set_counts(const std::vector<int> & c) { m_histogram.counts = c; } |
148 | | - |
149 | | - /* |
150 | | - * @brief set bin edges in Histogram |
151 | | - * @param be: vector of bin edges |
152 | | - */ |
153 | | - void set_bin_edges(const std::vector<double>& be) {m_histogram.bin_edges = be;} |
154 | | - |
155 | | - /* |
156 | | - * @brief Append a new bin count to the Histogram |
157 | | - * @param count: bin count value |
158 | | - */ |
159 | | - void add2counts(const int& count) {m_histogram.counts.push_back(count);} |
160 | | - |
161 | | - /* |
162 | | - * @brief Update counts for a given index of bin in histogram |
163 | | - * @param id: index of bin in Histogram |
164 | | - * @param count: bin count value to update |
165 | | - */ |
166 | | - void add2counts(const int& id, const int& count) {m_histogram.counts[id] += count;} |
167 | | - |
168 | | - /* |
169 | | - * @brief Append a new bin edge to the histogram |
170 | | - * @param bin_edge: bin edge value to append |
171 | | - */ |
172 | | - void add2binedges(const double& bin_edge) {m_histogram.bin_edges.push_back(bin_edge);} |
173 | | - |
174 | | - /* |
175 | | - * @brief get current value of global threshold from global histogram |
176 | | - * @return global threshold |
177 | | - */ |
178 | | - const double& get_threshold() const {return m_histogram.glob_threshold;} |
179 | | - |
180 | | - /* |
181 | | - * @brief Get current vector of bin counts of Histogram |
182 | | - * @return vector of bin counts |
183 | | - */ |
184 | | - const std::vector<int>& counts() const {return m_histogram.counts;} |
185 | | - |
186 | | - /* |
187 | | - * @brief Get current vector of bin edges of histogram |
188 | | - * @return vector of bin edges |
189 | | - */ |
190 | | - const std::vector<double>& bin_edges() const {return m_histogram.bin_edges;} |
191 | | - |
192 | | - /** |
193 | | - * @brief Get the current statistics as a JSON object |
194 | | - */ |
195 | | - nlohmann::json get_json() const; |
196 | | - |
197 | | - private: |
198 | | - Data m_histogram; /**< Histogram Data (bin counts, bin edges)*/ |
199 | | - |
200 | | - /* |
201 | | - * @brief Compute bin width based on Scott's rule |
202 | | - * @param vals: vector of runtimes |
203 | | - * @return computed bin width |
204 | | - */ |
205 | | - static double _scott_binWidth(const std::vector<double> & vals); |
206 | | - |
207 | | - /* |
208 | | - * @brief Compute bin width based on Scott's rule from the bin counts and edges of a global and a local histogram |
209 | | - * @param global_counts: bin counts in global histogram on pserver |
210 | | - * @param global_edges: bin edges in global histogram on pserver |
211 | | - * @param local_counts: bin counts in local histogram in AD module |
212 | | - * @param local_edges: bin edges in local histogram in AD module |
213 | | - * @return computed bin width |
214 | | - */ |
215 | | - static double _scott_binWidth(const std::vector<int> & global_counts, const std::vector<double> & global_edges, const std::vector<int> & local_counts, const std::vector<double> & local_edges); |
216 | | - |
217 | | - }; |
218 | | - |
219 | | - |
220 | | - Histogram operator+(const Histogram& g, const Histogram& l); |
221 | | - |
222 | | - /** |
223 | | - * @@brief Implementation of ParamInterface for HBOS based anomaly detection |
| 14 | + * @brief Implementation of ParamInterface for COPOD based anomaly detection |
224 | 15 | */ |
225 | 16 | class CopodParam : public ParamInterface { |
226 | 17 | public: |
|
0 commit comments