Skip to content

Commit 3382904

Browse files
author
sandeepmittal
committed
Added function description in comments
1 parent 4631b7b commit 3382904

2 files changed

Lines changed: 79 additions & 15 deletions

File tree

include/chimbuko/ad/ADOutlier.hpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ namespace chimbuko {
114114
int m_rank; /**< this process rank */
115115
bool m_use_ps; /**< true if the parameter server is in use */
116116
ADThreadNetClient* m_net_client; /**< interface for communicating to parameter server */
117-
117+
118118
std::unordered_map< std::array<unsigned long, 4>, size_t, ArrayHasher<unsigned long,4> > m_local_func_exec_count; /**< Map(program id, rank id, thread id, func id) -> number of times encountered on this node*/
119119

120120
const ExecDataMap_t * m_execDataMap; /**< execution data map */
@@ -192,11 +192,13 @@ namespace chimbuko {
192192
*/
193193
class ADOutlierHBOS : public ADOutlier {
194194
public:
195+
195196
/**
196197
* @brief Construct a new ADOutlierHBOS object
197198
*
198199
*/
199200
ADOutlierHBOS(OutlierStatistic stat = ExclusiveRuntime, double threshold = 0.99, bool use_global_threshold = true);
201+
200202
/**
201203
* @brief Destroy the ADOutlierHBOS object
202204
*
@@ -238,16 +240,19 @@ namespace chimbuko {
238240
std::pair<size_t, size_t> sync_param(ParamInterface const* param) override;
239241

240242
/**
241-
* scott's rule for bin_width estimation
243+
* @brief Scott's rule for bin_width estimation during histogram formation
242244
*/
243245
double _scott_binWidth(std::vector<double>& vals);
244246

247+
/**
248+
* @brief Assigns samples to corresponding bins in Histogram. Similar to numpy digitize in Python
249+
*/
245250
int np_digitize_get_bin_inds(const double& X, const std::vector<double>& bin_edges);
246251

247252
private:
248-
double m_alpha; /**< alpha */
253+
double m_alpha; /**< Used to prevent log2 overflow */
249254
double m_threshold; /**< Threshold used to filter anomalies in HBOS*/
250-
bool m_use_global_threshold; /**< Flas to use global threshold*/
255+
bool m_use_global_threshold; /**< Flag to use global threshold*/
251256
//double m_threshold; /** sync with global threshold */
252257
OutlierStatistic m_statistic; /**< Which statistic to use for outlier detection */
253258

include/chimbuko/param/hbos_param.hpp

Lines changed: 70 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,43 @@ namespace chimbuko {
1515
class Histogram {
1616

1717
public:
18+
19+
/**
20+
* @brief Construct a Histogram object
21+
*/
1822
Histogram();
23+
24+
/**
25+
* @brief Destroy Histogram object
26+
*/
1927
~Histogram();
2028

29+
/**
30+
* @brief Data structure that stores Histogram data (global threshold, bin counts, bin edges)
31+
*/
2132
struct Data {
2233

2334
double glob_threshold; /**< global threshold used to filter anomalies*/
2435
std::vector<int> counts; /**< Bin counts in Histogram*/
2536
std::vector<double> bin_edges; /**< Bin edges in Histogram*/
2637

38+
/**
39+
* @brief Initialize histogram data
40+
*/
2741
Data(){
42+
43+
/**
44+
* @brief Resets histogram data during initialization
45+
*/
2846
clear();
2947
}
48+
49+
/**
50+
* @brief Initialize histogram data with existing histogram data
51+
* @param g_threshold: Global Threshold
52+
* @param h_counts: a vector<int> of histogram bin counts
53+
* @param h_bin_edges: a vector<double> of histogram bin edges
54+
*/
3055
Data(const double& g_threshold, const std::vector<int>& h_counts, const std::vector<double>& h_bin_edges ) {
3156
glob_threshold = g_threshold;
3257
counts = h_counts;
@@ -56,82 +81,111 @@ namespace chimbuko {
5681

5782
void push (double x);
5883

84+
/**
85+
* @brief returns reference to current histogram Data
86+
* @return Data: Histogram data (bin counts, bin edges)
87+
*/
5988
const Data &get_histogram() const{ return m_histogram; }
6089

6190
/**
6291
* @brief Set the internal variables from an instance of Histogram Data
92+
* @param d: Histogram Data (bin counts, bin edges)
6393
*/
6494
void set_hist_data(const Data& d);
6595

6696
/**
6797
* @brief Create an instance of this class from a Histogram Data instance
98+
* @param d: Histogram Data (bin counts, bin edges)
99+
* @return Instance of Histogram
68100
*/
69101
static Histogram from_hist_data(const Data& d) {
70102
Histogram histdata;
71103
histdata.set_hist_data(d);
72104
return histdata;
73105
}
106+
74107
/**
75108
* @brief Create new histogram locally for AD module's batch data instances
109+
* @param r_times: a vector<double> of function run times
110+
* @return 0 if successful, -1 if failed
76111
*/
77112
int create_histogram(const std::vector<double>& r_times);
78113

79-
114+
/**
115+
* @brief merges a Histogram with function runtimes
116+
* @param g: Histogram to merge
117+
* @param runtimes: Function runtimes
118+
* @return 0 if successful, -1 if failed
119+
*/
80120
int merge_histograms(const Histogram& g, const std::vector<double>& runtimes);
81121

82122
/**
83-
* @brief Combine two Histogram instances such that the resulting statistics are the union of the two
123+
* @brief Combine two Histogram instances such that the resulting statistics are the union of the two Histograms
124+
* @param g: Histogram to merge into
125+
* @param l: Histogram to merge
126+
* @return result: Merged Histogram
84127
*/
85128
friend Histogram operator+(const Histogram& g, const Histogram& l);
86129

87130
/**
88-
* @brief Combine two Histogram instances such that the resulting statistics are the union of the two
131+
* @brief Combine two Histogram instances such that the resulting statistics are the union of the two Histograms
132+
* @param h: Histogram to merge
133+
* @return result: Merged Histogram
89134
*/
90135
Histogram & operator+=(const Histogram& h);
91136

92137

93138
/**
94-
* @brief setd global threshold for anomaly filtering
139+
* @brief set global threshold for anomaly filtering
95140
*/
96141
void set_glob_threshold(const double& l) { m_histogram.glob_threshold = l;}
97142

98143
/**
99144
* @brief set bin counts in Histogram
145+
* @param c: vector of bin counts
100146
*/
101147
void set_counts(const std::vector<int> & c) { m_histogram.counts = c; }
102148

103149
/**
104150
* @brief set bin edges in Histogram
151+
* @param be: vector of bin edges
105152
*/
106153
void set_bin_edges(const std::vector<double>& be) {m_histogram.bin_edges = be;}
107154

108155
/**
109-
* @brief Update counts in Histogram
156+
* @brief Append a new bin count to the end of the Histogram's bin counts
157+
* @param count: bin count value to append
110158
*/
111159
void add2counts(const int& count) {m_histogram.counts.push_back(count);}
112160

113161
/**
114162
* @brief Update counts for a given index of bin in histogram
163+
* @param id: index of bin in Histogram
164+
* @param count: value to add to the current count of the bin at index id
115165
*/
116166
void add2counts(const int& id, const int& count) {m_histogram.counts[id] += count;}
117167

118168
/**
119-
* @brief Update bin edges in histogram
169+
* @brief Append a new bin edge to the end of the histogram's bin edges
170+
* @param bin_edge: bin edge value to append
120171
*/
121172
void add2binedges(const double& bin_edge) {m_histogram.bin_edges.push_back(bin_edge);}
122173

123174
/**
124-
* @brief get global threshold from global histogram
175+
* @brief get current value of global threshold from global histogram
176+
* @return global threshold
125177
*/
126178
const double& get_threshold() const {return m_histogram.glob_threshold;}
127179

128180
/**
129-
* @brief Get bin counts of Histogram
181+
* @brief Get current vector of bin counts of Histogram
182+
* @return vector of bin counts
130183
*/
131184
const std::vector<int>& counts() const {return m_histogram.counts;}
132185

133186
/**
134-
* @brief Get bin edges of histogram
187+
* @brief Get current vector of bin edges of histogram
188+
* @return vector of bin edges
135189
*/
136190
const std::vector<double>& bin_edges() const {return m_histogram.bin_edges;}
137191

@@ -141,11 +195,12 @@ namespace chimbuko {
141195
nlohmann::json get_json() const;
142196

143197
private:
144-
Data m_histogram; /**< Histogram Data*/
198+
Data m_histogram; /**< Histogram Data (bin counts, bin edges)*/
145199

146200
/**
147201
* @brief Compute bin width based on Scott's rule
148202
* @param vals: vector of runtimes
203+
* @return computed bin width
149204
*/
150205
static double _scott_binWidth(const std::vector<double> & vals);
151206

@@ -155,10 +210,13 @@ namespace chimbuko {
155210
* @param global_edges: bin edges in global histogram on pserver
156211
* @param local_counts: bin counts in local histogram in AD module
157212
* @param local_edges: bin edges in local histogram in AD module
213+
* @return computed bin width
158214
*/
159215
static double _scott_binWidth(const std::vector<int> & global_counts, const std::vector<double> & global_edges, const std::vector<int> & local_counts, const std::vector<double> & local_edges);
160216

161217
};
218+
219+
162220
Histogram operator+(const Histogram& g, const Histogram& l);
163221

164222
/**
@@ -173,6 +231,7 @@ namespace chimbuko {
173231
*/
174232
void clear() override;
175233

234+
176235
const int find(const unsigned long& func_id);
177236

178237
/**
@@ -263,7 +322,7 @@ namespace chimbuko {
263322
nlohmann::json get_algorithm_params(const unsigned long func_id) const override;
264323

265324
private:
266-
std::unordered_map<unsigned long, Histogram> m_hbosstats;
325+
std::unordered_map<unsigned long, Histogram> m_hbosstats; /**< Map of func_id and corresponding Histogram*/
267326
};
268327

269328

0 commit comments

Comments
 (0)