@@ -137,310 +137,3 @@ using namespace chimbuko;
137137 }
138138
139139
140- /* *
141- * @brief Histogram Class Implementation
142- */
143-
144-
145- Histogram::Histogram (){clear ();}
146- Histogram::~Histogram (){}
147-
148- /* *
149- * @brief Merge Histogram
150- */
151- Histogram chimbuko::operator +(const Histogram& g, const Histogram& l) {
152- Histogram combined;
153- double min_runtime, max_runtime;
154- // verboseStream << "Bin_Edges Size of Global Histogram: " << std::to_string(g.bin_edges().size()) << ", Bin_Edges Size of Local Histogram: " << std::to_string(l.bin_edges().size()) << std::endl;
155- verboseStream << " Counts Size of Global Histogram: " << std::to_string (g.counts ().size ()) << " , Counts Size of Local Histogram: " << std::to_string (l.counts ().size ()) << std::endl;
156-
157- if (g.counts ().size () <= 0 ) {
158- verboseStream << " Global Histogram is empty" << std::endl;
159- combined = l;
160- return combined;
161- }
162- else if (l.counts ().size () <= 0 ) {
163- verboseStream << " Local Histogram is empty" << std::endl;
164- combined = g;
165- return combined;
166- }
167- else {
168- double bin_width;
169- if (g.counts ().size () > 0 && g.bin_edges ().size () > 1 && l.counts ().size () > 0 && l.bin_edges ().size () > 1 ){ /* *< If g and l are non-empty Histograms*/
170- bin_width = Histogram::_scott_binWidth (g.counts (), g.bin_edges (), l.counts (), l.bin_edges ()); /* *< Compute bin width for merged histogram*/
171-
172- verboseStream << " BIN WIDTH while merging: " << bin_width << std::endl;
173- if (bin_width < 0 ){
174- verboseStream << " Incorrect Bin Width Computed" << std::endl;
175- exit (1 );
176- }
177- }
178- else {
179- verboseStream << " INCORRECT histograms" << std::endl;
180- exit (1 );
181- }
182-
183- /* *
184- * Compute most minimum bin edges and most maximum bin edges from two histograms (g & l)
185- */
186- min_runtime = MIN (l.bin_edges ().at (0 ), g.bin_edges ().at (0 ));
187- max_runtime = MAX (l.bin_edges ().at (l.bin_edges ().size ()-1 ), g.bin_edges ().at (g.bin_edges ().size ()-1 ));
188-
189- std::vector<double > comb_binedges;
190- std::vector<int > comb_counts;
191-
192- if (bin_width == 0 ){
193- verboseStream << " BINWIDTH is Zero" << std::endl;
194- combined = g;
195-
196- for (int i = 0 ; i < l.bin_edges ().size () -1 ; i++) {
197-
198- auto index_it = std::lower_bound (combined.bin_edges ().begin (), combined.bin_edges ().end (), l.bin_edges ().at (i));
199- if (index_it != combined.bin_edges ().end ()){
200- const int id = std::distance (combined.bin_edges ().begin (), index_it) - 1 ;
201- const int inc = l.counts ().at (i);
202- verboseStream << " In l " << " id: " << id << " , inc: " << inc << std::endl;
203- if (id >= 0 && id < combined.counts ().size ())
204- combined.add2counts (id, inc);
205- }
206- }
207-
208- return combined;
209- }
210- else { // bin_width is > 0
211- verboseStream << " BindWidth is > 0 here: " << std::endl;
212-
213- verboseStream << " min_runtime:" << min_runtime << std::endl;
214- verboseStream << " max_runtime:" << max_runtime << std::endl;
215- if (max_runtime < min_runtime){
216- verboseStream << " Incorrect boundary for runtime" << std::endl;
217- exit (1 );
218- }
219-
220- double edge_val=min_runtime;
221-
222- if (min_runtime == max_runtime) {
223- comb_binedges.resize (2 );
224-
225- comb_binedges[0 ] = edge_val;
226- comb_binedges[1 ] = edge_val + bin_width;
227- }
228- else {
229- /*
230- comb_binedges.resize(floor((max_runtime - min_runtime)/bin_width) + 2);
231- for (int i = 0; i < comb_binedges.size(); i++) {
232- comb_binedges[i] = edge_val;
233- edge_val += bin_width;
234- }
235- */
236- for (edge_val = min_runtime; edge_val < max_runtime;) {
237- comb_binedges.push_back (edge_val);
238- edge_val += bin_width;
239- }
240- }
241- }
242-
243- comb_counts = std::vector<int >(comb_binedges.size () - 1 , 0 );
244-
245- for (int i = 0 ; i < g.bin_edges ().size () -1 ; i++) {
246-
247- auto index_it = std::lower_bound (comb_binedges.begin (), comb_binedges.end (), g.bin_edges ().at (i));
248- if (index_it != comb_binedges.end ()){
249- const int id = std::distance (comb_binedges.begin (), index_it) - 1 ;
250- const int inc = g.counts ().at (i);
251- verboseStream << " In g " << " id: " << id << " , inc: " << inc << std::endl;
252- if (id >= 0 && id < comb_counts.size ())
253- comb_counts[id] += inc;
254- }
255- }
256-
257- for (int i = 0 ; i < l.bin_edges ().size () -1 ; i++) {
258-
259- auto index_it = std::lower_bound (comb_binedges.begin (), comb_binedges.end (), l.bin_edges ().at (i));
260- if (index_it != comb_binedges.end ()){
261- const int id = std::distance (comb_binedges.begin (), index_it) - 1 ;
262- const int inc = l.counts ().at (i);
263- verboseStream << " In l " << " id: " << id << " , inc: " << inc << std::endl;
264- if (id >= 0 && id < comb_counts.size ())
265- comb_counts[id] += inc;
266- }
267- }
268-
269- double new_threshold;
270- if (l.get_threshold () > g.get_threshold ())
271- new_threshold = l.get_threshold ();
272- else
273- new_threshold = g.get_threshold ();
274-
275-
276- combined = g;
277-
278- combined.set_glob_threshold (new_threshold);
279- combined.set_counts (comb_counts);
280- combined.set_bin_edges (comb_binedges);
281- return combined;
282- }
283-
284-
285- }
286-
287- Histogram& Histogram::operator +=(const Histogram& h)
288- {
289-
290- Histogram combined = *this + h;
291-
292-
293- *this = combined;
294- // this->set_hist_data(Histogram::Data(this->get_threshold(), this->counts(), this->bin_edges()));
295- return *this ;
296- }
297-
298- double Histogram::_scott_binWidth (const std::vector<int > & global_counts, const std::vector<double > & global_edges, const std::vector<int > & local_counts, const std::vector<double > & local_edges){
299- double sum = 0.0 ;
300- verboseStream << " Size of Vector global_counts: " << global_counts.size () << std::endl;
301- verboseStream << " Size of Vector local_counts: " << local_counts.size () << std::endl;
302-
303- int size = 0 ;
304- for (int i = 0 ; i < global_counts.size (); i++) {
305- int count = global_counts[i];
306- if (count < 0 )
307- count = -1 * count;
308- if (count != 0 ){
309- verboseStream << std::to_string (count) << " , " ;
310- }
311- size += count;
312- sum += (count * global_edges.at (i));
313- }
314- verboseStream << std::endl;
315- verboseStream << " Size in _scott_binWidth: " << size << std::endl;
316- verboseStream << " Global sum in _scott_binWidth: " << sum << std::endl;
317-
318- for (int i = 0 ; i < local_counts.size (); i++) {
319- int count = local_counts[i];
320- if (count < 0 )
321- count = -1 * count;
322- if (count != 0 ){
323- verboseStream << std::to_string (count) << " , " ;
324- }
325- size += count;
326- sum += (count * local_edges.at (i));
327- }
328- verboseStream << std::endl;
329- verboseStream << " total Size in _scott_binWidth: " << size << std::endl;
330- verboseStream << " total sum in _scott_binWidth: " << sum << std::endl;
331-
332- const double mean = sum / size;
333- verboseStream << " mean in _xcott_binWidth: " << mean << std::endl;
334-
335- double var = 0.0 , std=0.0 ;
336- for (int i=0 ;i<global_counts.size ();i++){
337- var += global_counts.at (i) * pow ((global_edges.at (i) - mean), 2 );
338- }
339- verboseStream << " Global var in _scott_binWidth: " << var << std::endl;
340- for (int i=0 ;i<local_counts.size ();i++){
341- var += local_counts.at (i) * pow ((local_edges.at (i) - mean), 2 );
342- }
343- verboseStream << " total var in _scott_binWidth: " << var << std::endl;
344-
345- var = var / size;
346- verboseStream << " Final Variance in _scott_binWidth: " << var << std::endl;
347- std = sqrt (var);
348- verboseStream << " STD in merging _scott_binWidth: " << std << std::endl;
349- if (std <= 100.0 ) {return 0 ;}
350-
351- return ((3.5 * std ) / pow (size, 1 /3 ));
352-
353- }
354-
355- double Histogram::_scott_binWidth (const std::vector<double > & vals){
356- // Find bin width as per Scott's rule = 3.5*std*n^-1/3
357-
358- double sum = std::accumulate (vals.begin (), vals.end (), 0.0 );
359-
360- double mean = sum / vals.size ();
361- double var = 0.0 , std = 0.0 ;
362- for (int i=0 ; i<vals.size (); i++){
363- var += pow (vals.at (i) - mean, 2 );
364- }
365- var = var / vals.size ();
366- std = sqrt (var);
367- verboseStream << " STD in _scott_binWidth: " << std << std::endl;
368-
369- return ((3.5 * std ) / pow (vals.size (), 1 /3 ));
370- }
371-
372- void Histogram::set_hist_data (const Histogram::Data& d)
373- {
374- m_histogram.glob_threshold = d.glob_threshold ;
375- m_histogram.counts = d.counts ;
376- m_histogram.bin_edges = d.bin_edges ;
377- }
378-
379- // void Histogram::push (double x)
380- // {
381- // m_histogram.runtimes.push_back(x);
382- // }
383-
384- int Histogram::create_histogram (const std::vector<double >& r_times)
385- {
386- std::vector<double > runtimes = r_times;
387- const double bin_width = Histogram::_scott_binWidth (runtimes);
388- if (bin_width <= 0 ) {return -1 ;}
389- std::sort (runtimes.begin (), runtimes.end ());
390- const int h = runtimes.size () - 1 ;
391-
392- if (m_histogram.bin_edges .size () > 0 ) m_histogram.bin_edges .clear ();
393-
394- m_histogram.bin_edges .push_back (runtimes.at (0 ));
395-
396- double prev = m_histogram.bin_edges .at (0 );
397- while (prev < runtimes.at (h)){
398- m_histogram.bin_edges .push_back (prev + bin_width);
399- prev += bin_width;
400- }
401- // std::cout << "Number of bins: " << m_histogram.bin_edges.size()-1 << std::endl;
402-
403- if (m_histogram.counts .size () > 0 ) m_histogram.counts .clear ();
404- m_histogram.counts = std::vector<int >(m_histogram.bin_edges .size ()-1 , 0 );
405- for ( int i=0 ; i < runtimes.size (); i++) {
406- for ( int j=1 ; j < m_histogram.bin_edges .size (); j++) {
407- if ( runtimes.at (i) < m_histogram.bin_edges .at (j) ) {
408- m_histogram.counts [j-1 ] += 1 ;
409- break ;
410- }
411- }
412- }
413- // std::cout << "Size of counts: " << m_histogram.counts.size() << std::endl;
414-
415- // m_histogram.runtimes.clear();
416- const double min_threshold = -1 * log2 (1.00001 );
417- if (!(m_histogram.glob_threshold > min_threshold)) {
418- m_histogram.glob_threshold = min_threshold;
419- }
420- this ->set_hist_data (Histogram::Data ( m_histogram.glob_threshold , m_histogram.counts , m_histogram.bin_edges ));
421- return 0 ;
422- }
423-
424- int Histogram::merge_histograms (const Histogram& g, const std::vector<double >& runtimes)
425- {
426-
427- std::vector<double > r_times = runtimes;
428-
429- for (int i = 0 ; i < g.bin_edges ().size () - 1 ; i++) {
430- for (int j = 0 ; j < g.counts ().at (i); j++){
431- r_times.push_back (g.bin_edges ().at (i));
432- }
433- }
434-
435- m_histogram.glob_threshold = g.get_threshold ();
436- // verboseStream << "glob_threshold in merge_histograms = " << m_histogram.glob_threshold << std::endl;
437- return this ->create_histogram (r_times);
438- // this->set_hist_data(Histogram::Data( m_histogram.glob_threshold, m_histogram.counts, m_histogram.bin_edges ));
439-
440- }
441-
442- nlohmann::json Histogram::get_json () const {
443- return {
444- {" Histogram Bin Counts" , m_histogram.counts },
445- {" Histogram Bin Edges" , m_histogram.bin_edges }};
446- }
0 commit comments