Skip to content

Commit f08a6a7

Browse files
committed
Added a class that does a combined net send of data for both ADLocalFuncStatistics and ADLocalCounterStatistics + unit test
Used the above to combine the two send calls into a single call Fixed (recoverable) errors occuring when assigning comm and counter events whose timestamps are later than the function exit time Added comparison operators for several pserver data classes In Chimbuko main class, moved code that gathers and sends data to PS into a separate method
1 parent 0c4b7a2 commit f08a6a7

22 files changed

Lines changed: 431 additions & 42 deletions

app/pserver.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,8 +187,7 @@ int main (int argc, char ** argv){
187187

188188
net.add_payload(new NetPayloadUpdateParams(param, args.freeze_params));
189189
net.add_payload(new NetPayloadGetParams(param));
190-
net.add_payload(new NetPayloadUpdateAnomalyStats(&global_func_stats));
191-
net.add_payload(new NetPayloadUpdateCounterStats(&global_counter_stats));
190+
net.add_payload(new NetPayloadRecvCombinedADdata(&global_func_stats, &global_counter_stats));
192191
net.add_payload(new NetPayloadGlobalFunctionIndexMapBatched(&global_func_index_map));
193192
net.init(nullptr, nullptr, args.nt);
194193

include/chimbuko/AD.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "chimbuko/ad/ADNetClient.hpp"
1111
#include "chimbuko/ad/ADLocalFuncStatistics.hpp"
1212
#include "chimbuko/ad/ADLocalCounterStatistics.hpp"
13+
#include "chimbuko/ad/ADcombinedPSdata.hpp"
1314
#include "chimbuko/ad/ADProvenanceDBclient.hpp"
1415
#include "chimbuko/ad/ADMetadataParser.hpp"
1516
#include "chimbuko/ad/ADglobalFunctionIndexMap.hpp"

include/chimbuko/ad/ADLocalCounterStatistics.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,16 @@ namespace chimbuko{
140140
*/
141141
int getIOstep() const{ return m_step; }
142142

143+
/**
144+
* @brief Comparison operator
145+
*/
146+
bool operator==(const ADLocalCounterStatistics &r) const{ return m_program_idx == r.m_program_idx && m_step == r.m_step && m_stats == r.m_stats; }
147+
148+
/**
149+
* @brief Inequality operator
150+
*/
151+
bool operator!=(const ADLocalCounterStatistics &r) const{ return !( *this == r ); }
152+
143153
protected:
144154
/**
145155
* @brief update (send) counter statistics gathered during this io step to the connected parameter server
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
#pragma once
2+
#include <chimbuko_config.h>
3+
#include <chimbuko/ad/ADLocalFuncStatistics.hpp>
4+
#include <chimbuko/ad/ADLocalCounterStatistics.hpp>
5+
6+
namespace chimbuko{
7+
/**
8+
* @brief A class that combines all data that is sent to the PS into a single send transmission
9+
*/
10+
class ADcombinedPSdata{
11+
public:
12+
ADcombinedPSdata(ADLocalFuncStatistics &func_stats, ADLocalCounterStatistics &counter_stats, PerfStats* perf=nullptr):
13+
m_func_stats(func_stats), m_counter_stats(counter_stats), m_perf(perf){}
14+
15+
ADLocalCounterStatistics &get_counter_stats(){ return m_counter_stats; }
16+
const ADLocalCounterStatistics &get_counter_stats() const{ return m_counter_stats; }
17+
18+
ADLocalFuncStatistics &get_func_stats(){ return m_func_stats; }
19+
const ADLocalFuncStatistics &get_func_stats() const{ return m_func_stats; }
20+
21+
/**
22+
* @brief The State object is what is serialized for the communication
23+
*/
24+
struct State{
25+
ADLocalFuncStatistics::State func_stats_state;
26+
ADLocalCounterStatistics::State counter_stats_state;
27+
28+
/**
29+
* @brief Serialize using cereal
30+
*/
31+
template<class Archive>
32+
void serialize(Archive & archive){
33+
archive(func_stats_state , counter_stats_state);
34+
}
35+
36+
/**
37+
* Serialize into Cereal portable binary format
38+
*/
39+
std::string serialize_cerealpb() const;
40+
41+
/**
42+
* Serialize from Cereal portable binary format
43+
*/
44+
void deserialize_cerealpb(const std::string &strstate);
45+
};
46+
47+
/**
48+
* @brief Get the current state as a state object
49+
*
50+
* The string dump of this object is the serialized form sent to the parameter server
51+
*/
52+
State get_state() const;
53+
54+
55+
/**
56+
* @brief Set the internal variables to the given state object
57+
*/
58+
void set_state(const State &s);
59+
60+
61+
/**
62+
* @brief Serialize this class for communication over the network
63+
*/
64+
std::string net_serialize() const;
65+
66+
/**
67+
* @brief Unserialize this class after communication over the network
68+
*/
69+
void net_deserialize(const std::string &s);
70+
71+
72+
/**
73+
* @brief Send the data to the pserver
74+
* @param net_client The network client object
75+
* @return std::pair<size_t, size_t> [sent, recv] message size
76+
*/
77+
std::pair<size_t, size_t> send(ADNetClient &net_client) const;
78+
79+
80+
private:
81+
ADLocalFuncStatistics &m_func_stats;
82+
ADLocalCounterStatistics &m_counter_stats;
83+
PerfStats* m_perf;
84+
};
85+
86+
87+
}

include/chimbuko/chimbuko.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,12 @@ namespace chimbuko {
194194
const unsigned long last_event_ts) const;
195195

196196

197+
/**
198+
* @brief Gather and send the required data to the pserver
199+
*/
200+
void gatherAndSendPSdata(const Anomalies &anomalies,
201+
const int step) const;
202+
197203
/**
198204
* @brief Send new metadata entries collected during current fram to provenance DB
199205
*/

include/chimbuko/message.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ enum MessageKind {
3131
PARAMETERS = 2,
3232
ANOMALY_STATS = 3,
3333
COUNTER_STATS = 4,
34-
FUNCTION_INDEX = 5
34+
FUNCTION_INDEX = 5,
35+
AD_PS_COMBINED_STATS = 6
3536
};
3637

3738
enum MessageCmd {
@@ -204,6 +205,7 @@ class Message {
204205
case 3: return "ANOMALY_STATS";
205206
case 4: return "COUNTER_STATS";
206207
case 5: return "FUNCTION_INDEX";
208+
case 6: return "AD_PS_COMBINED_STATS";
207209
default: return "UNKNOWN";
208210
}
209211
}

include/chimbuko/pserver.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
#include "pserver/GlobalAnomalyStats.hpp"
44
#include "pserver/GlobalCounterStats.hpp"
55
#include "pserver/PSglobalFunctionIndexMap.hpp"
6+
#include "pserver/NetPayloadRecvCombinedADdata.hpp"

include/chimbuko/pserver/AggregateAnomalyData.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,16 @@ namespace chimbuko {
6969
*/
7070
size_t get_n_data() const;
7171

72+
/**
73+
* @brief Comparison operator
74+
*/
75+
bool operator==(const AggregateAnomalyData &r) const;
76+
/**
77+
* @brief Inequality operator
78+
*/
79+
bool operator!=(const AggregateAnomalyData &r) const{ return !( *this == r ); }
80+
81+
7282
private:
7383
mutable std::mutex m_mutex;
7484
RunStats m_stats; /** Statistics on the number of anomalies collected per io step since start of run as well as count of total anomalies*/

include/chimbuko/pserver/AggregateFuncStats.hpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,19 @@ namespace chimbuko {
5353
*/
5454
const RunStats & get_exclusive() const{ return m_exclusive; }
5555

56+
/**
57+
* @brief Comparison operator
58+
*/
59+
bool operator==(const AggregateFuncStats &r) const{ return m_pid == r.m_pid && m_fid == r.m_fid && m_func == r.m_func &&
60+
m_func_anomaly == r.m_func_anomaly && m_inclusive == r.m_inclusive &&
61+
m_exclusive == r.m_exclusive;
62+
}
63+
/**
64+
* @brief Inequality operator
65+
*/
66+
bool operator!=(const AggregateFuncStats &r) const{ return !( *this == r ); }
67+
68+
5669
private:
5770
int m_pid; /**< Program idx */
5871
int m_fid; /**< Function idx */

include/chimbuko/pserver/GlobalAnomalyStats.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,16 @@ namespace chimbuko{
100100
* @return JSON object containing anomaly and function data
101101
*/
102102
nlohmann::json collect();
103+
104+
/**
105+
* @brief Comparison operator
106+
*/
107+
bool operator==(const GlobalAnomalyStats &r) const{ return m_anomaly_stats == r.m_anomaly_stats && m_funcstats == r.m_funcstats; }
108+
109+
/**
110+
* @brief Inequality operator
111+
*/
112+
bool operator!=(const GlobalAnomalyStats &r) const{ return !( *this == r ); }
103113

104114
protected:
105115
std::unordered_map<int, std::unordered_map<unsigned long, AggregateAnomalyData> > m_anomaly_stats; /**< Map of program index and rank to the statistics of the number of anomalies per step and the AnomalyData objects that have been added by that AD instance since the last flush */

0 commit comments

Comments
 (0)