Skip to content

Commit 0c4b7a2

Browse files
committed
Improved organization and naming of pserver classes that aggregate data from the AD
Fixed ADOutlier anomaly score unit test not reflecting new score definition
1 parent a6ae0ac commit 0c4b7a2

28 files changed

Lines changed: 365 additions & 209 deletions

include/Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
nobase_include_HEADERS = chimbuko/chimbuko.hpp chimbuko/ad/ADOutlier.hpp chimbuko/ad/ADNetClient.hpp chimbuko/ad/ADAnomalyProvenance.hpp chimbuko/ad/ADParser.hpp chimbuko/ad/ADProvenanceDBengine.hpp chimbuko/ad/ADNormalEventProvenance.hpp chimbuko/ad/ADLocalFuncStatistics.hpp chimbuko/ad/ADMetadataParser.hpp chimbuko/ad/AnomalyData.hpp chimbuko/ad/ADDefine.hpp chimbuko/ad/ADglobalFunctionIndexMap.hpp chimbuko/ad/ADProvenanceDBclient.hpp chimbuko/ad/ADEvent.hpp chimbuko/ad/ADCounter.hpp chimbuko/ad/ADLocalCounterStatistics.hpp chimbuko/ad/ExecData.hpp chimbuko/ad/utils.hpp chimbuko/ad/ADio.hpp chimbuko/verbose.hpp chimbuko/pserver/PSProvenanceDBclient.hpp chimbuko/pserver/global_anomaly_stats.hpp chimbuko/pserver/PSglobalFunctionIndexMap.hpp chimbuko/pserver/PSstatSender.hpp chimbuko/pserver/AnomalyStat.hpp chimbuko/pserver/global_counter_stats.hpp chimbuko/param/sstd_param.hpp chimbuko/param/hbos_param.hpp chimbuko/AD.hpp chimbuko/message.hpp chimbuko/net.hpp chimbuko/pserver.hpp chimbuko/param.hpp chimbuko/net/zmqme_net.hpp chimbuko/net/mpi_net.hpp chimbuko/net/zmq_net.hpp chimbuko/net/local_net.hpp chimbuko/util/RunStats.hpp chimbuko/util/ADIOS2parseUtils.hpp chimbuko/util/time.hpp chimbuko/util/map.hpp chimbuko/util/error.hpp chimbuko/util/string.hpp chimbuko/util/DispatchQueue.hpp chimbuko/util/PerfStats.hpp chimbuko/util/RunMetric.hpp chimbuko/util/memutils.hpp chimbuko/util/Anomalies.hpp chimbuko/util/barrier.hpp chimbuko/util/threadPool.hpp chimbuko/util/hash.hpp chimbuko/util/mtQueue.hpp chimbuko/util/serialize.hpp chimbuko/util/commandLineParser.hpp chimbuko/util/environment.hpp
1+
nobase_include_HEADERS = chimbuko/chimbuko.hpp chimbuko/ad/ADOutlier.hpp chimbuko/ad/ADNetClient.hpp chimbuko/ad/FuncStats.hpp chimbuko/ad/ADAnomalyProvenance.hpp chimbuko/ad/ADParser.hpp chimbuko/ad/ADProvenanceDBengine.hpp chimbuko/ad/ADNormalEventProvenance.hpp chimbuko/ad/ADLocalFuncStatistics.hpp chimbuko/ad/ADMetadataParser.hpp chimbuko/ad/AnomalyData.hpp chimbuko/ad/ADDefine.hpp chimbuko/ad/ADglobalFunctionIndexMap.hpp chimbuko/ad/ADProvenanceDBclient.hpp chimbuko/ad/ADEvent.hpp chimbuko/ad/ADCounter.hpp chimbuko/ad/ADLocalCounterStatistics.hpp chimbuko/ad/ExecData.hpp chimbuko/ad/utils.hpp chimbuko/ad/ADio.hpp chimbuko/verbose.hpp chimbuko/pserver/PSProvenanceDBclient.hpp chimbuko/pserver/GlobalAnomalyStats.hpp chimbuko/pserver/PSglobalFunctionIndexMap.hpp chimbuko/pserver/AggregateFuncStats.hpp chimbuko/pserver/AggregateAnomalyData.hpp chimbuko/pserver/PSstatSender.hpp chimbuko/pserver/GlobalCounterStats.hpp chimbuko/param/sstd_param.hpp chimbuko/param/hbos_param.hpp chimbuko/AD.hpp chimbuko/message.hpp chimbuko/net.hpp chimbuko/pserver.hpp chimbuko/param.hpp chimbuko/net/zmqme_net.hpp chimbuko/net/mpi_net.hpp chimbuko/net/zmq_net.hpp chimbuko/net/local_net.hpp chimbuko/util/RunStats.hpp chimbuko/util/ADIOS2parseUtils.hpp chimbuko/util/environment.hpp chimbuko/util/time.hpp chimbuko/util/map.hpp chimbuko/util/error.hpp chimbuko/util/string.hpp chimbuko/util/DispatchQueue.hpp chimbuko/util/PerfStats.hpp chimbuko/util/RunMetric.hpp chimbuko/util/memutils.hpp chimbuko/util/Anomalies.hpp chimbuko/util/barrier.hpp chimbuko/util/threadPool.hpp chimbuko/util/hash.hpp chimbuko/util/mtQueue.hpp chimbuko/util/serialize.hpp chimbuko/util/commandLineParser.hpp

include/chimbuko/ad/ADLocalFuncStatistics.hpp

Lines changed: 3 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -2,86 +2,19 @@
22
#include <chimbuko_config.h>
33
#include <chimbuko/ad/ADNetClient.hpp>
44
#include <chimbuko/ad/ADEvent.hpp>
5-
#include <chimbuko/ad/AnomalyData.hpp>
65
#include <chimbuko/util/Anomalies.hpp>
76
#include "chimbuko/util/PerfStats.hpp"
87

8+
#include <chimbuko/ad/AnomalyData.hpp>
9+
#include <chimbuko/ad/FuncStats.hpp>
10+
911
namespace chimbuko{
1012

1113
/**
1214
* @brief A class that gathers local function statistics and communicates them to the parameter server
1315
*/
1416
class ADLocalFuncStatistics{
1517
public:
16-
/**
17-
* @brief Structure to store the profile statistics associated with a specific function
18-
*/
19-
struct FuncStats{
20-
unsigned long pid; /**< Program index*/
21-
unsigned long id; /**< Function index*/
22-
std::string name; /**< Function name*/
23-
unsigned long n_anomaly; /**< Number of anomalies*/
24-
RunStats inclusive; /**< Inclusive runtime stats*/
25-
RunStats exclusive; /**< Exclusive runtime stats*/
26-
27-
FuncStats(): n_anomaly(0){}
28-
29-
/**
30-
* @brief Create a FuncStats instance of a particular pid, id, name
31-
*/
32-
FuncStats(const unsigned long pid, const unsigned long id, const std::string &name): pid(pid), id(id), name(name), n_anomaly(0){}
33-
34-
struct State{
35-
unsigned long pid; /**< Program index*/
36-
unsigned long id; /**< Function index*/
37-
std::string name; /**< Function name*/
38-
unsigned long n_anomaly; /**< Number of anomalies*/
39-
RunStats::State inclusive; /**< Inclusive runtime stats*/
40-
RunStats::State exclusive; /**< Exclusive runtime stats*/
41-
42-
State(){}
43-
/**
44-
* @brief Create the State from the FuncStats instance
45-
*/
46-
State(const FuncStats &p);
47-
48-
/**
49-
* @brief Serialize using cereal
50-
*/
51-
template<class Archive>
52-
void serialize(Archive & archive){
53-
archive(pid,id,name,n_anomaly,inclusive,exclusive);
54-
}
55-
56-
/**
57-
* @brief Create a JSON object from this instance
58-
*/
59-
nlohmann::json get_json() const;
60-
};
61-
62-
/**
63-
*@brief Get the State object corresponding to this object
64-
*/
65-
inline State get_state() const{ return State(*this); }
66-
67-
/**
68-
* @brief Set the object state
69-
*/
70-
void set_state(const State &to);
71-
72-
/**
73-
* @brief Equivalence operator
74-
*/
75-
bool operator==(const FuncStats &r) const{
76-
return pid==r.pid && id==r.id && name==r.name && n_anomaly==r.n_anomaly && inclusive==r.inclusive && exclusive==r.exclusive;
77-
}
78-
79-
/**
80-
* @brief Inequalityoperator
81-
*/
82-
inline bool operator!=(const FuncStats &r) const{ return !(*this == r); }
83-
};
84-
8518
/**
8619
* @brief Data structure containing the data that is sent (in serialized form) to the parameter server
8720
*/

include/chimbuko/ad/AnomalyData.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ namespace chimbuko {
1212
/**
1313
* @brief A class that contains data on the number of anomalies collected during the present timestep.
1414
* It contains the number of anomalies and the timestamp window in which the anomalies occurred
15+
*
16+
* These data are aggregated over rank to form the anomaly_stats.anomaly field of the pserver streaming output
1517
*
1618
*/
1719
class AnomalyData {

include/chimbuko/ad/FuncStats.hpp

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#pragma once
2+
#include <chimbuko_config.h>
3+
#include <chimbuko/util/RunStats.hpp>
4+
5+
namespace chimbuko{
6+
/**
7+
* @brief Structure to store the profile statistics associated with a specific function
8+
*/
9+
struct FuncStats{
10+
unsigned long pid; /**< Program index*/
11+
unsigned long id; /**< Function index*/
12+
std::string name; /**< Function name*/
13+
unsigned long n_anomaly; /**< Number of anomalies*/
14+
RunStats inclusive; /**< Inclusive runtime stats*/
15+
RunStats exclusive; /**< Exclusive runtime stats*/
16+
17+
FuncStats(): n_anomaly(0){}
18+
19+
/**
20+
* @brief Create a FuncStats instance of a particular pid, id, name
21+
*/
22+
FuncStats(const unsigned long pid, const unsigned long id, const std::string &name): pid(pid), id(id), name(name), n_anomaly(0){}
23+
24+
struct State{
25+
unsigned long pid; /**< Program index*/
26+
unsigned long id; /**< Function index*/
27+
std::string name; /**< Function name*/
28+
unsigned long n_anomaly; /**< Number of anomalies*/
29+
RunStats::State inclusive; /**< Inclusive runtime stats*/
30+
RunStats::State exclusive; /**< Exclusive runtime stats*/
31+
32+
State(){}
33+
/**
34+
* @brief Create the State from the FuncStats instance
35+
*/
36+
State(const FuncStats &p);
37+
38+
/**
39+
* @brief Serialize using cereal
40+
*/
41+
template<class Archive>
42+
void serialize(Archive & archive){
43+
archive(pid,id,name,n_anomaly,inclusive,exclusive);
44+
}
45+
46+
/**
47+
* @brief Create a JSON object from this instance
48+
*/
49+
nlohmann::json get_json() const;
50+
};
51+
52+
/**
53+
* @brief Get the State object corresponding to this object
54+
*/
55+
inline State get_state() const{ return State(*this); }
56+
57+
/**
58+
* @brief Set the object state
59+
*/
60+
void set_state(const State &to);
61+
62+
/**
63+
* @brief Equivalence operator
64+
*/
65+
bool operator==(const FuncStats &r) const{
66+
return pid==r.pid && id==r.id && name==r.name && n_anomaly==r.n_anomaly && inclusive==r.inclusive && exclusive==r.exclusive;
67+
}
68+
69+
/**
70+
* @brief Inequality operator
71+
*/
72+
inline bool operator!=(const FuncStats &r) const{ return !(*this == r); }
73+
};
74+
75+
}

include/chimbuko/pserver.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#include <chimbuko_config.h>
22
#include "pserver/PSstatSender.hpp"
3-
#include "pserver/global_anomaly_stats.hpp"
4-
#include "pserver/global_counter_stats.hpp"
3+
#include "pserver/GlobalAnomalyStats.hpp"
4+
#include "pserver/GlobalCounterStats.hpp"
55
#include "pserver/PSglobalFunctionIndexMap.hpp"

include/chimbuko/pserver/AnomalyStat.hpp renamed to include/chimbuko/pserver/AggregateAnomalyData.hpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,19 @@ namespace chimbuko {
66

77
/**
88
* @brief A class that contains statistics on the number of anomalies detected
9+
*
10+
* It contains the AnomalyData collected over IO steps for a given app/rank
911
*/
10-
class AnomalyStat {
12+
class AggregateAnomalyData {
1113
public:
12-
AnomalyStat(bool do_accumulate=false);
13-
~AnomalyStat();
14+
AggregateAnomalyData(bool do_accumulate=false);
15+
~AggregateAnomalyData();
1416

15-
AnomalyStat(const AnomalyStat &r);
16-
AnomalyStat(AnomalyStat &&r);
17+
AggregateAnomalyData(const AggregateAnomalyData &r);
18+
AggregateAnomalyData(AggregateAnomalyData &&r);
1719

18-
AnomalyStat & operator=(const AnomalyStat &r);
19-
AnomalyStat & operator=(AnomalyStat &&r);
20+
AggregateAnomalyData & operator=(const AggregateAnomalyData &r);
21+
AggregateAnomalyData & operator=(AggregateAnomalyData &&r);
2022

2123
/**
2224
* @brief Set the stats object to accumulate the sum total
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#pragma once
2+
#include <chimbuko_config.h>
3+
#include <chimbuko/ad/FuncStats.hpp>
4+
5+
namespace chimbuko {
6+
7+
/** @brief An object holding statistics information on a function, aggregated over the entire job */
8+
struct AggregateFuncStats{
9+
public:
10+
/**
11+
* @brief Constructor
12+
* @param pid Program idx
13+
* @param fid Function idx
14+
* @param func Function name
15+
*/
16+
AggregateFuncStats(int pid, int fid, const std::string &func);
17+
18+
/**
19+
* @brief Add more data into the accumulated totals
20+
* @param n_anomaly Number of anomalies
21+
* @param inclusive Inclusive runtime
22+
* @param exclusive Exclusive runtime
23+
*/
24+
void add(unsigned long n_anomaly, const RunStats& inclusive, const RunStats& exclusive);
25+
26+
/**
27+
* @brief Create a JSON object from this instance
28+
*/
29+
nlohmann::json get_json() const;
30+
31+
/**
32+
* @brief Get the program idx
33+
*/
34+
const int get_pid() const{ return m_pid; }
35+
/**
36+
* @brief Get the function idx
37+
*/
38+
const int get_fid() const{ return m_fid; }
39+
/**
40+
* @brief Get the function name
41+
*/
42+
const std::string &get_func() const{ return m_func; }
43+
/**
44+
* @brief Get the statistics on the number of anomalies
45+
*/
46+
const RunStats & get_func_anomaly() const{ return m_func_anomaly; }
47+
/**
48+
* @brief Get the inclusive runtime
49+
*/
50+
const RunStats & get_inclusive() const{ return m_inclusive; }
51+
/**
52+
* @brief Get the exclusive runtime
53+
*/
54+
const RunStats & get_exclusive() const{ return m_exclusive; }
55+
56+
private:
57+
int m_pid; /**< Program idx */
58+
int m_fid; /**< Function idx */
59+
std::string m_func; /**< Func name */
60+
RunStats m_func_anomaly; /**< Statistics on number of anomalies*/
61+
RunStats m_inclusive; /**< Statistics on the function runtime inclusive of children*/
62+
RunStats m_exclusive; /**< Statistics on the function runtime exclusive of children*/
63+
};
64+
65+
66+
67+
}

include/chimbuko/pserver/global_anomaly_stats.hpp renamed to include/chimbuko/pserver/GlobalAnomalyStats.hpp

Lines changed: 9 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
#include <vector>
77
#include <unordered_map>
88
#include <nlohmann/json.hpp>
9-
#include "chimbuko/pserver/AnomalyStat.hpp"
9+
#include "chimbuko/pserver/AggregateAnomalyData.hpp"
10+
#include "chimbuko/pserver/AggregateFuncStats.hpp"
1011
#include <chimbuko/net.hpp>
1112
#include <chimbuko/pserver/PSstatSender.hpp>
1213
#include <chimbuko/ad/ADLocalFuncStatistics.hpp>
@@ -18,14 +19,6 @@ namespace chimbuko{
1819
*/
1920
class GlobalAnomalyStats{
2021
public:
21-
/**< A struct holding statistics information on a function*/
22-
struct FuncStats{
23-
std::string func; /**< Func name */
24-
RunStats func_anomaly; /**< Statistics on number of anomalies*/
25-
RunStats inclusive; /**< Statistics on number of function timings inclusive of children*/
26-
RunStats exclusive; /**< Statistics on number of function timings exclusive of children*/
27-
};
28-
2922
GlobalAnomalyStats(){}
3023

3124
/**
@@ -50,11 +43,11 @@ namespace chimbuko{
5043

5144

5245
/**
53-
* @brief Const accessor to the AnomalyStat instance corresponding to a particular stat_id (throw error if not present)
46+
* @brief Const accessor to the AggregateAnomalyData instance corresponding to a particular program index and rank (throws an error if not present)
5447
* @param pid program index
5548
* @param rid rank
5649
*/
57-
const AnomalyStat & get_anomaly_stat_container(const int pid, const unsigned long rid) const;
50+
const AggregateAnomalyData & get_anomaly_stat_container(const int pid, const unsigned long rid) const;
5851

5952
/**
6053
* @brief Get the number of anomaly data objects collected since the last flush for a given program/rank
@@ -90,7 +83,7 @@ namespace chimbuko{
9083
* @param pid Program index
9184
* @param fid Function index
9285
*/
93-
const FuncStats & get_func_stats(int pid, unsigned long fid) const;
86+
const AggregateFuncStats & get_func_stats(int pid, unsigned long fid) const;
9487

9588
/**
9689
* @brief Collect anomaly statistics into JSON object and flush the m_anomaly_stats statistics
@@ -109,8 +102,8 @@ namespace chimbuko{
109102
nlohmann::json collect();
110103

111104
protected:
112-
std::unordered_map<int, std::unordered_map<unsigned long, AnomalyStat> > m_anomaly_stats; /**< Map of program index and rank to the statistics of the number of anomalies per step and the AnomalyData objects that have been added by that AD instance since the last flush */
113-
std::unordered_map<unsigned long, std::unordered_map<unsigned long, FuncStats> > m_funcstats; /**< Map of program index and function index to aggregated profile statistics on the function*/
105+
std::unordered_map<int, std::unordered_map<unsigned long, AggregateAnomalyData> > m_anomaly_stats; /**< Map of program index and rank to the statistics of the number of anomalies per step and the AnomalyData objects that have been added by that AD instance since the last flush */
106+
std::unordered_map<unsigned long, std::unordered_map<unsigned long, AggregateFuncStats> > m_funcstats; /**< Map of program index and function index to aggregated profile statistics on the function*/
114107
mutable std::mutex m_mutex_anom; /**< Mutex for global anomaly statistics */
115108
mutable std::mutex m_mutex_func; /**< Mutex for global function statistics */
116109
};
@@ -124,16 +117,7 @@ namespace chimbuko{
124117
NetPayloadUpdateAnomalyStats(GlobalAnomalyStats * global_anom_stats): m_global_anom_stats(global_anom_stats){}
125118
MessageKind kind() const override{ return MessageKind::ANOMALY_STATS; }
126119
MessageType type() const override{ return MessageType::REQ_ADD; }
127-
void action(Message &response, const Message &message) override{
128-
check(message);
129-
if(m_global_anom_stats == nullptr) throw std::runtime_error("Cannot update global anomaly statistics as stats object has not been linked");
130-
131-
ADLocalFuncStatistics loc;
132-
loc.net_deserialize(message.buf());
133-
134-
m_global_anom_stats->add_anomaly_data(loc);
135-
response.set_msg("", false);
136-
}
120+
void action(Message &response, const Message &message) override;
137121
};
138122

139123

@@ -145,11 +129,7 @@ namespace chimbuko{
145129
GlobalAnomalyStats *m_stats;
146130
public:
147131
PSstatSenderGlobalAnomalyStatsPayload(GlobalAnomalyStats *stats): m_stats(stats){}
148-
void add_json(nlohmann::json &into) const override{
149-
nlohmann::json stats = m_stats->collect();
150-
if(stats.size() > 0)
151-
into["anomaly_stats"] = std::move(stats);
152-
}
132+
void add_json(nlohmann::json &into) const override;
153133
};
154134

155135

File renamed without changes.

sim/include/sim/pserver.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#pragma once
22
#include <chimbuko_config.h>
33
#include<chimbuko/param.hpp>
4-
#include<chimbuko/pserver/global_anomaly_stats.hpp>
5-
#include<chimbuko/pserver/global_counter_stats.hpp>
4+
#include<chimbuko/pserver/GlobalAnomalyStats.hpp>
5+
#include<chimbuko/pserver/GlobalCounterStats.hpp>
66

77
namespace chimbuko_sim{
88
using namespace chimbuko;

0 commit comments

Comments
 (0)