|
#pragma once
#include <chimbuko_config.h>

#include <cstddef>
#include <string>
#include <unordered_map>
#include <utility>

#include <chimbuko/ad/ADNetClient.hpp>
#include <chimbuko/util/Anomalies.hpp>
#include <chimbuko/ad/FuncAnomalyMetrics.hpp>
| 6 | + |
| 7 | +namespace chimbuko{ |
| 8 | + |
| 9 | + /** |
| 10 | + * @brief A class that collects anomaly metrics broken down by function for sending to the pserver |
| 11 | + */ |
| 12 | + class ADLocalAnomalyMetrics{ |
| 13 | + public: |
| 14 | + /** |
| 15 | + * @brief Class state that is serialized for pserver comms |
| 16 | + */ |
| 17 | + struct State{ |
| 18 | + int app; /**< Program idx*/ |
| 19 | + int rank; /**< rank */ |
| 20 | + int step; /**< io step */ |
| 21 | + unsigned long first_event_ts; /**< Timestamp of first event on step */ |
| 22 | + unsigned long last_event_ts; /**< Timestamp of last event on step */ |
| 23 | + |
| 24 | + std::unordered_map<int, FuncAnomalyMetrics::State> func_anom_metrics; |
| 25 | + |
| 26 | + /* |
| 27 | + * @brief Serialize this instance in Cereal |
| 28 | + */ |
| 29 | + template<class Archive> |
| 30 | + void serialize(Archive & archive){ |
| 31 | + archive(app, rank, step, first_event_ts, last_event_ts, func_anom_metrics); |
| 32 | + } |
| 33 | + |
| 34 | + State(const ADLocalAnomalyMetrics &parent); |
| 35 | + State(){} |
| 36 | + }; |
| 37 | + |
| 38 | + /** |
| 39 | + * @brief Constructor |
| 40 | + * @param app Application index |
| 41 | + * @param rank AD rank |
| 42 | + * @param step IO step |
| 43 | + * @param first_event_ts Timestamp of the first event on this step |
| 44 | + * @param last_event_ts Timestamp of the last event on this step |
| 45 | + * @param anom Anomalies instance |
| 46 | + */ |
| 47 | + ADLocalAnomalyMetrics(int app, int rank, int step, unsigned long first_event_ts, unsigned long last_event_ts, const Anomalies &anom); |
| 48 | + ADLocalAnomalyMetrics(){} |
| 49 | + |
| 50 | + /** |
| 51 | + * @brief Get the current state as a state object |
| 52 | + * |
| 53 | + * The string dump of this object is the serialized form sent to the parameter server |
| 54 | + */ |
| 55 | + State get_state() const; |
| 56 | + |
| 57 | + |
| 58 | + /** |
| 59 | + * @brief Set the internal variables to the given state object |
| 60 | + */ |
| 61 | + void set_state(const State &s); |
| 62 | + |
| 63 | + |
| 64 | + /** |
| 65 | + * @brief Serialize this class for communication over the network |
| 66 | + */ |
| 67 | + std::string net_serialize() const; |
| 68 | + |
| 69 | + /** |
| 70 | + * @brief Unserialize this class after communication over the network |
| 71 | + */ |
| 72 | + void net_deserialize(const std::string &s); |
| 73 | + |
| 74 | + |
| 75 | + /** |
| 76 | + * @brief Send the data to the pserver |
| 77 | + * @param net_client The network client object |
| 78 | + * @return std::pair<size_t, size_t> [sent, recv] message size |
| 79 | + */ |
| 80 | + std::pair<size_t, size_t> send(ADNetClient &client) const; |
| 81 | + |
| 82 | + |
| 83 | + /** |
| 84 | + * @brief Get the data |
| 85 | + */ |
| 86 | + const std::unordered_map<int, FuncAnomalyMetrics> & get_metrics() const{ return m_func_anom_metrics; } |
| 87 | + |
| 88 | + /** |
| 89 | + * @brief Get the program idx |
| 90 | + */ |
| 91 | + int get_pid() const{ return m_app; } |
| 92 | + |
| 93 | + /** |
| 94 | + * @brief Get the rank |
| 95 | + */ |
| 96 | + int get_rid() const{ return m_rank; } |
| 97 | + |
| 98 | + /** |
| 99 | + * @brief Get the IO step |
| 100 | + */ |
| 101 | + int get_step() const{ return m_step; } |
| 102 | + |
| 103 | + /** |
| 104 | + * @brief Get the timestamp of the first event on this IO step |
| 105 | + */ |
| 106 | + unsigned long get_first_event_ts() const{ return m_first_event_ts; } |
| 107 | + |
| 108 | + /** |
| 109 | + * @brief Get the timestamp of the last event on this IO step |
| 110 | + */ |
| 111 | + unsigned long get_last_event_ts() const{ return m_last_event_ts; } |
| 112 | + |
| 113 | + |
| 114 | + /** |
| 115 | + * @brief Equivalence operator |
| 116 | + */ |
| 117 | + bool operator==(const ADLocalAnomalyMetrics &r) const{ |
| 118 | + return m_app == r.m_app && m_rank == r.m_rank && m_step == r.m_step && m_func_anom_metrics == r.m_func_anom_metrics && |
| 119 | + m_first_event_ts == r.m_first_event_ts && m_last_event_ts == r.m_last_event_ts; |
| 120 | + } |
| 121 | + |
| 122 | + /** |
| 123 | + * @brief Inequality operator |
| 124 | + */ |
| 125 | + inline bool operator!=(const ADLocalAnomalyMetrics &r) const{ return !(*this == r); } |
| 126 | + |
| 127 | + /** |
| 128 | + * @brief Attach a RunMetric object into which performance metrics are accumulated |
| 129 | + */ |
| 130 | + void linkPerf(PerfStats* perf){ m_perf = perf; } |
| 131 | + |
| 132 | + private: |
| 133 | + int m_app; /**< Program idx*/ |
| 134 | + int m_rank; /**< rank */ |
| 135 | + int m_step; /**< io step */ |
| 136 | + unsigned long m_first_event_ts; /**< Timestamp of first event on step */ |
| 137 | + unsigned long m_last_event_ts; /**< Timestamp of last event on step */ |
| 138 | + |
| 139 | + std::unordered_map<int, FuncAnomalyMetrics> m_func_anom_metrics; /**< Map of function idx to the metrics for that function*/ |
| 140 | + |
| 141 | + PerfStats *m_perf; /**< Store performance data */ |
| 142 | + }; |
| 143 | + |
| 144 | + |
| 145 | + |
| 146 | + |
| 147 | +} |
0 commit comments