Skip to content

Commit a0bc1ff

Browse files
committed
First stage of implementing provenance post-pruning. A new ADDataInterface derived class can be used to pass the anomalies back into the AD algorithm on the provDB, eliminating entries that are no longer considered anomalous. A new unit test is included to aid development and checking.
To support the above: ADDataInterface can now be default constructed, with the number of datasets set after construction. Added a function to ADOutlier to pass in the initial global model. Setting ADOutlier::setGlobalModelSyncFrequency to <= 0 will now completely disable updates to the global model. Added a ProvDBtester to unit_tests/provdb, which acts as a standalone, in-memory implementation of the provDB.
1 parent 99d92b5 commit a0bc1ff

15 files changed

Lines changed: 323 additions & 14 deletions

File tree

configure.ac

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ AC_CONFIG_HEADERS([chimbuko_config.h])
217217
AC_SUBST([PS_FLAGS])
218218

219219
AC_CONFIG_SRCDIR([src/dummy])
220-
AC_CONFIG_FILES([Makefile src/Makefile app/Makefile sphinx/Makefile include/Makefile 3rdparty/Makefile test/Makefile test/unit_tests/Makefile test/unit_tests/ad/Makefile test/unit_tests/util/Makefile test/unit_tests/pserver/Makefile test/unit_tests/net/Makefile test/unit_tests/param/Makefile scripts/Makefile scripts/launch/Makefile sim/Makefile sim/src/Makefile sim/main/Makefile sim/include/Makefile])
220+
AC_CONFIG_FILES([Makefile src/Makefile app/Makefile sphinx/Makefile include/Makefile 3rdparty/Makefile test/Makefile test/unit_tests/Makefile test/unit_tests/ad/Makefile test/unit_tests/util/Makefile test/unit_tests/pserver/Makefile test/unit_tests/net/Makefile test/unit_tests/param/Makefile test/unit_tests/provdb/Makefile scripts/Makefile scripts/launch/Makefile sim/Makefile sim/src/Makefile sim/main/Makefile sim/include/Makefile])
221221

222222
AC_CONFIG_FILES([app/ws_flask_stat.py:app/ws_flask_stat.py app/sst_view_parse.pl:app/sst_view_parse.pl])
223223
AC_CONFIG_FILES([run_test.sh:run_test.sh test/run_all.sh:test/run_all.sh test/run_ad.sh:test/run_ad.sh test/run_net.sh:test/run_net.sh test/run_provdb_client_test.sh:test/run_provdb_client_test.sh test/run_provdb_autoshutdown_test.sh:test/run_provdb_autoshutdown_test.sh test/unit_tests/run_all.sh:test/unit_tests/run_all.sh test/run_ad_with_provdb.sh:test/run_ad_with_provdb.sh test/run_stat_sender.sh:test/run_stat_sender.sh scripts/launch/run_services.sh:scripts/launch/run_services.sh], [chmod u+x $(echo $ac_tag | sed s/.*\://)])

include/Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
nobase_include_HEADERS = chimbuko/modules/performance_analysis/chimbuko.hpp chimbuko/modules/performance_analysis/ad/FuncAnomalyMetrics.hpp chimbuko/modules/performance_analysis/ad/ADLocalAnomalyMetrics.hpp chimbuko/modules/performance_analysis/ad/FuncStats.hpp chimbuko/modules/performance_analysis/ad/ADAnomalyProvenance.hpp chimbuko/modules/performance_analysis/ad/ADParser.hpp chimbuko/modules/performance_analysis/ad/ADMonitoring.hpp chimbuko/modules/performance_analysis/ad/ADNormalEventProvenance.hpp chimbuko/modules/performance_analysis/ad/ADLocalFuncStatistics.hpp chimbuko/modules/performance_analysis/ad/ADMetadataParser.hpp chimbuko/modules/performance_analysis/ad/AnomalyData.hpp chimbuko/modules/performance_analysis/ad/ADDefine.hpp chimbuko/modules/performance_analysis/ad/ADglobalFunctionIndexMap.hpp chimbuko/modules/performance_analysis/ad/ADExecDataInterface.hpp chimbuko/modules/performance_analysis/ad/ADcombinedPSdata.hpp chimbuko/modules/performance_analysis/ad/ADEvent.hpp chimbuko/modules/performance_analysis/ad/ADCounter.hpp chimbuko/modules/performance_analysis/ad/ADLocalCounterStatistics.hpp chimbuko/modules/performance_analysis/ad/ExecData.hpp chimbuko/modules/performance_analysis/provdb/ProvDBmoduleSetup.hpp chimbuko/modules/performance_analysis/module.hpp chimbuko/modules/performance_analysis/pserver/PSmoduleDataManager.hpp chimbuko/modules/performance_analysis/pserver/GlobalAnomalyStats.hpp chimbuko/modules/performance_analysis/pserver/AggregateFuncAnomalyMetrics.hpp chimbuko/modules/performance_analysis/pserver/PSglobalFunctionIndexMap.hpp chimbuko/modules/performance_analysis/pserver/AggregateFuncStats.hpp chimbuko/modules/performance_analysis/pserver/AggregateAnomalyData.hpp chimbuko/modules/performance_analysis/pserver/NetPayloadRecvCombinedADdata.hpp chimbuko/modules/performance_analysis/pserver/PScommon.hpp chimbuko/modules/performance_analysis/pserver/FunctionProfile.hpp chimbuko/modules/performance_analysis/pserver/GlobalAnomalyMetrics.hpp 
chimbuko/modules/performance_analysis/pserver/AggregateFuncAnomalyMetricsAllRanks.hpp chimbuko/modules/performance_analysis/pserver/GlobalCounterStats.hpp chimbuko/modules/performance_analysis/AD.hpp chimbuko/modules/performance_analysis/pserver.hpp chimbuko/modules/factory.hpp chimbuko/core/chimbuko.hpp chimbuko/core/ad/ADOutlier.hpp chimbuko/core/ad/ADNetClient.hpp chimbuko/core/ad/ADDataInterface.hpp chimbuko/core/ad/ADProvenanceDBclient.hpp chimbuko/core/ad/ADcmdLineArgs.hpp chimbuko/core/ad/utils.hpp chimbuko/core/ad/ADio.hpp chimbuko/core/provdb/ProvDBclient.hpp chimbuko/core/provdb/ProvDBmoduleSetupCore.hpp chimbuko/core/provdb/ProvDBengine.hpp chimbuko/core/provdb/setup.hpp chimbuko/core/verbose.hpp chimbuko/core/pserver/PSProvenanceDBclient.hpp chimbuko/core/pserver/PSmoduleDataManagerCore.hpp chimbuko/core/pserver/PSparamManager.hpp chimbuko/core/pserver/PSstatSender.hpp chimbuko/core/param/sstd_param.hpp chimbuko/core/param/copod_param.hpp chimbuko/core/param/hbos_param.hpp chimbuko/core/message.hpp chimbuko/core/net.hpp chimbuko/core/pserver.hpp chimbuko/core/provdb.hpp chimbuko/core/param.hpp chimbuko/core/net/zmqme_net.hpp chimbuko/core/net/mpi_net.hpp chimbuko/core/net/zmq_net.hpp chimbuko/core/net/local_net.hpp chimbuko/core/util/chunkAllocator.hpp chimbuko/core/util/RunStats.hpp chimbuko/core/util/ADIOS2parseUtils.hpp chimbuko/core/util/json.hpp chimbuko/core/util/environment.hpp chimbuko/core/util/pointerRegistry.hpp chimbuko/core/util/time.hpp chimbuko/core/util/map.hpp chimbuko/core/util/error.hpp chimbuko/core/util/string.hpp chimbuko/core/util/DispatchQueue.hpp chimbuko/core/util/PerfStats.hpp chimbuko/core/util/curlJsonSender.hpp chimbuko/core/util/RunMetric.hpp chimbuko/core/util/memutils.hpp chimbuko/core/util/Histogram.hpp chimbuko/core/util/barrier.hpp chimbuko/core/util/threadPool.hpp chimbuko/core/util/hash.hpp chimbuko/core/util/mtQueue.hpp chimbuko/core/util/serialize.hpp chimbuko/core/util/commandLineParser.hpp
1+
nobase_include_HEADERS = chimbuko/modules/performance_analysis/chimbuko.hpp chimbuko/modules/performance_analysis/ad/FuncAnomalyMetrics.hpp chimbuko/modules/performance_analysis/ad/ADLocalAnomalyMetrics.hpp chimbuko/modules/performance_analysis/ad/FuncStats.hpp chimbuko/modules/performance_analysis/ad/ADAnomalyProvenance.hpp chimbuko/modules/performance_analysis/ad/ADParser.hpp chimbuko/modules/performance_analysis/ad/ADMonitoring.hpp chimbuko/modules/performance_analysis/ad/ADNormalEventProvenance.hpp chimbuko/modules/performance_analysis/ad/ADLocalFuncStatistics.hpp chimbuko/modules/performance_analysis/ad/ADMetadataParser.hpp chimbuko/modules/performance_analysis/ad/AnomalyData.hpp chimbuko/modules/performance_analysis/ad/ADDefine.hpp chimbuko/modules/performance_analysis/ad/ADglobalFunctionIndexMap.hpp chimbuko/modules/performance_analysis/ad/ADExecDataInterface.hpp chimbuko/modules/performance_analysis/ad/ADcombinedPSdata.hpp chimbuko/modules/performance_analysis/ad/ADEvent.hpp chimbuko/modules/performance_analysis/ad/ADCounter.hpp chimbuko/modules/performance_analysis/ad/ADLocalCounterStatistics.hpp chimbuko/modules/performance_analysis/ad/ExecData.hpp chimbuko/modules/performance_analysis/provdb/ProvDBpruneOutlierInterface.hpp chimbuko/modules/performance_analysis/provdb/ProvDBmoduleSetup.hpp chimbuko/modules/performance_analysis/module.hpp chimbuko/modules/performance_analysis/pserver/PSmoduleDataManager.hpp chimbuko/modules/performance_analysis/pserver/GlobalAnomalyStats.hpp chimbuko/modules/performance_analysis/pserver/AggregateFuncAnomalyMetrics.hpp chimbuko/modules/performance_analysis/pserver/PSglobalFunctionIndexMap.hpp chimbuko/modules/performance_analysis/pserver/AggregateFuncStats.hpp chimbuko/modules/performance_analysis/pserver/AggregateAnomalyData.hpp chimbuko/modules/performance_analysis/pserver/NetPayloadRecvCombinedADdata.hpp chimbuko/modules/performance_analysis/pserver/PScommon.hpp 
chimbuko/modules/performance_analysis/pserver/FunctionProfile.hpp chimbuko/modules/performance_analysis/pserver/GlobalAnomalyMetrics.hpp chimbuko/modules/performance_analysis/pserver/AggregateFuncAnomalyMetricsAllRanks.hpp chimbuko/modules/performance_analysis/pserver/GlobalCounterStats.hpp chimbuko/modules/performance_analysis/AD.hpp chimbuko/modules/performance_analysis/pserver.hpp chimbuko/modules/factory.hpp chimbuko/core/chimbuko.hpp chimbuko/core/ad/ADOutlier.hpp chimbuko/core/ad/ADNetClient.hpp chimbuko/core/ad/ADDataInterface.hpp chimbuko/core/ad/ADProvenanceDBclient.hpp chimbuko/core/ad/ADcmdLineArgs.hpp chimbuko/core/ad/utils.hpp chimbuko/core/ad/ADio.hpp chimbuko/core/provdb/ProvDBclient.hpp chimbuko/core/provdb/ProvDBmoduleSetupCore.hpp chimbuko/core/provdb/ProvDBengine.hpp chimbuko/core/provdb/setup.hpp chimbuko/core/verbose.hpp chimbuko/core/pserver/PSProvenanceDBclient.hpp chimbuko/core/pserver/PSmoduleDataManagerCore.hpp chimbuko/core/pserver/PSparamManager.hpp chimbuko/core/pserver/PSstatSender.hpp chimbuko/core/param/sstd_param.hpp chimbuko/core/param/copod_param.hpp chimbuko/core/param/hbos_param.hpp chimbuko/core/message.hpp chimbuko/core/net.hpp chimbuko/core/pserver.hpp chimbuko/core/provdb.hpp chimbuko/core/param.hpp chimbuko/core/net/zmqme_net.hpp chimbuko/core/net/mpi_net.hpp chimbuko/core/net/zmq_net.hpp chimbuko/core/net/local_net.hpp chimbuko/core/util/chunkAllocator.hpp chimbuko/core/util/RunStats.hpp chimbuko/core/util/ADIOS2parseUtils.hpp chimbuko/core/util/json.hpp chimbuko/core/util/environment.hpp chimbuko/core/util/pointerRegistry.hpp chimbuko/core/util/time.hpp chimbuko/core/util/map.hpp chimbuko/core/util/error.hpp chimbuko/core/util/string.hpp chimbuko/core/util/DispatchQueue.hpp chimbuko/core/util/PerfStats.hpp chimbuko/core/util/curlJsonSender.hpp chimbuko/core/util/RunMetric.hpp chimbuko/core/util/memutils.hpp chimbuko/core/util/Histogram.hpp chimbuko/core/util/barrier.hpp chimbuko/core/util/threadPool.hpp 
chimbuko/core/util/hash.hpp chimbuko/core/util/mtQueue.hpp chimbuko/core/util/serialize.hpp chimbuko/core/util/commandLineParser.hpp

include/chimbuko/core/ad/ADDataInterface.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,12 @@ namespace chimbuko {
6262
};
6363

6464
ADDataInterface(size_t ndataset): m_dset_anom(ndataset){}
65+
ADDataInterface(){}
66+
67+
/**
68+
* @brief Set the number of datasets, designed for use if instance was default-constructed
69+
*/
70+
inline void setNdataSets(size_t ndataset){ m_dset_anom.resize(ndataset); }
6571

6672
/**
6773
* @brief Return the number of data sets

include/chimbuko/core/ad/ADOutlier.hpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,14 @@ namespace chimbuko {
8585
ParamInterface const* get_global_parameters() const{ return m_param; }
8686

8787
/**
88-
* @brief Set how often (in steps, or equivalently calls to "run") the global model is updated
88+
* @brief Set the global parameters, overwriting the existing global model. Here the input is in serialized form
89+
*
90+
* Use in conjunction with setGlobalModelSyncFrequency(0) to set and freeze the model, not allowing it to be modified by the data
91+
*/
92+
void setGlobalParameters(const std::string &to);
93+
94+
/**
95+
* @brief Set how often (in steps, or equivalently calls to "run") the global model is updated. If to <= 0, the global model will never be updated
8996
*/
9097
void setGlobalModelSyncFrequency(int to){ m_global_model_sync_freq = to; }
9198

@@ -114,7 +121,7 @@ namespace chimbuko {
114121
int m_sync_call_count; /**< count of calls to sync_param */
115122
int m_global_model_sync_freq; /**< how often the local model is pushed and synchronized with the global model (default 1)*/
116123

117-
int m_rank; /**< rank index*/
124+
int m_rank; /**< rank index, used only for staggering pserver sync events*/
118125
PerfStats *m_perf;
119126
};
120127

include/chimbuko/core/param.hpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,18 @@ namespace chimbuko {
3131
virtual void clear() = 0;
3232

3333
/**
34-
* @brief Get the number of functions for which statistics are being collected
34+
* @brief Get the number of models for which statistics are being collected
3535
*/
3636
virtual size_t size() const = 0;
3737

3838
/**
39-
* @brief Convert internal run statistics to string format for IO
39+
* @brief Convert internal models to string format for IO
4040
* @return Run statistics in string format
4141
*/
4242
virtual std::string serialize() const = 0;
4343

4444
/**
45-
* @brief Update the internal run statistics with those included in the serialized input map
45+
* @brief Update the internal models with those included in the serialized input map
4646
* @param parameters The parameters in serialized format
4747
* @param return_update Indicates that the function should return a serialized copy of the updated parameters
4848
* @return An empty string or a serialized copy of the updated parameters depending on return_update
@@ -69,8 +69,8 @@ namespace chimbuko {
6969
virtual void update(const std::vector<ParamInterface*> &other);
7070

7171
/**
72-
* @brief Set the internal run statistics to match those included in the serialized input map. Overwrite performed only for those keys in input.
73-
* @param runstats The serialized input map
72+
* @brief Set the internal run statistics to match those included in the serialized input model. Overwrite performed only for those model indices in the input.
73+
* @param parameters The serialized input model
7474
*/
7575
virtual void assign(const std::string& parameters) = 0;
7676

include/chimbuko/core/util/RunStats.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,11 +169,11 @@ namespace chimbuko {
169169
bool equiv(const RunStats &b) const;
170170

171171
/**
172-
* @brief Set the eta parameter
172+
* @brief Set the eta (mean) parameter
173173
*/
174174
void set_eta(double to){ m_eta = to; }
175175
/**
176-
* @brief Set the rho parameter
176+
* @brief Set the rho parameter (variance * [count-1])
177177
*/
178178
void set_rho(double to){ m_rho = to; }
179179
/**
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#pragma once
2+
#include <chimbuko_config.h>
3+
#include <chimbuko/core/ad/ADDataInterface.hpp>
4+
#include <chimbuko/core/ad/ADOutlier.hpp>
5+
6+
#include<string>
7+
#include <sonata/Database.hpp>
8+
9+
namespace chimbuko {
10+
namespace modules{
11+
namespace performance_analysis{
12+
13+
/**
14+
* @brief The interface class between the provDB data and the AD algorithm
15+
*/
16+
class ProvDBpruneOutlierInterface: public ADDataInterface{
17+
public:
18+
ProvDBpruneOutlierInterface(sonata::Database &db);
19+
20+
/**
21+
* @brief Get the values associated with each recorded anomaly
22+
*/
23+
std::vector<Elem> getDataSet(size_t dset_index) const override;
24+
25+
/**
26+
* @brief Check that the newly assigned label is still anomalous; otherwise erase the element from the database
27+
*/
28+
void recordDataSetLabelsInternal(const std::vector<Elem> &data, size_t dset_index) override;
29+
30+
31+
/**
32+
* @brief Return the function index (a unique index associated with a program index/function name combination) associated with a given dataset index
33+
*/
34+
size_t getDataSetModelIndex(size_t dset_index) const;
35+
36+
37+
private:
38+
sonata::Database &m_database;
39+
std::unique_ptr<sonata::Collection> m_collection;
40+
std::unordered_map<unsigned long, std::vector<std::pair<uint64_t, double> > > m_data; //[fid] -> [ (record_id, value), ... ]
41+
};
42+
43+
44+
/**
45+
* @brief Instantiate the AD algorithm with the provided parameters and use it to prune entries from the database that are no longer outliers
46+
*/
47+
void ProvDBpruneOutliers(const std::string &algorithm, const ADOutlier::AlgoParams &algo_params, const std::string &params_ser, sonata::Database &db);
48+
49+
50+
}
51+
}
52+
}

0 commit comments

Comments
 (0)