Skip to content

Commit 35deacc

Browse files
author
sandeepmittal
committed
merge ckelly_develop
2 parents 7c52aa0 + b24028d commit 35deacc

8 files changed

Lines changed: 58 additions & 22 deletions

File tree

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# Performance Data Analysis
22
| Branch | Status |
33
| :--- | :--- |
4-
| master | [![Build Status](https://travis-ci.org/CODARcode/PerformanceAnalysis.svg?branch=master)](https://travis-ci.org/CODARcode/PerformanceAnalysis) [![codecov](https://codecov.io/gh/CODARcode/PerformanceAnalysis/branch/master/graph/badge.svg?token=B5VPVSZII4)](https://codecov.io/gh/CODARcode/PerformanceAnalysis) |
5-
| develop | [![Build Status](https://travis-ci.org/CODARcode/PerformanceAnalysis.svg?branch=release)](https://travis-ci.org/CODARcode/PerformanceAnalysis) [![codecov](https://codecov.io/gh/CODARcode/PerformanceAnalysis/branch/develop/graph/badge.svg?token=B5VPVSZII4)](https://codecov.io/gh/CODARcode/PerformanceAnalysis) |
4+
| master | [![Build Status](https://travis-ci.com/CODARcode/PerformanceAnalysis.svg?branch=master)](https://travis-ci.com/CODARcode/PerformanceAnalysis) [![codecov](https://codecov.io/gh/CODARcode/PerformanceAnalysis/branch/master/graph/badge.svg?token=B5VPVSZII4)](https://codecov.io/gh/CODARcode/PerformanceAnalysis) |
5+
| develop | [![Build Status](https://travis-ci.com/CODARcode/PerformanceAnalysis.svg?branch=release)](https://travis-ci.com/CODARcode/PerformanceAnalysis) [![codecov](https://codecov.io/gh/CODARcode/PerformanceAnalysis/branch/develop/graph/badge.svg?token=B5VPVSZII4)](https://codecov.io/gh/CODARcode/PerformanceAnalysis) |
66

77
This library is part of the [CHIMBUKO](https://github.com/CODARcode/Chimbuko) software framework and provides the C/C++ API to process [TAU](http://tau.uoregon.edu) performance traces which can be produced by multiple workflow components, processes, and threads. Its purpose is to detect events in the trace data that reveal useful information to developers of High Performance Computing applications.
88

configure.ac

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,19 @@ AC_COMPILE_IFELSE( [AC_LANG_PROGRAM([[]],
4040
AC_ARG_ENABLE([mpi],
4141
AS_HELP_STRING([--disable-mpi], [Disable MPI. User will need to manually assign the rank index to the AD instances.]), [], [enable_mpi=yes])
4242

43-
AS_IF([test "x$enable_mpi" != "xno"], [
43+
AS_IF(
44+
[test "x$enable_mpi" != "xno"], [
4445
#Check we can compile with MPI
4546
AC_MSG_NOTICE([MPI use is enabled, checking compilation with MPI is possible])
4647
AC_CHECK_HEADER(mpi.h, [], [AC_MSG_FAILURE([["Cannot find MPI header"]])] )
4748
AC_DEFINE([USE_MPI],[1],[Use MPI])
48-
])
49+
LIBS+="-lpthread"
50+
],
51+
[test "x$enable_mpi" == "xno"], [
52+
AC_MSG_NOTICE([MPI use is disabled])
53+
LIBS+="-lpthread"
54+
]
55+
)
4956

5057

5158
#Check for curl config and library

spack/repo/chimbuko/packages/chimbuko-performance-analysis/package.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@ class ChimbukoPerformanceAnalysis(AutotoolsPackage):
1717
version('master', branch='master')
1818

1919
variant('perf-metric', default=True, description='Build with performance monitoring')
20-
21-
depends_on('mpi')
20+
variant('mpi', default=True, description='Enable building Chimbuko with MPI. If disabled the user must manually provide the rank index to the OAD.')
21+
22+
depends_on('mpi', when="+mpi")
2223
depends_on('cereal')
2324
depends_on('adios2')
2425
depends_on('googletest')
@@ -34,12 +35,15 @@ class ChimbukoPerformanceAnalysis(AutotoolsPackage):
3435

3536

3637
def setup_environment(self, spack_env, run_env):
37-
spack_env.set('CXX', self.spec['mpi'].mpicxx)
38+
if '+mpi' in self.spec:
39+
spack_env.set('CXX', self.spec['mpi'].mpicxx)
3840

3941
def configure_args(self):
4042
args = ["--with-network=ZMQ", "--with-adios2=%s" % self.spec['adios2'].prefix ]
4143

4244
if '+perf-metric' in self.spec:
4345
args.append('--with-perf-metric')
44-
46+
if '+mpi' not in self.spec:
47+
args.append('--disable-mpi')
48+
4549
return args

src/ad/ADNetClient.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,8 +428,9 @@ void ADThreadNetClient::run(bool local){
428428
size_t nwork = getNwork();
429429
while(nwork > 0){
430430
ClientAction* work_item = getWorkItem();
431+
//Query shutdown_worker() *before* calling perform: a blocking action unlocks the parent thread from inside perform, and the parent then destroys the ClientAction object, making this pointer invalid!
432+
shutdown = shutdown || work_item->shutdown_worker();
431433
work_item->perform(*client);
432-
shutdown = shutdown || work_item->shutdown_worker();
433434

434435
if(work_item->do_delete()) delete work_item;
435436
nwork = getNwork();

src/ad/ADOutlier.cpp

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -170,12 +170,12 @@ double ADOutlierSSTD::computeScore(CallListIterator_t ev, const SstdParam &stats
170170
unsigned long ADOutlierSSTD::compute_outliers(Anomalies &outliers,
171171
const unsigned long func_id,
172172
std::vector<CallListIterator_t>& data){
173-
std::cout << "Finding outliers in events for func " << func_id << std::endl;
173+
verboseStream << "Finding outliers in events for func " << func_id << std::endl;
174174

175175

176176
SstdParam& param = *(SstdParam*)m_param;
177177
if (param[func_id].count() < 2){
178-
std::cout << "Less than 2 events in stats associated with that func, stats not complete" << std::endl;
178+
verboseStream << "Less than 2 events in stats associated with that func, stats not complete" << std::endl;
179179
return 0;
180180
}
181181
unsigned long n_outliers = 0;
@@ -194,15 +194,15 @@ unsigned long ADOutlierSSTD::compute_outliers(Anomalies &outliers,
194194
itt->set_outlier_score(computeScore(itt, param));
195195

196196
if (label == -1) {
197-
std::cout << "!!!!!!!Detected outlier on func id " << func_id << " (" << itt->get_funcname() << ") on thread " << itt->get_tid()
197+
verboseStream << "!!!!!!!Detected outlier on func id " << func_id << " (" << itt->get_funcname() << ") on thread " << itt->get_tid()
198198
<< " runtime " << runtime << " mean " << mean << " std " << std << std::endl;
199199
n_outliers += 1;
200200
std::vector<double> sstd_stats{thr_hi, thr_lo, mean, std};
201201
outliers.insert(itt, Anomalies::EventType::Outlier, sstd_stats); //insert into data structure containing captured anomalies
202202
}else{
203203
//Capture maximum of one normal execution per io step
204204
if(outliers.nFuncEvents(func_id, Anomalies::EventType::Normal) == 0){
205-
std::cout << "Detected normal event on func id " << func_id << " (" << itt->get_funcname() << ") on thread " << itt->get_tid()
205+
verboseStream << "Detected normal event on func id " << func_id << " (" << itt->get_funcname() << ") on thread " << itt->get_tid()
206206
<< " runtime " << runtime << " mean " << mean << " std " << std << std::endl;
207207

208208
outliers.insert(itt, Anomalies::EventType::Normal);
@@ -302,7 +302,7 @@ unsigned long ADOutlierHBOS::compute_outliers(Anomalies &outliers,
302302
const unsigned long func_id,
303303
std::vector<CallListIterator_t>& data){
304304

305-
std::cout << "Finding outliers in events for func " << func_id << std::endl;
305+
verboseStream << "Finding outliers in events for func " << func_id << std::endl;
306306

307307
HbosParam& param = *(HbosParam*)m_param;
308308

@@ -448,7 +448,7 @@ unsigned long ADOutlierHBOS::compute_outliers(Anomalies &outliers,
448448
if (ad_score >= l_threshold) {
449449

450450
itt->set_label(-1);
451-
std::cout << "!!!!!!!Detected outlier on func id " << func_id << " (" << itt->get_funcname() << ") on thread " << itt->get_tid() << " runtime " << runtime_i << std::endl;
451+
verboseStream << "!!!!!!!Detected outlier on func id " << func_id << " (" << itt->get_funcname() << ") on thread " << itt->get_tid() << " runtime " << runtime_i << std::endl;
452452
outliers.insert(itt, Anomalies::EventType::Outlier, runtime_i, ad_score, l_threshold); //insert into data structure containing captured anomalies
453453
n_outliers += 1;
454454

@@ -458,9 +458,8 @@ unsigned long ADOutlierHBOS::compute_outliers(Anomalies &outliers,
458458
//Capture maximum of one normal execution per io step
459459
itt->set_label(1);
460460
if(outliers.nFuncEvents(func_id, Anomalies::EventType::Normal) == 0) {
461-
std::cout << "Detected normal event on func id " << func_id << " (" << itt->get_funcname() << ") on thread " << itt->get_tid() << " runtime " << runtime_i << std::endl;
462-
outliers.insert(itt, Anomalies::EventType::Normal);
463-
461+
verboseStream << "Detected normal event on func id " << func_id << " (" << itt->get_funcname() << ") on thread " << itt->get_tid() << " runtime " << runtime_i << std::endl;
462+
outliers.insert(itt, Anomalies::EventType::Normal);
464463
}
465464

466465
}
@@ -579,7 +578,7 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
579578
const unsigned long func_id,
580579
std::vector<CallListIterator_t>& data){
581580

582-
std::cout << "Finding outliers in events for func " << func_id << std::endl;
581+
verboseStream << "Finding outliers in events for func " << func_id << std::endl;
583582

584583
CopodParam& param = *(CopodParam*)m_param;
585584

@@ -694,9 +693,14 @@ unsigned long ADOutlierCOPOD::compute_outliers(Anomalies &outliers,
694693
//Capture maximum of one normal execution per io step
695694
itt->set_label(1);
696695
if(outliers.nFuncEvents(func_id, Anomalies::EventType::Normal) == 0) {
696+
<<<<<<< HEAD
697697
verboseStream << "Detected normal event on func id " << func_id << " (" << itt->get_funcname() << ") on thread " << itt->get_tid() << " runtime " << runtime_i << std::endl;
698698
outliers.insert(itt, Anomalies::EventType::Normal);
699699

700+
=======
701+
verboseStream << "Detected normal event on func id " << func_id << " (" << itt->get_funcname() << ") on thread " << itt->get_tid() << " runtime " << runtime_i << std::endl;
702+
outliers.insert(itt, Anomalies::EventType::Normal);
703+
>>>>>>> ckelly_develop
700704
}
701705

702706
}

test/unit_tests/ad/HBOSOutlierTestBPFile.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <nlohmann/json.hpp>
1010
#include "gtest/gtest.h"
1111
#include "../unit_test_common.hpp"
12+
#include "../unit_test_cmdline.hpp"
1213

1314
#include<thread>
1415
#include<chrono>
@@ -92,7 +93,12 @@ void create_save_json(const std::unordered_map<unsigned long, std::vector<std::v
9293
}
9394

9495
TEST(HBOSADOutlierBPFileWithoutPServer, Works) {
95-
96+
//Get trace data dir from command line
97+
if(_argc < 2){
98+
throw std::runtime_error("Path to trace data directory must be provided as an argument!");
99+
}
100+
std::string trace_data_dir = _argv[1];
101+
96102
int ranks = 4;
97103
std::vector<int> v_io_steps(ranks);
98104
std::vector<int> v_functions(ranks);
@@ -108,7 +114,7 @@ TEST(HBOSADOutlierBPFileWithoutPServer, Works) {
108114
ChimbukoParams params;
109115
//Parameters for the connection to the instrumented binary trace output
110116
params.trace_engineType = "BPFile"; // BPFile or SST
111-
params.trace_data_dir = "<PATH>/test/data"; //ad/test/data"; // *.bp location
117+
params.trace_data_dir = trace_data_dir; // *.bp location
112118
std::string bp_prefix = "tau-metrics"; // bp file prefix (e.g. tau-metrics-[nwchem])
113119

114120
//The remainder are optional arguments. Enable using the appropriate command line switch

test/unit_tests/ad/SSTDOutlierTestBPFile.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include<chimbuko/message.hpp>
88
#include "gtest/gtest.h"
99
#include "../unit_test_common.hpp"
10+
#include "../unit_test_cmdline.hpp"
1011

1112
#include<thread>
1213
#include<chrono>
@@ -92,6 +93,12 @@ void create_save_json(const std::unordered_map<unsigned long, std::vector<double
9293
}
9394

9495
TEST(SSTDADOutlierBPFileWithoutPServer, Works) {
96+
//Get trace data dir from command line
97+
if(_argc < 2){
98+
throw std::runtime_error("Path to trace data directory must be provided as an argument!");
99+
}
100+
std::string trace_data_dir = _argv[1];
101+
95102
//int file_suffix = 1;
96103
int ranks = 4;
97104
std::vector<int> v_io_steps(ranks);
@@ -108,7 +115,7 @@ TEST(SSTDADOutlierBPFileWithoutPServer, Works) {
108115
ChimbukoParams params;
109116
//Parameters for the connection to the instrumented binary trace output
110117
params.trace_engineType = "BPFile"; // argv[1]; // BPFile or SST
111-
params.trace_data_dir = "<PATH>/test/data"; // argv[2]; // *.bp location
118+
params.trace_data_dir = trace_data_dir; // *.bp location
112119
std::string bp_prefix = "tau-metrics"; //argv[3]; // bp file prefix (e.g. tau-metrics-[nwchem])
113120

114121
//The remainder are optional arguments. Enable using the appropriate command line switch

test/unit_tests/run_all.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,13 @@ set -o pipefail
2121
./ad/ADcombinedPSdata
2222
./ad/FuncAnomalyMetrics
2323
./ad/ADLocalAnomalyMetrics
24+
./ad/HBOSOutlierDistributions
25+
./ad/HBOSOutlier
26+
./ad/HBOSOutlierADs
27+
./ad/HBOSOutlierTestBPFile ../data
28+
./ad/SSTDOutlierTestBPFile ../data
29+
./ad/COPODOutlier
30+
./ad/COPODOutlierADs
2431
./util/DispatchQueue
2532
./util/commandLineParser
2633
./util/RunStats

0 commit comments

Comments
 (0)