Skip to content

Commit dc51efa

Browse files
committed
sstSinker can now be passed the SST timeout on the command line
1 parent e7212d2 commit dc51efa

1 file changed

Lines changed: 141 additions & 125 deletions

File tree

app/sstSinker.cpp

Lines changed: 141 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -1,146 +1,162 @@
1-
//A program for connecting to tau2 adios2 output and running the parser routines for timing purposes
1+
//A program for connecting to tau2 adios2 output and running the parser routines for timing purposes and/or debugging
22
#include "chimbuko/AD.hpp"
33
#include <chrono>
4+
#include "chimbuko/util/commandLineParser.hpp"
5+
#include "chimbuko/util/string.hpp"
46

57
using namespace chimbuko;
68
using namespace std::chrono;
79

8-
// input argument
9-
// - engineType (for BP, + data_dir)
10-
// - output_dir (for now) to dump
10+
/**
 * @brief Optional command-line settings for sstSinker.
 *
 * An instance is filled in by commandLineParser<SinkerArgs>; the member name
 * `timeout` is the one registered with addOptionalCommandLineArg in main().
 */
struct SinkerArgs{
  int timeout; ///< SST connect timeout in seconds, forwarded to the ADParser constructor

  /// Sets the default timeout of 60 seconds, used when the option is not supplied.
  SinkerArgs(): timeout(60){}
};
1615

17-
int main(int argc, char ** argv)
18-
{
19-
MPI_Init(&argc, &argv);
2016

21-
int world_rank, world_size;
22-
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
23-
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
17+
int main(int argc, char ** argv){
18+
MPI_Init(&argc, &argv);
2419

25-
try
20+
int world_rank, world_size;
21+
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
22+
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
23+
24+
bool error = false;
25+
try
2626
{
27-
// -----------------------------------------------------------------------
28-
// parser command line arguments
29-
// -----------------------------------------------------------------------
30-
std::string engineType = argv[1]; // BPFile or SST
31-
std::string data_dir = argv[2]; // *.bp location
32-
std::string prefix = argv[3]; // "tau-metrics-nwchem"
33-
std::string inputFile = prefix + "-" + std::to_string(world_rank) + ".bp";
34-
int fetch_data = atoi(argv[4]);
35-
36-
if (world_rank == 0) {
37-
std::cout << "\n"
38-
<< "rank : " << world_rank << "\n"
39-
<< "Engine : " << engineType << "\n"
40-
<< "BP in dir : " << data_dir << "\n"
41-
<< "BP file : " << inputFile << "\n"
42-
<< "Fetch : " << fetch_data
43-
<< std::endl;
44-
}
45-
46-
// -----------------------------------------------------------------------
47-
// AD module variables
48-
// -----------------------------------------------------------------------
49-
ADParser * parser;
50-
51-
// int step = 0;
52-
53-
// -----------------------------------------------------------------------
54-
// Measurement variables
55-
// -----------------------------------------------------------------------
56-
unsigned long total_frames = 0, frames = 0;
57-
unsigned long total_processing_time = 0, processing_time = 0;
58-
high_resolution_clock::time_point t1, t2;
59-
60-
// -----------------------------------------------------------------------
61-
// Init. AD module
62-
// First, init io to make sure file (or connection) handler
63-
// -----------------------------------------------------------------------
64-
parser = new ADParser(data_dir + "/" + inputFile, 0, world_rank, engineType);
65-
66-
// -----------------------------------------------------------------------
67-
// Start analysis
68-
// -----------------------------------------------------------------------
69-
if (world_rank == 0) {
70-
std::cout << "rank: " << world_rank << std::endl;
71-
}
72-
t1 = high_resolution_clock::now();
73-
while ( parser->getStatus() )
74-
{
75-
parser->beginStep();
76-
if (!parser->getStatus())
77-
{
78-
// No more steps available.
79-
break;
80-
}
81-
82-
// step = parser->getCurrentStep();
83-
84-
if (fetch_data) {
85-
parser->update_attributes();
86-
parser->fetchFuncData();
87-
parser->fetchCommData();
88-
parser->fetchCounterData();
89-
}
90-
91-
frames++;
92-
parser->endStep();
93-
}
94-
t2 = high_resolution_clock::now();
95-
if (world_rank == 0) {
96-
std::cout << "rank: " << world_rank << " analysis done!\n";
97-
}
98-
99-
// -----------------------------------------------------------------------
100-
// Average analysis time and total number of outliers
101-
// -----------------------------------------------------------------------
102-
//MPI_Barrier(MPI_COMM_WORLD);
103-
processing_time = duration_cast<milliseconds>(t2 - t1).count();
104-
105-
if (false) {
106-
const unsigned long local_measures[] = {processing_time, frames};
107-
unsigned long global_measures[] = {0, 0};
108-
MPI_Reduce(
109-
local_measures, global_measures, 2, MPI_UNSIGNED_LONG,
110-
MPI_SUM, 0, MPI_COMM_WORLD
111-
);
112-
total_processing_time = global_measures[0];
113-
total_frames = global_measures[1];
114-
}
27+
commandLineParser<SinkerArgs> cmdline;
28+
addOptionalCommandLineArg(cmdline, timeout, "Specify the SST connect timeout in seconds (Default 60s)");
29+
30+
if(argc < 5 || (argc == 2 && std::string(argv[1]) == "-help")){
31+
std::cout << "Usage: <exe> <engine type (BPFile, SST)> <bp directory> <bpfile prefix (eg tau-metrics-nwchem)> <fetch>\n"
32+
<< "Where \"fetch\" indicates whether the data is actually transferred or we just iterate over the IO steps\n"
33+
<< "Options:" << std::endl;
34+
35+
cmdline.help(std::cout);
36+
return 0;
37+
}
38+
39+
std::string engineType = argv[1]; // BPFile or SST
40+
std::string data_dir = argv[2]; // *.bp location
41+
std::string prefix = argv[3]; // "tau-metrics-nwchem"
42+
std::string inputFile = prefix + "-" + std::to_string(world_rank) + ".bp";
43+
int fetch_data = atoi(argv[4]);
44+
45+
SinkerArgs args;
46+
cmdline.parse(args, argc-5, (const char**)(argv+5) );
47+
48+
if (world_rank == 0) {
49+
std::cout << "\n"
50+
<< "rank : " << world_rank << "\n"
51+
<< "Engine : " << engineType << "\n"
52+
<< "BP in dir : " << data_dir << "\n"
53+
<< "BP file : " << inputFile << "\n"
54+
<< "Fetch : " << fetch_data << "\n"
55+
<< "Timeout (s): " << args.timeout
56+
<< std::endl;
57+
}
58+
59+
// -----------------------------------------------------------------------
60+
// AD module variables
61+
// -----------------------------------------------------------------------
62+
ADParser * parser;
63+
64+
// int step = 0;
65+
66+
// -----------------------------------------------------------------------
67+
// Measurement variables
68+
// -----------------------------------------------------------------------
69+
unsigned long total_frames = 0, frames = 0;
70+
unsigned long total_processing_time = 0, processing_time = 0;
71+
high_resolution_clock::time_point t1, t2;
72+
73+
// -----------------------------------------------------------------------
74+
// Init. AD module
75+
// First, init io to make sure file (or connection) handler
76+
// -----------------------------------------------------------------------
77+
parser = new ADParser(data_dir + "/" + inputFile, 0, world_rank, engineType, args.timeout);
78+
79+
// -----------------------------------------------------------------------
80+
// Start analysis
81+
// -----------------------------------------------------------------------
82+
if (world_rank == 0) {
83+
std::cout << "rank: " << world_rank << std::endl;
84+
}
85+
t1 = high_resolution_clock::now();
86+
while ( parser->getStatus() )
87+
{
88+
parser->beginStep();
89+
if (!parser->getStatus())
90+
{
91+
// No more steps available.
92+
break;
93+
}
94+
95+
// step = parser->getCurrentStep();
96+
97+
if (fetch_data) {
98+
parser->update_attributes();
99+
parser->fetchFuncData();
100+
parser->fetchCommData();
101+
parser->fetchCounterData();
102+
}
103+
104+
frames++;
105+
parser->endStep();
106+
}
107+
t2 = high_resolution_clock::now();
108+
if (world_rank == 0) {
109+
std::cout << "rank: " << world_rank << " analysis done!\n";
110+
}
111+
112+
// -----------------------------------------------------------------------
113+
// Average analysis time and total number of outliers
114+
// -----------------------------------------------------------------------
115+
//MPI_Barrier(MPI_COMM_WORLD);
116+
processing_time = duration_cast<milliseconds>(t2 - t1).count();
117+
118+
if (false) {
119+
const unsigned long local_measures[] = {processing_time, frames};
120+
unsigned long global_measures[] = {0, 0};
121+
MPI_Reduce(
122+
local_measures, global_measures, 2, MPI_UNSIGNED_LONG,
123+
MPI_SUM, 0, MPI_COMM_WORLD
124+
);
125+
total_processing_time = global_measures[0];
126+
total_frames = global_measures[1];
127+
}
115128

116-
if (false && world_rank == 0) {
117-
std::cout << "\n"
118-
<< "Avg. num. frames : " << (double)total_frames/(double)world_size << "\n"
119-
<< "Avg. processing time : " << (double)total_processing_time/(double)world_size << " msec\n"
120-
<< std::endl;
121-
}
122-
123-
// -----------------------------------------------------------------------
124-
// Finalize
125-
// -----------------------------------------------------------------------
126-
delete parser;
129+
if (false && world_rank == 0) {
130+
std::cout << "\n"
131+
<< "Avg. num. frames : " << (double)total_frames/(double)world_size << "\n"
132+
<< "Avg. processing time : " << (double)total_processing_time/(double)world_size << " msec\n"
133+
<< std::endl;
134+
}
135+
136+
// -----------------------------------------------------------------------
137+
// Finalize
138+
// -----------------------------------------------------------------------
139+
delete parser;
127140
}
128-
catch (std::invalid_argument &e)
141+
catch (std::invalid_argument &e)
129142
{
130-
std::cout << e.what() << std::endl;
131-
//todo: usages()
143+
std::cout << e.what() << std::endl;
144+
error = true;
145+
//todo: usages()
132146
}
133-
catch (std::ios_base::failure &e)
147+
catch (std::ios_base::failure &e)
134148
{
135-
std::cout << "I/O base exception caught\n";
136-
std::cout << e.what() << std::endl;
149+
std::cout << "I/O base exception caught\n";
150+
std::cout << e.what() << std::endl;
151+
error = true;
137152
}
138-
catch (std::exception &e)
153+
catch (std::exception &e)
139154
{
140-
std::cout << "Exception caught\n";
141-
std::cout << e.what() << std::endl;
155+
std::cout << "Exception caught\n";
156+
std::cout << e.what() << std::endl;
157+
error = true;
142158
}
143159

144-
MPI_Finalize();
145-
return 0;
160+
MPI_Finalize();
161+
return error ? 1 : 0;
146162
}

0 commit comments

Comments
 (0)