Skip to content

Commit eee5657

Browse files
committed
Added a script to provdb-python to validate the saved viz output json data against the provDB
1 parent 1157050 commit eee5657

3 files changed

Lines changed: 105 additions & 1 deletion

File tree

scripts/provdb_python/src/provdb_python/cli.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import provdb_python.provdb_analyze as pa
33
import provdb_python.provdb_counter_analyze as pca
44
import provdb_python.provdb_between_run_analyze as pbr
5+
import provdb_python.provdb_viz_validate as pvv
56

67
import sys
78

@@ -14,6 +15,7 @@ def cli():
1415
print("basic-analysis")
1516
print("counter-analysis")
1617
print("between-run-analysis")
18+
print("viz-output-validate")
1719
sys.exit(0)
1820
tool = args[1]
1921
tool_args = args[2:]
@@ -25,6 +27,8 @@ def cli():
2527
pca.provdb_counter_analysis(tool_args)
2628
elif tool == 'between-run-analysis':
2729
pbr.provdb_between_run_analysis(tool_args)
30+
elif tool == 'viz-output-validate':
31+
pvv.provdb_viz_validate(tool_args)
2832
else:
2933
print("Invalid tool")
3034

scripts/provdb_python/src/provdb_python/provdb_analyze.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ def summarizeEvent(event):
139139
thr_str = "{}".format(event['tid'])
140140
if event['is_gpu_event']:
141141
thr_str = "GPU{}/{}/{}".format(event['gpu_location']['device'],event['gpu_location']['context'],event['gpu_location']['stream'])
142-
return "pid={} rid={} tid={} func=\"{}\" step={} excl={}s tot={}s score={} severity={}".format(event['pid'],event['rid'], thr_str, event['func'], event['io_step'], float(event['runtime_exclusive'])/1e6, float(event['runtime_total'])/1e6, event['outlier_score'], event['outlier_severity'])
142+
return "pid={} rid={} tid={} fid={} func=\"{}\" step={} excl={}s tot={}s score={} severity={}".format(event['pid'],event['rid'], thr_str, event['fid'], event['func'], event['io_step'], float(event['runtime_exclusive'])/1e6, float(event['runtime_total'])/1e6, event['outlier_score'], event['outlier_severity'])
143143

144144

145145
#Get the function profile information for application index 'app'
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
#A Python module for validating the saved viz output JSON data against the provenance database
2+
#Invoked as the 'viz-output-validate' tool of the provdb-python command-line interface
3+
import provdb_python.provdb_interact as pdb
4+
import provdb_python.provdb_analyze as pa
5+
import pymargo
6+
from pymargo.core import Engine
7+
import json
8+
import sys
9+
import copy
10+
from cmd import Cmd
11+
import glob
12+
13+
14+
15+
def provdb_viz_validate(args):
    """Validate saved viz output JSON files against the provenance database.

    For every ``pserver_output_stats_*.json`` file in the viz output
    directory, each entry of its ``anomaly_metrics`` list is compared with
    the 'anomalies' collection of the provDB: the number of anomalies the
    viz data reports for an (app, rank, fid) triple over an inclusive
    io-step window must equal the number of provDB anomaly records that
    match the same keys.

    Args:
        args: list of exactly two strings, ``[<nshards>, <viz output dir>]``.
            If the length is not 2 a usage line is printed and the process
            exits with status 0.

    Prints progress, any mismatches (with a dump of all anomalies in the
    offending window) and finally "Validation FAILED" or
    "Validation passed". Returns None.
    """
    if len(args) != 2:
        print("Arguments: <nshards> <viz output dir>")
        sys.exit(0)
    nshards = int(args[0])
    viz_dir = args[1]

    with Engine('na+sm', pymargo.server) as engine:
        db = pdb.provDBinterface(engine, r'provdb.%d.unqlite', nshards)

        dkeys = ['rid', 'pid', 'fid', 'io_step']
        index = pa.generateIndex(db, dkeys, 'anomalies')
        print(index)

        # Convert each index entry to a set so that records matching several
        # keys at once can be found by set intersection.
        index_sets = {
            k: {v: set(index[k][v]) for v in index[k]} for k in dkeys
        }
        print(index_sets)
        io_index = index_sets['io_step']

        files = glob.glob("%s/pserver_output_stats_*.json" % viz_dir)

        fail = False
        for f in files:
            print(f)
            # Context manager guarantees the handle is closed even when the
            # file does not contain valid JSON.
            with open(f) as fp:
                v = json.load(fp)

            if 'anomaly_metrics' not in v:
                continue

            for anom_group in v['anomaly_metrics']:
                print(anom_group)
                # The index is looked up with string keys, hence the str().
                fid = str(anom_group['fid'])
                pid = str(anom_group['app'])
                rid = str(anom_group['rank'])
                new_data = anom_group['new_data']
                nanom = int(new_data['count']['accumulate'])
                iostep_start = int(new_data['first_io_step'])
                iostep_end = int(new_data['last_io_step'])
                print("%d anomalies in [%s,%s] on (%s,%s,%s)" % (nanom, iostep_start, iostep_end, pid, rid, fid))

                # NOTE(review): a group with no counterpart in the provDB
                # index is only reported and skipped; it does not mark the
                # validation as failed. Confirm this is intended.
                if pid not in index_sets['pid']:
                    print("Could not find any anomalies for this pid!")
                    continue
                if rid not in index_sets['rid']:
                    print("Could not find any anomalies for this rid!")
                    continue
                if fid not in index_sets['fid']:
                    print("Could not find any anomalies for this fid!")
                    continue

                # The io-step window is inclusive at both ends.
                window = range(iostep_start, iostep_end + 1)
                iosets = [io_index[str(i)] for i in window if str(i) in io_index]
                if not iosets:
                    print("Could not find any anomalies in this time window!")
                    continue

                # Records matching rank, function and program simultaneously,
                # then counted per io step inside the window.
                aset = index_sets['rid'][rid] & index_sets['fid'][fid] & index_sets['pid'][pid]
                acount = sum(len(aset & step_set) for step_set in iosets)
                print("Found %d anomalies matching these keys" % acount)

                if acount != nanom:
                    print("!!INVALID: Mismatch in number of anomalies: found %d, expected %d" % (acount, nanom))
                    print("All anomalies in this time window:")
                    for i in window:
                        if str(i) in io_index:
                            print("IO step %d" % i)
                            for anom in io_index[str(i)]:
                                print(anom, pa.summarizeEvent(pa.getEventByID(db, index, anom)))
                    fail = True

        if fail:
            print("Validation FAILED")
        else:
            print("Validation passed")

        del db
        engine.finalize()

0 commit comments

Comments
 (0)