-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_project_strict.sh
More file actions
executable file
·136 lines (122 loc) · 3.52 KB
/
run_project_strict.sh
File metadata and controls
executable file
·136 lines (122 loc) · 3.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env bash
#
# Driver for the "strict" pipeline: runs a single stage, or a preset sequence
# of stages, selected by the first positional argument.
#
# Usage: ./run_project_strict.sh [MODE]     (MODE defaults to "all")
# Env:   PYTHON_BIN  interpreter used to launch every stage (default: python3)
set -euo pipefail

# Every path used below is relative, so always execute from the directory that
# contains this script (expected to be the repository root). BASH_SOURCE is
# used instead of $0 so the location is still right if the file is sourced,
# and `--` stops a path beginning with '-' from being parsed as an option.
cd -- "$(dirname -- "${BASH_SOURCE[0]}")"

# Interpreter and mode; both can be overridden by the caller.
readonly PYTHON_BIN="${PYTHON_BIN:-python3}"
readonly MODE="${1:-all}"

# Strict dataset files shared by the stages. Marked readonly so no stage can
# silently repoint another stage's input.
readonly STRICT_SOURCE="training_data/real_labeled_runs_strict_curated.csv"
readonly STRICT_TRAIN="training_data/fixed_train_base_strict.csv"
readonly STRICT_EVAL="training_data/fixed_eval_set_strict.csv"
# NOTE(review): STRICT_EVAL_GRAPH is not passed to any stage visible in this
# script; kept in case something else relies on it. Confirm before removing.
readonly STRICT_EVAL_GRAPH="training_data/fixed_eval_graph_only_strict.csv"
#######################################
# Stage 1: strict dataset quality gate.
# Launches training_data/dataset_quality_gate.py with the strict source,
# train and eval CSV paths, and a JSON report path via --out_json.
# Globals:   PYTHON_BIN, STRICT_SOURCE, STRICT_TRAIN, STRICT_EVAL (read)
# Arguments: none
# Outputs:   one progress line on stdout, followed by the script's own output
# Returns:   exit status of the Python invocation
#######################################
run_quality_gate() {
  # Labelled N/8: the full "all" run consists of eight stages.
  echo "[1/8] Running strict dataset quality gate"
  "$PYTHON_BIN" training_data/dataset_quality_gate.py \
    --source "$STRICT_SOURCE" \
    --train "$STRICT_TRAIN" \
    --eval "$STRICT_EVAL" \
    --out_json training_data/dataset_quality_report_strict_runtime.json
}
#######################################
# Stage 2: train the model.
# Launches model/trainer.py with the strict train CSV passed via --data.
# Globals:   PYTHON_BIN, STRICT_TRAIN (read)
# Arguments: none
# Outputs:   one progress line on stdout, followed by the script's own output
# Returns:   exit status of the Python invocation
#######################################
run_train() {
  # Labelled N/8: the full "all" run consists of eight stages.
  echo "[2/8] Training model on strict train split"
  "$PYTHON_BIN" model/trainer.py --data "$STRICT_TRAIN"
}
#######################################
# Stage 3: strict relevance evaluation.
# Launches experiments/relevance_evaluation.py with the strict train and eval
# CSV paths, plus JSON and Markdown output paths under experiments/results/.
# Globals:   PYTHON_BIN, STRICT_TRAIN, STRICT_EVAL (read)
# Arguments: none
# Outputs:   one progress line on stdout, followed by the script's own output
# Returns:   exit status of the Python invocation
#######################################
run_relevance() {
  # Labelled N/8: the full "all" run consists of eight stages.
  echo "[3/8] Running strict relevance evaluation"
  "$PYTHON_BIN" experiments/relevance_evaluation.py \
    --train "$STRICT_TRAIN" \
    --eval "$STRICT_EVAL" \
    --out_json experiments/results/relevance_eval_strict_runtime.json \
    --out_md experiments/results/relevance_eval_strict_runtime.md
}
#######################################
# Stage 4: strict ablation study.
# Launches experiments/ablation_study.py with the strict train CSV via --data
# and a CSV output path under experiments/results/ via --output.
# Globals:   PYTHON_BIN, STRICT_TRAIN (read)
# Arguments: none
# Outputs:   one progress line on stdout, followed by the script's own output
# Returns:   exit status of the Python invocation
#######################################
run_ablation() {
  # Labelled N/8: the full "all" run consists of eight stages.
  echo "[4/8] Running strict ablation study"
  "$PYTHON_BIN" experiments/ablation_study.py \
    --data "$STRICT_TRAIN" \
    --output experiments/results/ablation_strict_runtime.csv
}
#######################################
# Stage 5: strict dataset-shift evaluation.
# Launches experiments/dataset_shift_evaluation.py with the strict source CSV
# via --source, plus JSON and Markdown output paths under experiments/results/.
# Globals:   PYTHON_BIN, STRICT_SOURCE (read)
# Arguments: none
# Outputs:   one progress line on stdout, followed by the script's own output
# Returns:   exit status of the Python invocation
#######################################
run_shift() {
  # Labelled N/8: the full "all" run consists of eight stages.
  echo "[5/8] Running strict dataset-shift evaluation"
  "$PYTHON_BIN" experiments/dataset_shift_evaluation.py \
    --source "$STRICT_SOURCE" \
    --out_json experiments/results/dataset_shift_eval_strict_runtime.json \
    --out_md experiments/results/dataset_shift_eval_strict_runtime.md
}
#######################################
# Stage 6: strict robustness evaluation.
# Launches experiments/strict_robustness_evaluation.py with the strict train
# and eval CSV paths, the strict source CSV as --transfer_source, plus JSON
# and Markdown output paths under experiments/results/.
# Globals:   PYTHON_BIN, STRICT_TRAIN, STRICT_EVAL, STRICT_SOURCE (read)
# Arguments: none
# Outputs:   one progress line on stdout, followed by the script's own output
# Returns:   exit status of the Python invocation
#######################################
run_robustness() {
  # Labelled N/8: the full "all" run consists of eight stages.
  echo "[6/8] Running strict robustness evaluation"
  "$PYTHON_BIN" experiments/strict_robustness_evaluation.py \
    --train "$STRICT_TRAIN" \
    --eval "$STRICT_EVAL" \
    --transfer_source "$STRICT_SOURCE" \
    --out_json experiments/results/strict_robustness_eval_runtime.json \
    --out_md experiments/results/strict_robustness_eval_runtime.md
}
#######################################
# Stage 7: strict correctness report.
# Launches experiments/correctness_report.py, pointing --queries at
# dsl/sample_queries and --output at a CSV under experiments/results/.
# Globals:   PYTHON_BIN (read)
# Arguments: none
# Outputs:   one progress line on stdout, followed by the script's own output
# Returns:   exit status of the Python invocation
#######################################
run_correctness() {
  local -a report_args=(
    --queries dsl/sample_queries
    --output experiments/results/correctness_report_runtime.csv
  )

  printf '%s\n' "[7/8] Running strict correctness report"
  "$PYTHON_BIN" experiments/correctness_report.py "${report_args[@]}"
}
#######################################
# Stage 8: strict publication gate.
# Launches experiments/publish_gate.py with three minimum-drop thresholds
# (feature 0.005, group 0.005, permutation 0.05).
# Globals:   PYTHON_BIN (read)
# Arguments: none
# Outputs:   one progress line on stdout, followed by the script's own output
# Returns:   exit status of the Python invocation
#######################################
run_publish_gate() {
  local -a gate_flags=(
    --min_max_feature_drop 0.005
    --min_max_group_drop 0.005
    --min_max_permutation_drop 0.05
  )

  printf '%s\n' "[8/8] Validating strict publication gate"
  "$PYTHON_BIN" experiments/publish_gate.py "${gate_flags[@]}"
}
#######################################
# Publication gate variant that additionally passes --require_native_tpch.
# Launches experiments/publish_gate.py with that flag followed by the same
# three minimum-drop thresholds (feature 0.005, group 0.005, permutation 0.05).
# Globals:   PYTHON_BIN (read)
# Arguments: none
# Outputs:   one progress line on stdout, followed by the script's own output
# Returns:   exit status of the Python invocation
#######################################
run_publish_gate_native() {
  local -a gate_flags=(
    --require_native_tpch
    --min_max_feature_drop 0.005
    --min_max_group_drop 0.005
    --min_max_permutation_drop 0.05
  )

  printf '%s\n' "[8/8] Validating strict publication gate (native TPCH required)"
  "$PYTHON_BIN" experiments/publish_gate.py "${gate_flags[@]}"
}
# Dispatch on the requested mode. Composite modes ("all", "smoke") chain
# several stages; because the script runs under `set -e`, a failing stage
# aborts the sequence and the final "Done" line is never printed.
case "$MODE" in
  all)
    # Full eight-stage pipeline, in order.
    run_quality_gate
    run_train
    run_relevance
    run_ablation
    run_shift
    run_robustness
    run_correctness
    run_publish_gate
    ;;
  quality)
    run_quality_gate
    ;;
  train)
    run_train
    ;;
  relevance)
    run_relevance
    ;;
  ablation)
    run_ablation
    ;;
  shift)
    run_shift
    ;;
  robustness)
    run_robustness
    ;;
  correctness)
    # Standalone entry for the correctness report, matching the other
    # single-stage modes.
    run_correctness
    ;;
  smoke)
    # Fast sanity path for CI/local quick checks: skips training, ablation
    # and dataset-shift evaluation.
    run_quality_gate
    run_relevance
    run_robustness
    run_correctness
    run_publish_gate
    ;;
  gate)
    run_publish_gate
    ;;
  gate-native)
    run_publish_gate_native
    ;;
  *)
    # Unknown mode: report usage on stderr and fail.
    echo "Usage: ./run_project_strict.sh [all|quality|train|relevance|ablation|shift|robustness|correctness|smoke|gate|gate-native]" >&2
    exit 1
    ;;
esac

echo "Done: strict pipeline mode '$MODE' completed successfully."