Skip to content

Commit bc5f2fd

Browse files
committed
Add 'mtproxymax metrics [live]' command — engine metrics dashboard
Single awk pass over Prometheus output extracts connections, upstream success rate (OK/WARN/CRIT), duration buckets, per-user RX/TX/IPs, ME health, and security events. Live mode auto-refreshes in a subshell.
1 parent 1120608 commit bc5f2fd

1 file changed

Lines changed: 152 additions & 0 deletions

File tree

mtproxymax.sh

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4937,6 +4937,8 @@ show_cli_help() {
49374937
echo ""
49384938
echo -e " ${BOLD}Monitoring:${NC}"
49394939
echo -e " ${GREEN}traffic${NC} Show traffic stats"
4940+
echo -e " ${GREEN}metrics${NC} Show live engine metrics (connections, upstream, users, ME)"
4941+
echo -e " ${GREEN}metrics live${NC} [seconds] Auto-refresh metrics dashboard (default: 5s)"
49404942
echo -e " ${GREEN}logs${NC} Stream container logs"
49414943
echo -e " ${GREEN}health${NC} Run health diagnostics"
49424944
echo ""
@@ -5024,6 +5026,136 @@ show_status_json() {
50245026
printf '}\n'
50255027
}
50265028

5029+
# Render the engine metrics dashboard.
#
# Fetches the Prometheus text exposition through _fetch_metrics, reduces it in
# one awk pass and prints the sections Connections, Upstream, Users (only when
# at least one user is reported), ME Health and Security (only when a security
# counter is non-zero).
#
# Globals:   colour/symbol variables (DIM, NC, BOLD, GREEN, YELLOW,
#            BRIGHT_GREEN, BRIGHT_RED, SYM_OK, SYM_DOWN, SYM_UP) are read.
# Arguments: none
# Outputs:   dashboard on stdout; log_error is called when the fetch fails
# Returns:   1 when _fetch_metrics fails, 0 otherwise
show_metrics() {
    local m
    if ! m=$(_fetch_metrics 2>/dev/null); then
        log_error "Metrics endpoint unavailable — is the proxy running?"
        return 1
    fi

    # Single awk pass. Emitted records, '|'-separated:
    #   S|<18 scalar counters>                     (always exactly one line)
    #   D|bucket_key|label|ok|fail|pct_display     (one per duration bucket)
    #   U|user|current|total|rx|tx|unique_ips      (one per user)
    local parsed
    parsed=$(printf '%s\n' "$m" | awk '
        # Value of label k inside a sample line s, or "" when absent.
        function lbl(s, k,    p, q) {
            p = index(s, k "=\""); if (!p) return ""
            s = substr(s, p + length(k) + 2)
            q = index(s, "\""); return q ? substr(s, 1, q-1) : ""
        }
        /^telemt_uptime_seconds /                { uptime = $NF }
        /^telemt_connections_total /             { c_tot  = $NF }
        /^telemt_connections_bad_total /         { c_bad  = $NF }
        /^telemt_connections_current /           { c_cur  = $NF }
        /^telemt_connections_me_current /        { c_me   = $NF }
        /^telemt_connections_direct_current /    { c_dir  = $NF }
        /^telemt_upstream_connect_attempt_total / { up_att = $NF }
        /^telemt_upstream_connect_success_total / { up_ok  = $NF }
        /^telemt_upstream_connect_fail_total /   { up_fail = $NF }
        /^telemt_me_reconnect_attempts_total /   { me_att = $NF }
        /^telemt_me_reconnect_success_total /    { me_ok  = $NF }
        /^telemt_me_writers_active_current /     { me_wa  = $NF }
        /^telemt_me_writers_warm_current /       { me_ww  = $NF }
        /^telemt_me_endpoint_quarantine_total /  { me_quar = $NF }
        /^telemt_me_crc_mismatch_total /         { me_crc = $NF }
        /^telemt_pool_drain_active /             { pool   = $NF }
        /^telemt_desync_total /                  { desync = $NF }
        /^telemt_secure_padding_invalid_total /  { padinv = $NF }
        /^telemt_upstream_connect_duration_success_total\{/ { b=lbl($0,"bucket"); if(b) ds[b]+=$NF }
        /^telemt_upstream_connect_duration_fail_total\{/    { b=lbl($0,"bucket"); if(b) df[b]+=$NF }
        /^telemt_user_connections_current\{/ { u=lbl($0,"user"); if(u) uc[u]+=$NF }
        /^telemt_user_connections_total\{/   { u=lbl($0,"user"); if(u) ut[u]+=$NF }
        /^telemt_user_octets_from_client\{/  { u=lbl($0,"user"); if(u) rx[u]+=$NF }
        /^telemt_user_octets_to_client\{/    { u=lbl($0,"user"); if(u) tx[u]+=$NF }
        /^telemt_user_unique_ips_current\{/  { u=lbl($0,"user"); if(u) ui[u]+=$NF }
        END {
            printf "S|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f\n",
                uptime+0,c_tot+0,c_bad+0,c_cur+0,c_me+0,c_dir+0,
                up_att+0,up_ok+0,up_fail+0,me_att+0,me_ok+0,
                me_wa+0,me_ww+0,me_quar+0,me_crc+0,pool+0,desync+0,padinv+0
            bkeys[1]="le_100ms";   bnames[1]="<=100ms"
            bkeys[2]="101_500ms";  bnames[2]="101-500ms"
            bkeys[3]="501_1000ms"; bnames[3]="501ms-1s"
            bkeys[4]="gt_1000ms";  bnames[4]=">1s"
            for (i=1;i<=4;i++) {
                b=bkeys[i]; ok=ds[b]+0; fail=df[b]+0; tot=ok+fail
                # Round the percentage exactly once, here, so the shell loop
                # needs no per-row awk and no double rounding occurs.
                disp = (tot>0) ? sprintf("%.0f%%", ok/tot*100) : "—"
                printf "D|%s|%s|%.0f|%.0f|%s\n", b, bnames[i], ok, fail, disp
            }
            # Union over every per-user series, including the totals, so a
            # user with no current connections is still listed.
            for (u in uc) users[u]=1
            for (u in ut) users[u]=1
            for (u in rx) users[u]=1
            for (u in tx) users[u]=1
            for (u in ui) users[u]=1
            for (u in users)
                printf "U|%s|%.0f|%.0f|%.0f|%.0f|%.0f\n", u, uc[u]+0, ut[u]+0, rx[u]+0, tx[u]+0, ui[u]+0
        }
    ')

    # Parse scalar line
    local uptime c_tot c_bad c_cur c_me c_dir up_att up_ok up_fail me_att me_ok me_wa me_ww me_quar me_crc pool desync padinv
    IFS='|' read -r _ uptime c_tot c_bad c_cur c_me c_dir up_att up_ok up_fail \
        me_att me_ok me_wa me_ww me_quar me_crc pool desync padinv \
        <<< "$(grep '^S|' <<< "$parsed")"

    # Anything that is not a plain non-negative integer (missing field, or
    # "nan"/"inf" produced from an odd sample) becomes 0, so the integer
    # tests and arithmetic below cannot fail.
    local field
    for field in uptime c_tot c_bad c_cur c_me c_dir up_att up_ok up_fail \
        me_att me_ok me_wa me_ww me_quar me_crc pool desync padinv; do
        [[ "${!field}" =~ ^[0-9]+$ ]] || printf -v "$field" '%s' 0
    done

    local c_good=$(( c_tot - c_bad ))

    # Upstream success rate and its severity. The level is decided on the
    # rate as displayed (one decimal) so label and number never disagree.
    local up_status up_level="" up_rate=0
    if [ "$up_att" -eq 0 ]; then
        up_status="${DIM}—${NC}"
    else
        read -r up_level up_rate < <(awk -v a="$up_att" -v b="$up_ok" 'BEGIN {
            r = sprintf("%.1f", b / a * 100)
            lvl = "CRIT"
            if (r + 0 >= 95) lvl = "OK"
            else if (r + 0 >= 80) lvl = "WARN"
            printf "%s %s\n", lvl, r
        }')
        case "$up_level" in
            OK)   up_status="${BRIGHT_GREEN}OK${NC} ${up_rate}%" ;;
            WARN) up_status="${YELLOW}WARN${NC} ${up_rate}%" ;;
            *)    up_status="${BRIGHT_RED}CRIT${NC} ${up_rate}%" ;;
        esac
    fi

    # ME reconnect success rate; "—" when no reconnect was ever attempted.
    local me_rate_disp
    if [ "$me_att" -gt 0 ]; then
        me_rate_disp="$(awk -v a="$me_att" -v b="$me_ok" 'BEGIN{printf "%.1f", b/a*100}')%"
    else
        me_rate_disp="—"
    fi

    draw_header "METRICS"
    echo -e " ${DIM}uptime:${NC} $(format_duration "$uptime") ${DIM}upstream:${NC} ${up_status} ${DIM}active:${NC} ${c_cur} ${DIM}writers:${NC} ${me_wa}/${me_ww}"
    echo ""

    echo -e " ${BOLD}Connections${NC}"
    echo -e " ${DIM}total:${NC} ${c_tot} ${DIM}authorized:${NC} ${BRIGHT_GREEN}${c_good}${NC} ${DIM}rejected:${NC} ${BRIGHT_RED}${c_bad}${NC}"
    echo -e " ${DIM}active:${NC} ${c_cur} (ME: ${c_me} direct: ${c_dir})"
    echo ""

    echo -e " ${BOLD}Upstream${NC}"
    echo -e " ${DIM}attempts:${NC} ${up_att} ${DIM}success:${NC} ${BRIGHT_GREEN}${up_ok}${NC} ${DIM}failed:${NC} ${BRIGHT_RED}${up_fail}${NC} ${DIM}rate:${NC} ${up_status}"
    local b_label b_ok b_fail b_pct
    while IFS='|' read -r _ _ b_label b_ok b_fail b_pct; do
        printf " %-12s %6s ok %6s fail (%s)\n" "$b_label" "$b_ok" "$b_fail" "$b_pct"
    done < <(grep '^D|' <<< "$parsed")
    echo ""

    # Busiest users first (field 3 = current connections); ties by name so
    # the order is stable between refreshes.
    local user_lines uname ucur utot urx utx uips
    user_lines=$(grep '^U|' <<< "$parsed" | LC_ALL=C sort -t'|' -k3,3nr -k2,2)
    if [ -n "$user_lines" ]; then
        echo -e " ${BOLD}Users${NC}"
        while IFS='|' read -r _ uname ucur utot urx utx uips; do
            echo -e " ${GREEN}${SYM_OK}${NC} ${BOLD}${uname}${NC} active: ${ucur} total: ${utot} ${SYM_DOWN} $(format_bytes "$urx") ${SYM_UP} $(format_bytes "$utx") IPs: ${uips}"
        done <<< "$user_lines"
        echo ""
    fi

    echo -e " ${BOLD}ME Health${NC}"
    echo -e " ${DIM}reconnects:${NC} ${me_ok}/${me_att} (${me_rate_disp}) ${DIM}writers:${NC} ${me_wa} active / ${me_ww} warm"
    if [ "$me_quar" -gt 0 ]; then
        echo -e " ${DIM}quarantined endpoints:${NC} ${YELLOW}${me_quar}${NC}"
    fi
    if [ "$me_crc" -gt 0 ]; then
        echo -e " ${DIM}CRC mismatches:${NC} ${YELLOW}${me_crc}${NC}"
    fi
    if [ "$pool" -gt 0 ]; then
        echo -e " ${DIM}writers draining:${NC} ${pool}"
    fi
    echo ""

    if [ "$desync" -gt 0 ] || [ "$padinv" -gt 0 ]; then
        echo -e " ${BOLD}Security${NC}"
        if [ "$desync" -gt 0 ]; then
            echo -e " ${DIM}desync events:${NC} ${YELLOW}${desync}${NC}"
        fi
        if [ "$padinv" -gt 0 ]; then
            echo -e " ${DIM}invalid padding:${NC} ${YELLOW}${padinv}${NC}"
        fi
        echo ""
    fi
}
5158+
50275159
show_status() {
50285160
echo ""
50295161
local w=$TERM_WIDTH
@@ -5444,6 +5576,26 @@ cli_main() {
54445576
echo ""
54455577
;;
54465578

5579+
metrics)
5580+
load_settings
5581+
local subcmd="${1:-}"
5582+
if [ "$subcmd" = "live" ]; then
5583+
local interval="${2:-5}"
5584+
[[ "$interval" =~ ^[0-9]+$ ]] && [ "$interval" -ge 1 ] || interval=5
5585+
(
5586+
while true; do
5587+
tput clear 2>/dev/null || printf '\033[2J\033[H'
5588+
show_metrics
5589+
echo -e " ${DIM}[live — refreshing every ${interval}s, Ctrl+C to stop]${NC}"
5590+
sleep "$interval"
5591+
done
5592+
)
5593+
echo ""
5594+
else
5595+
show_metrics
5596+
fi
5597+
;;
5598+
54475599
logs)
54485600
load_settings
54495601
echo -e " ${DIM}Streaming logs (Ctrl+C to stop)...${NC}"

0 commit comments

Comments
 (0)