@@ -4937,6 +4937,8 @@ show_cli_help() {
49374937 echo " "
49384938 echo -e " ${BOLD} Monitoring:${NC} "
49394939 echo -e " ${GREEN} traffic${NC} Show traffic stats"
4940+ echo -e " ${GREEN} metrics${NC} Show live engine metrics (connections, upstream, users, ME)"
4941+ echo -e " ${GREEN} metrics live${NC} [seconds] Auto-refresh metrics dashboard (default: 5s)"
49404942 echo -e " ${GREEN} logs${NC} Stream container logs"
49414943 echo -e " ${GREEN} health${NC} Run health diagnostics"
49424944 echo " "
@@ -5024,6 +5026,136 @@ show_status_json() {
50245026 printf ' }\n'
50255027}
50265028
#######################################
# Print a one-shot dashboard of engine metrics.
#
# Fetches the raw metrics text through _fetch_metrics, condenses it in a
# single awk pass into three kinds of pipe-delimited records, and renders
# them as Connections / Upstream / Users / ME Health / Security sections:
#   S|<18 scalar counters>
#   D|<bucket key>|<bucket label>|<ok>|<fail>|<success percent or —>
#   U|<user>|<current conns>|<total conns>|<rx octets>|<tx octets>|<unique IPs>
#
# Globals:   reads DIM NC BOLD GREEN YELLOW BRIGHT_GREEN BRIGHT_RED
#            SYM_OK SYM_DOWN SYM_UP
# Arguments: none
# Outputs:   dashboard on stdout
# Returns:   1 when _fetch_metrics fails, 0 otherwise
#######################################
show_metrics() {
    local m
    if ! m=$(_fetch_metrics 2>/dev/null); then
        log_error "Metrics endpoint unavailable — is the proxy running?"
        return 1
    fi

    # Single awk pass: S| = scalars, D| = duration buckets, U| = per-user
    local parsed
    parsed=$(printf '%s\n' "$m" | awk '
        # Return the value of label k in sample line s, or "" if absent.
        function lbl(s, k,    p, q) {
            p = index(s, k "=\""); if (!p) return ""
            s = substr(s, p + length(k) + 2)
            q = index(s, "\""); return q ? substr(s, 1, q-1) : ""
        }
        /^telemt_uptime_seconds /                { uptime = $NF }
        /^telemt_connections_total /             { c_tot  = $NF }
        /^telemt_connections_bad_total /         { c_bad  = $NF }
        /^telemt_connections_current /           { c_cur  = $NF }
        /^telemt_connections_me_current /        { c_me   = $NF }
        /^telemt_connections_direct_current /    { c_dir  = $NF }
        /^telemt_upstream_connect_attempt_total / { up_att = $NF }
        /^telemt_upstream_connect_success_total / { up_ok  = $NF }
        /^telemt_upstream_connect_fail_total /   { up_fail = $NF }
        /^telemt_me_reconnect_attempts_total /   { me_att = $NF }
        /^telemt_me_reconnect_success_total /    { me_ok  = $NF }
        /^telemt_me_writers_active_current /     { me_wa  = $NF }
        /^telemt_me_writers_warm_current /       { me_ww  = $NF }
        /^telemt_me_endpoint_quarantine_total /  { me_quar = $NF }
        /^telemt_me_crc_mismatch_total /         { me_crc = $NF }
        /^telemt_pool_drain_active /             { pool   = $NF }
        /^telemt_desync_total /                  { desync = $NF }
        /^telemt_secure_padding_invalid_total /  { padinv = $NF }
        /^telemt_upstream_connect_duration_success_total\{/ { b=lbl($0,"bucket"); if(b) ds[b]+=$NF }
        /^telemt_upstream_connect_duration_fail_total\{/    { b=lbl($0,"bucket"); if(b) df[b]+=$NF }
        /^telemt_user_connections_current\{/ { u=lbl($0,"user"); if(u) uc[u]+=$NF }
        /^telemt_user_connections_total\{/   { u=lbl($0,"user"); if(u) ut[u]+=$NF }
        /^telemt_user_octets_from_client\{/  { u=lbl($0,"user"); if(u) rx[u]+=$NF }
        /^telemt_user_octets_to_client\{/    { u=lbl($0,"user"); if(u) tx[u]+=$NF }
        /^telemt_user_unique_ips_current\{/  { u=lbl($0,"user"); if(u) ui[u]+=$NF }
        END {
            printf "S|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f|%.0f\n",
                uptime+0,c_tot+0,c_bad+0,c_cur+0,c_me+0,c_dir+0,
                up_att+0,up_ok+0,up_fail+0,me_att+0,me_ok+0,
                me_wa+0,me_ww+0,me_quar+0,me_crc+0,pool+0,desync+0,padinv+0
            bkeys[1]="le_100ms";   bnames[1]="<=100ms"
            bkeys[2]="101_500ms";  bnames[2]="101-500ms"
            bkeys[3]="501_1000ms"; bnames[3]="501ms-1s"
            bkeys[4]="gt_1000ms";  bnames[4]=">1s"
            for (i=1;i<=4;i++) {
                b=bkeys[i]; ok=ds[b]+0; fail=df[b]+0; tot=ok+fail
                # Format the percentage here so the shell loop needs no
                # extra awk process per bucket row.
                pct = (tot>0) ? sprintf("%.0f%%", ok/tot*100) : "—"
                printf "D|%s|%s|%.0f|%.0f|%s\n", b, bnames[i], ok, fail, pct
            }
            # Union over every per-user series so a user seen in only one
            # of them (including the total-connections counter) is listed.
            for (u in uc) users[u]=1
            for (u in ut) users[u]=1
            for (u in rx) users[u]=1
            for (u in tx) users[u]=1
            for (u in ui) users[u]=1
            for (u in users)
                printf "U|%s|%.0f|%.0f|%.0f|%.0f|%.0f\n", u, uc[u]+0, ut[u]+0, rx[u]+0, tx[u]+0, ui[u]+0
        }
    ')

    # Parse scalar line
    local uptime c_tot c_bad c_cur c_me c_dir up_att up_ok up_fail
    local me_att me_ok me_wa me_ww me_quar me_crc pool desync padinv
    IFS='|' read -r _ uptime c_tot c_bad c_cur c_me c_dir up_att up_ok up_fail \
        me_att me_ok me_wa me_ww me_quar me_crc pool desync padinv \
        <<< "$(grep '^S|' <<< "$parsed")"

    local c_good=$(( ${c_tot:-0} - ${c_bad:-0} ))

    # Upstream success rate and its traffic-light status.
    local up_rate=0 up_status
    if [ "${up_att:-0}" -gt 0 ]; then
        up_rate=$(awk -v a="$up_att" -v b="$up_ok" 'BEGIN{printf "%.1f", b/a*100}')
    fi
    if [ "${up_att:-0}" -eq 0 ]; then
        up_status="${DIM}—${NC}"
    elif awk -v r="$up_rate" 'BEGIN{exit !(r+0 >= 95)}'; then
        up_status="${BRIGHT_GREEN}OK${NC} ${up_rate}%"
    elif awk -v r="$up_rate" 'BEGIN{exit !(r+0 >= 80)}'; then
        up_status="${YELLOW}WARN${NC} ${up_rate}%"
    else
        up_status="${BRIGHT_RED}CRIT${NC} ${up_rate}%"
    fi

    # ME reconnect success rate; a dash when nothing was attempted.
    local me_rate=0 me_rate_disp="—"
    if [ "${me_att:-0}" -gt 0 ]; then
        me_rate=$(awk -v a="$me_att" -v b="$me_ok" 'BEGIN{printf "%.1f", b/a*100}')
        me_rate_disp="${me_rate}%"
    fi

    draw_header "METRICS"
    echo -e "  ${DIM}uptime:${NC} $(format_duration "${uptime:-0}")  ${DIM}upstream:${NC} ${up_status}  ${DIM}active:${NC} ${c_cur:-0}  ${DIM}writers:${NC} ${me_wa:-0}/${me_ww:-0}"
    echo ""

    echo -e "  ${BOLD}Connections${NC}"
    echo -e "    ${DIM}total:${NC} ${c_tot:-0}  ${DIM}authorized:${NC} ${BRIGHT_GREEN}${c_good}${NC}  ${DIM}rejected:${NC} ${BRIGHT_RED}${c_bad:-0}${NC}"
    echo -e "    ${DIM}active:${NC} ${c_cur:-0}  (ME: ${c_me:-0}  direct: ${c_dir:-0})"
    echo ""

    echo -e "  ${BOLD}Upstream${NC}"
    echo -e "    ${DIM}attempts:${NC} ${up_att:-0}  ${DIM}success:${NC} ${BRIGHT_GREEN}${up_ok:-0}${NC}  ${DIM}failed:${NC} ${BRIGHT_RED}${up_fail:-0}${NC}  ${DIM}rate:${NC} ${up_status}"
    local bn ok fail ppct
    while IFS='|' read -r _ _ bn ok fail ppct; do
        printf "    %-12s %6s ok  %6s fail  (%s)\n" "$bn" "$ok" "$fail" "$ppct"
    done < <(grep '^D|' <<< "$parsed")
    echo ""

    # Busiest users first: numeric, descending, on the current-connections
    # field only (-k3,3 bounds the key to that single field).
    local user_lines uname ucur utot urx utx uips
    user_lines=$(grep '^U|' <<< "$parsed" | sort -t'|' -k3,3nr)
    if [ -n "$user_lines" ]; then
        echo -e "  ${BOLD}Users${NC}"
        while IFS='|' read -r _ uname ucur utot urx utx uips; do
            echo -e "    ${GREEN}${SYM_OK}${NC} ${BOLD}${uname}${NC}  active: ${ucur}  total: ${utot}  ${SYM_DOWN} $(format_bytes "$urx")  ${SYM_UP} $(format_bytes "$utx")  IPs: ${uips}"
        done <<< "$user_lines"
        echo ""
    fi

    echo -e "  ${BOLD}ME Health${NC}"
    echo -e "    ${DIM}reconnects:${NC} ${me_ok:-0}/${me_att:-0} (${me_rate_disp})  ${DIM}writers:${NC} ${me_wa:-0} active / ${me_ww:-0} warm"
    # The counters below are only worth a line when they are non-zero.
    if [ "${me_quar:-0}" -gt 0 ]; then
        echo -e "    ${DIM}quarantined endpoints:${NC} ${YELLOW}${me_quar}${NC}"
    fi
    if [ "${me_crc:-0}" -gt 0 ]; then
        echo -e "    ${DIM}CRC mismatches:${NC} ${YELLOW}${me_crc}${NC}"
    fi
    if [ "${pool:-0}" -gt 0 ]; then
        echo -e "    ${DIM}writers draining:${NC} ${pool}"
    fi
    echo ""

    if [ "${desync:-0}" -gt 0 ] || [ "${padinv:-0}" -gt 0 ]; then
        echo -e "  ${BOLD}Security${NC}"
        if [ "${desync:-0}" -gt 0 ]; then
            echo -e "    ${DIM}desync events:${NC} ${YELLOW}${desync}${NC}"
        fi
        if [ "${padinv:-0}" -gt 0 ]; then
            echo -e "    ${DIM}invalid padding:${NC} ${YELLOW}${padinv}${NC}"
        fi
        echo ""
    fi
    return 0
}

5158+
50275159show_status () {
50285160 echo " "
50295161 local w=$TERM_WIDTH
@@ -5444,6 +5576,26 @@ cli_main() {
54445576 echo " "
54455577 ;;
54465578
5579+ metrics)
5580+ load_settings
5581+ local subcmd=" ${1:- } "
5582+ if [ " $subcmd " = " live" ]; then
5583+ local interval=" ${2:- 5} "
5584+ [[ " $interval " =~ ^[0-9]+$ ]] && [ " $interval " -ge 1 ] || interval=5
5585+ (
5586+ while true ; do
5587+ tput clear 2> /dev/null || printf ' \033[2J\033[H'
5588+ show_metrics
5589+ echo -e " ${DIM} [live — refreshing every ${interval} s, Ctrl+C to stop]${NC} "
5590+ sleep " $interval "
5591+ done
5592+ )
5593+ echo " "
5594+ else
5595+ show_metrics
5596+ fi
5597+ ;;
5598+
54475599 logs)
54485600 load_settings
54495601 echo -e " ${DIM} Streaming logs (Ctrl+C to stop)...${NC} "
0 commit comments