Skip to content

Commit 82ee4d0

Browse files
committed
Fix traffic stats lost on restart when Prometheus is unreachable (#44)
Previously, flush_traffic_to_disk aborted entirely if the curl request to the local Prometheus-format metrics endpoint failed (for example, on a timeout or while the engine was shutting down). As a result, nothing was saved and all cumulative stats were lost after a restart. Now, if metrics are unavailable, the function still writes the existing cumulative totals (as previously loaded from disk) back to disk and resets the snapshots to 0. The last delta may be lost, but the lifetime accumulation is preserved.
1 parent d4b2ea9 commit 82ee4d0

1 file changed

Lines changed: 52 additions & 38 deletions

File tree

mtproxymax.sh

Lines changed: 52 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1523,38 +1523,42 @@ flush_traffic_to_disk() {
15231523
[[ "${snap_gout:-0}" =~ ^[0-9]+$ ]] || snap_gout=0
15241524

15251525
# Fetch current live metrics
1526-
local _metrics
1527-
_metrics=$(curl -s --max-time 2 "http://127.0.0.1:${PROXY_METRICS_PORT:-9090}/metrics" 2>/dev/null) || { exec 9>&-; return; }
1528-
[ -z "$_metrics" ] && { exec 9>&-; return; }
1529-
1530-
# Global traffic delta
1531-
local cur_gin cur_gout
1532-
cur_gin=$(echo "$_metrics" | awk '/^telemt_user_octets_from_client\{/{s+=$NF}END{printf "%.0f",s}')
1533-
cur_gout=$(echo "$_metrics" | awk '/^telemt_user_octets_to_client\{/{s+=$NF}END{printf "%.0f",s}')
1534-
cur_gin=${cur_gin:-0}; cur_gout=${cur_gout:-0}
1535-
local gd_in=$((cur_gin - snap_gin)) gd_out=$((cur_gout - snap_gout))
1536-
[ "$gd_in" -lt 0 ] 2>/dev/null && gd_in=$cur_gin
1537-
[ "$gd_out" -lt 0 ] 2>/dev/null && gd_out=$cur_gout
1538-
cum_in=$((cum_in + gd_in))
1539-
cum_out=$((cum_out + gd_out))
1540-
1541-
# Per-user traffic delta
1542-
[ -f "$SECRETS_FILE" ] && while IFS='|' read -r label secret created enabled _mc _mi _q _ex _notes; do
1543-
[[ "$label" =~ ^# ]] && continue; [ -z "$secret" ] && continue
1544-
[ "$enabled" != "true" ] && continue
1545-
local ui uo
1546-
ui=$(echo "$_metrics" | awk -v u="$label" '$0 ~ "^telemt_user_octets_from_client\\{.*user=\"" u "\"" {print $NF}')
1547-
uo=$(echo "$_metrics" | awk -v u="$label" '$0 ~ "^telemt_user_octets_to_client\\{.*user=\"" u "\"" {print $NF}')
1548-
ui=${ui:-0}; uo=${uo:-0}
1549-
local si=${_fu_snap_in["$label"]:-0} so=${_fu_snap_out["$label"]:-0}
1550-
local di=$((ui - si)) doo=$((uo - so))
1551-
[ "$di" -lt 0 ] 2>/dev/null && di=$ui
1552-
[ "$doo" -lt 0 ] 2>/dev/null && doo=$uo
1553-
_fu_cum_in["$label"]=$(( ${_fu_cum_in["$label"]:-0} + di ))
1554-
_fu_cum_out["$label"]=$(( ${_fu_cum_out["$label"]:-0} + doo ))
1555-
_fu_snap_in["$label"]=$ui
1556-
_fu_snap_out["$label"]=$uo
1557-
done < "$SECRETS_FILE"
1526+
local _metrics _have_metrics=false
1527+
_metrics=$(curl -s --max-time 2 "http://127.0.0.1:${PROXY_METRICS_PORT:-9090}/metrics" 2>/dev/null) || true
1528+
[ -n "$_metrics" ] && _have_metrics=true
1529+
1530+
if $_have_metrics; then
1531+
# Global traffic delta
1532+
local cur_gin cur_gout
1533+
cur_gin=$(echo "$_metrics" | awk '/^telemt_user_octets_from_client\{/{s+=$NF}END{printf "%.0f",s}')
1534+
cur_gout=$(echo "$_metrics" | awk '/^telemt_user_octets_to_client\{/{s+=$NF}END{printf "%.0f",s}')
1535+
cur_gin=${cur_gin:-0}; cur_gout=${cur_gout:-0}
1536+
local gd_in=$((cur_gin - snap_gin)) gd_out=$((cur_gout - snap_gout))
1537+
[ "$gd_in" -lt 0 ] 2>/dev/null && gd_in=$cur_gin
1538+
[ "$gd_out" -lt 0 ] 2>/dev/null && gd_out=$cur_gout
1539+
cum_in=$((cum_in + gd_in))
1540+
cum_out=$((cum_out + gd_out))
1541+
1542+
# Per-user traffic delta
1543+
[ -f "$SECRETS_FILE" ] && while IFS='|' read -r label secret created enabled _mc _mi _q _ex _notes; do
1544+
[[ "$label" =~ ^# ]] && continue; [ -z "$secret" ] && continue
1545+
[ "$enabled" != "true" ] && continue
1546+
local ui uo
1547+
ui=$(echo "$_metrics" | awk -v u="$label" '$0 ~ "^telemt_user_octets_from_client\\{.*user=\"" u "\"" {print $NF}')
1548+
uo=$(echo "$_metrics" | awk -v u="$label" '$0 ~ "^telemt_user_octets_to_client\\{.*user=\"" u "\"" {print $NF}')
1549+
ui=${ui:-0}; uo=${uo:-0}
1550+
local si=${_fu_snap_in["$label"]:-0} so=${_fu_snap_out["$label"]:-0}
1551+
local di=$((ui - si)) doo=$((uo - so))
1552+
[ "$di" -lt 0 ] 2>/dev/null && di=$ui
1553+
[ "$doo" -lt 0 ] 2>/dev/null && doo=$uo
1554+
_fu_cum_in["$label"]=$(( ${_fu_cum_in["$label"]:-0} + di ))
1555+
_fu_cum_out["$label"]=$(( ${_fu_cum_out["$label"]:-0} + doo ))
1556+
_fu_snap_in["$label"]=$ui
1557+
_fu_snap_out["$label"]=$uo
1558+
done < "$SECRETS_FILE"
1559+
fi
1560+
# If metrics unavailable, still save existing cumulative (don't lose what we have)
1561+
# Snapshot resets to 0 so next session starts fresh
15581562

15591563
# Write cumulative traffic
15601564
local _tmp
@@ -1571,18 +1575,28 @@ flush_traffic_to_disk() {
15711575
done
15721576
mv "$_tmp" "$_utf" 2>/dev/null || rm -f "$_tmp"
15731577

1574-
# Write per-user snapshot
1578+
# Write per-user snapshot (reset to 0 if metrics were unavailable)
15751579
_tmp=$(mktemp "${_stats_dir}/.traffic.XXXXXX" 2>/dev/null) || { exec 9>&-; return; }
15761580
chmod 600 "$_tmp"
1577-
for _l in "${!_fu_snap_in[@]}"; do
1578-
echo "${_l}|${_fu_snap_in[$_l]}|${_fu_snap_out[$_l]}" >> "$_tmp"
1579-
done
1581+
if $_have_metrics; then
1582+
for _l in "${!_fu_snap_in[@]}"; do
1583+
echo "${_l}|${_fu_snap_in[$_l]}|${_fu_snap_out[$_l]}" >> "$_tmp"
1584+
done
1585+
else
1586+
for _l in "${!_fu_cum_in[@]}"; do
1587+
echo "${_l}|0|0" >> "$_tmp"
1588+
done
1589+
fi
15801590
mv "$_tmp" "$_snap" 2>/dev/null || rm -f "$_tmp"
15811591

1582-
# Write global snapshot
1592+
# Write global snapshot (reset to 0 if metrics were unavailable)
15831593
_tmp=$(mktemp "${_stats_dir}/.traffic.XXXXXX" 2>/dev/null) || { exec 9>&-; return; }
15841594
chmod 600 "$_tmp"
1585-
echo "${cur_gin}|${cur_gout}" > "$_tmp"
1595+
if $_have_metrics; then
1596+
echo "${cur_gin}|${cur_gout}" > "$_tmp"
1597+
else
1598+
echo "0|0" > "$_tmp"
1599+
fi
15861600
mv "$_tmp" "${_stats_dir}/global_traffic_snapshot" 2>/dev/null || rm -f "$_tmp"
15871601
exec 9>&- # Release lock
15881602
}

0 commit comments

Comments
 (0)