@@ -98,17 +98,20 @@ resource "aws_cloudwatch_metric_alarm" "waf_bad_inputs_blocks" {
9898}
9999
100100# Alarm for rate limit violations (overall)
101+ # Rate limit is set to 300,000 req/5min (= 1000 TPS, i.e. 2x the 500 TPS peak).
102+ # Any block at this limit is a serious incident - a single IP would need to exceed
103+ # 300k requests in 5 minutes, which indicates a runaway or compromised proxy.
101104resource "aws_cloudwatch_metric_alarm" "waf_rate_limit_blocks" {
102105 count = local. waf_enabled ? 1 : 0
103106 alarm_name = " WAF-RateLimit-Blocks-${ local . workspace } "
104- alarm_description = " Alerts when requests are rate-limited (potential DDoS) "
107+ alarm_description = " Alerts when requests are rate-limited - at 300k/5min limit this indicates a runaway or compromised proxy "
105108 comparison_operator = " GreaterThanThreshold"
106- evaluation_periods = 2
109+ evaluation_periods = 1
107110 metric_name = " BlockedRequests"
108111 namespace = " AWS/WAFV2"
109112 period = 300
110113 statistic = " Sum"
111- threshold = 50 # Alert after 50 rate-limited requests
114+ threshold = 1 # NOTE: with GreaterThanThreshold this fires at 2+ blocks per period; set to 0 to alarm on any single block
112115 treat_missing_data = " notBreaching"
113116
114117 dimensions = {
@@ -129,14 +132,16 @@ resource "aws_cloudwatch_metric_alarm" "waf_rate_limit_blocks" {
129132 )
130133}
131134
132- # Alarm for non-UK rate limit violations
133- resource "aws_cloudwatch_metric_alarm" "waf_non_uk_counted" {
135+ # Alarm for blocked non-UK requests
136+ # In preprod US is also allowed (for GitHub Actions), so this alarm fires on traffic
137+ # from countries outside GB+US. In prod it fires on anything outside GB.
138+ resource "aws_cloudwatch_metric_alarm" "waf_non_uk_blocked" {
134139 count = local. waf_enabled ? 1 : 0
135- alarm_name = " WAF-NonUK-CountedRequests -${ local . workspace } "
136- alarm_description = " Alerts when non-UK requests are observed (COUNT mode) by geo rule"
140+ alarm_name = " WAF-NonUK-BlockedRequests -${ local . workspace } "
141+ alarm_description = " Alerts when non-UK requests are blocked by geo rule - may indicate stolen mTLS cert use from outside UK "
137142 comparison_operator = " GreaterThanThreshold"
138143 evaluation_periods = 2
139- metric_name = " CountedRequests "
144+ metric_name = " BlockedRequests "
140145 namespace = " AWS/WAFV2"
141146 period = 300
142147 statistic = " Sum"
@@ -145,7 +150,7 @@ resource "aws_cloudwatch_metric_alarm" "waf_non_uk_counted" {
145150
146151 dimensions = {
147152 Region = var.default_aws_region
148- Rule = " MonitorNonUK "
153+ Rule = " BlockNonUK "
149154 WebACL = aws_wafv2_web_acl.api_gateway[0 ].name
150155 }
151156
@@ -154,8 +159,8 @@ resource "aws_cloudwatch_metric_alarm" "waf_non_uk_counted" {
154159 tags = merge (
155160 local. tags ,
156161 {
157- Name = " WAF-NonUK-CountedRequests "
158- Severity = " medium "
162+ Name = " WAF-NonUK-BlockedRequests "
163+ Severity = " high "
159164 Environment = var.environment
160165 }
161166 )
@@ -165,14 +170,14 @@ resource "aws_cloudwatch_metric_alarm" "waf_non_uk_counted" {
165170resource "aws_cloudwatch_metric_alarm" "waf_all_requests_high" {
166171 count = local. waf_enabled ? 1 : 0
167172 alarm_name = " WAF-AllRequests-High-${ local . workspace } "
168- alarm_description = " Monitors total request volume through WAF"
173+ alarm_description = " Monitors total allowed request volume through WAF"
169174 comparison_operator = " GreaterThanThreshold"
170175 evaluation_periods = 2
171176 metric_name = " AllowedRequests"
172177 namespace = " AWS/WAFV2"
173178 period = 300
174179 statistic = " Sum"
175- threshold = 10000 # Adjust based on expected traffic
180+ threshold = 300000 # 2x peak (500 TPS = 150k/5min); alert above 300k/5min
176181 treat_missing_data = " notBreaching"
177182
178183 dimensions = {
@@ -192,19 +197,21 @@ resource "aws_cloudwatch_metric_alarm" "waf_all_requests_high" {
192197 )
193198}
194199
195- # Alarm for monitoring counted requests (during initial count mode)
196- # This helps identify if rules would block legitimate traffic
200+ # Alarm for counted requests (NoUserAgent_Header override)
201+ # The CRS NoUserAgent_Header sub-rule is kept in COUNT mode so that the API proxy healthcheck is not blocked.
202+ # This alarm fires if the count spikes unexpectedly, which could indicate a rule misconfiguration
203+ # or unexpected traffic patterns hitting that override.
197204resource "aws_cloudwatch_metric_alarm" "waf_counted_requests_monitoring" {
198205 count = local. waf_enabled ? 1 : 0
199206 alarm_name = " WAF-CountedRequests-Monitoring-${ local . workspace } "
200- alarm_description = " Monitors requests that would be blocked if rules were active (COUNT mode )"
207+ alarm_description = " Monitors counted requests - expected to be low volume (healthcheck NoUserAgent_Header override only )"
201208 comparison_operator = " GreaterThanThreshold"
202209 evaluation_periods = 1
203210 metric_name = " CountedRequests"
204211 namespace = " AWS/WAFV2"
205212 period = 300
206213 statistic = " Sum"
207- threshold = 100 # Alert if many requests would be blocked
214+ threshold = 100 # Alert if count spikes beyond normal healthcheck frequency
208215 treat_missing_data = " notBreaching"
209216
210217 dimensions = {
@@ -220,7 +227,7 @@ resource "aws_cloudwatch_metric_alarm" "waf_counted_requests_monitoring" {
220227 Name = " WAF-CountedRequests-Monitoring"
221228 Severity = " low"
222229 Environment = var.environment
223- Purpose = " Initial monitoring during COUNT mode phase "
230+ Purpose = " Monitor NoUserAgent_Header count override for healthcheck proxy "
224231 }
225232 )
226233}
0 commit comments