Skip to content

Commit 0c369bc

Browse files
authored
Merge branch 'main' into feature/eja-add-custom-review-instructions
2 parents 184c661 + a002164 commit 0c369bc

4 files changed

Lines changed: 51 additions & 38 deletions

File tree

infrastructure/stacks/api-layer/variables.tf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ variable "SPLUNK_HEC_ENDPOINT" {
1212
# WAF deployment environments (list of environment names where WAF should be deployed)
1313
variable "waf_enabled_environments" {
1414
type = list(string)
15-
description = "Environments in which WAF resources are deployed. Adjust to disable in test after evaluation."
16-
default = ["dev", "preprod", "prod"]
15+
description = "Environments in which WAF resources are deployed"
16+
default = ["preprod", "prod"]
1717
}
1818

1919
variable "OPERATOR_EMAILS" {

infrastructure/stacks/api-layer/waf.tf

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,8 @@
11
# WAF Web ACL for API Gateway
2-
# Only deployed in production environment for cost optimization
3-
# Initially all rules are in COUNT mode to monitor traffic patterns
4-
52
resource "aws_wafv2_web_acl" "api_gateway" {
63
count = local.waf_enabled ? 1 : 0
74
name = "${local.workspace}-eligibility-signposting-api-waf"
8-
description = "WAF Web ACL for Eligibility Signposting API Gateway - Production"
5+
description = "WAF Web ACL for Eligibility Signposting API Gateway"
96
scope = "REGIONAL"
107

118
default_action {
@@ -19,7 +16,7 @@ resource "aws_wafv2_web_acl" "api_gateway" {
1916
priority = 10
2017

2118
override_action {
22-
count {} # Start in count mode - change to none {} when ready to block
19+
none {}
2320
}
2421

2522
statement {
@@ -43,13 +40,21 @@ resource "aws_wafv2_web_acl" "api_gateway" {
4340
priority = 20
4441

4542
override_action {
46-
count {} # Start in count mode - change to none {} when ready to block
43+
none {}
4744
}
4845

4946
statement {
5047
managed_rule_group_statement {
5148
vendor_name = "AWS"
5249
name = "AWSManagedRulesCommonRuleSet"
50+
51+
# Override NoUserAgent_Header to count only - APIM health checks send no User-Agent
52+
rule_action_override {
53+
name = "NoUserAgent_Header"
54+
action_to_use {
55+
count {}
56+
}
57+
}
5358
}
5459
}
5560

@@ -93,12 +98,12 @@ resource "aws_wafv2_web_acl" "api_gateway" {
9398
priority = 40
9499

95100
action {
96-
count {} # Start in count mode - change to block {} when ready
101+
block {}
97102
}
98103

99104
statement {
100105
rate_based_statement {
101-
limit = 2000 # Requests per 5-minute period per IP
106+
limit = 300000 # 1000 TPS - we should tie this to other rate limits
102107
aggregate_key_type = "IP"
103108
}
104109
}
@@ -110,30 +115,31 @@ resource "aws_wafv2_web_acl" "api_gateway" {
110115
}
111116
}
112117

113-
# Rule 5: Geographic Monitoring Rule - Monitor non-UK traffic (COUNT only)
114-
# NHS-specific requirement: initially monitor requests originating from outside GB
115-
# This rule COUNTS any request whose geo country code is not GB (does not block)
118+
# Rule 5: Geographic Block Rule - Block non-UK traffic
119+
# Blocks requests from outside the allowed country list.
120+
# In prod: GB only - all legitimate traffic must originate from within the UK
121+
# In preprod: GB + US - GitHub Actions integration tests run from US-based servers
116122
rule {
117-
name = "MonitorNonUK"
123+
name = "BlockNonUK"
118124
priority = 50
119125

120126
action {
121-
count {}
127+
block {}
122128
}
123129

124130
statement {
125131
not_statement {
126132
statement {
127133
geo_match_statement {
128-
country_codes = ["GB"] # United Kingdom only (does NOT include Crown Dependencies)
134+
country_codes = var.environment == "preprod" ? ["GB", "US"] : ["GB"]
129135
}
130136
}
131137
}
132138
}
133139

134140
visibility_config {
135141
cloudwatch_metrics_enabled = true
136-
metric_name = "MonitorNonUK"
142+
metric_name = "BlockNonUK"
137143
sampled_requests_enabled = true
138144
}
139145
}

infrastructure/stacks/api-layer/waf_alarms.tf

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -98,17 +98,20 @@ resource "aws_cloudwatch_metric_alarm" "waf_bad_inputs_blocks" {
9898
}
9999

100100
# Alarm for rate limit violations (overall)
101+
# Rate limit is set to 300,000 req/5min per IP (equivalent to 1000 TPS, i.e. 2x the 500 TPS peak).
102+
# Any block at this threshold is a serious incident - a single IP would need to exceed
103+
# 300k requests in 5 minutes, which indicates a runaway or compromised proxy.
101104
resource "aws_cloudwatch_metric_alarm" "waf_rate_limit_blocks" {
102105
count = local.waf_enabled ? 1 : 0
103106
alarm_name = "WAF-RateLimit-Blocks-${local.workspace}"
104-
alarm_description = "Alerts when requests are rate-limited (potential DDoS)"
107+
alarm_description = "Alerts when requests are rate-limited - at 300k/5min limit this indicates a runaway or compromised proxy"
105108
comparison_operator = "GreaterThanThreshold"
106-
evaluation_periods = 2
109+
evaluation_periods = 1
107110
metric_name = "BlockedRequests"
108111
namespace = "AWS/WAFV2"
109112
period = 300
110113
statistic = "Sum"
111-
threshold = 50 # Alert after 50 rate-limited requests
114+
threshold = 1 # Fires when more than 1 request is blocked in a period (GreaterThanThreshold); use 0 to alert on any single block
112115
treat_missing_data = "notBreaching"
113116

114117
dimensions = {
@@ -129,14 +132,16 @@ resource "aws_cloudwatch_metric_alarm" "waf_rate_limit_blocks" {
129132
)
130133
}
131134

132-
# Alarm for non-UK rate limit violations
133-
resource "aws_cloudwatch_metric_alarm" "waf_non_uk_counted" {
135+
# Alarm for blocked non-UK requests
136+
# In preprod US is also allowed (for GitHub Actions), so this alarm fires on traffic
137+
# from countries outside GB+US. In prod it fires on anything outside GB.
138+
resource "aws_cloudwatch_metric_alarm" "waf_non_uk_blocked" {
134139
count = local.waf_enabled ? 1 : 0
135-
alarm_name = "WAF-NonUK-CountedRequests-${local.workspace}"
136-
alarm_description = "Alerts when non-UK requests are observed (COUNT mode) by geo rule"
140+
alarm_name = "WAF-NonUK-BlockedRequests-${local.workspace}"
141+
alarm_description = "Alerts when non-UK requests are blocked by geo rule - may indicate stolen mTLS cert use from outside UK"
137142
comparison_operator = "GreaterThanThreshold"
138143
evaluation_periods = 2
139-
metric_name = "CountedRequests"
144+
metric_name = "BlockedRequests"
140145
namespace = "AWS/WAFV2"
141146
period = 300
142147
statistic = "Sum"
@@ -145,7 +150,7 @@ resource "aws_cloudwatch_metric_alarm" "waf_non_uk_counted" {
145150

146151
dimensions = {
147152
Region = var.default_aws_region
148-
Rule = "MonitorNonUK"
153+
Rule = "BlockNonUK"
149154
WebACL = aws_wafv2_web_acl.api_gateway[0].name
150155
}
151156

@@ -154,8 +159,8 @@ resource "aws_cloudwatch_metric_alarm" "waf_non_uk_counted" {
154159
tags = merge(
155160
local.tags,
156161
{
157-
Name = "WAF-NonUK-CountedRequests"
158-
Severity = "medium"
162+
Name = "WAF-NonUK-BlockedRequests"
163+
Severity = "high"
159164
Environment = var.environment
160165
}
161166
)
@@ -165,14 +170,14 @@ resource "aws_cloudwatch_metric_alarm" "waf_non_uk_counted" {
165170
resource "aws_cloudwatch_metric_alarm" "waf_all_requests_high" {
166171
count = local.waf_enabled ? 1 : 0
167172
alarm_name = "WAF-AllRequests-High-${local.workspace}"
168-
alarm_description = "Monitors total request volume through WAF"
173+
alarm_description = "Monitors total allowed request volume through WAF"
169174
comparison_operator = "GreaterThanThreshold"
170175
evaluation_periods = 2
171176
metric_name = "AllowedRequests"
172177
namespace = "AWS/WAFV2"
173178
period = 300
174179
statistic = "Sum"
175-
threshold = 10000 # Adjust based on expected traffic
180+
threshold = 300000 # 2x peak (500 TPS = 150k/5min); alert above 300k/5min
176181
treat_missing_data = "notBreaching"
177182

178183
dimensions = {
@@ -192,19 +197,21 @@ resource "aws_cloudwatch_metric_alarm" "waf_all_requests_high" {
192197
)
193198
}
194199

195-
# Alarm for monitoring counted requests (during initial count mode)
196-
# This helps identify if rules would block legitimate traffic
200+
# Alarm for counted requests (NoUserAgent_Header override)
201+
# The CRS NoUserAgent_Header sub-rule is kept in COUNT to allow the API proxy healthcheck.
202+
# This alarm alerts if count spikes unexpectedly, which could indicate rule misconfiguration
203+
# or unexpected traffic patterns hitting that override.
197204
resource "aws_cloudwatch_metric_alarm" "waf_counted_requests_monitoring" {
198205
count = local.waf_enabled ? 1 : 0
199206
alarm_name = "WAF-CountedRequests-Monitoring-${local.workspace}"
200-
alarm_description = "Monitors requests that would be blocked if rules were active (COUNT mode)"
207+
alarm_description = "Monitors counted requests - expected to be low volume (healthcheck NoUserAgent_Header override only)"
201208
comparison_operator = "GreaterThanThreshold"
202209
evaluation_periods = 1
203210
metric_name = "CountedRequests"
204211
namespace = "AWS/WAFV2"
205212
period = 300
206213
statistic = "Sum"
207-
threshold = 100 # Alert if many requests would be blocked
214+
threshold = 100 # Alert if count spikes beyond normal healthcheck frequency
208215
treat_missing_data = "notBreaching"
209216

210217
dimensions = {
@@ -220,7 +227,7 @@ resource "aws_cloudwatch_metric_alarm" "waf_counted_requests_monitoring" {
220227
Name = "WAF-CountedRequests-Monitoring"
221228
Severity = "low"
222229
Environment = var.environment
223-
Purpose = "Initial monitoring during COUNT mode phase"
230+
Purpose = "Monitor NoUserAgent_Header count override for healthcheck proxy"
224231
}
225232
)
226233
}

poetry.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)