Skip to content

Commit 2e8a997

Browse files
lexfrei and claude
committed
feat(tunnel): add configuration hot reload support
Add hot reload capability for locally managed tunnels. Configuration changes are detected via fsnotify with periodic polling fallback (for Kubernetes ConfigMap and symlink rotation scenarios).

Features:
- Automatic reload on config file changes (500ms debounce)
- Manual reload via SIGHUP signal
- Fallback polling every 30s for environments where fsnotify fails
- Catch-all rule validation for ingress configuration

Implementation:
- LocalConfigWatcher monitors config file and triggers Orchestrator updates
- Uses separate version namespace (starting at 1M) to avoid conflicts with remote configuration
- SIGHUP handler integrated with existing signal handling

Addresses: #240, #301, #512, #965

Co-Authored-By: Claude <noreply@anthropic.com>
Signed-off-by: Aleksei Sviridkin <f@lex.la>
1 parent 0d2a7a0 commit 2e8a997

7 files changed

Lines changed: 957 additions & 10 deletions

File tree

cmd/cloudflared/tunnel/cmd.go

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,15 @@ func StartServer(
391391
ctx, cancel := context.WithCancel(c.Context)
392392
defer cancel()
393393

394-
go waitForSignal(graceShutdownC, log)
394+
// reloadC is used to trigger configuration reloads via SIGHUP.
395+
// Channel is created here but waitForSignal is started later, after localWatcher
396+
// is ready to consume from reloadC (to avoid race condition).
397+
var reloadC chan struct{}
398+
configPath := c.String("config")
399+
if configPath != "" && c.String(TunnelTokenFlag) == "" {
400+
// Only enable hot reload for locally configured tunnels (not token-based)
401+
reloadC = make(chan struct{}, 1)
402+
}
395403

396404
if c.IsSet(cfdflags.ProxyDns) {
397405
dnsReadySignal := make(chan struct{})
@@ -489,6 +497,26 @@ func StartServer(
489497
return err
490498
}
491499

500+
// Start local config watcher for hot reload if enabled
501+
if reloadC != nil && configPath != "" {
502+
localWatcher := orchestration.NewLocalConfigWatcher(orchestrator, configPath, log)
503+
wg.Add(1)
504+
go func() {
505+
defer wg.Done()
506+
if err := localWatcher.Run(ctx, reloadC); err != nil {
507+
log.Debug().Err(err).Msg("Local config watcher stopped")
508+
}
509+
}()
510+
log.Info().Str("config", configPath).Msg("Configuration hot reload enabled (use SIGHUP to reload)")
511+
} else if configPath == "" {
512+
log.Debug().Msg("Configuration hot reload disabled: no config file specified")
513+
} else {
514+
log.Debug().Msg("Configuration hot reload disabled: token-based tunnel")
515+
}
516+
517+
// Start signal handler after localWatcher is ready to avoid race condition
518+
go waitForSignal(graceShutdownC, reloadC, log)
519+
492520
metricsListener, err := metrics.CreateMetricsListener(&listeners, c.String("metrics"))
493521
if err != nil {
494522
log.Err(err).Msg("Error opening metrics server listener")

cmd/cloudflared/tunnel/signal.go

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,36 @@ import (
88
"github.com/rs/zerolog"
99
)
1010

11-
// waitForSignal closes graceShutdownC to indicate that we should start graceful shutdown sequence
12-
func waitForSignal(graceShutdownC chan struct{}, logger *zerolog.Logger) {
11+
// waitForSignal handles OS signals for graceful shutdown and configuration reload.
12+
// It closes graceShutdownC on SIGTERM/SIGINT to trigger graceful shutdown.
13+
// If reloadC is provided, SIGHUP will send a reload signal instead of being ignored.
14+
func waitForSignal(graceShutdownC chan struct{}, reloadC chan<- struct{}, logger *zerolog.Logger) {
1315
signals := make(chan os.Signal, 10)
14-
signal.Notify(signals, syscall.SIGTERM, syscall.SIGINT)
16+
signal.Notify(signals, syscall.SIGTERM, syscall.SIGINT, syscall.SIGHUP)
1517
defer signal.Stop(signals)
1618

17-
select {
18-
case s := <-signals:
19-
logger.Info().Msgf("Initiating graceful shutdown due to signal %s ...", s)
20-
close(graceShutdownC)
21-
case <-graceShutdownC:
19+
for {
20+
select {
21+
case s := <-signals:
22+
switch s {
23+
case syscall.SIGHUP:
24+
if reloadC != nil {
25+
logger.Info().Msg("Received SIGHUP, triggering configuration reload...")
26+
select {
27+
case reloadC <- struct{}{}:
28+
default:
29+
logger.Warn().Msg("Configuration reload already in progress, skipping")
30+
}
31+
} else {
32+
logger.Info().Msg("Received SIGHUP but hot reload is not enabled for this tunnel")
33+
}
34+
case syscall.SIGTERM, syscall.SIGINT:
35+
logger.Info().Msgf("Initiating graceful shutdown due to signal %s ...", s)
36+
close(graceShutdownC)
37+
return
38+
}
39+
case <-graceShutdownC:
40+
return
41+
}
2242
}
2343
}

cmd/cloudflared/tunnel/signal_test.go

Lines changed: 113 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,123 @@ func TestSignalShutdown(t *testing.T) {
5252
}
5353
})
5454

55-
waitForSignal(graceShutdownC, &log)
55+
waitForSignal(graceShutdownC, nil, &log)
5656
assert.True(t, channelClosed(graceShutdownC))
5757
}
5858
}
5959

60+
func TestSignalSIGHUP_WithReloadChannel(t *testing.T) {
61+
log := zerolog.Nop()
62+
63+
graceShutdownC := make(chan struct{})
64+
reloadC := make(chan struct{}, 1)
65+
66+
go func() {
67+
// sleep for a tick to prevent sending signal before calling waitForSignal
68+
time.Sleep(tick)
69+
_ = syscall.Kill(syscall.Getpid(), syscall.SIGHUP)
70+
// Give time for signal to be processed
71+
time.Sleep(tick)
72+
// Send SIGTERM to exit waitForSignal
73+
_ = syscall.Kill(syscall.Getpid(), syscall.SIGTERM)
74+
}()
75+
76+
time.AfterFunc(time.Second, func() {
77+
select {
78+
case <-graceShutdownC:
79+
default:
80+
close(graceShutdownC)
81+
t.Fatal("waitForSignal timed out")
82+
}
83+
})
84+
85+
waitForSignal(graceShutdownC, reloadC, &log)
86+
87+
// Check that reload signal was received
88+
select {
89+
case <-reloadC:
90+
// Expected - SIGHUP should trigger reload
91+
default:
92+
t.Fatal("Expected reload channel to receive signal from SIGHUP")
93+
}
94+
}
95+
96+
func TestSignalSIGHUP_WithoutReloadChannel(t *testing.T) {
97+
log := zerolog.Nop()
98+
99+
graceShutdownC := make(chan struct{})
100+
101+
go func() {
102+
// sleep for a tick to prevent sending signal before calling waitForSignal
103+
time.Sleep(tick)
104+
// Send SIGHUP without reload channel - should be ignored
105+
_ = syscall.Kill(syscall.Getpid(), syscall.SIGHUP)
106+
time.Sleep(tick)
107+
// Send SIGTERM to exit waitForSignal
108+
_ = syscall.Kill(syscall.Getpid(), syscall.SIGTERM)
109+
}()
110+
111+
time.AfterFunc(time.Second, func() {
112+
select {
113+
case <-graceShutdownC:
114+
default:
115+
close(graceShutdownC)
116+
t.Fatal("waitForSignal timed out")
117+
}
118+
})
119+
120+
// Should complete without panic or deadlock
121+
waitForSignal(graceShutdownC, nil, &log)
122+
assert.True(t, channelClosed(graceShutdownC))
123+
}
124+
125+
func TestSignalSIGHUP_ReloadInProgress(t *testing.T) {
126+
log := zerolog.Nop()
127+
128+
graceShutdownC := make(chan struct{})
129+
// Create buffered channel and fill it
130+
reloadC := make(chan struct{}, 1)
131+
reloadC <- struct{}{} // Pre-fill to simulate reload in progress
132+
133+
go func() {
134+
// sleep for a tick to prevent sending signal before calling waitForSignal
135+
time.Sleep(tick)
136+
// Send SIGHUP while reload is "in progress"
137+
_ = syscall.Kill(syscall.Getpid(), syscall.SIGHUP)
138+
time.Sleep(tick)
139+
// Send SIGTERM to exit waitForSignal
140+
_ = syscall.Kill(syscall.Getpid(), syscall.SIGTERM)
141+
}()
142+
143+
time.AfterFunc(time.Second, func() {
144+
select {
145+
case <-graceShutdownC:
146+
default:
147+
close(graceShutdownC)
148+
t.Fatal("waitForSignal timed out")
149+
}
150+
})
151+
152+
// Should complete without blocking (non-blocking send)
153+
waitForSignal(graceShutdownC, reloadC, &log)
154+
155+
// Channel should still have exactly one signal (the pre-filled one)
156+
select {
157+
case <-reloadC:
158+
// Expected - drain the one signal
159+
default:
160+
t.Fatal("Expected reload channel to have signal")
161+
}
162+
163+
// Should be empty now
164+
select {
165+
case <-reloadC:
166+
t.Fatal("Expected reload channel to be empty after draining")
167+
default:
168+
// Expected - channel is empty
169+
}
170+
}
171+
60172
func TestWaitForShutdown(t *testing.T) {
61173
log := zerolog.Nop()
62174

orchestration/local_config.go

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
package orchestration
2+
3+
import (
4+
"encoding/json"
5+
"os"
6+
7+
"github.com/pkg/errors"
8+
"gopkg.in/yaml.v3"
9+
10+
"github.com/cloudflare/cloudflared/config"
11+
"github.com/cloudflare/cloudflared/ingress"
12+
)
13+
14+
// LocalConfigJSON represents the JSON format expected by Orchestrator.UpdateConfig.
15+
// It mirrors ingress.RemoteConfigJSON structure.
16+
type LocalConfigJSON struct {
17+
GlobalOriginRequest *config.OriginRequestConfig `json:"originRequest,omitempty"`
18+
IngressRules []config.UnvalidatedIngressRule `json:"ingress"`
19+
WarpRouting config.WarpRoutingConfig `json:"warp-routing"`
20+
}
21+
22+
// ReadLocalConfig reads and parses the local YAML configuration file.
23+
func ReadLocalConfig(configPath string) (*config.Configuration, error) {
24+
file, err := os.Open(configPath)
25+
if err != nil {
26+
return nil, errors.Wrapf(err, "failed to open config file %s", configPath)
27+
}
28+
defer file.Close()
29+
30+
var cfg config.Configuration
31+
if err := yaml.NewDecoder(file).Decode(&cfg); err != nil {
32+
return nil, errors.Wrapf(err, "failed to parse YAML config file %s", configPath)
33+
}
34+
35+
return &cfg, nil
36+
}
37+
38+
// ConvertLocalConfigToJSON converts local YAML configuration to JSON format
39+
// expected by Orchestrator.UpdateConfig.
40+
func ConvertLocalConfigToJSON(cfg *config.Configuration) ([]byte, error) {
41+
if cfg == nil {
42+
return nil, errors.New("config cannot be nil")
43+
}
44+
45+
localJSON := LocalConfigJSON{
46+
GlobalOriginRequest: &cfg.OriginRequest,
47+
IngressRules: cfg.Ingress,
48+
WarpRouting: cfg.WarpRouting,
49+
}
50+
51+
data, err := json.Marshal(localJSON)
52+
if err != nil {
53+
return nil, errors.Wrap(err, "failed to marshal config to JSON")
54+
}
55+
56+
return data, nil
57+
}
58+
59+
// ValidateLocalConfig validates the local configuration by attempting to parse
60+
// ingress rules. Returns nil if valid.
61+
func ValidateLocalConfig(cfg *config.Configuration) error {
62+
_, err := ConvertAndValidateLocalConfig(cfg)
63+
return err
64+
}
65+
66+
// ConvertAndValidateLocalConfig converts local config to JSON and validates it
67+
// in a single pass. Returns JSON bytes if valid, error otherwise.
68+
func ConvertAndValidateLocalConfig(cfg *config.Configuration) ([]byte, error) {
69+
data, err := ConvertLocalConfigToJSON(cfg)
70+
if err != nil {
71+
return nil, err
72+
}
73+
74+
// Skip validation if no ingress rules
75+
if len(cfg.Ingress) == 0 {
76+
return data, nil
77+
}
78+
79+
// Validate catch-all rule exists (last rule must have empty hostname)
80+
lastRule := cfg.Ingress[len(cfg.Ingress)-1]
81+
if lastRule.Hostname != "" {
82+
return nil, errors.New("ingress rules must end with a catch-all rule (empty hostname)")
83+
}
84+
85+
// Validate by attempting to parse as RemoteConfig
86+
var remoteConfig ingress.RemoteConfig
87+
if err := json.Unmarshal(data, &remoteConfig); err != nil {
88+
return nil, errors.Wrap(err, "invalid ingress configuration")
89+
}
90+
91+
return data, nil
92+
}

0 commit comments

Comments
 (0)