Skip to content

Commit 93c3472

Browse files
committed
Add additional flavor in leadership when transaction fails to start
Related to #1077, a common problem that users run into is that their database pool is configured to be too small, and a common place they run into this is when a River client is trying to elect itself. Here, add a little bit of custom flavor to a deadline exceeded error that occurs during leadership election. This is certainly not an exhaustive way to reveal these errors (it only goes in one spot), but the idea is that we put it in a common error spot, and it should improve things incrementally.
1 parent 51f8346 commit 93c3472

2 files changed

Lines changed: 33 additions & 20 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1010
### Added
1111

1212
- Basic stuck detection after a job's exceeded its timeout and still not returned after the executor's initiated context cancellation and waited a short margin for the cancellation to take effect. [PR #1097](https://github.com/riverqueue/river/pull/1097).
13+
- Add a little more error flavor when encountering a deadline exceeded error during leadership election, suggesting that the user may want to try increasing their database pool size. [PR #1101](https://github.com/riverqueue/river/pull/1101).
1314

1415
## [0.29.0-rc.1] - 2025-12-04
1516

internal/leadership/elector.go

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"context"
66
"encoding/json"
77
"errors"
8+
"fmt"
89
"log/slog"
910
"strings"
1011
"sync"
@@ -567,27 +568,38 @@ func attemptElectOrReelect(ctx context.Context, exec riverdriver.Executor, alrea
567568
ctx, cancel := context.WithTimeout(ctx, deadlineTimeout)
568569
defer cancel()
569570

570-
return dbutil.WithTxV(ctx, exec, func(ctx context.Context, exec riverdriver.ExecutorTx) (bool, error) {
571-
if _, err := exec.LeaderDeleteExpired(ctx, &riverdriver.LeaderDeleteExpiredParams{
572-
Now: params.Now,
573-
Schema: params.Schema,
574-
}); err != nil {
575-
return false, err
571+
execTx, err := exec.Begin(ctx)
572+
if err != nil {
573+
var additionalDetail string
574+
if errors.Is(err, context.DeadlineExceeded) {
575+
additionalDetail = " (a common cause of this is a database pool that's at its connection limit; you may need to increase maximum connections)"
576576
}
577577

578-
var (
579-
elected bool
580-
err error
581-
)
582-
if alreadyElected {
583-
elected, err = exec.LeaderAttemptReelect(ctx, params)
584-
} else {
585-
elected, err = exec.LeaderAttemptElect(ctx, params)
586-
}
587-
if err != nil {
588-
return false, err
589-
}
578+
return false, fmt.Errorf("error beginning transaction: %w%s", err, additionalDetail)
579+
}
580+
defer dbutil.RollbackWithoutCancel(ctx, execTx)
590581

591-
return elected, nil
592-
})
582+
if _, err := execTx.LeaderDeleteExpired(ctx, &riverdriver.LeaderDeleteExpiredParams{
583+
Now: params.Now,
584+
Schema: params.Schema,
585+
}); err != nil {
586+
return false, err
587+
}
588+
589+
var elected bool
590+
if alreadyElected {
591+
elected, err = execTx.LeaderAttemptReelect(ctx, params)
592+
593+
} else {
594+
elected, err = execTx.LeaderAttemptElect(ctx, params)
595+
}
596+
if err != nil {
597+
return false, err
598+
}
599+
600+
if err := execTx.Commit(ctx); err != nil {
601+
return false, fmt.Errorf("error committing transaction: %w", err)
602+
}
603+
604+
return elected, nil
593605
}

0 commit comments

Comments
 (0)