Skip to content

Commit 565dc2c

Browse files
Simplified error message
Simplified error message to use either X-Databricks-Reason-Phrase or X-Thriftserver-Error-Message header. Always honour Retry-After header when retrying. Not just on 429 or 503. Signed-off-by: Raymond Cypher <raymond.cypher@databricks.com>
1 parent 3453ebb commit 565dc2c

2 files changed

Lines changed: 33 additions & 34 deletions

File tree

internal/client/client.go

Lines changed: 16 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -56,16 +56,7 @@ const (
5656
cancelOperation
5757
)
5858

59-
var idempotentClientMethods map[clientMethod]any = map[clientMethod]any{
60-
closeSession: struct{}{},
61-
getResultSetMetadata: struct{}{},
62-
getOperationStatus: struct{}{},
63-
closeOperation: struct{}{},
64-
cancelOperation: struct{}{},
65-
// fetchResults is treated as idempotent because the drivers fetching logic is robust enoug
66-
// to fetch results in correct order
67-
fetchResults: struct{}{},
68-
}
59+
var nonRetryableClientMethods map[clientMethod]any = map[clientMethod]any{executeStatement: struct{}{}}
6960

7061
// OpenSession is a wrapper around the thrift operation OpenSession
7162
// If RecordResults is true, the results will be marshalled to JSON format and written to OpenSession<index>.json
@@ -449,27 +440,24 @@ func errorHandler(resp *http.Response, err error, numTries int) (*http.Response,
449440
var werr error
450441
msg := fmt.Sprintf("request error after %d attempt(s)", numTries)
451442
if err == nil {
452-
err = errors.New(msg)
443+
werr = errors.New(msg)
453444
} else {
454-
err = errors.Wrap(err, msg)
445+
werr = errors.Wrap(err, msg)
455446
}
456447

457448
if resp != nil {
458-
var orgid, reason, terrmsg, errmsg string
459-
// TODO @mattdeekay: convert these to specific error types
460449
if resp.Header != nil {
461-
orgid = resp.Header.Get("X-Databricks-Org-Id")
462-
reason = resp.Header.Get("X-Databricks-Reason-Phrase") // TODO note: shown on notebook
463-
terrmsg = resp.Header.Get("X-Thriftserver-Error-Message")
464-
errmsg = resp.Header.Get("x-databricks-error-or-redirect-message")
465-
// TODO note: need to see if there's other headers
450+
reason := resp.Header.Get("X-Databricks-Reason-Phrase")
451+
terrmsg := resp.Header.Get("X-Thriftserver-Error-Message")
452+
453+
if reason != "" {
454+
werr = dbsqlerrint.WrapErr(werr, reason)
455+
} else if terrmsg != "" {
456+
werr = dbsqlerrint.WrapErr(werr, terrmsg)
457+
}
466458
}
467-
msg := fmt.Sprintf("orgId: %s, reason: %s, thriftErr: %s, err: %s", orgid, reason, terrmsg, errmsg)
468459

469-
werr = dbsqlerrint.WrapErr(err, msg)
470460
logger.Err(werr).Msg(resp.Status)
471-
} else {
472-
werr = err
473461
}
474462

475463
return resp, werr
@@ -526,7 +514,7 @@ func RetryPolicy(ctx context.Context, resp *http.Response, err error) (bool, err
526514
// 429 Too Many Requests or 503 service unavailable is recoverable. Sometimes the server puts
527515
// a Retry-After response header to indicate when the server is
528516
// available to start processing request from client.
529-
if resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode == http.StatusServiceUnavailable {
517+
if isRetryableServerResponse(resp) {
530518
var retryAfter string
531519
if resp.Header != nil {
532520
retryAfter = resp.Header.Get("Retry-After")
@@ -538,7 +526,7 @@ func RetryPolicy(ctx context.Context, resp *http.Response, err error) (bool, err
538526
if resp.StatusCode == 0 || (resp.StatusCode >= 500 && resp.StatusCode != http.StatusNotImplemented) {
539527
callerAny := ctx.Value(ClientMethod)
540528
if caller, ok := callerAny.(clientMethod); ok {
541-
if _, ok := idempotentClientMethods[caller]; ok {
529+
if _, noRetry := nonRetryableClientMethods[caller]; !noRetry {
542530
return true, checkErr
543531
}
544532
}
@@ -550,14 +538,16 @@ func RetryPolicy(ctx context.Context, resp *http.Response, err error) (bool, err
550538
}
551539

552540
func backoff(min, max time.Duration, attemptNum int, resp *http.Response) time.Duration {
553-
if resp != nil && isRetryableServerResponse(resp) {
541+
// honour the Retry-After header
542+
if resp != nil && resp.Header != nil {
554543
if s, ok := resp.Header["Retry-After"]; ok {
555544
if sleep, err := strconv.ParseInt(s[0], 10, 64); err == nil {
556545
return time.Second * time.Duration(sleep)
557546
}
558547
}
559548
}
560549

550+
// exponential backoff
561551
mult := math.Pow(2, float64(attemptNum)) * float64(min)
562552
sleep := time.Duration(mult)
563553
if float64(sleep) != mult || sleep > max {

internal/client/client_test.go

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,23 @@ func TestRetryPolicy(t *testing.T) {
8484

8585
nonRetryableCodes := []int{200, 300, 400, 501}
8686

87-
idempotentOps := []clientMethod{closeSession, getResultSetMetadata, getOperationStatus, closeOperation, cancelOperation, fetchResults}
88-
nonIdempotentOps := []clientMethod{openSession, executeStatement}
87+
retryableOps := []clientMethod{
88+
closeSession,
89+
getResultSetMetadata,
90+
getOperationStatus,
91+
closeOperation,
92+
cancelOperation,
93+
fetchResults,
94+
openSession,
95+
}
96+
97+
nonRetryableOps := []clientMethod{executeStatement}
8998

9099
cancelled, cancel := context.WithCancel(context.Background())
91100
cancel()
92101

93102
for _, code := range retryableCodes {
94-
for _, op := range idempotentOps {
103+
for _, op := range retryableOps {
95104
resp.StatusCode = code
96105
ctx := context.WithValue(context.Background(), ClientMethod, op)
97106
retry, _ := RetryPolicy(ctx, resp, nil)
@@ -102,7 +111,7 @@ func TestRetryPolicy(t *testing.T) {
102111
require.False(t, retry)
103112
}
104113

105-
for _, op := range nonIdempotentOps {
114+
for _, op := range nonRetryableOps {
106115
resp.StatusCode = code
107116
ctx := context.WithValue(context.Background(), ClientMethod, op)
108117
retry, _ := RetryPolicy(ctx, resp, nil)
@@ -115,7 +124,7 @@ func TestRetryPolicy(t *testing.T) {
115124
}
116125

117126
for _, code := range maybeRetryableCodes {
118-
for _, op := range idempotentOps {
127+
for _, op := range retryableOps {
119128
resp.StatusCode = code
120129
ctx := context.WithValue(context.Background(), ClientMethod, op)
121130
retry, _ := RetryPolicy(ctx, resp, nil)
@@ -126,7 +135,7 @@ func TestRetryPolicy(t *testing.T) {
126135
require.False(t, retry)
127136
}
128137

129-
for _, op := range nonIdempotentOps {
138+
for _, op := range nonRetryableOps {
130139
resp.StatusCode = code
131140
ctx := context.WithValue(context.Background(), ClientMethod, op)
132141
retry, _ := RetryPolicy(ctx, resp, nil)
@@ -139,7 +148,7 @@ func TestRetryPolicy(t *testing.T) {
139148
}
140149

141150
for _, code := range nonRetryableCodes {
142-
for _, op := range idempotentOps {
151+
for _, op := range retryableOps {
143152
resp.StatusCode = code
144153
ctx := context.WithValue(context.Background(), ClientMethod, op)
145154
retry, _ := RetryPolicy(ctx, resp, nil)
@@ -150,7 +159,7 @@ func TestRetryPolicy(t *testing.T) {
150159
require.False(t, retry)
151160
}
152161

153-
for _, op := range nonIdempotentOps {
162+
for _, op := range nonRetryableOps {
154163
resp.StatusCode = code
155164
ctx := context.WithValue(context.Background(), ClientMethod, op)
156165
retry, _ := RetryPolicy(ctx, resp, nil)

0 commit comments

Comments
 (0)