-
Notifications
You must be signed in to change notification settings - Fork 576
[apiserver] Add retry and timeout to apiserver V2 #3869
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
f76b6b9
430f429
d0cab52
043d99b
4807020
5949d16
501975d
440730e
a10c8f5
08da214
37e885b
73d7592
b7d776f
5cffede
5c1b1dd
3c60df2
31129fc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
package apiserversdk | ||
|
||
import "time" | ||
|
||
// Default retry and backoff settings for the HTTP client; the retrying round tripper in this package reads them. | ||
// TODO: Make apiserver configs compatible with V1 | ||
const ( | ||
// HTTPClientDefaultMaxRetry is the maximum number of retries performed after the initial attempt. | ||
HTTPClientDefaultMaxRetry = 3 | ||
|
||
// Retry backoff settings: the wait before the next attempt is | ||
// HTTPClientDefaultInitBackoff * HTTPClientDefaultBackoffBase^attempt (attempt counted from 0), | ||
// capped at HTTPClientDefaultMaxBackoff. | ||
HTTPClientDefaultBackoffBase = float64(2) | ||
HTTPClientDefaultInitBackoff = 500 * time.Millisecond | ||
HTTPClientDefaultMaxBackoff = 10 * time.Second | ||
|
||
// HTTPClientDefaultOverallTimeout is the overall timeout for retries. | ||
// NOTE(review): not referenced by the retry code visible in this change; confirm where it is applied. | ||
HTTPClientDefaultOverallTimeout = 30 * time.Second | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,15 @@ | ||
package apiserversdk | ||
|
||
import ( | ||
"bytes" | ||
"fmt" | ||
"io" | ||
"math" | ||
"net/http" | ||
"net/http/httputil" | ||
"net/url" | ||
"strings" | ||
"time" | ||
|
||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/util/net" | ||
|
@@ -22,12 +27,14 @@ type MuxConfig struct { | |
func NewMux(config MuxConfig) (*http.ServeMux, error) { | ||
u, err := url.Parse(config.KubernetesConfig.Host) // parse the K8s API server URL from the KubernetesConfig. | ||
if err != nil { | ||
return nil, err | ||
return nil, fmt.Errorf("failed to parse url %s from config: %w", config.KubernetesConfig.Host, err) | ||
} | ||
proxy := httputil.NewSingleHostReverseProxy(u) | ||
if proxy.Transport, err = rest.TransportFor(config.KubernetesConfig); err != nil { // rest.TransportFor provides the auth to the K8s API server. | ||
return nil, err | ||
baseTransport, err := rest.TransportFor(config.KubernetesConfig) // rest.TransportFor provides the auth to the K8s API server. | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to get transport for config: %w", err) | ||
} | ||
proxy.Transport = newRetryRoundTripper(baseTransport) | ||
var handler http.Handler = proxy | ||
if config.Middleware != nil { | ||
handler = config.Middleware(proxy) | ||
|
@@ -84,3 +91,114 @@ func requireKubeRayService(handler http.Handler, k8sClient *kubernetes.Clientset | |
handler.ServeHTTP(w, r) | ||
}) | ||
} | ||
|
||
// retryRoundTripper is an http.RoundTripper that wraps another round tripper and re-sends a request | ||
// with exponential backoff while the response carries a retryable HTTP status code. | ||
type retryRoundTripper struct { | ||
// base is the wrapped round tripper that performs each individual attempt. | ||
base http.RoundTripper | ||
|
||
// maxRetries is the number of retries allowed after the initial attempt. | ||
maxRetries int | ||
} | ||
|
||
func newRetryRoundTripper(base http.RoundTripper) http.RoundTripper { | ||
return &retryRoundTripper{base: base, maxRetries: HTTPClientDefaultMaxRetry} | ||
} | ||
|
||
func (rrt *retryRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { | ||
ctx := req.Context() | ||
|
||
var resp *http.Response | ||
var err error | ||
for attempt := 0; attempt <= rrt.maxRetries; attempt++ { | ||
/* Try up to (rrt.maxRetries + 1) times: initial attempt + retries */ | ||
|
||
if attempt == 0 && req.Body != nil && req.GetBody == nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you help me understand what these two if blocks are doing? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh I see we are reusing the body. Should we add comments explaining a bit? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Comments added. PTAL |
||
/* Reuse request body in each attempt */ | ||
bodyBytes, err := io.ReadAll(req.Body) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to read request body for retry support: %w", err) | ||
} | ||
err = req.Body.Close() | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to close request body: %w", err) | ||
} | ||
req.Body = io.NopCloser(bytes.NewReader(bodyBytes)) | ||
req.GetBody = func() (io.ReadCloser, error) { | ||
return io.NopCloser(bytes.NewReader(bodyBytes)), nil | ||
} | ||
} | ||
|
||
if attempt > 0 && req.GetBody != nil { | ||
var bodyCopy io.ReadCloser | ||
bodyCopy, err = req.GetBody() | ||
kenchung285 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if err != nil { | ||
return nil, fmt.Errorf("failed to read request body: %w", err) | ||
} | ||
req.Body = bodyCopy | ||
} | ||
|
||
resp, err = rrt.base.RoundTrip(req) | ||
kenchung285 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if err != nil { | ||
return resp, fmt.Errorf("request to %s %s failed with error: %w", req.Method, req.URL.String(), err) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be better to return There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IMO, if any infomation is return in |
||
} | ||
|
||
if isSuccessfulStatusCode(resp.StatusCode) { | ||
return resp, nil | ||
} | ||
|
||
if !isRetryableHTTPStatusCodes(resp.StatusCode) { | ||
return resp, nil | ||
} | ||
|
||
if attempt < rrt.maxRetries && resp.Body != nil { | ||
/* If not last attempt, drain response body */ | ||
if _, err = io.Copy(io.Discard, resp.Body); err != nil { | ||
return nil, fmt.Errorf("retryRoundTripper internal failure to drain response body: %w", err) | ||
} | ||
if err = resp.Body.Close(); err != nil { | ||
return nil, fmt.Errorf("retryRoundTripper internal failure to close response body: %w", err) | ||
} | ||
} | ||
|
||
// TODO: move to HTTP util function in independent util file | ||
sleepDuration := HTTPClientDefaultInitBackoff * time.Duration(math.Pow(HTTPClientDefaultBackoffBase, float64(attempt))) | ||
kenchung285 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if sleepDuration > HTTPClientDefaultMaxBackoff { | ||
sleepDuration = HTTPClientDefaultMaxBackoff | ||
} | ||
|
||
// TODO: merge common utils for apiserver v1 and v2 | ||
if deadline, ok := ctx.Deadline(); ok { | ||
dentiny marked this conversation as resolved.
Show resolved
Hide resolved
|
||
remaining := time.Until(deadline) | ||
if remaining <= 0 { | ||
return resp, fmt.Errorf("retry timeout exceeded context deadline") | ||
} | ||
if sleepDuration > remaining { | ||
sleepDuration = remaining | ||
} | ||
Comment on lines
+176
to
+178
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we'd better not cap |
||
} | ||
|
||
time.Sleep(sleepDuration) | ||
} | ||
return resp, err | ||
} | ||
|
||
// isSuccessfulStatusCode reports whether statusCode is a 2xx code, i.e. lies
// in the range [200, 299].
// TODO: move HTTP util function into independent util file / folder
func isSuccessfulStatusCode(statusCode int) bool {
	// Integer division by 100 yields the status class; only 200..299 map to 2
	// (negative values truncate toward zero and can never produce 2).
	class := statusCode / 100
	return class == 2
}
|
||
// isRetryableHTTPStatusCodes reports whether statusCode is one of the status
// codes this package retries: 408, 429, 500, 502, 503 or 504.
// TODO: merge common utils for apiserver v1 and v2
func isRetryableHTTPStatusCodes(statusCode int) bool {
	retryable := statusCode == http.StatusRequestTimeout || // 408
		statusCode == http.StatusTooManyRequests || // 429
		statusCode == http.StatusInternalServerError || // 500
		statusCode == http.StatusBadGateway || // 502
		statusCode == http.StatusServiceUnavailable || // 503
		statusCode == http.StatusGatewayTimeout // 504
	return retryable
}
Uh oh!
There was an error while loading. Please reload this page.