Skip to content

Commit 9524c1f

Browse files
committed
feat: add automatic S3 request retry with exponential backoff
Implements SDK-level automatic retry matching the minio-go retry-mechanism spec. All requests through BaseS3Client.executeAsync() now retry on retryable S3 error codes (InternalError, SlowDown, RequestTimeout, etc.), retryable HTTP status codes (408, 429, 499, 500, 502, 503, 504, 520), and transient IO errors (connection reset, EOF, server closed idle connection). Requests with non-seekable bodies (raw okhttp3.RequestBody) get a single attempt; requests with seekable bodies (byte[], ByteBuffer, RandomAccessFile) are attempted up to maxRetries times in total, counting the first attempt (default 10). Backoff is full-jitter exponential: a random delay in [0, min(1s, 200ms*2^(n-1))) before the nth retry (n = 1 for the first retry), matching minio-go's DefaultRetryUnit/DefaultRetryCap. The "RetryHead" S3 code is explicitly excluded so executeHeadAsync region redirect logic is unaffected. maxRetries is configurable via the builder and via setMaxRetries() after construction. Closes #1700.
1 parent 03b4eda commit 9524c1f

7 files changed

Lines changed: 909 additions & 7 deletions

File tree

api/src/main/java/io/minio/BaseS3Client.java

Lines changed: 66 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@
6262
import java.util.concurrent.CompletableFuture;
6363
import java.util.concurrent.CompletionException;
6464
import java.util.concurrent.ConcurrentHashMap;
65+
import java.util.concurrent.Executors;
66+
import java.util.concurrent.ScheduledExecutorService;
67+
import java.util.concurrent.TimeUnit;
6568
import java.util.function.Supplier;
6669
import java.util.logging.Logger;
6770
import javax.annotation.Nonnull;
@@ -103,6 +106,15 @@ public abstract class BaseS3Client implements AutoCloseable {
103106
private static final String UPLOAD_ID = "uploadId";
104107
private static final Set<String> TRACE_QUERY_PARAMS =
105108
ImmutableSet.of("retention", "legal-hold", "tagging", UPLOAD_ID, "acl", "attributes");
109+
110+
private static final ScheduledExecutorService RETRY_SCHEDULER =
111+
Executors.newSingleThreadScheduledExecutor(
112+
r -> {
113+
Thread t = new Thread(r, "minio-retry-scheduler");
114+
t.setDaemon(true);
115+
return t;
116+
});
117+
106118
private PrintWriter traceStream;
107119
protected final Map<String, String> regionCache = new ConcurrentHashMap<>();
108120
protected String userAgent = Utils.getDefaultUserAgent();
@@ -111,6 +123,7 @@ public abstract class BaseS3Client implements AutoCloseable {
111123
protected Provider provider;
112124
protected OkHttpClient httpClient;
113125
protected boolean closeHttpClient;
126+
protected volatile int maxRetries = Retry.MAX_RETRY;
114127

115128
protected BaseS3Client(
116129
Http.BaseUrl baseUrl, Provider provider, OkHttpClient httpClient, boolean closeHttpClient) {
@@ -125,6 +138,7 @@ protected BaseS3Client(BaseS3Client client) {
125138
this.provider = client.provider;
126139
this.httpClient = client.httpClient;
127140
this.closeHttpClient = client.closeHttpClient;
141+
this.maxRetries = client.maxRetries;
128142
}
129143

130144
/** Closes underneath HTTP client. */
@@ -136,6 +150,18 @@ public void close() {
136150
}
137151
}
138152

153+
/**
154+
* Sets the maximum number of retry attempts for failed S3 requests. Requests with non-seekable
155+
* bodies are never retried regardless of this value. The default is {@code Retry.MAX_RETRY} (10).
156+
* Pass 1 to disable automatic retries.
157+
*
158+
* @param maxRetries maximum attempts (must be >= 1).
159+
*/
160+
public void setMaxRetries(int maxRetries) {
161+
if (maxRetries < 1) throw new IllegalArgumentException("maxRetries must be >= 1");
162+
this.maxRetries = maxRetries;
163+
}
164+
139165
/**
140166
* Sets HTTP connect, write and read timeouts. A value of 0 means no timeout, otherwise values
141167
* must be between 1 and Integer.MAX_VALUE when converted to milliseconds.
@@ -270,8 +296,47 @@ private String[] handleRedirectResponse(
270296
return new String[] {code, message};
271297
}
272298

273-
/** Execute HTTP request asynchronously for given parameters. */
299+
/** Execute HTTP request asynchronously for given parameters, with automatic retry. */
274300
protected CompletableFuture<Response> executeAsync(Http.S3Request s3request, String region) {
301+
// Non-seekable bodies (raw okhttp3 RequestBody) cannot be replayed — single attempt only.
302+
Http.Body body = s3request.body();
303+
int maxAttempts = (body != null && body.isHttpRequestBody()) ? 1 : this.maxRetries;
304+
return executeWithRetry(s3request, region, maxAttempts, 0);
305+
}
306+
307+
private CompletableFuture<Response> executeWithRetry(
308+
Http.S3Request s3request, String region, int maxAttempts, int attempt) {
309+
return doExecuteAsync(s3request, region)
310+
.handle(
311+
(response, throwable) -> {
312+
if (throwable == null) {
313+
return CompletableFuture.completedFuture(response);
314+
}
315+
Throwable cause =
316+
(throwable instanceof CompletionException) ? throwable.getCause() : throwable;
317+
if (cause == null) cause = throwable;
318+
if (attempt + 1 >= maxAttempts || !Retry.isRetryable(cause)) {
319+
return Utils.<Response>failedFuture(cause);
320+
}
321+
long delayMs = Retry.computeBackoffMs(attempt + 1, RANDOM);
322+
CompletableFuture<Response> retryFuture = new CompletableFuture<>();
323+
RETRY_SCHEDULER.schedule(
324+
() ->
325+
executeWithRetry(s3request, region, maxAttempts, attempt + 1)
326+
.whenComplete(
327+
(r, t) -> {
328+
if (t != null) retryFuture.completeExceptionally(t);
329+
else retryFuture.complete(r);
330+
}),
331+
delayMs,
332+
TimeUnit.MILLISECONDS);
333+
return retryFuture;
334+
})
335+
.thenCompose(cf -> cf);
336+
}
337+
338+
/** Execute single HTTP request attempt asynchronously for given parameters. */
339+
private CompletableFuture<Response> doExecuteAsync(Http.S3Request s3request, String region) {
275340
Credentials credentials = (provider == null) ? null : provider.fetch();
276341
Http.Request request = null;
277342
try {
@@ -285,11 +350,6 @@ protected CompletableFuture<Response> executeAsync(Http.S3Request s3request, Str
285350
if (traceStream != null) traceStream.print(request.httpTraces());
286351

287352
OkHttpClient httpClient = this.httpClient;
288-
// FIXME: enable retry for all request.
289-
// if (!s3request.retryFailure()) {
290-
// httpClient = httpClient.newBuilder().retryOnConnectionFailure(false).build();
291-
// }
292-
293353
okhttp3.Request httpRequest = request.httpRequest();
294354
CompletableFuture<Response> completableFuture = newCompleteableFuture();
295355
httpClient

api/src/main/java/io/minio/Http.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1503,6 +1503,11 @@ public String object() {
15031503
return object;
15041504
}
15051505

1506+
/** Returns the request body, or {@code null} if none was set. */
1507+
public Body body() {
1508+
return body;
1509+
}
1510+
15061511
private Request toRequest(
15071512
BaseUrl baseUrl, String region, Credentials credentials, Integer expiry)
15081513
throws MinioException {

api/src/main/java/io/minio/MinioAsyncClient.java

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ public static final class Builder {
156156
private Provider provider;
157157
private OkHttpClient httpClient;
158158
private boolean closeHttpClient;
159+
private int maxRetries = Retry.MAX_RETRY;
159160

160161
public Builder baseUrl(Http.BaseUrl baseUrl) {
161162
if (baseUrl.region() == null) {
@@ -217,6 +218,16 @@ public Builder httpClient(OkHttpClient httpClient, boolean close) {
217218
return this;
218219
}
219220

221+
/**
222+
* Sets the maximum number of retry attempts per request. Pass 1 to disable automatic retries.
223+
* Defaults to {@link Retry#MAX_RETRY} (10).
224+
*/
225+
public Builder maxRetries(int maxRetries) {
226+
if (maxRetries < 1) throw new IllegalArgumentException("maxRetries must be >= 1");
227+
this.maxRetries = maxRetries;
228+
return this;
229+
}
230+
220231
public MinioAsyncClient build() {
221232
Utils.validateNotNull(baseUrl, "endpoint");
222233

@@ -232,7 +243,10 @@ public MinioAsyncClient build() {
232243
httpClient = Http.newDefaultClient();
233244
}
234245

235-
return new MinioAsyncClient(baseUrl, provider, httpClient, closeHttpClient);
246+
MinioAsyncClient client =
247+
new MinioAsyncClient(baseUrl, provider, httpClient, closeHttpClient);
248+
client.maxRetries = maxRetries;
249+
return client;
236250
}
237251
}
238252

api/src/main/java/io/minio/MinioClient.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1979,6 +1979,15 @@ public void setAwsS3Prefix(String awsS3Prefix) {
19791979
asyncClient.setAwsS3Prefix(awsS3Prefix);
19801980
}
19811981

1982+
/**
1983+
* Sets the maximum number of retry attempts. Pass 1 to disable automatic retries.
1984+
*
1985+
* @param maxRetries maximum attempts (must be >= 1).
1986+
*/
1987+
public void setMaxRetries(int maxRetries) {
1988+
asyncClient.setMaxRetries(maxRetries);
1989+
}
1990+
19821991
/** Closes underneath async client. */
19831992
@Override
19841993
public void close() throws Exception {
@@ -2043,6 +2052,15 @@ public Builder httpClient(OkHttpClient httpClient, boolean close) {
20432052
return this;
20442053
}
20452054

2055+
/**
2056+
* Sets the maximum number of retry attempts per request. Pass 1 to disable automatic retries.
2057+
* Defaults to {@link Retry#MAX_RETRY} (10).
2058+
*/
2059+
public Builder maxRetries(int maxRetries) {
2060+
asyncClientBuilder.maxRetries(maxRetries);
2061+
return this;
2062+
}
2063+
20462064
public MinioClient build() {
20472065
MinioAsyncClient asyncClient = asyncClientBuilder.build();
20482066
return new MinioClient(asyncClient);
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
/*
2+
* MinIO Java SDK for Amazon S3 Compatible Cloud Storage, (C) 2026 MinIO, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package io.minio;
18+
19+
import com.google.common.collect.ImmutableSet;
20+
import io.minio.errors.ErrorResponseException;
21+
import io.minio.errors.InvalidResponseException;
22+
import io.minio.errors.ServerException;
23+
import java.io.IOException;
24+
import java.util.Random;
25+
import java.util.Set;
26+
import java.util.concurrent.CompletionException;
27+
import javax.net.ssl.SSLHandshakeException;
28+
29+
/** Retry configuration and helpers for S3 request execution. */
30+
class Retry {
31+
/** Default maximum number of retry attempts per request. */
32+
static final int MAX_RETRY = 10;
33+
34+
/** Base sleep unit for exponential backoff (milliseconds). */
35+
static final long RETRY_BASE_MS = 200L;
36+
37+
/** Maximum sleep cap for exponential backoff (milliseconds). */
38+
static final long RETRY_CAP_MS = 1_000L;
39+
40+
/**
41+
* S3 error codes that should trigger a retry. Matches the retryableS3Codes set from minio-go
42+
* retry.go.
43+
*/
44+
private static final Set<String> RETRYABLE_S3_CODES =
45+
ImmutableSet.of(
46+
"RequestError",
47+
"RequestTimeout",
48+
"Throttling",
49+
"ThrottlingException",
50+
"RequestLimitExceeded",
51+
"RequestThrottled",
52+
"InternalError",
53+
"ExpiredToken",
54+
"ExpiredTokenException",
55+
"SlowDown",
56+
"SlowDownWrite",
57+
"SlowDownRead");
58+
59+
/**
60+
* HTTP status codes that should trigger a retry. Matches retryableHTTPStatusCodes from minio-go
61+
* retry.go.
62+
*/
63+
private static final Set<Integer> RETRYABLE_HTTP_CODES =
64+
ImmutableSet.of(
65+
408, // Request Timeout
66+
429, // Too Many Requests
67+
499, // Client Closed Request (nginx)
68+
500, // Internal Server Error
69+
502, // Bad Gateway
70+
503, // Service Unavailable
71+
504, // Gateway Timeout
72+
520 // Cloudflare unknown error
73+
);
74+
75+
static boolean isRetryableS3Code(String code) {
76+
return code != null && RETRYABLE_S3_CODES.contains(code);
77+
}
78+
79+
static boolean isRetryableHttpCode(int code) {
80+
return RETRYABLE_HTTP_CODES.contains(code);
81+
}
82+
83+
/**
84+
* Returns true if the IOException is retryable. Non-retryable: TLS handshake failures, HTTP/HTTPS
85+
* protocol mismatch. Everything else (connection reset, EOF, server closed idle connection) is
86+
* retried.
87+
*/
88+
static boolean isRetryableIOException(IOException e) {
89+
// TLS certificate / handshake failures are not retryable.
90+
if (e instanceof SSLHandshakeException) return false;
91+
String msg = e.getMessage();
92+
// Protocol mismatch is not retryable.
93+
if (msg != null && msg.contains("server gave HTTP response to HTTPS client")) return false;
94+
return true;
95+
}
96+
97+
/**
98+
* Returns true if the throwable represents a retryable failure. Handles IOException,
99+
* ErrorResponseException, ServerException, and InvalidResponseException.
100+
*/
101+
static boolean isRetryable(Throwable t) {
102+
if (t instanceof CompletionException) t = t.getCause();
103+
if (t == null) return false;
104+
105+
if (t instanceof IOException) {
106+
return isRetryableIOException((IOException) t);
107+
}
108+
109+
if (t instanceof ErrorResponseException) {
110+
ErrorResponseException e = (ErrorResponseException) t;
111+
String code = e.errorResponse().code();
112+
// "RetryHead" is handled separately by executeHeadAsync — must not be swallowed here.
113+
if ("RetryHead".equals(code)) return false;
114+
if (isRetryableS3Code(code)) return true;
115+
if (e.response() != null && isRetryableHttpCode(e.response().code())) return true;
116+
return false;
117+
}
118+
119+
if (t instanceof ServerException) {
120+
return isRetryableHttpCode(((ServerException) t).statusCode());
121+
}
122+
123+
if (t instanceof InvalidResponseException) {
124+
return isRetryableHttpCode(((InvalidResponseException) t).responseCode());
125+
}
126+
127+
return false;
128+
}
129+
130+
/**
131+
* Computes the full-jitter exponential backoff delay for retry {@code attempt} (1-indexed: 1 =
132+
* first retry). Matches minio-go's {@code exponentialBackoffWait(i)}:
133+
*
134+
* <pre>
135+
* attempt=1 → [0, 200 ms]
136+
* attempt=2 → [0, 400 ms]
137+
* attempt=3 → [0, 800 ms]
138+
* attempt=4+→ [0, 1000 ms] (capped)
139+
* </pre>
140+
*
141+
* Pass {@code attempt <= 0} to get 0 (no delay).
142+
*/
143+
static long computeBackoffMs(int attempt, Random random) {
144+
if (attempt <= 0) return 0L;
145+
// exp = attempt-1 so that attempt=1 maps to base*2^0=200ms cap
146+
int exp = Math.min(attempt - 1, 30);
147+
long cap = Math.min(RETRY_CAP_MS, RETRY_BASE_MS * (1L << exp));
148+
return (long) (random.nextDouble() * cap);
149+
}
150+
151+
private Retry() {}
152+
}

api/src/main/java/io/minio/errors/InvalidResponseException.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
public class InvalidResponseException extends MinioException {
2121
private static final long serialVersionUID = -4793742105569629274L;
2222

23+
private final int responseCode;
24+
2325
public InvalidResponseException(
2426
int responseCode, String contentType, String body, String httpTrace) {
2527
super(
@@ -30,5 +32,11 @@ public InvalidResponseException(
3032
+ ", body: "
3133
+ body,
3234
httpTrace);
35+
this.responseCode = responseCode;
36+
}
37+
38+
/** Returns the HTTP response code that triggered this exception. */
39+
public int responseCode() {
40+
return responseCode;
3341
}
3442
}

0 commit comments

Comments
 (0)