diff --git a/cmd/client-s3.go b/cmd/client-s3.go
index ff95a79300..660f2775b2 100644
--- a/cmd/client-s3.go
+++ b/cmd/client-s3.go
@@ -985,7 +985,11 @@ func (c *S3Client) Copy(ctx context.Context, source string, opts CopyOptions, pr
     destOpts.ReplaceMetadata = len(metadata) > 0
 
     var e error
-    if opts.disableMultipart || opts.size < 64*1024*1024 {
+    // Use CopyObject for files < 5GiB (its maximum size limit).
+    // Use ComposeObject for files >= 5GiB (it supports multipart copy up to 5TiB).
+    const maxCopyObjectSize = 5 * 1024 * 1024 * 1024 // 5GiB
+
+    if opts.disableMultipart || opts.size < maxCopyObjectSize {
         _, e = c.api.CopyObject(ctx, destOpts, srcOpts)
     } else {
         _, e = c.api.ComposeObject(ctx, destOpts, srcOpts)
diff --git a/cmd/common-methods.go b/cmd/common-methods.go
index 5472b4b46c..f5dd5cdd03 100644
--- a/cmd/common-methods.go
+++ b/cmd/common-methods.go
@@ -21,6 +21,7 @@ import (
     "context"
     "errors"
     "io"
+    "maps"
     "net/http"
     "os"
     "path/filepath"
@@ -36,6 +37,7 @@ import (
     "github.com/minio/minio-go/v7"
     "github.com/minio/minio-go/v7/pkg/encrypt"
     "github.com/minio/minio-go/v7/pkg/tags"
+    "github.com/minio/pkg/v3/console"
     "github.com/minio/pkg/v3/env"
 )
 
@@ -290,7 +292,7 @@ func getAllMetadata(ctx context.Context, sourceAlias, sourceURLStr string, srcSS
 }
 
 // uploadSourceToTargetURL - uploads to targetURL from source.
-// optionally optimizes copy for object sizes <= 5GiB by using
+// optionally optimizes copy for object sizes <= 5TiB by using
 // server side copy operation.
 func uploadSourceToTargetURL(ctx context.Context, uploadOpts uploadSourceToTargetURLOpts) URLs {
     sourceAlias := uploadOpts.urls.SourceAlias
@@ -353,8 +355,16 @@ func uploadSourceToTargetURL(ctx context.Context, uploadOpts uploadSourceToTarge
         metadata[http.CanonicalHeaderKey(k)] = v
     }
 
-    // Optimize for server side copy if the host is same.
-    if sourceAlias == targetAlias && !uploadOpts.isZip && !uploadOpts.urls.checksum.IsSet() {
+    // Server-side copy via the ComposeObject API has a 5TiB limit.
+    // For files >= 5TiB we must use stream copy (download + upload), even within the same alias.
+    const maxServerSideCopySize = 5 * 1024 * 1024 * 1024 * 1024 // 5 TiB
+    canUseServerSideCopy := sourceAlias == targetAlias &&
+        !uploadOpts.isZip &&
+        !uploadOpts.urls.checksum.IsSet() &&
+        length < maxServerSideCopySize
+
+    // Optimize for server-side copy if the host is the same and the file size allows it.
+    if canUseServerSideCopy {
         // preserve new metadata and save existing ones.
         if uploadOpts.preserve {
             currentMetadata, err := getAllMetadata(ctx, sourceAlias, sourceURL.String(), srcSSE, uploadOpts.urls)
@@ -377,10 +387,6 @@ func uploadSourceToTargetURL(ctx context.Context, uploadOpts uploadSourceToTarge
         }
 
         sourcePath := filepath.ToSlash(sourceURL.Path)
-        if uploadOpts.urls.SourceContent.RetentionEnabled {
-            err = putTargetRetention(ctx, targetAlias, targetURL.String(), metadata)
-            return uploadOpts.urls.WithError(err.Trace(sourceURL.String()))
-        }
 
         opts := CopyOptions{
             srcSSE: srcSSE,
@@ -393,6 +399,11 @@ func uploadSourceToTargetURL(ctx context.Context, uploadOpts uploadSourceToTarge
 
         err = copySourceToTargetURL(ctx, targetAlias, targetURL.String(), sourcePath, sourceVersion, mode, until, legalHold, length, uploadOpts.progress, opts)
+
+        // Apply retention after the copy, if enabled.
+        if err == nil && uploadOpts.urls.SourceContent.RetentionEnabled {
+            err = putTargetRetention(ctx, targetAlias, targetURL.String(), metadata)
+        }
     } else {
         if uploadOpts.urls.SourceContent.RetentionEnabled {
             // preserve new metadata and save existing ones.
@@ -447,9 +458,7 @@ func uploadSourceToTargetURL(ctx context.Context, uploadOpts uploadSourceToTarge
         }
 
         metadata := make(map[string]string, len(content.Metadata))
-        for k, v := range content.Metadata {
-            metadata[k] = v
-        }
+        maps.Copy(metadata, content.Metadata)
 
         // Get metadata from target content as well
         for k, v := range uploadOpts.urls.TargetContent.Metadata {
@@ -486,13 +495,17 @@ func uploadSourceToTargetURL(ctx context.Context, uploadOpts uploadSourceToTarge
             }
         }
 
-        if uploadOpts.multipartThreads == "" {
+        if uploadOpts.multipartThreads == 0 {
             multipartThreads, e = strconv.Atoi(env.Get("MC_UPLOAD_MULTIPART_THREADS", "4"))
+            if e != nil {
+                return uploadOpts.urls.WithError(probe.NewError(e))
+            }
         } else {
-            multipartThreads, e = strconv.Atoi(uploadOpts.multipartThreads)
+            multipartThreads = uploadOpts.multipartThreads
         }
-        if e != nil {
-            return uploadOpts.urls.WithError(probe.NewError(e))
+
+        if globalDebug {
+            console.Debugln("DEBUG: multipart configuration - part-size:", humanize.IBytes(multipartSize), "parallel:", multipartThreads, "file size:", humanize.IBytes(uint64(length)))
         }
 
         putOpts := PutOptions{
@@ -586,7 +599,7 @@ type uploadSourceToTargetURLOpts struct {
     encKeyDB            map[string][]prefixSSEPair
     preserve, isZip     bool
     multipartSize       string
-    multipartThreads    string
+    multipartThreads    int
     updateProgressTotal bool
     ifNotExists         bool
 }
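Taken together, the two new constants encode two distinct S3 limits: CopyObject handles at most 5 GiB in a single call, while ComposeObject can server-side-copy up to 5 TiB in ranged parts. Below is a minimal standalone sketch of the combined selection order; `copyAPIFor` is a hypothetical helper (not in this patch), and the final branch actually happens in the caller (uploadSourceToTargetURL) rather than inside S3Client.Copy:

```go
package main

import "fmt"

const (
	maxCopyObjectSize     = 5 << 30 // 5 GiB: CopyObject's per-call limit
	maxServerSideCopySize = 5 << 40 // 5 TiB: ComposeObject's multipart-copy limit
)

// copyAPIFor mirrors the selection order in the patch: CopyObject for small
// objects (or when multipart is disabled), ComposeObject up to 5 TiB, and a
// client-side stream copy beyond that.
func copyAPIFor(size int64, disableMultipart bool) string {
	switch {
	case disableMultipart || size < maxCopyObjectSize:
		return "CopyObject"
	case size < maxServerSideCopySize:
		return "ComposeObject"
	default:
		return "stream copy (download + upload)"
	}
}

func main() {
	for _, size := range []int64{100 << 20, 6 << 30, 6 << 40} {
		fmt.Printf("%14d bytes -> %s\n", size, copyAPIFor(size, false))
	}
}
```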
diff --git a/cmd/cp-main.go b/cmd/cp-main.go
index 9ec393ef6d..fd3393e276 100644
--- a/cmd/cp-main.go
+++ b/cmd/cp-main.go
@@ -25,6 +25,7 @@ import (
     "path/filepath"
     "strings"
 
+    "github.com/dustin/go-humanize"
     "github.com/fatih/color"
     "github.com/minio/cli"
     json "github.com/minio/colorjson"
@@ -72,6 +73,14 @@ var (
             Name:  "disable-multipart",
             Usage: "disable multipart upload feature",
         },
+        cli.StringFlag{
+            Name:  "part-size",
+            Usage: "part size for multipart uploads (e.g. 16MiB, 64MiB, 128MiB). Max 5GiB. Max file size = part-size × 10000",
+        },
+        cli.IntFlag{
+            Name:  "parallel",
+            Usage: "number of parts to upload in parallel for multipart uploads",
+        },
         cli.BoolFlag{
             Name:  "md5",
             Usage: "force all upload(s) to calculate md5sum checksum",
@@ -194,6 +203,12 @@ EXAMPLES:
   19. Set tags to the uploaded objects
      {{.Prompt}} {{.HelpName}} -r --tags "category=prod&type=backup" ./data/ play/another-bucket/
+
+  20. Copy a large file with a custom part size and parallel uploads
+     {{.Prompt}} {{.HelpName}} --part-size 128MiB --parallel 8 largefile.bin play/mybucket/
+
+  21. Copy a very large file (12TB+) with a CRC32C checksum and optimized multipart settings
+     {{.Prompt}} {{.HelpName}} --checksum CRC32C --part-size 5GiB --parallel 8 verylargefile.bin play/mybucket/
 `,
 }
 
@@ -306,6 +321,37 @@ func printCopyURLsError(cpURLs *URLs) {
     }
 }
 
+// doCopySession manages the copy session and determines the copy strategy.
+//
+// 1. SERVER-SIDE COPY - no data transfers through the client; preferred.
+//    Used when ALL of the following conditions are met:
+//    - Source and target are on the same alias (same MinIO/S3 server)
+//    - File size is < 5 TiB (ComposeObject API limit)
+//    - Not extracting from zip (--zip not used)
+//    - No checksum verification requested (--checksum not used)
+//
+//    Multipart behavior: uses the ComposeObject API with X-Amz-Copy-Source-Range headers.
+//    Part size and parallelism ARE applied via the --part-size and --parallel flags.
+//
+// 2. STREAM COPY (download + upload through the client)
+//    Used when ANY of the following conditions is met:
+//    - Source and target are on different aliases (cross-server copy)
+//    - File size is >= 5 TiB
+//    - Extracting from zip (--zip flag used)
+//    - Checksum verification requested (--checksum flag used)
+//
+//    Multipart behavior: uses the standard multipart upload API.
+//    Part size and parallelism ARE applied via the --part-size and --parallel flags.
+//
+// 3. PUT without multipart
+//    Used when:
+//    - File size is < 64 MiB (default threshold)
+//    - OR the --disable-multipart flag is used
+//
+// Notes:
+// - The 5 TiB limit is a limitation of the S3 ComposeObject API.
+// - Default part size: 128 MiB (the MinIO Go SDK default for multipart uploads).
+// - Default parallelism: 4 threads when --parallel is not set and the
+//   MC_UPLOAD_MULTIPART_THREADS environment variable is absent.
 func doCopySession(ctx context.Context, cancelCopy context.CancelFunc, cli *cli.Context, encryptionKeys map[string][]prefixSSEPair, isMvCmd bool) error {
     var isCopied func(string) bool
     var totalObjects, totalBytes int64
@@ -441,17 +487,54 @@ func doCopySession(ctx context.Context, cancelCopy context.CancelFunc, cli *cli.
                 return doCopyFake(cpURLs, pg)
             }, 0)
         } else {
+            const maxServerSideCopySize = 5 * 1024 * 1024 * 1024 * 1024 // 5 TiB
+            isServerSideCopy := cpURLs.SourceAlias == cpURLs.TargetAlias &&
+                !isZip &&
+                !checksum.IsSet() &&
+                cpURLs.SourceContent.Size < maxServerSideCopySize
+
+            // For server-side copy (< 5TiB), pass size=0 to the parallel manager.
+            // For stream copy, pass the actual size for progress tracking.
+            queueSize := cpURLs.SourceContent.Size
+            if isServerSideCopy {
+                queueSize = 0 // no client bandwidth is used for server-side copy
+            }
+
+            if globalDebug {
+                copyType := "server-side copy"
+                if !isServerSideCopy {
+                    copyType = "stream copy"
+                    if checksum.IsSet() {
+                        console.Debugln(fmt.Sprintf("DEBUG: Checksum %v requested - forcing stream copy for verification", checksum))
+                    }
+                }
+
+                partSizeStr := cli.String("part-size")
+                if partSizeStr == "" {
+                    partSizeStr = "default"
+                }
+
+                console.Debugln(fmt.Sprintf("DEBUG: Starting %s - file: %s, size: %s, part-size: %s, parallel: %d",
+                    copyType,
+                    cpURLs.SourceContent.URL.Path,
+                    humanize.IBytes(uint64(cpURLs.SourceContent.Size)),
+                    partSizeStr,
+                    cli.Int("parallel")))
+            }
+
             // Print the copy resume summary once in start
             parallel.queueTask(func() URLs {
                 return doCopy(ctx, doCopyOpts{
-                    cpURLs:         cpURLs,
-                    pg:             pg,
-                    encryptionKeys: encryptionKeys,
-                    isMvCmd:        isMvCmd,
-                    preserve:       preserve,
-                    isZip:          isZip,
+                    cpURLs:           cpURLs,
+                    pg:               pg,
+                    encryptionKeys:   encryptionKeys,
+                    isMvCmd:          isMvCmd,
+                    preserve:         preserve,
+                    isZip:            isZip,
+                    multipartSize:    cli.String("part-size"),
+                    multipartThreads: cli.Int("parallel"),
                 })
-            }, cpURLs.SourceContent.Size)
+            }, queueSize)
         }
     }
 }
@@ -569,6 +652,6 @@ type doCopyOpts struct {
     isMvCmd, preserve, isZip bool
     updateProgressTotal      bool
     multipartSize            string
-    multipartThreads         string
+    multipartThreads         int
     ifNotExists              bool
 }
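Example 21 leans on S3's 10,000-part ceiling for multipart uploads: at the default 128 MiB part size an upload tops out around 1.2 TiB, which is why a 12 TB object needs --part-size raised (5 GiB gives headroom up to roughly 48.8 TiB). A quick sketch of that arithmetic, reusing the go-humanize dependency this patch already imports; `minPartSize` is a hypothetical helper, not part of the patch:

```go
package main

import (
	"fmt"

	"github.com/dustin/go-humanize"
)

// S3 multipart uploads allow at most 10,000 parts, so the part size caps the
// maximum object size (as the --part-size flag usage above notes).
const maxPartsPerUpload = 10000

// minPartSize returns the smallest part size (in bytes) that keeps an object
// of the given size within the 10,000-part limit.
func minPartSize(objectSize uint64) uint64 {
	return (objectSize + maxPartsPerUpload - 1) / maxPartsPerUpload
}

func main() {
	defaultPart := uint64(128 << 20) // the 128 MiB default named in the doCopySession notes
	fmt.Println("cap at default part size:", humanize.IBytes(defaultPart*maxPartsPerUpload)) // ~1.2 TiB

	const twelveTB = 12_000_000_000_000 // the 12TB+ object from example 21
	fmt.Println("min part size for 12 TB:", humanize.IBytes(minPartSize(twelveTB))) // ~1.1 GiB, so 5GiB is ample
}
```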
diff --git a/cmd/cp-main_test.go b/cmd/cp-main_test.go
index ade868685f..88ae55d9c1 100644
--- a/cmd/cp-main_test.go
+++ b/cmd/cp-main_test.go
@@ -74,3 +74,337 @@ func TestParseMetaData(t *testing.T) {
         }
     }
 }
+
+// TestCopyStrategyDecision tests the decision logic that determines which copy strategy to use,
+// based on conditions such as alias, file size, the zip flag, and the checksum flag.
+func TestCopyStrategyDecision(t *testing.T) {
+    const maxServerSideCopySize = 5 * 1024 * 1024 * 1024 * 1024 // 5 TiB
+
+    testCases := []struct {
+        name             string
+        sourceAlias      string
+        targetAlias      string
+        fileSize         int64
+        isZip            bool
+        checksumSet      bool
+        expectedStrategy string // "server-side" or "stream"
+        description      string
+    }{
+        // Server-side copy cases
+        {
+            name:             "Same alias, small file, no flags",
+            sourceAlias:      "s3",
+            targetAlias:      "s3",
+            fileSize:         1024 * 1024 * 1024, // 1 GiB
+            isZip:            false,
+            checksumSet:      false,
+            expectedStrategy: "server-side",
+            description:      "Should use server-side copy for same alias and small file",
+        },
+        {
+            name:             "Same alias, 4.9 TiB file, no flags",
+            sourceAlias:      "s3",
+            targetAlias:      "s3",
+            fileSize:         5393554080768, // ~4.9 TiB (< 5 TiB limit)
+            isZip:            false,
+            checksumSet:      false,
+            expectedStrategy: "server-side",
+            description:      "Should use server-side copy for files just under the 5 TiB limit",
+        },
+        {
+            name:             "Same alias, exactly 5 TiB minus 1 byte",
+            sourceAlias:      "minio1",
+            targetAlias:      "minio1",
+            fileSize:         maxServerSideCopySize - 1,
+            isZip:            false,
+            checksumSet:      false,
+            expectedStrategy: "server-side",
+            description:      "Should use server-side copy for files exactly at the limit boundary",
+        },
+
+        // Stream copy cases - different alias
+        {
+            name:             "Different alias, small file",
+            sourceAlias:      "s3",
+            targetAlias:      "minio",
+            fileSize:         1024 * 1024 * 1024, // 1 GiB
+            isZip:            false,
+            checksumSet:      false,
+            expectedStrategy: "stream",
+            description:      "Should use stream copy for cross-alias transfers",
+        },
+        {
+            name:             "Different alias, large file",
+            sourceAlias:      "minio1",
+            targetAlias:      "minio2",
+            fileSize:         10 * 1024 * 1024 * 1024 * 1024, // 10 TiB
+            isZip:            false,
+            checksumSet:      false,
+            expectedStrategy: "stream",
+            description:      "Should use stream copy for cross-alias even with large files",
+        },
+
+        // Stream copy cases - file size >= 5 TiB
+        {
+            name:             "Same alias, exactly 5 TiB",
+            sourceAlias:      "s3",
+            targetAlias:      "s3",
+            fileSize:         maxServerSideCopySize,
+            isZip:            false,
+            checksumSet:      false,
+            expectedStrategy: "stream",
+            description:      "Should use stream copy for files >= 5 TiB (ComposeObject limit)",
+        },
+        {
+            name:             "Same alias, 6 TiB file",
+            sourceAlias:      "minio",
+            targetAlias:      "minio",
+            fileSize:         6 * 1024 * 1024 * 1024 * 1024, // 6 TiB
+            isZip:            false,
+            checksumSet:      false,
+            expectedStrategy: "stream",
+            description:      "Should use stream copy for files over the 5 TiB limit",
+        },
+        {
+            name:             "Same alias, 100 TiB file",
+            sourceAlias:      "s3",
+            targetAlias:      "s3",
+            fileSize:         100 * 1024 * 1024 * 1024 * 1024, // 100 TiB
+            isZip:            false,
+            checksumSet:      false,
+            expectedStrategy: "stream",
+            description:      "Should use stream copy for very large files",
+        },
+
+        // Stream copy cases - zip flag
+        {
+            name:             "Same alias, small file, zip enabled",
+            sourceAlias:      "s3",
+            targetAlias:      "s3",
+            fileSize:         100 * 1024 * 1024, // 100 MiB
+            isZip:            true,
+            checksumSet:      false,
+            expectedStrategy: "stream",
+            description:      "Should use stream copy when extracting from zip",
+        },
+        {
+            name:             "Same alias, 1 TiB file, zip enabled",
+            sourceAlias:      "minio",
+            targetAlias:      "minio",
+            fileSize:         1024 * 1024 * 1024 * 1024, // 1 TiB
+            isZip:            true,
+            checksumSet:      false,
+            expectedStrategy: "stream",
+            description:      "Should use stream copy for zip extraction regardless of size",
+        },
+
+        // Stream copy cases - checksum flag
+        {
+            name:             "Same alias, small file, checksum enabled",
+            sourceAlias:      "s3",
+            targetAlias:      "s3",
+            fileSize:         500 * 1024 * 1024, // 500 MiB
+            isZip:            false,
+            checksumSet:      true,
+            expectedStrategy: "stream",
+            description:      "Should use stream copy when checksum verification is requested",
+        },
+        {
+            name:             "Same alias, 2 TiB file, checksum enabled",
+            sourceAlias:      "minio",
+            targetAlias:      "minio",
+            fileSize:         2 * 1024 * 1024 * 1024 * 1024, // 2 TiB
+            isZip:            false,
+            checksumSet:      true,
+            expectedStrategy: "stream",
+            description:      "Should use stream copy for checksum verification on large files",
+        },
+
+        // Edge cases - multiple conditions forcing stream copy
+        {
+            name:             "Different alias, large file, zip enabled",
+            sourceAlias:      "s3",
+            targetAlias:      "minio",
+            fileSize:         10 * 1024 * 1024 * 1024 * 1024, // 10 TiB
+            isZip:            true,
+            checksumSet:      false,
+            expectedStrategy: "stream",
+            description:      "Should use stream copy when multiple conditions require it",
+        },
+        {
+            name:             "Same alias, over 5 TiB, checksum enabled",
+            sourceAlias:      "s3",
+            targetAlias:      "s3",
+            fileSize:         7 * 1024 * 1024 * 1024 * 1024, // 7 TiB
+            isZip:            false,
+            checksumSet:      true,
+            expectedStrategy: "stream",
+            description:      "Should use stream copy when both size and checksum require it",
+        },
+        {
+            name:             "Different alias, zip, checksum",
+            sourceAlias:      "minio1",
+            targetAlias:      "minio2",
+            fileSize:         100 * 1024 * 1024, // 100 MiB
+            isZip:            true,
+            checksumSet:      true,
+            expectedStrategy: "stream",
+            description:      "Should use stream copy when all stream conditions are met",
+        },
+
+        // Additional edge cases
+        {
+            name:             "Same alias, zero byte file",
+            sourceAlias:      "s3",
+            targetAlias:      "s3",
+            fileSize:         0,
+            isZip:            false,
+            checksumSet:      false,
+            expectedStrategy: "server-side",
+            description:      "Should use server-side copy for zero byte files",
+        },
+        {
+            name:             "Same alias, 64 MiB file (multipart threshold)",
+            sourceAlias:      "minio",
+            targetAlias:      "minio",
+            fileSize:         64 * 1024 * 1024, // 64 MiB
+            isZip:            false,
+            checksumSet:      false,
+            expectedStrategy: "server-side",
+            description:      "Should use server-side copy for files at the multipart threshold",
+        },
+    }
+
+    for _, tc := range testCases {
+        t.Run(tc.name, func(t *testing.T) {
+            // Simulate the decision logic from doCopySession
+            isServerSideCopy := tc.sourceAlias == tc.targetAlias &&
+                !tc.isZip &&
+                !tc.checksumSet &&
+                tc.fileSize < maxServerSideCopySize
+
+            var actualStrategy string
+            if isServerSideCopy {
+                actualStrategy = "server-side"
+            } else {
+                actualStrategy = "stream"
+            }
+
+            if actualStrategy != tc.expectedStrategy {
+                t.Errorf("Test '%s' failed:\n"+
+                    "  Description: %s\n"+
+                    "  Source Alias: %s\n"+
+                    "  Target Alias: %s\n"+
+                    "  File Size: %d bytes (%.2f TiB)\n"+
+                    "  Zip Flag: %v\n"+
+                    "  Checksum Set: %v\n"+
+                    "  Expected Strategy: %s\n"+
+                    "  Actual Strategy: %s",
+                    tc.name,
+                    tc.description,
+                    tc.sourceAlias,
+                    tc.targetAlias,
+                    tc.fileSize,
+                    float64(tc.fileSize)/(1024*1024*1024*1024),
+                    tc.isZip,
+                    tc.checksumSet,
+                    tc.expectedStrategy,
+                    actualStrategy,
+                )
+            }
+        })
+    }
+}
+
+// copyStrategyConditions represents the conditions that determine the copy strategy.
+type copyStrategyConditions struct {
+    sameAlias   bool
+    underLimit  bool
+    isZip       bool
+    checksumSet bool
+}
+
+// TestCopyStrategyMatrix tests all combinations of the copy strategy decision factors.
+func TestCopyStrategyMatrix(t *testing.T) {
+    const maxServerSideCopySize = 5 * 1024 * 1024 * 1024 * 1024 // 5 TiB
+
+    // Test matrix: all combinations
+    testCases := []copyStrategyConditions{
+        // All conditions met for server-side copy
+        {sameAlias: true, underLimit: true, isZip: false, checksumSet: false},
+
+        // One condition fails, resulting in stream copy
+        {sameAlias: false, underLimit: true, isZip: false, checksumSet: false},
+        {sameAlias: true, underLimit: false, isZip: false, checksumSet: false},
+        {sameAlias: true, underLimit: true, isZip: true, checksumSet: false},
+        {sameAlias: true, underLimit: true, isZip: false, checksumSet: true},
+
+        // Multiple conditions fail
+        {sameAlias: false, underLimit: false, isZip: false, checksumSet: false},
+        {sameAlias: false, underLimit: true, isZip: true, checksumSet: false},
+        {sameAlias: false, underLimit: true, isZip: false, checksumSet: true},
+        {sameAlias: true, underLimit: false, isZip: true, checksumSet: false},
+        {sameAlias: true, underLimit: false, isZip: false, checksumSet: true},
+        {sameAlias: true, underLimit: true, isZip: true, checksumSet: true},
+
+        // All conditions unfavorable
+        {sameAlias: false, underLimit: false, isZip: true, checksumSet: true},
+    }
+
+    for _, cond := range testCases {
+        t.Run(formatConditionsName(cond), func(t *testing.T) {
+            // Set up test parameters based on the conditions
+            sourceAlias := "source"
+            targetAlias := "target"
+            if cond.sameAlias {
+                targetAlias = "source"
+            }
+
+            fileSize := int64(1024 * 1024 * 1024) // 1 GiB
+            if !cond.underLimit {
+                fileSize = maxServerSideCopySize + 1
+            }
+
+            // Determine the expected strategy:
+            // server-side copy only if ALL conditions are true.
+            expectedServerSide := cond.sameAlias && cond.underLimit && !cond.isZip && !cond.checksumSet
+
+            // Simulate the decision logic
+            isServerSideCopy := sourceAlias == targetAlias &&
+                !cond.isZip &&
+                !cond.checksumSet &&
+                fileSize < maxServerSideCopySize
+
+            if isServerSideCopy != expectedServerSide {
+                t.Errorf("Strategy mismatch for conditions %+v: expected server-side=%v, got=%v",
+                    cond, expectedServerSide, isServerSideCopy)
+            }
+        })
+    }
+}
+
+// formatConditionsName is a helper that formats test names for the matrix tests.
+func formatConditionsName(cond copyStrategyConditions) string {
+    name := ""
+    if cond.sameAlias {
+        name += "SameAlias_"
+    } else {
+        name += "DiffAlias_"
+    }
+    if cond.underLimit {
+        name += "Under5TiB_"
+    } else {
+        name += "Over5TiB_"
+    }
+    if cond.isZip {
+        name += "Zip_"
+    } else {
+        name += "NoZip_"
+    }
+    if cond.checksumSet {
+        name += "Checksum"
+    } else {
+        name += "NoChecksum"
+    }
+    return name
+}
diff --git a/cmd/put-main.go b/cmd/put-main.go
index 14c4c3e94e..ba519c889b 100644
--- a/cmd/put-main.go
+++ b/cmd/put-main.go
@@ -206,7 +206,7 @@ func mainPut(cliCtx *cli.Context) (e error) {
             pg:               pg,
             encryptionKeys:   encryptionKeys,
             multipartSize:    size,
-            multipartThreads: strconv.Itoa(threads),
+            multipartThreads: threads,
             ifNotExists:      cliCtx.Bool("if-not-exists"),
         })
         if urls.Error != nil {
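A side effect of retyping multipartThreads from string to int (here and in cp-main.go/common-methods.go) is that the int zero value now means "not set", which is what routes resolution through the MC_UPLOAD_MULTIPART_THREADS environment variable with its default of 4. A standalone sketch of that fallback order, assuming the same semantics as the patched uploadSourceToTargetURL (`resolveThreads` is a hypothetical name; the real code uses env.Get from minio/pkg):

```go
package main

import (
	"fmt"
	"os"
	"strconv"
)

// resolveThreads mirrors the fallback in uploadSourceToTargetURL: a nonzero
// --parallel value wins; otherwise MC_UPLOAD_MULTIPART_THREADS is consulted;
// otherwise the default of 4 applies.
func resolveThreads(parallelFlag int) (int, error) {
	if parallelFlag != 0 {
		return parallelFlag, nil
	}
	if v := os.Getenv("MC_UPLOAD_MULTIPART_THREADS"); v != "" {
		return strconv.Atoi(v) // a malformed value surfaces as an error, as in the patch
	}
	return 4, nil
}

func main() {
	threads, err := resolveThreads(0) // flag unset
	fmt.Println(threads, err)        // 4 <nil>, unless the env var overrides it
}
```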