Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions tree/ntuple/inc/ROOT/RNTuple.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,18 @@ public:
static constexpr std::uint16_t kVersionMinor = 1;
static constexpr std::uint16_t kVersionPatch = 2;

/// Returns the RNTuple version in the following form:
/// Epoch: 2 most significant bytes
/// Major: next 2 bytes
/// Minor: next 2 bytes
/// Patch: 2 least significant bytes
/// This integer can be compared with that of another RNTuple to determine which one has the highest overall version.
static constexpr std::uint64_t GetCurrentVersion()
{
return (static_cast<std::uint64_t>(kVersionEpoch) << 48) | (static_cast<std::uint64_t>(kVersionMajor) << 32) |
(static_cast<std::uint64_t>(kVersionMinor) << 16) | (static_cast<std::uint64_t>(kVersionPatch));
}

private:
/// Version of the RNTuple binary format that the writer supports (see specification).
/// Changing the epoch indicates backward-incompatible changes
Expand Down
6 changes: 6 additions & 0 deletions tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -821,6 +821,12 @@ public:
std::uint64_t GetOnDiskHeaderXxHash3() const { return fOnDiskHeaderXxHash3; }
std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; }
std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; }
/// \see ROOT::RNTuple::GetCurrentVersion()
std::uint64_t GetVersion() const
{
return (static_cast<std::uint64_t>(fVersionEpoch) << 48) | (static_cast<std::uint64_t>(fVersionMajor) << 32) |
(static_cast<std::uint64_t>(fVersionMinor) << 16) | (static_cast<std::uint64_t>(fVersionPatch));
}

const RFieldDescriptor &GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
{
Expand Down
13 changes: 13 additions & 0 deletions tree/ntuple/inc/ROOT/RNTupleMerger.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,16 @@ enum class ENTupleMergeErrBehavior {
kSkip
};

enum class ENTupleMergeVersionBehavior {
/// The merger will emit a warning when merging RNTuples with higher version than the latest supported by this
/// ROOT version, but merging will work. Some optional features present in the source(s) may be missing from the
/// merged RNTuple.
kWarnOnHigherVersion,
/// The merger will refuse to merge RNTuples with higher versions than the latest supported by this ROOT version.
/// The merging process will abort as soon as one such source is encountered.
kAbortOnHigherVersion
};

struct RColumnMergeInfo;
struct RNTupleMergeData;
struct RSealedPageMergeData;
Expand All @@ -71,6 +81,7 @@ struct RSealedPageMergeData;
/// Here is the mapping for the TFileMerger options:
/// - "rntuple.MergingMode=(Filter|Union|...)" -> sets fMergingMode
/// - "rntuple.ErrBehavior=(Abort|Skip|...)" -> sets fErrBehavior
/// - "rntuple.VersionBehavior=(WarnOnHigherVersion|AbortOnHigherVersion|...)" -> sets fVersionBehavior
/// - "rntuple.ExtraVerbose" -> sets fExtraVerbose to true
/// Rules about the string-based options:
/// 1. there must be no space between the separators (i.e. `.` and `=`)
Expand All @@ -84,6 +95,8 @@ struct RNTupleMergeOptions {
ENTupleMergingMode fMergingMode = ENTupleMergingMode::kFilter;
/// Determines how the Merge function behaves upon merging errors
ENTupleMergeErrBehavior fErrBehavior = ENTupleMergeErrBehavior::kAbort;
/// Determines how the Merge function behaves depending on the RNTuple sources' version.
ENTupleMergeVersionBehavior fVersionBehavior = ENTupleMergeVersionBehavior::kWarnOnHigherVersion;
/// If true, the merger will emit further diagnostics and information.
bool fExtraVerbose = false;
};
Expand Down
29 changes: 28 additions & 1 deletion tree/ntuple/src/RNTupleMerger.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@

#include <algorithm>
#include <deque>
#include <cinttypes> // for PRIu64
#include <initializer_list>
#include <unordered_map>
#include <vector>
Expand Down Expand Up @@ -107,6 +106,16 @@ static std::optional<ENTupleMergeErrBehavior> ParseOptionErrBehavior(const TStri
{"Skip", ENTupleMergeErrBehavior::kSkip},
});
}

static std::optional<ENTupleMergeVersionBehavior> ParseOptionVersionBehavior(const TString &opts)
{
return ParseStringOption<ENTupleMergeVersionBehavior>(
opts, "rntuple.VersionBehavior=",
{
{"WarnOnHigherVersion", ENTupleMergeVersionBehavior::kWarnOnHigherVersion},
{"AbortOnHigherVersion", ENTupleMergeVersionBehavior::kAbortOnHigherVersion},
});
}
// -------------------------------------------------------------------------------------

// Entry point for TFileMerger. Internally calls RNTupleMerger::Merge().
Expand Down Expand Up @@ -245,6 +254,9 @@ try {
if (auto errBehavior = ParseOptionErrBehavior(mergeInfo->fOptions)) {
mergerOpts.fErrBehavior = *errBehavior;
}
if (auto versionBehavior = ParseOptionVersionBehavior(mergeInfo->fOptions)) {
mergerOpts.fVersionBehavior = *versionBehavior;
}
merger.Merge(sourcePtrs, mergerOpts).ThrowOnError();

// Provide the caller with a merged anchor object (even though we've already
Expand Down Expand Up @@ -908,6 +920,7 @@ RNTupleMerger::MergeCommonColumns(ROOT::Internal::RClusterPool &clusterPool,
// the actual data size after recompressing.
buffer = MakeUninitArray<std::uint8_t>(bufSize);

// clang-format off
if (needsResealing) {
RTaskVisitor{fTaskGroup}(RResealFunc{
*srcColElement,
Expand All @@ -931,6 +944,7 @@ RNTupleMerger::MergeCommonColumns(ROOT::Internal::RClusterPool &clusterPool,
mergeData.fDestination.GetWriteOptions()
});
}
// clang-format on
}

++pageIdx;
Expand Down Expand Up @@ -1255,6 +1269,19 @@ ROOT::RResult<void> RNTupleMerger::Merge(std::span<RPageSource *> sources, const
auto srcDescriptor = source->GetSharedDescriptorGuard();
mergeData.fSrcDescriptor = &srcDescriptor.GetRef();

if (mergeData.fSrcDescriptor->GetVersion() > ROOT::RNTuple::GetCurrentVersion()) {
if (mergeOpts.fVersionBehavior == ENTupleMergeVersionBehavior::kWarnOnHigherVersion) {
R__LOG_WARNING(NTupleMergeLog())
<< "RNTuple '" << mergeData.fSrcDescriptor->GetName()
<< "' has a higher format version than the latest supported by this version "
"of ROOT. Merging will work but some features may be dropped.";
} else {
return R__FAIL("RNTuple '" + mergeData.fSrcDescriptor->GetName() +
"' has a higher format version than the latest supported by this version. Refusing to "
"merge, since RNTupleMergeOptions::fVersionBehavior is set to AbortOnHigherVersion.");
}
}

// Create sink from the input model if not initialized
if (!fModel) {
fModel = fDestination->InitFromDescriptor(srcDescriptor.GetRef(), false /* copyClusters */);
Expand Down
4 changes: 3 additions & 1 deletion tree/ntuple/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ if(NOT MSVC)
endif()
ROOT_ADD_GTEST(ntuple_field_name ntuple_field_name.cxx LIBRARIES ROOTNTuple)
ROOT_ADD_GTEST(ntuple_join_table ntuple_join_table.cxx LIBRARIES ROOTNTuple)
ROOT_ADD_GTEST(ntuple_merger ntuple_merger.cxx LIBRARIES ROOTNTuple CustomStruct ZLIB::ZLIB Tree INCLUDE_DIRS ${CMAKE_SOURCE_DIR}/tree/tree/inc)
ROOT_ADD_GTEST(ntuple_merger ntuple_merger.cxx
LIBRARIES ROOTNTuple CustomStruct ZLIB::ZLIB Tree xxHash::xxHash
INCLUDE_DIRS ${CMAKE_SOURCE_DIR}/tree/tree/inc)
ROOT_ADD_GTEST(ntuple_metrics ntuple_metrics.cxx LIBRARIES ROOTNTuple)
ROOT_ADD_GTEST(ntuple_model ntuple_model.cxx LIBRARIES ROOTNTuple CustomStruct)
ROOT_ADD_GTEST(ntuple_multi_column ntuple_multi_column.cxx LIBRARIES ROOTNTuple)
Expand Down
8 changes: 8 additions & 0 deletions tree/ntuple/test/ntuple_descriptor.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,14 @@ TEST(RNTupleDescriptorBuilder, CatchInvalidDescriptors)
descBuilder.EnsureValidDescriptor();
}

TEST(RNTupleDescriptorBuilder, VersionMatches)
{
RNTupleDescriptorBuilder descBuilder;
descBuilder.SetVersionForWriting();
auto desc = descBuilder.MoveDescriptor();
EXPECT_EQ(desc.GetVersion(), ROOT::RNTuple::GetCurrentVersion());
}

TEST(RFieldDescriptorBuilder, HeaderExtension)
{
RNTupleDescriptorBuilder descBuilder;
Expand Down
104 changes: 99 additions & 5 deletions tree/ntuple/test/ntuple_merger.cxx
Original file line number Diff line number Diff line change
@@ -1,14 +1,22 @@
#include "ntuple_test.hxx"

#include <TFileMerger.h>
#ifdef R__BYTESWAP
#include <Byteswap.h>
#endif
#include <ROOT/TBufferMerger.hxx>

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <string_view>
#include <unordered_map>
#include <xxhash.h>
#include <zlib.h>
#include "gmock/gmock.h"
#include <TTree.h>

#include <TFileMerger.h>
#include <TKey.h>
#include <TRandom3.h>
#include <TTree.h>

#include <string_view>
#include <unordered_map>

using ROOT::TestSupport::CheckDiagsRAII;

Expand Down Expand Up @@ -3809,3 +3817,89 @@ TEST(RNTupleMerger, MergeStreamerFieldsSecondMissing)
}
}
}

TEST(RNTupleMerger, MergeNewerVersion)
{
// Verify that merging RNTuples with future versions works as expected (warn user but successfully merge)
FileRaii fileGuard("test_ntuple_merge_newer_version.root");

// Write a regular RNTuple
{
auto model = RNTupleModel::Create();
auto pInt = model->MakeField<int>("int");
auto ntuple = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard.GetPath());
for (int i = 0; i < 10; ++i) {
*pInt = i;
ntuple->Fill();
}
ntuple->CommitDataset();
}

// Get metadata about its anchor
std::uint64_t anchorSeek = 0, anchorNbytes = 0;
{
auto file = std::unique_ptr<TFile>(TFile::Open(fileGuard.GetPath().c_str()));
auto anchorKey = file->GetKey("ntuple");
ASSERT_NE(anchorKey, nullptr);
anchorSeek = anchorKey->GetSeekKey() + anchorKey->GetKeylen();
anchorNbytes = anchorKey->GetNbytes() - anchorKey->GetKeylen() - 8; // 8 for the checksum
}

// Patch the anchor version (and update its checksum)
{
auto file = fopen(fileGuard.GetPath().c_str(), "r+b");
// change the major version to 0x99
fseek(file, anchorSeek + 9, SEEK_SET);
fputc(0x99, file);

// NOTE: skipping the first 6 bytes (nbytes and class version) for checksum calculation
fseek(file, anchorSeek + 6, SEEK_SET);
anchorNbytes -= 6;

// recompute checksum
auto buf = MakeUninitArray<std::byte>(anchorNbytes);
auto read = fread(buf.get(), 1, anchorNbytes, file);
ASSERT_EQ(read, anchorNbytes);
std::uint64_t checksum = XXH3_64bits(buf.get(), anchorNbytes);
#ifdef R__BYTESWAP
checksum = RByteSwap<8>::bswap(checksum);
#endif
// no need to seek: we are already at the end of the anchor object (i.e. at the start of the checksum)
fwrite(&checksum, 1, sizeof(checksum), file);
fclose(file);
}

// Merge
FileRaii fileGuardOut("test_ntuple_merge_newer_version_out.root");
{
// Gather the input sources
std::vector<std::unique_ptr<RPageSource>> sources;
sources.push_back(RPageSource::Create("ntuple", fileGuard.GetPath(), RNTupleReadOptions()));
std::vector<RPageSource *> sourcePtrs;
for (const auto &s : sources) {
sourcePtrs.push_back(s.get());
}

// Create the output
auto destination = std::make_unique<RPageSinkFile>("ntuple", fileGuardOut.GetPath(), RNTupleWriteOptions());
RNTupleMerger merger{std::move(destination)};

for (const auto mmode : {ENTupleMergingMode::kFilter, ENTupleMergingMode::kStrict, ENTupleMergingMode::kUnion}) {
CheckDiagsRAII diagsRaii;
diagsRaii.requiredDiag(kWarning, "ROOT.NTuple.Merge", "has a higher format version", false);
RNTupleMergeOptions opts;
opts.fMergingMode = mmode;
auto res = merger.Merge(sourcePtrs, opts);
EXPECT_TRUE(bool(res));
}

// Now merge again but with VersionBehavior set to Abort.
{
RNTupleMergeOptions opts;
opts.fVersionBehavior = ROOT::Experimental::Internal::ENTupleMergeVersionBehavior::kAbortOnHigherVersion;
auto res = merger.Merge(sourcePtrs, opts);
EXPECT_FALSE(bool(res));
EXPECT_THAT(res.GetError()->GetReport(), testing::HasSubstr("has a higher format version"));
}
}
}
Loading