From 10c10682f31a532d462802e2646611938cbd8ede Mon Sep 17 00:00:00 2001 From: Aditya-138-12 Date: Fri, 31 Jan 2025 12:25:33 +0530 Subject: [PATCH 1/4] Added fBasketSize option in RSnapshotOptions.hxx and honored it in the ActionHelpers.hxx file for custom basket size knob --- tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx | 17 +++++++++++++---- tree/dataframe/inc/ROOT/RSnapshotOptions.hxx | 6 ++++-- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx b/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx index deb3507d8491e..96be70c93d9e0 100644 --- a/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx +++ b/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx @@ -1397,7 +1397,7 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &i /// `branchAddress`) so we can intercept changes in the address of the input branch and tell the output branch. template void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &inName, const std::string &outName, - TBranch *&branch, void *&branchAddress, RVec *ab, RBranchSet &outputBranches, bool isDefine) + TBranch *&branch, void *&branchAddress, RVec *ab, RBranchSet &outputBranches, bool isDefine, const RSnapshotOptions &options) { TBranch *inputBranch = nullptr; if (inputTree) { @@ -1407,6 +1407,8 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &i } auto *outputBranch = outputBranches.Get(outName); + bool isNewBranch = isDefine || !inputBranch; // Determine if this is a new branch or not (Created via Define). 
+ // if no backing input branch, we must write out an RVec bool mustWriteRVec = (inputBranch == nullptr || isDefine); // otherwise, if input branch is TClonesArray, must write out an RVec @@ -1435,6 +1437,10 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &i outputBranch->SetObject(ab); } else { auto *b = outputTree.Branch(outName.c_str(), ab); + // Set Custom basket size for new branches. + if(isNewBranch && options.fBasketSize > 0){ + b->SetBasketSize(options.fBasketSize); + } outputBranches.Insert(outName, b); } return; @@ -1462,6 +1468,7 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &i // added to the output tree yet. However, the size leaf has to be available for the creation of the array // branch to be successful. So we create the size leaf here. const auto sizeTypeStr = TypeName2ROOTTypeName(sizeLeaf->GetTypeName()); + // Use Original basket size for Existing Branches. const auto sizeBufSize = sizeLeaf->GetBranch()->GetBasketSize(); // The null branch address is a placeholder. It will be set when SetBranchesHelper is called for `sizeLeafName` auto *sizeBranch = outputTree.Branch(sizeLeafName.c_str(), (void *)nullptr, @@ -1478,7 +1485,9 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &i bname); } else { const auto leaflist = std::string(bname) + "[" + sizeLeafName + "]/" + rootbtype; - outputBranch = outputTree.Branch(outName.c_str(), dataPtr, leaflist.c_str()); + //Use original basket size for existing branches and new basket size for new branches + const auto branchBufSize = isNewBranch && options.fBasketSize > 0 ? 
options.fBasketSize : inputBranch->GetBasket(); + outputBranch = outputTree.Branch(outName.c_str(), dataPtr, leaflist.c_str(), branchBufSize); outputBranch->SetTitle(inputBranch->GetTitle()); outputBranches.Insert(outName, outputBranch); branch = outputBranch; @@ -1578,7 +1587,7 @@ public: { // create branches in output tree int expander[] = {(SetBranchesHelper(fInputTree, *fOutputTree, fInputBranchNames[S], fOutputBranchNames[S], - fBranches[S], fBranchAddresses[S], &values, fOutputBranches, fIsDefine[S]), + fBranches[S], fBranchAddresses[S], &values, fOutputBranches, fIsDefine[S], fOptions), 0)..., 0}; fOutputBranches.AssertNoNullBranchAddresses(); @@ -1779,7 +1788,7 @@ public: // hack to call TTree::Branch on all variadic template arguments int expander[] = {(SetBranchesHelper(fInputTrees[slot], *fOutputTrees[slot], fInputBranchNames[S], fOutputBranchNames[S], fBranches[slot][S], fBranchAddresses[slot][S], - &values, fOutputBranches[slot], fIsDefine[S]), + &values, fOutputBranches[slot], fIsDefine[S], fOptions), 0)..., 0}; fOutputBranches[slot].AssertNoNullBranchAddresses(); diff --git a/tree/dataframe/inc/ROOT/RSnapshotOptions.hxx b/tree/dataframe/inc/ROOT/RSnapshotOptions.hxx index 97318ef8a6da1..1ae596247ecac 100644 --- a/tree/dataframe/inc/ROOT/RSnapshotOptions.hxx +++ b/tree/dataframe/inc/ROOT/RSnapshotOptions.hxx @@ -25,7 +25,7 @@ struct RSnapshotOptions { RSnapshotOptions(const RSnapshotOptions &) = default; RSnapshotOptions(RSnapshotOptions &&) = default; RSnapshotOptions(std::string_view mode, ECAlgo comprAlgo, int comprLevel, int autoFlush, int splitLevel, bool lazy, - bool overwriteIfExists = false, bool vector2RVec = true) + bool overwriteIfExists = false, bool vector2RVec = true, int basketSize = 32000) : fMode(mode), fCompressionAlgorithm(comprAlgo), fCompressionLevel{comprLevel}, @@ -33,7 +33,8 @@ struct RSnapshotOptions { fSplitLevel(splitLevel), fLazy(lazy), fOverwriteIfExists(overwriteIfExists), - fVector2RVec(vector2RVec) + 
fVector2RVec(vector2RVec), + fBasketSize(basketSize) { } std::string fMode = "RECREATE"; ///< Mode of creation of output file @@ -45,6 +46,7 @@ struct RSnapshotOptions { bool fLazy = false; ///< Do not start the event loop when Snapshot is called bool fOverwriteIfExists = false; ///< If fMode is "UPDATE", overwrite object in output file if it already exists bool fVector2RVec = true; ///< If set to true will convert std::vector columns to RVec when saving to disk + int fBasketSize = 32000; /// Custom Basket Size option }; } // namespace RDF } // namespace ROOT From aa4e73c68d74938f2751da715340650ea0846adf Mon Sep 17 00:00:00 2001 From: Aditya-138-12 Date: Fri, 31 Jan 2025 13:01:19 +0530 Subject: [PATCH 2/4] Added the options and corrected one typo --- tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx b/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx index 96be70c93d9e0..d08dd13286cf3 100644 --- a/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx +++ b/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx @@ -1486,7 +1486,7 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &i } else { const auto leaflist = std::string(bname) + "[" + sizeLeafName + "]/" + rootbtype; //Use original basket size for existing branches and new basket size for new branches - const auto branchBufSize = isNewBranch && options.fBasketSize > 0 ? options.fBasketSize : inputBranch->GetBasket(); + const auto branchBufSize = isNewBranch && options.fBasketSize > 0 ? 
options.fBasketSize : inputBranch->GetBasketSize(); outputBranch = outputTree.Branch(outName.c_str(), dataPtr, leaflist.c_str(), branchBufSize); outputBranch->SetTitle(inputBranch->GetTitle()); outputBranches.Insert(outName, outputBranch); From 96a93b36bf32107fcd5805786ec334e3aac72747 Mon Sep 17 00:00:00 2001 From: Aditya-138-12 Date: Fri, 31 Jan 2025 15:45:31 +0530 Subject: [PATCH 3/4] Made the Following changes, 1. Now using std::optional instead of int, 2. Passing only basketSize in the SetBranchesHelper function instead of Complete options object, 3. Added some more inline comments. --- tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx | 13 +++++++------ tree/dataframe/inc/ROOT/RSnapshotOptions.hxx | 5 +++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx b/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx index d08dd13286cf3..076e2b01ce189 100644 --- a/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx +++ b/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx @@ -58,6 +58,7 @@ #include #include #include // std::accumulate in MeanHelper +#include // std::optional in SetBranchesHelper /// \cond HIDDEN_SYMBOLS @@ -1397,7 +1398,7 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &i /// `branchAddress`) so we can intercept changes in the address of the input branch and tell the output branch. 
template void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &inName, const std::string &outName, - TBranch *&branch, void *&branchAddress, RVec *ab, RBranchSet &outputBranches, bool isDefine, const RSnapshotOptions &options) + TBranch *&branch, void *&branchAddress, RVec *ab, RBranchSet &outputBranches, bool isDefine, const std::optional basketSize) { TBranch *inputBranch = nullptr; if (inputTree) { @@ -1438,8 +1439,8 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &i } else { auto *b = outputTree.Branch(outName.c_str(), ab); // Set Custom basket size for new branches. - if(isNewBranch && options.fBasketSize > 0){ - b->SetBasketSize(options.fBasketSize); + if(isNewBranch && basketSize.value() > 0){ + b->SetBasketSize(basketSize.value()); } outputBranches.Insert(outName, b); } @@ -1486,7 +1487,7 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &i } else { const auto leaflist = std::string(bname) + "[" + sizeLeafName + "]/" + rootbtype; //Use original basket size for existing branches and new basket size for new branches - const auto branchBufSize = isNewBranch && options.fBasketSize > 0 ? options.fBasketSize : inputBranch->GetBasketSize(); + const auto branchBufSize = isNewBranch && basketSize.value() > 0 ? 
basketSize.value() : inputBranch->GetBasketSize(); outputBranch = outputTree.Branch(outName.c_str(), dataPtr, leaflist.c_str(), branchBufSize); outputBranch->SetTitle(inputBranch->GetTitle()); outputBranches.Insert(outName, outputBranch); @@ -1587,7 +1588,7 @@ public: { // create branches in output tree int expander[] = {(SetBranchesHelper(fInputTree, *fOutputTree, fInputBranchNames[S], fOutputBranchNames[S], - fBranches[S], fBranchAddresses[S], &values, fOutputBranches, fIsDefine[S], fOptions), + fBranches[S], fBranchAddresses[S], &values, fOutputBranches, fIsDefine[S], fOptions.fBasketSize), 0)..., 0}; fOutputBranches.AssertNoNullBranchAddresses(); @@ -1788,7 +1789,7 @@ public: // hack to call TTree::Branch on all variadic template arguments int expander[] = {(SetBranchesHelper(fInputTrees[slot], *fOutputTrees[slot], fInputBranchNames[S], fOutputBranchNames[S], fBranches[slot][S], fBranchAddresses[slot][S], - &values, fOutputBranches[slot], fIsDefine[S], fOptions), + &values, fOutputBranches[slot], fIsDefine[S], fOptions.fBasketSize), 0)..., 0}; fOutputBranches[slot].AssertNoNullBranchAddresses(); diff --git a/tree/dataframe/inc/ROOT/RSnapshotOptions.hxx b/tree/dataframe/inc/ROOT/RSnapshotOptions.hxx index 1ae596247ecac..bb529bd2ffe43 100644 --- a/tree/dataframe/inc/ROOT/RSnapshotOptions.hxx +++ b/tree/dataframe/inc/ROOT/RSnapshotOptions.hxx @@ -14,6 +14,7 @@ #include #include #include +#include namespace ROOT { @@ -25,7 +26,7 @@ struct RSnapshotOptions { RSnapshotOptions(const RSnapshotOptions &) = default; RSnapshotOptions(RSnapshotOptions &&) = default; RSnapshotOptions(std::string_view mode, ECAlgo comprAlgo, int comprLevel, int autoFlush, int splitLevel, bool lazy, - bool overwriteIfExists = false, bool vector2RVec = true, int basketSize = 32000) + bool overwriteIfExists = false, bool vector2RVec = true, std::optional basketSize = 32000) : fMode(mode), fCompressionAlgorithm(comprAlgo), fCompressionLevel{comprLevel}, @@ -46,7 +47,7 @@ struct RSnapshotOptions 
{ bool fLazy = false; ///< Do not start the event loop when Snapshot is called bool fOverwriteIfExists = false; ///< If fMode is "UPDATE", overwrite object in output file if it already exists bool fVector2RVec = true; ///< If set to true will convert std::vector columns to RVec when saving to disk - int fBasketSize = 32000; /// Custom Basket Size option + std::optional fBasketSize = 32000; /// Custom Basket Size option, for more details, see https://root.cern/manual/trees/#baskets-clusters-and-the-tree-header }; } // namespace RDF } // namespace ROOT From 6d875d332f996243fc520018ad460816b154534e Mon Sep 17 00:00:00 2001 From: Aditya-138-12 Date: Fri, 31 Jan 2025 16:00:51 +0530 Subject: [PATCH 4/4] Check basketSize.has_value() before use, default the constructor argument to std::nullopt, and value-initialize std::optional fBasketSize{} --- tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx | 6 +++--- tree/dataframe/inc/ROOT/RSnapshotOptions.hxx | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx b/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx index 076e2b01ce189..e385160f75e86 100644 --- a/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx +++ b/tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx @@ -1398,7 +1398,7 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &i /// `branchAddress`) so we can intercept changes in the address of the input branch and tell the output branch. 
template void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &inName, const std::string &outName, - TBranch *&branch, void *&branchAddress, RVec *ab, RBranchSet &outputBranches, bool isDefine, const std::optional basketSize) + TBranch *&branch, void *&branchAddress, RVec *ab, RBranchSet &outputBranches, bool isDefine, const std::optional &basketSize) { TBranch *inputBranch = nullptr; if (inputTree) { @@ -1439,7 +1439,7 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &i } else { auto *b = outputTree.Branch(outName.c_str(), ab); // Set Custom basket size for new branches. - if(isNewBranch && basketSize.value() > 0){ + if(isNewBranch && basketSize.has_value()){ b->SetBasketSize(basketSize.value()); } outputBranches.Insert(outName, b); @@ -1487,7 +1487,7 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &i } else { const auto leaflist = std::string(bname) + "[" + sizeLeafName + "]/" + rootbtype; //Use original basket size for existing branches and new basket size for new branches - const auto branchBufSize = isNewBranch && basketSize.value() > 0 ? basketSize.value() : inputBranch->GetBasketSize(); + const auto branchBufSize = isNewBranch && basketSize.has_value() ? 
basketSize.value() : inputBranch->GetBasketSize(); outputBranch = outputTree.Branch(outName.c_str(), dataPtr, leaflist.c_str(), branchBufSize); outputBranch->SetTitle(inputBranch->GetTitle()); outputBranches.Insert(outName, outputBranch); diff --git a/tree/dataframe/inc/ROOT/RSnapshotOptions.hxx b/tree/dataframe/inc/ROOT/RSnapshotOptions.hxx index bb529bd2ffe43..c1d21d9389d4a 100644 --- a/tree/dataframe/inc/ROOT/RSnapshotOptions.hxx +++ b/tree/dataframe/inc/ROOT/RSnapshotOptions.hxx @@ -26,7 +26,7 @@ struct RSnapshotOptions { RSnapshotOptions(const RSnapshotOptions &) = default; RSnapshotOptions(RSnapshotOptions &&) = default; RSnapshotOptions(std::string_view mode, ECAlgo comprAlgo, int comprLevel, int autoFlush, int splitLevel, bool lazy, - bool overwriteIfExists = false, bool vector2RVec = true, std::optional basketSize = 32000) + bool overwriteIfExists = false, bool vector2RVec = true, const std::optional &basketSize = std::nullopt) : fMode(mode), fCompressionAlgorithm(comprAlgo), fCompressionLevel{comprLevel}, @@ -47,7 +47,7 @@ struct RSnapshotOptions { bool fLazy = false; ///< Do not start the event loop when Snapshot is called bool fOverwriteIfExists = false; ///< If fMode is "UPDATE", overwrite object in output file if it already exists bool fVector2RVec = true; ///< If set to true will convert std::vector columns to RVec when saving to disk - std::optional fBasketSize = 32000; /// Custom Basket Size option, for more details, see https://root.cern/manual/trees/#baskets-clusters-and-the-tree-header + std::optional fBasketSize {}; ///< Set a custom basket size option. For more details, see https://root.cern/manual/trees/#baskets-clusters-and-the-tree-header }; } // namespace RDF } // namespace ROOT