Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RDF] Add option to change default basket size in RDataFrame Snapshot #17579

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions tree/dataframe/inc/ROOT/RDF/ActionHelpers.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#include <vector>
#include <iomanip>
#include <numeric> // std::accumulate in MeanHelper
#include <optional> // std::optional<int> in SetBranchesHelper

/// \cond HIDDEN_SYMBOLS

Expand Down Expand Up @@ -1397,7 +1398,7 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &i
/// `branchAddress`) so we can intercept changes in the address of the input branch and tell the output branch.
template <typename T>
void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &inName, const std::string &outName,
TBranch *&branch, void *&branchAddress, RVec<T> *ab, RBranchSet &outputBranches, bool isDefine)
TBranch *&branch, void *&branchAddress, RVec<T> *ab, RBranchSet &outputBranches, bool isDefine, const std::optional<int> &basketSize)
{
TBranch *inputBranch = nullptr;
if (inputTree) {
Expand All @@ -1407,6 +1408,8 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &i
}
auto *outputBranch = outputBranches.Get(outName);

bool isNewBranch = isDefine || !inputBranch; // Determine if this is a new branch or not (Created via Define).

// if no backing input branch, we must write out an RVec
bool mustWriteRVec = (inputBranch == nullptr || isDefine);
// otherwise, if input branch is TClonesArray, must write out an RVec
Expand Down Expand Up @@ -1435,6 +1438,10 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &i
outputBranch->SetObject(ab);
} else {
auto *b = outputTree.Branch(outName.c_str(), ab);
// Apply the custom basket size, if one was provided, to newly created branches.
if(isNewBranch && basketSize.has_value()){
b->SetBasketSize(basketSize.value());
}
outputBranches.Insert(outName, b);
}
return;
Expand Down Expand Up @@ -1462,6 +1469,7 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &i
// added to the output tree yet. However, the size leaf has to be available for the creation of the array
// branch to be successful. So we create the size leaf here.
const auto sizeTypeStr = TypeName2ROOTTypeName(sizeLeaf->GetTypeName());
// Use the original basket size of the existing size-leaf branch.
const auto sizeBufSize = sizeLeaf->GetBranch()->GetBasketSize();
// The null branch address is a placeholder. It will be set when SetBranchesHelper is called for `sizeLeafName`
auto *sizeBranch = outputTree.Branch(sizeLeafName.c_str(), (void *)nullptr,
Expand All @@ -1478,7 +1486,9 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &i
bname);
} else {
const auto leaflist = std::string(bname) + "[" + sizeLeafName + "]/" + rootbtype;
outputBranch = outputTree.Branch(outName.c_str(), dataPtr, leaflist.c_str());
// New branches use the custom basket size when one is given; otherwise keep the input branch's basket size.
const auto branchBufSize = isNewBranch && basketSize.has_value() ? basketSize.value() : inputBranch->GetBasketSize();
outputBranch = outputTree.Branch(outName.c_str(), dataPtr, leaflist.c_str(), branchBufSize);
outputBranch->SetTitle(inputBranch->GetTitle());
outputBranches.Insert(outName, outputBranch);
branch = outputBranch;
Expand Down Expand Up @@ -1578,7 +1588,7 @@ public:
{
// create branches in output tree
int expander[] = {(SetBranchesHelper(fInputTree, *fOutputTree, fInputBranchNames[S], fOutputBranchNames[S],
fBranches[S], fBranchAddresses[S], &values, fOutputBranches, fIsDefine[S]),
fBranches[S], fBranchAddresses[S], &values, fOutputBranches, fIsDefine[S], fOptions.fBasketSize),
0)...,
0};
fOutputBranches.AssertNoNullBranchAddresses();
Expand Down Expand Up @@ -1779,7 +1789,7 @@ public:
// hack to call TTree::Branch on all variadic template arguments
int expander[] = {(SetBranchesHelper(fInputTrees[slot], *fOutputTrees[slot], fInputBranchNames[S],
fOutputBranchNames[S], fBranches[slot][S], fBranchAddresses[slot][S],
&values, fOutputBranches[slot], fIsDefine[S]),
&values, fOutputBranches[slot], fIsDefine[S], fOptions.fBasketSize),
0)...,
0};
fOutputBranches[slot].AssertNoNullBranchAddresses();
Expand Down
7 changes: 5 additions & 2 deletions tree/dataframe/inc/ROOT/RSnapshotOptions.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <Compression.h>
#include <string_view>
#include <string>
#include <optional>

namespace ROOT {

Expand All @@ -25,15 +26,16 @@ struct RSnapshotOptions {
RSnapshotOptions(const RSnapshotOptions &) = default;
RSnapshotOptions(RSnapshotOptions &&) = default;
RSnapshotOptions(std::string_view mode, ECAlgo comprAlgo, int comprLevel, int autoFlush, int splitLevel, bool lazy,
bool overwriteIfExists = false, bool vector2RVec = true)
bool overwriteIfExists = false, bool vector2RVec = true, const std::optional<int> &basketSize = std::nullopt)
: fMode(mode),
fCompressionAlgorithm(comprAlgo),
fCompressionLevel{comprLevel},
fAutoFlush(autoFlush),
fSplitLevel(splitLevel),
fLazy(lazy),
fOverwriteIfExists(overwriteIfExists),
fVector2RVec(vector2RVec)
fVector2RVec(vector2RVec),
fBasketSize(basketSize)
{
}
std::string fMode = "RECREATE"; ///< Mode of creation of output file
Expand All @@ -45,6 +47,7 @@ struct RSnapshotOptions {
bool fLazy = false; ///< Do not start the event loop when Snapshot is called
bool fOverwriteIfExists = false; ///< If fMode is "UPDATE", overwrite object in output file if it already exists
bool fVector2RVec = true; ///< If set to true will convert std::vector columns to RVec when saving to disk
std::optional<int> fBasketSize {}; ///< Optional custom basket size; unset by default. For more details, see https://root.cern/manual/trees/#baskets-clusters-and-the-tree-header
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
std::optional<int> fBasketSize {}; /// Set a custom basket size option. For more details, see https://root.cern/manual/trees/#baskets-clusters-and-the-tree-header
std::optional<int> fBasketSize {}; ///< Set a custom basket size option. For more details, see https://root.cern/manual/trees/#baskets-clusters-and-the-tree-header

};
} // namespace RDF
} // namespace ROOT
Expand Down