Skip to content

Commit

Permalink
[ntuple] move lower layer in Internal namespace
Browse files Browse the repository at this point in the history
Including classes RPageSource/Sink, RCluster(Pool), RPage, RColumn and
supporting classes
  • Loading branch information
jblomer committed Feb 13, 2024
1 parent 0b39fda commit aa9b84e
Show file tree
Hide file tree
Showing 54 changed files with 770 additions and 841 deletions.
12 changes: 4 additions & 8 deletions tree/dataframe/inc/ROOT/RNTupleDS.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,9 @@ class RFieldBase;
class RNTuple;
class RNTupleDescriptor;

namespace Detail {
class RFieldValue;
class RPageSource;
} // namespace Detail

namespace Internal {
class RNTupleColumnReader;
class RPageSource;
}

class RNTupleDS final : public ROOT::RDF::RDataSource {
Expand All @@ -51,7 +47,7 @@ class RNTupleDS final : public ROOT::RDF::RDataSource {
/// The GetEntryRanges() swaps fNextRanges and fCurrentRanges and uses the list of
/// REntryRangeDS records to return the list of ranges ready to use by the RDF loop manager.
struct REntryRangeDS {
std::unique_ptr<ROOT::Experimental::Detail::RPageSource> fSource;
std::unique_ptr<ROOT::Experimental::Internal::RPageSource> fSource;
ULong64_t fFirstEntry = 0; ///< First entry index in fSource
/// End entry index in fSource, i.e. the number of entries in the range is fLastEntry - fFirstEntry
ULong64_t fLastEntry = 0;
Expand All @@ -60,7 +56,7 @@ class RNTupleDS final : public ROOT::RDF::RDataSource {
/// The first source is used to extract the schema and build the prototype fields. The page source
/// is used to extract a clone of the descriptor to fPrincipalDescriptor. Afterwards it is moved
/// into the first REntryRangeDS.
std::unique_ptr<Detail::RPageSource> fPrincipalSource;
std::unique_ptr<Internal::RPageSource> fPrincipalSource;
/// A clone of the first page source's descriptor.
std::unique_ptr<RNTupleDescriptor> fPrincipalDescriptor;

Expand Down Expand Up @@ -119,7 +115,7 @@ class RNTupleDS final : public ROOT::RDF::RDataSource {
/// is not enough work to give at least one cluster to every slot.
void PrepareNextRanges();

explicit RNTupleDS(std::unique_ptr<ROOT::Experimental::Detail::RPageSource> pageSource);
explicit RNTupleDS(std::unique_ptr<ROOT::Experimental::Internal::RPageSource> pageSource);

public:
RNTupleDS(std::string_view ntupleName, std::string_view fileName);
Expand Down
18 changes: 9 additions & 9 deletions tree/dataframe/src/RNTupleDS.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class RRDFCardinalityField : public ROOT::Experimental::RFieldBase {
{
auto onDiskTypes = EnsureCompatibleColumnTypes(desc);
fColumns.emplace_back(
ROOT::Experimental::Detail::RColumn::Create<ClusterSize_t>(RColumnModel(onDiskTypes[0]), 0));
ROOT::Experimental::Internal::RColumn::Create<ClusterSize_t>(RColumnModel(onDiskTypes[0]), 0));
}

size_t GetValueSize() const final { return sizeof(std::size_t); }
Expand Down Expand Up @@ -156,7 +156,7 @@ class RArraySizeField final : public ROOT::Experimental::RFieldBase {
/// Every RDF column is represented by exactly one RNTuple field
class RNTupleColumnReader : public ROOT::Detail::RDF::RColumnReaderBase {
using RFieldBase = ROOT::Experimental::RFieldBase;
using RPageSource = ROOT::Experimental::Detail::RPageSource;
using RPageSource = ROOT::Experimental::Internal::RPageSource;

RNTupleDS *fDataSource; ///< The data source that owns this column reader
RFieldBase *fProtoField; ///< The prototype field from which fField is cloned
Expand Down Expand Up @@ -373,7 +373,7 @@ void RNTupleDS::AddField(const RNTupleDescriptor &desc, std::string_view colName
fProtoFields.emplace_back(std::move(valueField));
}

RNTupleDS::RNTupleDS(std::unique_ptr<Detail::RPageSource> pageSource) : fPrincipalSource(std::move(pageSource))
RNTupleDS::RNTupleDS(std::unique_ptr<Internal::RPageSource> pageSource) : fPrincipalSource(std::move(pageSource))
{
fPrincipalSource->Attach();
fPrincipalDescriptor = fPrincipalSource->GetSharedDescriptorGuard()->Clone();
Expand All @@ -383,17 +383,17 @@ RNTupleDS::RNTupleDS(std::unique_ptr<Detail::RPageSource> pageSource) : fPrincip
}

RNTupleDS::RNTupleDS(std::string_view ntupleName, std::string_view fileName)
: RNTupleDS(ROOT::Experimental::Detail::RPageSource::Create(ntupleName, fileName))
: RNTupleDS(ROOT::Experimental::Internal::RPageSource::Create(ntupleName, fileName))
{
}

RNTupleDS::RNTupleDS(RNTuple *ntuple)
: RNTupleDS(ROOT::Experimental::Detail::RPageSourceFile::CreateFromAnchor(*ntuple))
: RNTupleDS(ROOT::Experimental::Internal::RPageSourceFile::CreateFromAnchor(*ntuple))
{
}

RNTupleDS::RNTupleDS(std::string_view ntupleName, const std::vector<std::string> &fileNames)
: RNTupleDS(Detail::RPageSource::Create(ntupleName, fileNames[0]))
: RNTupleDS(Internal::RPageSource::Create(ntupleName, fileNames[0]))
{
fNTupleName = ntupleName;
fFileNames = fileNames;
Expand Down Expand Up @@ -450,7 +450,7 @@ void RNTupleDS::PrepareNextRanges()
assert(fNextFileIndex == 0);
std::swap(fPrincipalSource, range.fSource);
} else {
range.fSource = Detail::RPageSource::Create(fNTupleName, fFileNames[fNextFileIndex]);
range.fSource = Internal::RPageSource::Create(fNTupleName, fFileNames[fNextFileIndex]);
range.fSource->Attach();
}
fNextFileIndex++;
Expand All @@ -470,13 +470,13 @@ void RNTupleDS::PrepareNextRanges()
// Again, we need to skip empty files.
unsigned int nSlotsPerFile = fNSlots / nRemainingFiles;
for (std::size_t i = 0; (fNextRanges.size() < fNSlots) && (fNextFileIndex < nFiles); ++i) {
std::unique_ptr<Detail::RPageSource> source;
std::unique_ptr<Internal::RPageSource> source;
if (fPrincipalSource) {
// Avoid reopening the first file, which has been opened already to read the schema
assert(fNextFileIndex == 0);
std::swap(source, fPrincipalSource);
} else {
source = Detail::RPageSource::Create(fNTupleName, fFileNames[fNextFileIndex]);
source = Internal::RPageSource::Create(fNTupleName, fFileNames[fNextFileIndex]);
source->Attach();
}
fNextFileIndex++;
Expand Down
4 changes: 2 additions & 2 deletions tree/dataframe/test/datasource_ntuple.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
#include <limits>

using ROOT::Experimental::RNTupleDS;
using ROOT::Experimental::RNTupleWriter;
using ROOT::Experimental::RNTupleModel;
using ROOT::Experimental::Detail::RPageSource;
using ROOT::Experimental::RNTupleWriter;
using ROOT::Experimental::Internal::RPageSource;

template <typename V1, typename V2>
void EXPECT_VEC_EQ(const V1 &v1, const V2 &v2)
Expand Down
43 changes: 20 additions & 23 deletions tree/ntuple/v7/inc/ROOT/RCluster.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@

namespace ROOT {
namespace Experimental {
namespace Detail {

namespace Internal {

// clang-format off
/**
Expand Down Expand Up @@ -66,36 +65,35 @@ public:
std::uint32_t GetSize() const { return fSize; }

bool IsNull() const { return fAddress == nullptr; }
};
}; // class ROnDiskPage

} // namespace Detail
} // namespace Internal
} // namespace Experimental
} // namespace ROOT

// For hash maps ROnDiskPage::Key --> ROnDiskPage
namespace std
{
template <>
struct hash<ROOT::Experimental::Detail::ROnDiskPage::Key>
template <>
struct hash<ROOT::Experimental::Internal::ROnDiskPage::Key> {
// TODO(jblomer): quick and dirty hash, likely very sub-optimal, to be revised later.
size_t operator()(const ROOT::Experimental::Internal::ROnDiskPage::Key &key) const
{
// TODO(jblomer): quick and dirty hash, likely very sub-optimal, to be revised later.
size_t operator()(const ROOT::Experimental::Detail::ROnDiskPage::Key &key) const
{
return ((std::hash<ROOT::Experimental::DescriptorId_t>()(key.fPhysicalColumnId) ^
(hash<ROOT::Experimental::NTupleSize_t>()(key.fPageNo) << 1)) >>
1);
}
};
return ((std::hash<ROOT::Experimental::DescriptorId_t>()(key.fPhysicalColumnId) ^
(hash<ROOT::Experimental::NTupleSize_t>()(key.fPageNo) << 1)) >>
1);
}
};
}


namespace ROOT {
namespace Experimental {
namespace Detail {
namespace Internal {

// clang-format off
/**
\class ROOT::Experimental::Detail::ROnDiskPageMap
\class ROOT::Experimental::Internal::ROnDiskPageMap
\ingroup NTuple
\brief A memory region that contains packed and compressed pages
Expand All @@ -120,12 +118,11 @@ public:
/// needs to be owned by the page map (see derived classes). If a page map contains a page of a given column,
/// it is expected that _all_ the pages of that column in that cluster are part of the page map.
void Register(const ROnDiskPage::Key &key, const ROnDiskPage &onDiskPage) { fOnDiskPages.emplace(key, onDiskPage); }
};

}; // class ROnDiskPageMap

// clang-format off
/**
\class ROOT::Experimental::Detail::ROnDiskPageMapHeap
\class ROOT::Experimental::Internal::ROnDiskPageMapHeap
\ingroup NTuple
\brief An ROnDiskPageMap that is used for an fMemory allocated as an array of unsigned char.
*/
Expand All @@ -141,11 +138,11 @@ public:
ROnDiskPageMapHeap &operator =(const ROnDiskPageMapHeap &other) = delete;
ROnDiskPageMapHeap &operator =(ROnDiskPageMapHeap &&other) = default;
~ROnDiskPageMapHeap() override;
};
}; // class ROnDiskPageMapHeap

// clang-format off
/**
\class ROOT::Experimental::Detail::RCluster
\class ROOT::Experimental::Internal::RCluster
\ingroup NTuple
\brief An in-memory subset of the packed and compressed pages of a cluster
Expand Down Expand Up @@ -199,9 +196,9 @@ public:
const ColumnSet_t &GetAvailPhysicalColumns() const { return fAvailPhysicalColumns; }
bool ContainsColumn(DescriptorId_t colId) const { return fAvailPhysicalColumns.count(colId) > 0; }
size_t GetNOnDiskPages() const { return fOnDiskPages.size(); }
};
}; // class RCluster

} // namespace Detail
} // namespace Internal
} // namespace Experimental
} // namespace ROOT

Expand Down
6 changes: 3 additions & 3 deletions tree/ntuple/v7/inc/ROOT/RClusterPool.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,13 @@

namespace ROOT {
namespace Experimental {
namespace Detail {

namespace Internal {
class RPageSource;

// clang-format off
/**
\class ROOT::Experimental::Detail::RClusterPool
\class ROOT::Experimental::Internal::RClusterPool
\ingroup NTuple
\brief Manages a set of clusters containing compressed and packed pages
Expand Down Expand Up @@ -161,7 +161,7 @@ public:
void WaitForInFlightClusters();
}; // class RClusterPool

} // namespace Detail
} // namespace Internal
} // namespace Experimental
} // namespace ROOT

Expand Down
9 changes: 4 additions & 5 deletions tree/ntuple/v7/inc/ROOT/RColumn.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@

namespace ROOT {
namespace Experimental {
namespace Detail {
namespace Internal {

// clang-format off
/**
\class ROOT::Experimental::RColumn
\class ROOT::Experimental::Internal::RColumn
\ingroup NTuple
\brief A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into memory.
*/
Expand Down Expand Up @@ -330,10 +330,9 @@ public:
RPageSink *GetPageSink() const { return fPageSink; }
RPageStorage::ColumnHandle_t GetHandleSource() const { return fHandleSource; }
RPageStorage::ColumnHandle_t GetHandleSink() const { return fHandleSink; }
};

} // namespace Detail
}; // class RColumn

} // namespace Internal
} // namespace Experimental
} // namespace ROOT

Expand Down
13 changes: 6 additions & 7 deletions tree/ntuple/v7/inc/ROOT/RColumnElement.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -246,12 +246,11 @@ static void CastZigzagSplitUnpack(void *destination, const void *source, std::si

namespace ROOT {
namespace Experimental {

namespace Detail {
namespace Internal {

// clang-format off
/**
\class ROOT::Experimental::Detail::RColumnElementBase
\class ROOT::Experimental::Internal::RColumnElementBase
\ingroup NTuple
\brief A column element encapsulates the translation between basic C++ types and their column representation.
Expand Down Expand Up @@ -309,7 +308,7 @@ public:
std::size_t GetSize() const { return fSize; }
std::size_t GetBitsOnStorage() const { return fBitsOnStorage; }
/// Number of bytes needed to store nElements elements at fBitsOnStorage bits each, rounded up to whole bytes.
std::size_t GetPackedSize(std::size_t nElements = 1U) const { return (nElements * fBitsOnStorage + 7) / 8; }
};
}; // class RColumnElementBase

/**
* Base class for columns whose on-storage representation is little-endian.
Expand Down Expand Up @@ -631,7 +630,7 @@ public:
std::uint16_t *uint16Array = reinterpret_cast<std::uint16_t *>(dst);

for (std::size_t i = 0; i < count; ++i) {
uint16Array[i] = Internal::FloatToHalf(floatArray[i]);
uint16Array[i] = FloatToHalf(floatArray[i]);
ByteSwapIfNecessary(uint16Array[i]);
}
}
Expand All @@ -643,7 +642,7 @@ public:

for (std::size_t i = 0; i < count; ++i) {
ByteSwapIfNecessary(floatArray[i]);
floatArray[i] = Internal::HalfToFloat(uint16Array[i]);
floatArray[i] = HalfToFloat(uint16Array[i]);
}
}
};
Expand Down Expand Up @@ -794,7 +793,7 @@ std::unique_ptr<RColumnElementBase> RColumnElementBase::Generate(EColumnType typ
template <>
std::unique_ptr<RColumnElementBase> RColumnElementBase::Generate<void>(EColumnType type);

} // namespace Detail
} // namespace Internal
} // namespace Experimental
} // namespace ROOT

Expand Down
20 changes: 10 additions & 10 deletions tree/ntuple/v7/inc/ROOT/RField.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,15 @@ class REntry;

namespace Internal {
struct RFieldCallbackInjector;
class RPageStorage;
// TODO(jblomer): find a better way to not have these three methods in the RFieldBase public API
void CallCommitClusterOnField(RFieldBase &);
void CallConnectPageSinkOnField(RFieldBase &, Detail::RPageSink &, NTupleSize_t firstEntry = 0);
void CallConnectPageSourceOnField(RFieldBase &, Detail::RPageSource &);
void CallConnectPageSinkOnField(RFieldBase &, Internal::RPageSink &, NTupleSize_t firstEntry = 0);
void CallConnectPageSourceOnField(RFieldBase &, Internal::RPageSource &);
} // namespace Internal

namespace Detail {
class RFieldVisitor;
class RPageStorage;
} // namespace Detail

// clang-format off
Expand All @@ -90,8 +90,8 @@ class RFieldBase {
friend class ROOT::Experimental::RCollectionField; // to move the fields from the collection model
friend struct ROOT::Experimental::Internal::RFieldCallbackInjector; // used for unit tests
friend void Internal::CallCommitClusterOnField(RFieldBase &);
friend void Internal::CallConnectPageSinkOnField(RFieldBase &, Detail::RPageSink &, NTupleSize_t);
friend void Internal::CallConnectPageSourceOnField(RFieldBase &, Detail::RPageSource &);
friend void Internal::CallConnectPageSinkOnField(RFieldBase &, Internal::RPageSink &, NTupleSize_t);
friend void Internal::CallConnectPageSourceOnField(RFieldBase &, Internal::RPageSource &);
using ReadCallback_t = std::function<void(void *)>;

protected:
Expand Down Expand Up @@ -347,12 +347,12 @@ private:
/// Fields and their columns live in the void until connected to a physical page storage. Only once connected, data
/// can be read or written. In order to find the field in the page storage, the field's on-disk ID has to be set.
/// \param firstEntry The global index of the first entry with on-disk data for the connected field
void ConnectPageSink(Detail::RPageSink &pageSink, NTupleSize_t firstEntry = 0);
void ConnectPageSink(Internal::RPageSink &pageSink, NTupleSize_t firstEntry = 0);
/// Connects the field and its sub field tree to the given page source. Once connected, data can be read.
/// Only unconnected fields may be connected, i.e. the method is not idempotent. The field ID has to be set prior to
/// calling this function. For sub fields, a field ID may or may not be set. If the field ID is unset, it will be
/// determined using the page source descriptor, based on the parent field ID and the sub field name.
void ConnectPageSource(Detail::RPageSource &pageSource);
void ConnectPageSource(Internal::RPageSource &pageSource);

protected:
/// Input parameter to ReadBulk() and ReadBulkImpl(). See RBulk class for more information
Expand Down Expand Up @@ -380,9 +380,9 @@ protected:
/// Points into fColumns. All fields that have columns have a distinct main column. For simple fields
/// (float, int, ...), the principal column corresponds to the field type. For collection fields except std::array,
/// the main column is the offset field. Class fields have no column of their own.
Detail::RColumn *fPrincipalColumn;
Internal::RColumn *fPrincipalColumn;
/// The columns are connected either to a sink or to a source (not to both); they are owned by the field.
std::vector<std::unique_ptr<Detail::RColumn>> fColumns;
std::vector<std::unique_ptr<Internal::RColumn>> fColumns;
/// Properties of the type that allow for optimizations of collections of that type
int fTraits = 0;
/// A typedef or using name that was used when creating the field
Expand Down Expand Up @@ -497,7 +497,7 @@ protected:
static void CallReadOn(RFieldBase &other, NTupleSize_t globalIndex, void *to) { other.Read(globalIndex, to); }

/// Fields may need direct access to the principal column of their sub fields, e.g. in RRVecField::ReadBulk
static Detail::RColumn *GetPrincipalColumnOf(const RFieldBase &other) { return other.fPrincipalColumn; }
static Internal::RColumn *GetPrincipalColumnOf(const RFieldBase &other) { return other.fPrincipalColumn; }

/// Set a user-defined function to be called after reading a value, giving a chance to inspect and/or modify the
/// value object.
Expand Down
Loading

0 comments on commit aa9b84e

Please sign in to comment.