Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Default compression via CS config stable-24-3-14-cs #13803

Open
wants to merge 2 commits into
base: stable-24-3-14-cs
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions ydb/core/base/appdata_fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -283,15 +283,16 @@ inline TAppData* AppData(NActors::TActorSystem* actorSystem) {
}

inline bool HasAppData() {
return !!NActors::TlsActivationContext;
return !!NActors::TlsActivationContext
&& NActors::TlsActivationContext->ExecutorThread.ActorSystem
&& NActors::TlsActivationContext->ExecutorThread.ActorSystem->AppData<TAppData>();
}

inline TAppData& AppDataVerified() {
Y_ABORT_UNLESS(HasAppData());
auto& actorSystem = NActors::TlsActivationContext->ExecutorThread.ActorSystem;
Y_ABORT_UNLESS(actorSystem);
TAppData* const x = actorSystem->AppData<TAppData>();
Y_ABORT_UNLESS(x && x->Magic == TAppData::MagicTag);
Y_ABORT_UNLESS(x->Magic == TAppData::MagicTag);
return *x;
}

Expand Down
55 changes: 55 additions & 0 deletions ydb/core/config/validation/column_shard_config_validator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#include "validators.h"

#include <ydb/core/formats/arrow/serializer/parsing.h>
#include <ydb/core/formats/arrow/serializer/utils.h>
#include <ydb/core/protos/config.pb.h>

#include <util/generic/string.h>
#include <util/string/builder.h>

#include <vector>

namespace NKikimr::NConfig {
namespace {

EValidationResult ValidateDefaultCompression(const NKikimrConfig::TColumnShardConfig& columnShardConfig, std::vector<TString>& msg) {
if (!columnShardConfig.HasDefaultCompression() && !columnShardConfig.HasDefaultCompressionLevel()) {
return EValidationResult::Ok;
}
if (!columnShardConfig.HasDefaultCompression() && columnShardConfig.HasDefaultCompressionLevel()) {
msg.push_back("ColumnShardConfig: compression level is set without compression type");
return EValidationResult::Error;
}
std::optional<arrow::Compression::type> codec = NArrow::CompressionFromProto(columnShardConfig.GetDefaultCompression());
if (!codec.has_value()) {
msg.push_back("ColumnShardConfig: Unknown compression");
return EValidationResult::Error;
}
if (columnShardConfig.HasDefaultCompressionLevel()) {
if (!NArrow::SupportsCompressionLevel(codec.value())) {
TString messageErr = TStringBuilder() << "ColumnShardConfig: compression `" << NArrow::CompressionToString(codec.value())
<< "` does not support compression level";
msg.push_back(messageErr);
return EValidationResult::Error;
} else if (!NArrow::SupportsCompressionLevel(codec.value(), columnShardConfig.GetDefaultCompressionLevel())) {
TString messageErr = TStringBuilder()
<< "ColumnShardConfig: compression `" << NArrow::CompressionToString(codec.value())
<< "` does not support compression level = " << std::to_string(columnShardConfig.GetDefaultCompressionLevel());
msg.push_back(messageErr);
return EValidationResult::Error;
}
}
return EValidationResult::Ok;
}

} // namespace

EValidationResult ValidateColumnShardConfig(const NKikimrConfig::TColumnShardConfig& columnShardConfig, std::vector<TString>& msg) {
EValidationResult validateDefaultCompressionResult = ValidateDefaultCompression(columnShardConfig, msg);
if (validateDefaultCompressionResult == EValidationResult::Error) {
return EValidationResult::Error;
}
return EValidationResult::Ok;
}

} // namespace NKikimr::NConfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#include <ydb/core/config/validation/validators.h>
#include <ydb/core/protos/config.pb.h>
#include <ydb/core/protos/flat_scheme_op.pb.h>

#include <library/cpp/testing/unittest/registar.h>

#include <vector>

using namespace NKikimr::NConfig;

Y_UNIT_TEST_SUITE(ColumnShardConfigValidation) {
Y_UNIT_TEST(AcceptDefaultCompression) {
NKikimrConfig::TColumnShardConfig CSConfig;
std::vector<TString> error;
EValidationResult result = ValidateColumnShardConfig(CSConfig, error);
UNIT_ASSERT_EQUAL(result, EValidationResult::Ok);
UNIT_ASSERT_C(error.empty(), "Should not be errors");
}

Y_UNIT_TEST(NotAcceptDefaultCompression) {
NKikimrConfig::TColumnShardConfig CSConfig;
std::vector<TString> error;
CSConfig.SetDefaultCompressionLevel(2);
EValidationResult result = ValidateColumnShardConfig(CSConfig, error);
UNIT_ASSERT_EQUAL(result, EValidationResult::Error);
UNIT_ASSERT_VALUES_EQUAL(error.size(), 1);
UNIT_ASSERT_STRINGS_EQUAL(error.front(), "ColumnShardConfig: compression level is set without compression type");
}

Y_UNIT_TEST(CorrectPlainCompression) {
NKikimrConfig::TColumnShardConfig CSConfig;
std::vector<TString> error;
CSConfig.SetDefaultCompression(NKikimrSchemeOp::EColumnCodec::ColumnCodecPlain);
EValidationResult result = ValidateColumnShardConfig(CSConfig, error);
UNIT_ASSERT_EQUAL(result, EValidationResult::Ok);
UNIT_ASSERT_C(error.empty(), "Should not be errors");
}

Y_UNIT_TEST(NotCorrectPlainCompression) {
NKikimrConfig::TColumnShardConfig CSConfig;
std::vector<TString> error;
CSConfig.SetDefaultCompression(NKikimrSchemeOp::EColumnCodec::ColumnCodecPlain);
CSConfig.SetDefaultCompressionLevel(1);
EValidationResult result = ValidateColumnShardConfig(CSConfig, error);
UNIT_ASSERT_EQUAL(result, EValidationResult::Error);
UNIT_ASSERT_VALUES_EQUAL(error.size(), 1);
UNIT_ASSERT_STRINGS_EQUAL(error.front(), "ColumnShardConfig: compression `uncompressed` does not support compression level");
}

Y_UNIT_TEST(CorrectLZ4Compression) {
NKikimrConfig::TColumnShardConfig CSConfig;
std::vector<TString> error;
CSConfig.SetDefaultCompression(NKikimrSchemeOp::EColumnCodec::ColumnCodecLZ4);
EValidationResult result = ValidateColumnShardConfig(CSConfig, error);
UNIT_ASSERT_EQUAL(result, EValidationResult::Ok);
UNIT_ASSERT_C(error.empty(), "Should not be errors");
}

Y_UNIT_TEST(NotCorrectLZ4Compression) {
NKikimrConfig::TColumnShardConfig CSConfig;
std::vector<TString> error;
CSConfig.SetDefaultCompression(NKikimrSchemeOp::EColumnCodec::ColumnCodecLZ4);
CSConfig.SetDefaultCompressionLevel(1);
EValidationResult result = ValidateColumnShardConfig(CSConfig, error);
UNIT_ASSERT_EQUAL(result, EValidationResult::Error);
UNIT_ASSERT_VALUES_EQUAL(error.size(), 1);
UNIT_ASSERT_STRINGS_EQUAL(error.front(), "ColumnShardConfig: compression `lz4` does not support compression level");
}

Y_UNIT_TEST(CorrectZSTDCompression) {
NKikimrConfig::TColumnShardConfig CSConfig;
std::vector<TString> error;
CSConfig.SetDefaultCompression(NKikimrSchemeOp::EColumnCodec::ColumnCodecZSTD);
EValidationResult result = ValidateColumnShardConfig(CSConfig, error);
UNIT_ASSERT_EQUAL(result, EValidationResult::Ok);
UNIT_ASSERT_C(error.empty(), "Should not be errors");
CSConfig.SetDefaultCompressionLevel(0);
result = ValidateColumnShardConfig(CSConfig, error);
UNIT_ASSERT_EQUAL(result, EValidationResult::Ok);
UNIT_ASSERT_C(error.empty(), "Should not be errors");
CSConfig.SetDefaultCompressionLevel(-100);
result = ValidateColumnShardConfig(CSConfig, error);
UNIT_ASSERT_EQUAL(result, EValidationResult::Ok);
UNIT_ASSERT_C(error.empty(), "Should not be errors");
}

Y_UNIT_TEST(NotCorrectZSTDCompression) {
NKikimrConfig::TColumnShardConfig CSConfig;
std::vector<TString> error;
CSConfig.SetDefaultCompression(NKikimrSchemeOp::EColumnCodec::ColumnCodecZSTD);
CSConfig.SetDefaultCompressionLevel(100);
EValidationResult result = ValidateColumnShardConfig(CSConfig, error);
UNIT_ASSERT_EQUAL(result, EValidationResult::Error);
UNIT_ASSERT_VALUES_EQUAL(error.size(), 1);
UNIT_ASSERT_STRINGS_EQUAL(error.front(), "ColumnShardConfig: compression `zstd` does not support compression level = 100");
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
UNITTEST_FOR(ydb/core/config/validation)

SRC(
column_shard_config_validator_ut.cpp
)

END()
14 changes: 14 additions & 0 deletions ydb/core/config/validation/validators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,4 +161,18 @@ EValidationResult ValidateStaticGroup(const NKikimrConfig::TAppConfig& current,
return EValidationResult::Ok;
}

EValidationResult ValidateConfig(const NKikimrConfig::TAppConfig& config, std::vector<TString>& msg) {
if (config.HasColumnShardConfig()) {
NKikimr::NConfig::EValidationResult result = NKikimr::NConfig::ValidateColumnShardConfig(config.GetColumnShardConfig(), msg);
if (result == NKikimr::NConfig::EValidationResult::Error) {
return EValidationResult::Error;
}
}
if (msg.size() > 0) {
return EValidationResult::Warn;
}

return EValidationResult::Ok;
}

} // namespace NKikimr::NConfig
8 changes: 8 additions & 0 deletions ydb/core/config/validation/validators.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,12 @@ EValidationResult ValidateStaticGroup(
const NKikimrConfig::TAppConfig& proposed,
std::vector<TString>& msg);

EValidationResult ValidateColumnShardConfig(
const NKikimrConfig::TColumnShardConfig& columnShardConfig,
std::vector<TString>& msg);

EValidationResult ValidateConfig(
const NKikimrConfig::TAppConfig& config,
std::vector<TString>& msg);

} // namespace NKikimr::NConfig
3 changes: 3 additions & 0 deletions ydb/core/config/validation/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,18 @@ LIBRARY()
SRCS(
validators.h
validators.cpp
column_shard_config_validator.cpp
)

PEERDIR(
ydb/core/protos
ydb/core/formats/arrow/serializer
)

END()

RECURSE_FOR_TESTS(
ut
column_shard_config_validator_ut
)

26 changes: 24 additions & 2 deletions ydb/core/formats/arrow/serializer/native.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
#pragma once

#include "abstract.h"
#include "parsing.h"

#include <ydb/core/base/appdata_fwd.h>
#include <ydb/core/protos/config.pb.h>
#include <ydb/core/protos/flat_scheme_op.pb.h>

#include <ydb/library/accessor/accessor.h>
Expand All @@ -22,6 +25,11 @@ class TNativeSerializer: public ISerializer {

TConclusion<std::shared_ptr<arrow::util::Codec>> BuildCodec(const arrow::Compression::type& cType, const std::optional<ui32> level) const;
static const inline TFactory::TRegistrator<TNativeSerializer> Registrator = TFactory::TRegistrator<TNativeSerializer>(GetClassNameStatic());

static std::shared_ptr<arrow::util::Codec> GetDefaultCodec() {
return *arrow::util::Codec::Create(arrow::Compression::LZ4_FRAME);
}

protected:
virtual bool IsCompatibleForExchangeWithSameClass(const ISerializer& /*item*/) const override {
return true;
Expand Down Expand Up @@ -53,7 +61,21 @@ class TNativeSerializer: public ISerializer {
static arrow::ipc::IpcOptions BuildDefaultOptions() {
arrow::ipc::IpcWriteOptions options;
options.use_threads = false;
options.codec = *arrow::util::Codec::Create(arrow::Compression::LZ4_FRAME);
if (HasAppData()) {
if (AppData()->ColumnShardConfig.HasDefaultCompression()) {
arrow::Compression::type codec = CompressionFromProto(AppData()->ColumnShardConfig.GetDefaultCompression()).value();
if (AppData()->ColumnShardConfig.HasDefaultCompressionLevel()) {
options.codec = NArrow::TStatusValidator::GetValid(
arrow::util::Codec::Create(codec, AppData()->ColumnShardConfig.GetDefaultCompressionLevel()));
} else {
options.codec = NArrow::TStatusValidator::GetValid(arrow::util::Codec::Create(codec));
}
} else {
options.codec = GetDefaultCodec();
}
} else {
options.codec = GetDefaultCodec();
}
return options;
}

Expand Down Expand Up @@ -83,7 +105,7 @@ class TNativeSerializer: public ISerializer {
}

static arrow::ipc::IpcOptions GetDefaultOptions() {
static arrow::ipc::IpcWriteOptions options = BuildDefaultOptions();
arrow::ipc::IpcWriteOptions options = BuildDefaultOptions();
return options;
}

Expand Down
20 changes: 13 additions & 7 deletions ydb/core/formats/arrow/serializer/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,18 @@
#include <contrib/libs/apache/arrow/cpp/src/arrow/util/compression.h>

namespace NKikimr::NArrow {
bool SupportsCompressionLevel(const arrow::Compression::type compression) {
return arrow::util::Codec::SupportsCompressionLevel(compression);
}

bool SupportsCompressionLevel(const NKikimrSchemeOp::EColumnCodec compression) {
return SupportsCompressionLevel(CompressionFromProto(compression).value());
bool SupportsCompressionLevel(const arrow::Compression::type compression, const std::optional<i32>& compressionLevel) {
if (!arrow::util::Codec::SupportsCompressionLevel(compression)) {
return false;
}
if (compressionLevel.has_value()) {
int minLevel = MinimumCompressionLevel(compression).value();
int maxLevel = MaximumCompressionLevel(compression).value();
if (compressionLevel < minLevel || compressionLevel > maxLevel) {
return false;
}
}
return true;
}

std::optional<int> MinimumCompressionLevel(const arrow::Compression::type compression) {
Expand All @@ -26,4 +32,4 @@ std::optional<int> MaximumCompressionLevel(const arrow::Compression::type compre
}
return NArrow::TStatusValidator::GetValid(arrow::util::Codec::MaximumCompressionLevel(compression));
}
}
} // namespace NKikimr::NArrow
7 changes: 2 additions & 5 deletions ydb/core/formats/arrow/serializer/utils.h
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
#pragma once

#include <ydb/core/protos/flat_scheme_op.pb.h>

#include <contrib/libs/apache/arrow/cpp/src/arrow/util/type_fwd.h>
#include <util/system/yassert.h>
#include <util/system/types.h>

#include <optional>

namespace NKikimr::NArrow {
bool SupportsCompressionLevel(const arrow::Compression::type compression);
bool SupportsCompressionLevel(const NKikimrSchemeOp::EColumnCodec compression);
bool SupportsCompressionLevel(const arrow::Compression::type compression, const std::optional<i32>& compressionLevel = {});

std::optional<int> MinimumCompressionLevel(const arrow::Compression::type compression);
std::optional<int> MaximumCompressionLevel(const arrow::Compression::type compression);
Expand Down
13 changes: 10 additions & 3 deletions ydb/core/kqp/host/kqp_gateway_proxy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,12 +410,19 @@ bool FillColumnTableSchema(NKikimrSchemeOp::TColumnTableSchema& schema, const T&
}
familyDescription->SetColumnCodec(codec);
} else {
code = Ydb::StatusIds::BAD_REQUEST;
error = TStringBuilder() << "Compression is not set for column family'" << family.Name << "'";
return false;
if (family.Name != "default") {
code = Ydb::StatusIds::BAD_REQUEST;
error = TStringBuilder() << "Compression is not set for non `default` column family '" << family.Name << "'";
return false;
}
}

if (family.CompressionLevel.Defined()) {
if (!family.Compression.Defined()) {
code = Ydb::StatusIds::BAD_REQUEST;
error = TStringBuilder() << "Compression is not set for column family '" << family.Name << "', but compression level is set";
return false;
}
familyDescription->SetColumnCodecLevel(family.CompressionLevel.GetRef());
}
}
Expand Down
Loading
Loading