diff --git a/build.sh b/build.sh index 456f25f..1d20c05 100755 --- a/build.sh +++ b/build.sh @@ -37,7 +37,7 @@ cd $BUILD_PATH && wget https://github.com/facebook/zstd/archive/v${zstd_version} # Note: if you don't have a good reason, please do not set -DPORTABLE=ON # This one is set here on purpose of compatibility with github action runtime processor -rocksdb_version="9.2.1" +rocksdb_version="9.3.1" cd $BUILD_PATH && wget https://github.com/facebook/rocksdb/archive/v${rocksdb_version}.tar.gz && tar xzf v${rocksdb_version}.tar.gz && cd rocksdb-${rocksdb_version}/ && \ mkdir -p build_place && cd build_place && cmake -DCMAKE_BUILD_TYPE=Release $CMAKE_REQUIRED_PARAMS -DCMAKE_PREFIX_PATH=$INSTALL_PREFIX -DWITH_TESTS=OFF -DWITH_GFLAGS=OFF \ -DWITH_BENCHMARK_TOOLS=OFF -DWITH_TOOLS=OFF -DWITH_MD_LIBRARY=OFF -DWITH_RUNTIME_DEBUG=OFF -DROCKSDB_BUILD_SHARED=OFF -DWITH_SNAPPY=ON -DWITH_LZ4=ON -DWITH_ZLIB=ON -DWITH_LIBURING=OFF \ diff --git a/c.h b/c.h index 9de8770..6e20676 100644 --- a/c.h +++ b/c.h @@ -1064,6 +1064,21 @@ rocksdb_block_based_options_set_pin_top_level_index_and_filter( rocksdb_block_based_table_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_block_based_table_factory( rocksdb_options_t* opt, rocksdb_block_based_table_options_t* table_options); +enum { + rocksdb_block_based_k_fallback_pinning_tier = 0, + rocksdb_block_based_k_none_pinning_tier = 1, + rocksdb_block_based_k_flush_and_similar_pinning_tier = 2, + rocksdb_block_based_k_all_pinning_tier = 3, +}; +extern ROCKSDB_LIBRARY_API void +rocksdb_block_based_options_set_top_level_index_pinning_tier( + rocksdb_block_based_table_options_t*, int); +extern ROCKSDB_LIBRARY_API void +rocksdb_block_based_options_set_partition_pinning_tier( + rocksdb_block_based_table_options_t*, int); +extern ROCKSDB_LIBRARY_API void +rocksdb_block_based_options_set_unpartitioned_pinning_tier( + rocksdb_block_based_table_options_t*, int); extern ROCKSDB_LIBRARY_API void 
rocksdb_options_set_write_buffer_manager( rocksdb_options_t* opt, rocksdb_write_buffer_manager_t* wbm); @@ -1682,6 +1697,18 @@ extern ROCKSDB_LIBRARY_API void rocksdb_options_set_wal_compression( extern ROCKSDB_LIBRARY_API int rocksdb_options_get_wal_compression( rocksdb_options_t* opt); +enum { + rocksdb_k_by_compensated_size_compaction_pri = 0, + rocksdb_k_oldest_largest_seq_first_compaction_pri = 1, + rocksdb_k_oldest_smallest_seq_first_compaction_pri = 2, + rocksdb_k_min_overlapping_ratio_compaction_pri = 3, + rocksdb_k_round_robin_compaction_pri = 4 +}; +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_compaction_pri( + rocksdb_options_t*, int); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_compaction_pri( + rocksdb_options_t*); + /* RateLimiter */ extern ROCKSDB_LIBRARY_API rocksdb_ratelimiter_t* rocksdb_ratelimiter_create( int64_t rate_bytes_per_sec, int64_t refill_period_us, int32_t fairness); diff --git a/options.go b/options.go index 3b373c7..b38247f 100644 --- a/options.go +++ b/options.go @@ -1677,6 +1677,30 @@ func (opts *Options) GetWALCompression() CompressionType { return CompressionType(C.rocksdb_options_get_wal_compression(opts.c)) } +// CompactionPri determines, in level-based compaction, which file from a level is +// picked to merge to the next level. We suggest trying KMinOverlappingRatioCompactionPri first when you tune your database. +type CompactionPri int + +const ( + KByCompensatedSizeCompactionPri CompactionPri = 0 + KOldestLargestSeqFirstCompactionPri CompactionPri = 1 + KOldestSmallestSeqFirstCompactionPri CompactionPri = 2 + KMinOverlappingRatioCompactionPri CompactionPri = 3 + KRoundRobinCompactionPri CompactionPri = 4 +) + +// SetCompactionPri sets the CompactionPri used in level-based compaction. +// +// Default: KMinOverlappingRatioCompactionPri +func (opts *Options) SetCompactionPri(pri CompactionPri) { + C.rocksdb_options_set_compaction_pri(opts.c, C.int(pri)) +} + +// GetCompactionPri gets the CompactionPri used in level-based compaction. 
+func (opts *Options) GetCompactionPri() CompactionPri { + return CompactionPri(C.rocksdb_options_get_compaction_pri(opts.c)) +} + // SetMaxSequentialSkipInIterations specifies whether an iteration->Next() // sequentially skips over keys with the same user-key or not. // diff --git a/options_block_based_table.go b/options_block_based_table.go index db13298..0b9d0dc 100644 --- a/options_block_based_table.go +++ b/options_block_based_table.go @@ -49,6 +49,31 @@ const ( KDataBlockIndexTypeBinarySearchAndHash DataBlockIndexType = 1 ) +// BlockBasedPinningTier is used to specify which tier of block-based tables should +// be affected by a block cache pinning setting. +type BlockBasedPinningTier int + +const ( + // For compatibility, this value specifies to fall back to the behavior + // indicated by the deprecated options, + // `pin_l0_filter_and_index_blocks_in_cache` and + // `pin_top_level_index_and_filter`. + KFallbackPinningTier BlockBasedPinningTier = 0 + + // This tier contains no block-based tables. + KNonePinningTier BlockBasedPinningTier = 1 + + // This tier contains block-based tables that may have originated from a + // memtable flush. In particular, it includes tables from L0 that are smaller + // than 1.5 times the current `write_buffer_size`. Note these criteria imply + // it can include intra-L0 compaction outputs and ingested files, as long as + // they are not abnormally large compared to flushed files in L0. + KFlushAndSimilarPinningTier BlockBasedPinningTier = 2 + + // This tier contains all block-based tables. + KAllPinningTier BlockBasedPinningTier = 3 +) + // BlockBasedTableOptions represents block-based table options. type BlockBasedTableOptions struct { c *C.rocksdb_block_based_table_options_t @@ -118,6 +143,33 @@ func (opts *BlockBasedTableOptions) SetBlockSize(blockSize int) { C.rocksdb_block_based_options_set_block_size(opts.c, C.size_t(blockSize)) } +// SetTopLevelIndexPinningTier sets the tier of block-based tables whose top-level index into metadata +// partitions will be pinned. 
Currently indexes and filters may be +// partitioned. +// +// Note `cache_index_and_filter_blocks` must be true for this option to have +// any effect. Otherwise any top-level index into metadata partitions would be +// held in table reader memory, outside the block cache. +func (opts *BlockBasedTableOptions) SetTopLevelIndexPinningTier(tier BlockBasedPinningTier) { + C.rocksdb_block_based_options_set_top_level_index_pinning_tier(opts.c, C.int(tier)) +} + +// SetPartitionPinningTier sets the tier of block-based tables whose metadata partitions will be pinned. +// Currently indexes and filters may be partitioned. +func (opts *BlockBasedTableOptions) SetPartitionPinningTier(tier BlockBasedPinningTier) { + C.rocksdb_block_based_options_set_partition_pinning_tier(opts.c, C.int(tier)) +} + +// SetUnpartitionedPinningTier sets the tier of block-based tables whose unpartitioned metadata blocks will be +// pinned. +// +// Note `cache_index_and_filter_blocks` must be true for this option to have +// any effect. Otherwise the unpartitioned meta-blocks would be held in table +// reader memory, outside the block cache. +func (opts *BlockBasedTableOptions) SetUnpartitionedPinningTier(tier BlockBasedPinningTier) { + C.rocksdb_block_based_options_set_unpartitioned_pinning_tier(opts.c, C.int(tier)) +} + // SetBlockSizeDeviation sets the block size deviation. // This is used opts close a block before it reaches the configured // 'block_size'. 
If the percentage of free space in the current block is less diff --git a/options_block_based_table_test.go b/options_block_based_table_test.go index f50caba..ad0323d 100644 --- a/options_block_based_table_test.go +++ b/options_block_based_table_test.go @@ -12,4 +12,7 @@ func TestBBT(t *testing.T) { b.SetBlockSize(123) b.SetOptimizeFiltersForMemory(true) + b.SetTopLevelIndexPinningTier(KFallbackPinningTier) + b.SetPartitionPinningTier(KNonePinningTier) + b.SetUnpartitionedPinningTier(KAllPinningTier) } diff --git a/options_test.go b/options_test.go index 3aaa0c9..786d1c0 100644 --- a/options_test.go +++ b/options_test.go @@ -387,6 +387,10 @@ func TestOptions(t *testing.T) { opts.SetStatisticsLevel(StatisticsLevelExceptHistogramOrTimers) require.Equal(t, StatisticsLevelExceptHistogramOrTimers, opts.GetStatisticsLevel()) + require.EqualValues(t, KMinOverlappingRatioCompactionPri, opts.GetCompactionPri()) + opts.SetCompactionPri(KRoundRobinCompactionPri) + require.EqualValues(t, KRoundRobinCompactionPri, opts.GetCompactionPri()) + require.EqualValues(t, 0, opts.GetTickerCount(TickerType_BACKUP_WRITE_BYTES)) hData := opts.GetHistogramData(HistogramType_BLOB_DB_MULTIGET_MICROS) require.EqualValues(t, 0, hData.P99)