Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions db/c.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3748,6 +3748,11 @@ void rocksdb_block_based_options_set_format_version(
options->rep.format_version = v;
}

// C API setter for the `separate_key_value_in_data_block` field stored in
// `options->rep`. A nonzero `v` turns the option on; zero turns it off.
void rocksdb_block_based_options_set_separate_key_value_in_data_block(
    rocksdb_block_based_table_options_t* options, unsigned char v) {
  // Spell out the unsigned char -> bool conversion the assignment performs.
  const bool enabled = (v != 0);
  options->rep.separate_key_value_in_data_block = enabled;
}

void rocksdb_block_based_options_set_index_type(
rocksdb_block_based_table_options_t* options, int v) {
options->rep.index_type = static_cast<BlockBasedTableOptions::IndexType>(v);
Expand Down
4 changes: 2 additions & 2 deletions db/version_edit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ bool VersionEdit::EncodeTo(std::string* dst,
PutVarint32Varint64(dst, kNextFileNumber, next_file_number_);
}
if (has_max_column_family_) {
PutVarint32Varint32(dst, kMaxColumnFamily, max_column_family_);
PutVarint32(dst, kMaxColumnFamily, max_column_family_);
}
if (has_min_log_number_to_keep_) {
PutVarint32Varint64(dst, kMinLogNumberToKeep, min_log_number_to_keep_);
Expand Down Expand Up @@ -143,7 +143,7 @@ bool VersionEdit::EncodeTo(std::string* dst,

// 0 is default and does not need to be explicitly written
if (column_family_ != 0) {
PutVarint32Varint32(dst, kColumnFamily, column_family_);
PutVarint32(dst, kColumnFamily, column_family_);
}

if (is_column_family_add_) {
Expand Down
1 change: 1 addition & 0 deletions db_stress_tool/db_stress_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ DECLARE_int32(level0_slowdown_writes_trigger);
DECLARE_int32(level0_stop_writes_trigger);
DECLARE_int32(block_size);
DECLARE_int32(format_version);
DECLARE_bool(separate_key_value_in_data_block);
DECLARE_int32(index_block_restart_interval);
DECLARE_int32(max_background_compactions);
DECLARE_int32(num_bottom_pri_threads);
Expand Down
5 changes: 5 additions & 0 deletions db_stress_tool/db_stress_gflags.cc
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,11 @@ DEFINE_int32(format_version,
ROCKSDB_NAMESPACE::BlockBasedTableOptions().format_version),
"Format version of SST files.");

// db_stress flag for BlockBasedTableOptions::separate_key_value_in_data_block.
// The flag's default is read from a default-constructed
// BlockBasedTableOptions, so it tracks the library default.
DEFINE_bool(separate_key_value_in_data_block,
ROCKSDB_NAMESPACE::BlockBasedTableOptions()
.separate_key_value_in_data_block,
"If true, data blocks store keys and values separately.");

DEFINE_int32(
index_block_restart_interval,
ROCKSDB_NAMESPACE::BlockBasedTableOptions().index_block_restart_interval,
Expand Down
2 changes: 2 additions & 0 deletions db_stress_tool/db_stress_test_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4313,6 +4313,8 @@ void InitializeOptionsFromFlags(
: CacheEntryRoleOptions::Decision::kDisabled}});
block_based_options.format_version =
static_cast<uint32_t>(FLAGS_format_version);
block_based_options.separate_key_value_in_data_block =
FLAGS_separate_key_value_in_data_block;
block_based_options.index_block_restart_interval =
static_cast<int32_t>(FLAGS_index_block_restart_interval);
block_based_options.filter_policy = filter_policy;
Expand Down
3 changes: 3 additions & 0 deletions include/rocksdb/c.h
Original file line number Diff line number Diff line change
Expand Up @@ -1195,6 +1195,9 @@ rocksdb_block_based_options_set_whole_key_filtering(
rocksdb_block_based_table_options_t*, unsigned char);
extern ROCKSDB_LIBRARY_API void rocksdb_block_based_options_set_format_version(
rocksdb_block_based_table_options_t*, int);
/* Sets separate_key_value_in_data_block on the given block-based table
 * options. The unsigned char is treated as a boolean: nonzero enables the
 * option, zero disables it. */
extern ROCKSDB_LIBRARY_API void
rocksdb_block_based_options_set_separate_key_value_in_data_block(
rocksdb_block_based_table_options_t*, unsigned char);
enum {
rocksdb_block_based_table_index_type_binary_search = 0,
rocksdb_block_based_table_index_type_hash_search = 1,
Expand Down
9 changes: 9 additions & 0 deletions include/rocksdb/table.h
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,15 @@ struct BlockBasedTableOptions {
// who might only upgrade a few times per year.
uint32_t format_version = 7;

// When true, data blocks store keys and values separately: keys are stored
// at the beginning of the block, followed by values at the end. This can
// improve read performance and compression, at a cost of one varint per
// restart interval (~1 bit per key by default). Workloads with small values
// or a low block_restart_interval may prefer to leave this set to false.
//
// Default: false
bool separate_key_value_in_data_block = false;

// Store index blocks on disk in compressed format. Changing this option to
// false will avoid the overhead of decompression if index blocks are evicted
// and read back
Expand Down
12 changes: 12 additions & 0 deletions include/rocksdb/table_properties.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ struct TablePropertiesNames {
static const std::string kUserDefinedTimestampsPersisted;
static const std::string kKeyLargestSeqno;
static const std::string kKeySmallestSeqno;
static const std::string kDataBlockRestartInterval;
static const std::string kIndexBlockRestartInterval;
static const std::string kSeparatedKVInDataBlock;
};

// `TablePropertiesCollector` provides the mechanism for users to collect
Expand Down Expand Up @@ -320,6 +323,15 @@ struct TableProperties {

// Reports whether key_smallest_seqno holds a value other than UINT64_MAX.
bool HasKeySmallestSeqno() const {
  return key_smallest_seqno != UINT64_MAX;
}

// Block restart intervals used when building this SST file.
// 0 means unknown (for backwards compatibility with older SST files).
uint64_t data_block_restart_interval = 0;
uint64_t index_block_restart_interval = 0;

// Whether the SST file uses separated key/value storage in data blocks
// (0 = false). Note that the spelling differs from the table option
// BlockBasedTableOptions::separate_key_value_in_data_block.
uint64_t separated_kv_in_data_block = 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer not to have slightly different names for essentially the same thing:
`separated_kv_in_data_block` (table property) vs. `separate_key_value_in_data_block` (table option).

This property might not age well if we start to mix separation strategies based on factors in the data, such as avoiding separation for small values.


// DB identity
// db_id is an identifier generated the first time the DB is created
// If DB identity is unset or unassigned, `db_id` will be an empty string.
Expand Down
5 changes: 3 additions & 2 deletions java/rocksjni/portal.h
Original file line number Diff line number Diff line change
Expand Up @@ -9237,8 +9237,8 @@ class BlockBasedTableOptionsJni
return nullptr;
}

jmethodID method_id_init =
env->GetMethodID(jclazz, "<init>", "(ZZZZBBDBZJIIIJZZZZZIIZZJJBBBJD)V");
jmethodID method_id_init = env->GetMethodID(
jclazz, "<init>", "(ZZZZBBDBZJIIIJZZZZZIIZZZJJBBBJD)V");
if (method_id_init == nullptr) {
// exception thrown: NoSuchMethodException or OutOfMemoryError
return nullptr;
Expand Down Expand Up @@ -9281,6 +9281,7 @@ class BlockBasedTableOptionsJni
table_factory_options->verify_compression,
table_factory_options->read_amp_bytes_per_bit,
table_factory_options->format_version,
table_factory_options->separate_key_value_in_data_block,
table_factory_options->enable_index_compression,
table_factory_options->block_align,
static_cast<jlong>(table_factory_options->super_block_alignment_size),
Expand Down
3 changes: 3 additions & 0 deletions java/rocksjni/table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ jlong Java_org_rocksdb_BlockBasedTableConfig_newTableFactoryHandle(
jboolean juse_delta_encoding, jlong jfilter_policy_handle,
jboolean jwhole_key_filtering, jboolean jverify_compression,
jint jread_amp_bytes_per_bit, jint jformat_version,
jboolean jseparate_key_value_in_data_block,
jboolean jenable_index_compression, jboolean jblock_align,
jlong jsuper_block_alignment_size,
jlong jsuper_block_alignment_space_overhead_ratio, jbyte jindex_shortening,
Expand Down Expand Up @@ -135,6 +136,8 @@ jlong Java_org_rocksdb_BlockBasedTableConfig_newTableFactoryHandle(
options.read_amp_bytes_per_bit =
static_cast<uint32_t>(jread_amp_bytes_per_bit);
options.format_version = static_cast<uint32_t>(jformat_version);
options.separate_key_value_in_data_block =
static_cast<bool>(jseparate_key_value_in_data_block);
options.enable_index_compression =
static_cast<bool>(jenable_index_compression);
options.block_align = static_cast<bool>(jblock_align);
Expand Down
52 changes: 44 additions & 8 deletions java/src/main/java/org/rocksdb/BlockBasedTableConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public BlockBasedTableConfig() {
verifyCompression = false;
readAmpBytesPerBit = 0;
formatVersion = 7;
separateKeyValueInDataBlock = false;
enableIndexCompression = true;
blockAlign = false;
superBlockAlignmentSize = 0;
Expand All @@ -63,10 +64,11 @@ private BlockBasedTableConfig(final boolean cacheIndexAndFilterBlocks,
final boolean partitionFilters, final boolean optimizeFiltersForMemory,
final boolean useDeltaEncoding, final boolean wholeKeyFiltering,
final boolean verifyCompression, final int readAmpBytesPerBit, final int formatVersion,
final boolean enableIndexCompression, final boolean blockAlign,
final long superBlockAlignmentSize, final long superBlockAlignmentSpaceOverheadRatio,
final byte indexShortening, final byte indexSearchType, final byte filterPolicyType,
final long filterPolicyHandle, final double filterPolicyConfigValue) {
final boolean separateKeyValueInDataBlock, final boolean enableIndexCompression,
final boolean blockAlign, final long superBlockAlignmentSize,
final long superBlockAlignmentSpaceOverheadRatio, final byte indexShortening,
final byte indexSearchType, final byte filterPolicyType, final long filterPolicyHandle,
final double filterPolicyConfigValue) {
this.cacheIndexAndFilterBlocks = cacheIndexAndFilterBlocks;
this.cacheIndexAndFilterBlocksWithHighPriority = cacheIndexAndFilterBlocksWithHighPriority;
this.pinL0FilterAndIndexBlocksInCache = pinL0FilterAndIndexBlocksInCache;
Expand All @@ -88,6 +90,7 @@ private BlockBasedTableConfig(final boolean cacheIndexAndFilterBlocks,
this.verifyCompression = verifyCompression;
this.readAmpBytesPerBit = readAmpBytesPerBit;
this.formatVersion = formatVersion;
this.separateKeyValueInDataBlock = separateKeyValueInDataBlock;
this.enableIndexCompression = enableIndexCompression;
this.blockAlign = blockAlign;
this.superBlockAlignmentSize = superBlockAlignmentSize;
Expand Down Expand Up @@ -753,6 +756,36 @@ public BlockBasedTableConfig setFormatVersion(
return this;
}

/**
 * Reports whether data blocks are configured to store keys and values
 * separately.
 * <p>
 * See {@link #setSeparateKeyValueInDataBlock(boolean)}.
 *
 * @return true if separate key value in data block is enabled, false otherwise
 */
public boolean separateKeyValueInDataBlock() {
  return this.separateKeyValueInDataBlock;
}

/**
 * Controls whether data blocks store keys and values separately. When
 * enabled, keys are stored at the beginning of the block, followed by values
 * at the end. This can improve read performance and compression, at a cost of
 * one varint per restart interval (~1 bit per key by default). Workloads with
 * small values or a low block_restart_interval may prefer to leave this set
 * to false.
 * <p>
 * Default: false
 *
 * @param separateKeyValueInDataBlock true to enable, false to disable
 *
 * @return the reference to the current option.
 */
public BlockBasedTableConfig setSeparateKeyValueInDataBlock(
    final boolean separateKeyValueInDataBlock) {
  this.separateKeyValueInDataBlock = separateKeyValueInDataBlock;
  return this;
}

/**
* Determine if index compression is enabled.
* <p>
Expand Down Expand Up @@ -1016,9 +1049,10 @@ public BlockBasedTableConfig setHashIndexAllowCollision(
persistentCacheHandle, blockSize, blockSizeDeviation, blockRestartInterval,
indexBlockRestartInterval, metadataBlockSize, partitionFilters, optimizeFiltersForMemory,
useDeltaEncoding, filterPolicyHandle, wholeKeyFiltering, verifyCompression,
readAmpBytesPerBit, formatVersion, enableIndexCompression, blockAlign,
superBlockAlignmentSize, superBlockAlignmentSpaceOverheadRatio, indexShortening.getValue(),
indexSearchType.getValue(), blockCacheSize, blockCacheNumShardBits);
readAmpBytesPerBit, formatVersion, separateKeyValueInDataBlock, enableIndexCompression,
blockAlign, superBlockAlignmentSize, superBlockAlignmentSpaceOverheadRatio,
indexShortening.getValue(), indexSearchType.getValue(), blockCacheSize,
blockCacheNumShardBits);
}

private static native long newTableFactoryHandle(final boolean cacheIndexAndFilterBlocks,
Expand All @@ -1032,7 +1066,8 @@ private static native long newTableFactoryHandle(final boolean cacheIndexAndFilt
final boolean partitionFilters, final boolean optimizeFiltersForMemory,
final boolean useDeltaEncoding, final long filterPolicyHandle,
final boolean wholeKeyFiltering, final boolean verifyCompression,
final int readAmpBytesPerBit, final int formatVersion, final boolean enableIndexCompression,
final int readAmpBytesPerBit, final int formatVersion,
final boolean separateKeyValueInDataBlock, final boolean enableIndexCompression,
final boolean blockAlign, final long superBlockAlignmentSize,
final long superBlockAlignmentSpaceOverheadRatio, final byte indexShortening,
final byte indexSearchType,
Expand Down Expand Up @@ -1064,6 +1099,7 @@ private static native long newTableFactoryHandle(final boolean cacheIndexAndFilt
private boolean verifyCompression;
private int readAmpBytesPerBit;
private int formatVersion;
private boolean separateKeyValueInDataBlock;
private boolean enableIndexCompression;
private boolean blockAlign;
private long superBlockAlignmentSize;
Expand Down
8 changes: 8 additions & 0 deletions java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,14 @@ public void invalidFormatVersion() throws RocksDBException {
}
}

@Test
public void separateKeyValueInDataBlock() {
  final BlockBasedTableConfig tableConfig = new BlockBasedTableConfig();

  // A freshly constructed config has the option disabled.
  assertThat(tableConfig.separateKeyValueInDataBlock()).isFalse();

  // The setter's new value is visible through the getter.
  tableConfig.setSeparateKeyValueInDataBlock(true);
  assertThat(tableConfig.separateKeyValueInDataBlock()).isTrue();
}

@Test
public void enableIndexCompression() {
final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
Expand Down
3 changes: 3 additions & 0 deletions java/src/test/java/org/rocksdb/OptionsUtilTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ private void verifyTableFormatOptions(final LoaderUnderTest loaderUnderTest)
altCFTableConfig.setVerifyCompression(true);
altCFTableConfig.setReadAmpBytesPerBit(2);
altCFTableConfig.setFormatVersion(8);
altCFTableConfig.setSeparateKeyValueInDataBlock(true);
altCFTableConfig.setEnableIndexCompression(false);
altCFTableConfig.setBlockAlign(true);
altCFTableConfig.setSuperBlockAlignmentSize(1024 * 1024);
Expand Down Expand Up @@ -365,6 +366,8 @@ private void verifyBlockBasedTableConfig(
assertThat(actual.verifyCompression()).isEqualTo(expected.verifyCompression());
assertThat(actual.readAmpBytesPerBit()).isEqualTo(expected.readAmpBytesPerBit());
assertThat(actual.formatVersion()).isEqualTo(expected.formatVersion());
assertThat(actual.separateKeyValueInDataBlock())
.isEqualTo(expected.separateKeyValueInDataBlock());
assertThat(actual.enableIndexCompression()).isEqualTo(expected.enableIndexCompression());
assertThat(actual.blockAlign()).isEqualTo(expected.blockAlign());
assertThat(actual.superBlockAlignmentSize()).isEqualTo(expected.superBlockAlignmentSize());
Expand Down
6 changes: 4 additions & 2 deletions options/options_settable_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,8 @@ TEST_F(OptionsSettableTest, BlockBasedTableOptionsAllFieldsSettable) {
"prepopulate_block_cache=kDisable;"
"initial_auto_readahead_size=0;"
"num_file_reads_for_auto_readahead=0;"
"fail_if_no_udi_on_open=true",
"fail_if_no_udi_on_open=true;"
"separate_key_value_in_data_block=true",
new_bbto));

ASSERT_EQ(unset_bytes_base,
Expand Down Expand Up @@ -293,7 +294,8 @@ TEST_F(OptionsSettableTest, TablePropertiesAllFieldsSettable) {
"name=64656661756C74;user_defined_timestamps_persisted=1;num_entries=100;"
"external_sst_file_global_seqno_offset=0;num_merge_operands=0;index_key_"
"is_user_key=0;key_largest_seqno=18446744073709551615;key_smallest_seqno="
"18;",
"18;data_block_restart_interval=16;index_block_restart_interval=1;"
"separated_kv_in_data_block=0;",
new_tp));

// All bytes are set from the parse
Expand Down
Loading
Loading