Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 22 additions & 18 deletions be/src/exec/operator/olap_scan_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,33 +238,37 @@ Status OlapScanLocalState::_init_profile() {

_statistics_collect_timer = ADD_TIMER(_scanner_profile, "StatisticsCollectTime");
_inverted_index_filter_counter =
ADD_COUNTER(_segment_profile, "RowsInvertedIndexFiltered", TUnit::UNIT);
_inverted_index_filter_timer = ADD_TIMER(_segment_profile, "InvertedIndexFilterTime");
ADD_COUNTER_WITH_LEVEL(_segment_profile, "RowsInvertedIndexFiltered", TUnit::UNIT, 1);
_inverted_index_filter_timer =
ADD_TIMER_WITH_LEVEL(_segment_profile, "InvertedIndexFilterTime", 1);
_inverted_index_query_cache_hit_counter =
ADD_COUNTER(_segment_profile, "InvertedIndexQueryCacheHit", TUnit::UNIT);
ADD_COUNTER_WITH_LEVEL(_segment_profile, "InvertedIndexQueryCacheHit", TUnit::UNIT, 1);
_inverted_index_query_cache_miss_counter =
ADD_COUNTER(_segment_profile, "InvertedIndexQueryCacheMiss", TUnit::UNIT);
_inverted_index_query_timer = ADD_TIMER(_segment_profile, "InvertedIndexQueryTime");
ADD_COUNTER_WITH_LEVEL(_segment_profile, "InvertedIndexQueryCacheMiss", TUnit::UNIT, 1);
_inverted_index_query_timer =
ADD_TIMER_WITH_LEVEL(_segment_profile, "InvertedIndexQueryTime", 1);
_inverted_index_query_null_bitmap_timer =
ADD_TIMER(_segment_profile, "InvertedIndexQueryNullBitmapTime");
ADD_TIMER_WITH_LEVEL(_segment_profile, "InvertedIndexQueryNullBitmapTime", 1);
_inverted_index_query_bitmap_copy_timer =
ADD_TIMER(_segment_profile, "InvertedIndexQueryBitmapCopyTime");
ADD_TIMER_WITH_LEVEL(_segment_profile, "InvertedIndexQueryBitmapCopyTime", 1);
_inverted_index_searcher_open_timer =
ADD_TIMER(_segment_profile, "InvertedIndexSearcherOpenTime");
ADD_TIMER_WITH_LEVEL(_segment_profile, "InvertedIndexSearcherOpenTime", 1);
_inverted_index_searcher_search_timer =
ADD_TIMER(_segment_profile, "InvertedIndexSearcherSearchTime");
ADD_TIMER_WITH_LEVEL(_segment_profile, "InvertedIndexSearcherSearchTime", 1);
_inverted_index_searcher_search_init_timer =
ADD_TIMER(_segment_profile, "InvertedIndexSearcherSearchInitTime");
ADD_TIMER_WITH_LEVEL(_segment_profile, "InvertedIndexSearcherSearchInitTime", 1);
_inverted_index_searcher_search_exec_timer =
ADD_TIMER(_segment_profile, "InvertedIndexSearcherSearchExecTime");
_inverted_index_searcher_cache_hit_counter =
ADD_COUNTER(_segment_profile, "InvertedIndexSearcherCacheHit", TUnit::UNIT);
_inverted_index_searcher_cache_miss_counter =
ADD_COUNTER(_segment_profile, "InvertedIndexSearcherCacheMiss", TUnit::UNIT);
ADD_TIMER_WITH_LEVEL(_segment_profile, "InvertedIndexSearcherSearchExecTime", 1);
_inverted_index_searcher_cache_hit_counter = ADD_COUNTER_WITH_LEVEL(
_segment_profile, "InvertedIndexSearcherCacheHit", TUnit::UNIT, 1);
_inverted_index_searcher_cache_miss_counter = ADD_COUNTER_WITH_LEVEL(
_segment_profile, "InvertedIndexSearcherCacheMiss", TUnit::UNIT, 1);
_inverted_index_downgrade_count_counter =
ADD_COUNTER(_segment_profile, "InvertedIndexDowngradeCount", TUnit::UNIT);
_inverted_index_analyzer_timer = ADD_TIMER(_segment_profile, "InvertedIndexAnalyzerTime");
_inverted_index_lookup_timer = ADD_TIMER(_segment_profile, "InvertedIndexLookupTimer");
ADD_COUNTER_WITH_LEVEL(_segment_profile, "InvertedIndexDowngradeCount", TUnit::UNIT, 1);
_inverted_index_analyzer_timer =
ADD_TIMER_WITH_LEVEL(_segment_profile, "InvertedIndexAnalyzerTime", 1);
_inverted_index_lookup_timer =
ADD_TIMER_WITH_LEVEL(_segment_profile, "InvertedIndexLookupTimer", 1);

_output_index_result_column_timer = ADD_TIMER(_segment_profile, "OutputIndexResultColumnTime");
_filtered_segment_counter = ADD_COUNTER(_segment_profile, "NumSegmentFiltered", TUnit::UNIT);
Expand Down
59 changes: 45 additions & 14 deletions be/src/exprs/function/function_search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "core/data_type/data_type_string.h"
#include "exprs/function/simple_function_factory.h"
#include "exprs/vexpr_context.h"
#include "runtime/runtime_profile.h"
#include "storage/index/index_file_reader.h"
#include "storage/index/index_query_context.h"
#include "storage/index/inverted/analyzer/analyzer.h"
Expand Down Expand Up @@ -191,6 +192,9 @@ Status FieldReaderResolver::resolve(const std::string& field_name,
auto cache_it = _cache.find(binding_key);
if (cache_it != _cache.end()) {
*binding = cache_it->second;
if (_context->stats) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`_cache` here is the resolver's per-query binding cache, not `InvertedIndexSearcherCache`. Incrementing `inverted_index_searcher_cache_hit` on this fast path changes the metric's meaning and inflates the hit count whenever the same field is referenced more than once in a single `SEARCH()` evaluation. For example, on a cold segment the first clause records the real miss at the later cache-open path, while a second clause on the same field records a synthetic hit here even though the global searcher cache was never consulted.

_context->stats->inverted_index_searcher_cache_hit++;
}
return Status::OK();
}

Expand Down Expand Up @@ -275,6 +279,9 @@ Status FieldReaderResolver::resolve(const std::string& field_name,

std::shared_ptr<lucene::index::IndexReader> reader_holder;
if (cache_hit) {
if (_context->stats) {
_context->stats->inverted_index_searcher_cache_hit++;
}
auto searcher_variant = searcher_cache_handle.get_index_searcher();
auto* searcher_ptr = std::get_if<FulltextIndexSearcherPtr>(&searcher_variant);
if (searcher_ptr != nullptr && *searcher_ptr != nullptr) {
Expand All @@ -285,7 +292,13 @@ Status FieldReaderResolver::resolve(const std::string& field_name,
}

if (!reader_holder) {
if (_context->stats) {
_context->stats->inverted_index_searcher_cache_miss++;
}
// Cache miss: open directory, build IndexSearcher, insert into cache
int64_t dummy_timer = 0;
SCOPED_RAW_TIMER(_context->stats ? &_context->stats->inverted_index_searcher_open_timer
: &dummy_timer);
RETURN_IF_ERROR(
index_file_reader->init(config::inverted_index_read_buffer_size, _context->io_ctx));
auto directory = DORIS_TRY(
Expand Down Expand Up @@ -434,6 +447,11 @@ Status FunctionSearch::evaluate_inverted_index_with_search_param(
}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This timer still starts after the SEARCH DSL-cache lookup/return path above. When `dsl_cache->lookup()` returns a bitmap, the function exits before reaching this scope, so `InvertedIndexQueryTime` remains 0 on query-cache hits. The same fast path also never updates `inverted_index_query_cache_hit` or `inverted_index_query_cache_miss`, so cached SEARCH queries are still invisible in the profile even after this patch.

}

// Track overall query time (equivalent to inverted_index_query_timer in MATCH path)
int64_t query_timer_dummy = 0;
OlapReaderStatistics* outer_stats = index_query_context ? index_query_context->stats : nullptr;
SCOPED_RAW_TIMER(outer_stats ? &outer_stats->inverted_index_query_timer : &query_timer_dummy);

std::shared_ptr<IndexQueryContext> context;
if (index_query_context) {
context = index_query_context;
Expand Down Expand Up @@ -542,11 +560,19 @@ Status FunctionSearch::evaluate_inverted_index_with_search_param(
minimum_should_match = search_param.minimum_should_match;
}

auto* stats = context->stats;
int64_t dummy_timer = 0;
SCOPED_RAW_TIMER(stats ? &stats->inverted_index_searcher_search_timer : &dummy_timer);

query_v2::QueryPtr root_query;
std::string root_binding_key;
RETURN_IF_ERROR(build_query_recursive(search_param.root, context, resolver, &root_query,
&root_binding_key, default_operator,
minimum_should_match));
{
int64_t init_dummy = 0;
SCOPED_RAW_TIMER(stats ? &stats->inverted_index_searcher_search_init_timer : &init_dummy);
RETURN_IF_ERROR(build_query_recursive(search_param.root, context, resolver, &root_query,
&root_binding_key, default_operator,
minimum_should_match));
}
if (root_query == nullptr) {
LOG(INFO) << "search: Query tree resolved to empty query, dsl:"
<< search_param.original_dsl;
Expand Down Expand Up @@ -577,17 +603,22 @@ Status FunctionSearch::evaluate_inverted_index_with_search_param(
}

std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
if (enable_scoring && !is_asc && top_k > 0) {
bool use_wand = index_query_context->runtime_state != nullptr &&
index_query_context->runtime_state->query_options()
.enable_inverted_index_wand_query;
query_v2::collect_multi_segment_top_k(weight, exec_ctx, root_binding_key, top_k, roaring,
index_query_context->collection_similarity, use_wand);
} else {
query_v2::collect_multi_segment_doc_set(
weight, exec_ctx, root_binding_key, roaring,
index_query_context ? index_query_context->collection_similarity : nullptr,
enable_scoring);
{
int64_t exec_dummy = 0;
SCOPED_RAW_TIMER(stats ? &stats->inverted_index_searcher_search_exec_timer : &exec_dummy);
if (enable_scoring && !is_asc && top_k > 0) {
bool use_wand = index_query_context->runtime_state != nullptr &&
index_query_context->runtime_state->query_options()
.enable_inverted_index_wand_query;
query_v2::collect_multi_segment_top_k(
weight, exec_ctx, root_binding_key, top_k, roaring,
index_query_context->collection_similarity, use_wand);
} else {
query_v2::collect_multi_segment_doc_set(
weight, exec_ctx, root_binding_key, roaring,
index_query_context ? index_query_context->collection_similarity : nullptr,
enable_scoring);
}
}

VLOG_DEBUG << "search: Query completed, matched " << roaring->cardinality() << " documents";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ CSIndexInput::CSIndexInput(const CSIndexInput& clone) : BufferedIndexInput(clone
this->file_name = clone.file_name;
this->fileOffset = clone.fileOffset;
this->_length = clone._length;
this->_io_ctx = clone._io_ctx;
}

// close() has an empty body: calling it performs no work on this CSIndexInput.
// NOTE(review): presumably any underlying resources are released elsewhere — confirm.
void CSIndexInput::close() {}
Expand Down Expand Up @@ -355,7 +356,12 @@ bool DorisCompoundReader::openInput(const char* name, lucene::store::IndexInput*
bufferSize = _read_buffer_size;
}

ret = _CLNEW CSIndexInput(_stream, entry->file_name, entry->offset, entry->length, bufferSize);
auto* cs_input = _CLNEW CSIndexInput(_stream, entry->file_name, entry->offset, entry->length,
bufferSize);
if (_io_ctx) {
cs_input->setIoContext(_io_ctx);
}
ret = cs_input;
return true;
}

Expand Down Expand Up @@ -407,9 +413,10 @@ CL_NS(store)::IndexInput* DorisCompoundReader::getDorisIndexInput() {
}

// Binds an IO context to this compound reader.
//
// Stores `io_ctx` in `_io_ctx`, passes the same pointer to the underlying
// `_stream`, and enables the stream's idx-file-cache flag.
//
// @param io_ctx IO context pointer to remember and forward; stored as-is (not copied).
// NOTE(review): only the raw pointer is kept — confirm the caller guarantees
// that `io_ctx` outlives this reader and the inputs it opens.
void DorisCompoundReader::initialize(const io::IOContext* io_ctx) {
// Remember the context; openInput() checks `_io_ctx` and, when it is non-null,
// sets it on each CSIndexInput it creates.
_io_ctx = io_ctx;
// Apply the same context to the stream held by this reader.
_stream->setIoContext(io_ctx);
// Turn on the idx-file-cache flag for the stream.
_stream->setIdxFileCache(true);
}

} // namespace doris::segment_v2
#include "common/compile_check_end.h"
#include "common/compile_check_end.h"
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ class CLUCENE_EXPORT DorisCompoundReader : public lucene::store::Directory {

private:
void initialize(const io::IOContext* io_ctx);
const io::IOContext* _io_ctx = nullptr;
};

} // namespace doris::segment_v2
Loading