-
Notifications
You must be signed in to change notification settings - Fork 3.8k
[fix](profile) populate inverted index metrics for SEARCH() query path #62121
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -37,6 +37,7 @@ | |
| #include "core/data_type/data_type_string.h" | ||
| #include "exprs/function/simple_function_factory.h" | ||
| #include "exprs/vexpr_context.h" | ||
| #include "runtime/runtime_profile.h" | ||
| #include "storage/index/index_file_reader.h" | ||
| #include "storage/index/index_query_context.h" | ||
| #include "storage/index/inverted/analyzer/analyzer.h" | ||
|
|
@@ -191,6 +192,9 @@ Status FieldReaderResolver::resolve(const std::string& field_name, | |
| auto cache_it = _cache.find(binding_key); | ||
| if (cache_it != _cache.end()) { | ||
| *binding = cache_it->second; | ||
| if (_context->stats) { | ||
| _context->stats->inverted_index_searcher_cache_hit++; | ||
| } | ||
| return Status::OK(); | ||
| } | ||
|
|
||
|
|
@@ -275,6 +279,9 @@ Status FieldReaderResolver::resolve(const std::string& field_name, | |
|
|
||
| std::shared_ptr<lucene::index::IndexReader> reader_holder; | ||
| if (cache_hit) { | ||
| if (_context->stats) { | ||
| _context->stats->inverted_index_searcher_cache_hit++; | ||
| } | ||
| auto searcher_variant = searcher_cache_handle.get_index_searcher(); | ||
| auto* searcher_ptr = std::get_if<FulltextIndexSearcherPtr>(&searcher_variant); | ||
| if (searcher_ptr != nullptr && *searcher_ptr != nullptr) { | ||
|
|
@@ -285,7 +292,13 @@ Status FieldReaderResolver::resolve(const std::string& field_name, | |
| } | ||
|
|
||
| if (!reader_holder) { | ||
| if (_context->stats) { | ||
| _context->stats->inverted_index_searcher_cache_miss++; | ||
| } | ||
| // Cache miss: open directory, build IndexSearcher, insert into cache | ||
| int64_t dummy_timer = 0; | ||
| SCOPED_RAW_TIMER(_context->stats ? &_context->stats->inverted_index_searcher_open_timer | ||
| : &dummy_timer); | ||
| RETURN_IF_ERROR( | ||
| index_file_reader->init(config::inverted_index_read_buffer_size, _context->io_ctx)); | ||
| auto directory = DORIS_TRY( | ||
|
|
@@ -434,6 +447,11 @@ Status FunctionSearch::evaluate_inverted_index_with_search_param( | |
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This timer still starts after the SEARCH DSL-cache lookup/return path above. When |
||
| } | ||
|
|
||
| // Track overall query time (equivalent to inverted_index_query_timer in MATCH path) | ||
| int64_t query_timer_dummy = 0; | ||
| OlapReaderStatistics* outer_stats = index_query_context ? index_query_context->stats : nullptr; | ||
| SCOPED_RAW_TIMER(outer_stats ? &outer_stats->inverted_index_query_timer : &query_timer_dummy); | ||
|
|
||
| std::shared_ptr<IndexQueryContext> context; | ||
| if (index_query_context) { | ||
| context = index_query_context; | ||
|
|
@@ -542,11 +560,19 @@ Status FunctionSearch::evaluate_inverted_index_with_search_param( | |
| minimum_should_match = search_param.minimum_should_match; | ||
| } | ||
|
|
||
| auto* stats = context->stats; | ||
| int64_t dummy_timer = 0; | ||
| SCOPED_RAW_TIMER(stats ? &stats->inverted_index_searcher_search_timer : &dummy_timer); | ||
|
|
||
| query_v2::QueryPtr root_query; | ||
| std::string root_binding_key; | ||
| RETURN_IF_ERROR(build_query_recursive(search_param.root, context, resolver, &root_query, | ||
| &root_binding_key, default_operator, | ||
| minimum_should_match)); | ||
| { | ||
| int64_t init_dummy = 0; | ||
| SCOPED_RAW_TIMER(stats ? &stats->inverted_index_searcher_search_init_timer : &init_dummy); | ||
| RETURN_IF_ERROR(build_query_recursive(search_param.root, context, resolver, &root_query, | ||
| &root_binding_key, default_operator, | ||
| minimum_should_match)); | ||
| } | ||
| if (root_query == nullptr) { | ||
| LOG(INFO) << "search: Query tree resolved to empty query, dsl:" | ||
| << search_param.original_dsl; | ||
|
|
@@ -577,17 +603,22 @@ Status FunctionSearch::evaluate_inverted_index_with_search_param( | |
| } | ||
|
|
||
| std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>(); | ||
| if (enable_scoring && !is_asc && top_k > 0) { | ||
| bool use_wand = index_query_context->runtime_state != nullptr && | ||
| index_query_context->runtime_state->query_options() | ||
| .enable_inverted_index_wand_query; | ||
| query_v2::collect_multi_segment_top_k(weight, exec_ctx, root_binding_key, top_k, roaring, | ||
| index_query_context->collection_similarity, use_wand); | ||
| } else { | ||
| query_v2::collect_multi_segment_doc_set( | ||
| weight, exec_ctx, root_binding_key, roaring, | ||
| index_query_context ? index_query_context->collection_similarity : nullptr, | ||
| enable_scoring); | ||
| { | ||
| int64_t exec_dummy = 0; | ||
| SCOPED_RAW_TIMER(stats ? &stats->inverted_index_searcher_search_exec_timer : &exec_dummy); | ||
| if (enable_scoring && !is_asc && top_k > 0) { | ||
| bool use_wand = index_query_context->runtime_state != nullptr && | ||
| index_query_context->runtime_state->query_options() | ||
| .enable_inverted_index_wand_query; | ||
| query_v2::collect_multi_segment_top_k( | ||
| weight, exec_ctx, root_binding_key, top_k, roaring, | ||
| index_query_context->collection_similarity, use_wand); | ||
| } else { | ||
| query_v2::collect_multi_segment_doc_set( | ||
| weight, exec_ctx, root_binding_key, roaring, | ||
| index_query_context ? index_query_context->collection_similarity : nullptr, | ||
| enable_scoring); | ||
| } | ||
| } | ||
|
|
||
| VLOG_DEBUG << "search: Query completed, matched " << roaring->cardinality() << " documents"; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`_cache` here is the resolver's per-query binding cache, not `InvertedIndexSearcherCache`. Incrementing `inverted_index_searcher_cache_hit` on this fast path changes the metric's meaning and double-counts hits when the same field is referenced multiple times in one SEARCH() evaluation. On a cold segment, the first clause will record the real miss at the later cache-open path, and a second clause on the same field will record a synthetic hit here even though the global searcher cache was never consulted.