Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
93a6d7a
[Feature] Pre/post-filter a residual scalar predicate against vector ANN
sevev Jun 4, 2026
d38345d
[Feature] Use the vector index for residual scalar predicates
sevev Jun 4, 2026
d42fc46
[Doc] Add vector filter strategy (v3) design
sevev Jun 6, 2026
b980afa
[Feature] Add ANN filter-strategy resolver and completeness gate
sevev Jun 6, 2026
33d3fca
[BugFix] Evaluate the whole predicate tree for the vector PRE-filter
sevev Jun 6, 2026
155a7cf
[Feature] Honor the resolved ANN filter strategy (PRE / POST / BRUTE)
sevev Jun 6, 2026
731ed86
[Feature] Add ann_filter_strategy session variable for ANN scalar-filter
sevev Jun 6, 2026
83c7c82
[Doc] Add vector-filter-strategy test coverage + implementation status
sevev Jun 6, 2026
7571c14
[UT] Add e2e SQL cases for the vector filter strategy
sevev Jun 6, 2026
ba27841
[BugFix] Carry runtime-appended vector distance column through Projec…
sevev Jun 6, 2026
526b48b
[BugFix] Fix vector ANN residual-filter correctness (crash, under-ret…
sevev Jun 6, 2026
3409e7a
[Refactor] Detect filter-above-ANN-scan on the BE execution tree
sevev Jun 7, 2026
2c06b3a
[UT] Record R for test_vector_filter_strategy; drop internal design docs
sevev Jun 8, 2026
7caff08
[Enhancement] Adapt filter-strategy code to upstream ANN refine
sevev Jun 8, 2026
d0c0aaf
[Refactor] Apply clang-format fixes
sevev Jun 12, 2026
7bb91e1
[Enhancement] Short-circuit the filtered ANN search on tiny/sparse PR…
sevev Jun 12, 2026
29bc469
[BugFix] Null-guard the provider scan node in lake init_reader_params
sevev Jun 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1990,6 +1990,19 @@ CONF_mDouble(vector_adaptive_ef_alpha, "1.0");
CONF_mDouble(vector_adaptive_ef_cap, "8.0");
CONF_mInt64(vector_adaptive_ef_baseline_rows, "300000");

// Residual scalar predicate (one not exactly resolved by an index) + ANN. When true and the index
// supports efficient filtered search, the AUTO strategy pre-filters: the predicate is evaluated early
// into the ANN candidate set. When false, AUTO never pre-filters a residual; the strategy resolver
// (resolve_ann_filter_strategy) selects exact brute-force instead. Post-filtering (oversample the ANN
// and let the read-time predicate path filter the result) is only chosen when the query explicitly
// requests it through ann_filter_strategy.
CONF_mBool(enable_vector_index_residual_prefilter, "true");
// Post-filter oversample factor: the ANN searches k * this when post-filtering a residual predicate.
CONF_mInt32(vector_index_residual_post_filter_oversample, "3");
// PRE short-circuit: when the residual pre-filter bitmap holds at most this fraction of the segment's
// rows, skip the filtered ANN search and score the candidates exactly (a sparse bitmap makes the HNSW
// traversal slow and likely to under-return, paying the exact rescan on top of the wasted search).
// Routing only -- both paths are exact, a mis-set value costs speed, never correctness. 0 disables the
// ratio check; the cardinality <= k short-circuit (a logical no-op search) always applies.
CONF_mDouble(vector_index_brute_selectivity_threshold, "0.01");

// Per-builder in-memory row buffer cap before tenann does an intermediate
// add into the faiss in-memory index. Bounds peak memory during HNSWFlat
// build by capping data_buffer_ at |rows| × dim × 4 bytes (does NOT cap
Expand Down
3 changes: 3 additions & 0 deletions be/src/common/config_fwd_headers_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,9 @@
"vector_adaptive_ef_alpha",
"vector_adaptive_ef_cap",
"vector_adaptive_ef_baseline_rows",
"enable_vector_index_residual_prefilter",
"vector_index_residual_post_filter_oversample",
"vector_index_brute_selectivity_threshold",
"vector_index_build_flush_threshold_rows"
]
},
Expand Down
15 changes: 15 additions & 0 deletions be/src/common/config_vector_index_fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,21 @@ CONF_mDouble(vector_adaptive_ef_cap, "8.0");

CONF_mInt64(vector_adaptive_ef_baseline_rows, "300000");

// Residual scalar predicate (one not exactly resolved by an index) + ANN. When true and the index
// supports efficient filtered search, the AUTO strategy pre-filters: the predicate is evaluated early
// into the ANN candidate set. When false, AUTO never pre-filters a residual; the strategy resolver
// (resolve_ann_filter_strategy) selects exact brute-force instead. Post-filtering (oversample the ANN
// and let the read-time predicate path filter the result) is only chosen when the query explicitly
// requests it through ann_filter_strategy.
CONF_mBool(enable_vector_index_residual_prefilter, "true");

// Post-filter oversample factor: the ANN searches k * this when post-filtering a residual predicate.
CONF_mInt32(vector_index_residual_post_filter_oversample, "3");

// PRE short-circuit: when the residual pre-filter bitmap holds at most this fraction of the segment's
// rows, skip the filtered ANN search and score the candidates exactly (a sparse bitmap makes the HNSW
// traversal slow and likely to under-return, paying the exact rescan on top of the wasted search).
// Routing only -- both paths are exact, a mis-set value costs speed, never correctness. 0 disables the
// ratio check; the cardinality <= k short-circuit (a logical no-op search) always applies.
CONF_mDouble(vector_index_brute_selectivity_threshold, "0.01");

// Per-builder in-memory row buffer cap before tenann does an intermediate
// add into the faiss in-memory index. Bounds peak memory during HNSWFlat
// build by capping data_buffer_ at |rows| × dim × 4 bytes (does NOT cap
Expand Down
13 changes: 13 additions & 0 deletions be/src/connector/lake_connector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,9 @@ Status LakeDataSource::init_reader_params(const std::vector<OlapScanRange*>& key
_params.vector_search_option->refine_distance = _refine_distance;
_params.vector_search_option->k_factor = _runtime_state->query_options().k_factor;
_params.vector_search_option->pq_refine_factor = _runtime_state->query_options().pq_refine_factor;
_params.vector_search_option->filter_strategy = _runtime_state->query_options().__isset.ann_filter_strategy
? _runtime_state->query_options().ann_filter_strategy
: 0;
}

ASSIGN_OR_RETURN(auto pred_tree, _conjuncts_manager->get_predicate_tree(parser, _predicate_free_pool));
Expand Down Expand Up @@ -442,6 +445,16 @@ Status LakeDataSource::init_reader_params(const std::vector<OlapScanRange*>& key
RETURN_IF_ERROR(not_pushdown_predicate_rewriter.rewrite_predicate(&_obj_pool, _non_pushdown_pred_tree));
}

// A predicate evaluated above the segment iterator means the iterator cannot fold it into the ANN
// candidate; flag it so the vector filter resolver routes to exact brute-force instead of an unsafe
// segment-level k-limit. Two sources: (1) this scan's own non-pushdown conjuncts; (2) a row-filtering
// operator placed ABOVE this scan in the execution tree (FragmentExecutor's tree walk sets it on the
// ConnectorScanNode). See design doc §7 (lake twin). The provider's scan node is null when the data
// source is built without one (UT path); no scan node means nothing sits above the iterator.
const auto* scan_node = _provider->_scan_node;
_params.has_predicate_above_iterator = !not_pushdown_conjuncts.empty() || !_non_pushdown_pred_tree.empty() ||
(scan_node != nullptr && scan_node->is_filtered_above_iterator());

// Range
for (const auto& key_range : key_ranges) {
if (key_range->begin_scan_range.size() == 1 && key_range->begin_scan_range.get_value(0) == NEGATIVE_INFINITY) {
Expand Down
31 changes: 31 additions & 0 deletions be/src/exec/pipeline/fragment_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,33 @@ int FragmentExecutor::_calc_query_expired_seconds(const UnifiedExecPlanFragmentP
return QueryRuntimeState::DEFAULT_EXPIRE_SECONDS;
}

// Walks the execution tree top-down and flags every ScanNode that has a row-filtering SELECT node
// somewhere above it with no sort (TopN) node in between. An ANN top-k scan reads the flag so its
// filter resolver picks the exact brute-force path; a segment-level k-limit would under-return once
// the SELECT drops rows. The walk inspects the real execution tree rather than a planner heuristic,
// so the result does not depend on where single-column predicates were placed.
//
// |node|       current subtree root (must be non-null).
// |saw_filter| whether a SELECT_NODE was seen on the path from the plan root down to |node| since the
//              last SORT_NODE. A SORT_NODE clears it: a filter above the limit runs post-limit and
//              cannot break completeness.
static void mark_filtered_above_scans(ExecNode* node, bool saw_filter) {
    const auto node_type = node->type();
    if (node_type == TPlanNodeType::SORT_NODE) {
        saw_filter = false;
    } else if (node_type == TPlanNodeType::SELECT_NODE) {
        saw_filter = true;
    }

    // Only attempt the downcast when there is something to record.
    ScanNode* scan = saw_filter ? dynamic_cast<ScanNode*>(node) : nullptr;
    if (scan != nullptr) {
        scan->set_filtered_above_iterator(true);
    }

    // Propagate the (possibly updated) marker to every subtree.
    for (auto* child : node->children()) {
        mark_filtered_above_scans(child, saw_filter);
    }
}

static void collect_non_broadcast_rf_ids(const ExecNode* node, std::unordered_set<int32_t>& filter_ids) {
for (const auto* child : node->children()) {
collect_non_broadcast_rf_ids(child, filter_ids);
Expand Down Expand Up @@ -486,6 +513,10 @@ Status FragmentExecutor::_prepare_exec_plan(ExecEnv* exec_env, const UnifiedExec
plan->push_down_tuple_slot_mappings(runtime_state, empty_mappings);
runtime_state->set_fragment_root_id(plan->id());

// Flag scans whose output is filtered above (below the TopN) so an ANN top-k scan falls back to
// exact brute-force instead of an unsafe segment-level k-limit. See mark_filtered_above_scans.
mark_filtered_above_scans(plan, false);

// Set senders of exchange nodes before pipeline build
std::vector<ExecNode*> exch_nodes;
plan->collect_nodes(TPlanNodeType::EXCHANGE_NODE, &exch_nodes);
Expand Down
11 changes: 11 additions & 0 deletions be/src/exec/pipeline/scan/olap_chunk_source.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,9 @@ Status OlapChunkSource::_init_reader_params(const std::vector<std::unique_ptr<Ol
_params.vector_search_option->refine_distance = _refine_distance;
_params.vector_search_option->k_factor = _runtime_state->query_options().k_factor;
_params.vector_search_option->pq_refine_factor = _runtime_state->query_options().pq_refine_factor;
_params.vector_search_option->filter_strategy = _runtime_state->query_options().__isset.ann_filter_strategy
? _runtime_state->query_options().ann_filter_strategy
: 0;
}
if (thrift_olap_scan_node.__isset.sorted_by_keys_per_tablet) {
_params.sorted_by_keys_per_tablet = thrift_olap_scan_node.sorted_by_keys_per_tablet;
Expand Down Expand Up @@ -351,6 +354,14 @@ Status OlapChunkSource::_init_reader_params(const std::vector<std::unique_ptr<Ol
RETURN_IF_ERROR(not_pushdown_predicate_rewriter.rewrite_predicate(&_obj_pool, _non_pushdown_pred_tree));
}

// A predicate evaluated above the segment iterator means the iterator cannot fold it into the ANN
// candidate; flag it so the vector filter resolver routes to exact brute-force instead of an unsafe
// segment-level k-limit. Two sources: (1) this scan's own non-pushdown conjuncts; (2) a row-filtering
// operator placed ABOVE this scan in the execution tree (e.g. a SELECT for a residual the optimizer
// could not push down, such as cat+tag>50) -- detected by FragmentExecutor's tree walk. See design §7.
_params.has_predicate_above_iterator = !not_pushdown_conjuncts.empty() || !_non_pushdown_pred_tree.empty() ||
_scan_node->is_filtered_above_iterator();

// Range
for (const auto& key_range : key_ranges) {
if (key_range->begin_scan_range.size() == 1 && key_range->begin_scan_range.get_value(0) == NEGATIVE_INFINITY) {
Expand Down
8 changes: 8 additions & 0 deletions be/src/exec/scan_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,13 @@ class ScanNode : public ExecNode {

std::vector<ExprContext*>& get_heavy_expr_ctxs() { return _heavy_expr_ctxs; }

// Set once at fragment setup (FragmentExecutor tree walk): true when a row-reducing operator
// (e.g. a SELECT for a residual predicate that could not be pushed into this scan) sits ABOVE
// this scan but below the TopN limit. An ANN top-k scan reads this so the vector filter resolver
// routes to the exact brute-force path -- a segment-level k-limit would otherwise under-return.
// The flag starts out false and the tree walk only ever raises it to true; scans with no such
// operator above them keep the default.
void set_filtered_above_iterator(bool v) { _filtered_above_iterator = v; }
// Returns the flag recorded by set_filtered_above_iterator (false if it was never set).
bool is_filtered_above_iterator() const { return _filtered_above_iterator; }

protected:
RuntimeProfile::Counter* _bytes_read_counter = nullptr; // # bytes read from the scanner
// # rows/tuples read from the scanner (including those discarded by eval_conjucts())
Expand All @@ -172,6 +179,7 @@ class ScanNode : public ExecNode {
RuntimeProfile::Counter* _num_scanner_threads_started_counter = nullptr;
std::string _name;
bool _enable_shared_scan = false;
bool _filtered_above_iterator = false;
int64_t _mem_limit = 0;
int32_t _io_tasks_per_scan_operator = 0;

Expand Down
1 change: 1 addition & 0 deletions be/src/storage/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ set(STORAGE_FILES
index/inverted/clucene/clucene_inverted_reader.cpp
index/inverted/clucene/match_operator.cpp
index/vector/empty_index_reader.cpp
index/vector/vector_filter_strategy.cpp
index/vector/vector_index_cache.cpp
index/vector/vector_index_builder_factory.cpp
index/vector/vector_index_writer.cpp
Expand Down
3 changes: 3 additions & 0 deletions be/src/storage/index/vector/tenann_index_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ class TenANNReader final : public VectorIndexReader {
std::vector<float>* result_distances, tenann::IdFilter* id_filter, float range,
int order) override;

// tenann HNSW/IVF accept an IdFilter (faiss IDSelector), so filtered search is efficient.
// Returning true is a precondition for resolve_ann_filter_strategy to pick PRE when a residual
// predicate is present; a reader returning false is routed to exact BRUTE instead.
bool supports_efficient_filtered_search() const override { return true; }

private:
std::shared_ptr<tenann::AnnSearcher> _searcher;
// Pins the cache entry for the reader's lifetime.
Expand Down
62 changes: 62 additions & 0 deletions be/src/storage/index/vector/vector_filter_strategy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "storage/index/vector/vector_filter_strategy.h"

namespace starrocks {

// Resolves the per-segment execution strategy for an ANN top-k that may carry a residual predicate.
//
// |in| carries the bitmap-free facts about the segment and query (see AnnFilterResolveInputs).
// Returns PRE, POST or BRUTE -- never AUTO.
//
// Invariant (design doc §2): PRE applies a segment-level k-limit, so it is only returned when
// nothing filters rows above the iterator and any residual can be folded into an exact bitmap the
// reader can consume. Everything else that is not an explicit POST falls back to exact BRUTE.
AnnFilterStrategy resolve_ann_filter_strategy(const AnnFilterResolveInputs& in) {
    // --- Explicit user overrides. They may relax the selectivity gate (handled later, in PRE
    // execution), but must never bypass the soundness/completeness invariant (design doc §2). ---
    switch (in.user_choice) {
    case AnnFilterStrategy::POST:
        // User opts into the approximate path (may return < k). Only reachable when explicit.
        return AnnFilterStrategy::POST;
    case AnnFilterStrategy::BRUTE:
        return AnnFilterStrategy::BRUTE;
    case AnnFilterStrategy::PRE:
        // Explicit PRE still falls back to exact BRUTE when PRE cannot be proven sound+complete.
        return (!in.has_above_predicate && in.exact_possible && in.supports_filtered) ? AnnFilterStrategy::PRE
                                                                                      : AnnFilterStrategy::BRUTE;
    case AnnFilterStrategy::AUTO:
        break;
    }
    // A value outside the enumerators (e.g. an unknown wire value) also lands here and is treated
    // like AUTO.

    // --- AUTO ---
    if (in.has_above_predicate) {
        // A predicate is evaluated above the iterator -> a segment-level k-limit would under-return.
        // This must be checked before the no-residual shortcut: a scan whose only predicate lives
        // above the iterator has an empty pred_tree (has_residual == false) yet is still filtered
        // after the k-limit, exactly the case the explicit-PRE branch above already rejects.
        return AnnFilterStrategy::BRUTE; // completeness
    }
    if (!in.has_residual) {
        // No residual and nothing filtering above: a plain ANN top-k, exact + complete by construction.
        return AnnFilterStrategy::PRE;
    }
    if (!in.supports_filtered) {
        // ANN reader cannot consume a candidate bitmap.
        return AnnFilterStrategy::BRUTE;
    }
    if (!in.prefilter_enabled) {
        // Kill-switch off: do not scan-prefilter an un-indexed residual.
        return AnnFilterStrategy::BRUTE;
    }
    if (!in.exact_possible) {
        // Residual cannot be evaluated into an exact bitmap in this segment.
        return AnnFilterStrategy::BRUTE; // exactness
    }
    // Exact + complete + reader-capable. The selectivity gate (PRE -> BRUTE for very selective
    // filters) is applied during PRE execution, not here (it needs the residual bitmap).
    return AnnFilterStrategy::PRE;
}

} // namespace starrocks
60 changes: 60 additions & 0 deletions be/src/storage/index/vector/vector_filter_strategy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

namespace starrocks {

// How a vector (ANN) search that carries a residual filter is executed.
//
// AUTO only ever appears as an *input* (the user/session default); the resolver
// (resolve_ann_filter_strategy) always hands back one of PRE/POST/BRUTE.
//
// The numeric values are wire-compatible with TVectorSearchOptions.filter_strategy
// (0 = AUTO, 1 = PRE, 2 = POST, 3 = BRUTE) and must not be renumbered.
enum class AnnFilterStrategy {
    AUTO = 0,
    PRE = 1,
    POST = 2,
    BRUTE = 3,
};

// Inputs to resolve_ann_filter_strategy (design doc §4). None of them requires the residual bitmap.
//
// The selectivity gate is deliberately absent: it depends on the residual bitmap, so it runs
// afterwards, inside PRE execution, where it may still downgrade PRE to BRUTE.
struct AnnFilterResolveInputs {
    // Strategy requested by the user/session (TVectorSearchOptions.filter_strategy).
    // AUTO defers to the config.
    AnnFilterStrategy user_choice{AnnFilterStrategy::AUTO};
    // Mirrors config::enable_vector_index_residual_prefilter. When false, AUTO never
    // scan-prefilters an un-indexed residual (kill-switch).
    bool prefilter_enabled{true};
    // !_opts.pred_tree.empty() -- a residual predicate has to be satisfied alongside the ANN.
    bool has_residual{false};
    // Some predicate is evaluated *above* the segment iterator: the scan's own non-pushdown
    // conjuncts / _non_pushdown_pred_tree, or a row-filtering operator sitting above the scan in
    // the execution tree. The iterator cannot fold it into the ANN candidate, so a segment-level
    // k-limit would under-return -> must BRUTE (completeness).
    bool has_above_predicate{false};
    // Every column referenced by _opts.pred_tree has a readable iterator in this segment, i.e. the
    // whole tree can be evaluated into an exact bitmap (exactness precondition for PRE).
    bool exact_possible{true};
    // ann_reader->supports_efficient_filtered_search() -- the reader can consume an IdFilter.
    bool supports_filtered{true};
};

// Pure decision function (design doc §4). Runs per-segment at BE runtime, after FE has
// unconditionally rewritten to an ANN plan. Returns PRE/POST/BRUTE, never AUTO.
//
// Soundness/completeness invariant (design doc §2): PRE (segment-level k-limit) is only allowed
// when the residual can be folded into an exact + complete bitmap the ANN reader can consume;
// otherwise the query falls back to exact BRUTE. The selectivity downgrade (PRE -> BRUTE for very
// selective filters) happens later, in PRE execution, and is not modeled here.
AnnFilterStrategy resolve_ann_filter_strategy(const AnnFilterResolveInputs& in);

} // namespace starrocks
5 changes: 5 additions & 0 deletions be/src/storage/index/vector/vector_index_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ class VectorIndexReader {
VectorIndexReader() = default;
virtual ~VectorIndexReader() = default;

// Whether the reader can restrict the ANN search to a candidate id set efficiently (filtered
// search), which enables true pre-filtering. For readers that cannot, the strategy resolver
// (resolve_ann_filter_strategy) does not pick PRE when a residual predicate is present; it
// falls back to exact BRUTE (POST is only used when explicitly requested).
// Conservative default: false.
virtual bool supports_efficient_filtered_search() const { return false; }

#ifdef WITH_TENANN
virtual Status init_searcher(const tenann::IndexMeta& meta, const std::string& index_path,
FileSystem* fs = nullptr) = 0;
Expand Down
Loading
Loading