Skip to content

Commit bf9333e

Browse files
authored
[core] Stabilize hybrid RRF tie-breaking (#8288)
Hybrid RRF (Reciprocal Rank Fusion) previously ranked the rows within each route by score alone. When multiple rows had the same route score, the comparator treated them as equal, so — because Java's list sort is stable — their RRF rank simply followed the iteration order of the input bitmap, while the final `topK` already used `score desc, rowId asc`. This PR makes the route-level RRF ranking use the same deterministic tie-break: `score desc, rowId asc`. It also adds regression coverage for tied route scores and verifies the resulting RRF contributions.
1 parent 926459d commit bf9333e

2 files changed

Lines changed: 39 additions & 1 deletion

File tree

paimon-common/src/main/java/org/apache/paimon/globalindex/HybridSearchRanker.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,14 @@ private static List<Long> rankedRowIds(ScoredGlobalIndexResult result) {
122122
}
123123
final ScoreGetter scoreGetter = result.scoreGetter();
124124
rowIds.sort(
125-
(left, right) -> Float.compare(scoreGetter.score(right), scoreGetter.score(left)));
125+
(left, right) -> {
126+
int scoreCompare =
127+
Float.compare(scoreGetter.score(right), scoreGetter.score(left));
128+
if (scoreCompare != 0) {
129+
return scoreCompare;
130+
}
131+
return Long.compare(left, right);
132+
});
126133
return rowIds;
127134
}
128135

paimon-common/src/test/java/org/apache/paimon/globalindex/HybridSearchRankerTest.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.util.Arrays;
2626
import java.util.Collections;
2727
import java.util.HashMap;
28+
import java.util.Iterator;
2829
import java.util.Map;
2930

3031
import static org.assertj.core.api.Assertions.assertThat;
@@ -46,6 +47,20 @@ public void testRrfFavorsRowsReturnedByMultipleRoutes() {
4647
assertThat(ranked.scoreGetter().score(2L)).isGreaterThan(ranked.scoreGetter().score(1L));
4748
}
4849

50+
@Test
51+
public void testRrfBreaksRouteScoreTiesByRowId() {
52+
ScoredGlobalIndexResult result =
53+
result(new long[] {3, 1, 2}, new float[] {1.0f, 1.0f, 1.0f}, new long[] {3, 1, 2});
54+
55+
ScoredGlobalIndexResult ranked =
56+
HybridSearchRanker.rrf(Collections.singletonList(result), new float[] {1.0f}, 2);
57+
58+
assertThat(ranked.results()).contains(1L, 2L);
59+
assertThat(ranked.results()).doesNotContain(3L);
60+
assertThat(ranked.scoreGetter().score(1L)).isCloseTo(1.0f / 61.0f, within(0.000001f));
61+
assertThat(ranked.scoreGetter().score(2L)).isCloseTo(1.0f / 62.0f, within(0.000001f));
62+
}
63+
4964
@Test
5065
public void testWeightedScoreUsesAlignedWeightsAfterEmptyRouteIsSkipped() {
5166
ScoredGlobalIndexResult result = result(new long[] {1, 2}, new float[] {0.3f, 0.2f});
@@ -61,6 +76,22 @@ public void testWeightedScoreUsesAlignedWeightsAfterEmptyRouteIsSkipped() {
6176

6277
private ScoredGlobalIndexResult result(long[] rowIds, float[] scores) {
6378
RoaringNavigableMap64 bitmap = new RoaringNavigableMap64();
79+
return result(rowIds, scores, bitmap);
80+
}
81+
82+
private ScoredGlobalIndexResult result(long[] rowIds, float[] scores, long[] iterationOrder) {
83+
RoaringNavigableMap64 bitmap =
84+
new RoaringNavigableMap64() {
85+
@Override
86+
public Iterator<Long> iterator() {
87+
return Arrays.stream(iterationOrder).boxed().iterator();
88+
}
89+
};
90+
return result(rowIds, scores, bitmap);
91+
}
92+
93+
private ScoredGlobalIndexResult result(
94+
long[] rowIds, float[] scores, RoaringNavigableMap64 bitmap) {
6495
Map<Long, Float> scoreMap = new HashMap<>();
6596
for (int i = 0; i < rowIds.length; i++) {
6697
bitmap.add(rowIds[i]);

0 commit comments

Comments
 (0)