apache
diff --git a/‎integration-tests/src/test/resources/queries/twitterstream_queries.json‎
Lines changed: 8 additions & 4 deletions b/‎integration-tests/src/test/resources/queries/twitterstream_queries.json‎
Lines changed: 8 additions & 4 deletions
diff --git a/‎processing/src/main/java/org/apache/druid/query/CursorGranularizer.java‎
Lines changed: 8 additions & 2 deletions b/‎processing/src/main/java/org/apache/druid/query/CursorGranularizer.java‎
Lines changed: 8 additions & 2 deletions
diff --git a/‎processing/src/main/java/org/apache/druid/query/QueryContexts.java‎
Lines changed: 6 additions & 0 deletions b/‎processing/src/main/java/org/apache/druid/query/QueryContexts.java‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java‎
Lines changed: 1 addition & 1 deletion b/‎processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎processing/src/main/java/org/apache/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java‎
Lines changed: 1 addition & 0 deletions b/‎processing/src/main/java/org/apache/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎processing/src/main/java/org/apache/druid/query/topn/BaseTopNAlgorithm.java‎
Lines changed: 5 additions & 2 deletions b/‎processing/src/main/java/org/apache/druid/query/topn/BaseTopNAlgorithm.java‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎processing/src/main/java/org/apache/druid/query/topn/Generic1AggPooledTopNScannerPrototype.java‎
Lines changed: 20 additions & 18 deletions b/‎processing/src/main/java/org/apache/druid/query/topn/Generic1AggPooledTopNScannerPrototype.java‎
Lines changed: 20 additions & 18 deletions
diff --git a/‎processing/src/main/java/org/apache/druid/query/topn/Generic2AggPooledTopNScannerPrototype.java‎
Lines changed: 24 additions & 22 deletions b/‎processing/src/main/java/org/apache/druid/query/topn/Generic2AggPooledTopNScannerPrototype.java‎
Lines changed: 24 additions & 22 deletions
@@ -94,7 +94,8 @@
             "context": {
                 "useCache": "true",
                 "populateCache": "true",
-                "timeout": 60000
+                "timeout": 60000,
+                "useTopNMultiPassPooledQueryGranularity": "true"
             }
         },
         "expectedResults": [
@@ -198,7 +199,8 @@
             "context": {
                 "useCache": "true",
                 "populateCache": "true",
-                "timeout": 60000
+                "timeout": 60000,
+                "useTopNMultiPassPooledQueryGranularity": "true"
             }
         },
         "expectedResults": [
@@ -322,7 +324,8 @@
             "context": {
                 "useCache": "true",
                 "populateCache": "true",
-                "timeout": 60000
+                "timeout": 60000,
+                "useTopNMultiPassPooledQueryGranularity": "true"
             }
         },
         "expectedResults": [
@@ -741,7 +744,8 @@
             "context": {
                 "useCache": "true",
                 "populateCache": "true",
-                "timeout": 60000
+                "timeout": 60000,
+                "useTopNMultiPassPooledQueryGranularity": "true"
             }
         },
         "expectedResults": [
 
@@ -24,6 +24,7 @@
 import com.google.common.collect.Lists;
 import org.apache.druid.error.DruidException;
 import org.apache.druid.java.util.common.DateTimes;
+import org.apache.druid.java.util.common.Intervals;
 import org.apache.druid.java.util.common.granularity.Granularities;
 import org.apache.druid.java.util.common.granularity.Granularity;
 import org.apache.druid.segment.ColumnValueSelector;
@@ -133,13 +134,18 @@ public DateTime getBucketStart()
     return DateTimes.utc(currentBucketStart);
   }
 
+  public Interval getCurrentInterval()
+  {
+    return Intervals.utc(currentBucketStart, currentBucketEnd);
+  }
+
   public boolean advanceToBucket(final Interval bucketInterval)
   {
+    currentBucketStart = bucketInterval.getStartMillis();
+    currentBucketEnd = bucketInterval.getEndMillis();
     if (cursor.isDone()) {
       return false;
     }
-    currentBucketStart = bucketInterval.getStartMillis();
-    currentBucketEnd = bucketInterval.getEndMillis();
     if (timeSelector == null) {
       return true;
     }
 
@@ -88,6 +88,12 @@ public class QueryContexts
   public static final String UNCOVERED_INTERVALS_LIMIT_KEY = "uncoveredIntervalsLimit";
   public static final String MIN_TOP_N_THRESHOLD = "minTopNThreshold";
   public static final String CATALOG_VALIDATION_ENABLED = "catalogValidationEnabled";
+  // This flag controls whether the topN engine may use the 'pooled' algorithm when query granularity is anything
+  // other than 'ALL' and the cardinality + number of aggregators would require more space than is available in the
+  // buffers, in which case the cursor must be reset to make multiple passes. This is likely slower than the default
+  // behavior of falling back to heap memory, but less dangerous, since too large a query can cause the heap to run
+  // out of memory.
+  public static final String TOPN_USE_MULTI_PASS_POOLED_QUERY_GRANULARITY = "useTopNMultiPassPooledQueryGranularity";
 
   // projection context keys
   public static final String NO_PROJECTIONS = "noProjections";
 
@@ -391,7 +391,7 @@ public boolean hasNext()
       if (delegate != null && delegate.hasNext()) {
         return true;
       } else {
-        if (!cursor.isDone() && granularizer.currentOffsetWithinBucket()) {
+        if (granularizer.currentOffsetWithinBucket()) {
           if (delegate != null) {
             delegate.close();
           }
 
@@ -113,6 +113,7 @@ public void run(
     try {
       // reset cursor since we call run again
       params.getCursor().reset();
+      params.getGranularizer().advanceToBucket(params.getGranularizer().getCurrentInterval());
       // Run topN for all metrics for top N dimension values
       allMetricsParam = allMetricAlgo.makeInitParams(params.getSelectorPlus(), params.getCursor(), params.getGranularizer());
       allMetricAlgo.run(
 
@@ -97,12 +97,14 @@ private void runWithCardinalityKnown(
     }
     boolean hasDimValSelector = (dimValSelector != null);
 
-    int cardinality = params.getCardinality();
+    final int cardinality = params.getCardinality();
+    final int numValuesPerPass = params.getNumValuesPerPass();
     int numProcessed = 0;
     long processedRows = 0;
     while (numProcessed < cardinality) {
       final int numToProcess;
-      int maxNumToProcess = Math.min(params.getNumValuesPerPass(), cardinality - numProcessed);
+      int maxNumToProcess = Math.min(numValuesPerPass, cardinality - numProcessed);
+
 
       DimValSelector theDimValSelector;
       if (!hasDimValSelector) {
@@ -125,6 +127,7 @@ private void runWithCardinalityKnown(
       numProcessed += numToProcess;
       if (numProcessed < cardinality) {
         params.getCursor().reset();
+        params.getGranularizer().advanceToBucket(params.getGranularizer().getCurrentInterval());
       }
     }
     if (queryMetrics != null) {
 
@@ -54,25 +54,27 @@ public long scanAndAggregate(
   {
     long processedRows = 0;
     int positionToAllocate = 0;
-    while (!cursor.isDoneOrInterrupted()) {
-      final IndexedInts dimValues = dimensionSelector.getRow();
-      final int dimSize = dimValues.size();
-      for (int i = 0; i < dimSize; i++) {
-        int dimIndex = dimValues.get(i);
-        int position = positions[dimIndex];
-        if (position >= 0) {
-          aggregator.aggregate(resultsBuffer, position);
-        } else if (position == TopNAlgorithm.INIT_POSITION_VALUE) {
-          positions[dimIndex] = positionToAllocate;
-          position = positionToAllocate;
-          aggregator.init(resultsBuffer, position);
-          aggregator.aggregate(resultsBuffer, position);
-          positionToAllocate += aggregatorSize;
+    if (granularizer.currentOffsetWithinBucket()) {
+      while (!cursor.isDoneOrInterrupted()) {
+        final IndexedInts dimValues = dimensionSelector.getRow();
+        final int dimSize = dimValues.size();
+        for (int i = 0; i < dimSize; i++) {
+          int dimIndex = dimValues.get(i);
+          int position = positions[dimIndex];
+          if (position >= 0) {
+            aggregator.aggregate(resultsBuffer, position);
+          } else if (position == TopNAlgorithm.INIT_POSITION_VALUE) {
+            positions[dimIndex] = positionToAllocate;
+            position = positionToAllocate;
+            aggregator.init(resultsBuffer, position);
+            aggregator.aggregate(resultsBuffer, position);
+            positionToAllocate += aggregatorSize;
+          }
+        }
+        processedRows++;
+        if (!granularizer.advanceCursorWithinBucketUninterruptedly()) {
+          break;
         }
-      }
-      processedRows++;
-      if (!granularizer.advanceCursorWithinBucketUninterruptedly()) {
-        break;
       }
     }
     return processedRows;
 
@@ -57,29 +57,31 @@ public long scanAndAggregate(
     int totalAggregatorsSize = aggregator1Size + aggregator2Size;
     long processedRows = 0;
     int positionToAllocate = 0;
-    while (!cursor.isDoneOrInterrupted()) {
-      final IndexedInts dimValues = dimensionSelector.getRow();
-      final int dimSize = dimValues.size();
-      for (int i = 0; i < dimSize; i++) {
-        int dimIndex = dimValues.get(i);
-        int position = positions[dimIndex];
-        if (position >= 0) {
-          aggregator1.aggregate(resultsBuffer, position);
-          aggregator2.aggregate(resultsBuffer, position + aggregator1Size);
-        } else if (position == TopNAlgorithm.INIT_POSITION_VALUE) {
-          positions[dimIndex] = positionToAllocate;
-          position = positionToAllocate;
-          aggregator1.init(resultsBuffer, position);
-          aggregator1.aggregate(resultsBuffer, position);
-          position += aggregator1Size;
-          aggregator2.init(resultsBuffer, position);
-          aggregator2.aggregate(resultsBuffer, position);
-          positionToAllocate += totalAggregatorsSize;
+    if (granularizer.currentOffsetWithinBucket()) {
+      while (!cursor.isDoneOrInterrupted()) {
+        final IndexedInts dimValues = dimensionSelector.getRow();
+        final int dimSize = dimValues.size();
+        for (int i = 0; i < dimSize; i++) {
+          int dimIndex = dimValues.get(i);
+          int position = positions[dimIndex];
+          if (position >= 0) {
+            aggregator1.aggregate(resultsBuffer, position);
+            aggregator2.aggregate(resultsBuffer, position + aggregator1Size);
+          } else if (position == TopNAlgorithm.INIT_POSITION_VALUE) {
+            positions[dimIndex] = positionToAllocate;
+            position = positionToAllocate;
+            aggregator1.init(resultsBuffer, position);
+            aggregator1.aggregate(resultsBuffer, position);
+            position += aggregator1Size;
+            aggregator2.init(resultsBuffer, position);
+            aggregator2.aggregate(resultsBuffer, position);
+            positionToAllocate += totalAggregatorsSize;
+          }
+        }
+        processedRows++;
+        if (!granularizer.advanceCursorWithinBucketUninterruptedly()) {
+          break;
         }
-      }
-      processedRows++;
-      if (!granularizer.advanceCursorWithinBucketUninterruptedly()) {
-        break;
       }
     }
     return processedRows;
Original file line number	Diff line number	Diff line change
`@@ -391,7 +391,7 @@ public boolean hasNext()`
`391`	`391`	`if (delegate != null && delegate.hasNext()) {`
`392`	`392`	`return true;`
`393`	`393`	`} else {`
`394`		`- if (!cursor.isDone() && granularizer.currentOffsetWithinBucket()) {`
	`394`	`+ if (granularizer.currentOffsetWithinBucket()) {`
`395`	`395`	`if (delegate != null) {`
`396`	`396`	`delegate.close();`
`397`	`397`	`}`