-
Notifications
You must be signed in to change notification settings - Fork 79
Add maxSamples limit #663
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add maxSamples limit #663
Changes from 6 commits
4c169c9
8098487
1fc6e89
734630c
1a7cf64
975cce8
8945224
418afb2
639ec0f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| // Copyright (c) The Thanos Community Authors. | ||
| // Licensed under the Apache License 2.0. | ||
|
|
||
| package query | ||
|
|
||
| import ( | ||
| "fmt" | ||
| "sync/atomic" | ||
| ) | ||
|
|
||
| type SampleTracker struct { | ||
| current atomic.Int64 | ||
| limit int64 | ||
| } | ||
|
|
||
| func NewSampleTracker(maxSamples int) *SampleTracker { | ||
| return &SampleTracker{ | ||
| limit: int64(maxSamples), | ||
| } | ||
| } | ||
|
|
||
| func (st *SampleTracker) Add(count int) { | ||
| st.current.Add(int64(count)) | ||
| } | ||
|
|
||
| func (st *SampleTracker) Remove(count int) { | ||
| st.current.Add(-int64(count)) | ||
| } | ||
|
|
||
| func (st *SampleTracker) CheckLimit() error { | ||
| if st.limit <= 0 { | ||
| return nil | ||
| } | ||
| current := st.current.Load() | ||
| if current > st.limit { | ||
| return ErrMaxSamplesExceeded{Current: current, Limit: st.limit} | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| type ErrMaxSamplesExceeded struct { | ||
| Current int64 | ||
| Limit int64 | ||
| } | ||
|
|
||
| func (e ErrMaxSamplesExceeded) Error() string { | ||
| return fmt.Sprintf("query processing would load too many samples into memory: current=%d, limit=%d", e.Current, e.Limit) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -74,10 +74,14 @@ type matrixSelector struct { | |
|
|
||
| nonCounterMetric string | ||
| hasFloats bool | ||
|
|
||
| lastTrackedSamples int | ||
| } | ||
|
|
||
| var ErrNativeHistogramsNotSupported = errors.New("native histograms are not supported in extended range functions") | ||
|
|
||
| const maxSamplesCheckIntervalSeries = 1000 | ||
|
|
||
| // NewMatrixSelector creates operator which selects vector of series over time. | ||
| func NewMatrixSelector( | ||
| selector SeriesSelector, | ||
|
|
@@ -217,7 +221,40 @@ func (o *matrixSelector) Next(ctx context.Context, buf []model.StepVector) (int, | |
| o.telemetry.IncrementSamplesAtTimestamp(scanner.buffer.SampleCount(), seriesTs) | ||
| seriesTs += o.step | ||
| } | ||
|
|
||
| if o.opts.SampleTracker != nil && (o.currentSeries+1-firstSeries)%maxSamplesCheckIntervalSeries == 0 { | ||
| totalSamplesInBatch := 0 | ||
| for i := range o.scanners { | ||
| totalSamplesInBatch += o.scanners[i].buffer.SampleCount() | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems that we are counting sample count from all series not just the current series? It seems that we can just maintain a running sum for processed series instead of counting all processed series again
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes that would make more sense. |
||
| if totalSamplesInBatch > o.lastTrackedSamples { | ||
| o.opts.SampleTracker.Add(totalSamplesInBatch - o.lastTrackedSamples) | ||
| } else if totalSamplesInBatch < o.lastTrackedSamples { | ||
| o.opts.SampleTracker.Remove(o.lastTrackedSamples - totalSamplesInBatch) | ||
| } | ||
| o.lastTrackedSamples = totalSamplesInBatch | ||
| if err := o.opts.SampleTracker.CheckLimit(); err != nil { | ||
| return 0, err | ||
| } | ||
| } | ||
| } | ||
|
|
||
| if o.opts.SampleTracker != nil { | ||
| totalSamplesInBatch := 0 | ||
| for i := range o.scanners { | ||
| totalSamplesInBatch += o.scanners[i].buffer.SampleCount() | ||
| } | ||
| if totalSamplesInBatch > o.lastTrackedSamples { | ||
| o.opts.SampleTracker.Add(totalSamplesInBatch - o.lastTrackedSamples) | ||
| } else if totalSamplesInBatch < o.lastTrackedSamples { | ||
| o.opts.SampleTracker.Remove(o.lastTrackedSamples - totalSamplesInBatch) | ||
| } | ||
| o.lastTrackedSamples = totalSamplesInBatch | ||
| if err := o.opts.SampleTracker.CheckLimit(); err != nil { | ||
| return 0, err | ||
| } | ||
| } | ||
|
|
||
| if o.currentSeries == int64(len(o.scanners)) { | ||
| o.currentStep += o.step * int64(n) | ||
| o.currentSeries = 0 | ||
|
|
@@ -311,6 +348,7 @@ func (o *matrixSelector) newBuffer(ctx context.Context) ringbuffer.Buffer { | |
| return ringbuffer.NewWithExtLookback(ctx, 8, o.selectRange, o.offset, o.opts.ExtLookbackDelta.Milliseconds()-1, o.call) | ||
| } | ||
| return ringbuffer.New(ctx, 8, o.selectRange, o.offset, o.call) | ||
|
|
||
| } | ||
|
|
||
| func (o *matrixSelector) String() string { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if we have a way to infer a good sample check interval dynamically. Cardinality and number of total steps are known when we call
Next(). So ideally we can check more frequently if the query has high cardinality?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have moved the checkSampleLimit to inner loop as noted from the run on beta cell, the instant subQueries were getting checked too late. Also added dynamic interval check.
Thanks.