Skip to content

Commit a6a0c27

Browse files
docmerlin and lesam authored
feat: Flux batch queries in TICKscripts (#2550)
* feat: Flux batch queries in TICKscripts * chore: remove unused code * fix: split generic parser and kapacitor specific pieces * chore: remove unnecessary commented out import * chore: cleanup * chore: more cleanup * fix: HTTPClient.QueryFluxResponse should actually do what it is supposed to * fix: ResultToBufferedBatches should be able to handle time.Time type Co-authored-by: Sam Arnold <sarnold@influxdata.com>
1 parent 478a7d8 commit a6a0c27

26 files changed

Lines changed: 1185 additions & 64 deletions

batch.go

Lines changed: 277 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,20 @@ func (n *BatchNode) Wait() error {
6666
func (n *BatchNode) DBRPs() ([]DBRP, error) {
6767
var dbrps []DBRP
6868
for _, b := range n.children {
69-
d, err := b.(*QueryNode).DBRPs()
70-
if err != nil {
71-
return nil, err
69+
switch b := b.(type) {
70+
case *QueryNode:
71+
if b != nil {
72+
d, err := b.DBRPs()
73+
if err != nil {
74+
return nil, err
75+
}
76+
dbrps = append(dbrps, d...)
77+
}
78+
case *FluxQueryNode:
79+
// flux queries don't really have DBRPs
80+
default:
81+
panic("BatchNode shouldn't be followed by anything except QueryNode or QueryFluxNode")
7282
}
73-
dbrps = append(dbrps, d...)
7483
}
7584
return dbrps, nil
7685
}
@@ -81,35 +90,66 @@ func (n *BatchNode) Count() int {
8190

8291
func (n *BatchNode) Start() {
8392
for _, b := range n.children {
84-
b.(*QueryNode).Start()
93+
switch b := b.(type) {
94+
case *QueryNode:
95+
b.Start()
96+
case *FluxQueryNode:
97+
b.Start()
98+
default:
99+
panic("BatchNode shouldn't be followed by anything except QueryNode or QueryFluxNode")
100+
}
85101
}
86102
}
87103

88104
func (n *BatchNode) Abort() {
89105
for _, b := range n.children {
90-
b.(*QueryNode).Abort()
106+
switch b := b.(type) {
107+
case *QueryNode:
108+
b.Abort()
109+
case *FluxQueryNode:
110+
b.Abort()
111+
default:
112+
panic("BatchNode shouldn't be followed by anything except QueryNode or QueryFluxNode")
113+
}
91114
}
92115
}
93116

94117
type BatchQueries struct {
95118
Queries []*Query
119+
FluxQueries []*QueryFlux
96120
Cluster string
97121
GroupByMeasurement bool
98122
}
99123

100124
func (n *BatchNode) Queries(start, stop time.Time) ([]BatchQueries, error) {
101125
queries := make([]BatchQueries, len(n.children))
102-
for i, b := range n.children {
103-
qn := b.(*QueryNode)
104-
qs, err := qn.Queries(start, stop)
105-
if err != nil {
106-
return nil, err
107-
}
108-
queries[i] = BatchQueries{
109-
Queries: qs,
110-
Cluster: qn.Cluster(),
111-
GroupByMeasurement: qn.GroupByMeasurement(),
126+
for i, qn := range n.children {
127+
switch qn := qn.(type) {
128+
case *QueryNode:
129+
qs, err := qn.Queries(start, stop)
130+
if err != nil {
131+
return nil, err
132+
}
133+
queries[i] = BatchQueries{
134+
Queries: qs,
135+
Cluster: qn.Cluster(),
136+
GroupByMeasurement: qn.GroupByMeasurement(),
137+
}
138+
139+
case *FluxQueryNode:
140+
qs, err := qn.Queries(start, stop)
141+
if err != nil {
142+
return nil, err
143+
}
144+
queries[i] = BatchQueries{
145+
FluxQueries: qs,
146+
Cluster: qn.Cluster(),
147+
}
148+
149+
default:
150+
panic("BatchNode shouldn't be followed by anything except QueryNode or QueryFluxNode")
112151
}
152+
113153
}
114154
return queries, nil
115155
}
@@ -522,3 +562,224 @@ func (c *cronTicker) Stop() {
522562
// Next returns whatever the ticker's cron expression reports as the next
// time for the given instant.
func (c *cronTicker) Next(now time.Time) time.Time {
	next := c.expr.Next(now)
	return next
}
565+
566+
// FluxQueryNode is a node for making flux queries
567+
type FluxQueryNode struct {
568+
node
569+
b *pipeline.QueryFluxNode
570+
query *QueryFlux
571+
ticker ticker
572+
queryMu sync.Mutex
573+
queryErr chan error
574+
closing chan struct{}
575+
aborting chan struct{}
576+
577+
batchesQueried *expvar.Int
578+
pointsQueried *expvar.Int
579+
byName bool
580+
}
581+
582+
func newQueryFluxNode(et *ExecutingTask, n *pipeline.QueryFluxNode, d NodeDiagnostic) (*FluxQueryNode, error) {
583+
bn := &FluxQueryNode{
584+
node: node{Node: n, et: et, diag: d},
585+
b: n,
586+
closing: make(chan struct{}),
587+
aborting: make(chan struct{}),
588+
}
589+
bn.node.runF = bn.runBatch
590+
bn.node.stopF = bn.stopBatch
591+
592+
// Create query
593+
q, err := NewQueryFlux(n.QueryStr, n.Org, n.OrgID)
594+
if err != nil {
595+
return nil, err
596+
}
597+
bn.query = q
598+
// Determine schedule
599+
if n.Every != 0 && n.Cron != "" {
600+
return nil, errors.New("must not set both 'every' and 'cron' properties")
601+
}
602+
switch {
603+
case n.Every > 0:
604+
bn.ticker = newTimeTicker(n.Every, n.AlignFlag)
605+
case n.Cron != "":
606+
var err error
607+
bn.ticker, err = newCronTicker(n.Cron)
608+
if err != nil {
609+
return nil, err
610+
}
611+
case n.Every < 0:
612+
return nil, errors.New("'every' duration must must non-negative")
613+
default:
614+
return nil, errors.New("must define one of 'every' or 'cron'")
615+
}
616+
617+
return bn, nil
618+
}
619+
620+
func (n *FluxQueryNode) Start() {
621+
n.queryMu.Lock()
622+
defer n.queryMu.Unlock()
623+
n.queryErr = make(chan error, 1)
624+
go func() {
625+
n.queryErr <- n.doQuery(n.ins[0])
626+
}()
627+
}
628+
629+
func (n *FluxQueryNode) Abort() {
630+
close(n.aborting)
631+
}
632+
633+
func (n *FluxQueryNode) Cluster() string {
634+
return n.b.Cluster
635+
}
636+
637+
func (n *FluxQueryNode) Queries(start, stop time.Time) ([]*QueryFlux, error) {
638+
now := time.Now()
639+
if stop.IsZero() {
640+
stop = now
641+
}
642+
// Crons are sensitive to timezones.
643+
// Make sure we are using local time.
644+
current := start.Local()
645+
queries := make([]*QueryFlux, 0)
646+
for {
647+
current = n.ticker.Next(current)
648+
if current.IsZero() || current.After(stop) {
649+
break
650+
}
651+
qstop := current.Add(-1 * n.b.Offset)
652+
if qstop.After(now) {
653+
break
654+
}
655+
656+
q, err := n.query.Clone()
657+
if err != nil {
658+
return nil, err
659+
}
660+
q.Now = now
661+
queries = append(queries, q)
662+
}
663+
return queries, nil
664+
}
665+
666+
// Query InfluxDB and collect batches on batch collector.
667+
func (n *FluxQueryNode) doQuery(in edge.Edge) (err error) {
668+
defer in.Close()
669+
n.batchesQueried = &expvar.Int{}
670+
n.pointsQueried = &expvar.Int{}
671+
672+
n.statMap.Set(statsBatchesQueried, n.batchesQueried)
673+
n.statMap.Set(statsPointsQueried, n.pointsQueried)
674+
675+
if n.et.tm.InfluxDBService == nil {
676+
return errors.New("InfluxDB not configured, cannot query InfluxDB for batch query")
677+
}
678+
679+
con, err := n.et.tm.InfluxDBService.NewNamedClient(n.b.Cluster)
680+
if err != nil {
681+
return errors.Wrap(err, "failed to get InfluxDB client")
682+
}
683+
tickC := n.ticker.Start()
684+
for {
685+
select {
686+
case <-n.closing:
687+
return nil
688+
case <-n.aborting:
689+
return errors.New("batch doQuery aborted")
690+
case now := <-tickC:
691+
n.timer.Start()
692+
// Update times for query
693+
n.query.Now = now.Add(-1 * n.b.Offset) //SetStartTime(stop.Add(-1 * n.b.Period))
694+
n.diag.StartingBatchQuery(n.query.stmt)
695+
696+
// Execute query
697+
resp, err := con.QueryFluxResponse(influxdb.FluxQuery{
698+
Query: n.query.stmt,
699+
Org: n.query.org,
700+
OrgID: n.query.orgID,
701+
Now: n.query.Now,
702+
})
703+
if err != nil {
704+
n.diag.Error("error executing query", err)
705+
n.timer.Stop()
706+
break
707+
}
708+
//Collect batches
709+
for _, res := range resp.Results {
710+
batches, err := edge.ResultToBufferedBatches(res, n.byName)
711+
if err != nil {
712+
n.diag.Error("failed to understand query result", err)
713+
continue
714+
}
715+
for _, bch := range batches {
716+
// Set stop time based off query bounds
717+
if bch.Begin().Time().IsZero() {
718+
bch.Begin().SetTime(now)
719+
}
720+
n.batchesQueried.Add(1)
721+
n.pointsQueried.Add(int64(len(bch.Points())))
722+
723+
n.timer.Pause()
724+
if err := in.Collect(bch); err != nil {
725+
return err
726+
}
727+
n.timer.Resume()
728+
}
729+
}
730+
n.timer.Stop()
731+
}
732+
}
733+
}
734+
735+
func (n *FluxQueryNode) runBatch([]byte) error {
736+
errC := make(chan error, 1)
737+
go func() {
738+
defer func() {
739+
err := recover()
740+
if err != nil {
741+
errC <- fmt.Errorf("%v", err)
742+
}
743+
}()
744+
for bt, ok := n.ins[0].Emit(); ok; bt, ok = n.ins[0].Emit() {
745+
for _, child := range n.outs {
746+
err := child.Collect(bt)
747+
if err != nil {
748+
errC <- err
749+
return
750+
}
751+
}
752+
}
753+
errC <- nil
754+
}()
755+
var queryErr error
756+
n.queryMu.Lock()
757+
if n.queryErr != nil {
758+
n.queryMu.Unlock()
759+
select {
760+
case queryErr = <-n.queryErr:
761+
case <-n.aborting:
762+
queryErr = errors.New("batch queryErr aborted")
763+
}
764+
} else {
765+
n.queryMu.Unlock()
766+
}
767+
768+
var err error
769+
select {
770+
case err = <-errC:
771+
case <-n.aborting:
772+
err = errors.New("batch run aborted")
773+
}
774+
if queryErr != nil {
775+
return queryErr
776+
}
777+
return err
778+
}
779+
780+
func (n *FluxQueryNode) stopBatch() {
781+
if n.ticker != nil {
782+
n.ticker.Stop()
783+
}
784+
close(n.closing)
785+
}

cmd/kapacitor/main.go

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -348,11 +348,12 @@ var (
348348
rbId = recordBatchFlags.String("recording-id", "", "The ID to give to this recording. If not set an random ID is chosen.")
349349

350350
recordQueryFlags = flag.NewFlagSet("record-query", flag.ExitOnError)
351-
rqQuery = recordQueryFlags.String("query", "", "The query to record.")
352-
rqType = recordQueryFlags.String("type", "", "The type of the recording to save (stream|batch).")
353-
rqCluster = recordQueryFlags.String("cluster", "", "Optional named InfluxDB cluster from configuration.")
354-
rqNowait = recordQueryFlags.Bool("no-wait", false, "Do not wait for the recording to finish.")
355-
rqId = recordQueryFlags.String("recording-id", "", "The ID to give to this recording. If not set an random ID is chosen.")
351+
// TODO: queryFlux recording
352+
rqQuery = recordQueryFlags.String("query", "", "The query to record.")
353+
rqType = recordQueryFlags.String("type", "", "The type of the recording to save (stream|batch).")
354+
rqCluster = recordQueryFlags.String("cluster", "", "Optional named InfluxDB cluster from configuration.")
355+
rqNowait = recordQueryFlags.Bool("no-wait", false, "Do not wait for the recording to finish.")
356+
rqId = recordQueryFlags.String("recording-id", "", "The ID to give to this recording. If not set an random ID is chosen.")
356357
)
357358

358359
func recordUsage() {
@@ -539,7 +540,6 @@ func doRecord(args []string) error {
539540
return fmt.Errorf("Unknown record type %q, expected 'stream', 'batch' or 'query'", args[0])
540541
}
541542
if noWait {
542-
fmt.Println(recording.ID)
543543
return nil
544544
}
545545
for recording.Status == client.Running {
@@ -1041,7 +1041,6 @@ func doReplay(args []string) error {
10411041
return err
10421042
}
10431043
if *rnowait {
1044-
fmt.Println(replay.ID)
10451044
return nil
10461045
}
10471046
for replay.Status == client.Running {
@@ -1225,7 +1224,6 @@ func doReplayLive(args []string) error {
12251224
return fmt.Errorf("Unknown replay-live type %q, expected 'batch' or 'query'", args[0])
12261225
}
12271226
if noWait {
1228-
fmt.Println(replay.ID)
12291227
return nil
12301228
}
12311229
for replay.Status == client.Running {

edge/edge.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ func (e *channelEdge) Collect(m Message) error {
6464
}
6565

6666
func (e *channelEdge) Emit() (m Message, ok bool) {
67+
// locked here
6768
select {
6869
case m, ok = <-e.messages:
6970
case <-e.aborting:

0 commit comments

Comments
 (0)