-
Notifications
You must be signed in to change notification settings - Fork 0
feat: long-running jobs (polling + callback semantics) #43
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
490910a
1f5e3ab
24e8b27
4f663ed
ac3ed11
e5b748f
71b0b87
ec829ff
2534bd1
a43ad5f
98dc6e3
868ce4b
e70bca2
fcb70e1
c53c3fa
d0bb5a1
edd22d8
9861bc9
302a5ed
4df9f24
ecafe73
1d482c9
c35a783
7e52c61
a2b0993
7a0f4ac
cf5dbde
f3c944c
f51faf8
1eb4b88
f156909
8ba6759
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,4 @@ | ||
| pub mod callbacks; | ||
| pub mod configs; | ||
| pub mod endpoints; | ||
| pub mod executions; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,215 @@ | ||
| use actix_web::{web, HttpResponse}; | ||
| use kronos_common::{ | ||
| db::{self, scoped, DbContext}, | ||
| metrics as m, | ||
| }; | ||
| use serde::Deserialize; | ||
| use serde_json::Value; | ||
|
|
||
| use crate::extractors::AuthenticatedRequest; | ||
| use crate::router::AppState; | ||
|
|
||
| #[derive(Deserialize)] | ||
| pub struct CompleteBody { | ||
| pub output: Value, | ||
| } | ||
|
|
||
| #[derive(Deserialize)] | ||
| pub struct FailBody { | ||
| pub error: Value, | ||
| } | ||
|
|
||
| pub async fn complete( | ||
| state: web::Data<AppState>, | ||
| _auth: AuthenticatedRequest, | ||
| path: web::Path<(String, String, String)>, | ||
| body: web::Json<CompleteBody>, | ||
| ) -> HttpResponse { | ||
| let (org_id, workspace_id, execution_id) = path.into_inner(); | ||
| let schema_name = | ||
| match db::workspaces::resolve_schema(&state.pool, &org_id, &workspace_id).await { | ||
| Ok(Some(s)) => s, | ||
| Ok(None) => return HttpResponse::Forbidden().finish(), | ||
| Err(e) => return HttpResponse::InternalServerError().body(e.to_string()), | ||
| }; | ||
|
|
||
| let mut tx = match scoped::scoped_transaction(&state.pool, &schema_name).await { | ||
| Ok(tx) => tx, | ||
| Err(e) => return HttpResponse::InternalServerError().body(e.to_string()), | ||
| }; | ||
| let mut db = DbContext::new(&mut *tx, state.prefix()); | ||
|
|
||
| let rows_affected = | ||
| match db::executions::complete_success_from_long_running(&mut db, &execution_id, &body.output) | ||
| .await | ||
| { | ||
| Ok(n) => n, | ||
| Err(e) => return HttpResponse::InternalServerError().body(e.to_string()), | ||
| }; | ||
|
|
||
| if rows_affected == 0 { | ||
| let current = db::executions::get(&mut db, &execution_id).await.ok().flatten(); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don’t mask database errors with Line 51, Line 80, Line 160, and Line 193 convert DB failures into Suggested fix- let current = db::executions::get(&mut db, &execution_id).await.ok().flatten();
+ let current = match db::executions::get(&mut db, &execution_id).await {
+ Ok(v) => v,
+ Err(e) => return HttpResponse::InternalServerError().body(e.to_string()),
+ };Use the same explicit error handling for all Also applies to: 80-80, 160-160, 193-193 🤖 Prompt for AI Agents |
||
| return match current { | ||
| None => HttpResponse::NotFound().finish(), | ||
| Some(e) if matches!(e.status.as_str(), "SUCCESS" | "FAILED" | "CANCELLED") => { | ||
| HttpResponse::Conflict().json(serde_json::json!({ | ||
| "code": "ALREADY_TERMINAL", | ||
| "current_status": e.status, | ||
| })) | ||
| } | ||
| Some(e) => HttpResponse::Conflict().json(serde_json::json!({ | ||
| "code": "NOT_YET_WAITING", | ||
| "current_status": e.status, | ||
| })), | ||
| }; | ||
| } | ||
|
|
||
| metrics::counter!(m::CALLBACKS_RECEIVED_TOTAL, "kind" => "complete", "result" => "applied") | ||
| .increment(1); | ||
| metrics::counter!(m::LONG_RUNNING_COMPLETED_TOTAL, "terminator" => "callback", "status" => "SUCCESS") | ||
| .increment(1); | ||
| metrics::gauge!(m::EXECUTIONS_WAITING).decrement(1.0); | ||
| let _ = db::execution_logs::insert( | ||
| &mut db, | ||
| &execution_id, | ||
| 0, | ||
| "INFO", | ||
| "Callback received: complete", | ||
| ) | ||
| .await; | ||
| let row = db::executions::get(&mut db, &execution_id).await.ok().flatten(); | ||
| let _ = tx.commit().await; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Handle transaction commit failures instead of returning success. Line 81, Line 161, and Line 194 ignore Suggested fix- let _ = tx.commit().await;
+ if let Err(e) = tx.commit().await {
+ return HttpResponse::InternalServerError().body(e.to_string());
+ }Apply the same pattern at all three commit sites. Also applies to: 161-161, 194-194 🤖 Prompt for AI Agents |
||
| match row { | ||
| Some(exec) => HttpResponse::Ok().json(serde_json::json!({ "data": { | ||
| "execution_id": exec.execution_id, | ||
| "job_id": exec.job_id, | ||
| "endpoint": exec.endpoint, | ||
| "endpoint_type": exec.endpoint_type, | ||
| "status": exec.status, | ||
| "input": exec.input, | ||
| "output": exec.output, | ||
| "attempt_count": exec.attempt_count, | ||
| "max_attempts": exec.max_attempts, | ||
| "worker_id": exec.worker_id, | ||
| "run_at": exec.run_at, | ||
| "started_at": exec.started_at, | ||
| "completed_at": exec.completed_at, | ||
| "duration_ms": exec.duration_ms, | ||
| "created_at": exec.created_at, | ||
| }})), | ||
| None => HttpResponse::Ok().finish(), | ||
| } | ||
| } | ||
|
|
||
| pub async fn fail( | ||
| state: web::Data<AppState>, | ||
| _auth: AuthenticatedRequest, | ||
| path: web::Path<(String, String, String)>, | ||
| body: web::Json<FailBody>, | ||
| ) -> HttpResponse { | ||
| let (org_id, workspace_id, execution_id) = path.into_inner(); | ||
| let schema_name = | ||
| match db::workspaces::resolve_schema(&state.pool, &org_id, &workspace_id).await { | ||
| Ok(Some(s)) => s, | ||
| Ok(None) => return HttpResponse::Forbidden().finish(), | ||
| Err(e) => return HttpResponse::InternalServerError().body(e.to_string()), | ||
| }; | ||
|
|
||
| let mut tx = match scoped::scoped_transaction(&state.pool, &schema_name).await { | ||
| Ok(tx) => tx, | ||
| Err(e) => return HttpResponse::InternalServerError().body(e.to_string()), | ||
| }; | ||
| let mut db = DbContext::new(&mut *tx, state.prefix()); | ||
|
|
||
| let exec = match db::executions::get(&mut db, &execution_id).await { | ||
| Ok(Some(e)) => e, | ||
| Ok(None) => return HttpResponse::NotFound().finish(), | ||
| Err(e) => return HttpResponse::InternalServerError().body(e.to_string()), | ||
| }; | ||
|
|
||
| if matches!(exec.status.as_str(), "SUCCESS" | "FAILED" | "CANCELLED") { | ||
| return HttpResponse::Conflict().json(serde_json::json!({ | ||
| "code": "ALREADY_TERMINAL", | ||
| "current_status": exec.status, | ||
| })); | ||
| } | ||
| if !matches!(exec.status.as_str(), "WAITING" | "POLLING") { | ||
| return HttpResponse::Conflict().json(serde_json::json!({ | ||
| "code": "NOT_YET_WAITING", | ||
| "current_status": exec.status, | ||
| })); | ||
| } | ||
|
|
||
| let endpoint = match db::endpoints::get(&mut db, &exec.endpoint).await { | ||
| Ok(Some(ep)) => ep, | ||
| Ok(None) => return HttpResponse::InternalServerError().body("endpoint missing"), | ||
| Err(e) => return HttpResponse::InternalServerError().body(e.to_string()), | ||
| }; | ||
| let retry_policy = endpoint.get_retry_policy(); | ||
| let backoff_ms = kronos_common::backoff::compute_backoff(&retry_policy, exec.attempt_count); | ||
|
|
||
| let applied = match db::executions::retry_from_long_running(&mut db, &execution_id, backoff_ms, &body.error).await { | ||
| Ok(rows) => rows > 0, | ||
| Err(e) => return HttpResponse::InternalServerError().body(e.to_string()), | ||
| }; | ||
|
|
||
| if !applied { | ||
| // Race-lost: another path finalized this row between our get() and our UPDATE. | ||
| metrics::counter!(m::CALLBACKS_RECEIVED_TOTAL, "kind" => "fail", "result" => "race_lost") | ||
| .increment(1); | ||
| let current = db::executions::get(&mut db, &execution_id).await.ok().flatten(); | ||
| let _ = tx.commit().await; | ||
| return match current { | ||
| None => HttpResponse::NotFound().finish(), | ||
| Some(e) if matches!(e.status.as_str(), "SUCCESS" | "FAILED" | "CANCELLED") => { | ||
| HttpResponse::Conflict().json(serde_json::json!({ | ||
| "code": "ALREADY_TERMINAL", | ||
| "current_status": e.status, | ||
| })) | ||
| } | ||
| Some(e) => HttpResponse::Conflict().json(serde_json::json!({ | ||
| "code": "RACE_LOST", | ||
| "current_status": e.status, | ||
| })), | ||
| }; | ||
| } | ||
|
|
||
| metrics::counter!(m::CALLBACKS_RECEIVED_TOTAL, "kind" => "fail", "result" => "applied") | ||
| .increment(1); | ||
| metrics::counter!(m::LONG_RUNNING_COMPLETED_TOTAL, "terminator" => "callback", "status" => "FAILED") | ||
| .increment(1); | ||
| metrics::gauge!(m::EXECUTIONS_WAITING).decrement(1.0); | ||
| let _ = db::execution_logs::insert( | ||
| &mut db, | ||
| &execution_id, | ||
| 0, | ||
| "INFO", | ||
| "Callback received: fail → re-dispatch", | ||
| ) | ||
| .await; | ||
|
|
||
| // Re-fetch after the body has been consumed (body is still in scope via `body.error` reference) | ||
| let _ = &body.error; // ensure body is held until here | ||
| let row = db::executions::get(&mut db, &execution_id).await.ok().flatten(); | ||
| let _ = tx.commit().await; | ||
| match row { | ||
| Some(exec) => HttpResponse::Ok().json(serde_json::json!({ "data": { | ||
| "execution_id": exec.execution_id, | ||
| "job_id": exec.job_id, | ||
| "endpoint": exec.endpoint, | ||
| "endpoint_type": exec.endpoint_type, | ||
| "status": exec.status, | ||
| "input": exec.input, | ||
| "output": exec.output, | ||
| "attempt_count": exec.attempt_count, | ||
| "max_attempts": exec.max_attempts, | ||
| "worker_id": exec.worker_id, | ||
| "run_at": exec.run_at, | ||
| "started_at": exec.started_at, | ||
| "completed_at": exec.completed_at, | ||
| "duration_ms": exec.duration_ms, | ||
| "created_at": exec.created_at, | ||
| }})), | ||
| None => HttpResponse::Ok().finish(), | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: juspay/kronos
Length of output: 50369
🏁 Script executed:
Repository: juspay/kronos
Length of output: 20362
Ensure callback bearer auth is tenant-bound to
{org_id}/{workspace_id}before applying execution updates.AuthenticatedRequestonly validatesAuthorization: Bearer <state.config.server.api_key>(server-wide) and contains no org/workspace authorization logic;complete/failaccept_authbut never use it.path-driven viadb::workspaces::resolve_schema(&org_id, &workspace_id)(checks workspace exists/ACTIVE), with no check that the bearer key is authorized for that tenant.crates/api/src/handlers/callbacks.rs~lines 24-33 and 106-116).🤖 Prompt for AI Agents