juspay · prakhar-prakash-juspay · May 27, 2026 · May 26, 2026 · May 27, 2026
diff --git a/docker/Dockerfile.database b/docker/Dockerfile.database
@@ -42,6 +42,18 @@ RUN pip install dist/*.whl
 # install dependencies as wheels
 RUN pip wheel --no-cache-dir --wheel-dir=/wheels/ -r requirements.txt
 
+# Optionally build litellm-proxy-extras from the fork source instead of the PyPI
+# version pinned in requirements.txt. The PyPI wheel (fetched above into /wheels)
+# lags the fork's schema.prisma + migrations, so without this the fork's custom
+# tables are invisible to `prisma migrate deploy`. Mirrors docker/Dockerfile.non_root.
+# Opt in with PROXY_EXTRAS_SOURCE=local; any other value (default "published") keeps the PyPI wheel.
+ARG PROXY_EXTRAS_SOURCE=published
+RUN if [ "$PROXY_EXTRAS_SOURCE" = "local" ]; then \
+      cd /app/litellm-proxy-extras && rm -rf dist && python -m build && \
+      rm -f /wheels/litellm_proxy_extras-*.whl && \
+      cp dist/*.whl /wheels/; \
+    fi
+
 # Runtime stage
 FROM $LITELLM_RUNTIME_IMAGE AS runtime
 

diff --git a/...y-extras/litellm_proxy_extras/migrations/20260527000000_add_bench_run_table/migration.sql b/...y-extras/litellm_proxy_extras/migrations/20260527000000_add_bench_run_table/migration.sql
@@ -0,0 +1,21 @@
+-- CreateTable: one row per recorded benchmark run (mirrors model LiteLLM_BenchRun)
+CREATE TABLE "LiteLLM_BenchRun" (
+    "bench_run_id" TEXT NOT NULL, -- no DB-side default; the schema declares @default(uuid())
+    "model_name" TEXT NOT NULL,
+    "deployment_server" TEXT,
+    "bench_type" TEXT,
+    "input_tokens" INTEGER,
+    "output_tokens" INTEGER,
+    "max_concurrency" INTEGER,
+    "raw_command" TEXT,
+    "raw_results" TEXT,
+    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    "created_by" TEXT,
+    "updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    "updated_by" TEXT,
+
+    CONSTRAINT "LiteLLM_BenchRun_pkey" PRIMARY KEY ("bench_run_id")
+);
+
+-- CreateIndex: non-unique index on model_name (matches @@index([model_name]))
+CREATE INDEX "LiteLLM_BenchRun_model_name_idx" ON "LiteLLM_BenchRun"("model_name");
diff --git a/litellm-proxy-extras/litellm_proxy_extras/schema.prisma b/litellm-proxy-extras/litellm_proxy_extras/schema.prisma
@@ -1286,3 +1286,22 @@ model LiteLLM_UserSSHKey {
 
   @@index([user_id])
 }
+
+// Bench Runs — admin registry of vLLM/SGLang benchmark results (Bench Runs dashboard tab)
+model LiteLLM_BenchRun {
+  bench_run_id      String   @id @default(uuid()) // primary key; uuid default when not supplied
+  model_name        String   // litellm model name (litellm_params.model), required
+  deployment_server String?  // "vllm" | "sglang"
+  bench_type        String?  // "random" | "multi-turn"
+  input_tokens      Int?     // optional run parameter
+  output_tokens     Int?     // optional run parameter
+  max_concurrency   Int?     // optional run parameter
+  raw_command       String?  // raw `vllm bench serve ...` command text
+  raw_results       String?  // raw "Serving Benchmark Result" stdout block
+  created_at        DateTime @default(now()) @map("created_at")
+  created_by        String?  // acting user's email as forwarded by grid; null if not sent
+  updated_at        DateTime @default(now()) @updatedAt @map("updated_at")
+  updated_by        String?  // the create endpoint stores the same value as created_by
+
+  @@index([model_name]) // the only secondary index on this model
+}
diff --git a/litellm/proxy/management_endpoints/bench_run_endpoints.py b/litellm/proxy/management_endpoints/bench_run_endpoints.py
@@ -0,0 +1,169 @@
+"""
+BENCH RUNS — admin registry of vLLM/SGLang benchmark results.
+
+Backs the grid-ai-onboarding "Bench Runs" dashboard tab. Admin-only CRUD over
+LiteLLM_BenchRun: each row is one benchmark run (model + run params + the raw
+`vllm bench serve` command and its raw stdout, stored verbatim).
+
+  POST   /bench/run/new      — create a bench run record
+  GET    /bench/run/list     — list all bench runs (newest first)
+  POST   /bench/run/delete   — delete a bench run by id
+
+All endpoints require PROXY_ADMIN. Grid forwards these with its admin key, so
+the user-facing admin gate lives in grid; the PROXY_ADMIN check here is
+defense-in-depth (mirrors playground_endpoints._require_admin).
+"""
+
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+
+from fastapi import APIRouter, Depends, HTTPException, Request
+from prisma.errors import RecordNotFoundError
+
+from litellm._logging import verbose_proxy_logger
+from litellm.proxy._types import (
+    LiteLLMPydanticObjectBase,
+    LitellmUserRoles,
+    UserAPIKeyAuth,
+)
+from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm.proxy.management_helpers.utils import management_endpoint_wrapper
+
+router = APIRouter()  # mounted in proxy_server via app.include_router(bench_run_router)
+
+_TAGS = ["bench runs"]  # `tags=` value shared by every route in this module
+_DEPS = [Depends(user_api_key_auth)]  # `dependencies=` value shared by every route
+
+
+# ─── request / response models ───────────────────────────────────────────────
+
+
+class NewBenchRunRequest(LiteLLMPydanticObjectBase):  # body of POST /bench/run/new
+    model_name: str  # litellm model name (litellm_params.model), required
+    deployment_server: Optional[str] = None  # "vllm" | "sglang"
+    bench_type: Optional[str] = None  # "random" | "multi-turn"
+    input_tokens: Optional[int] = None
+    output_tokens: Optional[int] = None
+    max_concurrency: Optional[int] = None
+    raw_command: Optional[str] = None  # raw `vllm bench serve ...` command text
+    raw_results: Optional[str] = None  # raw "Serving Benchmark Result" stdout block
+    # Set by grid to the acting user's email so the row is attributed to the
+    # real caller rather than the shared admin key.
+    created_by: Optional[str] = None  # the create handler also stores it as updated_by
+
+
+class DeleteBenchRunRequest(LiteLLMPydanticObjectBase):  # body of POST /bench/run/delete
+    bench_run_id: str  # id of the LiteLLM_BenchRun row to delete
+
+
+class BenchRunResponse(LiteLLMPydanticObjectBase):  # one stored LiteLLM_BenchRun row
+    bench_run_id: str  # primary key of the row
+    model_name: str  # litellm model name (litellm_params.model)
+    deployment_server: Optional[str] = None  # "vllm" | "sglang"
+    bench_type: Optional[str] = None  # "random" | "multi-turn"
+    input_tokens: Optional[int] = None
+    output_tokens: Optional[int] = None
+    max_concurrency: Optional[int] = None
+    raw_command: Optional[str] = None  # raw `vllm bench serve ...` command text
+    raw_results: Optional[str] = None  # raw "Serving Benchmark Result" stdout block
+    created_at: datetime
+    created_by: Optional[str] = None  # acting user's email when grid supplied one
+    updated_at: datetime
+    updated_by: Optional[str] = None  # the create endpoint stores created_by here
+
+
+# ─── helpers ─────────────────────────────────────────────────────────────────
+
+
+def _prisma():
+    from litellm.proxy.proxy_server import prisma_client
+
+    if prisma_client is None:
+        raise HTTPException(500, "prisma_client not initialized")
+    return prisma_client
+
+
+def _require_admin(key: UserAPIKeyAuth) -> None:
+    role = key.user_role
+    value = role.value if role and hasattr(role, "value") else role
+    if value != LitellmUserRoles.PROXY_ADMIN.value:
+        raise HTTPException(403, "PROXY_ADMIN required")
+
+
+# ─── endpoints ────────────────────────────────────────────────────────────────
+
+
+@router.post(
+    "/bench/run/new",
+    tags=_TAGS,
+    dependencies=_DEPS,
+    response_model=BenchRunResponse,
+)
+@management_endpoint_wrapper
+async def new_bench_run(
+    request: Request,
+    data: NewBenchRunRequest,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+) -> BenchRunResponse:
+    """Record a benchmark run. Only model_name is required; everything else is
+    optional (grid pre-fills what it can parse from the run command)."""
+    _require_admin(user_api_key_dict)
+
+    payload = data.model_dump(exclude_unset=True)
+    model_name = (payload.get("model_name") or "").strip()
+    if not model_name:
+        raise HTTPException(400, "model_name is required")
+    payload["model_name"] = model_name
+    created_by = payload.pop("created_by", None)
+
+    row = await _prisma().db.litellm_benchrun.create(
+        data={
+            **payload,
+            "created_by": created_by,
+            "updated_by": created_by,
+        }
+    )
+    verbose_proxy_logger.info(
+        f"bench_runs: created {row.bench_run_id} model={model_name} by={created_by}"
+    )
+    return BenchRunResponse(**row.model_dump())
+
+
+@router.get(
+    "/bench/run/list",
+    tags=_TAGS,
+    dependencies=_DEPS,
+    response_model=List[BenchRunResponse],
+)
+@management_endpoint_wrapper
+async def list_bench_runs(
+    request: Request,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+) -> List[BenchRunResponse]:
+    """All bench runs, newest first. Volume is low (manual entries); grid
+    derives filter options and filters client-side."""
+    _require_admin(user_api_key_dict)
+    rows = await _prisma().db.litellm_benchrun.find_many(order={"created_at": "desc"})
+    return [BenchRunResponse(**r.model_dump()) for r in rows]
+
+
+@router.post(
+    "/bench/run/delete",
+    tags=_TAGS,
+    dependencies=_DEPS,
+)
+@management_endpoint_wrapper
+async def delete_bench_run(
+    request: Request,
+    data: DeleteBenchRunRequest,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+) -> Dict[str, Any]:
+    """Delete a bench run by id."""
+    _require_admin(user_api_key_dict)
+    try:
+        await _prisma().db.litellm_benchrun.delete(
+            where={"bench_run_id": data.bench_run_id}
+        )
+    except RecordNotFoundError:
+        raise HTTPException(404, "bench run not found")
+    return {"success": True}
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
@@ -409,6 +409,9 @@ def generate_feedback_box():
 from litellm.proxy.management_endpoints.playground_endpoints import (
     router as playground_router,
 )
+from litellm.proxy.management_endpoints.bench_run_endpoints import (
+    router as bench_run_router,
+)
 from litellm.proxy.management_endpoints.policy_endpoints import router as policy_router
 from litellm.proxy.management_endpoints.project_endpoints import (
     router as project_router,
@@ -13986,6 +13989,7 @@ async def get_routes():
 app.include_router(policy_crud_router)
 app.include_router(policy_resolve_router)
 app.include_router(playground_router)
+app.include_router(bench_run_router)
 app.include_router(search_tool_management_router)
 app.include_router(prompts_router)
 app.include_router(callback_management_endpoints_router)

diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma
@@ -1286,3 +1286,22 @@ model LiteLLM_UserSSHKey {
 
   @@index([user_id])
 }
+
+// Bench Runs — admin registry of vLLM/SGLang benchmark results (Bench Runs dashboard tab)
+model LiteLLM_BenchRun {
+  bench_run_id      String   @id @default(uuid()) // primary key; uuid default when not supplied
+  model_name        String   // litellm model name (litellm_params.model), required
+  deployment_server String?  // "vllm" | "sglang"
+  bench_type        String?  // "random" | "multi-turn"
+  input_tokens      Int?     // optional run parameter
+  output_tokens     Int?     // optional run parameter
+  max_concurrency   Int?     // optional run parameter
+  raw_command       String?  // raw `vllm bench serve ...` command text
+  raw_results       String?  // raw "Serving Benchmark Result" stdout block
+  created_at        DateTime @default(now()) @map("created_at")
+  created_by        String?  // acting user's email as forwarded by grid; null if not sent
+  updated_at        DateTime @default(now()) @updatedAt @map("updated_at")
+  updated_by        String?  // the create endpoint stores the same value as created_by
+
+  @@index([model_name]) // the only secondary index on this model
+}
diff --git a/schema.prisma b/schema.prisma
@@ -1286,3 +1286,22 @@ model LiteLLM_UserSSHKey {
 
   @@index([user_id])
 }
+
+// Bench Runs — admin registry of vLLM/SGLang benchmark results (Bench Runs dashboard tab)
+model LiteLLM_BenchRun {
+  bench_run_id      String   @id @default(uuid()) // primary key; uuid default when not supplied
+  model_name        String   // litellm model name (litellm_params.model), required
+  deployment_server String?  // "vllm" | "sglang"
+  bench_type        String?  // "random" | "multi-turn"
+  input_tokens      Int?     // optional run parameter
+  output_tokens     Int?     // optional run parameter
+  max_concurrency   Int?     // optional run parameter
+  raw_command       String?  // raw `vllm bench serve ...` command text
+  raw_results       String?  // raw "Serving Benchmark Result" stdout block
+  created_at        DateTime @default(now()) @map("created_at")
+  created_by        String?  // acting user's email as forwarded by grid; null if not sent
+  updated_at        DateTime @default(now()) @updatedAt @map("updated_at")
+  updated_by        String?  // the create endpoint stores the same value as created_by
+
+  @@index([model_name]) // the only secondary index on this model
+}