microboxlabs · korutx · Jul 3, 2026 · Jul 2, 2026 · Jul 2, 2026 · Jul 2, 2026
diff --git a/miot-harness/src/miot_harness/agents/chat_models.py b/miot-harness/src/miot_harness/agents/chat_models.py
@@ -55,6 +55,7 @@ def get_chat_model(
     *,
     thinking_budget_tokens: int | None = None,
     effort: Effort | None = None,
+    timeout: int | None = None,
 ) -> BaseChatModel:
     """Multi-provider chat-model factory.
 
@@ -96,7 +97,10 @@ def get_chat_model(
         kwargs: dict[str, object] = {
             "model_name": name,
             "api_key": SecretStr(settings.anthropic_api_key),
-            "timeout": 60,
+            # 60s suits single-shot seats; the agent loop passes a longer
+            # budget because an adaptive-thinking turn that plans several
+            # tool calls can legitimately exceed a minute.
+            "timeout": timeout if timeout is not None else 60,
             "stop": None,
         }
         if effort is not None:

diff --git a/miot-harness/src/miot_harness/agents/native_tools.py b/miot-harness/src/miot_harness/agents/native_tools.py
@@ -0,0 +1,34 @@
+"""Anthropic-format tool definitions for the single-agent loop.
+
+The tool list is part of the prompt-cache prefix (tools render before
+system), so it must be byte-stable across requests: same scope rules as
+the legacy planner catalog (curated `tool_prefix` functions + exploration
+primitives), sorted by name, schemas derived deterministically from the
+pydantic input models.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from miot_harness.datasource.provider import DataSourceProfile
+from miot_harness.tools.registry import ToolRegistry
+
+
+def build_native_tools(
+    registry: ToolRegistry, *, profile: DataSourceProfile
+) -> list[dict[str, Any]]:
+    """Return Anthropic-format tool dicts for the in-scope tools, by name.
+
+    In scope: name starts with ``profile.tool_prefix`` or ``tool.kind`` is
+    ``"primitive"``. Sorted here so order never relies on the registry.
+    """
+    tools: list[dict[str, Any]] = []
+    for name in sorted(registry.names()):
+        tool = registry.get(name)
+        if name.startswith(profile.tool_prefix) or tool.kind == "primitive":
+            schema = tool.input_model.model_json_schema()
+            tools.append(
+                {"name": name, "description": tool.description, "input_schema": schema}
+            )
+    return tools
diff --git a/miot-harness/src/miot_harness/api/server.py b/miot-harness/src/miot_harness/api/server.py
@@ -39,6 +39,7 @@
 from miot_harness.datasource.registry import resolve as resolve_datasource
 from miot_harness.observability.otel import configure_tracing, shutdown_tracing
 from miot_harness.observability.provenance import ProvenanceLog
+from miot_harness.runtime.agent_loop import AgentLoopRunner
 from miot_harness.runtime.agentic_graph import build_agentic_graph
 from miot_harness.runtime.context import UserRequest
 from miot_harness.runtime.data_graph import build_data_graph
@@ -470,6 +471,24 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]:
                     profile=effective_profile,
                     registry=harness.tools,
                 )
+                # Single-agent loop (flag-gated). Reuses the planner seat's
+                # model/effort; the runner freezes prompt + tool list at boot
+                # so every request shares one prompt-cache prefix.
+                if settings.agents_agent_loop_enabled:
+                    harness.agent_loop = AgentLoopRunner(
+                        model=get_chat_model(
+                            settings.agents_planner_model,
+                            effort=settings.agents_planner_effort,
+                            timeout=settings.agents_agent_loop_llm_timeout_seconds,
+                        ),
+                        registry=harness.tools,
+                        settings=settings,
+                        profile=effective_profile,
+                        provenance_log=ProvenanceLog(
+                            settings.provenance_log_dir,
+                            enabled=settings.provenance_log_enabled,
+                        ),
+                    )
                 harness.meta_model = get_chat_model(
                     settings.intent_router_model,
                     thinking_budget_tokens=synth_thinking_budget,
@@ -525,6 +544,7 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]:
                 app.state.datasource_freshness = {}
                 harness.data_graph = None
                 harness.agentic_graph = None
+                harness.agent_loop = None
                 harness.meta_model = None
                 harness.meta_primer = ""  # meta path gates on meta_model
                 harness.meta_catalog = []

diff --git a/miot-harness/src/miot_harness/config.py b/miot-harness/src/miot_harness/config.py
@@ -73,6 +73,18 @@ class HarnessSettings(BaseSettings):
     # so they exercise the rules-only path.
     agents_agentic_verify_enabled: bool = True
     agents_agentic_max_replans: int = Field(default=2, ge=0)
+    # Single-agent tool-calling loop (spec 2026-07-02). When enabled, the
+    # DATA_AGENTIC route runs one cached native tool-use loop instead of the
+    # planner/verifier/synthesizer/critic panel. Default off until golden
+    # evals show parity with the legacy agentic graph.
+    agents_agent_loop_enabled: bool = False
+    # Per-tool-result cap on the JSON fed back to the model. Bounds context
+    # growth (and cache-write size) when a tool returns a large row set.
+    agents_agent_loop_tool_result_max_chars: int = Field(default=6000, gt=0)
+    # LLM timeout for the agent loop (seconds). The loop's final turn writes
+    # the full user-facing answer, and adaptive-thinking turns can exceed
+    # get_chat_model's 60s fallback. 300s (5 minutes) suits multi-step planning.
+    agents_agent_loop_llm_timeout_seconds: int = Field(default=300, gt=0)
     # Small "did we answer it?" judge. Held separate from the synthesizer so it
     # can stay cheap. Empty string disables the LLM judge (rules-only verify).
     agents_verifier_model: str = "claude-haiku-4-5"

diff --git a/miot-harness/src/miot_harness/observability/callbacks.py b/miot-harness/src/miot_harness/observability/callbacks.py
@@ -82,7 +82,15 @@ def _extract_usage(response: LLMResult) -> TokenUsage:
                 continue
             details = usage_metadata.get("input_token_details") or {}
             cache_read = int(details.get("cache_read", 0) or 0)
-            cache_creation = int(details.get("cache_creation", 0) or 0)
+            # langchain_anthropic reports TTL-tagged cache writes under
+            # ephemeral_5m_input_tokens / ephemeral_1h_input_tokens and zeroes
+            # cache_creation, so sum all three to count every cache write.
+            # NOTE(review): 1h writes bill at 2x, not the 5m 1.25x — confirm costing.
+            cache_creation = (
+                int(details.get("cache_creation", 0) or 0)
+                + int(details.get("ephemeral_5m_input_tokens", 0) or 0)
+                + int(details.get("ephemeral_1h_input_tokens", 0) or 0)
+            )
             total_input = int(usage_metadata.get("input_tokens", 0) or 0)
             return TokenUsage(
                 input_tokens=max(total_input - cache_read - cache_creation, 0),