Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion miot-harness/src/miot_harness/agents/chat_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def get_chat_model(
*,
thinking_budget_tokens: int | None = None,
effort: Effort | None = None,
timeout: int | None = None,
) -> BaseChatModel:
"""Multi-provider chat-model factory.

Expand Down Expand Up @@ -96,7 +97,10 @@ def get_chat_model(
kwargs: dict[str, object] = {
"model_name": name,
"api_key": SecretStr(settings.anthropic_api_key),
"timeout": 60,
# 60s suits single-shot seats; the agent loop passes a longer
# budget because an adaptive-thinking turn that plans several
# tool calls can legitimately exceed a minute.
"timeout": timeout if timeout is not None else 60,
"stop": None,
}
if effort is not None:
Expand Down
34 changes: 34 additions & 0 deletions miot-harness/src/miot_harness/agents/native_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""Anthropic-format tool definitions for the single-agent loop.

The tool list is part of the prompt-cache prefix (tools render before
system), so it must be byte-stable across requests: same scope rules as
the legacy planner catalog (curated `tool_prefix` functions + exploration
primitives), sorted by name, schemas derived deterministically from the
pydantic input models.
"""

from __future__ import annotations

from typing import Any

from miot_harness.datasource.provider import DataSourceProfile
from miot_harness.tools.registry import ToolRegistry


def build_native_tools(
    registry: ToolRegistry, *, profile: DataSourceProfile
) -> list[dict[str, Any]]:
    """Return the Anthropic-format tool definitions visible to `profile`.

    A registered tool is included when its name starts with
    `profile.tool_prefix` or when its `kind` is ``"primitive"``. Each entry
    carries the tool's name, its description, and the JSON schema produced
    by its pydantic input model.

    The output follows the iteration order of `registry.names()`, which the
    registry is expected to return sorted, so the resulting list stays
    stable across requests.
    """
    # Resolve every registered name to its tool object up front (lazily),
    # then keep only the ones in scope for this data-source profile.
    resolved = ((tool_name, registry.get(tool_name)) for tool_name in registry.names())
    return [
        {
            "name": tool_name,
            "description": spec.description,
            "input_schema": spec.input_model.model_json_schema(),
        }
        for tool_name, spec in resolved
        if tool_name.startswith(profile.tool_prefix) or spec.kind == "primitive"
    ]
20 changes: 20 additions & 0 deletions miot-harness/src/miot_harness/api/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from miot_harness.datasource.registry import resolve as resolve_datasource
from miot_harness.observability.otel import configure_tracing, shutdown_tracing
from miot_harness.observability.provenance import ProvenanceLog
from miot_harness.runtime.agent_loop import AgentLoopRunner
from miot_harness.runtime.agentic_graph import build_agentic_graph
from miot_harness.runtime.context import UserRequest
from miot_harness.runtime.data_graph import build_data_graph
Expand Down Expand Up @@ -470,6 +471,24 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]:
profile=effective_profile,
registry=harness.tools,
)
# Single-agent loop (flag-gated). Reuses the planner seat's
# model/effort; the runner freezes prompt + tool list at boot
# so every request shares one prompt-cache prefix.
if settings.agents_agent_loop_enabled:
harness.agent_loop = AgentLoopRunner(
model=get_chat_model(
settings.agents_planner_model,
effort=settings.agents_planner_effort,
timeout=settings.agents_agent_loop_llm_timeout_seconds,
),
registry=harness.tools,
settings=settings,
profile=effective_profile,
provenance_log=ProvenanceLog(
settings.provenance_log_dir,
enabled=settings.provenance_log_enabled,
),
)
harness.meta_model = get_chat_model(
settings.intent_router_model,
thinking_budget_tokens=synth_thinking_budget,
Expand Down Expand Up @@ -525,6 +544,7 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]:
app.state.datasource_freshness = {}
harness.data_graph = None
harness.agentic_graph = None
harness.agent_loop = None
harness.meta_model = None
harness.meta_primer = "" # meta path gates on meta_model
harness.meta_catalog = []
Expand Down
12 changes: 12 additions & 0 deletions miot-harness/src/miot_harness/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,18 @@ class HarnessSettings(BaseSettings):
# so they exercise the rules-only path.
agents_agentic_verify_enabled: bool = True
agents_agentic_max_replans: int = Field(default=2, ge=0)
# Single-agent tool-calling loop (spec 2026-07-02). When enabled, the
# DATA_AGENTIC route runs one cached native tool-use loop instead of the
# planner/verifier/synthesizer/critic panel. Default off until golden
# evals show parity with the legacy agentic graph.
agents_agent_loop_enabled: bool = False
# Per-tool-result cap on the JSON fed back to the model. Bounds context
# growth (and cache-write size) when a tool returns a large row set.
agents_agent_loop_tool_result_max_chars: int = Field(default=6000, gt=0)
# LLM timeout for the agent loop (seconds). The loop's final turn writes
# the full user-facing answer and adaptive-thinking turns can exceed the
# current hard 60s default. 300s (5 minutes) suits multi-step planning.
agents_agent_loop_llm_timeout_seconds: int = Field(default=300, gt=0)
# Small "did we answer it?" judge. Held separate from the synthesizer so it
# can stay cheap. Empty string disables the LLM judge (rules-only verify).
agents_verifier_model: str = "claude-haiku-4-5"
Expand Down
10 changes: 9 additions & 1 deletion miot-harness/src/miot_harness/observability/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,15 @@ def _extract_usage(response: LLMResult) -> TokenUsage:
continue
details = usage_metadata.get("input_token_details") or {}
cache_read = int(details.get("cache_read", 0) or 0)
cache_creation = int(details.get("cache_creation", 0) or 0)
# langchain_anthropic maps ephemeral cache writes to
# ephemeral_5m_input_tokens / ephemeral_1h_input_tokens and sets
# cache_creation=0. Sum all three so telemetry captures ephemeral
# writes at the correct 1.25× pricing rate.
cache_creation = (
int(details.get("cache_creation", 0) or 0)
+ int(details.get("ephemeral_5m_input_tokens", 0) or 0)
+ int(details.get("ephemeral_1h_input_tokens", 0) or 0)
)
total_input = int(usage_metadata.get("input_tokens", 0) or 0)
return TokenUsage(
input_tokens=max(total_input - cache_read - cache_creation, 0),
Expand Down
Loading
Loading