HKUDS
diff --git a/deeptutor/agents/chat/agentic_pipeline.py b/deeptutor/agents/chat/agentic_pipeline.py
Lines changed: 22 additions & 28 deletions
diff --git a/deeptutor/api/routers/auth.py b/deeptutor/api/routers/auth.py
Lines changed: 45 additions & 17 deletions
diff --git a/deeptutor/core/agentic/labeled_step.py b/deeptutor/core/agentic/labeled_step.py
Lines changed: 20 additions & 66 deletions
@@ -456,18 +456,11 @@ async def run(self, context: UnifiedContext, stream: StreamBus) -> None:
                 max_iterations=max(1, self._max_iterations),
                 host=host,
                 usage=self._usage,
-                # Reasoning models with native tool-calling support emit
-                # ``reasoning_content`` without parroting back
-                # ````THINK```` labels.  When the model is a reasoner
-                # AND native tool calling is active, use LABEL_FINISH so
-                # that a reply containing ``reasoning_content`` + answer
-                # text (but no explicit label) terminates the loop — the
-                # answer lives in ``content``, reasoning in
-                # ``reasoning_content``.  Non-reasoning models that emit
-                # ``<think/>`` tags get LABEL_THINK so the loop continues.
-                implicit_think_label=LABEL_FINISH
-                if (_is_reasoner and use_native_tools)
-                else LABEL_THINK,
+                # Thinking traces are trace data, not loop-control actions.
+                # The formal content stream must still begin with FINISH /
+                # TOOL / THINK / PAUSE so protocol repair can catch missing
+                # labels instead of silently treating answers as THINK.
+                implicit_think_label=None,
             )
 
         if outcome.sources:
@@ -1248,32 +1241,33 @@ def _append_reasoner_protocol_note(self, system_prompt: str) -> str:
 
         Reasoning models (e.g. Qwen3.6-Plus) emit reasoning via
         ``reasoning_content`` and tool calls via native ``tool_calls``
-        deltas. The label protocol (TOOL/THINK/FINISH/PAUSE) confuses
-        them into writing tool-call JSON in ``content`` instead of using
-        the native mechanism. This note tells them to ignore labels and
-        use native tool calling directly.
+        deltas. The reasoning stream is displayed separately, but the
+        formal ``content`` stream still controls the agent loop through the
+        standard first-line label.
         """
         if self.language == "zh":
             note = (
                 "\n\n# 推理模型特别说明\n"
                 "你是一个原生支持推理和工具调用的模型。"
-                "请**忽略上面「输出协议」中关于 ``TOOL``/``THINK``/``FINISH``/``PAUSE`` 标签的指示**。"
-                "你不需要在回复中输出任何标签。"
-                "你的推理过程会自动在独立区域显示。"
-                "当你需要调用工具时，直接通过原生 tool_calls 功能发起调用，不要在文本中写 JSON。"
-                "当你准备好给出最终答案时，直接在回复正文中写出答案即可。"
+                "你的推理过程会自动在独立的思考轨迹区域显示；"
+                "但正式回复正文仍然必须遵守上面的「输出协议」，第一行写且只写一个标签："
+                "``FINISH``、``TOOL``、``THINK`` 或 ``PAUSE``。"
+                "需要调用工具时，正式正文第一行写 ``TOOL``，并在同一次回复里通过原生 tool_calls 发起调用；"
+                "不要把工具调用写成 JSON 文本。"
+                "准备给最终答案时，正式正文第一行写 ``FINISH``。"
             )
         else:
             note = (
                 "\n\n# Reasoning Model Special Instructions\n"
                 "You are a model with native reasoning and tool-calling support. "
-                "Please **ignore the 'Output Protocol' instructions above about "
-                "``TOOL``/``THINK``/``FINISH``/``PAUSE`` labels**. "
-                "You do NOT need to output any labels in your replies. "
-                "Your reasoning is automatically displayed in a separate area. "
-                "When you need to call a tool, use native tool_calls directly — "
-                "do NOT write JSON in your text output. "
-                "When you are ready to give the final answer, just write it in your reply body."
+                "Your reasoning is automatically displayed in a separate trace, "
+                "but your formal content stream must still follow the Output "
+                "Protocol above: the first line must be exactly one label, "
+                "``FINISH``, ``TOOL``, ``THINK``, or ``PAUSE``. "
+                "When you need a tool, put ``TOOL`` on the first line and emit "
+                "real native tool_calls in the same reply; do NOT write tool-call "
+                "JSON as text. When you are ready to answer, put ``FINISH`` on "
+                "the first line."
             )
         return system_prompt + note
 
 
@@ -24,6 +24,8 @@
 _SECURE = bool(load_auth_settings()["cookie_secure"])
 _SAMESITE = "none" if _SECURE else "lax"
 
+from deeptutor.multi_user.context import set_current_user, user_from_token_payload
+from deeptutor.multi_user.paths import local_admin_user
 from deeptutor.services.auth import (
     AUTH_ENABLED,
     POCKETBASE_ENABLED,
@@ -157,6 +159,27 @@ def _extract_token(authorization: str | None, dt_token: str | None) -> str | Non
 # ---------------------------------------------------------------------------
 
 
+def _install_current_user(payload: TokenPayload | None) -> _CtxToken:
+    """Install the request-local current-user ContextVar from an auth result.
+
+    Single point of truth for ``payload → CurrentUser`` so HTTP and WebSocket
+    entry points produce identical user objects. ``payload is None`` means
+    "no JWT was required" (AUTH_ENABLED=false) and resolves to the local
+    admin user; a non-None payload resolves through ``user_from_token_payload``.
+
+    Returns the reset token produced by ``set_current_user``. ``require_auth``
+    discards it (the request ends with the task, so the var is GC'd with the
+    task context); ``ws_require_auth`` returns it to its caller, which calls
+    ``reset_current_user`` in ``finally`` because a WS outlives that task.
+
+    ⚠ Invariant: every authenticated entry point MUST call this before the
+    handler runs. Skipping it leaves ``get_current_path_service()`` falling
+    back to the admin workspace — the silent-routing root cause of #481.
+    """
+    user = local_admin_user() if payload is None else user_from_token_payload(payload)
+    return set_current_user(user)
+
+
 async def require_auth(
     authorization: str | None = Header(default=None, alias="Authorization"),
     dt_token: str | None = Cookie(default=None),
@@ -184,14 +207,10 @@ async def require_auth(
     of #481.
     """
     if not AUTH_ENABLED:
-        from deeptutor.multi_user.context import set_current_user
-        from deeptutor.multi_user.paths import local_admin_user
-
-        set_current_user(local_admin_user())
+        _install_current_user(None)
         return None
 
     token = _extract_token(authorization, dt_token)
-
     if not token:
         raise HTTPException(
             status_code=status.HTTP_401_UNAUTHORIZED,
@@ -207,9 +226,7 @@ async def require_auth(
             headers={"WWW-Authenticate": "Bearer"},
         )
 
-    from deeptutor.multi_user.context import set_current_user, user_from_token_payload
-
-    set_current_user(user_from_token_payload(payload))
+    _install_current_user(payload)
     return payload
 
 
@@ -241,20 +258,16 @@ async def ws_require_auth(ws: WebSocket) -> _CtxToken | _WsAuthFailed:
         finally:
             reset_current_user(user_token)
     """
-    from deeptutor.multi_user.context import set_current_user, user_from_token_payload
-    from deeptutor.multi_user.paths import local_admin_user
-    from deeptutor.services.auth import AUTH_ENABLED, decode_token
-
     if not AUTH_ENABLED:
-        return set_current_user(local_admin_user())
+        return _install_current_user(None)
 
     token = ws.query_params.get("token") or ws.cookies.get("dt_token")
     payload = decode_token(token) if token else None
     if not payload:
         await ws.close(code=4001)
         return ws_auth_failed
 
-    return set_current_user(user_from_token_payload(payload))
+    return _install_current_user(payload)
 
 
 async def require_admin(
@@ -271,9 +284,7 @@ async def require_admin(
     to the endpoint.
     """
     if not AUTH_ENABLED:
-        from deeptutor.services.auth import TokenPayload as TP
-
-        return TP(username="local", role="admin", user_id="local-admin")
+        return _local_admin_token_payload()
 
     if payload is None or payload.role != "admin":
         raise HTTPException(
@@ -283,6 +294,23 @@ async def require_admin(
     return payload
 
 
+def _local_admin_token_payload() -> TokenPayload:
+    """Return the synthetic admin payload used when AUTH_ENABLED=false.
+
+    Mirrors the local admin identity (LOCAL_ADMIN_USERNAME / LOCAL_ADMIN_ID)
+    so audit logs and self-reference checks behave the same as in multi-user
+    mode. Values are kept aligned with ``local_admin_user()`` in
+    ``deeptutor/multi_user/paths.py``.
+    """
+    from deeptutor.multi_user.models import LOCAL_ADMIN_ID, LOCAL_ADMIN_USERNAME
+    # NOTE(review): imported here rather than at module level — confirm whether it can be hoisted.
+    return TokenPayload(
+        username=LOCAL_ADMIN_USERNAME,
+        role="admin",
+        user_id=LOCAL_ADMIN_ID,
+    )
+
+
 # ---------------------------------------------------------------------------
 # Public endpoints (no auth required)
 # ---------------------------------------------------------------------------
 
@@ -11,9 +11,10 @@
   post-label text and returns it to the caller; the caller decides whether
   to emit it as body content (so a mixed ``FINISH+TOOL`` reply never leaks
   prose into the answer area before the protocol is validated).
-* Accumulates ``tool_calls`` deltas. When ``tool_label`` is set and tool-call
-  deltas arrive before the label resolves, force-resolves the label to that
-  value (tool-call presence is authoritative).
+* Accumulates ``tool_calls`` deltas. Tool-call presence alone does not choose
+  the action label: the formal content stream must still begin with the
+  caller's tool label (e.g. ``TOOL``), otherwise the caller's protocol repair
+  path handles the missing label.
 * When a reasoning model prepends a literal ``<think>...</think>`` block
   *before* the protocol label, that prelude is detected and streamed live
   into the reasoning sub-trace (same routing as the ``THINK`` label).
@@ -231,16 +232,10 @@ async def run_labeled_step(
     behavior — its cards only open when there is actual reasoning text to
     show, avoiding empty "Reasoning…" cards for direct FINISH replies.
 
-    ``implicit_think_label`` lets a caller (e.g. chat) say "if a reasoning
-    model emits ``<think>...</think>`` without following it with one of my
-    protocol labels, treat the whole iteration as *this* label". The intent
-    is to gracefully accept native-format reasoning models — they think in
-    ``<think>`` blocks and may not parrot back the protocol's
-    ``\`\`THINK\`\``` token. Without this, the loop would see a missing
-    label and burn iterations on repair-retries. When the implicit
-    resolution fires, the prelude markers are preserved in the returned
-    ``text`` so the next iteration's assistant context still shows the
-    model's reasoning verbatim.
+    ``implicit_think_label`` is kept for API compatibility with older
+    callers, but is intentionally ignored. Reasoning traces from
+    ``reasoning_content`` or inline ``<think>`` are trace data, not loop
+    actions; the formal content stream must still provide the protocol label.
     """
     kwargs: dict[str, Any] = {
         "model": model,
@@ -459,20 +454,10 @@ async def _ingest_pre_label(text: str) -> None:
                 return
 
             if len(label_buf) > LABEL_PROBE_MAX_CHARS:
-                # Probe window exhausted with no protocol label match. If
-                # we previously consumed a ``<think>`` prelude AND the
-                # caller opted into implicit-THINK semantics, treat this
-                # iteration as an implicit ``THINK`` — the model is a
-                # reasoning model speaking its native dialect. Otherwise
-                # fall to ``LABEL_UNKNOWN`` so the caller can repair.
-                if (
-                    saw_pre_label_think
-                    and implicit_think_label
-                    and implicit_think_label in allowed_labels
-                ):
-                    label = implicit_think_label
-                else:
-                    label = LABEL_UNKNOWN
+                # Probe window exhausted with no protocol label match.
+                # Reasoning traces are not action labels, so fall to
+                # ``LABEL_UNKNOWN`` and let the caller repair.
+                label = LABEL_UNKNOWN
                 flushed = label_buf
                 label_buf = ""
                 await _emit_text(flushed)
@@ -603,21 +588,6 @@ async def _create_response_stream() -> Any:
                 fn_for_chars = getattr(tc_delta, "function", None)
                 output_chars_seen += len(str(getattr(fn_for_chars, "name", "") or ""))
                 output_chars_seen += len(str(getattr(fn_for_chars, "arguments", "") or ""))
-                # Tool-call deltas are authoritative for the tool branch. If
-                # we're still buffering a label when tool-call deltas arrive,
-                # force-resolve to ``tool_label`` so the buffered prose
-                # flushes into the reasoning sub-trace and subsequent prose
-                # continues there.
-                if label is None and tool_label:
-                    label = tool_label
-                    if in_prelude_think:
-                        # Close out the prelude before treating any buffered
-                        # prose as the tool branch's reasoning preamble.
-                        await _close_prelude_artificially()
-                    flushed = label_buf
-                    label_buf = ""
-                    if flushed:
-                        await _emit_text(flushed)
                 idx = getattr(tc_delta, "index", 0)
                 entry = tc_acc.setdefault(idx, {"id": "", "name": "", "arguments": ""})
                 if getattr(tc_delta, "id", None):
@@ -636,12 +606,9 @@ async def _create_response_stream() -> Any:
 
     # Stream ended while still buffering a label. Decide how to resolve:
     #
-    # - If we saw a ``<think>`` prelude and the caller opted into
-    #   implicit-THINK semantics, treat the iteration as an implicit
-    #   ``THINK`` so the loop continues (reasoning models that natively
-    #   speak ``<think>...</think>`` get accepted instead of treated as
-    #   protocol violators).
-    # - Otherwise fall to ``LABEL_UNKNOWN`` and let the caller repair.
+    # - Reasoning traces (``reasoning_content`` or inline ``<think>``) are
+    #   not action labels. If no formal content label appeared, fall to
+    #   ``LABEL_UNKNOWN`` and let the caller repair.
     if label is None:
         if in_prelude_think:
             # Stream ended mid-prelude — flush remaining reasoning live so
@@ -657,14 +624,7 @@ async def _create_response_stream() -> Any:
             label, after_label = final_parsed
             label_buf = ""
             await _emit_text(after_label)
-        if (
-            label is None
-            and saw_pre_label_think
-            and implicit_think_label
-            and implicit_think_label in allowed_labels
-        ):
-            label = implicit_think_label
-        elif label is None:
+        if label is None:
             label = LABEL_UNKNOWN
         if label_buf:
             await _emit_text(label_buf)
@@ -692,16 +652,10 @@ async def _create_response_stream() -> Any:
         )
 
     text = "".join(content_acc)
-    # Preserve the literal ``<think>...</think>`` block when we resolved the
-    # iteration implicitly as ``THINK`` — the next iteration's assistant
-    # context should reflect the model's reasoning verbatim, not a stripped
-    # empty draft. For all other resolutions, fall through to the standard
-    # cleanup so downstream consumers (assistant messages, final-response
-    # text) aren't polluted with the prelude markers.
-    implicit_think_resolved = bool(
-        saw_pre_label_think and implicit_think_label and label == implicit_think_label
-    )
-    if (binding or saw_pre_label_think) and not implicit_think_resolved:
+    # Reasoning traces have already been streamed into the trace channel; the
+    # returned formal text should not leak inline provider markers or private
+    # pre-label thinking.
+    if binding or saw_pre_label_think:
         text = clean_thinking_tags(text, binding, model)
     ordered_tool_calls = [tc_acc[k] for k in sorted(tc_acc.keys())]
     ordered_tool_calls = [tc for tc in ordered_tool_calls if tc.get("name")]