Skip to content

Commit c54743b

Browse files
committed
improve chat
1 parent db41c57 commit c54743b

15 files changed

Lines changed: 575 additions & 352 deletions

File tree

deeptutor/agents/chat/agentic_pipeline.py

Lines changed: 22 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -456,18 +456,11 @@ async def run(self, context: UnifiedContext, stream: StreamBus) -> None:
456456
max_iterations=max(1, self._max_iterations),
457457
host=host,
458458
usage=self._usage,
459-
# Reasoning models with native tool-calling support emit
460-
# ``reasoning_content`` without parroting back
461-
# ````THINK```` labels. When the model is a reasoner
462-
# AND native tool calling is active, use LABEL_FINISH so
463-
# that a reply containing ``reasoning_content`` + answer
464-
# text (but no explicit label) terminates the loop — the
465-
# answer lives in ``content``, reasoning in
466-
# ``reasoning_content``. Non-reasoning models that emit
467-
# ``<think/>`` tags get LABEL_THINK so the loop continues.
468-
implicit_think_label=LABEL_FINISH
469-
if (_is_reasoner and use_native_tools)
470-
else LABEL_THINK,
459+
# Thinking traces are trace data, not loop-control actions.
460+
# The formal content stream must still begin with FINISH /
461+
# TOOL / THINK / PAUSE so protocol repair can catch missing
462+
# labels instead of silently treating answers as THINK.
463+
implicit_think_label=None,
471464
)
472465

473466
if outcome.sources:
@@ -1248,32 +1241,33 @@ def _append_reasoner_protocol_note(self, system_prompt: str) -> str:
12481241
12491242
Reasoning models (e.g. Qwen3.6-Plus) emit reasoning via
12501243
``reasoning_content`` and tool calls via native ``tool_calls``
1251-
deltas. The label protocol (TOOL/THINK/FINISH/PAUSE) confuses
1252-
them into writing tool-call JSON in ``content`` instead of using
1253-
the native mechanism. This note tells them to ignore labels and
1254-
use native tool calling directly.
1244+
deltas. The reasoning stream is displayed separately, but the
1245+
formal ``content`` stream still controls the agent loop through the
1246+
standard first-line label.
12551247
"""
12561248
if self.language == "zh":
12571249
note = (
12581250
"\n\n# 推理模型特别说明\n"
12591251
"你是一个原生支持推理和工具调用的模型。"
1260-
"请**忽略上面「输出协议」中关于 ``TOOL``/``THINK``/``FINISH``/``PAUSE`` 标签的指示**。"
1261-
"你不需要在回复中输出任何标签。"
1262-
"你的推理过程会自动在独立区域显示。"
1263-
"当你需要调用工具时,直接通过原生 tool_calls 功能发起调用,不要在文本中写 JSON。"
1264-
"当你准备好给出最终答案时,直接在回复正文中写出答案即可。"
1252+
"你的推理过程会自动在独立的思考轨迹区域显示;"
1253+
"但正式回复正文仍然必须遵守上面的「输出协议」,第一行写且只写一个标签:"
1254+
"``FINISH``、``TOOL``、``THINK`` 或 ``PAUSE``。"
1255+
"需要调用工具时,正式正文第一行写 ``TOOL``,并在同一次回复里通过原生 tool_calls 发起调用;"
1256+
"不要把工具调用写成 JSON 文本。"
1257+
"准备给最终答案时,正式正文第一行写 ``FINISH``。"
12651258
)
12661259
else:
12671260
note = (
12681261
"\n\n# Reasoning Model Special Instructions\n"
12691262
"You are a model with native reasoning and tool-calling support. "
1270-
"Please **ignore the 'Output Protocol' instructions above about "
1271-
"``TOOL``/``THINK``/``FINISH``/``PAUSE`` labels**. "
1272-
"You do NOT need to output any labels in your replies. "
1273-
"Your reasoning is automatically displayed in a separate area. "
1274-
"When you need to call a tool, use native tool_calls directly — "
1275-
"do NOT write JSON in your text output. "
1276-
"When you are ready to give the final answer, just write it in your reply body."
1263+
"Your reasoning is automatically displayed in a separate trace, "
1264+
"but your formal content stream must still follow the Output "
1265+
"Protocol above: the first line must be exactly one label, "
1266+
"``FINISH``, ``TOOL``, ``THINK``, or ``PAUSE``. "
1267+
"When you need a tool, put ``TOOL`` on the first line and emit "
1268+
"real native tool_calls in the same reply; do NOT write tool-call "
1269+
"JSON as text. When you are ready to answer, put ``FINISH`` on "
1270+
"the first line."
12771271
)
12781272
return system_prompt + note
12791273

deeptutor/api/routers/auth.py

Lines changed: 45 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
_SECURE = bool(load_auth_settings()["cookie_secure"])
2525
_SAMESITE = "none" if _SECURE else "lax"
2626

27+
from deeptutor.multi_user.context import set_current_user, user_from_token_payload
28+
from deeptutor.multi_user.paths import local_admin_user
2729
from deeptutor.services.auth import (
2830
AUTH_ENABLED,
2931
POCKETBASE_ENABLED,
@@ -157,6 +159,27 @@ def _extract_token(authorization: str | None, dt_token: str | None) -> str | Non
157159
# ---------------------------------------------------------------------------
158160

159161

162+
def _install_current_user(payload: TokenPayload | None) -> _CtxToken:
163+
"""Install the request-local current-user ContextVar from an auth result.
164+
165+
Single source of truth for ``payload → CurrentUser`` so HTTP and WebSocket
166+
entry points produce identical user objects. ``payload is None`` means
167+
"no JWT was required" (AUTH_ENABLED=false) and resolves to the local
168+
admin user; a non-None payload resolves through ``user_from_token_payload``.
169+
170+
Returns the ContextVar reset token. HTTP callers ignore it (the request
171+
ends with the task, so the var is GC'd with the task context). WebSocket
172+
callers keep it and call ``reset_current_user`` in their ``finally`` block,
173+
because a WS connection outlives the dependency-resolution task.
174+
175+
⚠ Invariant: every authenticated entry point MUST call this before the
176+
handler runs. Skipping it leaves ``get_current_path_service()`` falling
177+
back to the admin workspace — the silent-routing root cause of #481.
178+
"""
179+
user = local_admin_user() if payload is None else user_from_token_payload(payload)
180+
return set_current_user(user)
181+
182+
160183
async def require_auth(
161184
authorization: str | None = Header(default=None, alias="Authorization"),
162185
dt_token: str | None = Cookie(default=None),
@@ -184,14 +207,10 @@ async def require_auth(
184207
of #481.
185208
"""
186209
if not AUTH_ENABLED:
187-
from deeptutor.multi_user.context import set_current_user
188-
from deeptutor.multi_user.paths import local_admin_user
189-
190-
set_current_user(local_admin_user())
210+
_install_current_user(None)
191211
return None
192212

193213
token = _extract_token(authorization, dt_token)
194-
195214
if not token:
196215
raise HTTPException(
197216
status_code=status.HTTP_401_UNAUTHORIZED,
@@ -207,9 +226,7 @@ async def require_auth(
207226
headers={"WWW-Authenticate": "Bearer"},
208227
)
209228

210-
from deeptutor.multi_user.context import set_current_user, user_from_token_payload
211-
212-
set_current_user(user_from_token_payload(payload))
229+
_install_current_user(payload)
213230
return payload
214231

215232

@@ -241,20 +258,16 @@ async def ws_require_auth(ws: WebSocket) -> _CtxToken | _WsAuthFailed:
241258
finally:
242259
reset_current_user(user_token)
243260
"""
244-
from deeptutor.multi_user.context import set_current_user, user_from_token_payload
245-
from deeptutor.multi_user.paths import local_admin_user
246-
from deeptutor.services.auth import AUTH_ENABLED, decode_token
247-
248261
if not AUTH_ENABLED:
249-
return set_current_user(local_admin_user())
262+
return _install_current_user(None)
250263

251264
token = ws.query_params.get("token") or ws.cookies.get("dt_token")
252265
payload = decode_token(token) if token else None
253266
if not payload:
254267
await ws.close(code=4001)
255268
return ws_auth_failed
256269

257-
return set_current_user(user_from_token_payload(payload))
270+
return _install_current_user(payload)
258271

259272

260273
async def require_admin(
@@ -271,9 +284,7 @@ async def require_admin(
271284
to the endpoint.
272285
"""
273286
if not AUTH_ENABLED:
274-
from deeptutor.services.auth import TokenPayload as TP
275-
276-
return TP(username="local", role="admin", user_id="local-admin")
287+
return _local_admin_token_payload()
277288

278289
if payload is None or payload.role != "admin":
279290
raise HTTPException(
@@ -283,6 +294,23 @@ async def require_admin(
283294
return payload
284295

285296

297+
def _local_admin_token_payload() -> TokenPayload:
298+
"""Synthetic admin payload used when AUTH_ENABLED=false.
299+
300+
Mirrors the local admin identity (LOCAL_ADMIN_USERNAME / LOCAL_ADMIN_ID)
301+
so audit logs and self-reference checks behave the same as in multi-user
302+
mode. Values are kept aligned with ``local_admin_user()`` in
303+
``deeptutor/multi_user/paths.py``.
304+
"""
305+
from deeptutor.multi_user.models import LOCAL_ADMIN_ID, LOCAL_ADMIN_USERNAME
306+
307+
return TokenPayload(
308+
username=LOCAL_ADMIN_USERNAME,
309+
role="admin",
310+
user_id=LOCAL_ADMIN_ID,
311+
)
312+
313+
286314
# ---------------------------------------------------------------------------
287315
# Public endpoints (no auth required)
288316
# ---------------------------------------------------------------------------

deeptutor/core/agentic/labeled_step.py

Lines changed: 20 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@
1111
post-label text and returns it to the caller; the caller decides whether
1212
to emit it as body content (so a mixed ``FINISH+TOOL`` reply never leaks
1313
prose into the answer area before the protocol is validated).
14-
* Accumulates ``tool_calls`` deltas. When ``tool_label`` is set and tool-call
15-
deltas arrive before the label resolves, force-resolves the label to that
16-
value (tool-call presence is authoritative).
14+
* Accumulates ``tool_calls`` deltas. Tool-call presence alone does not choose
15+
the action label: the formal content stream must still begin with the
16+
caller's tool label (e.g. ``TOOL``), otherwise the caller's protocol repair
17+
path handles the missing label.
1718
* When a reasoning model prepends a literal ``<think>...</think>`` block
1819
*before* the protocol label, that prelude is detected and streamed live
1920
into the reasoning sub-trace (same routing as the ``THINK`` label).
@@ -231,16 +232,10 @@ async def run_labeled_step(
231232
behavior — its cards only open when there is actual reasoning text to
232233
show, avoiding empty "Reasoning…" cards for direct FINISH replies.
233234
234-
``implicit_think_label`` lets a caller (e.g. chat) say "if a reasoning
235-
model emits ``<think>...</think>`` without following it with one of my
236-
protocol labels, treat the whole iteration as *this* label". The intent
237-
is to gracefully accept native-format reasoning models — they think in
238-
``<think>`` blocks and may not parrot back the protocol's
239-
``\`\`THINK\`\``` token. Without this, the loop would see a missing
240-
label and burn iterations on repair-retries. When the implicit
241-
resolution fires, the prelude markers are preserved in the returned
242-
``text`` so the next iteration's assistant context still shows the
243-
model's reasoning verbatim.
235+
``implicit_think_label`` is kept for API compatibility with older
236+
callers, but is intentionally ignored. Reasoning traces from
237+
``reasoning_content`` or inline ``<think>`` are trace data, not loop
238+
actions; the formal content stream must still provide the protocol label.
244239
"""
245240
kwargs: dict[str, Any] = {
246241
"model": model,
@@ -459,20 +454,10 @@ async def _ingest_pre_label(text: str) -> None:
459454
return
460455

461456
if len(label_buf) > LABEL_PROBE_MAX_CHARS:
462-
# Probe window exhausted with no protocol label match. If
463-
# we previously consumed a ``<think>`` prelude AND the
464-
# caller opted into implicit-THINK semantics, treat this
465-
# iteration as an implicit ``THINK`` — the model is a
466-
# reasoning model speaking its native dialect. Otherwise
467-
# fall to ``LABEL_UNKNOWN`` so the caller can repair.
468-
if (
469-
saw_pre_label_think
470-
and implicit_think_label
471-
and implicit_think_label in allowed_labels
472-
):
473-
label = implicit_think_label
474-
else:
475-
label = LABEL_UNKNOWN
457+
# Probe window exhausted with no protocol label match.
458+
# Reasoning traces are not action labels, so fall to
459+
# ``LABEL_UNKNOWN`` and let the caller repair.
460+
label = LABEL_UNKNOWN
476461
flushed = label_buf
477462
label_buf = ""
478463
await _emit_text(flushed)
@@ -603,21 +588,6 @@ async def _create_response_stream() -> Any:
603588
fn_for_chars = getattr(tc_delta, "function", None)
604589
output_chars_seen += len(str(getattr(fn_for_chars, "name", "") or ""))
605590
output_chars_seen += len(str(getattr(fn_for_chars, "arguments", "") or ""))
606-
# Tool-call deltas are authoritative for the tool branch. If
607-
# we're still buffering a label when tool-call deltas arrive,
608-
# force-resolve to ``tool_label`` so the buffered prose
609-
# flushes into the reasoning sub-trace and subsequent prose
610-
# continues there.
611-
if label is None and tool_label:
612-
label = tool_label
613-
if in_prelude_think:
614-
# Close out the prelude before treating any buffered
615-
# prose as the tool branch's reasoning preamble.
616-
await _close_prelude_artificially()
617-
flushed = label_buf
618-
label_buf = ""
619-
if flushed:
620-
await _emit_text(flushed)
621591
idx = getattr(tc_delta, "index", 0)
622592
entry = tc_acc.setdefault(idx, {"id": "", "name": "", "arguments": ""})
623593
if getattr(tc_delta, "id", None):
@@ -636,12 +606,9 @@ async def _create_response_stream() -> Any:
636606

637607
# Stream ended while still buffering a label. Decide how to resolve:
638608
#
639-
# - If we saw a ``<think>`` prelude and the caller opted into
640-
# implicit-THINK semantics, treat the iteration as an implicit
641-
# ``THINK`` so the loop continues (reasoning models that natively
642-
# speak ``<think>...</think>`` get accepted instead of treated as
643-
# protocol violators).
644-
# - Otherwise fall to ``LABEL_UNKNOWN`` and let the caller repair.
609+
# - Reasoning traces (``reasoning_content`` or inline ``<think>``) are
610+
# not action labels. If no formal content label appeared, fall to
611+
# ``LABEL_UNKNOWN`` and let the caller repair.
645612
if label is None:
646613
if in_prelude_think:
647614
# Stream ended mid-prelude — flush remaining reasoning live so
@@ -657,14 +624,7 @@ async def _create_response_stream() -> Any:
657624
label, after_label = final_parsed
658625
label_buf = ""
659626
await _emit_text(after_label)
660-
if (
661-
label is None
662-
and saw_pre_label_think
663-
and implicit_think_label
664-
and implicit_think_label in allowed_labels
665-
):
666-
label = implicit_think_label
667-
elif label is None:
627+
if label is None:
668628
label = LABEL_UNKNOWN
669629
if label_buf:
670630
await _emit_text(label_buf)
@@ -692,16 +652,10 @@ async def _create_response_stream() -> Any:
692652
)
693653

694654
text = "".join(content_acc)
695-
# Preserve the literal ``<think>...</think>`` block when we resolved the
696-
# iteration implicitly as ``THINK`` — the next iteration's assistant
697-
# context should reflect the model's reasoning verbatim, not a stripped
698-
# empty draft. For all other resolutions, fall through to the standard
699-
# cleanup so downstream consumers (assistant messages, final-response
700-
# text) aren't polluted with the prelude markers.
701-
implicit_think_resolved = bool(
702-
saw_pre_label_think and implicit_think_label and label == implicit_think_label
703-
)
704-
if (binding or saw_pre_label_think) and not implicit_think_resolved:
655+
# Reasoning traces have already been streamed into the trace channel; the
656+
# returned formal text should not leak inline provider markers or private
657+
# pre-label thinking.
658+
if binding or saw_pre_label_think:
705659
text = clean_thinking_tags(text, binding, model)
706660
ordered_tool_calls = [tc_acc[k] for k in sorted(tc_acc.keys())]
707661
ordered_tool_calls = [tc for tc in ordered_tool_calls if tc.get("name")]

0 commit comments

Comments
 (0)