Skip to content
This repository was archived by the owner on May 13, 2026. It is now read-only.

Commit af9c51f

Browse files
authored
Merge pull request #245 from CJackHwang/dev
Merge pull request #244 from CJackHwang/codex/temporarily-switch-to-internal-usage-count: Temporarily ignore DeepSeek upstream usage fields and prefer internal token estimation
2 parents d32765b + 92bb252 commit af9c51f

54 files changed

Lines changed: 1832 additions & 526 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

AGENTS.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# AGENTS.md
2+
3+
These rules apply to all agent-made changes in this repository.
4+
5+
## PR Gate
6+
7+
- Before opening or updating a PR, run the same local gates as `.github/workflows/quality-gates.yml`.
8+
- Required commands:
9+
- `./scripts/lint.sh`
10+
- `./tests/scripts/check-refactor-line-gate.sh`
11+
- `./tests/scripts/run-unit-all.sh`
12+
- `npm run build --prefix webui`
13+
14+
## Go Lint Rules
15+
16+
- Run `gofmt -w` on every changed Go file before commit or push.
17+
- Do not ignore error returns from I/O-style cleanup calls such as `Close`, `Flush`, `Sync`, or similar methods.
18+
- If a cleanup error cannot be returned, log it explicitly.
19+
20+
## Change Scope
21+
22+
- Keep changes additive and tightly scoped to the requested feature or bugfix.
23+
- Do not mix unrelated refactors into feature PRs unless they are required to make the change pass gates.

docs/TESTING.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ go run ./cmd/ds2api-tests --no-preflight
237237
说明:
238238
- 该工具默认重放 `tests/raw_stream_samples/manifest.json` 声明的 canonical 样本,按上游 SSE 顺序做 1:1 仿真解析。
239239
- 默认校验不出现 `FINISHED` 文本泄露,并要求存在结束信号。
240+
- 默认**不**对 `raw accumulated_token_usage` 与本地解析 token 做强一致校验(当前实现以内容估算为准);如需强校验可显式加 `--fail-on-token-mismatch`。
240241
- 每次运行都会把本地派生结果写入 `artifacts/raw-stream-sim/<run-id>/<sample-id>/replay.output.txt`,并输出结构化报告。
241242
- 如果你有历史基线目录,可以通过 `--baseline-root` 让工具直接做文本对比。
242243
- 更完整的协议级行为结构说明见 [DeepSeekSSE行为结构说明-2026-04-05.md](./DeepSeekSSE行为结构说明-2026-04-05.md)

internal/adapter/claude/stream_runtime_core.go

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,9 @@ type claudeStreamRuntime struct {
2424
bufferToolContent bool
2525
stripReferenceMarkers bool
2626

27-
messageID string
28-
thinking strings.Builder
29-
text strings.Builder
30-
outputTokens int
27+
messageID string
28+
thinking strings.Builder
29+
text strings.Builder
3130

3231
nextBlockIndex int
3332
thinkingBlockOpen bool
@@ -70,9 +69,6 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
7069
if !parsed.Parsed {
7170
return streamengine.ParsedDecision{}
7271
}
73-
if parsed.OutputTokens > 0 {
74-
s.outputTokens = parsed.OutputTokens
75-
}
7672
if parsed.ErrorMessage != "" {
7773
s.upstreamErr = parsed.ErrorMessage
7874
return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReason("upstream_error")}

internal/adapter/claude/stream_runtime_finalize.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,6 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
109109
}
110110

111111
outputTokens := util.EstimateTokens(finalThinking) + util.EstimateTokens(finalText)
112-
if s.outputTokens > 0 {
113-
outputTokens = s.outputTokens
114-
}
115112
s.send("message_delta", map[string]any{
116113
"type": "message_delta",
117114
"delta": map[string]any{

internal/adapter/gemini/handler_generate.go

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -149,14 +149,13 @@ func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *ht
149149
cleanVisibleOutput(result.Thinking, stripReferenceMarkers),
150150
cleanVisibleOutput(result.Text, stripReferenceMarkers),
151151
toolNames,
152-
result.OutputTokens,
153152
))
154153
}
155154

156155
//nolint:unused // retained for native Gemini non-stream handling path.
157-
func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string, outputTokens int) map[string]any {
156+
func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any {
158157
parts := buildGeminiPartsFromFinal(finalText, finalThinking, toolNames)
159-
usage := buildGeminiUsage(finalPrompt, finalThinking, finalText, outputTokens)
158+
usage := buildGeminiUsage(finalPrompt, finalThinking, finalText)
160159
return map[string]any{
161160
"candidates": []map[string]any{
162161
{
@@ -174,14 +173,10 @@ func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, final
174173
}
175174

176175
//nolint:unused // retained for native Gemini non-stream handling path.
177-
func buildGeminiUsage(finalPrompt, finalThinking, finalText string, outputTokens int) map[string]any {
176+
func buildGeminiUsage(finalPrompt, finalThinking, finalText string) map[string]any {
178177
promptTokens := util.EstimateTokens(finalPrompt)
179178
reasoningTokens := util.EstimateTokens(finalThinking)
180179
completionTokens := util.EstimateTokens(finalText)
181-
if outputTokens > 0 {
182-
completionTokens = outputTokens
183-
reasoningTokens = 0
184-
}
185180
return map[string]any{
186181
"promptTokenCount": promptTokens,
187182
"candidatesTokenCount": reasoningTokens + completionTokens,

internal/adapter/gemini/handler_stream_runtime.go

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,8 @@ type geminiStreamRuntime struct {
6565
stripReferenceMarkers bool
6666
toolNames []string
6767

68-
thinking strings.Builder
69-
text strings.Builder
70-
outputTokens int
68+
thinking strings.Builder
69+
text strings.Builder
7170
}
7271

7372
//nolint:unused // retained for native Gemini stream handling path.
@@ -112,9 +111,6 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
112111
if !parsed.Parsed {
113112
return streamengine.ParsedDecision{}
114113
}
115-
if parsed.OutputTokens > 0 {
116-
s.outputTokens = parsed.OutputTokens
117-
}
118114
if parsed.ContentFilter || parsed.ErrorMessage != "" || parsed.Stop {
119115
return streamengine.ParsedDecision{Stop: true}
120116
}
@@ -198,6 +194,6 @@ func (s *geminiStreamRuntime) finalize() {
198194
},
199195
},
200196
"modelVersion": s.model,
201-
"usageMetadata": buildGeminiUsage(s.finalPrompt, finalThinking, finalText, s.outputTokens),
197+
"usageMetadata": buildGeminiUsage(s.finalPrompt, finalThinking, finalText),
202198
})
203199
}

internal/adapter/openai/chat_stream_runtime.go

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,6 @@ type chatStreamRuntime struct {
3737
streamToolNames map[int]string
3838
thinking strings.Builder
3939
text strings.Builder
40-
promptTokens int
41-
outputTokens int
4240
}
4341

4442
func newChatStreamRuntime(
@@ -171,17 +169,6 @@ func (s *chatStreamRuntime) finalize(finishReason string) {
171169
finishReason = "tool_calls"
172170
}
173171
usage := openaifmt.BuildChatUsage(s.finalPrompt, finalThinking, finalText)
174-
if s.promptTokens > 0 {
175-
usage["prompt_tokens"] = s.promptTokens
176-
}
177-
if s.outputTokens > 0 {
178-
usage["completion_tokens"] = s.outputTokens
179-
}
180-
if s.promptTokens > 0 || s.outputTokens > 0 {
181-
p := usage["prompt_tokens"].(int)
182-
c := usage["completion_tokens"].(int)
183-
usage["total_tokens"] = p + c
184-
}
185172
s.sendChunk(openaifmt.BuildChatStreamChunk(
186173
s.completionID,
187174
s.created,
@@ -196,12 +183,6 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
196183
if !parsed.Parsed {
197184
return streamengine.ParsedDecision{}
198185
}
199-
if parsed.PromptTokens > 0 {
200-
s.promptTokens = parsed.PromptTokens
201-
}
202-
if parsed.OutputTokens > 0 {
203-
s.outputTokens = parsed.OutputTokens
204-
}
205186
if parsed.ContentFilter {
206187
return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReasonHandlerRequested}
207188
}

internal/adapter/openai/handler_chat.go

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -131,19 +131,6 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, ctx context.Context, re
131131
return
132132
}
133133
respBody := openaifmt.BuildChatCompletion(completionID, model, finalPrompt, finalThinking, finalText, toolNames)
134-
if result.PromptTokens > 0 || result.OutputTokens > 0 {
135-
if usage, ok := respBody["usage"].(map[string]any); ok {
136-
if result.PromptTokens > 0 {
137-
usage["prompt_tokens"] = result.PromptTokens
138-
}
139-
if result.OutputTokens > 0 {
140-
usage["completion_tokens"] = result.OutputTokens
141-
}
142-
p, _ := usage["prompt_tokens"].(int)
143-
c, _ := usage["completion_tokens"].(int)
144-
usage["total_tokens"] = p + c
145-
}
146-
}
147134
writeJSON(w, http.StatusOK, respBody)
148135
}
149136

internal/adapter/openai/responses_handler.go

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -130,19 +130,6 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res
130130
}
131131

132132
responseObj := openaifmt.BuildResponseObject(responseID, model, finalPrompt, sanitizedThinking, sanitizedText, toolNames)
133-
if result.PromptTokens > 0 || result.OutputTokens > 0 {
134-
if usage, ok := responseObj["usage"].(map[string]any); ok {
135-
if result.PromptTokens > 0 {
136-
usage["input_tokens"] = result.PromptTokens
137-
}
138-
if result.OutputTokens > 0 {
139-
usage["output_tokens"] = result.OutputTokens
140-
}
141-
input, _ := usage["input_tokens"].(int)
142-
output, _ := usage["output_tokens"].(int)
143-
usage["total_tokens"] = input + output
144-
}
145-
}
146133
h.getResponseStore().put(owner, responseID, responseObj)
147134
writeJSON(w, http.StatusOK, responseObj)
148135
}

internal/adapter/openai/responses_stream_runtime_core.go

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,6 @@ type responsesStreamRuntime struct {
5151
messagePartAdded bool
5252
sequence int
5353
failed bool
54-
promptTokens int
55-
outputTokens int
5654

5755
persistResponse func(obj map[string]any)
5856
}
@@ -150,24 +148,6 @@ func (s *responsesStreamRuntime) finalize() {
150148
s.closeIncompleteFunctionItems()
151149

152150
obj := s.buildCompletedResponseObject(finalThinking, finalText, detected)
153-
if s.outputTokens > 0 {
154-
if usage, ok := obj["usage"].(map[string]any); ok {
155-
usage["output_tokens"] = s.outputTokens
156-
}
157-
}
158-
if s.promptTokens > 0 || s.outputTokens > 0 {
159-
if usage, ok := obj["usage"].(map[string]any); ok {
160-
if s.promptTokens > 0 {
161-
usage["input_tokens"] = s.promptTokens
162-
}
163-
if s.outputTokens > 0 {
164-
usage["output_tokens"] = s.outputTokens
165-
}
166-
input, _ := usage["input_tokens"].(int)
167-
output, _ := usage["output_tokens"].(int)
168-
usage["total_tokens"] = input + output
169-
}
170-
}
171151
if s.persistResponse != nil {
172152
s.persistResponse(obj)
173153
}
@@ -196,12 +176,6 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa
196176
if !parsed.Parsed {
197177
return streamengine.ParsedDecision{}
198178
}
199-
if parsed.PromptTokens > 0 {
200-
s.promptTokens = parsed.PromptTokens
201-
}
202-
if parsed.OutputTokens > 0 {
203-
s.outputTokens = parsed.OutputTokens
204-
}
205179
if parsed.ContentFilter || parsed.ErrorMessage != "" || parsed.Stop {
206180
return streamengine.ParsedDecision{Stop: true}
207181
}

0 commit comments

Comments
 (0)