Skip to content

Commit e5eb9a0

Browse files
deepseek-v4-pro and bennylii
authored and committed
fix: implement multi-turn conversation session reuse across providers
Previously, every chat request created a new upstream session because providerSessionId (populated by each adapter) was never consumed by the session manager or routes. This broke multi-turn conversation for all providers.

Changes:
- sessionManager: add computeHistoryHash, 3-tier session lookup (hash match → active session → new), and updateProviderSession method
- DeepSeek/Kimi/QwenAI adapters: accept and reuse existing session IDs instead of always creating new upstream sessions
- forwarder: pass providerSessionId/parentMessageId to adapter calls
- routes/chat: integrate sessionManager into request flow — create sessions before forwarding, update after response
- Tests: 27 unit tests covering hash computation, session lifecycle, and multi-turn flow simulation

Closes #86

Co-Authored-By: deepseek-v4-pro <noreply@deepseek.com>
1 parent 7464ff5 commit e5eb9a0

7 files changed

Lines changed: 654 additions & 41 deletions

File tree

src/main/proxy/adapters/deepseek.ts

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,8 @@ interface ChatCompletionRequest {
6363
reasoning_effort?: 'low' | 'medium' | 'high'
6464
tools?: any[]
6565
tool_choice?: any
66+
providerSessionId?: string
67+
parentMessageId?: string
6668
}
6769

6870
const tokenCache = new Map<string, TokenInfo>()
@@ -371,18 +373,21 @@ ${message.content || ''}
371373

372374
async chatCompletion(request: ChatCompletionRequest): Promise<{ response: AxiosResponse; sessionId: string }> {
373375
const token = await this.acquireToken()
374-
375-
const sessionId = await this.createSession()
376-
console.log('[DeepSeek] Created new session:', sessionId)
377-
376+
377+
// Reuse existing session or create a new one
378+
const sessionId = request.providerSessionId || await this.createSession()
379+
console.log('[DeepSeek] Using session:', sessionId, request.providerSessionId ? '(reused)' : '(new)')
380+
381+
const parentMessageId = request.parentMessageId || null
382+
378383
const challenge = await this.getChallenge('/api/v0/chat/completion')
379384
const challengeAnswer = await this.calculateChallengeAnswer(challenge)
380385

381386
// Clone messages to avoid modifying original request
382387
// Note: Tool prompt injection is already handled by Forwarder.transformRequestForPromptToolUse()
383388
const messages = [...request.messages]
384389

385-
let prompt = this.messagesToPrompt(messages, false)
390+
let prompt = this.messagesToPrompt(messages, !!request.providerSessionId)
386391

387392
// Use request parameters for mode control (OpenAI compatible)
388393
let searchEnabled = false
@@ -421,7 +426,7 @@ ${message.content || ''}
421426
`${DEEPSEEK_API_BASE}/v0/chat/completion`,
422427
{
423428
chat_session_id: sessionId,
424-
parent_message_id: null,
429+
parent_message_id: parentMessageId,
425430
prompt,
426431
model_type: modelType,
427432
ref_file_ids: [],

src/main/proxy/adapters/kimi.ts

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,8 @@ interface ChatCompletionRequest {
6060
tool_choice?: any
6161
conversationId?: string
6262
parentId?: string
63+
providerSessionId?: string
64+
parentMessageId?: string
6365
}
6466

6567
const accessTokenMap = new Map<string, TokenInfo>()
@@ -272,6 +274,10 @@ export class KimiAdapter {
272274
async chatCompletion(request: ChatCompletionRequest): Promise<{ response: AxiosResponse; conversationId: string }> {
273275
const { accessToken } = await this.acquireToken()
274276

277+
const conversationId = request.conversationId || request.providerSessionId || ''
278+
const parentId = request.parentId || request.parentMessageId || ''
279+
const isMultiTurn = !!conversationId
280+
275281
const messages = [...request.messages]
276282

277283
// Check if tool prompt has already been injected by client
@@ -297,17 +303,17 @@ export class KimiAdapter {
297303
}
298304
}
299305

300-
const content = this.messagesPrepare(messages, toolsPrompt, false)
306+
const content = this.messagesPrepare(messages, toolsPrompt, isMultiTurn)
301307

302308
// Determine if thinking and web search should be enabled
303309
// Priority: explicit parameters > model name detection
304310
// Use originalModel for feature detection (preserves user's intent before mapping)
305311
const modelForDetection = request.originalModel || request.model
306312
const modelLower = modelForDetection.toLowerCase()
307-
313+
308314
let enableThinking = request.enableThinking ?? false
309315
let enableWebSearch = request.enableWebSearch ?? false
310-
316+
311317
// Auto-enable based on model name (if not explicitly set)
312318
if (!enableThinking && (modelLower.includes('think') || modelLower.includes('r1'))) {
313319
enableThinking = true
@@ -320,10 +326,10 @@ export class KimiAdapter {
320326

321327
const jsonBody = JSON.stringify({
322328
scenario: 'SCENARIO_K2D5',
323-
chat_id: '',
329+
chat_id: conversationId,
324330
tools: enableWebSearch ? [{ type: 'TOOL_TYPE_SEARCH', search: {} }] : [],
325331
message: {
326-
parent_id: '',
332+
parent_id: parentId,
327333
role: 'user',
328334
blocks: [{
329335
message_id: '',
@@ -371,7 +377,7 @@ export class KimiAdapter {
371377
throw new Error(`Completion request failed: HTTP ${response.status}`)
372378
}
373379

374-
return { response, conversationId: '' }
380+
return { response, conversationId }
375381
}
376382

377383
async deleteConversation(conversationId: string): Promise<boolean> {

src/main/proxy/adapters/qwen-ai.ts

Lines changed: 18 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,8 @@ interface ChatCompletionRequest {
5757
enable_thinking?: boolean
5858
thinking_budget?: number
5959
chatId?: string
60+
providerSessionId?: string
61+
parentMessageId?: string
6062
}
6163

6264
function uuid(): string {
@@ -256,25 +258,27 @@ export class QwenAiAdapter {
256258
forceThinking = (this as any)._forceThinking
257259
}
258260

259-
// Always create a new chat (single-turn mode only)
260-
const chatId = await this.createChat(modelId, 'OpenAI_API_Chat')
261-
console.log('[QwenAI] Created new chat:', chatId)
261+
// Reuse existing chat or create a new one
262+
const existingChatId = request.providerSessionId || request.chatId
263+
const chatId = existingChatId || await this.createChat(modelId, 'OpenAI_API_Chat')
264+
console.log('[QwenAI] Using chat:', chatId, existingChatId ? '(reused)' : '(new)')
265+
266+
const parentId = request.parentMessageId || null
262267

263268
const messages = request.messages
264-
265-
// Extract system message and user message
269+
270+
// Extract system message and last user message
266271
let systemContent = ''
267272
let userContent = ''
268-
269-
// Single-turn mode: extract all messages
273+
270274
for (const msg of messages) {
271275
if (msg.role === 'system') {
272276
systemContent += (systemContent ? '\n\n' : '') + msg.content
273277
} else if (msg.role === 'user') {
274278
userContent = msg.content
275279
}
276280
}
277-
281+
278282
// If system prompt exists, prepend it to user content
279283
if (systemContent) {
280284
userContent = `${systemContent}\n\nUser: ${userContent}`
@@ -289,10 +293,10 @@ export class QwenAiAdapter {
289293
// 1. Model name suffix: -thinking (force thinking), -fast (force fast mode)
290294
// 2. enable_thinking parameter for explicit control
291295
// 3. If neither is specified, thinking mode is disabled by default (fast mode)
292-
const shouldEnableThinking = forceThinking !== undefined
293-
? forceThinking
296+
const shouldEnableThinking = forceThinking !== undefined
297+
? forceThinking
294298
: request.enable_thinking === true
295-
299+
296300
const featureConfig: Record<string, any> = {
297301
thinking_enabled: shouldEnableThinking,
298302
output_schema: 'phase',
@@ -313,11 +317,11 @@ export class QwenAiAdapter {
313317
chat_id: chatId,
314318
chat_mode: 'normal',
315319
model: modelId,
316-
parent_id: null,
320+
parent_id: parentId,
317321
messages: [
318322
{
319323
fid,
320-
parentId: null,
324+
parentId: parentId,
321325
childrenIds: [childId],
322326
role: 'user',
323327
content: userContent,
@@ -329,7 +333,7 @@ export class QwenAiAdapter {
329333
feature_config: featureConfig,
330334
extra: { meta: { subChatType: 't2t' } },
331335
sub_chat_type: 't2t',
332-
parent_id: null,
336+
parent_id: parentId,
333337
},
334338
],
335339
timestamp: ts + 1,

src/main/proxy/forwarder.ts

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -589,6 +589,8 @@ CRITICAL RULES:
589589
temperature: transformedRequest.temperature,
590590
web_search: transformedRequest.web_search,
591591
reasoning_effort: transformedRequest.reasoning_effort,
592+
providerSessionId: request.providerSessionId,
593+
parentMessageId: request.parentMessageId,
592594
})
593595

594596
const latency = Date.now() - startTime
@@ -634,7 +636,8 @@ CRITICAL RULES:
634636

635637
if (request.stream) {
636638
const transformedStream = await handler.handleStream(response.data)
637-
639+
;(transformedStream as any)._handler = handler
640+
638641
return {
639642
success: true,
640643
status: response.status,
@@ -804,6 +807,8 @@ CRITICAL RULES:
804807
temperature: request.temperature,
805808
enableThinking: !!request.reasoning_effort,
806809
enableWebSearch: !!request.web_search,
810+
providerSessionId: request.providerSessionId,
811+
parentMessageId: request.parentMessageId,
807812
})
808813

809814
const latency = Date.now() - startTime
@@ -837,14 +842,17 @@ CRITICAL RULES:
837842
}
838843
}
839844

845+
// Attach handler to stream for post-stream session ID extraction
846+
(transformedStream as any)._handler = handler
847+
840848
return {
841849
success: true,
842850
status: response.status,
843851
headers: this.extractHeaders(response.headers),
844852
stream: transformedStream,
845853
skipTransform: true,
846854
latency,
847-
providerSessionId: undefined,
855+
providerSessionId: handler.getConversationId() || conversationId || undefined,
848856
}
849857
}
850858

@@ -992,6 +1000,8 @@ CRITICAL RULES:
9921000
stream: request.stream,
9931001
temperature: request.temperature,
9941002
enable_thinking: !!request.reasoning_effort,
1003+
providerSessionId: request.providerSessionId,
1004+
parentMessageId: request.parentMessageId,
9951005
})
9961006

9971007
const latency = Date.now() - startTime
@@ -1011,6 +1021,7 @@ CRITICAL RULES:
10111021

10121022
if (request.stream) {
10131023
const transformedStream = await handler.handleStream(response.data)
1024+
;(transformedStream as any)._handler = handler
10141025

10151026
if (shouldDeleteSession()) {
10161027
const originalEnd = transformedStream.end.bind(transformedStream)

src/main/proxy/routes/chat.ts

Lines changed: 38 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,9 @@ import { streamHandler } from '../stream'
1313
import { proxyStatusManager } from '../status'
1414
import { modelMapper } from '../modelMapper'
1515
import { storeManager } from '../../store/store'
16-
import {
16+
import { sessionManager } from '../sessionManager'
17+
import type { ChatMessage as StoreChatMessage } from '../../store/types'
18+
import {
1719
isAnthropicToolFormat,
1820
transformResponseToAnthropic,
1921
transformChunkToAnthropic
@@ -165,6 +167,21 @@ router.post('/completions', async (ctx: Context) => {
165167

166168
const { account, provider, actualModel } = selection
167169

170+
// Get or create session for multi-turn conversation continuation
171+
const storeMessages: StoreChatMessage[] = request.messages.map(m => ({
172+
role: m.role,
173+
content: m.content === null ? '' : m.content,
174+
timestamp: startTime,
175+
}))
176+
const sessionContext = sessionManager.getOrCreateSession({
177+
providerId: provider.id,
178+
accountId: account.id,
179+
model: request.model,
180+
messages: storeMessages,
181+
})
182+
request.providerSessionId = sessionContext.providerSessionId
183+
request.parentMessageId = sessionContext.parentMessageId
184+
168185
const context: ProxyContext = {
169186
requestId,
170187
providerId: provider.id,
@@ -331,6 +348,14 @@ router.post('/completions', async (ctx: Context) => {
331348

332349
storeManager.recordRequestInStats(true, latency, request.model, provider.id, account.id)
333350

351+
// Update session with provider-side IDs for multi-turn continuation
352+
sessionManager.updateProviderSession(
353+
sessionContext.sessionId,
354+
result.providerSessionId,
355+
result.parentMessageId,
356+
storeMessages,
357+
)
358+
334359
if (request.stream === true && result.stream) {
335360
ctx.set('Content-Type', 'text/event-stream')
336361
ctx.set('Cache-Control', 'no-cache')
@@ -391,6 +416,18 @@ router.post('/completions', async (ctx: Context) => {
391416
responseBody: collectedContent || undefined,
392417
})
393418
}
419+
// Update session with final provider IDs from stream handler
420+
const handler = (result.stream as any)._handler
421+
if (handler) {
422+
const finalSessionId = handler.getConversationId?.() || handler.getChatId?.()
423+
const finalParentId = handler.getLastMessageId?.() || handler.getResponseId?.()
424+
sessionManager.updateProviderSession(
425+
sessionContext.sessionId,
426+
finalSessionId ?? result.providerSessionId,
427+
finalParentId ?? result.parentMessageId,
428+
storeMessages,
429+
)
430+
}
394431
wrapperStream.end()
395432
})
396433
} else {

0 commit comments

Comments
 (0)