Skip to content

Commit cc9c181

Browse files
CyningMM and cursoragent committed
chore(rag): 默认 Chat/Intent 模型切换为 DeepSeek-V4-Pro
Portfolio JD 匹配实测 V4-Pro 优于 Flash/V3;统一代码 fallback、.env.example 与 PROJECT_CONFIG。 Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent d543c7f commit cc9c181

8 files changed

Lines changed: 17 additions & 17 deletions

File tree

.env.example

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,10 +117,10 @@ I18N_EXPAND_MAX_QUERY_TEXT_CHARS=240
117117
# Unified Chat 是否走 V2 ReAct 路径(默认 false)
118118
CHATBI_USE_AGENT=true
119119
# 对话主模型(与 INTENT 可不同)
120-
SILICONFLOW_CHAT_MODEL=deepseek-ai/DeepSeek-V4-Flash
120+
SILICONFLOW_CHAT_MODEL=deepseek-ai/DeepSeek-V4-Pro
121121
# Intent 是否调用 SiliconFlow(false = 启发式 + 超时后 V1,无上游 client,适合 CI)
122122
CHATBI_V2_INTENT_LLM=true
123-
INTENT_LLM_MODEL=deepseek-ai/DeepSeek-V4-Flash
123+
INTENT_LLM_MODEL=deepseek-ai/DeepSeek-V4-Pro
124124
# Intent 单次 wait_for 上限(秒);超时 → raw_response.used=v1_fallback
125125
CHATBI_V2_INTENT_TIMEOUT_S=60
126126
# Intent 站点上下文 YAML(Portfolio Q4/人名路由 · Step1 C-lite)

api/chain_chat.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ async def handle_chain_chat(
160160

161161
# init LLM client
162162
api_key = os.getenv("SILICONFLOW_API_KEY", "").strip()
163-
chat_model = os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V3")
163+
chat_model = os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V4-Pro")
164164
oai = OpenAI(api_key=api_key, base_url=siliconflow_base())
165165

166166
# 2) generate sql

api/index.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
os.getenv("SILICONFLOW_EMBEDDING_MODEL", "").strip() or "Qwen/Qwen3-Embedding-0.6B"
6464
)
6565
SILICONFLOW_EMBEDDING_DIMENSIONS = int(os.getenv("SILICONFLOW_EMBEDDING_DIMENSIONS", "1024"))
66-
SILICONFLOW_CHAT_MODEL = os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V3")
66+
SILICONFLOW_CHAT_MODEL = os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V4-Pro")
6767

6868
MATCH_COUNT = 10
6969
CONTEXT_MAX_CHARS = 6000

api/intent_agent.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@ async def _llm_decide_v2(
412412
}}
413413
"""
414414

415-
intent_model = os.getenv("INTENT_LLM_MODEL", "Qwen/Qwen2.5-7B-Instruct")
415+
intent_model = os.getenv("INTENT_LLM_MODEL", "deepseek-ai/DeepSeek-V4-Pro")
416416
intent_messages: list[dict[str, str]] = [
417417
{
418418
"role": "system",

api/text2sql_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ async def handle_text2sql_chat(
104104
_t2s_debug(f"query={query!r} retrieved={len(retrieved)} topk={topk}")
105105

106106
api_key = os.getenv("SILICONFLOW_API_KEY", "").strip()
107-
chat_model = os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V3")
107+
chat_model = os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V4-Pro")
108108
oai = OpenAI(api_key=api_key, base_url=siliconflow_base())
109109

110110
sql_prompt = build_sql_prompt(

api/tools.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def _elapsed_ms(started_at: float) -> int:
8282

8383

8484
def _pick_chat_model() -> str:
85-
return os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V3")
85+
return os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V4-Pro")
8686

8787

8888
def _pick_embed_model_kwargs() -> dict[str, Any]:
@@ -402,7 +402,7 @@ def _text2sql_summary_chat_model() -> str:
402402
raw = (os.getenv("CHATBI_TEXT2SQL_SUMMARY_LLM_MODEL") or "").strip()
403403
if raw:
404404
return raw
405-
return os.getenv("INTENT_LLM_MODEL", "Qwen/Qwen2.5-7B-Instruct")
405+
return os.getenv("INTENT_LLM_MODEL", "deepseek-ai/DeepSeek-V4-Pro")
406406

407407

408408
def _clip_dialogue_context_block(ctx: str) -> str:

api/unified_chat.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1365,7 +1365,7 @@ def finish(*, ok: bool, mode: str, persist: dict[str, Any] | None = None) -> JSO
13651365

13661366
if mode == "no_data":
13671367
oai = openai_siliconflow_client()
1368-
chat_model = os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V3")
1368+
chat_model = os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V4-Pro")
13691369
events.append(
13701370
_event(
13711371
typ="tool.call.start",
@@ -1486,7 +1486,7 @@ def finish(*, ok: bool, mode: str, persist: dict[str, Any] | None = None) -> JSO
14861486
retrieved = filter_text2sql_retrieved(retrieved, principal=principal, policies=pols)
14871487

14881488
oai = OpenAI(api_key=os.getenv("SILICONFLOW_API_KEY", "").strip(), base_url=siliconflow_base())
1489-
chat_model = os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V3")
1489+
chat_model = os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V4-Pro")
14901490

14911491
events.append(
14921492
_event(
@@ -1730,7 +1730,7 @@ def finish(*, ok: bool, mode: str, persist: dict[str, Any] | None = None) -> JSO
17301730

17311731
# ---- RAG branch (non-streaming v1) ----
17321732
oai = openai_siliconflow_client()
1733-
chat_model = os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V3")
1733+
chat_model = os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V4-Pro")
17341734

17351735
# rewrite
17361736
events.append(
@@ -2809,7 +2809,7 @@ async def event_stream():
28092809

28102810
if mode == "no_data":
28112811
oai = openai_siliconflow_client()
2812-
chat_model = os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V3")
2812+
chat_model = os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V4-Pro")
28132813
yield _sse(
28142814
"chain",
28152815
_event(
@@ -2885,7 +2885,7 @@ async def event_stream():
28852885
retrieved = filter_text2sql_retrieved(retrieved, principal=principal, policies=pols)
28862886

28872887
oai = OpenAI(api_key=os.getenv("SILICONFLOW_API_KEY", "").strip(), base_url=siliconflow_base())
2888-
chat_model = os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V3")
2888+
chat_model = os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V4-Pro")
28892889

28902890
yield _sse(
28912891
"chain",
@@ -3009,7 +3009,7 @@ async def event_stream():
30093009

30103010
# ---- RAG branch (non-streaming answer v1) ----
30113011
oai = openai_siliconflow_client()
3012-
chat_model = os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V3")
3012+
chat_model = os.getenv("SILICONFLOW_CHAT_MODEL", "deepseek-ai/DeepSeek-V4-Pro")
30133013

30143014
# rewrite
30153015
yield _sse("chain", _event(typ="tool.call.start", started_at=started_at, step_id="t_rewrite", payload={"tool": "rag.rewrite", "input": {"query": query}}))

docs/meta/PROJECT_CONFIG_AI_INK_BRAIN_API_PYTHON.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
| `SILICONFLOW_BASE_URL` | OpenAI 兼容 Base URL | 可选 | `api/index.py`(`SILICONFLOW_BASE`);`api/rag_env.py:siliconflow_base()` | 默认 `https://api.siliconflow.cn/v1` | 与项目无关 |
5555
| `SILICONFLOW_EMBEDDING_MODEL` | Embedding 模型名 | 可选 | `api/index.py`、`api/rag_env.py:siliconflow_embedding_model()` | **空字符串会被视为未设置**:回退默认 `Qwen/Qwen3-Embedding-0.6B`(避免 CI/环境变量显式空值导致上游 400) | 影响向量空间;需与入库一致 |
5656
| `SILICONFLOW_EMBEDDING_DIMENSIONS` | Embedding 输出维度(Qwen3 需要) | 可选 | `api/index.py`、`api/rag_env.py:siliconflow_embedding_dimensions()` | 默认 `1024`;当模型名包含 `Qwen3-Embedding` 时传给 embeddings API | **必须与** `public.documents.embedding vector(N)` **一致**(默认 N=1024) |
57-
| `SILICONFLOW_CHAT_MODEL` | Chat 模型 | 可选 | `api/index.py` | 默认 `deepseek-ai/DeepSeek-V3` | 与向量维度无关 |
57+
| `SILICONFLOW_CHAT_MODEL` | Chat 模型 | 可选 | `api/index.py`、`api/chain_chat.py`、`api/text2sql_api.py`、`api/tools.py`、`api/unified_chat.py` | 默认 `deepseek-ai/DeepSeek-V4-Pro` | 与向量维度无关 |
5858
| `SYNC_ADMIN_SECRET` | admin/sync Bearer secret(与前端 BFF 同值) | **推荐 · portfolio 真值** | `api/rag_env.py:admin_secret()`、`api/index.py:_require_auth()` | 留空且无废弃 fallback 时:鉴权接口 500 | 与项目无关 |
5959
| `CHAT_API_SECRET` | Admin secret **(已废弃 · 待删)** | 可选 fallback | `api/rag_env.py:admin_secret()` | 仅当未设 `SYNC_ADMIN_SECRET` 时读取 | **勿**在新环境配置 |
6060
| `NEXT_PUBLIC_ADMIN_SECRET` | Admin secret **(已废弃 · 待删)** | 可选 fallback | `api/rag_env.py:admin_secret()` | 前端 **不再**使用;仅兼容旧 `.env` | **勿**在新环境配置 |
@@ -74,7 +74,7 @@
7474
| `CHATBI_ACCESS_TOKEN_PEPPER` | 可选全局 pepper:参与 `SHA256(pepper_bytes + 明文 token)`,须与运维本地脚本 `docs/diary/local_chatbi_access_token_gen.py` 及 Supabase 插入的 `key_hash` **一致** | 可选 | `api/chatbi_access_hash.py`、`api/chatbi_principal.py`;本地脚本 `docs/diary/local_chatbi_access_token_gen.py` | 留空则 pepper 为空字节串;**勿**把 pepper 提交进 Git | 与项目无关 |
7575
| `CHATBI_AGENT_DB_PERSIST_TIMEOUT_S` | V2 Agent 每轮结束写 `rag_conversation_logs` 的**最大等待秒数**(在发出 SSE `done` 之前 `await`) | 可选 | `api/unified_chat.py:_await_persist_chatbi_v2_agent_log()` | 默认 `12`;范围 clamp 为 `1`~`120`;超时则 `done.persist.ok=false` 且先发 `error`(`stage=agent_db`) | 与项目无关 |
7676
| `CHATBI_V2_INTENT_LLM` | V2 意图是否调用 SiliconFlow LLM | 可选 | `api/intent_agent.py`、`tests/test_intent_agent_accuracy.py`、`tests/benchmark_intent_latency.py` | 默认 `true`;`false` 为纯启发式/V1 超时降级,**不创建上游 client(CI 零外呼)** | 与项目无关 |
77-
| `INTENT_LLM_MODEL` | 意图识别所用 chat 模型名 | 可选 | `api/intent_agent.py` | 默认 `Qwen/Qwen2.5-7B-Instruct` | 与项目无关 |
77+
| `INTENT_LLM_MODEL` | 意图识别所用 chat 模型名 | 可选 | `api/intent_agent.py`;`api/tools.py:_text2sql_summary_chat_model()`(Text2SQL 总结阶段回退) | 默认 `deepseek-ai/DeepSeek-V4-Pro` | 与项目无关 |
7878
| `CHATBI_V2_INTENT_EVAL` | 启用 60 条意图准确率 pytest(`@pytest.mark.intent_eval`) | 可选 | `tests/test_intent_agent_accuracy.py` | 不设为跳过;需密钥时配 `CHATBI_V2_INTENT_LLM=true` + `SILICONFLOW_API_KEY` | 与项目无关 |
7979
| `CHATBI_V2_INTENT_EVAL_OUT` | 评测结果 JSONL/同 stem CSV 输出路径 | 可选 | `tests/test_intent_agent_accuracy.py` | 默认 `tests/_out/intent_accuracy.jsonl`;**相对路径**(推荐):以 `ai-ink-brain-api-python` 仓库根为锚写 `tests/_out/foo.jsonl`;或以 `tests/` 为锚写 `_out/foo.jsonl`(与 pytest 启动目录无关) | 与项目无关 |
8080
| `CHATBI_V2_INTENT_EVAL_PROGRESS` | 60 条评测是否逐条打印开始/结束行 | 可选 | `tests/test_intent_agent_accuracy.py` | 默认 `true`;CI 内 `test_stub_eval_end_to_end_writes_exports` 强制 `false` | 与项目无关 |
@@ -111,7 +111,7 @@
111111
| `CHATBI_TEXT2SQL_LLM_TIMEOUT_S` | Text2SQL **两段 LLM**(生成 SQL / 总结)共用的 **超时秒数**兜底 | 可选 | `api/tools.py::text2sql_execute`(`asyncio.wait_for`) | **未设**时代码默认 **`120.0`**;当 **`CHATBI_TEXT2SQL_LLM_SQL_TIMEOUT_S` / `CHATBI_TEXT2SQL_LLM_SUMMARY_TIMEOUT_S`** 已分别设置时,本变量仅作二者缺省时的回退 | 与项目无关 |
112112
| `CHATBI_TEXT2SQL_LLM_SQL_TIMEOUT_S` | `llm_generate_sql` 阶段 **timeout**(秒) | 可选 | `api/tools.py::text2sql_execute` | 未设时回退 **`CHATBI_TEXT2SQL_LLM_TIMEOUT_S`** → 再未设则代码默认 **`120.0`** | 与项目无关 |
113113
| `CHATBI_TEXT2SQL_LLM_SUMMARY_TIMEOUT_S` | `llm_summarize` 阶段 **timeout**(秒) | 可选 | `api/tools.py::text2sql_execute` | 未设时回退 **`CHATBI_TEXT2SQL_LLM_TIMEOUT_S`** → 再未设则代码默认 **`120.0`** | 与项目无关 |
114-
| `CHATBI_TEXT2SQL_SUMMARY_LLM_MODEL` | Text2SQL **总结**阶段 chat 模型名(可选加速 / 降级) | 可选 | `api/tools.py::text2sql_execute` | **未设置或仅空白**时,与 **Intent** 默认一致(`INTENT_LLM_MODEL` 默认 `Qwen/Qwen2.5-7B-Instruct`,与 `api/intent_agent.py` 对齐) | 与项目无关 |
114+
| `CHATBI_TEXT2SQL_SUMMARY_LLM_MODEL` | Text2SQL **总结**阶段 chat 模型名(可选加速 / 降级) | 可选 | `api/tools.py::text2sql_execute` | **未设置或仅空白**时,与 **Intent** 默认一致(`INTENT_LLM_MODEL` 默认 `deepseek-ai/DeepSeek-V4-Pro`,与 `api/intent_agent.py` 对齐) | 与项目无关 |
115115
| `TEXT2SQL_DIALOGUE_CONTEXT_MAX_LEN` | 多轮 `history_to_rewrite_block` 注入 `build_sql_prompt` 前的 **最大字符数**(超出保留尾部) | 可选 | `api/tools.py::text2sql_execute` | 默认 **`8000`**;`<=0` 表示不截断 | 与项目无关 |
116116
| `TEXT2SQL_SCHEMA_PREFETCH` | 写入/更新意图下是否在 LLM 生成前 **只读预取** `information_schema.columns` | 可选 | `api/text2sql_schema_prefetch.py::schema_prefetch_enabled()` | `0`/`false`/`no`/`off` 关闭;`1`/`true` 强制开启;**未设**时若已配置 **`TEXT2SQL_DATABASE_URL`** 则默认开启 | 依赖 `TEXT2SQL_DATABASE_URL` |
117117
| `TEXT2SQL_SCHEMA_PREFETCH_TIMEOUT_MS` | 预取查询 `SET LOCAL statement_timeout`(毫秒) | 可选 | `api/text2sql_schema_prefetch.py::fetch_public_table_columns_sync()` | 默认 **`8000`**;clamp `200..60000` | 与项目无关 |

0 commit comments

Comments
 (0)