feat: add ModelScope API rate limit monitor and update model presets

ScarletMercy · ScarletMercy · commit bc09c0a9743e · 2026-04-24T20:14:53.000+08:00
- New modelscope_ratelimit.py: captures ratelimit headers via custom
  httpx transport, displays daily quota in status bar
- agent_setup.py: inject header-capturing clients for ModelScope models
- chat.py: show 魔搭今日免费额度 in prompt status bar
- prompts.py: remove GLM-4.7, add Qwen3-Next-80B-A3B-Thinking preset,
  comment out unavailable models (MiniMax-M2.7, DeepSeek-V4-Pro/Flash)
- README.md: document rate limit feature
diff --git a/README.md b/README.md
@@ -80,6 +80,11 @@ https://github.com/ScarletMercy/chcode/blob/main/assets/test.mp4
 - Skills are injected into system prompt via LangChain middleware
 - Supports project-level and global skill directories
 
+### ModelScope Rate Limit
+
+- Real-time **API quota display** in the status bar (remaining/total requests for the day, both overall and for the current model)
+- Enabled automatically when the selected model's `base_url` contains `modelscope`
+
 ## Built-in Tools (14)
 
 | Tool | Description |
@@ -199,6 +204,7 @@ chcode/
     ├── enhanced_chat_openai.py  # Extended ChatOpenAI with reasoning support
     ├── git_manager.py      # Git checkpoint management
     ├── skill_loader.py     # Skill discovery and loading
+    ├── modelscope_ratelimit.py  # ModelScope API rate limit monitor
     └── tool_result_pipeline.py  # Output truncation and budget enforcement
 ```
 
diff --git a/chcode/agent_setup.py b/chcode/agent_setup.py
@@ -29,6 +29,7 @@
 from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver
 
 from chcode.utils.enhanced_chat_openai import EnhancedChatOpenAI
+from chcode.utils.modelscope_ratelimit import is_modelscope_model, get_modelscope_clients
 from chcode.utils.skill_loader import SkillAgentContext
 from chcode.display import console
 from chcode.utils.tool_result_pipeline import (
@@ -221,7 +222,12 @@ async def load_model(
 ) -> ModelResponse:
     """动态加载模型"""
     model_config = request.runtime.context.model_config
-    return await handler(request.override(model=EnhancedChatOpenAI(**model_config)))
+    kwargs = dict(model_config)
+    if is_modelscope_model(model_config):
+        sync_client, async_client = get_modelscope_clients()
+        kwargs["http_client"] = sync_client
+        kwargs["http_async_client"] = async_client
+    return await handler(request.override(model=EnhancedChatOpenAI(**kwargs)))
 
 
 @wrap_model_call
diff --git a/chcode/chat.py b/chcode/chat.py
@@ -74,6 +74,7 @@
 from chcode.skill_manager import manage_skills
 from chcode.utils.git_checker import check_git_availability
 from chcode.utils.git_manager import GitManager
+from chcode.utils.modelscope_ratelimit import get_ratelimit, is_modelscope_model
 
 
 # ─── 命令自动补全 ──────────────────────────────────────
@@ -467,7 +468,15 @@ def _bottom_toolbar():
                 if wp:
                     parts.append(f"cwd: {wp}")
                 status = "  │  ".join(parts)
-                return HTML(f"<ansiblue>{sep}</ansiblue>\n{status}")
+                ratelimit_line = ""
+                if is_modelscope_model(self.model_config):
+                    rl = get_ratelimit()
+                    if rl:
+                        total = f"{rl['total_remaining']}/{rl['total_limit']}"
+                        model_name = self.model_config.get("model", "").split("/")[-1]
+                        model_rl = f"{rl['model_remaining']}/{rl['model_limit']}"
+                        ratelimit_line = f"\n<ansicyan>魔搭今日免费额度剩余: 全局 {total} │ 模型({model_name}) {model_rl}</ansicyan>"
+                return HTML(f"<ansiblue>{sep}</ansiblue>\n{status}{ratelimit_line}")
 
             self._prompt_session = PromptSession(
                 multiline=True,
diff --git a/chcode/prompts.py b/chcode/prompts.py
@@ -159,14 +159,6 @@ async def select_or_custom(
         "top_p": 0.95,
         "stream_usage": True,
     },
-    {
-        "model": "ZhipuAI/GLM-4.7",
-        "base_url": MODELSCOPE_BASE_URL,
-        "temperature": 1.0,
-        "top_p": 0.95,
-        "stream_usage": True,
-        "extra_body": {"max_completion_tokens": 131072},
-    },
     {
         "model": "ZhipuAI/GLM-5.1",
         "base_url": MODELSCOPE_BASE_URL,
@@ -196,6 +188,36 @@ async def select_or_custom(
         "top_p": 0.95,
         "stream_usage": True,
     },
+    {
+        "model": "Qwen/Qwen3-Next-80B-A3B-Thinking",
+        "base_url": MODELSCOPE_BASE_URL,
+        "temperature": 0.6,
+        "top_p": 0.95,
+        "stream_usage": True,
+        "extra_body": {"top_k": 20},
+    },
+    # {
+    #     "model": "MiniMax/MiniMax-M2.7",
+    #     "base_url": MODELSCOPE_BASE_URL,
+    #     "temperature": 1.0,
+    #     "top_p": 0.95,
+    #     "stream_usage": True,
+    #     "extra_body": {"top_k": 40},
+    # },
+    # {
+    #     "model": "deepseek-ai/DeepSeek-V4-Pro",
+    #     "base_url": MODELSCOPE_BASE_URL,
+    #     "temperature": 1.0,
+    #     "top_p": 1.0,
+    #     "stream_usage": True,
+    # },
+    # {
+    #     "model": "deepseek-ai/DeepSeek-V4-Flash",
+    #     "base_url": MODELSCOPE_BASE_URL,
+    #     "temperature": 1.0,
+    #     "top_p": 1.0,
+    #     "stream_usage": True,
+    # },
 ]
 
 API_KEY_ENV_VARS = [
diff --git a/chcode/utils/modelscope_ratelimit.py b/chcode/utils/modelscope_ratelimit.py
@@ -0,0 +1,62 @@
+"""ModelScope API call-count monitoring (decoupled module).
+
+Captures the rate-limit information in response headers through a custom
+httpx transport so the status bar can display it in real time. Only enabled
+when base_url contains "modelscope".
+"""
+
+from __future__ import annotations
+
+import httpx
+import threading
+
+# Most recent rate-limit values parsed from response headers; stays empty
+# until a response carrying the limit header has been recorded.
+_ratelimit_data: dict = {}
+# Guards every read and write of _ratelimit_data.
+_ratelimit_lock = threading.Lock()
+
+# httpx clients created on first use by get_modelscope_clients() and then
+# reused for the lifetime of the process.
+_cached_sync: httpx.Client | None = None
+_cached_async: httpx.AsyncClient | None = None
+# Guards creation of the cached clients.
+_client_lock = threading.Lock()
+
+
+def get_ratelimit() -> dict:
+    """Return a copy of the latest captured rate-limit values.
+
+    The result is an empty dict until a response carrying the rate-limit
+    headers has been recorded. A copy is handed out so callers never touch
+    the shared dict outside the lock.
+    """
+    with _ratelimit_lock:
+        if not _ratelimit_data:
+            return {}
+        snapshot = dict(_ratelimit_data)
+    return snapshot
+
+
+def is_modelscope_model(model_config: dict) -> bool:
+    """Return True when the model config points at a ModelScope endpoint.
+
+    Args:
+        model_config: Model settings mapping; only ``base_url`` is read.
+            A missing/empty config or a missing/non-string ``base_url``
+            is treated as "not ModelScope" instead of raising, because
+            this is called from the status-bar renderer.
+
+    Returns:
+        True if ``base_url`` contains "modelscope" (case-insensitive).
+    """
+    if not model_config:
+        return False
+    base_url = model_config.get("base_url")
+    # A config may carry base_url=None; `in` on None would raise TypeError.
+    if not isinstance(base_url, str):
+        return False
+    return "modelscope" in base_url.lower()
+
+
+def _update_ratelimit(headers: httpx.Headers) -> None:
+    """Record the rate-limit values found in a response's headers.
+
+    Does nothing when the ``modelscope-ratelimit-requests-limit`` header is
+    absent or empty. The other three headers default to 0 when missing.
+
+    If any header value is not a valid integer the update is skipped and
+    the previously stored values are kept: this runs inside the HTTP
+    transport, so a parsing error must not fail the actual API request.
+
+    Args:
+        headers: Response headers to inspect.
+    """
+    total_limit = headers.get("modelscope-ratelimit-requests-limit")
+    if not total_limit:
+        return
+    # Parse before taking the lock so a bad value never leaves the shared
+    # dict half-updated and the lock is held only for the final write.
+    try:
+        snapshot = {
+            "total_limit": int(total_limit),
+            "total_remaining": int(headers.get("modelscope-ratelimit-requests-remaining", 0)),
+            "model_limit": int(headers.get("modelscope-ratelimit-model-requests-limit", 0)),
+            "model_remaining": int(headers.get("modelscope-ratelimit-model-requests-remaining", 0)),
+        }
+    except ValueError:
+        return
+    with _ratelimit_lock:
+        _ratelimit_data.update(snapshot)
+
+
+class _HeaderCaptureTransport(httpx.HTTPTransport):
+    """Synchronous httpx transport that records rate-limit headers."""
+
+    def handle_request(self, request):
+        """Send ``request`` through the base transport and record its response headers."""
+        resp = super().handle_request(request)
+        _update_ratelimit(resp.headers)
+        return resp
+
+
+class _HeaderCaptureAsyncTransport(httpx.AsyncHTTPTransport):
+    """Asynchronous httpx transport that records rate-limit headers."""
+
+    async def handle_async_request(self, request):
+        """Await the base transport's response and record its headers."""
+        resp = await super().handle_async_request(request)
+        _update_ratelimit(resp.headers)
+        return resp
+
+
+def get_modelscope_clients() -> tuple[httpx.Client, httpx.AsyncClient]:
+    """Return the shared (sync, async) httpx clients that capture rate-limit headers.
+
+    Both clients are created together on the first call and reused on
+    later calls; creation is serialized by ``_client_lock``.
+
+    NOTE(review): supplying an explicit transport is believed to make httpx
+    skip proxy settings from the environment — confirm that is acceptable.
+    """
+    global _cached_sync, _cached_async
+    with _client_lock:
+        needs_init = _cached_sync is None or _cached_async is None
+        if needs_init:
+            sync_transport = _HeaderCaptureTransport()
+            _cached_sync = httpx.Client(transport=sync_transport)
+            async_transport = _HeaderCaptureAsyncTransport()
+            _cached_async = httpx.AsyncClient(transport=async_transport)
+        return _cached_sync, _cached_async
diff --git a/docs/plans/2026-04-23-vision-tool.md b/docs/plans/2026-04-23-vision-tool.md
@@ -0,0 +1,66 @@
+# Vision Understanding Tool Implementation Plan
+
+> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
+
+**Goal:** Add a vision understanding tool that integrates ModelScope vision models, allowing users to paste image paths in chat and have the AI analyze them via tool calling.
+
+**Architecture:** Add a new `analyze_image` tool to the existing tools system. Create a vision model config file at `~/.chat/vision_model.json`. The tool sends the image (base64) + user prompt to the ModelScope OpenAI-compatible vision API with fallback support.
+
+**Tech Stack:** httpx (async HTTP), base64 (image encoding), LangChain @tool, OpenAI-compatible chat completions API
+
+---
+
+### Task 1: Create Vision Model Config Module
+
+**Files:**
+- Create: `chcode/vision_config.py`
+
+**Step 1:** Create `chcode/vision_config.py` with:
+- Vision model presets (default: Kimi-K2.5, backups: Qwen3-VL series, Intern-S1)
+- Load/save vision config from `~/.chat/vision_model.json`
+- Auto-detect ModelScope token from env var or existing model config
+- Default vision config generation
+
+### Task 2: Add `analyze_image` Tool
+
+**Files:**
+- Modify: `chcode/utils/tools.py` — add `analyze_image` tool + register in `ALL_TOOLS`
+
+**Step 1:** Add `analyze_image` async tool that:
+- Accepts `image_path` and `prompt` params
+- Validates the image file exists and is a supported format (png/jpg/jpeg/gif/bmp/webp)
+- Reads the image file, base64-encodes it
+- Calls the ModelScope vision API (OpenAI-compatible chat completions with image content)
+- Falls back through backup vision models on failure
+- Returns the model's analysis text
+
+### Task 3: Update System Prompt
+
+**Files:**
+- Modify: `chcode/agent_setup.py` — update `load_skills` middleware to mention `analyze_image`
+
+**Step 1:** Add `analyze_image` to the system prompt tool list so the LLM knows to use it when users provide image paths.
+
+### Task 4: Update `/tools` Command Display
+
+**Files:**
+- `chcode/chat.py` — no changes needed (the `/tools` display reads from `ALL_TOOLS` dynamically)
+
+### Task 5: Add Vision Config Slash Command
+
+**Files:**
+- Modify: `chcode/chat.py` — add `/vision` command to configure vision models
+- Modify: `chcode/prompts.py` — add vision model configuration prompt
+
+**Step 1:** Add `/vision` slash command that lets users:
+- View current vision model config
+- Reconfigure vision models (pick default, set API key)
+- Test vision model connection
+
+---
+
+## Verification
+
+1. Run `chcode` and type `/tools` — `analyze_image` should appear in the list
+2. Type `/vision` — should show current vision config
+3. In chat, paste an image path like `./test.png` with a question — the LLM should call `analyze_image`