Skip to content

Commit bc09c0a

Browse files
committed
feat: add ModelScope API rate limit monitor and update model presets
- New modelscope_ratelimit.py: captures ratelimit headers via a custom httpx transport and displays the daily quota in the status bar
- agent_setup.py: inject header-capturing clients for ModelScope models
- chat.py: show 魔搭今日免费额度 in prompt status bar
- prompts.py: remove GLM-4.7, add Qwen3-Next-80B-A3B-Thinking preset, comment out unavailable models (MiniMax-M2.7, DeepSeek-V4-Pro/Flash)
- README.md: document rate limit feature
1 parent 9b60476 commit bc09c0a

6 files changed

Lines changed: 181 additions & 10 deletions

File tree

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@ https://github.com/ScarletMercy/chcode/blob/main/assets/test.mp4
8080
- Skills are injected into system prompt via LangChain middleware
8181
- Supports project-level and global skill directories
8282

83+
### ModelScope Rate Limit
84+
85+
- Real-time **API quota display** in status bar (daily limit remaining, per-model remaining)
86+
- Auto-enabled when using ModelScope models
87+
8388
## Built-in Tools (14)
8489

8590
| Tool | Description |
@@ -199,6 +204,7 @@ chcode/
199204
├── enhanced_chat_openai.py # Extended ChatOpenAI with reasoning support
200205
├── git_manager.py # Git checkpoint management
201206
├── skill_loader.py # Skill discovery and loading
207+
├── modelscope_ratelimit.py # ModelScope API rate limit monitor
202208
└── tool_result_pipeline.py # Output truncation and budget enforcement
203209
```
204210

chcode/agent_setup.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver
3030

3131
from chcode.utils.enhanced_chat_openai import EnhancedChatOpenAI
32+
from chcode.utils.modelscope_ratelimit import is_modelscope_model, get_modelscope_clients
3233
from chcode.utils.skill_loader import SkillAgentContext
3334
from chcode.display import console
3435
from chcode.utils.tool_result_pipeline import (
@@ -221,7 +222,12 @@ async def load_model(
221222
) -> ModelResponse:
222223
"""动态加载模型"""
223224
model_config = request.runtime.context.model_config
224-
return await handler(request.override(model=EnhancedChatOpenAI(**model_config)))
225+
kwargs = dict(model_config)
226+
if is_modelscope_model(model_config):
227+
sync_client, async_client = get_modelscope_clients()
228+
kwargs["http_client"] = sync_client
229+
kwargs["http_async_client"] = async_client
230+
return await handler(request.override(model=EnhancedChatOpenAI(**kwargs)))
225231

226232

227233
@wrap_model_call

chcode/chat.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
from chcode.skill_manager import manage_skills
7575
from chcode.utils.git_checker import check_git_availability
7676
from chcode.utils.git_manager import GitManager
77+
from chcode.utils.modelscope_ratelimit import get_ratelimit, is_modelscope_model
7778

7879

7980
# ─── 命令自动补全 ──────────────────────────────────────
@@ -467,7 +468,15 @@ def _bottom_toolbar():
467468
if wp:
468469
parts.append(f"cwd: {wp}")
469470
status = " │ ".join(parts)
470-
return HTML(f"<ansiblue>{sep}</ansiblue>\n{status}")
471+
ratelimit_line = ""
472+
if is_modelscope_model(self.model_config):
473+
rl = get_ratelimit()
474+
if rl:
475+
total = f"{rl['total_remaining']}/{rl['total_limit']}"
476+
model_name = self.model_config.get("model", "").split("/")[-1]
477+
model_rl = f"{rl['model_remaining']}/{rl['model_limit']}"
478+
ratelimit_line = f"\n<ansicyan>魔搭今日免费额度剩余: 全局 {total} │ 模型({model_name}) {model_rl}</ansicyan>"
479+
return HTML(f"<ansiblue>{sep}</ansiblue>\n{status}{ratelimit_line}")
471480

472481
self._prompt_session = PromptSession(
473482
multiline=True,

chcode/prompts.py

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -159,14 +159,6 @@ async def select_or_custom(
159159
"top_p": 0.95,
160160
"stream_usage": True,
161161
},
162-
{
163-
"model": "ZhipuAI/GLM-4.7",
164-
"base_url": MODELSCOPE_BASE_URL,
165-
"temperature": 1.0,
166-
"top_p": 0.95,
167-
"stream_usage": True,
168-
"extra_body": {"max_completion_tokens": 131072},
169-
},
170162
{
171163
"model": "ZhipuAI/GLM-5.1",
172164
"base_url": MODELSCOPE_BASE_URL,
@@ -196,6 +188,36 @@ async def select_or_custom(
196188
"top_p": 0.95,
197189
"stream_usage": True,
198190
},
191+
{
192+
"model": "Qwen/Qwen3-Next-80B-A3B-Thinking",
193+
"base_url": MODELSCOPE_BASE_URL,
194+
"temperature": 0.6,
195+
"top_p": 0.95,
196+
"stream_usage": True,
197+
"extra_body": {"top_k": 20},
198+
},
199+
# {
200+
# "model": "MiniMax/MiniMax-M2.7",
201+
# "base_url": MODELSCOPE_BASE_URL,
202+
# "temperature": 1.0,
203+
# "top_p": 0.95,
204+
# "stream_usage": True,
205+
# "extra_body": {"top_k": 40},
206+
# },
207+
# {
208+
# "model": "deepseek-ai/DeepSeek-V4-Pro",
209+
# "base_url": MODELSCOPE_BASE_URL,
210+
# "temperature": 1.0,
211+
# "top_p": 1.0,
212+
# "stream_usage": True,
213+
# },
214+
# {
215+
# "model": "deepseek-ai/DeepSeek-V4-Flash",
216+
# "base_url": MODELSCOPE_BASE_URL,
217+
# "temperature": 1.0,
218+
# "top_p": 1.0,
219+
# "stream_usage": True,
220+
# },
199221
]
200222

201223
API_KEY_ENV_VARS = [
chcode/utils/modelscope_ratelimit.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
"""ModelScope API 调用次数监控(解耦模块)
2+
3+
通过自定义 httpx Transport 捕获响应头中的 ratelimit 信息,
4+
供状态栏实时显示。仅在 base_url 包含 modelscope 时启用。
5+
"""
6+
7+
from __future__ import annotations
8+
9+
import httpx
10+
import threading
11+
12+
# Most recently captured quota values; written by _update_ratelimit and
# copied out by get_ratelimit, both while holding _ratelimit_lock.
_ratelimit_data: dict = {}
13+
# Guards every read and write of _ratelimit_data.
_ratelimit_lock = threading.Lock()
14+
15+
# Shared httpx clients, created on first use by get_modelscope_clients.
_cached_sync: httpx.Client | None = None
16+
_cached_async: httpx.AsyncClient | None = None
17+
# Guards creation of the cached client pair above.
_client_lock = threading.Lock()
18+
19+
20+
def get_ratelimit() -> dict:
    """Return a snapshot copy of the most recently captured rate-limit data.

    The copy is taken while holding ``_ratelimit_lock``. An empty dict is
    returned when no rate-limit headers have been recorded yet.
    """
    snapshot: dict = {}
    with _ratelimit_lock:
        snapshot.update(_ratelimit_data)
    return snapshot
23+
24+
25+
def is_modelscope_model(model_config: dict) -> bool:
    """Return True when the model config points at a ModelScope endpoint.

    Args:
        model_config: Model settings mapping; only the ``"base_url"`` entry
            is inspected. A missing/empty config, or a ``base_url`` that is
            absent, ``None`` or empty, is treated as "not ModelScope"
            instead of raising.

    Returns:
        True if ``"modelscope"`` occurs in the base URL, compared
        case-insensitively because host names are not case-sensitive.
    """
    if not model_config:
        return False
    base_url = model_config.get("base_url")
    if not base_url:
        # An explicit None (or empty string) must not reach the `in` test.
        return False
    # str() tolerates URL objects as well as plain strings.
    return "modelscope" in str(base_url).lower()
27+
28+
29+
def _ratelimit_header_int(headers, name: str, default: int = 0) -> int:
    """Parse header *name* as an int; return *default* if absent or malformed."""
    raw = headers.get(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except (TypeError, ValueError):
        return default


def _update_ratelimit(headers: httpx.Headers) -> None:
    """Record ModelScope quota values found in a response's headers.

    Does nothing when the ``modelscope-ratelimit-requests-limit`` header is
    missing, empty, or not an integer. Otherwise the four quota fields are
    stored in ``_ratelimit_data`` under ``_ratelimit_lock``; any of the
    remaining three headers that is missing or malformed is stored as 0.

    Parsing is deliberately best-effort: this runs inside the HTTP transport,
    so an exception here would abort an otherwise successful API request.

    Args:
        headers: Response headers to inspect.
    """
    total_limit = headers.get("modelscope-ratelimit-requests-limit")
    if not total_limit:
        return
    try:
        parsed_total_limit = int(total_limit)
    except (TypeError, ValueError):
        # Unparseable limit: keep the previous snapshot rather than fail.
        return

    # Build the snapshot before taking the lock to keep the critical
    # section minimal.
    snapshot = {
        "total_limit": parsed_total_limit,
        "total_remaining": _ratelimit_header_int(
            headers, "modelscope-ratelimit-requests-remaining"
        ),
        "model_limit": _ratelimit_header_int(
            headers, "modelscope-ratelimit-model-requests-limit"
        ),
        "model_remaining": _ratelimit_header_int(
            headers, "modelscope-ratelimit-model-requests-remaining"
        ),
    }
    with _ratelimit_lock:
        _ratelimit_data.update(snapshot)
40+
41+
42+
class _HeaderCaptureTransport(httpx.HTTPTransport):
    """Synchronous httpx transport that records ModelScope quota headers.

    Every response produced by the parent transport has its headers handed
    to ``_update_ratelimit`` before the response is returned unchanged.
    """

    def handle_request(self, request):
        """Send *request* via the parent transport and capture its headers."""
        resp = super().handle_request(request)
        _update_ratelimit(resp.headers)
        return resp
47+
48+
49+
class _HeaderCaptureAsyncTransport(httpx.AsyncHTTPTransport):
    """Asynchronous httpx transport that records ModelScope quota headers.

    Every response produced by the parent transport has its headers handed
    to ``_update_ratelimit`` before the response is returned unchanged.
    """

    async def handle_async_request(self, request):
        """Await the parent transport for *request* and capture its headers."""
        resp = await super().handle_async_request(request)
        _update_ratelimit(resp.headers)
        return resp
54+
55+
56+
def get_modelscope_clients() -> tuple[httpx.Client, httpx.AsyncClient]:
    """Return the shared (sync, async) httpx clients that capture quota headers.

    The pair is built on first use while holding ``_client_lock`` and cached
    in module globals, so later calls hand back the same client objects.
    """
    global _cached_sync, _cached_async
    with _client_lock:
        both_ready = _cached_sync is not None and _cached_async is not None
        if not both_ready:
            _cached_sync = httpx.Client(transport=_HeaderCaptureTransport())
            _cached_async = httpx.AsyncClient(
                transport=_HeaderCaptureAsyncTransport()
            )
        pair = (_cached_sync, _cached_async)
    return pair
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Vision Understanding Tool Implementation Plan
2+
3+
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
4+
5+
**Goal:** Add a vision understanding tool that integrates ModelScope vision models, allowing users to paste image paths in chat and have the AI analyze them via tool calling.
6+
7+
**Architecture:** Add a new `analyze_image` tool to the existing tools system. Create a vision model config file at `~/.chat/vision_model.json`. The tool sends the image (base64) + user prompt to the ModelScope OpenAI-compatible vision API with fallback support.
8+
9+
**Tech Stack:** httpx (async HTTP), base64 (image encoding), LangChain @tool, OpenAI-compatible chat completions API
10+
11+
---
12+
13+
### Task 1: Create Vision Model Config Module
14+
15+
**Files:**
16+
- Create: `chcode/vision_config.py`
17+
18+
**Step 1:** Create `chcode/vision_config.py` with:
19+
- Vision model presets (default: Kimi-K2.5, backups: Qwen3-VL series, Intern-S1)
20+
- Load/save vision config from `~/.chat/vision_model.json`
21+
- Auto-detect ModelScope token from env var or existing model config
22+
- Default vision config generation
23+
24+
### Task 2: Add `analyze_image` Tool
25+
26+
**Files:**
27+
- Modify: `chcode/utils/tools.py` — add `analyze_image` tool + register in `ALL_TOOLS`
28+
29+
**Step 1:** Add `analyze_image` async tool that:
30+
- Accepts `image_path` and `prompt` params
31+
- Validates the image file exists and is a supported format (png/jpg/jpeg/gif/bmp/webp)
32+
- Reads the image file, base64-encodes it
33+
- Calls the ModelScope vision API (OpenAI-compatible chat completions with image content)
34+
- Falls back through backup vision models on failure
35+
- Returns the model's analysis text
36+
37+
### Task 3: Update System Prompt
38+
39+
**Files:**
40+
- Modify: `chcode/agent_setup.py` — update `load_skills` middleware to mention `analyze_image`
41+
42+
**Step 1:** Add `analyze_image` to the system prompt tool list so the LLM knows to use it when users provide image paths.
43+
44+
### Task 4: Update `/tools` Command Display
45+
46+
**Files:**
47+
- Modify: `chcode/chat.py` — no changes needed (it reads from `ALL_TOOLS` dynamically)
48+
49+
### Task 5: Add Vision Config Slash Command
50+
51+
**Files:**
52+
- Modify: `chcode/chat.py` — add `/vision` command to configure vision models
53+
- Modify: `chcode/prompts.py` — add vision model configuration prompt
54+
55+
**Step 1:** Add `/vision` slash command that lets users:
56+
- View current vision model config
57+
- Reconfigure vision models (pick default, set API key)
58+
- Test vision model connection
59+
60+
---
61+
62+
## Verification
63+
64+
1. Run `chcode` and type `/tools` — `analyze_image` should appear in the list
65+
2. Type `/vision` — should show current vision config
66+
3. In chat, paste an image path like `./test.png` with a question — the LLM should call `analyze_image`

0 commit comments

Comments
 (0)