claude-code-subagent-wrapper/subagent_template.py at main · gabe-almeida/claude-code-subagent-wrapper · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
#!/usr/bin/env python3
# =============================================================================
# Claude Code Sub-Agent Wrapper Template
# Version: v2.2.1
#
# Use non-Anthropic models as sub-agents in Claude Code with ALL native tools.
# This template uses environment variables - set your API key before use.
#
# Setup:
#   export ZAI_API_KEY="your-api-key-here"  # Get from https://z.ai/subscribe
#
# Usage:
#   python subagent_template.py --task "Your task" --cwd /path --stream
#
# Timeout Behavior (v2.2.0+):
#   Uses INACTIVITY timeout, not global timeout. Timer resets on each tool use.
#   - Long tasks run indefinitely as long as they're making progress
#   - Stalled tasks (no tool use) are killed after inactivity timeout
#   - Optional max-timeout as a safety ceiling
# =============================================================================
"""
Claude Code Sub-Agent Wrapper

Spawns Claude Code CLI with custom API backend (z.ai, OpenRouter, etc.),
giving ALL native Claude Code tools but using your preferred model.

Key features:
- Native-ish UI: spinner (in terminal) + tool names
- Token efficient: only tool names + result preview to stdout
- No truncation: full stream saved to log files
- Full debuggability: logs at /tmp/glm-native-subagent/
- Smart timeout: Inactivity-based (resets on tool use), not global

Timeout behavior (v2.2.0+):
- Uses INACTIVITY timeout instead of global timeout
- Timer resets every time the agent uses a tool (heartbeat pattern)
- Long tasks run indefinitely as long as they're making progress
- Stalled tasks are detected and killed quickly
- Optional max-timeout provides a safety ceiling

Environment variables (set before running):
- ANTHROPIC_AUTH_TOKEN or ZAI_API_KEY (required) - Your API key
- ANTHROPIC_BASE_URL or ZAI_BASE_URL (optional) - API endpoint
"""

import argparse
import json
import os
import signal
import subprocess
import sys
import threading
import time
import uuid
from typing import Optional, Dict, Any, List


# =============================================================================
# CONFIGURATION - Modify these for your API provider
# =============================================================================
# Fallback endpoint used by _build_env() when neither ANTHROPIC_BASE_URL nor
# ZAI_BASE_URL is set in the environment (z.ai endpoint).
DEFAULT_BASE_URL = "https://api.z.ai/api/anthropic"  # z.ai endpoint
# Default for the API_TIMEOUT_MS environment variable (only applied when it is
# not already set). Kept as a string because it is exported via the
# environment; 300000 ms = 5 min.
DEFAULT_API_TIMEOUT_MS = "300000"  # 5 min timeout
# Frames cycled by the progress spinner in run_subagent(), one per tick.
SPINNER_FRAMES = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]

# ANSI color codes for terminal output
class Colors:
    """ANSI escape sequences used to style terminal output.

    Plain namespace of string constants; it is never instantiated.
    """

    # Text attributes
    RESET = "\033[0m"      # Clear every attribute set so far
    BOLD = "\033[1m"       # Bold
    DIM = "\033[2m"        # Dim text

    # Foreground colors
    GRAY = "\033[90m"      # Muted gray for labels
    WHITE = "\033[97m"     # Bright white for content


def _supports_color() -> bool:
    """Report whether ANSI color codes should be written to stdout.

    Color is enabled only when all of the following hold:
    - stdout is attached to a terminal,
    - the ``NO_COLOR`` environment variable is unset or empty,
    - ``TERM`` is not ``"dumb"``.
    """
    interactive = sys.stdout.isatty()
    opted_out = bool(os.environ.get("NO_COLOR"))
    dumb_terminal = os.environ.get("TERM") == "dumb"
    return interactive and not (opted_out or dumb_terminal)


def _safe_truncate(s: str, n: int = 250) -> str:
    """Return the first *n* characters of *s*, marking any cut with "...".

    A falsy *s* (``None`` or the empty string) yields ``""``.
    """
    if not s:
        return ""
    suffix = "..." if len(s) > n else ""
    return s[:n] + suffix


def _build_env() -> Dict[str, str]:
    """Return a copy of the process environment prepared for the Claude CLI.

    The copy always carries:
    - ``ANTHROPIC_AUTH_TOKEN``: taken from ``ANTHROPIC_AUTH_TOKEN`` or,
      failing that, ``ZAI_API_KEY``.
    - ``ANTHROPIC_BASE_URL``: the first non-empty value of
      ``ANTHROPIC_BASE_URL``, ``ZAI_BASE_URL``, then ``DEFAULT_BASE_URL``.
    - ``API_TIMEOUT_MS``: left as-is when already set, otherwise
      ``DEFAULT_API_TIMEOUT_MS``.

    ``os.environ`` itself is not modified.

    Raises:
        ValueError: if neither token variable is set to a non-empty value.
    """
    child_env = dict(os.environ)

    # Check for API token (required)
    api_key = child_env.get("ANTHROPIC_AUTH_TOKEN") or child_env.get("ZAI_API_KEY")
    if not api_key:
        help_lines = [
            "Missing API token!",
            "Set one of these environment variables:",
            "  export ZAI_API_KEY='your-key-here'",
            "  export ANTHROPIC_AUTH_TOKEN='your-key-here'",
            "",
            "Get your API key from: https://z.ai/subscribe",
        ]
        raise ValueError("\n".join(help_lines))

    # First non-empty override wins; otherwise use the built-in endpoint.
    endpoint = DEFAULT_BASE_URL
    for var_name in ("ANTHROPIC_BASE_URL", "ZAI_BASE_URL"):
        candidate = child_env.get(var_name)
        if candidate:
            endpoint = candidate
            break

    child_env["ANTHROPIC_AUTH_TOKEN"] = api_key
    child_env["ANTHROPIC_BASE_URL"] = endpoint
    if "API_TIMEOUT_MS" not in child_env:
        child_env["API_TIMEOUT_MS"] = DEFAULT_API_TIMEOUT_MS
    return child_env


def _kill_process_group(proc: subprocess.Popen, grace_seconds: int = 5) -> None:
    """Terminate *proc* and its process group, escalating if it lingers.

    Sends SIGTERM to the group that *proc* leads and waits up to
    *grace_seconds* for *proc* to exit. If it is still running after that,
    SIGKILL is sent to the same group and *proc* is waited on for up to two
    more seconds.

    This is best-effort and never raises: if signalling the group fails at
    any stage, the function falls back to ``proc.kill()`` on the process
    alone and ignores errors from that as well.

    Args:
        proc: The child process to stop.
        grace_seconds: How long to wait after SIGTERM before sending SIGKILL.
    """

    def _kill_leader_only() -> None:
        # Last resort when the group cannot be signalled.
        try:
            proc.kill()
        except Exception:
            pass

    try:
        # Resolve the group id once so the SIGKILL escalation below targets
        # the same group and cannot fail on a second lookup.
        pgid = os.getpgid(proc.pid)
        os.killpg(pgid, signal.SIGTERM)
        proc.wait(timeout=grace_seconds)
        return
    except subprocess.TimeoutExpired:
        pass  # Still running after the grace period: escalate below.
    except Exception:
        _kill_leader_only()
        return

    # The escalation is guarded separately: an exception raised while
    # handling TimeoutExpired would not be caught by the handlers above.
    try:
        os.killpg(pgid, signal.SIGKILL)
    except Exception:
        _kill_leader_only()
    try:
        proc.wait(timeout=2)
    except Exception:
        pass


def _parse_json_object(text: str) -> Optional[Dict[str, Any]]:
    """Decode *text* as JSON and return it only if it is a JSON object."""
    try:
        value = json.loads(text)
    except ValueError:
        return None
    return value if isinstance(value, dict) else None


def _last_result_event(path: Optional[str]) -> Optional[Dict[str, Any]]:
    """Return the last ``"type": "result"`` event in a JSONL log, if any."""
    if not path or not os.path.exists(path):
        return None
    found: Optional[Dict[str, Any]] = None
    with open(path, "r", encoding="utf-8", errors="replace") as f:
        for ln in f:
            ev = _parse_json_object(ln.strip())
            if ev is not None and ev.get("type") == "result":
                found = ev
    return found


def run_subagent(
    task: str,
    working_dir: Optional[str] = None,
    allowed_tools: Optional[str] = None,
    inactivity_timeout: int = 90,
    max_timeout: Optional[int] = None,
    skip_permissions: bool = True,
    stream_progress: bool = False,
    show_prompt: bool = False,
    max_budget_usd: Optional[float] = None,
    debug: bool = False,
) -> Dict[str, Any]:
    """
    Run Claude Code CLI as a sub-agent with custom API backend.

    Args:
        task: Task description
        working_dir: Working directory (default: current)
        allowed_tools: Comma-separated allowed tools. Only passed to the CLI
            when skip_permissions is False.
        inactivity_timeout: Kill if no tool use for this many seconds (default: 90)
        max_timeout: Optional hard ceiling in seconds (default: None = unlimited)
        skip_permissions: Skip permission prompts (default: True)
        stream_progress: Show tool names as they execute
        show_prompt: Display the full prompt before execution
        max_budget_usd: Max cost ceiling
        debug: Write debug logs

    Returns:
        Dict with: success, result, session_id, error, artifacts

    Raises:
        ValueError: if no API token is configured (raised by _build_env()
            before anything is started).

    Timeout behavior:
        Uses inactivity-based timeout (heartbeat pattern). Timer resets each
        time the agent uses a tool. Long tasks run indefinitely if active;
        stalled tasks are detected and killed quickly.

        NOTE: tool-use events are only parsed when stream_progress is True.
        Without streaming the timer is never reset, so inactivity_timeout
        acts as a limit on the total run time.

    Logging:
        The child's stdout is copied line by line to the stdout log (and to
        the stream log when streaming). Its stderr is written straight to
        the stderr log rather than through a pipe, so a child that writes a
        lot to stderr cannot block on a full pipe.
    """
    env = _build_env()
    cwd = working_dir or os.getcwd()
    run_id = uuid.uuid4().hex[:10]

    # Log file paths
    artifacts_dir = os.path.join("/tmp", "glm-native-subagent")
    os.makedirs(artifacts_dir, exist_ok=True)

    stream_log = os.path.join(artifacts_dir, f"run_{run_id}.stream.jsonl") if stream_progress else None
    stdout_log = os.path.join(artifacts_dir, f"run_{run_id}.stdout.txt")
    stderr_log = os.path.join(artifacts_dir, f"run_{run_id}.stderr.txt")

    # Build command
    output_format = "stream-json" if stream_progress else "json"
    cmd: List[str] = ["claude", "-p", task, "--output-format", output_format]

    if stream_progress:
        cmd.append("--verbose")

    if skip_permissions:
        cmd.append("--dangerously-skip-permissions")
    elif allowed_tools:
        cmd.extend(["--allowedTools", allowed_tools])

    # Sub-agent behavior prompt
    subagent_prompt = """You are a coding sub-agent. Complete the given task efficiently.
Guidelines:
- Read existing files before modifying them
- Use Edit tool for surgical changes to existing files
- Use Write tool only for new files
- Follow existing project conventions
- When done, provide a clear summary of what you accomplished"""
    cmd.extend(["--append-system-prompt", subagent_prompt])
    cmd.append("--no-session-persistence")

    if max_budget_usd is not None:
        cmd.extend(["--max-budget-usd", str(max_budget_usd)])

    # Debug logging
    debug_log = None
    if debug:
        debug_log = open("/tmp/glm-subagent-debug.log", "a", encoding="utf-8")
        debug_log.write(f"\n{'='*80}\nrun_id={run_id}\ncwd={cwd}\ncmd={' '.join(cmd)}\n")
        debug_log.flush()

    # Print header
    use_color = _supports_color()
    print(f"[subagent] {run_id} starting cwd={cwd}", flush=True)

    if show_prompt:
        # Show full prompt with a (colored, when supported) label, then each
        # line of the task indented.
        label = f"{Colors.GRAY}└ Prompt:{Colors.RESET}" if use_color else "└ Prompt:"
        print(label, flush=True)
        for line in task.split('\n'):
            print(f"    {line}", flush=True)
        print("", flush=True)  # Empty line after prompt
    else:
        print(f"[subagent] task: {_safe_truncate(task, 120)}", flush=True)

    result: Dict[str, Any] = {
        "success": False,
        "result": "",
        "session_id": None,
        "error": None,
        "artifacts": {
            "run_id": run_id,
            "stream_log": stream_log,
            "stdout_log": stdout_log,
            "stderr_log": stderr_log
        },
    }

    final_result_event: Optional[Dict[str, Any]] = None
    stop_spinner = threading.Event()
    last_tool_name_printed = None

    # Heartbeat tracking for inactivity timeout. A monotonic clock keeps the
    # measured intervals immune to wall-clock adjustments.
    last_activity_time = time.monotonic()
    activity_lock = threading.Lock()

    def spinner_loop():
        # Skip spinner if stdout is not a real terminal
        if not sys.stdout.isatty():
            return
        i = 0
        while not stop_spinner.is_set():
            frame = SPINNER_FRAMES[i % len(SPINNER_FRAMES)]
            i += 1
            sys.stdout.write(f"\r[subagent] {run_id} running {frame}")
            sys.stdout.flush()
            time.sleep(0.2)
        sys.stdout.write("\r" + " " * 60 + "\r")
        sys.stdout.flush()

    proc: Optional[subprocess.Popen] = None
    reader: Optional[threading.Thread] = None
    spinner_thread: Optional[threading.Thread] = None
    open_logs: List[Any] = []

    try:
        try:
            # Popen reports a missing cwd as FileNotFoundError, which would
            # otherwise be mistaken for a missing CLI below.
            if not os.path.isdir(cwd):
                raise NotADirectoryError(
                    f"Working directory not found or not a directory: {cwd}"
                )

            # Open log files first so stderr can be redirected into its log.
            stdout_f = open(stdout_log, "w", encoding="utf-8")
            open_logs.append(stdout_f)
            stderr_f = open(stderr_log, "w", encoding="utf-8")
            open_logs.append(stderr_f)
            stream_f = None
            if stream_log:
                stream_f = open(stream_log, "w", encoding="utf-8")
                open_logs.append(stream_f)

            proc = subprocess.Popen(
                cmd,
                cwd=cwd,
                env=env,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=stderr_f,
                text=True,
                start_new_session=True,
                bufsize=1,
            )

            # CRITICAL: Close stdin immediately to prevent hang
            if proc.stdin:
                proc.stdin.close()

            spinner_thread = threading.Thread(target=spinner_loop, daemon=True)
            spinner_thread.start()

            def read_stdout(out_stream):
                nonlocal final_result_event, last_tool_name_printed, last_activity_time
                for line in out_stream:
                    # Write to log files (full fidelity)
                    try:
                        stdout_f.write(line)
                        stdout_f.flush()
                        if stream_f:
                            stream_f.write(line)
                            stream_f.flush()
                    except ValueError:
                        # The logs were closed because shutdown stopped
                        # waiting for this thread; nothing left to record.
                        return

                    # Parse stream events for progress display
                    if not stream_progress:
                        continue
                    event = _parse_json_object(line.strip())
                    if event is None:
                        # Blank line, malformed JSON, or a non-object value.
                        continue

                    etype = event.get("type", "")
                    if etype == "assistant":
                        message = event.get("message")
                        content = message.get("content") if isinstance(message, dict) else None
                        if not isinstance(content, list):
                            continue
                        for block in content:
                            if isinstance(block, dict) and block.get("type") == "tool_use":
                                tool_name = block.get("name") or "tool"
                                # HEARTBEAT: Reset inactivity timer on tool use
                                with activity_lock:
                                    last_activity_time = time.monotonic()
                                # Only print unique tool names (deduplication)
                                if tool_name != last_tool_name_printed:
                                    last_tool_name_printed = tool_name
                                    sys.stdout.write(f"\n[subagent] {run_id} 🔧 {tool_name}\n")
                                    sys.stdout.flush()
                    elif etype == "result":
                        final_result_event = event
                        sys.stdout.write(f"\n[subagent] {run_id} ✅ complete\n")
                        sys.stdout.flush()

            reader = threading.Thread(target=read_stdout, args=(proc.stdout,), daemon=True)
            reader.start()

            # Wait with inactivity-based timeout (heartbeat pattern)
            start = time.monotonic()
            while proc.poll() is None:
                current_time = time.monotonic()

                # Check inactivity timeout (resets on each tool use)
                with activity_lock:
                    idle_time = current_time - last_activity_time

                if idle_time > inactivity_timeout:
                    _kill_process_group(proc)
                    result["error"] = f"Inactivity timeout: no tool use for {inactivity_timeout}s"
                    break

                # Check max timeout ceiling (if set)
                if max_timeout and (current_time - start) > max_timeout:
                    _kill_process_group(proc)
                    result["error"] = f"Max timeout ceiling reached: {max_timeout}s"
                    break

                time.sleep(0.1)
        finally:
            # Cleanup runs on every exit path, including exceptions and
            # Ctrl-C. The child lives in its own session, so it would
            # outlive this process unless it is stopped explicitly.
            if proc is not None and proc.poll() is None:
                try:
                    _kill_process_group(proc)
                except Exception:
                    pass
            stop_spinner.set()
            if reader is not None:
                reader.join(timeout=2)
            if spinner_thread is not None:
                spinner_thread.join(timeout=1)
            if proc is not None and proc.stdout and (reader is None or not reader.is_alive()):
                proc.stdout.close()
            for log_file in open_logs:
                log_file.close()

        # Parse result (skipped when a timeout already recorded an error)
        if result["error"] is None:
            if proc.returncode == 0:
                if stream_progress:
                    # Prefer the event seen live; otherwise re-read the log.
                    result_event = final_result_event or _last_result_event(stream_log)
                    result["success"] = True
                    if result_event:
                        result["result"] = result_event.get("result", "") or ""
                        result["session_id"] = result_event.get("session_id")
                    else:
                        result["result"] = f"(no result event; see {stream_log})"
                else:
                    # Non-streaming: parse stdout as JSON
                    with open(stdout_log, "r", encoding="utf-8", errors="replace") as f:
                        stdout_text = f.read()
                    output = _parse_json_object(stdout_text)
                    result["success"] = True
                    if output is not None:
                        result["result"] = output.get("result", "") or ""
                        result["session_id"] = output.get("session_id")
                    else:
                        # Not a JSON object: hand back the raw text.
                        result["result"] = stdout_text
            else:
                err_text = ""
                if os.path.exists(stderr_log):
                    with open(stderr_log, "r", encoding="utf-8", errors="replace") as f:
                        err_text = f.read()
                result["error"] = (err_text.strip() or f"Exit code: {proc.returncode}").strip()

    except FileNotFoundError:
        result["error"] = "Claude CLI not found. Install: npm install -g @anthropic-ai/claude-code"
    except Exception as e:
        result["error"] = str(e)
    finally:
        if debug_log:
            debug_log.write(f"success={result.get('success')} error={result.get('error')}\n")
            debug_log.close()

    # Print final summary (small); also shown for timeouts
    if result["success"]:
        print(f"[subagent] {run_id} ✅ success", flush=True)
        print(f"[subagent] result: {_safe_truncate(result.get('result',''), 200)}", flush=True)
    else:
        print(f"[subagent] {run_id} ❌ error={result.get('error')}", flush=True)

    if stream_log:
        print(f"[subagent] logs: {stream_log}", flush=True)

    return result


def main():
    """Command-line entry point.

    Parses the flags, runs the sub-agent, then prints one JSON object with
    the keys ``success``, ``result`` and ``error`` for the orchestrator.
    Exits with status 0 on success and 1 otherwise.

    A configuration error from run_subagent() (it raises ValueError when no
    API token is set) is reported through the same JSON object instead of a
    traceback, with the message also written to stderr.
    """
    parser = argparse.ArgumentParser(
        description="Claude Code Sub-Agent Wrapper",
        epilog="Set ZAI_API_KEY environment variable before running."
    )
    parser.add_argument("--task", required=True, help="Task description")
    parser.add_argument("--cwd", help="Working directory")
    parser.add_argument("--allowed-tools", help="Comma-separated allowed tools")
    parser.add_argument("--inactivity-timeout", type=int, default=90,
                        help="Kill if no tool use for N seconds (default: 90)")
    parser.add_argument("--max-timeout", type=int, default=None,
                        help="Optional hard ceiling in seconds (default: unlimited)")
    parser.add_argument("--require-permissions", action="store_true", help="Require permission prompts")
    parser.add_argument("--stream", action="store_true", help="Show tool names as they execute")
    parser.add_argument("--show-prompt", action="store_true", help="Display full prompt before execution")
    parser.add_argument("--max-budget", type=float, help="Max cost USD")
    parser.add_argument("--debug", action="store_true", help="Debug logs to /tmp/glm-subagent-debug.log")
    args = parser.parse_args()

    try:
        result = run_subagent(
            task=args.task,
            working_dir=args.cwd,
            allowed_tools=args.allowed_tools,
            inactivity_timeout=args.inactivity_timeout,
            max_timeout=args.max_timeout,
            # The flag is opt-in, so permissions are skipped unless requested.
            skip_permissions=not args.require_permissions,
            stream_progress=args.stream,
            show_prompt=args.show_prompt,
            max_budget_usd=args.max_budget,
            debug=args.debug,
        )
    except ValueError as exc:
        # Keep the stdout contract: the orchestrator always gets JSON.
        print(str(exc), file=sys.stderr)
        print(json.dumps({"success": False, "result": "", "error": str(exc)}))
        sys.exit(1)

    # Print JSON for orchestrator
    print(json.dumps({"success": result["success"], "result": result["result"], "error": result["error"]}))
    sys.exit(0 if result.get("success") else 1)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()