ocp/server.mjs at main · dtzp555-max/ocp · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env node
/**
 * openclaw-claude-proxy — OpenAI-compatible proxy for Claude CLI
 *
 * Translates OpenAI chat/completions requests into `claude --output-format stream-json` CLI calls,
 * letting you use your Claude Pro/Max subscription as an OpenClaw model provider.
 *
 * Timeout design: single CLAUDE_TIMEOUT (default 600s / 10 min).
 * No separate first-byte or idle timeout — Claude tool-use causes long pauses
 * in the token stream (30s-5min) that make fine-grained timeouts unreliable.
 * This matches LiteLLM, OpenAI SDK, and other major LLM proxies.
 *
 * Env vars:
 *   CLAUDE_PROXY_PORT            — listen port (default: DEFAULT_PORT from lib/constants.mjs)
 *   CLAUDE_BIN                   — path to claude binary (default: auto-detect)
 *   CLAUDE_TIMEOUT               — per-request timeout in ms (default: 600000)
 *   CLAUDE_ALLOWED_TOOLS         — comma-separated tools to allow (default: expanded set)
 *   CLAUDE_SKIP_PERMISSIONS      — "true" to bypass all permission checks (default: false)
 *   CLAUDE_SYSTEM_PROMPT         — system prompt appended to all requests
 *   CLAUDE_MCP_CONFIG            — path to MCP server config JSON file
 *   CLAUDE_SESSION_TTL           — session TTL in ms (default: 3600000 = 1h)
 *   CLAUDE_MAX_CONCURRENT        — max concurrent claude processes, -p/stream-json path (default: 8)
 *   CLAUDE_MAX_QUEUE             — max requests waiting for a -p slot before HTTP 429 (default: 16)
 *   OCP_TUI_MAX_CONCURRENT       — max concurrent interactive TUI turns, TUI-mode path (default: 2)
 *   OCP_SPAWN_REAL_HOME          — "1" forces the -p spawn to use the real HOME (disables the
 *                                  latency spawn-home isolation; default: isolated when a token exists)
 *   CLAUDE_BREAKER_THRESHOLD     — failures in window before circuit opens (default: 6)
 *   CLAUDE_BREAKER_COOLDOWN      — base ms to wait before retrying after circuit opens (default: 120000)
 *   CLAUDE_BREAKER_WINDOW        — sliding window duration in ms (default: 300000 = 5min)
 *   CLAUDE_BREAKER_HALF_OPEN_MAX — max concurrent probes in half-open state (default: 2)
 *   PROXY_API_KEY                — Bearer token for API auth (optional)
 *   CLAUDE_HEARTBEAT_INTERVAL    — SSE heartbeat interval in ms on streaming path (default: 0 = disabled)
 */
import { createServer } from "node:http";
import { spawn, execFileSync } from "node:child_process";
import { randomUUID, timingSafeEqual } from "node:crypto";
import { readFileSync, readdirSync, accessSync, existsSync, constants, chmodSync, statSync, mkdirSync, writeFileSync, rmSync } from "node:fs";
import { fileURLToPath } from "node:url";
import { dirname, join } from "node:path";
import { homedir } from "node:os";
import { validateKey, recordUsage, getUsageByKey, getUsageTimeline, getRecentUsage, createKey, listKeys, revokeKey, closeDb, checkQuota, updateKeyQuota, getKeyQuota, findKey, cacheHash, getCachedResponse, setCachedResponse, clearCache, getCacheStats, hasCacheControl, singleflight, getInflightStats } from "./keys.mjs";
import { DEFAULT_PORT } from "./lib/constants.mjs";
import { isLoopbackBind } from "./lib/net.mjs";
import { runTuiTurn, reapStaleTuiSessions, resolveTuiHome } from "./lib/tui/session.mjs";
import { detectTuiUpstreamError } from "./lib/tui/transcript.mjs";
import { TuiSemaphore, recordTuiEntrypoint, buildTuiHealthBlock } from "./lib/tui/semaphore.mjs";

// ES modules have no built-in __dirname; derive this file's directory from its module URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// package.json and models.json are read synchronously from this file's own directory at
// module load. A missing or malformed file throws here, before anything below runs.
const _pkg = JSON.parse(readFileSync(join(__dirname, "package.json"), "utf8"));
const modelsConfig = JSON.parse(readFileSync(join(__dirname, "models.json"), "utf8"));

// ── Resolve claude binary ───────────────────────────────────────────────
// Priority: CLAUDE_BIN env > well-known paths > nvm/fnm/asdf user-local
// installs > which lookup. Fail-fast if not found — never start with an
// unresolvable binary.
// Return the entry names directly under `parent`. Any readdir failure (directory missing,
// not readable, not a directory) is treated as "nothing installed" and yields [].
function _listVersionDirs(parent) {
  let names;
  try {
    names = readdirSync(parent);
  } catch {
    names = [];
  }
  return names;
}
/**
 * Collect candidate `claude` paths installed under per-user Node version managers.
 *
 * Order of the returned list (resolveClaude probes it front to back):
 *   1. the nvm default-alias version, when ~/.nvm/aliases/default names one
 *   2. every other nvm version     ($HOME/.nvm/versions/node/<version>/bin/claude)
 *   3. every fnm version           ($HOME/.fnm/node-versions/<version>/installation/bin/claude)
 *   4. every asdf nodejs version   ($HOME/.asdf/installs/nodejs/<version>/bin/claude)
 *   5. the relocated npm prefix    ($HOME/.npm-global/bin/claude)
 * Paths are NOT checked for existence or executability here; that is the caller's job.
 *
 * @param {string} home - the user's home directory; a falsy value yields []
 * @returns {string[]} candidate paths, most preferred first
 */
function _collectNodeManagerCandidates(home) {
  if (!home) return [];
  const out = [];

  // nvm: $HOME/.nvm/versions/node/<version>/bin/claude
  const nvmRoot = join(home, ".nvm/versions/node");
  for (const v of _listVersionDirs(nvmRoot)) {
    out.push(join(nvmRoot, v, "bin/claude"));
  }
  // nvm default alias: the version named in $HOME/.nvm/aliases/default goes FIRST.
  // Previously the alias path was only added when it was absent from the listing, so a
  // default that matched an installed version kept its directory-listing position and was
  // never actually preferred. The alias may be written with or without the leading "v".
  try {
    const aliasVer = readFileSync(join(home, ".nvm/aliases/default"), "utf8").trim();
    if (aliasVer) {
      const exact = join(nvmRoot, aliasVer, "bin/claude");
      const prefixed = join(nvmRoot, `v${aliasVer}`, "bin/claude");
      let preferred = exact;
      if (!out.includes(exact) && out.includes(prefixed)) preferred = prefixed;
      // Move (not duplicate) the preferred entry to the front. An alias that matches no
      // installed version (e.g. "lts/*") is still prepended verbatim, as before.
      const at = out.indexOf(preferred);
      if (at !== -1) out.splice(at, 1);
      out.unshift(preferred);
    }
  } catch {
    // No readable default alias: keep the directory-listing order.
  }

  // fnm: $HOME/.fnm/node-versions/<version>/installation/bin/claude
  const fnmRoot = join(home, ".fnm/node-versions");
  for (const v of _listVersionDirs(fnmRoot)) {
    out.push(join(fnmRoot, v, "installation/bin/claude"));
  }

  // asdf: $HOME/.asdf/installs/nodejs/<version>/bin/claude
  const asdfRoot = join(home, ".asdf/installs/nodejs");
  for (const v of _listVersionDirs(asdfRoot)) {
    out.push(join(asdfRoot, v, "bin/claude"));
  }

  // npm prefix-relocated: $HOME/.npm-global/bin/claude
  out.push(join(home, ".npm-global/bin/claude"));

  return out;
}
/**
 * Locate the `claude` executable. Never returns without one.
 *
 * Resolution order:
 *   1. CLAUDE_BIN, when set. It must be executable, otherwise the process exits with
 *      status 1 (an explicit setting is never silently overridden).
 *   2. Well-known install paths, then ~/.local/bin, then the nvm/fnm/asdf/npm-global
 *      locations from _collectNodeManagerCandidates(); the first executable one wins.
 *   3. `which claude`.
 * If nothing is found, prints a diagnostic listing the checked paths and exits with status 1.
 *
 * @returns {string} path to the `claude` binary
 */
function resolveClaude() {
  if (process.env.CLAUDE_BIN) {
    try {
      accessSync(process.env.CLAUDE_BIN, constants.X_OK);
      return process.env.CLAUDE_BIN;
    } catch {
      console.error(`FATAL: CLAUDE_BIN="${process.env.CLAUDE_BIN}" is set but not executable.`);
      process.exit(1);
    }
  }

  const home = process.env.HOME || "";
  const candidates = [
    "/opt/homebrew/bin/claude",
    "/usr/local/bin/claude",
    "/usr/bin/claude",
    // Only probe ~/.local/bin when HOME is known: join("", ".local/bin/claude") is the
    // RELATIVE path ".local/bin/claude", which would be resolved against the current
    // working directory and could select an unrelated binary.
    ...(home ? [join(home, ".local/bin/claude")] : []),
    ..._collectNodeManagerCandidates(home),
  ];
  for (const p of candidates) {
    try {
      accessSync(p, constants.X_OK);
      console.warn(`[init] CLAUDE_BIN not set, resolved to ${p}`);
      return p;
    } catch {
      // Not present or not executable: try the next candidate.
    }
  }

  try {
    const resolved = execFileSync("which", ["claude"], { encoding: "utf8", timeout: 5000 }).trim();
    if (resolved) { console.warn(`[init] CLAUDE_BIN not set, resolved via which: ${resolved}`); return resolved; }
  } catch {
    // `which` missing, timed out, or found nothing: fall through to the fatal diagnostic.
  }

  console.error(
    "FATAL: claude binary not found.\n" +
    "  Set CLAUDE_BIN=/path/to/claude or ensure claude is in PATH.\n" +
    "  Hint: if you use nvm/fnm/asdf, set CLAUDE_BIN to the absolute path\n" +
    "  shown by `which claude` in your interactive shell.\n" +
    "  Checked: " + candidates.join(", ")
  );
  process.exit(1);
}

// ── OCP system prompt wrapper (Phase 6c port — ADR 0009 Amendment 1 analogue) ─
// Injected via `--system-prompt` flag, replacing claude CLI's default system
// prompt (which normally includes cwd, OS, tool descriptions, and git status —
// all irrelevant and potentially misleading when the model is accessed via the
// OCP HTTP proxy).
//
// Authority: claude CLI § --system-prompt (ported from OLP, verified v2.1.104;
// behavior stable through v2.1.158 — OLP ADR 0009 Amendment 1 §
// "OLP system prompt wrapper"; ported to OCP 2026-05-30).
// Reference: https://github.com/dtzp555-max/olp commit 97e7d16 (Phase 6c)
const OCP_SYSTEM_PROMPT_WRAPPER = `You are accessed via the OCP HTTP proxy. You do NOT have access to any local filesystem, working directory, shell, git status, or machine environment. Do not infer or invent such information from any context you observe. Respond only based on the conversation provided.`;

/**
 * Build the full system-prompt string for a request: OCP_SYSTEM_PROMPT_WRAPPER first, then
 * the text of every system-role message (in request order), each separated by a blank line.
 * ADR 0009 Amendment 1 analogue § "OLP system prompt wrapper".
 *
 * Malformed input is tolerated rather than thrown on: a non-array `messages` is treated as
 * empty, and null / non-object entries are skipped, so a bad request body cannot raise a
 * TypeError here.
 *
 * @param {Array<{role?: string, content?: unknown}>|null|undefined} messages - chat messages
 * @returns {string} the wrapper alone when there are no system messages, otherwise
 *   `<wrapper>\n\n<system text 1>\n\n<system text 2>...`
 */
function extractSystemPrompt(messages) {
  const systemMessages = Array.isArray(messages)
    ? messages.filter(m => m?.role === "system")
    : [];
  if (systemMessages.length === 0) {
    return OCP_SYSTEM_PROMPT_WRAPPER;
  }
  const clientContent = systemMessages.map(m =>
    contentToText(m.content)
  ).join("\n\n");
  return `${OCP_SYSTEM_PROMPT_WRAPPER}\n\n${clientContent}`;
}

// ── NDJSON line buffer parser (Phase 6c port) ─────────────────────────────
// Splits a buffered string on newlines, returning complete parsed events
// plus the trailing incomplete line as `remainder` for the next data chunk.
//
// Authority: claude CLI § --output-format stream-json (ported from OLP, verified v2.1.104;
//   behavior stable through v2.1.158; each event is a newline-terminated JSON object on stdout).
// Reference: OLP lib/providers/anthropic.mjs parseStreamJsonLines (commit 97e7d16).
function parseStreamJsonLines(buffered) {
  const segments = buffered.split("\n");
  // Whatever follows the final newline is an unfinished line; hand it back to the caller.
  const tail = segments.pop();

  const events = segments
    .map(segment => segment.trim())
    .filter(text => text !== "")
    .map(text => {
      try {
        return JSON.parse(text);
      } catch {
        // Keep a placeholder so a bad line is visible downstream instead of vanishing.
        console.error("[claude] NDJSON parse error on line:", text.slice(0, 120));
        return { type: "parse_error", raw: text };
      }
    });

  return { events, remainder: tail ?? "" };
}

// ── NDJSON event → text content extractor (Phase 6c port) ────────────────
// Maps claude CLI stream-json NDJSON events to { text, stop, error } signals.
// Returns:
//   { text: string }   — content delta to forward
//   { stop: true }     — terminal event (emit finish_reason=stop)
//   { error: string }  — error event (emit error stop)
//   null               — consumed event (log/ignore)
//
// Authority: claude CLI § --output-format stream-json (ported from OLP, verified v2.1.104;
//   behavior stable through v2.1.158).
// Reference: OLP lib/providers/anthropic.mjs anthropicStreamJsonEventToIR (commit 97e7d16).
//
// @param {object} event — parsed NDJSON event
// @param {boolean} isFirstDelta — true if no content has been yielded yet
function parseStreamJsonEvent(event, isFirstDelta) {
  const kind = event?.type;

  switch (kind) {
    // Consumed silently: system meta (init, api_retry, ...), the echoed user message, and
    // parse_error placeholders (already logged by parseStreamJsonLines).
    case "system":
    case "user":
    case "parse_error":
      return null;

    // Incremental output: only a content_block_delta carrying a text_delta is forwarded;
    // every other sub-type (content_block_start, message_delta, ...) is consumed.
    case "stream_event": {
      const payload = event.event ?? event;
      const isTextDelta =
        payload?.type === "content_block_delta" && payload.delta?.type === "text_delta";
      return isTextDelta ? { text: payload.delta.text ?? "" } : null;
    }

    // Aggregate message. Short responses may arrive ONLY in this form, so it is the text
    // source when nothing has been yielded yet; after that it is a duplicate and is dropped.
    case "assistant": {
      if (!isFirstDelta) return null;
      const blocks = event.message?.content;
      if (!Array.isArray(blocks)) return null;
      let text = "";
      for (const block of blocks) {
        if (block && block.type === "text" && typeof block.text === "string") {
          text += block.text;
        }
      }
      return text ? { text } : null;
    }

    // Terminal event: either an error signal or a clean stop.
    case "result":
      if (event.is_error === true) {
        return { error: event.error_message ?? event.result ?? "claude returned is_error" };
      }
      return { stop: true };

    // Observability only: logged, never forwarded.
    case "rate_limit_event":
    case "usage":
      logEvent("info", "claude_stream_event", { type: kind, data: JSON.stringify(event).slice(0, 200) });
      return null;

    case "control_request":
      console.error("[claude] stream_json control_request event (ignored):", JSON.stringify(event).slice(0, 120));
      return null;

    // Anything else: report the unfamiliar type (when there is one) and skip it.
    default:
      if (kind !== undefined) {
        console.error("[claude] unknown stream_json event type:", kind);
      }
      return null;
  }
}

// ── Configuration ───────────────────────────────────────────────────────
// Settings marked with `let` can be changed at runtime via PATCH /settings.
/**
 * Read an integer setting from the environment.
 *
 * Unset and empty values fall back silently (same as the former `|| "default"` idiom).
 * A value that does not parse as a base-10 integer also falls back, with a warning:
 * previously such a typo became NaN and was handed on as the setting's value.
 *
 * @param {string} name - environment variable to read
 * @param {number} fallback - value used when the variable is unset, empty, or not an integer
 * @returns {number} the parsed integer, or `fallback`
 */
function _parseIntEnv(name, fallback) {
  const raw = process.env[name];
  if (!raw) return fallback;
  const parsed = parseInt(raw, 10);
  if (Number.isNaN(parsed)) {
    console.warn(`[init] ${name}="${raw}" is not an integer — using default ${fallback}`);
    return fallback;
  }
  return parsed;
}

const PORT = _parseIntEnv("CLAUDE_PROXY_PORT", parseInt(String(DEFAULT_PORT), 10));
const CLAUDE = resolveClaude();
let TIMEOUT = _parseIntEnv("CLAUDE_TIMEOUT", 600000);
const PROXY_API_KEY = process.env.PROXY_API_KEY || "";
const SKIP_PERMISSIONS = process.env.CLAUDE_SKIP_PERMISSIONS === "true";
const ALLOWED_TOOLS = (process.env.CLAUDE_ALLOWED_TOOLS ||
  "Bash,Read,Write,Edit,Glob,Grep,WebSearch,WebFetch,Agent"
).split(",").map(s => s.trim()).filter(Boolean);
const SYSTEM_PROMPT = process.env.CLAUDE_SYSTEM_PROMPT || "";
const MCP_CONFIG = process.env.CLAUDE_MCP_CONFIG || "";
let SESSION_TTL = _parseIntEnv("CLAUDE_SESSION_TTL", 3600000);
let MAX_CONCURRENT = _parseIntEnv("CLAUDE_MAX_CONCURRENT", 8);
// FIX ⑥ (concurrency): bound on requests WAITING for a -p concurrency slot. Beyond
// MAX_CONCURRENT, requests queue (up to CLAUDE_MAX_QUEUE) instead of being rejected; when the
// queue is ALSO full, the request gets HTTP 429 + Retry-After (not an opaque 500). See
// claudeSemaphore / acquireClaudeSlot below.
const CLAUDE_MAX_QUEUE = _parseIntEnv("CLAUDE_MAX_QUEUE", 16);
// Retry-After seconds advertised on a 429 backpressure response. A claude turn is typically a
// few seconds to tens of seconds; a small constant nudge keeps well-behaved clients from
// hammering while the queue drains.
const CLAUDE_QUEUE_RETRY_AFTER = _parseIntEnv("CLAUDE_QUEUE_RETRY_AFTER", 5);
const BREAKER_THRESHOLD = _parseIntEnv("CLAUDE_BREAKER_THRESHOLD", 6);
const BREAKER_COOLDOWN = _parseIntEnv("CLAUDE_BREAKER_COOLDOWN", 120000);
const BREAKER_WINDOW = _parseIntEnv("CLAUDE_BREAKER_WINDOW", 300000);
const BREAKER_HALF_OPEN_MAX = _parseIntEnv("CLAUDE_BREAKER_HALF_OPEN_MAX", 2);
const HEARTBEAT_INTERVAL = _parseIntEnv("CLAUDE_HEARTBEAT_INTERVAL", 0);
const BIND_ADDRESS = process.env.CLAUDE_BIND || "127.0.0.1";
const NO_CONTEXT = process.env.CLAUDE_NO_CONTEXT === "true";
// Kill-switch for the FIX-③ default-path spawn-home isolation (see resolveSpawnHome /
// spawnHomeMode below). When "1", the -p/stream-json spawn always runs in the operator's
// real HOME with no cwd override — byte-for-byte the pre-isolation behaviour — even if an
// OAuth token is resolvable. Provided as an escape hatch in case a host depends on the real
// HOME's claude config for the spawned process.
const SPAWN_REAL_HOME = process.env.OCP_SPAWN_REAL_HOME === "1";
const AUTH_MODE = process.env.CLAUDE_AUTH_MODE || (PROXY_API_KEY ? "shared" : "none");
const ADMIN_KEY = process.env.OCP_ADMIN_KEY || "";
const PROXY_ANONYMOUS_KEY = process.env.PROXY_ANONYMOUS_KEY || "";
// When set to "1", advertise PROXY_ANONYMOUS_KEY in the public /health body so
// remote `ocp-connect` devices can zero-config auto-discover it (issue #12 §14 Path A).
// Default OFF: /health is unauthenticated, so advertising hands the shared key to any
// LAN-reachable device (issue #109 P0). Localhost callers always see it regardless,
// since localhost is already fully trusted by the auth path.
const ADVERTISE_ANON_KEY = process.env.PROXY_ADVERTISE_ANON_KEY === "1";
let CACHE_TTL = _parseIntEnv("CLAUDE_CACHE_TTL", 0); // 0 = disabled, value in ms

// ── TUI-mode (subscription-pool bridge) — opt-in; default OFF ───────────
// When ON: requests are served by spawning interactive `claude` (no -p / no
// --output-format) so cc_entrypoint=cli (subscription pool). Responses are
// buffered then replayed as chunked SSE.  Streaming is always buffered here.
// Authority: docs/adr/0007-tui-interactive-mode.md
// SECURITY: TUI-mode is SINGLE-USER ONLY.  Never enable on a multi-user OCP
// (guest prompts would run claude with operator filesystem access).
// Master switch: TUI-mode is on only when CLAUDE_TUI_MODE is exactly the string "true".
const TUI_MODE = process.env.CLAUDE_TUI_MODE === "true";
// Wall-clock limit for a TUI turn in ms (default 120000).
// NOTE(review): a non-numeric value parses to NaN with no fallback — confirm consumers cope.
const TUI_WALLCLOCK_MS = parseInt(process.env.CLAUDE_TUI_WALLCLOCK_MS || "120000", 10);
// Presumably the working directory for the TUI path; OCP_TUI_CWD overrides the default of
// <HOME>/.ocp-tui/work.
// NOTE(review): if HOME is unset the default is the literal string "undefined/.ocp-tui/work".
const TUI_CWD  = process.env.OCP_TUI_CWD  || `${process.env.HOME}/.ocp-tui/work`;
// HOME the interactive claude runs under. resolveTuiHome() decides:
//   - OCP_TUI_HOME set            → that path (explicit override, back-compat).
//   - else CLAUDE_CODE_OAUTH_TOKEN set → a CREDENTIAL-FREE scratch home
//     (<HOME>/.ocp-tui/home) with NO .credentials.json, so the env token is the only
//     credential and is authoritative — interactive claude otherwise PREFERS a
//     credentials.json over the env var, so a stale one shadows the token (proven live on
//     PI231) and a refresh on it can corrupt the single-use token. See ADR 0007 PR-D.
//   - else (no env token)         → the operator's real home (legacy credentials.json path,
//     byte-for-byte unchanged for hosts that intentionally rely on credentials.json).
// Only the PRESENCE of the token is passed on (envTokenSet is a boolean), not its value.
const TUI_HOME = resolveTuiHome({
  realHome:       process.env.HOME,
  configuredHome: process.env.OCP_TUI_HOME,
  envTokenSet:    !!process.env.CLAUDE_CODE_OAUTH_TOKEN,
});
const TUI_ENTRYPOINT = process.env.OCP_TUI_ENTRYPOINT || "cli"; // cli|auto|off — see ADR 0007
// Independent concurrency bound for the TUI path (audit C-4). Default 2: a TUI turn is
// HEAVY (per-request cold-boot of a tmux+claude session + up to TUI_WALLCLOCK_MS=120s of
// wallclock), so a small host (e.g. a Pi 4 serving a family) cannot run many at once
// without OOM + multiplied subscription rate-limit pressure. This is NOT the global
// MAX_CONCURRENT gate (that lives in spawnClaudeProcess, the -p/stream-json path, which
// callClaudeTui never reaches). See ADR 0007 PR-B amendment + lib/tui/semaphore.mjs.
const TUI_MAX_CONCURRENT = parseInt(process.env.OCP_TUI_MAX_CONCURRENT || "2", 10);
// Constructed with the limit only (no queue option is passed here).
const tuiSemaphore = new TuiSemaphore(TUI_MAX_CONCURRENT);
// Operator-visible TUI drift surface (audit C-5). lastEntrypoint + entrypointMismatches
// let the operator poll /health to catch a silent metered-pool drift (the audit's top
// risk: after the 6/15 flip a TTY-loss could flip cc_entrypoint cli→sdk-cli and drain
// metered credits invisibly — the warning currently only reaches journald).
// Mutable module-level state: the object is const, its fields start at the values below.
const tuiStats = {
  lastEntrypoint: null,      // last observed cc_entrypoint from the transcript ("cli" | "sdk-cli" | null)
  entrypointMismatches: 0,   // count of cli-expected-but-got-other turns
};

// ── FIX ③ (latency): default-path (-p / stream-json) spawn-home isolation ──────────────
// PROBLEM (measured, not theoretical): OCP's default spawn inherits the operator's real HOME
// (loading the global ~/.claude — plugins, skills, hooks) and runs with cwd=~/ocp (loading the
// project CLAUDE.md / skills) on EVERY request. Pure Anthropic API floor for haiku "hi" ≈ 1–2s;
// the same claude CLI spawned in the operator's real HOME/cwd ≈ 10–28s; a clean minimal HOME +
// CLAUDE_CODE_OAUTH_TOKEN ≈ 3–7s and authenticates fine. So the heavy global config is pure
// per-request latency tax with no proxy benefit (a proxy must NOT leak the host's context into
// the proxied turn — same rationale as NO_CONTEXT / the TUI path's CLAUDE_MDS suppression).
//
// FIX: when an OAuth token is resolvable, run the default spawn under a CREDENTIAL-FREE minimal
// scratch HOME (`<realHome>/.ocp/spawn-home`) with cwd = that same neutral dir, and pass the
// resolved token via CLAUDE_CODE_OAUTH_TOKEN so the env token is authoritative. This MIRRORS the
// TUI path's resolveTuiHome() env-token mode (lib/tui/session.mjs): for `-p`, the env token wins
// over a credentials.json (the opposite of interactive claude), so credential isolation is not
// even strictly required for auth here, but a credential-FREE home is still the right shape —
// nothing to refresh, nothing to corrupt, no heavy config to load.
//
// SAFETY: if NO token is resolvable → fall back to the real HOME with no cwd override (zero
// regression). OCP_SPAWN_REAL_HOME=1 forces that legacy behaviour even when a token exists.
// The scratch home holds NO .credentials.json / NO settings.json / NO plugins — it is created
// minimal and (re)cleaned of any settings.json on prepare.
const SPAWN_HOME_DIR = `${process.env.HOME}/.ocp/spawn-home`;

/**
 * Idempotently set up the minimal scratch HOME used for isolated spawns.
 *
 * Ensures `<dir>/.claude` exists, then deletes `settings.json` and `settings.local.json`
 * from it if present, so no settings file carries over into the spawned claude.
 * Entirely best-effort: every filesystem error is swallowed. If the directory cannot be
 * created the cleanup step is skipped, and it is left to the later spawn to fail visibly.
 *
 * @param {string} [dir=SPAWN_HOME_DIR] - scratch HOME to prepare
 * @returns {void}
 */
function prepareSpawnHome(dir = SPAWN_HOME_DIR) {
  const claudeDir = `${dir}/.claude`;

  try {
    mkdirSync(claudeDir, { recursive: true });
  } catch {
    // Directory unusable: nothing to clean, and the spawn itself will report the problem.
    return;
  }

  for (const name of ["settings.json", "settings.local.json"]) {
    const target = `${claudeDir}/${name}`;
    try {
      if (existsSync(target)) {
        rmSync(target, { force: true });
      }
    } catch {
      // One file failing to delete must not stop the other from being removed.
    }
  }
}

// Resolve the default-spawn HOME-isolation decision ONCE, lazily + memoized (so it runs after
// getOAuthCredentials is defined regardless of source order, and the token probe happens at most
// once). Returns { isolated, home, reason } where:
//   - isolated:true  → spawn under SPAWN_HOME_DIR with cwd=SPAWN_HOME_DIR + the env token.
//   - isolated:false → legacy real-HOME spawn, no cwd override (no token, or kill-switch on).
// Caches only the isolation DECISION (isolated/home/reason), NOT the token — the token is
// re-resolved FRESH per spawn via resolveSpawnToken(). A memoized token goes stale when its
// source rotates: the macOS keychain access token rotates (~hourly, refreshed by the operator's
// real claude), so a startup snapshot 401s once it expires (caused a ~31h Mac-mini 401 outage,
// 2026-06-26). OCP deliberately does NOT refresh the token itself — a refresh-token grant would
// consume the single-use refresh token and log out the operator's real claude (issue #112).
let _spawnHomeMode = null;
/**
 * Decide, once per process, whether default spawns run under the isolated scratch HOME.
 * The first call computes the decision and stores it; later calls return the same object.
 *
 * @returns {{isolated: boolean, home: (string|null), reason: string}} `isolated: true` with
 *   `home: SPAWN_HOME_DIR` when an OAuth access token is resolvable and the kill-switch is
 *   off; otherwise `isolated: false`, `home: null`. `reason` says which rule applied.
 */
function getSpawnHomeMode() {
  if (_spawnHomeMode) return _spawnHomeMode;

  // Both non-isolated outcomes share a shape and differ only in the explanation.
  const legacy = (reason) => ({ isolated: false, home: null, reason });

  if (SPAWN_REAL_HOME) {
    _spawnHomeMode = legacy("kill-switch (OCP_SPAWN_REAL_HOME=1)");
  } else {
    // A failing credential probe counts as "no token".
    let tokenAvailable;
    try {
      tokenAvailable = Boolean(getOAuthCredentials()?.accessToken);
    } catch {
      tokenAvailable = false;
    }

    if (tokenAvailable) {
      prepareSpawnHome(SPAWN_HOME_DIR);
      _spawnHomeMode = { isolated: true, home: SPAWN_HOME_DIR, reason: "oauth token resolved" };
    } else {
      _spawnHomeMode = legacy("no oauth token resolvable");
    }
  }
  return _spawnHomeMode;
}

// Resolve a FRESH OAuth access token for an isolated spawn. Read-only (keychain / credentials.json
// / env) — NEVER refreshes/rotates (see getSpawnHomeMode note). Returns null if none resolvable OR
// if a known expiry has passed (5-min buffer): a null return makes the caller fall back to real
// HOME, where the spawned claude refreshes the credential natively and self-heals (the keychain
// token is then fresh again → next spawn is fast). The env-token path (Linux) carries no expiresAt
// → never expiry-gated (those tokens are long-lived).
function resolveSpawnToken() {
  // Treat a token as already expired this long before its recorded expiry (5 minutes).
  const EXPIRY_BUFFER_MS = 300000;
  try {
    const { accessToken, expiresAt } = getOAuthCredentials() ?? {};
    if (!accessToken) return null;
    // Credentials with no expiresAt are never expiry-gated.
    const nearExpiry = Boolean(expiresAt) && Date.now() + EXPIRY_BUFFER_MS >= expiresAt;
    return nearExpiry ? null : accessToken;
  } catch {
    // Any failure while reading credentials means "no token".
    return null;
  }
}

// ── FIX ⑥ (concurrency): bounded wait-queue for the -p / stream-json path ──────────────
// PROBLEM (proven): spawnClaudeProcess used `if (activeRequests >= MAX_CONCURRENT) throw` →
// the client got an opaque 500 AND the rejection was NOT counted in stats (a 15-concurrent
// stress run returned 7×500 while /health stats.errors stayed 0). The TUI path already has a
// bounded-queue semaphore (TuiSemaphore); the -p path did not.
//
// FIX: requests beyond MAX_CONCURRENT WAIT on this semaphore (up to CLAUDE_MAX_QUEUE) instead of
// being rejected. Only when the queue is ALSO full do we reject — with HTTP 429 + Retry-After
// (deterministic backpressure), a distinct `concurrency_queue_full` log, and a stats.queueRejections
// counter that shows up on /health. The slot is released on EVERY exit path via the existing
// idempotent cleanup() (proc exit/close/error/timeout) — the #37/#40 slot-leak guard.
// Gate for the -p path: MAX_CONCURRENT slots plus a wait-queue capped at CLAUDE_MAX_QUEUE,
// reusing the TuiSemaphore class. Built once from the values in effect at module load.
// NOTE(review): MAX_CONCURRENT is declared with `let`; confirm that whatever reassigns it
// at runtime also updates this semaphore, since the limit is only passed in here.
const claudeSemaphore = new TuiSemaphore(MAX_CONCURRENT, { maxQueue: CLAUDE_MAX_QUEUE });

// Tagged error so callers can map this single overflow case to HTTP 429 (every OTHER throw stays
// a 500). Carries retryAfter for the Retry-After header.
class ConcurrencyOverflowError extends Error {
  /**
   * @param {string} message - human-readable description of the overflow
   */
  constructor(message) {
    super(message);
    Object.assign(this, {
      name: "ConcurrencyOverflowError",
      // Status the HTTP layer should answer with for this specific failure.
      httpStatus: 429,
      // Seconds to advertise in the Retry-After header.
      retryAfter: CLAUDE_QUEUE_RETRY_AFTER,
    });
  }
}

// Acquire a -p concurrency slot, queuing if all are busy (up to CLAUDE_MAX_QUEUE). Resolves to a
// release() fn that MUST be called exactly once on every exit path (wired into ctx.cleanup()).
// Rejects with ConcurrencyOverflowError when the wait-queue is full. Increments stats.queued while
// waiting (decremented on acquire) and stats.queueRejections on overflow.
/**
 * Acquire a -p concurrency slot from claudeSemaphore, waiting if none is free.
 *
 * Bookkeeping: stats.queued is bumped before the wait and re-synced from the semaphore
 * afterwards (on success, on failure, and again on release); stats.queueRejections is
 * incremented and a `concurrency_queue_full` warning is logged when the acquire rejects.
 *
 * @returns {Promise<function(): void>} a release function. It is idempotent: only the
 *   first call releases the slot, so it is safe to reach it from several cleanup paths.
 * @throws {ConcurrencyOverflowError} when claudeSemaphore.acquire() rejects. The original
 *   rejection is preserved on the error's non-enumerable `cause` property.
 */
async function acquireClaudeSlot() {
  stats.queued = claudeSemaphore.queued + 1; // reflect this waiter before we (maybe) block
  try {
    await claudeSemaphore.acquire();
  } catch (e) {
    stats.queued = claudeSemaphore.queued;
    stats.queueRejections++;
    logEvent("warn", "concurrency_queue_full", {
      limit: claudeSemaphore.limit, maxQueue: claudeSemaphore.maxQueue,
      inflight: claudeSemaphore.inflight, queued: claudeSemaphore.queued,
    });
    const overflow = new ConcurrencyOverflowError(
      `backpressure: concurrency limit (${claudeSemaphore.limit}) reached and wait queue ` +
      `(${claudeSemaphore.maxQueue}) is full — retry shortly`);
    // Keep the underlying rejection instead of discarding it, so an acquire() failure that
    // is NOT a full queue can still be diagnosed. Non-enumerable, matching how the built-in
    // Error constructor stores `cause`, so it does not leak into serialized error bodies.
    Object.defineProperty(overflow, "cause", {
      value: e, writable: true, configurable: true, enumerable: false,
    });
    throw overflow;
  }
  stats.queued = claudeSemaphore.queued;
  let released = false;
  return function releaseClaudeSlot() {
    if (released) return; // idempotent — cleanup() may be reached via multiple proc events
    released = true;
    claudeSemaphore.release();
    stats.queued = claudeSemaphore.queued;
  };
}

// SECURITY fail-loud: TUI-mode is incompatible with any configuration that allows
// non-operator prompts to reach the interactive claude session. Three cases:
//   1. AUTH_MODE=multi — guest/anonymous keys can submit prompts.
//   2. a non-loopback BIND_ADDRESS — server is network-exposed; any reachable peer
//      can send prompts unless per-request trust is in place. Override with
//      OCP_TUI_ALLOW_LAN=1 ONLY if you have a separate network-layer trust (firewall, VPN).
//   3. PROXY_ANONYMOUS_KEY set — anonymous callers can submit prompts without a key.
// In all three cases TUI runs interactive claude with the OPERATOR's full filesystem
// access — home is NOT isolation. Refuse to boot. See ADR 0007.
// Case 1: TUI-mode combined with multi-key auth — print the reason and exit with status 1.
if (TUI_MODE && AUTH_MODE === "multi") {
  console.error(
    "FATAL: CLAUDE_TUI_MODE=true is incompatible with CLAUDE_AUTH_MODE=multi.\n" +
    "  TUI runs interactive claude with the operator's filesystem access, so a guest/anonymous\n" +
    "  prompt could read operator data. TUI-mode is single-user only until B-path isolation lands.\n" +
    "  See docs/adr/0007-tui-interactive-mode.md. Refusing to start."
  );
  process.exit(1);
}
// Case 2: TUI-mode on a non-loopback bind address. OCP_TUI_ALLOW_LAN must be exactly "1"
// to bypass this check; any other value (or unset) exits with status 1.
if (TUI_MODE && !isLoopbackBind(BIND_ADDRESS) && process.env.OCP_TUI_ALLOW_LAN !== "1") {
  console.error(
    `FATAL: CLAUDE_TUI_MODE=true with a non-loopback CLAUDE_BIND (${BIND_ADDRESS}) is unsafe.\n` +
    "  TUI runs interactive claude with operator filesystem access; network-exposed without\n" +
    "  per-request isolation means any reachable peer could drive the operator's claude session.\n" +
    "  Either bind to 127.0.0.1 (default) or set OCP_TUI_ALLOW_LAN=1 if you have a\n" +
    "  separate network-layer trust (firewall/VPN). See docs/adr/0007-tui-interactive-mode.md."
  );
  process.exit(1);
}
// Case 3: TUI-mode with an anonymous key configured — exit with status 1.
if (TUI_MODE && PROXY_ANONYMOUS_KEY) {
  console.error(
    "FATAL: CLAUDE_TUI_MODE=true with PROXY_ANONYMOUS_KEY set is unsafe.\n" +
    "  TUI runs interactive claude with operator filesystem access; anonymous callers\n" +
    "  could drive the operator's claude session without a named key.\n" +
    "  Remove PROXY_ANONYMOUS_KEY or disable TUI-mode. See docs/adr/0007-tui-interactive-mode.md."
  );
  process.exit(1);
}

// Non-fatal configuration warnings: startup continues after each of these.
// An anonymous key only has an effect in multi mode, so flag it when set in any other mode.
if (PROXY_ANONYMOUS_KEY && AUTH_MODE !== "multi") {
  console.warn("WARNING: PROXY_ANONYMOUS_KEY is set but AUTH_MODE is not 'multi' — anonymous key will be ignored");
}

// Shared mode without a configured key: warn that nothing is being enforced.
if (AUTH_MODE === "shared" && !PROXY_API_KEY) {
  console.warn("WARNING: AUTH_MODE=shared but PROXY_API_KEY is not set — all requests will pass unauthenticated");
}

// Version string taken from the `version` field of _pkg
// (presumably the parsed package.json — confirm where _pkg is loaded).
const VERSION = _pkg.version;
// Epoch milliseconds captured once, when this module is evaluated.
const START_TIME = Date.now();

// ── Structured logging helper ───────────────────────────────────────────
/**
 * Emit one structured log record as a single JSON line.
 *
 * The record is `{ ts, level, event, ...data }`, so keys in `data` override the
 * built-in ones. Records whose `level` argument is "error" or "warn" go to
 * stderr (console.error); everything else goes to stdout (console.log).
 *
 * @param {string} level - Severity label; also selects the output stream.
 * @param {string} event - Event name.
 * @param {object} [data={}] - Extra fields merged into the record.
 */
function logEvent(level, event, data = {}) {
  const line = JSON.stringify({ ts: new Date().toISOString(), level, event, ...data });
  if (["error", "warn"].includes(level)) {
    console.error(line);
    return;
  }
  console.log(line);
}

// ── Startup file-mode reconciliation ───────────────────────────────────
// Idempotently tightens OCP credential-bearing files to 700/600 so that
// existing installs (created before this fix) are hardened on next restart.
// Wrapped in try/catch — chmod failure must never crash startup.
// Does NOT touch systemd units or launchd plists; those are managed by setup.mjs.
/**
 * Best-effort permission reconciliation for ~/.ocp and two files inside it.
 *
 * For each target the permission bits (mode & 0o777) are compared with the
 * wanted mode (0o700 for the directory, 0o600 for admin-key and ocp.db) and
 * chmod is applied when they differ. A missing path (ENOENT) is skipped
 * silently; any other stat/chmod error is logged as a warning and the loop
 * moves on. An info event is logged only when at least one mode was changed.
 */
function _tightenFileModesIfPossible() {
  const ocpDir = join(homedir(), ".ocp");
  // [path, wanted mode, label used in log output]
  const wanted = [
    [ocpDir, 0o700, "~/.ocp (dir)"],
    [join(ocpDir, "admin-key"), 0o600, "~/.ocp/admin-key"],
    [join(ocpDir, "ocp.db"), 0o600, "~/.ocp/ocp.db"],
  ];

  let tightened = 0;
  let alreadyOk = 0;
  for (const [target, wantedMode, label] of wanted) {
    try {
      const currentMode = statSync(target).mode & 0o777;
      if (currentMode === wantedMode) {
        alreadyOk += 1;
        continue;
      }
      chmodSync(target, wantedMode);
      tightened += 1;
    } catch (err) {
      // A path that does not exist yet is expected; anything else is worth a warning.
      if (err.code === "ENOENT") continue;
      logEvent("warn", "file_mode_tighten_failed", { path: label, error: err.message });
    }
  }

  if (tightened > 0) {
    logEvent("info", "file_modes_tightened", { tightened, alreadyOk });
  }
}
_tightenFileModesIfPossible();

// ── Circuit breaker (DISABLED) ──────────────────────────────────────────
// Disabled: CLI proxy has its own retry logic, and the breaker was causing
// cascading failures — once API got briefly slow, ALL agents lost connectivity
// for 120s+ due to the breaker rejecting every request.
// The timeout/failure tracking stubs below are kept as no-ops so callers
// don't need to be changed.
/** No-op stub: the argument is ignored and nothing is recorded. */
function breakerRecordSuccess(_cliModel) {}
/** No-op stub: the argument is ignored and nothing is recorded. */
function breakerRecordTimeout(_cliModel) {}
/** Always reports a closed breaker, whatever model is passed. Returns a fresh object each call. */
function getBreakerState(_cliModel) { return { state: "closed" }; }
/** Returns a fresh placeholder object carrying only a note that the breaker is disabled. */
function getBreakerSnapshot() { return { _note: "circuit breaker disabled" }; }

// Legacy constants kept for /health display
const _BREAKER_DISABLED_NOTE = "disabled";
/* Original breaker code removed — see git history for v2.5.0 implementation.
   Re-enable by reverting this block if needed in the future.
   Reason for disabling: CLI-proxy architecture means each request spawns a
   fresh claude process. The breaker was designed for persistent API connections
   where a degraded backend benefits from back-off. With CLI spawning, timeouts
   are usually transient (API load, large prompts) and the breaker's 120s+
   cooldown with graduated backoff made things worse, not better.
*/


// ── Model mapping ───────────────────────────────────────────────────────
// Maps request model IDs and aliases to canonical claude CLI model IDs.
// Derived from models.json (single source of truth).
// Lookup table: request model ID or alias → canonical claude CLI model ID.
// Entries are applied in order (model IDs, then aliases, then legacy aliases), so a later
// entry with the same key wins.
//
// The table has a null prototype on purpose. Callers resolve models with
// `MODEL_MAP[model] || model`, where `model` comes from the request body. On an ordinary
// object a model named "constructor", "toString" or "__proto__" would resolve to an inherited
// Object.prototype member (a function/object), which would then be passed on as the CLI model.
// With no prototype, such names simply miss and fall back to the raw string.
const MODEL_MAP = Object.assign(Object.create(null), Object.fromEntries([
  ...modelsConfig.models.map(m => [m.id, m.id]),
  ...Object.entries(modelsConfig.aliases),
  ...Object.entries(modelsConfig.legacyAliases),
]));

// Public model list: one { id, name } pair per configured model, name = displayName.
const MODELS = modelsConfig.models.map(m => ({ id: m.id, name: m.displayName }));

// ── Session management ──────────────────────────────────────────────────
// Maps namespaced session keys to Claude CLI session UUIDs.
// Key format: "${keyName}|${conversationId}" — prevents cross-key collision
// when two callers (different API keys or anon + authenticated) use the same
// session_id string. Anonymous callers use "anon"; admin uses "admin".
// Enables --resume for multi-turn conversations, reducing token waste.
// Session table, keyed by the namespaced string produced by _sessionKey().
// `${keyName}|${conversationId}` → { uuid, messageCount, lastUsed, model }
const sessions = new Map();

/**
 * Build the namespaced key used for every `sessions` Map operation.
 *
 * @param {string} conversationId - Caller-supplied conversation/session id.
 * @param {string} [keyName] - Name of the API key making the call; falsy values become "anon".
 * @returns {string|null} `${keyName}|${conversationId}`, or null when conversationId is
 *   falsy (one-off requests bypass session tracking).
 */
function _sessionKey(conversationId, keyName) {
  if (!conversationId) return null;
  const owner = keyName || "anon";
  return `${owner}|${conversationId}`;
}

// Once a minute, sweep the sessions Map:
//   - entries idle for longer than SESSION_TTL are removed and reported;
//   - entries that are still active but whose firstSeen is more than 4× SESSION_TTL old
//     are only logged (#42 evidence-gathering) — no absolute age cap is enforced.
const sessionCleanupInterval = setInterval(() => {
  const now = Date.now();
  sessions.forEach((session, sessionKey) => {
    const idleMs = now - session.lastUsed;
    const ageMs = session.firstSeen ? now - session.firstSeen : null;

    // Keys look like "${keyName}|${conversationId}"; log at most 12 chars of the
    // conversation part (or of the whole key when there is no "|").
    const sepAt = sessionKey.indexOf("|");
    const from = sepAt === -1 ? 0 : sepAt + 1;
    const convIdShort = sessionKey.slice(from, from + 12);

    if (idleMs > SESSION_TTL) {
      sessions.delete(sessionKey);
      console.log(`[session] expired ${convIdShort}... (idle ${Math.round(idleMs / 60000)}m)`);
      logEvent("info", "session_expired", { conversationId: convIdShort + "...", idleMs, ageMs });
      return;
    }

    const isLongLived = ageMs !== null && ageMs > 4 * SESSION_TTL;
    if (isLongLived) {
      logEvent("warn", "session_long_lived", { conversationId: convIdShort + "...", idleMs, ageMs });
    }
  });
}, 60000);

// Cache cleanup: remove expired entries every 10 minutes
// Every 10 minutes, purge expired cache entries via clearCache(CACHE_TTL).
// Nothing happens unless CACHE_TTL is greater than zero. A failure is logged
// and never propagates out of the timer callback.
const cacheCleanupInterval = setInterval(() => {
  if (!(CACHE_TTL > 0)) return;
  try {
    const expired = clearCache(CACHE_TTL);
    if (expired > 0) {
      logEvent("info", "cache_cleanup", { expired });
    }
  } catch (err) {
    logEvent("error", "cache_cleanup_failed", { error: err.message });
  }
}, 600000);

// TUI defunct-session reap (periodic): the boot reap (below) only fires once, but a
// long-lived host (PI231 ran 30 days without restart) accumulates defunct `<claude>`
// zombies between restarts — the pane's claude is a child of the tmux server, not node,
// so only the server can reap it (see reapStaleTuiSessions). We sweep every 15 min, but
// ONLY when the TUI path is fully idle: reapStaleTuiSessions may `kill-server`, which would
// tear down a live turn's pane, so we skip the sweep while any turn is inflight or queued.
// RESIDUAL (documented, accepted): a brand-new request whose pane is created in the narrow
// window between this idle-check and kill-server would have its pane torn down and fail the
// turn cleanly via runTuiTurn's existing honesty gates (rare; the boot reap is the primary
// mechanism and the 15-min cadence makes the window negligible).
// Gated on TUI_MODE — zero effect (no kill-server, no list-sessions) when TUI is off.
// cli.js does NOT perform this operation (Class B, OCP-owned TUI spawn) — see ADR 0007.
// Sweep cadence for the periodic TUI reap: 15 minutes.
const TUI_REAP_INTERVAL_MS = 15 * 60 * 1000;

// Periodic reap timer. Only created when TUI_MODE is on (null otherwise). Each tick is
// skipped while tuiSemaphore reports any inflight or queued turn; otherwise it calls
// reapStaleTuiSessions() and logs the count when it is non-zero. Errors are logged and
// never escape the timer callback.
const tuiReapInterval = !TUI_MODE ? null : setInterval(() => {
  const turnIsLive = tuiSemaphore.inflight > 0 || tuiSemaphore.queued > 0;
  if (turnIsLive) return; // defer until the TUI path is idle
  try {
    const reaped = reapStaleTuiSessions();
    if (reaped) {
      logEvent("info", "tui_reaped_stale_sessions", { count: reaped, trigger: "periodic" });
    }
  } catch (err) {
    logEvent("error", "tui_periodic_reap_failed", { error: err.message });
  }
}, TUI_REAP_INTERVAL_MS);

// unref() the timer when available so it does not, by itself, keep the process alive.
if (tuiReapInterval && typeof tuiReapInterval.unref === "function") {
  tuiReapInterval.unref();
}

// ── Active child process tracking ────────────────────────────────────────
// Spawned claude child processes. spawnClaudeProcess adds each new child here;
// removal is not visible in this part of the file (presumably on process exit — confirm).
const activeProcesses = new Set();

// ── Stats & diagnostics ─────────────────────────────────────────────────
// Process-wide counters, mutated in place by the request path.
const stats = {
  totalRequests: 0,    // incremented once per spawnClaudeProcess call
  activeRequests: 0,   // incremented when a spawn starts, decremented in its cleanup()
  errors: 0,           // incremented by trackError()
  timeouts: 0,
  sessionHits: 0,
  sessionMisses: 0,
  oneOffRequests: 0,   // incremented once per spawnClaudeProcess call
  queued: 0,           // current requests waiting for a -p concurrency slot (FIX ⑥)
  queueRejections: 0,  // total requests rejected with HTTP 429 because the wait-queue was full (FIX ⑥)
};
// Ring of the most recent error summaries; trackError() caps it at 20 entries.
const recentErrors = []; // last 20 errors

// Per-model request stats
// Per-model request stats.
// cliModel → { requests, successes, errors, timeouts, totalElapsed, maxElapsed,
//              totalPromptChars, maxPromptChars }
const modelStats = new Map();

/**
 * Fetch the mutable counter record for a model, creating a zeroed one on first use.
 * @param {string} cliModel - Model ID used as the Map key.
 * @returns {object} The live record stored in modelStats (not a copy).
 */
function getModelStats(cliModel) {
  let entry = modelStats.get(cliModel);
  if (entry === undefined) {
    entry = {
      requests: 0,
      successes: 0,
      errors: 0,
      timeouts: 0,
      totalElapsed: 0,
      maxElapsed: 0,
      totalPromptChars: 0,
      maxPromptChars: 0,
    };
    modelStats.set(cliModel, entry);
  }
  return entry;
}

/**
 * Count one request for a model and fold its prompt size into the totals.
 * @param {string} cliModel
 * @param {number} promptChars - Prompt length in characters.
 */
function recordModelRequest(cliModel, promptChars) {
  const entry = getModelStats(cliModel);
  entry.requests += 1;
  entry.totalPromptChars += promptChars;
  if (entry.maxPromptChars < promptChars) {
    entry.maxPromptChars = promptChars;
  }
}

/**
 * Count one successful completion and fold its duration into the totals.
 * @param {string} cliModel
 * @param {number} elapsedMs - Request duration in milliseconds.
 */
function recordModelSuccess(cliModel, elapsedMs) {
  const entry = getModelStats(cliModel);
  entry.successes += 1;
  entry.totalElapsed += elapsedMs;
  if (entry.maxElapsed < elapsedMs) {
    entry.maxElapsed = elapsedMs;
  }
}

/**
 * Count one failed request; timeouts are additionally counted separately.
 * @param {string} cliModel
 * @param {boolean} isTimeout - Truthy when the failure was a timeout.
 */
function recordModelError(cliModel, isTimeout) {
  const entry = getModelStats(cliModel);
  entry.errors += 1;
  if (isTimeout) {
    entry.timeouts += 1;
  }
}

/**
 * Build a plain-object report of every model's counters.
 * Averages are rounded to the nearest integer and are 0 when their divisor is 0:
 * avgElapsed divides by successes, avgPromptChars divides by requests.
 * @returns {Object<string, object>} model → summary record.
 */
function getModelStatsSnapshot() {
  const snapshot = {};
  modelStats.forEach((entry, model) => {
    const {
      requests, successes, errors, timeouts,
      totalElapsed, maxElapsed, totalPromptChars, maxPromptChars,
    } = entry;
    snapshot[model] = {
      requests,
      successes,
      errors,
      timeouts,
      avgElapsed: successes > 0 ? Math.round(totalElapsed / successes) : 0,
      maxElapsed,
      avgPromptChars: requests > 0 ? Math.round(totalPromptChars / requests) : 0,
      maxPromptChars,
    };
  });
  return snapshot;
}

/**
 * Record an error for diagnostics: bumps stats.errors and appends a timestamped
 * summary to recentErrors, dropping the oldest entry once the list exceeds 20.
 * @param {*} msg - Anything; it is stringified and cut to its first 200 characters.
 */
function trackError(msg) {
  stats.errors += 1;
  const summary = {
    time: new Date().toISOString(),
    message: String(msg).slice(0, 200),
  };
  recentErrors.push(summary);
  if (recentErrors.length > 20) {
    recentErrors.shift();
  }
}

// ── Auth health check ───────────────────────────────────────────────────
// Result of the most recent auth probe. `ok` stays null until the first probe has run.
let authStatus = { ok: null, lastCheck: 0, message: "" };

/**
 * Probe authentication by running `<CLAUDE> auth status` and store the outcome in authStatus.
 *
 * The child gets a copy of the current environment with CLAUDECODE and the ANTHROPIC_*
 * key/base-url/auth-token variables removed. Any failure (non-zero exit, timeout, spawn
 * error) is captured: ok=false and the first 200 chars of stderr (or the error message)
 * become the status message. Never throws.
 *
 * NOTE(review): execFileSync is synchronous, so this blocks the event loop until the child
 * exits or the 10 s timeout fires, even though the function is declared async.
 */
async function checkAuth() {
  try {
    const childEnv = { ...process.env };
    for (const name of ["CLAUDECODE", "ANTHROPIC_API_KEY", "ANTHROPIC_BASE_URL", "ANTHROPIC_AUTH_TOKEN"]) {
      delete childEnv[name];
    }
    execFileSync(CLAUDE, ["auth", "status"], { encoding: "utf8", timeout: 10000, env: childEnv });
    authStatus = { ok: true, lastCheck: Date.now(), message: "authenticated" };
  } catch (err) {
    const detail = (err.stderr || err.message || "").slice(0, 200);
    authStatus = { ok: false, lastCheck: Date.now(), message: detail };
    console.error(`[auth] check failed: ${detail}`);
  }
}

// Probe once at startup, then again every 10 minutes.
checkAuth();
const authCheckInterval = setInterval(checkAuth, 600000);

// ── Build CLI arguments ─────────────────────────────────────────────────
// Phase 6c port (2026-05-30): removed `-p` / `--output-format text`.
// Now uses `--output-format stream-json --verbose --no-session-persistence
// --system-prompt <OCP_SYSTEM_PROMPT_WRAPPER + client system messages>`.
//
// Authority: claude CLI § --output-format stream-json, § --verbose,
//   § --no-session-persistence, § --system-prompt (ported from OLP, verified v2.1.104;
//   behavior stable through v2.1.158).
// Reference: OLP ADR 0009 Amendment 1 + commit 97e7d16.
//
// Session flags (--resume, --session-id) are dropped: they are incompatible
// with stream-json mode without -p. OCP always passes full conversation context
// via stdin instead (messagesToPrompt), preserving multi-turn correctness.
// CLAUDE_SYSTEM_PROMPT env var is absorbed into the system prompt via
// extractSystemPrompt() at the caller level; APPEND_SYSTEM_PROMPT no longer used.
// Note: ALLOWED_TOOLS / SKIP_PERMISSIONS / MCP_CONFIG are preserved as before.
/**
 * Assemble the argument vector for one claude CLI invocation.
 *
 * Layout: fixed flags (model, stream-json output, verbose, no session persistence,
 * system prompt), then exactly one permission group, then the optional MCP config.
 *
 * Permission group, first match wins:
 *   - AUTH_MODE === "multi": one `--disallowedTools <tool>` pair per blocked tool
 *     (ADR 0007 B-path: guests must not reach operator-FS, web, agent or MCP tools);
 *     --allowedTools is never added in this mode.
 *   - SKIP_PERMISSIONS: `--dangerously-skip-permissions`.
 *   - non-empty ALLOWED_TOOLS: `--allowedTools` followed by every listed tool.
 *
 * @param {string} cliModel - Canonical CLI model ID.
 * @param {string} systemPrompt - Full system prompt text.
 * @returns {string[]} Arguments to pass to the claude binary.
 */
function buildCliArgs(cliModel, systemPrompt) {
  const permissionArgs = [];
  if (AUTH_MODE === "multi") {
    const blockedTools = [
      "Bash", "Read", "Write", "Edit", "Glob",
      "Grep", "WebFetch", "WebSearch", "Agent", "mcp__*",
    ];
    // The flag is repeated once per tool.
    for (const tool of blockedTools) {
      permissionArgs.push("--disallowedTools", tool);
    }
  } else if (SKIP_PERMISSIONS) {
    permissionArgs.push("--dangerously-skip-permissions");
  } else if (ALLOWED_TOOLS.length > 0) {
    permissionArgs.push("--allowedTools", ...ALLOWED_TOOLS);
  }

  const mcpArgs = MCP_CONFIG ? ["--mcp-config", MCP_CONFIG] : [];

  return [
    "--model", cliModel,
    "--output-format", "stream-json",
    "--verbose",
    "--no-session-persistence",
    "--system-prompt", systemPrompt,
    ...permissionArgs,
    ...mcpArgs,
  ];
}

// ── Format messages to prompt text ──────────────────────────────────────
// Truncation guard: if total chars exceed MAX_PROMPT_CHARS, keep the system
// message(s) plus as many of the most recent messages as fit, dropping the oldest.
// This prevents runaway context from gateway-side conversation accumulation.
// Character budget for the serialized prompt, read from CLAUDE_MAX_PROMPT_CHARS
// (default 150000). The value must parse to a positive integer; anything else
// (e.g. "abc", "0", "-5") falls back to the default with a warning, because a NaN
// or non-positive limit would send every request down the truncation path.
// Declared with `let` as before, so code elsewhere that reassigns it keeps working.
let MAX_PROMPT_CHARS = (() => {
  const DEFAULT_MAX_PROMPT_CHARS = 150000;
  const raw = process.env.CLAUDE_MAX_PROMPT_CHARS;
  if (!raw) return DEFAULT_MAX_PROMPT_CHARS;
  const parsed = Number.parseInt(raw, 10);
  if (Number.isInteger(parsed) && parsed > 0) return parsed;
  console.warn(
    `WARNING: CLAUDE_MAX_PROMPT_CHARS=${JSON.stringify(raw)} is not a positive integer — using ${DEFAULT_MAX_PROMPT_CHARS}`);
  return DEFAULT_MAX_PROMPT_CHARS;
})();

// Flatten OpenAI content (string | array of parts) to plain text for the prompt.
// Array content: concatenate text parts; replace non-text parts (e.g. image_url)
// with a placeholder rather than dumping raw JSON. (issue #110)
/**
 * Flatten an OpenAI-style message `content` value to plain text. (issue #110)
 *
 *   - string            → returned unchanged
 *   - array of parts    → text parts concatenated with no separator; every other
 *                         part (e.g. image_url) becomes a fixed placeholder
 *   - null / undefined  → ""
 *   - anything else     → its JSON serialization
 *
 * @param {*} content
 * @returns {string}
 */
function contentToText(content) {
  if (typeof content === "string") return content;
  if (content == null) return "";
  if (!Array.isArray(content)) return JSON.stringify(content);

  const partToText = (part) => {
    const isTextPart = Boolean(part) && part.type === "text" && typeof part.text === "string";
    return isTextPart ? part.text : "[non-text content omitted]";
  };
  return content.map(partToText).join("");
}

/**
 * Serialize chat messages into the plain-text prompt fed to the CLI.
 *
 * Each message is flattened with contentToText(); system and assistant messages get a
 * "[System] " / "[Assistant] " prefix, all other roles are emitted as bare text. Messages
 * are joined with blank lines.
 *
 * If the joined text exceeds MAX_PROMPT_CHARS it is truncated:
 *   - all system messages are kept, in order, at the top;
 *   - a "[System] Note: …" line records how many messages were dropped;
 *   - the most recent non-system messages are kept, newest-first, while they fit the
 *     remaining budget (MAX_PROMPT_CHARS minus system text minus 200 chars of slack for
 *     the note and separators);
 *   - if even the newest message does not fit, its trailing characters are kept instead
 *     of dropping it entirely, so the prompt never consists of the truncation note alone.
 *
 * @param {Array<{role: string, content: *}>} messages
 * @returns {string} The prompt text.
 */
function messagesToPrompt(messages) {
  const full = messages.map((m) => {
    const text = contentToText(m.content);
    if (m.role === "system") return `[System] ${text}`;
    if (m.role === "assistant") return `[Assistant] ${text}`;
    return text;
  });

  const joined = full.join("\n\n");
  if (joined.length <= MAX_PROMPT_CHARS) return joined;

  // Over budget: keep system messages plus as many recent messages as fit.
  logEvent("warn", "prompt_truncated", {
    originalChars: joined.length,
    maxChars: MAX_PROMPT_CHARS,
    originalMessages: messages.length,
  });

  const system = [];
  const rest = [];
  for (let i = 0; i < full.length; i++) {
    if (messages[i].role === "system") system.push(full[i]);
    else rest.push(full[i]);
  }

  const systemText = system.join("\n\n");
  const budget = MAX_PROMPT_CHARS - systemText.length - 200; // 200 reserved for note + separators
  const kept = [];
  let used = 0;
  // Walk backwards from the newest message; each kept message costs its length + 2 (separator).
  for (let i = rest.length - 1; i >= 0; i--) {
    const cost = rest[i].length + 2;
    if (used + cost > budget) break;
    kept.unshift(rest[i]);
    used += cost;
  }

  // Nothing fit, i.e. the newest message alone is over budget. Dropping it would leave the
  // model with no user input at all, so keep as much of its tail as the budget allows.
  let clippedChars = 0;
  const room = budget - 2;
  if (kept.length === 0 && rest.length > 0 && room > 0) {
    let tail = rest[rest.length - 1].slice(-room);
    // Do not start on the second half of a surrogate pair.
    const firstUnit = tail.charCodeAt(0);
    if (firstUnit >= 0xdc00 && firstUnit <= 0xdfff) tail = tail.slice(1);
    kept.push(tail);
    clippedChars = tail.length;
  }

  const dropped = rest.length - kept.length;
  const clipNote = clippedChars > 0
    ? ` The most recent message was clipped to its last ${clippedChars} characters.`
    : "";
  const truncNote = `[System] Note: ${dropped} older messages were truncated to fit context limit.${clipNote}`;
  const result = [systemText, truncNote, ...kept].filter(Boolean).join("\n\n");

  logEvent("info", "prompt_after_truncation", {
    chars: result.length,
    keptMessages: kept.length,
    droppedMessages: dropped,
    clippedNewest: clippedChars > 0,
  });

  return result;
}

// Model tier — used for logging only (no timeout logic).
/**
 * Classify a CLI model ID into a coarse tier name by substring match.
 * "opus" is checked before "haiku"; anything matching neither is reported as "sonnet".
 * @param {string} cliModel
 * @returns {"opus"|"haiku"|"sonnet"}
 */
function getModelTier(cliModel) {
  for (const tier of ["opus", "haiku"]) {
    if (cliModel.includes(tier)) return tier;
  }
  return "sonnet";
}

// ── Spawn claude CLI (shared setup) ─────────────────────────────────────
// Builds CLI args, spawns the process, and sets up timeouts.
// Returns context object or throws synchronously.
//
// Phase 6c port (2026-05-30): session resume (--resume / --session-id) is
// dropped because it is incompatible with stream-json mode without -p.
// OCP now always passes the full serialized conversation via stdin
// (messagesToPrompt), so multi-turn correctness is preserved without sessions.
// The sessions Map is retained for stats/logging but no longer drives --resume.
// Reference: OLP ADR 0009 Amendment 1 + commit 97e7d16.
// FIX ⑥: concurrency is now bounded by the claudeSemaphore via acquireClaudeSlot(), which the
// caller MUST await before calling this, passing the resulting release fn as `releaseSlot`. The
// old `if (activeRequests >= MAX_CONCURRENT) throw` gate (→ opaque 500, uncounted) is GONE: at
// most MAX_CONCURRENT callers hold a slot when they reach here, so this spawn is always within
// budget. releaseSlot is wired into the idempotent cleanup() so the slot is freed on EVERY exit
// path (close/error/timeout/abort). Back-compat: releaseSlot defaults to a no-op so any future
// internal caller that does its own gating still works.
function spawnClaudeProcess(model, messages, conversationId, keyName, releaseSlot = () => {}) {
  const cliModel = MODEL_MAP[model] || model;

  // Circuit breaker: disabled (see comment at top of breaker section)

  stats.activeRequests++;
  stats.totalRequests++;

  // Phase 6c: always serialize full conversation via stdin (no session resume).
  // System messages are extracted and passed via --system-prompt; the remaining
  // messages (user/assistant/tool) are serialized by messagesToPrompt.
  const systemPrompt = extractSystemPrompt(messages);

  // messagesToPrompt skips system messages now that they go via --system-prompt.
  // Filter them out before calling to avoid double-injection.
  const nonSystemMessages = messages.filter(m => m.role !== "system");
  const prompt = messagesToPrompt(nonSystemMessages);

  stats.oneOffRequests++;
  if (conversationId) {
    console.log(`[session] stateless conv=${conversationId.slice(0, 12)}... key=${keyName || "anon"} msgs=${messages.length} prompt_chars=${prompt.length}`);
  }

  const cliArgs = buildCliArgs(cliModel, systemPrompt);

  const env = { ...process.env };
  delete env.CLAUDECODE;
  delete env.ANTHROPIC_API_KEY;
  delete env.ANTHROPIC_BASE_URL;
  delete env.ANTHROPIC_AUTH_TOKEN;

  // Pure API mode: suppress Claude Code context injection while preserving OAuth auth
  if (NO_CONTEXT) {
    env.CLAUDE_CODE_DISABLE_CLAUDE_MDS = "1";
    env.CLAUDE_CODE_DISABLE_AUTO_MEMORY = "1";
  }

  // FIX ③ (latency): default-path spawn-home isolation. When a token is resolvable (and the
  // OCP_SPAWN_REAL_HOME kill-switch is off), run claude under a credential-free minimal HOME
  // with cwd = that same neutral dir, so it loads NONE of the operator's global ~/.claude
  // (plugins/skills/hooks) or the ~/ocp project CLAUDE.md/skills — the measured 10–28s → 3–7s
  // latency win. The env token is authoritative for `-p` (unlike interactive claude). When no
  // token is resolvable, falls back to real HOME + inherited cwd (zero regression). See
  // getSpawnHomeMode() / prepareSpawnHome() above. The DISABLE_CLAUDE_MDS / AUTO_MEMORY flags
  // are set unconditionally in isolated mode (belt-and-braces; mirrors the TUI path).
  const spawnHome = getSpawnHomeMode();
  const spawnOpts = { env, stdio: ["pipe", "pipe", "pipe"] };
  if (spawnHome.isolated) {
    // Re-resolve the token FRESH per spawn (never a startup snapshot — keychain tokens rotate;
    // a stale snapshot 401s). If unresolvable right now, fall through to real HOME so the spawned
    // claude resolves + refreshes credentials natively instead of 401ing on a stale/null token.
    const freshToken = resolveSpawnToken();
    if (freshToken) {
      env.HOME = spawnHome.home;
      env.CLAUDE_CODE_OAUTH_TOKEN = freshToken; // env token is authoritative for -p
      env.CLAUDE_CODE_DISABLE_CLAUDE_MDS = "1";
      env.CLAUDE_CODE_DISABLE_AUTO_MEMORY = "1";
      spawnOpts.cwd = spawnHome.home; // neutral cwd: no project CLAUDE.md/skills
    }
  }

  const proc = spawn(CLAUDE, cliArgs, spawnOpts);
  activeProcesses.add(proc);

  const t0 = Date.now();
  let gotFirstByte = false;
  let cleaned = false;

  function cleanup() {
    if (cleaned) return;
    cleaned = true;
    clearTimeout(overallTimer);
    stats.activeRequests--;