feat(iso-route): retune openrouter-free preset after real-world contention data

CharGrnmn · CharGrnmn · commit 0a0f7fb4dbe5 · 2026-04-20T18:42:32.000-04:00
qwen/qwen3-coder:free stays as the orchestrator default (still the
strongest free agentic model on OpenRouter) but real usage showed its
shared-pool providers (Venice, Chutes) rate-limit far more often than
any other free model. Subagent roles now spread load across
less-contended providers:

  fast:     minimax/minimax-m2.5:free       -> z-ai/glm-4.5-air:free
  minimal:  google/gemma-4-26b-a4b-it:free  -> openai/gpt-oss-20b:free
  quality:  openai/gpt-oss-120b:free        -> qwen/qwen3-next-80b-a3b-instruct:free

The quality role is bumped to Qwen3 Next 80B — it's the best free writing
model in the catalog, and JobForge was already overriding to it. The
preset now ships the actually-best pick by default.

Bumps iso-route to 0.5.1. No breaking changes; consumers get better
defaults on next install. Existing overrides in downstream models.yaml
continue to work.

Made-with: Cursor
diff --git a/package-lock.json b/package-lock.json
diff --git a/packages/iso-route/CHANGELOG.md b/packages/iso-route/CHANGELOG.md
@@ -1,5 +1,25 @@
 # @razroo/iso-route
 
+## 0.5.1
+
+### Patch Changes
+
+- Retune the `openrouter-free` preset after real-world usage showed
+  `qwen/qwen3-coder:free` is the most contended free model on OpenRouter's
+  shared pool. Keeps it as the orchestrator default (still the strongest
+  free agentic model) but spreads subagent load:
+
+  - `fast`: `minimax/minimax-m2.5:free` → `z-ai/glm-4.5-air:free`
+    (more reliable tool-call schema compliance on Geometra flows)
+  - `minimal`: `google/gemma-4-26b-a4b-it:free` → `openai/gpt-oss-20b:free`
+    (less contended, better structured-output adherence)
+  - `quality`: `openai/gpt-oss-120b:free` → `qwen/qwen3-next-80b-a3b-instruct:free`
+    (matches the override JobForge was already applying; the preset now
+    ships the actually-best free writing model by default)
+
+  No breaking changes — existing consumers get better defaults on next
+  install.
+
 ## 0.5.0
 
 ### Minor Changes
diff --git a/packages/iso-route/package.json b/packages/iso-route/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@razroo/iso-route",
-  "version": "0.5.0",
+  "version": "0.5.1",
   "description": "Author one model policy; fan out to every harness that supports it. Translates role-based model selection into settings.json, config.toml, opencode.json, and a machine-readable resolved map.",
   "license": "MIT",
   "type": "module",
diff --git a/packages/iso-route/presets/openrouter-free.yaml b/packages/iso-route/presets/openrouter-free.yaml
@@ -5,11 +5,24 @@
 # proxy tokens. Intended for users who already have OpenRouter connected
 # inside OpenCode and want reproducible, inspectable model names.
 #
-# Verified against the live OpenRouter models API on 2026-04-20:
-#   - default:  qwen/qwen3-coder:free
-#   - quality:  openai/gpt-oss-120b:free
-#   - fast:     minimax/minimax-m2.5:free
-#   - minimal:  google/gemma-4-26b-a4b-it:free
+# Verified against the live OpenRouter models API on 2026-04-20, tuned
+# after real-world observation that qwen3-coder:free is the most
+# contended free model on the shared OpenRouter pool (everyone uses it,
+# so the Venice / Chutes providers that back it rate-limit often).
+# We keep it as the orchestrator default since it is genuinely the
+# strongest free agentic model, but subagent roles now spread load
+# across less-contended providers:
+#
+#   - default:       qwen/qwen3-coder:free             (orchestrator; best free agentic)
+#   - quality:       qwen/qwen3-next-80b-a3b-instruct:free (best free writing)
+#   - fast:          z-ai/glm-4.5-air:free             (reliable tool-calling)
+#   - minimal:       openai/gpt-oss-20b:free           (strong structured output)
+#
+# Notable free candidates outside this shortlist (see `iso-route catalog
+# openrouter`): moonshotai/kimi-k2.6 (scores 910, free with tools,
+# 262k ctx — worth pinning once usage stabilizes),
+# nvidia/nemotron-3-super-120b-a12b:free (120B reasoning model, good
+# quality-tier fallback).
 #
 # Use `iso-route catalog openrouter` to refresh the shortlist when
 # upstream offerings change.
@@ -26,9 +39,9 @@ default:
       model: qwen/qwen3-coder:free
 
 roles:
-  # fast — procedural worker. Preserves the existing Claude/Codex tiers
-  # while switching OpenCode to the quickest free OpenRouter pick in the
-  # current shortlist.
+  # fast — procedural worker. GLM 4.5 Air was designed by Zhipu as the
+  # lightweight counterpart to their agentic GLM 4.5; more reliable
+  # tool-call schema compliance than MiniMax on the Geometra flows.
   fast:
     provider: anthropic
     model: claude-haiku-4-5
@@ -38,10 +51,10 @@ roles:
         model: gpt-5.4-mini
       opencode:
         provider: openrouter
-        model: minimax/minimax-m2.5:free
+        model: z-ai/glm-4.5-air:free
 
-  # quality — best free OpenRouter alternative for OpenCode while keeping
-  # opus / gpt-5.4 on the native harnesses.
+  # quality — best free OpenRouter model for long-form writing. 262k
+  # ctx, 80B params, instruct-tuned. Claude/Codex still run Opus / GPT-5.4.
   quality:
     provider: anthropic
     model: claude-opus-4-7
@@ -53,10 +66,12 @@ roles:
         reasoning: high
       opencode:
         provider: openrouter
-        model: openai/gpt-oss-120b:free
+        model: qwen/qwen3-next-80b-a3b-instruct:free
 
-  # minimal — smallest credible tool-capable OpenRouter pick from the
-  # current free shortlist.
+  # minimal — GPT-OSS-20B is dense-trained for structured outputs, has
+  # 131k ctx (plenty for extraction), and is typically less contended
+  # than the Gemma variants which get hammered as default small-model
+  # picks across many apps.
   minimal:
     provider: anthropic
     model: claude-haiku-4-5
@@ -66,4 +81,4 @@ roles:
         model: gpt-5.4-nano
       opencode:
         provider: openrouter
-        model: google/gemma-4-26b-a4b-it:free
+        model: openai/gpt-oss-20b:free
diff --git a/packages/iso-route/tests/extends.test.ts b/packages/iso-route/tests/extends.test.ts
@@ -84,9 +84,11 @@ test("extends openrouter-free: OpenCode targets use explicit free OpenRouter mod
   assert.equal(policy.default.targets?.opencode?.model, "qwen/qwen3-coder:free");
   const quality = policy.roles.find((r) => r.name === "quality")!;
   assert.equal(quality.targets?.opencode?.provider, "openrouter");
-  assert.equal(quality.targets?.opencode?.model, "openai/gpt-oss-120b:free");
+  assert.equal(quality.targets?.opencode?.model, "qwen/qwen3-next-80b-a3b-instruct:free");
+  const fast = policy.roles.find((r) => r.name === "fast")!;
+  assert.equal(fast.targets?.opencode?.model, "z-ai/glm-4.5-air:free");
   const minimal = policy.roles.find((r) => r.name === "minimal")!;
-  assert.equal(minimal.targets?.opencode?.model, "google/gemma-4-26b-a4b-it:free");
+  assert.equal(minimal.targets?.opencode?.model, "openai/gpt-oss-20b:free");
 });
 
 test("extends standard: scalar override on default replaces just that scalar", () => {

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "@razroo/iso-route",`
`3`		`- "version": "0.5.0",`
	`3`	`+ "version": "0.5.1",`
`4`	`4`	`"description": "Author one model policy; fan out to every harness that supports it. Translates role-based model selection into settings.json, config.toml, opencode.json, and a machine-readable resolved map.",`
`5`	`5`	`"license": "MIT",`
`6`	`6`	`"type": "module",`