Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/update.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ jobs:
run: go run ./cmd/openrouter/main.go
continue-on-error: true

- name: OrcaRouter
run: go run ./cmd/orcarouter/main.go
continue-on-error: true

- name: Synthetic
run: go run ./cmd/synthetic/main.go
continue-on-error: true
Expand Down
6 changes: 6 additions & 0 deletions Taskfile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ tasks:
- task: gen:opencode-go
- task: gen:opencode-zen
- task: gen:openrouter
- task: gen:orcarouter
- task: gen:synthetic
- task: gen:venice
- task: gen:vercel
Expand Down Expand Up @@ -134,6 +135,11 @@ tasks:
cmds:
- go run cmd/openrouter/main.go

gen:orcarouter:
desc: Generate orcarouter provider configurations
cmds:
- go run cmd/orcarouter/main.go

gen:synthetic:
desc: Generate synthetic provider configurations
cmds:
Expand Down
286 changes: 286 additions & 0 deletions cmd/orcarouter/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,286 @@
// Package main provides a command-line tool to fetch models from OrcaRouter
// and generate a configuration file for the provider.
package main

import (
"context"
"encoding/json"
"fmt"
"io"
"log"
"math"
"net/http"
"os"
"regexp"
"slices"
"strings"
"time"

"charm.land/catwalk/pkg/catwalk"
)

// APIModel represents a model from the OrcaRouter pricing API.
//
// The field comments describe how this file consumes each value; the exact
// semantics on the OrcaRouter side are not visible from here.
type APIModel struct {
// ModelName is the model identifier. It becomes both the ID and the Name of
// the generated catwalk model and is inspected by isChatLLM and canReason.
ModelName string `json:"model_name"`
// ModelRatio is the base price ratio; multiplied by quotaToUSD it yields the
// input cost per 1M tokens.
ModelRatio float64 `json:"model_ratio"`
// CompletionRatio scales ModelRatio to derive the output cost per 1M tokens.
CompletionRatio float64 `json:"completion_ratio"`
// CacheRatio scales ModelRatio to derive CostPer1MOutCached; values <= 0
// leave that cost at zero.
CacheRatio float64 `json:"cache_ratio"`
// CreateCacheRatio scales ModelRatio to derive CostPer1MInCached; values <= 0
// leave that cost at zero.
CreateCacheRatio float64 `json:"create_cache_ratio"`
// ContextLength is the reported context window; non-positive values are
// treated as missing by contextWindow.
ContextLength int64 `json:"context_length"`
// MaxCompletionTokens is the reported output limit. It feeds
// calculateMaxTokens and doubles as a context-window fallback.
MaxCompletionTokens int64 `json:"max_completion_tokens"`
// SupportedEndpointTypes lists endpoint kinds; isChatLLM looks for
// "image-generation", "openai-video", "openai-response", and "openai".
SupportedEndpointTypes []string `json:"supported_endpoint_types"`
// InputModalities containing "image" marks the model as supporting images.
InputModalities []string `json:"input_modalities"`
// OutputModalities containing "image" causes isChatLLM to reject the model.
OutputModalities []string `json:"output_modalities"`
// SupportedParameters is checked by canReason for "reasoning" and
// "include_reasoning".
SupportedParameters []string `json:"supported_parameters"`
}

// PricingResponse is the response structure for the OrcaRouter pricing API.
type PricingResponse struct {
// Data holds one entry per model returned by the pricing endpoint.
Data []APIModel `json:"data"`
// Success mirrors the top-level "success" field of the payload; presumably
// it reports whether the API call succeeded (confirm against the API docs).
Success bool `json:"success"`
}

const (
// pricingURL is the endpoint fetchOrcaRouterModels queries for the model
// list.
pricingURL = "https://www.orcarouter.ai/api/pricing"
// apiEndpoint is written into the generated provider as its APIEndpoint.
apiEndpoint = "https://api.orcarouter.ai/v1"
// defaultLargeModel and defaultSmallModel are written into the generated
// provider as DefaultLargeModelID and DefaultSmallModelID.
defaultLargeModel = "anthropic/claude-opus-4.8"
defaultSmallModel = "google/gemini-3.5-flash"
// quotaToUSD converts OrcaRouter's internal quota ratio to USD per 1M
// tokens. See https://docs.orcarouter.ai for the pricing model.
quotaToUSD = 2.0
// defaultContextWindow is used when the pricing API reports neither a
// context_length nor a max_completion_tokens for a model. It is also the
// context window assigned to the manually added auto model.
defaultContextWindow = 128000
// minContextWindow is the smallest context window main accepts; models
// below it are skipped.
minContextWindow = 8192
// maxTokensFactor is the divisor applied to a context window to derive the
// default max tokens when the reported output limit is not used.
maxTokensFactor = 10
)

// gpt5ProPattern matches OpenAI gpt-5(.X)-pro models, which are only served on
// the responses endpoint and not on chat completions.
//
// The pattern is unanchored, so it also matches names that carry extra text
// before or after the matched part. isChatLLM applies it to the lowercased
// model name.
var gpt5ProPattern = regexp.MustCompile(`openai/gpt-5(\.\d+)?-pro`)

// fetchOrcaRouterModels downloads and decodes the OrcaRouter pricing list.
//
// It issues a GET to pricingURL with a 30 second client timeout and returns
// the decoded payload. An error is returned when the request cannot be built
// or sent, when the server answers with a non-200 status, when the body is not
// valid JSON for PricingResponse, or when the payload reports success=false.
func fetchOrcaRouterModels() (*PricingResponse, error) {
	// maxErrorBody caps how much of a failing response is echoed into the
	// returned error.
	const maxErrorBody = 4 << 10

	req, err := http.NewRequestWithContext(
		context.Background(),
		http.MethodGet,
		pricingURL,
		nil,
	)
	if err != nil {
		return nil, fmt.Errorf("creating request: %w", err)
	}
	req.Header.Set("User-Agent", "Crush-Client/1.0")

	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("fetching models: %w", err)
	}
	defer resp.Body.Close() //nolint:errcheck

	if resp.StatusCode != http.StatusOK {
		// The read error is ignored on purpose: the body is only extra context
		// for the status error that is returned either way.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBody))
		return nil, fmt.Errorf("unexpected status %d: %s", resp.StatusCode, body)
	}

	var pr PricingResponse
	if err := json.NewDecoder(resp.Body).Decode(&pr); err != nil {
		return nil, fmt.Errorf("parsing response: %w", err)
	}
	// A 200 response can still carry an application-level failure; surface it
	// here instead of handing the caller an unusable model list.
	if !pr.Success {
		return nil, fmt.Errorf("pricing API reported success=false")
	}
	return &pr, nil
}

// contains reports whether item occurs in list. A nil or empty list never
// contains anything.
func contains(list []string, item string) bool {
	return slices.Index(list, item) != -1
}

// isChatLLM reports whether m can be used through the chat completions
// endpoint. It rejects image and video generators, embedding, speech,
// transcription and rerank models, and models that are only reachable through
// the responses or completions endpoints.
func isChatLLM(m APIModel) bool {
	endpoints := m.SupportedEndpointTypes

	switch {
	case contains(endpoints, "image-generation"), contains(endpoints, "openai-video"):
		return false
	case contains(m.OutputModalities, "image"):
		return false
	case contains(endpoints, "openai-response") && !contains(endpoints, "openai"):
		// Responses-only models are not usable through chat completions.
		return false
	}

	// All name checks are done on the lowercased identifier.
	lower := strings.ToLower(m.ModelName)
	if strings.HasSuffix(lower, "-speech") {
		return false
	}
	// gpt-5-pro models are served on the responses endpoint only.
	if gpt5ProPattern.MatchString(lower) {
		return false
	}

	// Substrings that mark a model as something other than a chat LLM. Codex
	// models are listed because they use the completions / responses endpoints.
	rejected := []string{
		"imagen", "dall-e", "gpt-image", "grok-imagine",
		"embedding", "tts",
		"whisper", "transcrib", "rerank",
		"codex",
	}
	for _, fragment := range rejected {
		if strings.Contains(lower, fragment) {
			return false
		}
	}
	return true
}

// roundCost rounds v to five decimal places.
func roundCost(v float64) float64 {
	const scale = 1e5
	scaled := math.Round(v * scale)
	return scaled / scale
}

// contextWindow returns the context window to record for m. It prefers the
// reported context length, then the reported completion limit, and finally
// defaultContextWindow when neither is positive.
func contextWindow(m APIModel) int64 {
	switch {
	case m.ContextLength > 0:
		return m.ContextLength
	case m.MaxCompletionTokens > 0:
		return m.MaxCompletionTokens
	default:
		return defaultContextWindow
	}
}

// calculateMaxTokens picks the default max output tokens for a model.
//
// maxOutput is the reported completion limit. It is returned unchanged when it
// is positive and at most half of contextWindow. In every other case (not
// reported, non-positive, or more than half of the window) the result is
// contextWindow / maxTokensFactor.
func calculateMaxTokens(contextWindow, maxOutput int64) int64 {
	// Non-positive limits are treated as "not reported", matching how the
	// context length is handled; returning them verbatim would yield a zero or
	// negative default.
	if maxOutput <= 0 || maxOutput > contextWindow/2 {
		return contextWindow / maxTokensFactor
	}
	return maxOutput
}

// reasoningExcludedVendors are upstreams whose reasoning models cannot be
// driven through Crush's openai-compat path (POST /v1/chat/completions with a
// flat `reasoning_effort` field):
//
// - anthropic: rejects `reasoning_effort`; it expects Anthropic's native
// `thinking` block, which the openai-compat path does not emit.
// - openai: rejects `tools` + `reasoning_effort` together on chat completions
// ("use /v1/responses instead"). Crush is agentic and always sends tools,
// so reasoning and tool calls cannot be combined here.
//
// Models from these vendors are still served for regular chat and tool calls;
// they just don't advertise reasoning. Every other vendor (Gemini, Grok, Qwen,
// DeepSeek, MiniMax) accepts tools + reasoning_effort on chat completions.
//
// Each entry is a model-name prefix, including the trailing slash, that
// canReason compares against the start of the model name.
var reasoningExcludedVendors = []string{"anthropic/", "openai/"}

// canReason reports whether m should advertise reasoning support.
//
// Models whose name starts with one of reasoningExcludedVendors never do.
// Every other model does when its supported parameters include "reasoning" or
// "include_reasoning".
func canReason(m APIModel) bool {
	// Compare on the lowercased name, as isChatLLM does, so a differently
	// cased vendor prefix cannot slip past the exclusion list.
	name := strings.ToLower(m.ModelName)
	for _, prefix := range reasoningExcludedVendors {
		if strings.HasPrefix(name, prefix) {
			return false
		}
	}
	return contains(m.SupportedParameters, "reasoning") ||
		contains(m.SupportedParameters, "include_reasoning")
}

// reasoningConfig returns the reasoning levels and the default effort to
// advertise. Reasoning models get low/medium/high with medium as the default;
// all others get a nil slice and an empty default.
func reasoningConfig(reason bool) ([]string, string) {
	if reason {
		return []string{"low", "medium", "high"}, "medium"
	}
	return nil, ""
}

// autoModel builds the entry for OrcaRouter's adaptive router. The router is a
// virtual model that the pricing API does not return, so it is added by hand.
// Real cost, context, and capabilities depend on the upstream the router
// selects, so conservative values are used: zero cost, the default context
// window, no reasoning, and image support enabled.
func autoModel() catwalk.Model {
	var auto catwalk.Model
	auto.ID = "orcarouter/auto"
	auto.Name = "OrcaRouter Auto (adaptive routing)"
	auto.CostPer1MIn = 0
	auto.CostPer1MOut = 0
	auto.ContextWindow = defaultContextWindow
	auto.DefaultMaxTokens = defaultContextWindow / maxTokensFactor
	auto.CanReason = false
	auto.SupportsImages = true
	return auto
}

// main fetches the OrcaRouter pricing list, converts every usable chat model
// into a catwalk model, and writes the resulting provider definition as
// indented JSON to internal/providers/configs/orcarouter.json.
//
// A model is kept when isChatLLM accepts it and its context window is at least
// minContextWindow. The manually defined auto model is always included. Any
// failure, including an empty result, terminates the process with a fatal log
// message.
func main() {
	const outputPath = "internal/providers/configs/orcarouter.json"

	pricing, err := fetchOrcaRouterModels()
	if err != nil {
		// Fatalf instead of Fatal: Print-style formatting adds no space next to
		// a string operand, which would glue the error text to the colon.
		log.Fatalf("Error fetching OrcaRouter models: %v", err)
	}

	provider := catwalk.Provider{
		Name:                "OrcaRouter",
		ID:                  catwalk.InferenceProviderOrcaRouter,
		APIKey:              "$ORCAROUTER_API_KEY",
		APIEndpoint:         apiEndpoint,
		Type:                catwalk.TypeOpenAICompat,
		DefaultLargeModelID: defaultLargeModel,
		DefaultSmallModelID: defaultSmallModel,
		DefaultHeaders: map[string]string{
			"HTTP-Referer": "https://www.orcarouter.ai/",
			"X-Title":      "Crush",
		},
		Models: []catwalk.Model{autoModel()},
	}

	for _, m := range pricing.Data {
		if !isChatLLM(m) {
			continue
		}
		window := contextWindow(m)
		if window < minContextWindow {
			continue
		}

		reason := canReason(m)
		levels, defaultEffort := reasoningConfig(reason)

		// The cache ratios are optional: CreateCacheRatio prices the cached
		// input cost and CacheRatio the cached output cost. A ratio that is not
		// positive leaves the corresponding cost at zero.
		var inCached, outCached float64
		if m.CacheRatio > 0 {
			outCached = roundCost(m.ModelRatio * m.CacheRatio * quotaToUSD)
		}
		if m.CreateCacheRatio > 0 {
			inCached = roundCost(m.ModelRatio * m.CreateCacheRatio * quotaToUSD)
		}

		provider.Models = append(provider.Models, catwalk.Model{
			ID:                     m.ModelName,
			Name:                   m.ModelName,
			CostPer1MIn:            roundCost(m.ModelRatio * quotaToUSD),
			CostPer1MOut:           roundCost(m.ModelRatio * m.CompletionRatio * quotaToUSD),
			CostPer1MInCached:      inCached,
			CostPer1MOutCached:     outCached,
			ContextWindow:          window,
			DefaultMaxTokens:       calculateMaxTokens(window, m.MaxCompletionTokens),
			CanReason:              reason,
			ReasoningLevels:        levels,
			DefaultReasoningEffort: defaultEffort,
			SupportsImages:         contains(m.InputModalities, "image"),
		})

		fmt.Printf("Added model %s (context window %d)\n", m.ModelName, window)
	}

	// The list always starts with the auto model, so one entry means nothing
	// from the API survived the filters.
	if len(provider.Models) <= 1 {
		log.Fatal("No models found or no models met the criteria")
	}

	// Sort by ID so the generated file is stable between runs.
	slices.SortFunc(provider.Models, func(a, b catwalk.Model) int {
		return strings.Compare(a.ID, b.ID)
	})

	data, err := json.MarshalIndent(provider, "", "  ")
	if err != nil {
		log.Fatalf("Error marshaling OrcaRouter provider: %v", err)
	}

	if err := os.WriteFile(outputPath, data, 0o600); err != nil {
		log.Fatalf("Error writing OrcaRouter provider config: %v", err)
	}

	fmt.Printf("\nSuccessfully wrote %d models to %s\n", len(provider.Models), outputPath)
}
Loading