Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/ai/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

### Fixed

- Fixed Google and `google-vertex` Gemini model metadata to map `latest` aliases to the current models, add Gemini 3.5 Flash for Vertex, correct Gemini 2.5 Flash Vertex cache pricing, and remove shut-down Vertex preview models ([#5761](https://github.com/earendil-works/pi/issues/5761)).
- Fixed Moonshot AI China model metadata to include Kimi K2.7 Code, and omitted unsupported thinking-off payloads for Kimi K2.7 Code models ([#5760](https://github.com/earendil-works/pi/issues/5760)).

## [0.79.4] - 2026-06-15
Expand Down
229 changes: 59 additions & 170 deletions packages/ai/scripts/generate-models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ const TOGETHER_TOGGLE_REASONING_LEVEL_MAP = {

const AI_GATEWAY_MODELS_URL = "https://ai-gateway.vercel.sh/v1";
const AI_GATEWAY_BASE_URL = "https://ai-gateway.vercel.sh";
const VERTEX_BASE_URL = "https://{location}-aiplatform.googleapis.com";
const NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1";
const NVIDIA_HEADERS = {
"NVCF-POLL-SECONDS": "3600",
Expand Down Expand Up @@ -270,7 +271,8 @@ function isGemini3ProModel(modelId: string): boolean {
}

/**
 * Reports whether a model id refers to a Gemini 3.x Flash model.
 *
 * Matching is case-insensitive. An id qualifies when it contains
 * `gemini-3-flash` or `gemini-3.<minor>-flash` (which also covers Flash Lite
 * and preview variants), or when it is exactly one of the floating aliases
 * `gemini-flash-latest` / `gemini-flash-lite-latest`.
 *
 * @param modelId - Provider model identifier, in any letter case.
 * @returns `true` for Gemini 3.x Flash ids and the two `latest` Flash aliases.
 */
function isGemini3FlashModel(modelId: string): boolean {
	const id = modelId.toLowerCase();
	// The alias checks must be part of the single return expression: an earlier
	// unconditional regex-only return made them unreachable, so the `latest`
	// aliases were never recognized.
	return /gemini-3(?:\.\d+)?-flash/.test(id) || id === "gemini-flash-latest" || id === "gemini-flash-lite-latest";
}

function isGemma4Model(modelId: string): boolean {
Expand Down Expand Up @@ -624,23 +626,71 @@ async function loadModelsDevData(): Promise<Model<any>[]> {
for (const [modelId, model] of Object.entries(data.google.models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
let source = m;
if (modelId === "gemini-flash-latest") {
source = (data.google.models["gemini-3.5-flash"] as ModelsDevModel | undefined) ?? m;
}
if (modelId === "gemini-flash-lite-latest") {
source = (data.google.models["gemini-3.1-flash-lite"] as ModelsDevModel | undefined) ?? m;
}

models.push({
id: modelId,
name: m.name || modelId,
api: "google-generative-ai",
provider: "google",
baseUrl: "https://generativelanguage.googleapis.com/v1beta",
reasoning: m.reasoning === true,
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
reasoning: source.reasoning === true,
input: source.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
input: source.cost?.input || 0,
output: source.cost?.output || 0,
cacheRead: source.cost?.cache_read || 0,
cacheWrite: source.cost?.cache_write || 0,
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
contextWindow: source.limit?.context || 4096,
maxTokens: source.limit?.output || 4096,
});
}
}

// Process Google Vertex Gemini models. The google-vertex models.dev catalog also includes
// Claude, OpenAI, and other MaaS models that do not use the @google/genai Gemini streaming
// path implemented by our google-vertex provider.
if (data["google-vertex"]?.models) {
for (const [modelId, model] of Object.entries(data["google-vertex"].models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
if (!modelId.startsWith("gemini-")) continue;
if (modelId === "gemini-3.1-flash-lite-preview") continue;
let source = m;
if (modelId === "gemini-flash-latest") {
source = (data["google-vertex"].models["gemini-3.5-flash"] as ModelsDevModel | undefined) ?? m;
}
if (modelId === "gemini-flash-lite-latest") {
source = (data["google-vertex"].models["gemini-3.1-flash-lite"] as ModelsDevModel | undefined) ?? m;
}

// models.dev reports Vertex cache_read/cache_write values for Gemini 2.5 Flash that
// do not match the official Gemini API standard pricing table, so cacheRead is pinned
// to 0.03 for that model. pi only accounts for cachedContentTokenCount, as cacheRead.
const cacheRead = modelId === "gemini-2.5-flash" ? 0.03 : source.cost?.cache_read || 0;
models.push({
id: modelId,
name: m.name || modelId,
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: source.reasoning === true,
input: source.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: source.cost?.input || 0,
output: source.cost?.output || 0,
cacheRead,
cacheWrite: 0,
},
contextWindow: source.limit?.context || 4096,
maxTokens: source.limit?.output || 4096,
});
}
}
Expand Down Expand Up @@ -1968,167 +2018,6 @@ async function generateModels() {
});
}

const VERTEX_BASE_URL = "https://{location}-aiplatform.googleapis.com";
const vertexModels: Model<"google-vertex">[] = [
{
id: "gemini-3-pro-preview",
name: "Gemini 3 Pro Preview (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 },
contextWindow: 1000000,
maxTokens: 64000,
},
{
id: "gemini-3.1-pro-preview",
name: "Gemini 3.1 Pro Preview (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65536,
},
{
id: "gemini-3.1-pro-preview-customtools",
name: "Gemini 3.1 Pro Preview Custom Tools (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65536,
},
{
id: "gemini-3-flash-preview",
name: "Gemini 3 Flash Preview (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 0.5, output: 3, cacheRead: 0.05, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65536,
},
{
id: "gemini-2.0-flash",
name: "Gemini 2.0 Flash (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: false,
input: ["text", "image"],
cost: { input: 0.15, output: 0.6, cacheRead: 0.0375, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 8192,
},
{
id: "gemini-2.0-flash-lite",
name: "Gemini 2.0 Flash Lite (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 0.075, output: 0.3, cacheRead: 0.01875, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65536,
},
{
id: "gemini-2.5-pro",
name: "Gemini 2.5 Pro (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65536,
},
{
id: "gemini-2.5-flash",
name: "Gemini 2.5 Flash (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 0.3, output: 2.5, cacheRead: 0.03, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65536,
},
{
id: "gemini-2.5-flash-lite-preview-09-2025",
name: "Gemini 2.5 Flash Lite Preview 09-25 (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 0.1, output: 0.4, cacheRead: 0.01, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65536,
},
{
id: "gemini-2.5-flash-lite",
name: "Gemini 2.5 Flash Lite (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 0.1, output: 0.4, cacheRead: 0.01, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65536,
},
{
id: "gemini-1.5-pro",
name: "Gemini 1.5 Pro (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: false,
input: ["text", "image"],
cost: { input: 1.25, output: 5, cacheRead: 0.3125, cacheWrite: 0 },
contextWindow: 1000000,
maxTokens: 8192,
},
{
id: "gemini-1.5-flash",
name: "Gemini 1.5 Flash (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: false,
input: ["text", "image"],
cost: { input: 0.075, output: 0.3, cacheRead: 0.01875, cacheWrite: 0 },
contextWindow: 1000000,
maxTokens: 8192,
},
{
id: "gemini-1.5-flash-8b",
name: "Gemini 1.5 Flash-8B (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: false,
input: ["text", "image"],
cost: { input: 0.0375, output: 0.15, cacheRead: 0.01, cacheWrite: 0 },
contextWindow: 1000000,
maxTokens: 8192,
},
];
allModels.push(...vertexModels);

// Azure Foundry deploys these with larger context windows than OpenAI's own API,
// which caps gpt-5.4/gpt-5.5 at 272k. See models-sold-directly-by-azure docs.
const AZURE_CONTEXT_WINDOW_OVERRIDES: Record<string, number> = {
Expand Down
Loading
Loading