Skip to content

Commit 855d1bf

Browse files
committed
AI client: detect reasoning models and switch token param name
OpenAI's GPT-5 and o-series ("reasoning") models renamed max_tokens to max_completion_tokens and only accept the default temperature (1). Sending the legacy params returns:

    Unsupported parameter: 'max_tokens' is not supported with this model. Use 'max_completion_tokens' instead.

Detect the model family from @config.model and pick the right shape:

- Legacy chat models (gpt-4o, gpt-4, gpt-3.5, etc.): body[:max_tokens] = N and body[:temperature] = T
- Reasoning models (gpt-5*, o1*, o3*, o4*): body[:max_completion_tokens] = N (no temperature — default is 1, anything else is rejected)

The regex `/\b(gpt-5|o1|o3|o4)(-|\b)/i` matches the bare model name (`gpt-5`, `o1`, `o3-mini`, `o4`) plus the common variants (`gpt-5-mini`, `o1-mini`, `o3-pro`). gpt-4o is *not* matched (its trailing `o` directly follows `4`, so it doesn't sit on a word boundary the way `o1` does, and it is not followed by `1`, `3`, or `4`), so the existing default model keeps working unchanged.

Anthropic and the custom callable path are untouched.

https://claude.ai/code/session_01Xy96nK3Ron2NqukBiSgtzE
1 parent d7bf185 commit 855d1bf

2 files changed

Lines changed: 127 additions & 2 deletions

File tree

lib/mysql_genius/core/ai/client.rb

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,33 @@ def build_openai_body(messages, temperature)
6060
body = {
6161
messages: messages,
6262
response_format: { type: "json_object" },
63-
temperature: temperature,
6463
}
65-
body[:max_tokens] = @config.max_tokens.to_i if @config.max_tokens
64+
# GPT-5 and o-series ("reasoning") models renamed max_tokens to
65+
# max_completion_tokens and only accept temperature=1 (the default).
66+
# Older chat models (gpt-4o, gpt-4, gpt-3.5) keep the original names.
67+
if openai_reasoning_model?
68+
body[:max_completion_tokens] = @config.max_tokens.to_i if @config.max_tokens
69+
else
70+
body[:max_tokens] = @config.max_tokens.to_i if @config.max_tokens
71+
body[:temperature] = temperature
72+
end
6673
body[:model] = @config.model if @config.model && !@config.model.empty?
6774
body
6875
end
6976

77+
# Returns true when the configured model belongs to the OpenAI families
78+
# that reject `max_tokens` and `temperature` overrides: gpt-5*, o1*,
79+
# o3*, o4*. Matches the bare model name plus common deployment-name
80+
# prefixes (Azure deployments are user-named but typically include the
81+
# model identifier).
82+
OPENAI_REASONING_MODEL_PATTERN = /\b(gpt-5|o1|o3|o4)(-|\b)/i.freeze
83+
def openai_reasoning_model?
84+
model = @config.model.to_s
85+
return false if model.empty?
86+
87+
model.match?(OPENAI_REASONING_MODEL_PATTERN)
88+
end
89+
7090
def build_anthropic_body(messages, temperature)
7191
system_text = messages.select { |m| m[:role] == "system" }.map { |m| m[:content] }.join("\n\n")
7292
user_messages = messages.reject { |m| m[:role] == "system" }

spec/mysql_genius/core/ai/client_spec.rb

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,111 @@ def ok_response(content)
155155
end
156156
end
157157

158+
context "with gpt-4o (legacy chat model)" do
159+
let(:http_response) { ok_response('{"ok":true}') }
160+
161+
it "sends max_tokens and temperature" do
162+
captured_body = nil
163+
stub_http_with_block do |http|
164+
allow(http).to(receive(:request)) do |req|
165+
captured_body = JSON.parse(req.body)
166+
http_response
167+
end
168+
end
169+
170+
client.chat(messages: [{ role: "user", content: "hi" }], temperature: 0)
171+
172+
expect(captured_body).to(have_key("max_tokens"))
173+
expect(captured_body).not_to(have_key("max_completion_tokens"))
174+
expect(captured_body["temperature"]).to(eq(0))
175+
end
176+
end
177+
178+
context "with gpt-5 / o-series reasoning models" do
179+
let(:http_response) { ok_response('{"ok":true}') }
180+
181+
def with_reasoning_model(model_name, &block)
182+
cfg = MysqlGenius::Core::Ai::Config.new(
183+
client: nil,
184+
endpoint: "https://api.example.com/v1/chat/completions",
185+
api_key: "sk-test-key",
186+
model: model_name,
187+
auth_style: :bearer,
188+
system_context: nil,
189+
)
190+
block.call(described_class.new(cfg))
191+
end
192+
193+
it "sends max_completion_tokens instead of max_tokens for gpt-5-mini" do
194+
captured_body = nil
195+
stub_http_with_block do |http|
196+
allow(http).to(receive(:request)) do |req|
197+
captured_body = JSON.parse(req.body)
198+
http_response
199+
end
200+
end
201+
202+
with_reasoning_model("gpt-5-mini") do |c|
203+
c.chat(messages: [{ role: "user", content: "hi" }])
204+
end
205+
206+
expect(captured_body).to(have_key("max_completion_tokens"))
207+
expect(captured_body).not_to(have_key("max_tokens"))
208+
end
209+
210+
it "omits temperature for reasoning models (they only accept the default)" do
211+
captured_body = nil
212+
stub_http_with_block do |http|
213+
allow(http).to(receive(:request)) do |req|
214+
captured_body = JSON.parse(req.body)
215+
http_response
216+
end
217+
end
218+
219+
with_reasoning_model("gpt-5") do |c|
220+
c.chat(messages: [{ role: "user", content: "hi" }], temperature: 0)
221+
end
222+
223+
expect(captured_body).not_to(have_key("temperature"))
224+
end
225+
226+
it "matches o1, o3, o4 model families too" do
227+
["o1", "o1-mini", "o3-mini", "o4"].each do |model|
228+
captured_body = nil
229+
stub_http_with_block do |http|
230+
allow(http).to(receive(:request)) do |req|
231+
captured_body = JSON.parse(req.body)
232+
http_response
233+
end
234+
end
235+
236+
with_reasoning_model(model) do |c|
237+
c.chat(messages: [{ role: "user", content: "hi" }])
238+
end
239+
240+
expect(captured_body.key?("max_completion_tokens")).to(be(true), "expected max_completion_tokens for #{model}")
241+
expect(captured_body.key?("max_tokens")).to(be(false), "expected no max_tokens for #{model}")
242+
end
243+
end
244+
245+
it "still treats gpt-4o as a legacy chat model (substring shouldn't match)" do
246+
captured_body = nil
247+
stub_http_with_block do |http|
248+
allow(http).to(receive(:request)) do |req|
249+
captured_body = JSON.parse(req.body)
250+
http_response
251+
end
252+
end
253+
254+
with_reasoning_model("gpt-4o") do |c|
255+
c.chat(messages: [{ role: "user", content: "hi" }])
256+
end
257+
258+
expect(captured_body).to(have_key("max_tokens"))
259+
expect(captured_body).to(have_key("temperature"))
260+
end
261+
end
262+
158263
context "when the API returns an error" do
159264
before do
160265
body = { "error" => { "message" => "Rate limit exceeded" } }.to_json

0 commit comments

Comments
 (0)