AI client: detect reasoning models and switch token param name

claude · claude · commit 855d1bf176fe · 2026-05-18T06:35:13.000Z
OpenAI's GPT-5 and o-series ("reasoning") models renamed max_tokens to max_completion_tokens and only accept the default temperature (1). Sending the legacy params returns: "Unsupported parameter: 'max_tokens' is not supported with this model. Use 'max_completion_tokens' instead." Detect the model family from @config.model and pick the right request shape: - Legacy chat models (gpt-4o, gpt-4, gpt-3.5, etc.): body[:max_tokens] = N and body[:temperature] = T. - Reasoning models (gpt-5*, o1*, o3*, o4*): body[:max_completion_tokens] = N only (no temperature — the default is 1 and anything else is rejected). The regex `/\b(gpt-5|o1|o3|o4)(-|\b)/i` matches the bare model names (`gpt-5`, `o1`, `o3`, `o4`) plus the common variants (`gpt-5-mini`, `o1-mini`, `o3-mini`, `o3-pro`). gpt-4o is *not* matched (its `o` follows `4` with no word boundary in front of it, and is not followed by 1, 3 or 4), so the existing default model keeps working unchanged. Anthropic and the custom callable path are untouched. https://claude.ai/code/session_01Xy96nK3Ron2NqukBiSgtzE
diff --git a/lib/mysql_genius/core/ai/client.rb b/lib/mysql_genius/core/ai/client.rb
@@ -60,13 +60,33 @@ def build_openai_body(messages, temperature)
           body = {
             messages: messages,
             response_format: { type: "json_object" },
-            temperature: temperature,
           }
-          body[:max_tokens] = @config.max_tokens.to_i if @config.max_tokens
+          # GPT-5 and o-series ("reasoning") models renamed max_tokens to
+          # max_completion_tokens and only accept temperature=1 (the default).
+          # Older chat models (gpt-4o, gpt-4, gpt-3.5) keep the original names.
+          if openai_reasoning_model?
+            body[:max_completion_tokens] = @config.max_tokens.to_i if @config.max_tokens
+          else
+            body[:max_tokens] = @config.max_tokens.to_i if @config.max_tokens
+            body[:temperature] = temperature
+          end
           body[:model] = @config.model if @config.model && !@config.model.empty?
           body
         end
 
+        # Returns true when the configured model belongs to the OpenAI families
+        # that reject `max_tokens` and `temperature` overrides: gpt-5*, o1*,
+        # o3*, o4*. Matches the bare model name plus common deployment-name
+        # prefixes (Azure deployments are user-named but typically include the
+        # model identifier).
+        OPENAI_REASONING_MODEL_PATTERN = /\b(gpt-5|o1|o3|o4)(-|\b)/i.freeze
+        def openai_reasoning_model?
+          model = @config.model.to_s
+          return false if model.empty?
+
+          model.match?(OPENAI_REASONING_MODEL_PATTERN)
+        end
+
         def build_anthropic_body(messages, temperature)
           system_text = messages.select { |m| m[:role] == "system" }.map { |m| m[:content] }.join("\n\n")
           user_messages = messages.reject { |m| m[:role] == "system" }
diff --git a/spec/mysql_genius/core/ai/client_spec.rb b/spec/mysql_genius/core/ai/client_spec.rb
@@ -155,6 +155,111 @@ def ok_response(content)
       end
     end
 
+    context "with gpt-4o (legacy chat model)" do
+      let(:http_response) { ok_response('{"ok":true}') }
+
+      it "sends max_tokens and temperature" do
+        captured_body = nil
+        stub_http_with_block do |http|
+          allow(http).to(receive(:request)) do |req|
+            captured_body = JSON.parse(req.body)
+            http_response
+          end
+        end
+
+        client.chat(messages: [{ role: "user", content: "hi" }], temperature: 0)
+
+        expect(captured_body).to(have_key("max_tokens"))
+        expect(captured_body).not_to(have_key("max_completion_tokens"))
+        expect(captured_body["temperature"]).to(eq(0))
+      end
+    end
+
+    context "with gpt-5 / o-series reasoning models" do
+      let(:http_response) { ok_response('{"ok":true}') }
+
+      def with_reasoning_model(model_name, &block)
+        cfg = MysqlGenius::Core::Ai::Config.new(
+          client: nil,
+          endpoint: "https://api.example.com/v1/chat/completions",
+          api_key: "sk-test-key",
+          model: model_name,
+          auth_style: :bearer,
+          system_context: nil,
+        )
+        block.call(described_class.new(cfg))
+      end
+
+      it "sends max_completion_tokens instead of max_tokens for gpt-5-mini" do
+        captured_body = nil
+        stub_http_with_block do |http|
+          allow(http).to(receive(:request)) do |req|
+            captured_body = JSON.parse(req.body)
+            http_response
+          end
+        end
+
+        with_reasoning_model("gpt-5-mini") do |c|
+          c.chat(messages: [{ role: "user", content: "hi" }])
+        end
+
+        expect(captured_body).to(have_key("max_completion_tokens"))
+        expect(captured_body).not_to(have_key("max_tokens"))
+      end
+
+      it "omits temperature for reasoning models (they only accept the default)" do
+        captured_body = nil
+        stub_http_with_block do |http|
+          allow(http).to(receive(:request)) do |req|
+            captured_body = JSON.parse(req.body)
+            http_response
+          end
+        end
+
+        with_reasoning_model("gpt-5") do |c|
+          c.chat(messages: [{ role: "user", content: "hi" }], temperature: 0)
+        end
+
+        expect(captured_body).not_to(have_key("temperature"))
+      end
+
+      it "matches o1, o3, o4 model families too" do
+        ["o1", "o1-mini", "o3-mini", "o4"].each do |model|
+          captured_body = nil
+          stub_http_with_block do |http|
+            allow(http).to(receive(:request)) do |req|
+              captured_body = JSON.parse(req.body)
+              http_response
+            end
+          end
+
+          with_reasoning_model(model) do |c|
+            c.chat(messages: [{ role: "user", content: "hi" }])
+          end
+
+          expect(captured_body.key?("max_completion_tokens")).to(be(true), "expected max_completion_tokens for #{model}")
+          expect(captured_body.key?("max_tokens")).to(be(false), "expected no max_tokens for #{model}")
+        end
+      end
+
+      it "still treats gpt-4o as a legacy chat model (substring shouldn't match)" do
+        captured_body = nil
+        stub_http_with_block do |http|
+          allow(http).to(receive(:request)) do |req|
+            captured_body = JSON.parse(req.body)
+            http_response
+          end
+        end
+
+        with_reasoning_model("gpt-4o") do |c|
+          c.chat(messages: [{ role: "user", content: "hi" }])
+        end
+
+        expect(captured_body).to(have_key("max_tokens"))
+        expect(captured_body).to(have_key("temperature"))
+      end
+    end
+
     context "when the API returns an error" do
       before do
         body = { "error" => { "message" => "Rate limit exceeded" } }.to_json