Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion models/azure_openai/manifest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ resource:
model:
enabled: false
type: plugin
version: 0.0.49
version: 0.0.50
12 changes: 12 additions & 0 deletions models/azure_openai/models/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2586,6 +2586,10 @@ class AzureBaseModel(BaseModel):
ModelPropertyKey.CONTEXT_SIZE: 272000,
},
parameter_rules=[
ParameterRule(
name="temperature",
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
),
Comment on lines +2589 to +2592
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The PR description states that `top_p` is also a valid parameter for GPT-5.1 when `reasoning_effort` is set to `none`. However, `top_p` is missing from the `parameter_rules` of the gpt-5.1 model entry here, whereas the gpt-5.2 and gpt-5.4 entries include it (it was either added correctly or already present).

                ParameterRule(
                    name="temperature",
                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
                ),
                ParameterRule(
                    name="top_p",
                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
                ),

ParameterRule(
name="response_format",
label=I18nObject(zh_Hans="回复格式", en_US="response_format"),
Expand Down Expand Up @@ -3050,6 +3054,10 @@ class AzureBaseModel(BaseModel):
ModelPropertyKey.CONTEXT_SIZE: 400000,
},
parameter_rules=[
ParameterRule(
name="temperature",
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
),
ParameterRule(
name="top_p",
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
Expand Down Expand Up @@ -3500,6 +3508,10 @@ class AzureBaseModel(BaseModel):
ModelPropertyKey.CONTEXT_SIZE: 400000,
},
parameter_rules=[
ParameterRule(
name="temperature",
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
),
ParameterRule(
name="top_p",
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
Expand Down
18 changes: 11 additions & 7 deletions models/azure_openai/models/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,10 +429,17 @@ def _chat_generate_with_responses(
}

# Map model parameters to the Responses API
if "temperature" in model_parameters:
responses_params["temperature"] = model_parameters["temperature"]
if "top_p" in model_parameters:
responses_params["top_p"] = model_parameters["top_p"]
# reasoning_effort controls whether temperature/top_p are supported:
# - "none" (or not set): reasoning is disabled, temperature/top_p are valid
# - any other value: reasoning is active, temperature/top_p are NOT supported
reasoning_effort = model_parameters.get("reasoning_effort")
if reasoning_effort and reasoning_effort != "none":
responses_params["reasoning"] = {"effort": reasoning_effort}
else:
if "temperature" in model_parameters:
responses_params["temperature"] = model_parameters["temperature"]
if "top_p" in model_parameters:
responses_params["top_p"] = model_parameters["top_p"]
if "max_tokens" in model_parameters:
responses_params["max_output_tokens"] = model_parameters["max_tokens"]
elif "max_completion_tokens" in model_parameters:
Expand Down Expand Up @@ -495,9 +502,6 @@ def _chat_generate_with_responses(
"format": {"type": response_format}
}

if "reasoning_effort" in model_parameters:
responses_params["reasoning"] = {"effort": model_parameters["reasoning_effort"]}

logger.info(
f"llm request with responses api: model={model}, stream={stream}, "
f"parameters={responses_params}"
Expand Down
2 changes: 1 addition & 1 deletion models/openai/manifest.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: 0.3.4
version: 0.3.5
type: plugin
author: "langgenius"
name: "openai"
Expand Down
2 changes: 2 additions & 0 deletions models/openai/models/llm/gpt-5.1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ parameter_rules:
default: 8192
min: 1
max: 128000
- name: temperature
use_template: temperature
Comment on lines +20 to +21
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

As with the Azure OpenAI constants, `top_p` should be added to the parameter rules for GPT-5.1 so that users can configure it when reasoning is disabled.

  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p

- name: response_format
label:
zh_Hans: 回复格式
Expand Down
2 changes: 2 additions & 0 deletions models/openai/models/llm/gpt-5.2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ parameter_rules:
default: 8192
min: 1
max: 128000
- name: temperature
use_template: temperature
- name: response_format
label:
zh_Hans: 回复格式
Expand Down
2 changes: 2 additions & 0 deletions models/openai/models/llm/gpt-5.4.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ parameter_rules:
default: 8192
min: 1
max: 128000
- name: temperature
use_template: temperature
- name: response_format
label:
zh_Hans: 回复格式
Expand Down
13 changes: 12 additions & 1 deletion models/openai/models/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,14 @@ def _chat_generate(
else:
# chat model
messages: Any = [self._convert_prompt_message_to_dict(m) for m in prompt_messages]


# For models where temperature is only valid when reasoning_effort="none"
# (e.g. gpt-5.1/5.2/5.4): strip temperature/top_p when reasoning is active
_re = model_parameters.get("reasoning_effort")
if _re and _re != "none":
model_parameters.pop("temperature", None)
model_parameters.pop("top_p", None)
Comment on lines +784 to +785
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The PR description mentions that `logprobs` is also restricted when reasoning is active. It is correctly stripped in `_build_responses_api_params`, but it is missing from the stripping logic here in the standard `_chat_generate` path. This could lead to a 400 error if a user provides `logprobs` together with a `reasoning_effort` other than `none`.

Suggested change
- model_parameters.pop("temperature", None)
- model_parameters.pop("top_p", None)
+ model_parameters.pop("temperature", None)
+ model_parameters.pop("top_p", None)
+ model_parameters.pop("logprobs", None)


try:
response = client.chat.completions.create(
messages=messages,
Expand Down Expand Up @@ -821,6 +828,10 @@ def _build_responses_api_params(
reasoning_effort = params.pop("reasoning_effort", None)
if reasoning_effort and reasoning_effort != "none":
params["reasoning"] = {"effort": reasoning_effort}
# temperature/top_p/logprobs not supported when reasoning is active
params.pop("temperature", None)
params.pop("top_p", None)
params.pop("logprobs", None)

# response_format -> text.format (Responses API uses different format)
# response_format is incompatible with Responses API, convert to text.format
Expand Down
Loading