unslothai · danielhanchen · Apr 10, 2026 · gemini-code-assist · Apr 10, 2026 · gemini-code-assist
diff --git a/unsloth_zoo/temporary_patches/ministral.py b/unsloth_zoo/temporary_patches/ministral.py
@@ -15,6 +15,7 @@
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
 import torch
+import inspect
 from typing import Optional, Callable
 from .common import TEMPORARY_PATCHES
 from .utils import (
@@ -90,13 +91,19 @@ def forward(
         attn_output = self.o_proj(attn_output)
         return attn_output, attn_weights
 
-    # Wrap so check_args_kwargs accepts removed params (e.g. cache_position in v5)
+    # Wrap so check_args_kwargs accepts removed params (e.g. cache_position in v5).
+    # Preserve the original signature on the wrapper so inspect.signature
+    # (used by GenerationMixin._validate_model_kwargs among others) still
+    # sees the real named parameters.
+    target_cls = transformers.models.ministral.modeling_ministral.MinistralAttention
+    _original_forward_signature = inspect.signature(target_cls.forward)
     _full_forward = forward
     def forward(self, *args, **kwargs):
         return _full_forward(self, *args, **kwargs)
+    forward.__signature__ = _original_forward_signature
 
     patch_function(
-        transformers.models.ministral.modeling_ministral.MinistralAttention,
+        target_cls,
         "forward",
         forward,
         match_level="relaxed",

diff --git a/unsloth_zoo/temporary_patches/misc.py b/unsloth_zoo/temporary_patches/misc.py
@@ -232,10 +232,15 @@ def forward(
     pass
 
     # Wrap with (self, *args, **kwargs) so check_args_kwargs accepts any
-    # removed params (output_attentions, output_hidden_states, cache_position)
+    # removed params (output_attentions, output_hidden_states, cache_position).
+    # Copy the original class signature onto the wrapper so
+    # GenerationMixin._validate_model_kwargs (called by generate()) still
+    # sees the real named parameters, such as backbone_last_hidden_state.
+    _original_forward_signature = inspect.signature(target_cls.forward)
     _full_forward = forward
     def forward(self, *args, **kwargs):
         return _full_forward(self, *args, **kwargs)
+    forward.__signature__ = _original_forward_signature
     patch_function(target_cls, "forward", forward, match_level="relaxed")
 pass
 TEMPORARY_PATCHES.append(patch_CsmDepthDecoderForCausalLM_forward)
@@ -367,9 +372,11 @@ def forward(
         })
     pass
 
+    _original_forward_signature = inspect.signature(target_cls.forward)
     _full_forward = forward
     def forward(self, *args, **kwargs):
         return _full_forward(self, *args, **kwargs)
+    forward.__signature__ = _original_forward_signature
-    _original_forward_signature = inspect.signature(target_cls.forward)
-    _full_forward = forward
-    def forward(self, *args, **kwargs):
-        return _full_forward(self, *args, **kwargs)
-    forward.__signature__ = _original_forward_signature
+    # Preserve the original signature on the wrapper so inspect.signature
+    # (used by transformers._validate_model_kwargs among others) still sees
+    # the real named parameters.
+    _original_forward_signature = inspect.signature(target_cls.forward)
+    _full_forward = forward
+    def forward(self, *args, **kwargs):
+        return _full_forward(self, *args, **kwargs)
+    forward.__signature__ = _original_forward_signature
-    _original_forward_signature = inspect.signature(target_cls.forward)
-    _full_forward = forward
-    def forward(self, *args, **kwargs):
-        return _full_forward(self, *args, **kwargs)
-    forward.__signature__ = _original_forward_signature
+    # Preserve the original signature on the wrapper so inspect.signature
+    # (used by transformers._validate_model_kwargs among others) still sees
+    # the real named parameters.
+    _original_forward_signature = inspect.signature(target_cls.forward)
+    _full_forward = forward
+    def forward(self, *args, **kwargs):
+        return _full_forward(self, *args, **kwargs)
+    forward.__signature__ = _original_forward_signature
     patch_function(target_cls, "forward", forward, match_level="relaxed")
 pass
 TEMPORARY_PATCHES.append(patch_CsmForConditionalGeneration_forward)

diff --git a/unsloth_zoo/temporary_patches/pixtral.py b/unsloth_zoo/temporary_patches/pixtral.py
@@ -16,6 +16,7 @@
 
 import torch
 import torch.nn as nn
+import inspect
 from typing import Optional, Tuple
 from .common import TEMPORARY_PATCHES
 from .utils import (
@@ -83,19 +84,25 @@ def forward(
         attn_output = self.o_proj(attn_output)
         return attn_output, None
 
-    # Wrap so check_args_kwargs accepts removed params (e.g. output_attentions in v5)
+    # Wrap so check_args_kwargs accepts removed params (e.g. output_attentions in v5).
+    # Preserve the original signature on the wrapper so inspect.signature
+    # (used by GenerationMixin._validate_model_kwargs among others) still
+    # sees the real named parameters.
+    target_cls = transformers.models.pixtral.modeling_pixtral.PixtralAttention
+    _original_forward_signature = inspect.signature(target_cls.forward)
     _full_forward = forward
     def forward(self, *args, **kwargs):
         return _full_forward(self, *args, **kwargs)
+    forward.__signature__ = _original_forward_signature
 
     patch_function(
-        transformers.models.pixtral.modeling_pixtral.PixtralAttention,
+        target_cls,
         "__init__",
         __init__,
     )
 
     patch_function(
-        transformers.models.pixtral.modeling_pixtral.PixtralAttention,
+        target_cls,
         "forward",
         forward,
     )