Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 30 additions & 14 deletions unsloth_zoo/dataset_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,16 +184,22 @@ def _find_common_token_ids(component, tokenizer, force_match = False):

def train_on_responses_only(
trainer,
instruction_part = None,
response_part = None,
force_match = True, # Match newlines as well!
tokenizer = None, # Optional
return_function = False, # Useful for iterating over lists
num_proc = None,
instruction_part = None,
response_part = None,
force_match = True, # Match newlines as well!
tokenizer = None, # Optional
return_function = False, # Useful for iterating over lists
num_proc = None,
last_response_only = False, # Train only on the last assistant turn
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[3/11 reviewers] The new last_response_only parameter is not exposed by UnslothVisionDataCollator.__init__ in unsloth_zoo/vision_utils.py, so it is never forwarded to the label-masking function. UnslothVisionDataCollator currently calls _train_on_responses_only(None, ..., return_function=True) on line 734 without passing last_response_only, so VLM fine-tuning cannot access the new behavior. Because the new parameter is a keyword argument with a default value, existing callers are unaffected and forwarding it is a backward-compatible addition. Suggested forwarding in vision_utils.py (separate file, see that file for the actual edit):

Suggested change
last_response_only = False, # Train only on the last assistant turn
last_response_only = False, # Train only on the last assistant turn

No change is needed on this line itself — this comment flags that the VLM integration also needs to be added in vision_utils.py in the follow-up.

):
"""
Trains only on responses and not on the instruction by masking out
the labels with -100 for the instruction part.

If last_response_only=True, only the final assistant turn has its
labels unmasked; all earlier assistant turns remain masked at -100
(they are never written, so they keep the initialized -100 values
and are not copied from old_labels either).
"""
# All Unsloth Zoo code licensed under LGPLv3
if tokenizer is None and trainer is not None:
Expand Down Expand Up @@ -249,13 +255,16 @@ def _train_on_responses_only(examples):
for input_ids, old_labels in zip(input_ids_, labels_):
n = len(input_ids)
labels = [-100] * n

use_old_labels = False
if old_labels is not None:
use_old_labels = True
assert(n == len(old_labels))
n_minus_1 = n - 1
j = 0

# Collect all (assistant_k, user_j) spans for this sample
spans = []
while j < n:
# Find <assistant>
if (input_ids[j] == A_first) and \
Expand Down Expand Up @@ -308,20 +317,27 @@ def _train_on_responses_only(examples):
k = n
pass

if not use_old_labels:
# Now copy input_ids to labels
labels[assistant_k : user_j] = input_ids [assistant_k : user_j]
# print(assistant_j, assistant_k, user_j, user_k)
else:
# Copy over from old labels!
labels[assistant_k : user_j] = old_labels[assistant_k : user_j]
spans.append((assistant_k, user_j))
break
pass
j += 1
pass
pass
j += 1
pass

# Apply labels: only the last assistant turn when last_response_only=True.
# Note: spans[-1:] safely returns [] when spans is empty (no assistant turn
# was found), so a sample with no assistant turn stays fully masked at -100.
apply_spans = spans[-1:] if last_response_only else spans
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[3/11 reviewers] spans[-1:] on an empty list returns [] rather than raising IndexError, which is the correct behavior (sample with no assistant turn stays fully masked), but it is non-obvious. Adding a short inline comment clarifies intent:

Suggested change
apply_spans = spans[-1:] if last_response_only else spans
# Apply labels: only the last assistant turn when last_response_only=True.
# Note: spans[-1:] safely returns [] when spans is empty (no assistant turn found),
# so a sample with no assistant turn stays fully masked at -100.
apply_spans = spans[-1:] if last_response_only else spans

for assistant_k, user_j in apply_spans:
if not use_old_labels:
# Now copy input_ids to labels
labels[assistant_k : user_j] = input_ids [assistant_k : user_j]
else:
# Copy over from old labels!
labels[assistant_k : user_j] = old_labels[assistant_k : user_j]

all_labels.append(labels)
pass
return { "labels" : torch.tensor(all_labels, dtype = torch.int64) if use_tensors else all_labels }
Expand Down
14 changes: 8 additions & 6 deletions unsloth_zoo/vision_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -660,6 +660,7 @@ def __init__(
pad_to_multiple_of = None,
resize_dimension = 0, # can be 0, 1, 'max' or 'min' (max resizes based on the max of height width, min the min size, 0 the first dim, etc)
snap_to_patch_size = False,
last_response_only = False, # Train only on the last assistant turn
):
if not hasattr(processor, "image_processor"):
raise TypeError("Unsloth: UnslothVisionDataCollator is only for image models!")
Expand Down Expand Up @@ -733,12 +734,13 @@ def __init__(
assert(isinstance(instruction_part, str) and isinstance(response_part, str))
self.train_on_responses_only = _train_on_responses_only(
None,
instruction_part = instruction_part,
response_part = response_part,
force_match = force_match,
tokenizer = processor,
return_function = True,
num_proc = num_proc,
instruction_part = instruction_part,
response_part = response_part,
force_match = force_match,
tokenizer = processor,
return_function = True,
num_proc = num_proc,
last_response_only = last_response_only,
)
else:
self.train_on_responses_only = None
Expand Down