Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lettucedetect/datasets/hallucination_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ def from_json(cls, json_dict: dict) -> "HallucinationSample":
labels=json_dict["labels"],
split=json_dict["split"],
task_type=json_dict["task_type"],
dataset=json_dict["dataset"],
language=json_dict["language"],
dataset=json_dict.get("dataset", "unknown"),
language=json_dict.get("language", "en"),
context_modality=json_dict.get("context_modality", "prose"),
category=json_dict.get("category"),
subcategory=json_dict.get("subcategory"),
Expand Down
40 changes: 40 additions & 0 deletions tests/test_datasets_pytest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""Pytest tests for the datasets module."""

from lettucedetect.datasets.hallucination_dataset import HallucinationSample


def test_from_json_with_minimal_keys():
    """Verify from_json accepts a dict that omits "dataset" and "language".

    Only five keys are supplied. Their values must come back unchanged on the
    resulting sample, while the two omitted keys are expected to resolve to
    "unknown" (dataset) and "en" (language).
    """
    minimal_payload = {
        "prompt": "User request: What is AI?\n\nAI is artificial intelligence.",
        "answer": "AI stands for Artificial Intelligence.",
        "labels": [],
        "split": "test",
        "task_type": "qa",
    }
    # Kept separate from the payload so the comparison never relies on objects
    # that from_json itself received. Supplied keys come first, then defaults.
    expected_attrs = {
        "prompt": "User request: What is AI?\n\nAI is artificial intelligence.",
        "answer": "AI stands for Artificial Intelligence.",
        "labels": [],
        "split": "test",
        "task_type": "qa",
        "dataset": "unknown",
        "language": "en",
    }

    sample = HallucinationSample.from_json(minimal_payload)

    for attr_name, expected_value in expected_attrs.items():
        assert getattr(sample, attr_name) == expected_value


def test_from_json_explicit_dataset_language():
    """Verify that "dataset" and "language" values given in the dict are kept.

    When both keys are present in the input, the sample must carry exactly
    those values rather than any fallback.
    """
    full_payload = {
        "prompt": "Test prompt",
        "answer": "Test answer",
        "labels": [{"start": 0, "end": 4, "label": "intrinsic"}],
        "split": "train",
        "task_type": "summarization",
        "dataset": "cnn_dailymail",
        "language": "de",
    }

    sample = HallucinationSample.from_json(full_payload)

    # Explicit values win: neither attribute may differ from what was passed.
    assert sample.dataset == "cnn_dailymail"
    assert sample.language == "de"
Loading