Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ build/**/logs
configurations/*
!configurations/configuration.default.json
!configurations/configuration.local.json
!configurations/temporal_domains/
!configurations/configuration.test*.json
data/
tmp/
Expand Down
115 changes: 115 additions & 0 deletions configurations/temporal_domains/bundestag.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
{
"name": "bundestag",
"metadata_schema": {
"temporal_field": "legislature_period",
"current_period": 21,
"historical_period": 20
},
"temporal_keywords": {
"current": {
"en": [
"current",
"recent",
"latest",
"today",
"now",
"this year",
"present",
"nowadays"
],
"de": [
"aktuell",
"jetzt",
"neueste",
"derzeitig",
"gegenwärtig",
"dieses jahr",
"momentan",
"derzeit",
"heute"
]
},
"historical": {
"en": [
"previous",
"past",
"former",
"old",
"historical",
"before",
"earlier",
"last year"
],
"de": [
"vorherig",
"vergangen",
"früher",
"alt",
"historisch",
"letztes jahr"
]
}
},
"period_identifiers": {
"20": {
"names": [
"20. Wahlperiode",
"Wahlperiode 20",
"WP20",
"20th legislative period",
"20th legislature",
"period 20"
],
"years": [2021, 2022, 2023, 2024],
"temporal_type": "historical"
},
"21": {
"names": [
"21. Wahlperiode",
"Wahlperiode 21",
"WP21",
"21st legislative period",
"21st legislature",
"period 21"
],
"years": [2025],
"temporal_type": "current"
}
},
"query_expansion": {
"temporal_current": {
"de": "21. Wahlperiode 2025 aktuelle Bundesregierung neueste",
"en": "21st legislature 2025 current government latest 21. Wahlperiode"
},
"temporal_historical": {
"de": "20. Wahlperiode 2021 2022 2023 2024 frühere Bundesregierung vergangene",
"en": "20th legislature 2021 2022 2023 2024 former government previous 20. Wahlperiode"
},
"entity_terms": {
"de": "Fraktionen Bundestag Bundestagsfraktionen parlamentarische Gruppen Wahlperiode",
"en": "factions Bundestag parliamentary groups legislature period"
}
},
"language_detection": {
"de": [
"wer",
"was",
"welche",
"alle",
"der",
"die",
"das",
"ist",
"sind"
],
"en": [
"who",
"what",
"which",
"all",
"the",
"is",
"are"
]
}
}
21 changes: 21 additions & 0 deletions configurations/temporal_domains/generic.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"name": "generic",
"metadata_schema": {
"temporal_field": "period",
"current_period": null,
"historical_period": null
},
"temporal_keywords": {
"current": {
"en": []
},
"historical": {
"en": []
}
},
"period_identifiers": {},
"query_expansion": {},
"language_detection": {
"en": ["the", "is", "are", "a", "an"]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class RetrieverName(str, Enum):

BASIC = "basic"
AUTO = "auto"
DYNAMIC_TEMPORAL = "dynamic_temporal"


class RetrieverConfiguration(BaseConfiguration):
Expand Down
80 changes: 78 additions & 2 deletions src/augmentation/bootstrap/configuration/configuration.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import Any
import json
from pathlib import Path
from typing import Any, Optional, Union

from pydantic import Field, ValidationInfo, field_validator

Expand All @@ -11,6 +13,9 @@
from augmentation.bootstrap.configuration.langfuse_configuration import (
LangfuseConfiguration,
)
from augmentation.bootstrap.configuration.temporal_domain_config import (
TemporalDomainConfiguration,
)
from core.base_configuration import BaseConfiguration
from embedding.bootstrap.configuration.configuration import (
EmbeddingConfiguration,
Expand All @@ -22,7 +27,8 @@ class _AugmentationConfiguration(BaseConfiguration):
Internal configuration class for augmentation process settings.

This class defines the structure for augmentation configuration including
Langfuse monitoring, Chainlit UI, and Chat Engine components.
Langfuse monitoring, Chainlit UI, Chat Engine components, and optional
temporal domain configuration.
"""

langfuse: LangfuseConfiguration = Field(
Expand All @@ -34,6 +40,76 @@ class _AugmentationConfiguration(BaseConfiguration):
chat_engine: Any = Field(
..., description="Configuration of the Chat Engine."
)
# Optional temporal domain settings. A string value is interpreted by the
# `_validate_temporal_domain` validator as a path to a JSON file; a relative
# path is looked up under the `configurations` directory located five levels
# above this module. None is passed through unchanged.
temporal_domain: Optional[Union[str, TemporalDomainConfiguration]] = Field(
default=None,
description="Temporal domain configuration. Can be a file path (str) or inline config. "
"If not provided, system runs in generic mode without temporal filtering.",
)

@field_validator("temporal_domain")
@classmethod
def _validate_temporal_domain(
    cls, value: Optional[Union[str, TemporalDomainConfiguration]]
) -> Optional[TemporalDomainConfiguration]:
    """
    Validates and loads temporal domain configuration.

    Accepted inputs:
      * None - returned unchanged.
      * TemporalDomainConfiguration - returned unchanged.
      * dict - unpacked into TemporalDomainConfiguration.
      * str - treated as a path to a JSON file. A relative path is resolved
        against the `configurations` directory five levels above this module.

    Args:
        value: Temporal domain config (file path, dict, config object, or None)

    Returns:
        Loaded TemporalDomainConfiguration or None

    Raises:
        ValueError: If the value has an unsupported type, the path does not
            point to a file, the file is not valid JSON, or its content cannot
            be turned into a TemporalDomainConfiguration.
    """
    if value is None:
        return None

    if isinstance(value, TemporalDomainConfiguration):
        return value

    if isinstance(value, dict):
        # Treat as inline configuration
        return TemporalDomainConfiguration(**value)

    if not isinstance(value, str):
        raise ValueError(
            f"temporal_domain must be a file path (str), dict, or TemporalDomainConfiguration, got {type(value)}"
        )

    # Treat as file path
    file_path = Path(value)

    # If relative path, resolve relative to configurations directory
    if not file_path.is_absolute():
        config_dir = (
            Path(__file__).parent.parent.parent.parent.parent
            / "configurations"
        )
        file_path = config_dir / value

    # is_file() also rejects directories, which would otherwise only fail
    # later inside open() with a less specific message.
    if not file_path.is_file():
        raise ValueError(
            f"Temporal domain configuration file not found: {file_path}"
        )

    try:
        # JSON documents are UTF-8; without an explicit encoding open() falls
        # back to the locale default, which corrupts non-ASCII keywords
        # (e.g. German umlauts) on platforms where that default is not UTF-8.
        with open(file_path, "r", encoding="utf-8") as f:
            config_data = json.load(f)
        return TemporalDomainConfiguration(**config_data)
    except json.JSONDecodeError as e:
        raise ValueError(
            f"Invalid JSON in temporal domain config file {file_path}: {e}"
        ) from e
    except Exception as e:
        # Deliberately broad: any load/parse/model failure is re-raised as a
        # ValueError so callers see a single error type, with the original
        # exception preserved as the cause.
        raise ValueError(
            f"Failed to load temporal domain config from {file_path}: {e}"
        ) from e

@field_validator("chat_engine")
@classmethod
Expand Down
Loading
Loading