Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
e842b23
feat: add DIP API client for Bundestag data extraction
Oct 30, 2025
038caa1
feat: add clear-collection flag to embedding script
Oct 30, 2025
d174486
perf: optimize chat engine response synthesis
Oct 30, 2025
50bcd8f
fix: add lazy configuration initialization in augment.py
Oct 30, 2025
778adce
chore: add dependencies for DIP API and embedding improvements
Oct 30, 2025
599c37b
test: add e2e tests for Bundestag data sources
Oct 30, 2025
b3ac2e6
chore: allow test configuration files in version control
Oct 30, 2025
2346dc9
test: fix unit tests for updated parser and reader
Oct 30, 2025
e7eb894
ci: exclude e2e tests from CI pipeline
Oct 30, 2025
f263d29
feat: update Auto Retriever metadata schema for Bundestag documents
Nov 3, 2025
178910c
fix: remove metadata filters from Auto Retriever to prevent spurious …
Nov 4, 2025
30c0765
feat: add hybrid filter postprocessor with multi-stage filtering
Nov 7, 2025
eff1b49
feat: add query rewriting wrapper to auto-retriever
Nov 7, 2025
b5a0c72
feat: add temporal-aware prompts for Bundestag assistant
Nov 7, 2025
02951e8
feat: add parliamentary composition extraction and protocol filtering
Nov 7, 2025
7769f37
feat: implement query rewriting for improved retrieval
Nov 7, 2025
15e7726
feat: add dynamic party extractor for Bundestag documents
Nov 7, 2025
2b6981b
test: add comprehensive tests for hybrid filter postprocessor
Nov 7, 2025
031cc08
feat: add temporal filtering stage to hybrid filter postprocessor
Nov 7, 2025
c509fd2
fix: update test fixtures to meet MIN_MENTIONS threshold and add api_key
Nov 7, 2025
a570b6e
feat: move context_window and num_output to per-LLM configuration
Nov 7, 2025
eb2e0f4
refactor: remove redundant postprocessor registration from __init__.py
Nov 10, 2025
3304fee
refactor: move domain-specific prompts to version-controlled directory
Nov 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,4 @@ jobs:
run: uv run pre-commit run --all-files

- name: Run tests
- run: uv run pytest tests
+ run: uv run pytest tests --ignore=tests/e2e
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ build/**/logs
configurations/*
!configurations/configuration.default.json
!configurations/configuration.local.json
!configurations/configuration.test*.json
data/
tmp/
# Byte-compiled / optimized / DLL files
Expand Down
37 changes: 37 additions & 0 deletions configurations/configuration.test-bundestag-combined.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"extraction": {
"orchestrator_name": "basic",
"datasources": [
{
"name": "bundestag",
"include_bundestag_mine": true,
"include_dip": true,
"dip_wahlperiode": 21,
"dip_sources": ["protocols"],
"export_limit": 2
}
]
},
"embedding": {
"orchestrator_name": "basic",
"embedder_name": "basic",
"embedding_model": {
"provider": "hugging_face",
"name": "intfloat/multilingual-e5-small",
"tokenizer_name": "intfloat/multilingual-e5-small",
"batch_size": 8,
"device": "cpu",
"splitter": {
"chunk_overlap_in_tokens": 50,
"chunk_size_in_tokens": 384
}
},
"vector_store": {
"name": "pgvector",
"database_name": "rag-local",
"collection_name": "e2e_test_bundestag_combined",
"host": "localhost",
"port": 5433
}
}
}
37 changes: 37 additions & 0 deletions configurations/configuration.test-bundestag-dip.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"extraction": {
"orchestrator_name": "basic",
"datasources": [
{
"name": "bundestag",
"include_bundestag_mine": false,
"include_dip": true,
"dip_wahlperiode": 21,
"dip_sources": ["protocols"],
"export_limit": 2
}
]
},
"embedding": {
"orchestrator_name": "basic",
"embedder_name": "basic",
"embedding_model": {
"provider": "hugging_face",
"name": "intfloat/multilingual-e5-small",
"tokenizer_name": "intfloat/multilingual-e5-small",
"batch_size": 8,
"device": "cpu",
"splitter": {
"chunk_overlap_in_tokens": 50,
"chunk_size_in_tokens": 384
}
},
"vector_store": {
"name": "pgvector",
"database_name": "rag-local",
"collection_name": "e2e_test_bundestag_dip",
"host": "localhost",
"port": 5433
}
}
}
35 changes: 35 additions & 0 deletions configurations/configuration.test-bundestag-mine.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"extraction": {
"orchestrator_name": "basic",
"datasources": [
{
"name": "bundestag",
"include_bundestag_mine": true,
"include_dip": false,
"export_limit": 5
}
]
},
"embedding": {
"orchestrator_name": "basic",
"embedder_name": "basic",
"embedding_model": {
"provider": "hugging_face",
"name": "intfloat/multilingual-e5-small",
"tokenizer_name": "intfloat/multilingual-e5-small",
"batch_size": 8,
"device": "cpu",
"splitter": {
"chunk_overlap_in_tokens": 50,
"chunk_size_in_tokens": 384
}
},
"vector_store": {
"name": "pgvector",
"database_name": "rag-local",
"collection_name": "e2e_test_bundestag_mine",
"host": "localhost",
"port": 5433
}
}
}
Loading
Loading