jmcentire
diff --git a/.constrain/sessions/c50e5d20-0809-4064-b922-256018ca9572.json b/.constrain/sessions/c50e5d20-0809-4064-b922-256018ca9572.json
Lines changed: 138 additions & 0 deletions
diff --git a/component_map.yaml b/component_map.yaml
Lines changed: 60 additions & 113 deletions
diff --git a/constraints.yaml b/constraints.yaml
Lines changed: 26 additions & 72 deletions
diff --git a/pact.yaml b/pact.yaml
Lines changed: 2 additions & 1 deletion
diff --git a/primer_story.md b/primer_story.md
Lines changed: 31 additions & 0 deletions
diff --git a/prompt.md b/prompt.md
Lines changed: 46 additions & 26 deletions
@@ -1,113 +1,60 @@
-version: "1"
-project: apprentice
-components:
-  - id: config_loader
-    role: library
-    authority: configuration
-    data_access:
-      PUBLIC: read
-
-  - id: task_registry
-    role: library
-    authority: task_definitions
-    data_access:
-      PUBLIC: read_write
-
-  - id: router
-    role: library
-    authority: traffic_routing
-    data_access:
-      PUBLIC: read_write
-
-  - id: remote_api_client
-    role: library
-    authority: frontier_api
-    data_access:
-      PUBLIC: read_write
-      AUTH: read
-
-  - id: local_model_server
-    role: library
-    authority: local_inference
-    data_access:
-      PUBLIC: read_write
-
-  - id: evaluators
-    role: library
-    authority: quality_scoring
-    data_access:
-      PUBLIC: read
-
-  - id: phase_manager
-    role: library
-    authority: phase_transitions
-    data_access:
-      PUBLIC: read_write
-
-  - id: training_data_store
-    role: library
-    authority: training_data
-    data_access:
-      PUBLIC: read_write
-      PII: write
-
-  - id: pii_tokenizer
-    role: library
-    authority: pii_protection
-    data_access:
-      PII: read_write
-
-  - id: fine_tuning_orchestrator
-    role: library
-    authority: model_training
-    data_access:
-      PUBLIC: read_write
-
-  - id: budget_manager
-    role: library
-    authority: cost_tracking
-    data_access:
-      FINANCIAL: read_write
-
-  - id: audit_log
-    role: library
-    authority: audit_trail
-    data_access:
-      PUBLIC: write
-      COMPLIANCE: write
-
-  - id: cli
-    role: ingress
-    protocol: cli
-    authority: user_commands
-    data_access:
-      PUBLIC: read_write
-
-edges:
-  - from: cli
-    to: router
-    tier: internal
-  - from: router
-    to: remote_api_client
-    tier: cross_boundary
-  - from: router
-    to: local_model_server
-    tier: cross_boundary
-  - from: router
-    to: phase_manager
-    tier: internal
-  - from: router
-    to: budget_manager
-    tier: internal
-  - from: training_data_store
-    to: pii_tokenizer
-    tier: internal
-  - from: fine_tuning_orchestrator
-    to: training_data_store
-    tier: internal
-  - from: phase_manager
-    to: evaluators
-    tier: internal
-  - from: router
-    to: audit_log
-    tier: internal
+core_models:
+  Story:
+    module: "apprentice.models.story"
+    dependencies: ["pydantic", "typing", "datetime"]
+    purpose: "Multi-step narrative representation with metadata"
+
+  StoryStep:
+    module: "apprentice.models.story_step"
+    dependencies: ["pydantic", "TrainingExample"]
+    purpose: "Individual step within story journey"
+
+collection_layer:
+  StoryCollector:
+    module: "apprentice.collectors.story_collector"
+    dependencies: ["Story", "StoryStep", "Chronicler"]
+    purpose: "Aggregate and process story data from Chronicler"
+    interfaces:
+      - collect_story_events()
+      - validate_story_consistency()
+      - emit_training_examples()
+
+evaluation_layer:
+  JourneyEvaluator:
+    module: "apprentice.evaluators.journey_evaluator"
+    dependencies: ["Story", "metrics"]
+    purpose: "Analyze journey patterns and efficiency"
+    interfaces:
+      - evaluate_journey_completion()
+      - measure_step_efficiency()
+      - detect_backtracking()
+      - score_consistency()
+
+orchestration:
+  EnhancedPhaseManager:
+    module: "apprentice.orchestration.phase_manager"
+    dependencies: ["existing PhaseManager", "JourneyEvaluator"]
+    purpose: "Per-journey-type phase transition tracking"
+    extension_points:
+      - journey_type_registration()
+      - phase_transition_callbacks()
+      - journey_specific_metrics()
+
+configuration:
+  StoryLearningConfig:
+    module: "apprentice.config.story_learning"
+    dependencies: ["pydantic", "base config"]
+    purpose: "Story learning feature configuration"
+    fields:
+      - story_learning_enabled: bool = False
+      - max_story_length: int = 50
+      - story_retention_days: int = 30
+
+integration_points:
+  existing_atomic_router:
+    modification: "none - preserved as-is"
+    integration: "parallel story collection when enabled"
+
+  training_orchestrator:
+    modification: "extended to handle Story objects"
+    backward_compatibility: "TrainingExample processing unchanged"
@@ -1,72 +1,26 @@
-version: "1"
-project: apprentice
-constraints:
-  - id: C001
-    name: pii_tokenization
-    description: All training data must be PII-tokenized before storage
-    severity: must
-    classification: PII
-    rationale: Raw PII in training data creates compliance liability
-
-  - id: C002
-    name: phase_validation
-    description: Phase transitions require statistical validation (correlation threshold) — no manual promotion
-    severity: must
-    classification: null
-    rationale: Premature promotion degrades user experience
-
-  - id: C003
-    name: budget_enforcement
-    description: API budget exhaustion must degrade gracefully (fall back to local model, never crash)
-    severity: must
-    classification: FINANCIAL
-    rationale: Budget overruns are unacceptable; service must continue
-
-  - id: C004
-    name: audit_append_only
-    description: Audit log is append-only JSONL with UTC timestamps — no updates or deletes
-    severity: must
-    classification: COMPLIANCE
-    rationale: Audit trail integrity is required for debugging and compliance
-
-  - id: C005
-    name: no_global_state
-    description: No global state — all dependencies passed explicitly
-    severity: must
-    classification: null
-    rationale: Global state prevents testing and makes composition impossible
-
-  - id: C006
-    name: shadow_phase_required
-    description: New tasks must start in shadow phase (100% frontier, local runs in background)
-    severity: must
-    classification: null
-    rationale: Local model quality is unknown until shadow phase proves correlation
-
-  - id: C007
-    name: external_boundary_abstraction
-    description: All external boundaries (APIs, model servers, I/O) must be behind abstract interfaces
-    severity: must
-    classification: null
-    rationale: Enables testing without network/GPU access
-
-  - id: C008
-    name: pact_key_traceability
-    description: Source modules with PACT keys must maintain them through code changes
-    severity: should
-    classification: null
-    rationale: PACT keys enable production attribution via Sentinel
-
-  - id: C009
-    name: config_fail_fast
-    description: Invalid configuration must cause immediate failure with clear error message
-    severity: must
-    classification: null
-    rationale: Silent config errors cause hard-to-diagnose runtime failures
-
-  - id: C010
-    name: test_isolation
-    description: All tests must run without GPU, API keys, or network access
-    severity: must
-    classification: null
-    rationale: CI environments don't have GPUs or API keys
+backward_compatibility:
+  atomic_routing: "must remain completely unaffected"
+  existing_tests: "all 2628 tests must pass without modification"
+  api_contracts: "no breaking changes to existing interfaces"
+
+configuration:
+  story_learning:
+    enabled_flag: "story_learning_enabled: true"
+    default_state: false
+    opt_in_required: true
+
+technical_constraints:
+  python_version: "3.12+"
+  pydantic_version: "v2"
+  new_dependencies: "strictly prohibited"
+  frozen_models: "constraint must be maintained"
+
+data_handling:
+  privacy: "story data retention must respect user privacy rights"
+  storage: "efficient storage patterns for multi-step narratives required"
+  consistency: "Chronicler and StoryCollector must maintain state agreement"
+
+performance:
+  existing_performance: "atomic task performance must not degrade"
+  story_overhead: "story learning overhead must be minimal when disabled"
+  memory_usage: "efficient memory management for long stories required"
@@ -1,4 +1,5 @@
-budget: 50.00
+budget: 20.00
+plan_only: true
 
 backend: anthropic
 model: claude-opus-4-6
 
@@ -0,0 +1,31 @@
+# Apprentice: Story-Level Learning
+
+## What This Is
+
+A targeted modification to Apprentice (adaptive model distillation) that adds support for learning from multi-step stories, in addition to the existing atomic request/response pairs.
+
+## Current State
+
+Apprentice routes requests between a frontier API and a local model. Training data is collected as TrainingExample objects (request_id, task_type, prompt, remote_response, local_response, phase, confidence). The fine-tuning orchestrator expects single (user_prompt, assistant_response) pairs. Phase transitions are tracked per task type.
+
+Evaluators score individual responses (exact_match, semantic_similarity, structured_match, llm_judge, custom). No multi-step evaluation exists.
+
+## What Changes
+
+1. Add Story and StoryStep models to data_models.py
+2. Add StoryCollector to training_data_store.py (store/retrieve stories, convert steps to sequential training examples)
+3. Add JourneyEvaluator to evaluators.py (scores: goal_completion, step_efficiency, backtracking, consistency)
+4. Extend phase manager for per-journey-type phase tracking
+5. Opt-in via config: story_learning_enabled: true
+
+## Why
+
+When Chronicler emits stories (multi-step event narratives), Apprentice can learn from sequential patterns rather than isolated exchanges. This enables journey-level optimization — the local model learns to handle multi-turn flows, and phase transitions can vary by journey type (checkout may be autonomous while support is still coaching).
+
+## Constraints
+
+- Backward compatible: existing atomic task routing unaffected
+- Story support opt-in via config
+- No new external dependencies
+- All existing 2628 tests must pass
+- Python 3.12+, Pydantic v2, frozen models
@@ -1,26 +1,46 @@
-# Apprentice — System Context
-
-## What It Is
-Adaptive model distillation. Routes between frontier API and local fine-tuned model, progressively shifting traffic as correlation proves quality.
-
-## How It Works
-Request -> Router -> [frontier | local] -> Evaluator -> Phase Manager
-Phases: shadow -> canary -> primary -> autonomous
-
-## Key Constraints
-- PII tokenized before storage (C001)
-- Phase transitions require statistical validation (C002)
-- Budget exhaustion degrades gracefully (C003)
-- Audit log is append-only (C004)
-- No global state (C005)
-- New tasks start in shadow phase (C006)
-
-## Architecture
-28 components (21 leaf + 7 compositions). Core: router, phase_manager, evaluators, budget_manager, pii_tokenizer, audit_log.
-
-## Done Checklist
-- [ ] PII tokenization verified before storage
-- [ ] Phase transition requires correlation threshold
-- [ ] Budget exhaustion falls back to local model
-- [ ] Tests pass without GPU/API/network
-- [ ] Audit trail is append-only and complete
+# Apprentice Story Learning Enhancement
+
+## Overview
+Extend the existing Apprentice system to support multi-step story learning while maintaining backward compatibility with atomic task routing.
+
+## Current System
+- Routes requests between frontier API and local model
+- Collects training data as request/response pairs
+- Handles atomic exchanges effectively
+- Has 2628 existing tests that must continue passing
+
+## Enhancement Goals
+Add journey-level optimization capabilities including:
+- Multi-turn conversation flow handling
+- Per-journey-type phase transition tracking
+- Goal completion detection and measurement
+- Step efficiency analysis
+- Backtracking pattern recognition
+- Multi-step consistency scoring
+
+## Key Requirements
+- **Backward Compatibility**: All existing atomic task routing must remain unaffected
+- **Opt-in Configuration**: Story learning enabled via `story_learning_enabled: true`
+- **No New Dependencies**: Add no new external dependencies; stay within the existing Python 3.12+ and Pydantic v2 stack
+- **Test Preservation**: All 2628 existing tests must pass
+- **Frozen Models**: Keep Pydantic models frozen (immutable), consistent with the existing models
+
+## New Components to Implement
+1. **Story Model**: Represents multi-step narratives with metadata
+2. **StoryStep Model**: Individual steps within a story journey
+3. **StoryCollector**: Aggregates and processes story data from Chronicler
+4. **JourneyEvaluator**: Analyzes journey patterns and efficiency metrics
+5. **Enhanced Phase Manager**: Extended for per-journey-type tracking
+
+## Integration Points
+- Chronicler will emit multi-step event narratives
+- StoryCollector processes these narratives into training data
+- JourneyEvaluator provides optimization insights
+- Phase manager tracks journey-specific transitions
+
+## Success Metrics
+- Journey completion rates by type
+- Multi-turn consistency scores
+- Step efficiency measurements
+- Backtracking frequency analysis
+- Goal achievement tracking