Support structured signal context bias

Pigbibi · Pigbibi · commit b1b497965bf4 · 2026-05-31T15:56:46.000+08:00
diff --git a/README.md b/README.md
@@ -215,6 +215,29 @@ data/output/signal_history/YYYY-MM-DD.json
 All artifacts must remain shadow-only. They cannot encode broker orders, target
 quantities, or live allocation overrides.
 
+`candidate_bias` and `theme_bias` may use either the legacy compact form:
+
+```json
+{"MU": "watch"}
+```
+
+or the structured audit form:
+
+```json
+{
+  "MU": {
+    "bias": "watch",
+    "confidence": 0.55,
+    "linked_themes": ["hbm_memory"],
+    "rationale": "Shadow context only; not a trade instruction."
+  }
+}
+```
+
+`symbol_bias` is optional and accepts the same two forms for symbol-specific
+long-horizon context. Downstream Advisor code treats these fields as context and
+still blocks orders, target quantities, and portfolio weights.
+
 ## Replay Contract
 
 Historical validation should replay stored signal artifacts instead of asking a
diff --git a/README.zh-CN.md b/README.zh-CN.md
@@ -178,6 +178,27 @@ data/output/signal_history/YYYY-MM-DD.json
 
 所有 artifacts 必须保持 shadow-only。它们不能编码券商订单、目标数量或实盘 allocation override。
 
+`candidate_bias` 和 `theme_bias` 支持两种写法。兼容旧的紧凑写法：
+
+```json
+{"MU": "watch"}
+```
+
+也支持更适合审计的结构化写法：
+
+```json
+{
+  "MU": {
+    "bias": "watch",
+    "confidence": 0.55,
+    "linked_themes": ["hbm_memory"],
+    "rationale": "只作为 shadow context，不是交易指令。"
+  }
+}
+```
+
+`symbol_bias` 是可选字段，同样支持上述两种写法，用于表达单个 symbol 的长线背景。下游 Advisor 只把这些字段当作上下文，仍然禁止订单、目标股数和组合权重。
+
 ## Replay Contract
 
 历史验证必须 replay 已保存 signal artifacts，而不是让模型重新生成过去的判断。当前示例 policy 有意保持保守：
diff --git a/docs/architecture.md b/docs/architecture.md
@@ -106,16 +106,17 @@ The taxonomy intentionally covers multiple durable sectors:
 - consumer platforms, industrial automation, EV/auto, and crypto infrastructure
 
 Theme membership is static research context.  A symbol is not added to a theme
-just because it is hot this month.  Monthly AI output may express `theme_bias`,
-but downstream consumers must keep that output shadow-only and replay saved
-artifacts point-in-time.
+just because it is hot this month.  Monthly AI output may express `theme_bias`
+and optional `symbol_bias`; both accept structured values with bias, confidence,
+horizon, linked themes, rationale, and risk flags. Downstream consumers must
+keep that output shadow-only and replay saved artifacts point-in-time.
 
 This is the anti-overfit boundary:
 
 1. Define universe and theme exposure before looking at future returns.
 2. Save every AI theme judgment as an artifact.
 3. Replay only saved artifacts; never regenerate old model judgments.
-4. Treat theme bias as context, not as execution or allocation.
+4. Treat theme and symbol bias as context, not as execution or allocation.
 
 ## Horizon Boundary
 
diff --git a/examples/latest_signal.example.json b/examples/latest_signal.example.json
@@ -58,10 +58,42 @@
     "downstream_use": "Shadow context only; deterministic policy must explicitly opt in before any future use."
   },
   "theme_bias": {
-    "ai_compute": "watch",
-    "hbm_memory": "watch",
-    "ai_server_infrastructure": "watch",
-    "foundry_semicap": "watch",
+    "ai_compute": {
+      "bias": "watch",
+      "confidence": 0.42,
+      "horizon": "1-3 years",
+      "rationale": "AI infrastructure demand is durable but requires valuation and capex review.",
+      "risk_flags": [
+        "valuation_sensitive"
+      ]
+    },
+    "hbm_memory": {
+      "bias": "positive",
+      "confidence": 0.58,
+      "horizon": "1-3 years",
+      "rationale": "HBM and high-end memory remain linked to AI server buildout.",
+      "risk_flags": [
+        "memory_cycle_risk"
+      ]
+    },
+    "ai_server_infrastructure": {
+      "bias": "watch",
+      "confidence": 0.5,
+      "horizon": "1-3 years",
+      "rationale": "AI server infrastructure demand remains visible but margins and order conversion need review.",
+      "risk_flags": [
+        "margin_risk"
+      ]
+    },
+    "foundry_semicap": {
+      "bias": "watch",
+      "confidence": 0.46,
+      "horizon": "1-3 years",
+      "rationale": "Foundry and semiconductor capital spending are long-cycle context themes.",
+      "risk_flags": [
+        "capex_cycle_risk"
+      ]
+    },
     "defense_aerospace": "watch",
     "healthcare_policy": "neutral",
     "energy_security": "neutral",
@@ -92,5 +124,34 @@
     "XLF": [
       "financial_market_infrastructure"
     ]
+  },
+  "symbol_bias": {
+    "MU": {
+      "bias": "watch",
+      "confidence": 0.54,
+      "linked_themes": [
+        "hbm_memory",
+        "ai_compute"
+      ],
+      "rationale": "HBM exposure is positive context, but memory cycle and valuation still need confirmation."
+    },
+    "INTC": {
+      "bias": "watch",
+      "confidence": 0.45,
+      "linked_themes": [
+        "foundry_semicap",
+        "ai_compute"
+      ],
+      "rationale": "Foundry and domestic semiconductor policy are long-horizon context, but execution risk remains high."
+    },
+    "DELL": {
+      "bias": "watch",
+      "confidence": 0.47,
+      "linked_themes": [
+        "ai_server_infrastructure",
+        "ai_compute"
+      ],
+      "rationale": "AI server demand is relevant, while margin and backlog conversion need review."
+    }
   }
 }
diff --git a/pyproject.toml b/pyproject.toml
@@ -17,5 +17,5 @@ test = ["pytest>=8"]
 where = ["src"]
 
 [tool.pytest.ini_options]
-pythonpath = ["src"]
+pythonpath = ["src", "."]
 testpaths = ["tests"]
diff --git a/src/research_signal_context_pipelines/schema.py b/src/research_signal_context_pipelines/schema.py
@@ -69,6 +69,13 @@ def _require_string_list(value: Any, name: str, *, allow_empty: bool = False) ->
     return result
 
 
+def _require_number_0_1(value: Any, name: str) -> None:
+    if not isinstance(value, (int, float)) or isinstance(value, bool):
+        raise SignalValidationError(f"{name} must be numeric")
+    if value < 0 or value > 1:
+        raise SignalValidationError(f"{name} must be between 0 and 1")
+
+
 def validate_signal(payload: Mapping[str, Any]) -> None:
     missing = [key for key in REQUIRED_TOP_LEVEL_KEYS if key not in payload]
     if missing:
@@ -94,17 +101,15 @@ def validate_signal(payload: Mapping[str, Any]) -> None:
 
     if "theme_bias" in payload:
         _validate_bias_mapping(_require_mapping(payload["theme_bias"], "theme_bias"), "theme_bias")
+    if "symbol_bias" in payload:
+        _validate_bias_mapping(_require_mapping(payload["symbol_bias"], "symbol_bias"), "symbol_bias")
     if "symbol_theme_exposure" in payload:
         symbol_theme_exposure = _require_mapping(payload["symbol_theme_exposure"], "symbol_theme_exposure")
         for symbol, theme_ids in symbol_theme_exposure.items():
             _require_string(symbol, "symbol_theme_exposure key")
             _require_string_list(theme_ids, f"symbol_theme_exposure[{symbol!r}]")
 
-    confidence = payload["confidence"]
-    if not isinstance(confidence, (int, float)) or isinstance(confidence, bool):
-        raise SignalValidationError("confidence must be numeric")
-    if confidence < 0 or confidence > 1:
-        raise SignalValidationError("confidence must be between 0 and 1")
+    _require_number_0_1(payload["confidence"], "confidence")
 
     evidence = _require_mapping(payload["evidence"], "evidence")
     _require_string_list(evidence.get("sources"), "evidence.sources")
@@ -121,7 +126,22 @@ def validate_signal(payload: Mapping[str, Any]) -> None:
 def _validate_bias_mapping(mapping: Mapping[str, Any], name: str) -> None:
     for key, bias in mapping.items():
         _require_string(key, f"{name} key")
-        if bias not in ALLOWED_BIAS_VALUES:
-            raise SignalValidationError(
-                f"{name}[{key!r}] must be one of: {', '.join(sorted(ALLOWED_BIAS_VALUES))}"
-            )
+        _validate_bias_value(bias, f"{name}[{key!r}]")
+
+
+def _validate_bias_value(value: Any, name: str) -> None:
+    if isinstance(value, str):
+        bias = value
+    else:
+        raw = _require_mapping(value, name)
+        bias = _require_string(raw.get("bias"), f"{name}.bias")
+        if "confidence" in raw:
+            _require_number_0_1(raw["confidence"], f"{name}.confidence")
+        for optional_key in ("rationale", "horizon"):
+            if optional_key in raw:
+                _require_string(raw[optional_key], f"{name}.{optional_key}")
+        for optional_list_key in ("risk_flags", "linked_themes"):
+            if optional_list_key in raw:
+                _require_string_list(raw[optional_list_key], f"{name}.{optional_list_key}", allow_empty=True)
+    if bias not in ALLOWED_BIAS_VALUES:
+        raise SignalValidationError(f"{name} must be one of: {', '.join(sorted(ALLOWED_BIAS_VALUES))}")
diff --git a/tests/test_signal_validation.py b/tests/test_signal_validation.py
@@ -53,9 +53,40 @@ def test_signal_accepts_optional_theme_bias_and_exposure() -> None:
     validate_signal(payload)
 
 
+def test_signal_accepts_structured_theme_and_symbol_bias() -> None:
+    payload = load_example()
+    payload["theme_bias"] = {
+        "hbm_memory": {
+            "bias": "positive",
+            "confidence": 0.62,
+            "horizon": "1-3 years",
+            "rationale": "HBM demand remains a long-horizon research context.",
+            "risk_flags": ["cycle_risk"],
+        }
+    }
+    payload["symbol_bias"] = {
+        "MU": {
+            "bias": "watch",
+            "confidence": 0.55,
+            "linked_themes": ["hbm_memory"],
+            "rationale": "Symbol-level shadow context remains watch-only.",
+        }
+    }
+
+    validate_signal(payload)
+
+
 def test_signal_rejects_invalid_theme_bias() -> None:
     payload = load_example()
     payload["theme_bias"] = {"hbm_memory": "hot"}
 
     with pytest.raises(SignalValidationError, match="theme_bias"):
         validate_signal(payload)
+
+
+def test_signal_rejects_invalid_structured_bias_confidence() -> None:
+    payload = load_example()
+    payload["symbol_bias"] = {"MU": {"bias": "watch", "confidence": 1.5}}
+
+    with pytest.raises(SignalValidationError, match="confidence"):
+        validate_signal(payload)