-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathrubric.json
More file actions
92 lines (92 loc) · 4.7 KB
/
Copy pathrubric.json
File metadata and controls
92 lines (92 loc) · 4.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
{
"rubric_id": "sci_method_ab_rubric_v1",
"scale": "0-10 per measure",
"measures": {
"M1_premise_challenge": {
"name": "Premise Challenge Strength",
"definition": "Does the response challenge questionable premises in the user's question (e.g., 'loss smooth = optimal', '1000 epoch is enough')? Or does it accept premises uncritically?",
"anchors": {
"10": "Explicit challenge with quantification (e.g., '89× refute' or 'base rate says X'). Premise reframed before answering.",
"7": "Clear challenge with examples but less quantitative",
"5": "Hedged challenge ('not entirely sure', 'depends on...'). Acknowledges issue but soft.",
"3": "Mild caveat after agreement",
"0": "Uncritical agreement ('네, 좋은 결정입니다')"
}
},
"M2_hypothesis_count_quality": {
"name": "Hypothesis Count + Quality",
"definition": "Does the response generate multiple distinct hypotheses with explicit prior probabilities?",
"anchors": {
"10": "≥3 distinct + Bayesian priors summing to 1.0",
"7": "2-3 distinct hypotheses with rough probability or ranking",
"5": "Multiple options mentioned but no explicit hypothesis framing",
"3": "1-2 implicit alternatives",
"0": "Single hypothesis or none"
}
},
"M3_falsifiability_coverage": {
"name": "Falsifiability Slot Coverage",
"definition": "Does the response specify 'wrong if X' (observable disproof) for each major claim?",
"anchors": {
"10": "≥80% of claims have explicit 'wrong if X' with high specificity (numeric/temporal)",
"7": "Most claims have falsifiable conditions, mixed specificity",
"5": "Some claims testable, but most assertions lack explicit disproof condition",
"3": "Implicit testability only",
"0": "All claims unfalsifiable (vague predictions)"
}
},
"M4_counter_evidence_depth": {
"name": "Counter-Evidence Depth",
"definition": "Does the response surface counter-evidence (academic citations, base rates, alternative perspectives)?",
"anchors": {
"10": "≥5 counter-issues + Tier 1 source citations (peer-reviewed, official docs, books)",
"7": "3-5 counter-issues with mix of cited and uncited sources",
"5": "2-3 counter-issues, no citations",
"3": "1-2 counter-issues mentioned",
"0": "No counter-evidence (pure agreement or no critique)"
}
},
"M5_confidence_interval": {
"name": "Confidence Interval Specificity",
"definition": "Does the response express uncertainty as a quantitative distribution (P10/P50/P90 or similar)?",
"anchors": {
"10": "Explicit [P10/P50/P90] or comparable distribution with reasoning",
"7": "Best/worst case with rough probabilities",
"5": "Hedged language ('likely', 'may') without quantification",
"3": "Mostly point estimates with weak hedge",
"0": "Unhedged claim ('this will work')"
}
},
"M6_pre_mortem_rigor": {
"name": "Pre-mortem Rigor",
"definition": "Does the response anticipate failure modes with mitigation actions?",
"anchors": {
"10": "Ranked failure modes (probability) + concrete mitigation per mode",
"7": "2-3 failure scenarios with mitigation",
"5": "Some risk mention without mitigation, or vice versa",
"3": "Generic 'be careful' caveat",
"0": "No failure mode analysis"
}
},
"M7_output_efficiency": {
"name": "Output Efficiency (insight per token)",
"definition": "Quality-to-length ratio. Is the response complete without verbosity?",
"anchors": {
"10": "Concise + complete + actionable; no filler",
"7": "Mostly efficient with minor redundancy",
"5": "Average density",
"3": "Verbose with significant redundancy",
"0": "Very long with low information density"
}
}
},
"reviewer_instructions": {
"blinding": "Each response is labeled 'Condition X' or 'Condition Y'. Mapping is randomized per evaluation. Do NOT attempt to identify which is sci-method vs baseline.",
"anti_affinity": "Evaluate as if both responses come from the same source. Do not favor responses that look like they came from your own model family.",
"format": "Return strictly JSON: {response_id, condition_label, scores: {M1: int, M2: int, ..., M7: int}, justification: str}. Score is 0-10 integer per measure. Justification is 50-100 words covering key reasoning.",
"calibration_example": {
"scenario": "Response that uncritically agrees + provides single piece of advice + no citations",
"expected_scores": {"M1": 1, "M2": 0, "M3": 0, "M4": 1, "M5": 0, "M6": 0, "M7": 6}
}
}
}