-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathvalidate_reports.py
More file actions
83 lines (70 loc) · 3.31 KB
/
Copy pathvalidate_reports.py
File metadata and controls
83 lines (70 loc) · 3.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env python3
"""Validate orchestrator report directory: required files and minimal content checks."""
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
def validate_report_dir(report_root: Path) -> tuple[bool, list[str], list[str]]:
    """Validate an orchestrator report directory.

    Checks performed:

    * ``report_root`` is a directory.
    * ``REPORT.md`` exists and contains "Static Malware Analysis Report".
    * ``LLM_PROMPT.md`` exists and contains "LLM Triage Prompt" or
      "Static Malware Analysis".
    * Every non-hidden subdirectory holding ``triage.json`` or
      ``strings_report.txt`` is treated as a sample directory; its
      ``triage.json`` must be a JSON object without an ``"error"`` key and
      with at least one of ``"file_type"`` / ``"hashes"``. A missing
      ``strings_report.txt`` only produces a warning.

    Files that cannot be read or are not valid UTF-8 are reported as errors
    instead of raising. Sample directories are visited in name order so the
    returned messages are deterministic.

    Args:
        report_root: Path of the report directory to validate.

    Returns:
        ``(ok, errors, warnings)`` where ``ok`` is True exactly when
        ``errors`` is empty.
    """
    errors: list[str] = []
    warnings: list[str] = []
    report_root = report_root.resolve()
    if not report_root.is_dir():
        return False, [f"Not a directory: {report_root}"], []

    report_md = report_root / "REPORT.md"
    if not report_md.exists():
        errors.append("Missing REPORT.md")
    else:
        content, reason = _read_utf8(report_md)
        if content is None:
            errors.append(f"REPORT.md unreadable: {reason}")
        elif "Static Malware Analysis Report" not in content:
            errors.append("REPORT.md missing expected title")

    prompt_md = report_root / "LLM_PROMPT.md"
    if not prompt_md.exists():
        errors.append("Missing LLM_PROMPT.md")
    else:
        content, reason = _read_utf8(prompt_md)
        if content is None:
            errors.append(f"LLM_PROMPT.md unreadable: {reason}")
        elif "LLM Triage Prompt" not in content and "Static Malware Analysis" not in content:
            errors.append("LLM_PROMPT.md missing expected header")

    # Per-sample dirs: any non-hidden subdir that has triage.json or
    # strings_report.txt. Sorted by name because iterdir() order is arbitrary.
    subdirs = sorted(
        (d for d in report_root.iterdir() if d.is_dir() and not d.name.startswith(".")),
        key=lambda d: d.name,
    )
    sample_dirs = [
        d
        for d in subdirs
        if (d / "triage.json").exists() or (d / "strings_report.txt").exists()
    ]

    for sample_dir in sample_dirs:
        name = sample_dir.name
        triage_path = sample_dir / "triage.json"
        if not triage_path.exists():
            errors.append(f"{name}: missing triage.json")
        else:
            errors.extend(_check_triage(name, triage_path))
        if not (sample_dir / "strings_report.txt").exists():
            warnings.append(f"{name}: no strings_report.txt (step2 skipped or failed)")

    # Only flagged when subdirectories exist but none looks like a sample;
    # a report root with no subdirectories at all is not an error here.
    if not sample_dirs and subdirs:
        errors.append("No sample directories found (expected triage.json or strings_report.txt in subdirs)")

    return not errors, errors, warnings


def _read_utf8(path: Path) -> tuple[str | None, str | None]:
    """Read *path* as UTF-8 text; return ``(text, None)`` or ``(None, reason)`` on failure."""
    try:
        return path.read_text(encoding="utf-8"), None
    except (OSError, UnicodeDecodeError) as exc:
        return None, str(exc)


def _check_triage(name: str, triage_path: Path) -> list[str]:
    """Return the validation errors for one sample's triage.json (empty list if valid)."""
    text, reason = _read_utf8(triage_path)
    if text is None:
        return [f"{name}: triage.json unreadable: {reason}"]
    try:
        data = json.loads(text)
    except json.JSONDecodeError as e:
        return [f"{name}: triage.json invalid JSON: {e}"]
    if not isinstance(data, dict):
        return [f"{name}: triage.json invalid (not object)"]
    if "error" in data:
        return [f"{name}: triage has error: {data.get('error')}"]
    # Having either key is enough; the error is raised only when both are absent.
    if "file_type" not in data and "hashes" not in data:
        return [f"{name}: triage.json missing file_type/hashes"]
    return []
def main() -> int:
    """Command-line entry point.

    Parses an optional positional ``report_dir`` (default ``reports``), runs
    ``validate_report_dir`` on it, prints every warning to stderr, then either
    prints a success line to stdout and returns 0, or prints every error to
    stderr and returns 1.
    """
    parser = argparse.ArgumentParser(description="Validate orchestrator report directory")
    parser.add_argument(
        "report_dir",
        type=Path,
        nargs="?",
        default=Path("reports"),
        help="Report directory (default: reports)",
    )
    report_dir = parser.parse_args().report_dir

    ok, errors, warnings = validate_report_dir(report_dir)

    # Warnings are shown regardless of the overall outcome.
    for warning in warnings:
        print(f"Warning: {warning}", file=sys.stderr)

    if not ok:
        for error in errors:
            print(f"Error: {error}", file=sys.stderr)
        return 1

    print("OK: report directory valid.", flush=True)
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())