# Page header carried over from the web view this workflow was copied from:
# [Leo] Fix polybench-segmented.yml — correct ELF filenames and benchma… #24

# Runs h5_accuracy_report.py on an Apple Silicon runner, uploads the generated
# report files as an artifact, writes a run summary, and (on main only) posts
# the headline numbers as a comment on issue 460.
name: H5 Accuracy Report

on:
  workflow_dispatch:  # Allow manual triggering
  push:
    branches: [main]
    paths:
      - 'benchmarks/**'
      - 'h5_accuracy_report.py'
      - 'timing/**'

# Runs for the same ref share one group; an in-progress run is never cancelled
# by a newer one.
concurrency:
  group: h5-accuracy-report-${{ github.ref }}
  cancel-in-progress: false

jobs:
  h5-accuracy-report:
    name: Generate H5 Milestone Accuracy Report
    runs-on: macos-14  # Apple Silicon runner for native M2 matching
    timeout-minutes: 120  # Extended timeout for comprehensive accuracy testing
    # Least-privilege token: read the repository for checkout, and write issue
    # comments for the "Comment on H5 Issue" step.
    permissions:
      contents: read
      issues: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.25'

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install Python dependencies
        run: |
          pip install matplotlib numpy scipy

      # The module path must be github.com/onsi/ginkgo/v2; `go install` rejects
      # a path that does not match the module's own go.mod declaration.
      - name: Install Ginkgo
        run: go install github.com/onsi/ginkgo/v2/ginkgo@latest

      # The two verification steps are informational only: a missing ELF prints
      # a notice instead of failing the job.
      - name: Verify PolyBench ELFs exist
        run: |
          echo "Checking PolyBench ELF files..."
          ls -la benchmarks/polybench/*.elf || echo "Some ELF files missing - will skip those benchmarks"

      - name: Verify EmBench ELFs exist
        run: |
          echo "Checking EmBench ELF files..."
          ls -la \
            benchmarks/aha-mont64-m2sim/*.elf \
            benchmarks/crc32-m2sim/*.elf \
            benchmarks/edn-m2sim/*.elf \
            benchmarks/huffbench-m2sim/*.elf \
            benchmarks/matmult-int-m2sim/*.elf \
            benchmarks/statemate-m2sim/*.elf \
            benchmarks/primecount-m2sim/*.elf \
            || echo "Some EmBench ELF files missing - will skip those benchmarks"

      - name: Run H5 Accuracy Report
        run: |
          echo "Running H5 accuracy framework..."
          python3 h5_accuracy_report.py

      # Runs even when the report step failed so that any files it did produce
      # are still available from the run page that the issue comment links to.
      - name: Upload H5 Accuracy Report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: h5-accuracy-report
          path: |
            h5_accuracy_report.md
            h5_accuracy_results.json
            benchmarks/native/accuracy_report.md
            benchmarks/native/accuracy_figure.png
            benchmarks/native/accuracy_results.json
            benchmarks/native/accuracy_normalized.pdf
          retention-days: 90

      # Each field is read with its own python3 call so that one missing key
      # only degrades that field to its fallback ("0", "N/A" or "unknown").
      - name: Post H5 Report Summary
        if: always()
        run: |
          {
            echo "## H5 Milestone Accuracy Report"
            echo ""
            if [ -f h5_accuracy_results.json ]; then
              TOTAL_BENCHMARKS=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(d['h5_milestone']['total_benchmarks'])" 2>/dev/null || echo "0")
              OVERALL_ERROR=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(f\"{d['h5_milestone']['overall_average_error']*100:.1f}%\")" 2>/dev/null || echo "N/A")
              H5_STATUS=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(d['h5_milestone']['status'])" 2>/dev/null || echo "unknown")
              MICRO_COUNT=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(d['categories']['microbenchmarks']['count'])" 2>/dev/null || echo "0")
              POLYBENCH_COUNT=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(d['categories']['polybench']['count'])" 2>/dev/null || echo "0")
              MICRO_ERROR=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(f\"{d['categories']['microbenchmarks']['average_error']*100:.1f}%\")" 2>/dev/null || echo "N/A")
              POLYBENCH_ERROR=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(f\"{d['categories']['polybench']['average_error']*100:.1f}%\")" 2>/dev/null || echo "N/A")
              echo "### H5 Status: $H5_STATUS"
              echo ""
              echo "- **Total Benchmarks:** $TOTAL_BENCHMARKS (Target: 15+)"
              echo "- **Overall Average Error:** $OVERALL_ERROR (Target: <20%)"
              echo ""
              echo "### Breakdown by Category"
              echo "- **Microbenchmarks:** $MICRO_COUNT benchmarks, $MICRO_ERROR average error"
              echo "- **PolyBench Intermediate:** $POLYBENCH_COUNT benchmarks, $POLYBENCH_ERROR average error"
              echo ""
              echo "See the uploaded artifacts for the detailed H5 accuracy report."
            else
              echo "⚠️ H5 accuracy report generation failed."
            fi
          } >> "$GITHUB_STEP_SUMMARY"

      # Best effort: a failed `gh issue comment` is logged but does not fail
      # the job. Requires the job-level `issues: write` permission.
      - name: Comment on H5 Issue
        if: github.ref == 'refs/heads/main' && always()
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          if [ -f h5_accuracy_results.json ]; then
            TOTAL_BENCHMARKS=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(d['h5_milestone']['total_benchmarks'])" 2>/dev/null || echo "0")
            OVERALL_ERROR=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(f\"{d['h5_milestone']['overall_average_error']*100:.1f}%\")" 2>/dev/null || echo "N/A")
            H5_STATUS=$(python3 -c "import json; d=json.load(open('h5_accuracy_results.json')); print(d['h5_milestone']['status'])" 2>/dev/null || echo "unknown")
            # Blank lines separate the Markdown blocks; without one after the
            # bullet list the following lines would be absorbed into the last
            # bullet.
            COMMENT_BODY="# [CI] H5 Accuracy Framework Results

          ## H5 Milestone Validation Complete

          **Status**: ${H5_STATUS}

          - **Total Benchmarks**: ${TOTAL_BENCHMARKS} (Target: 15+)
          - **Overall Average Error**: ${OVERALL_ERROR} (Target: <20%)

          **Commit**: ${GITHUB_SHA:0:8}
          **Workflow Run**: [View Details]($GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID)

          [Download H5 Accuracy Report Artifacts]($GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID)"
            gh issue comment 460 --body "$COMMENT_BODY" || echo "Failed to comment on issue #460"
          fi