Skip to content

Commit 54bfbd8

Browse files
ViralBShah and claude authored
Standardize benchmark methodology and enforce single-threaded execution (#112)
* Add Dependabot for monthly GitHub Actions updates

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Standardize benchmark methodology and enforce single-threaded execution

  - Go: replace testing.Benchmark (adaptive iterations, average time) with the standard 5-iteration minimum pattern used by all other languages
  - Lua: replace adaptive timing loop (ran until 2s elapsed) with fixed 5 iterations reporting minimum, matching all other languages
  - Add GOMAXPROCS=1, JULIA_NUM_THREADS=1, NUMBA_NUM_THREADS=1, and MKL_NUM_THREADS=1 to both the Makefile and CI workflow to enforce single-threaded execution across all language runtimes
  - Update Methodology and Notes docs to accurately describe how benchmarks are run: 5 internal iterations per script, 3 Makefile invocations, overall minimum via collect.jl, and all environment variables used

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent c04ee0e commit 54bfbd8

File tree

6 files changed

+108
-131
lines changed

6 files changed

+108
-131
lines changed

.github/workflows/benchmarks.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,15 @@ permissions:
1818
pull-requests: read
1919
statuses: write
2020

21+
# Force single-threaded execution for all benchmarks
22+
env:
23+
OMP_NUM_THREADS: 1
24+
OPENBLAS_NUM_THREADS: 1
25+
MKL_NUM_THREADS: 1
26+
GOMAXPROCS: 1
27+
JULIA_NUM_THREADS: 1
28+
NUMBA_NUM_THREADS: 1
29+
2130
# ---------------------------------------------------------------------------
2231
# Per-language benchmark jobs
2332
# ---------------------------------------------------------------------------

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ default: benchmarks.html
3131

3232
export OMP_NUM_THREADS=1
3333
export OPENBLAS_NUM_THREADS=1
34+
export MKL_NUM_THREADS=1
35+
export GOMAXPROCS=1
36+
export JULIA_NUM_THREADS=1
37+
export NUMBA_NUM_THREADS=1
3438

3539
dsfmt:
3640
mkdir -p dSFMT

docs/src/index.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,10 @@ Times are normalized relative to C.
2727

2828
## Methodology
2929

30-
- Each benchmark runs 5 iterations; the minimum time is taken
31-
- Single-threaded execution (`OMP_NUM_THREADS=1`, `OPENBLAS_NUM_THREADS=1`)
32-
- Julia results exclude compile time
30+
- Each language's benchmark script internally runs each benchmark 5 times and records the minimum time
31+
- The Makefile invokes each script 3 times; `collect.jl` takes the overall minimum across all runs
32+
- JIT languages (Julia, Numba) include a warmup pass before timing to exclude compilation overhead
33+
- Single-threaded execution enforced via environment variables (`OMP_NUM_THREADS=1`, `OPENBLAS_NUM_THREADS=1`, `MKL_NUM_THREADS=1`, `GOMAXPROCS=1`, `JULIA_NUM_THREADS=1`, `NUMBA_NUM_THREADS=1`)
3334
- Runs on GitHub Actions `ubuntu-latest` (x86_64, single core used)
3435
- Benchmarks test equivalent code patterns, not peak-optimized implementations
3536

docs/src/notes.md

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,22 @@ benchmarks use the same for-loop.
1717

1818
## Timing methodology
1919

20-
- Each benchmark is run 5 times; the minimum time is reported
21-
- Julia discards the first iteration as JIT warmup
22-
- Both `OMP_NUM_THREADS` and `OPENBLAS_NUM_THREADS` are set to 1 for deterministic single-threaded execution
20+
All languages follow the same pattern:
21+
22+
- Each benchmark is run 5 times internally; the minimum time is reported
23+
- The Makefile invokes each language's script 3 times (`ITERATIONS=3`), producing multiple sets of results
24+
- `bin/collect.jl` takes the overall minimum across all runs, so the final reported time is the best of up to 15 measurements
25+
- JIT languages (Julia, Numba) include a warmup pass before the 5 timed iterations to exclude compilation overhead
26+
27+
Environment:
28+
29+
- The following environment variables are set to 1 (via the Makefile and the GitHub Actions workflow) for deterministic single-threaded execution:
30+
- `OMP_NUM_THREADS=1` — OpenMP threads
31+
- `OPENBLAS_NUM_THREADS=1` — OpenBLAS threads
32+
- `MKL_NUM_THREADS=1` — Intel MKL threads (if linked)
33+
- `GOMAXPROCS=1` — maximum number of OS threads executing Go code simultaneously
34+
- `JULIA_NUM_THREADS=1` — Julia threads
35+
- `NUMBA_NUM_THREADS=1` — Numba parallel threads
2336
- Runs on GitHub Actions `ubuntu-latest` runners (x86_64)
2437

2538
## Matrix benchmarks and BLAS

go/perf.go

Lines changed: 71 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,12 @@ package main
1717

1818
import (
1919
"bufio"
20-
"errors"
2120
"fmt"
22-
"log"
2321
"math"
2422
"math/rand"
2523
"os"
2624
"strconv"
27-
"testing"
25+
"time"
2826

2927
"gonum.org/v1/gonum/mat"
3028
"gonum.org/v1/gonum/stat"
@@ -215,133 +213,85 @@ func pisum() float64 {
215213
return sum
216214
}
217215

218-
func print_perf(name string, time float64) {
219-
fmt.Printf("go,%v,%v\n", name, time*1000)
220-
}
216+
const NITER = 5
221217

222-
// run tests
218+
func print_perf(name string, t float64) {
219+
fmt.Printf("go,%v,%v\n", name, t*1000)
220+
}
223221

224-
func assert(b *testing.B, t bool) {
225-
if t != true {
226-
b.Fatal("assert failed")
222+
func timeit(name string, fn func()) {
223+
tmin := math.Inf(1)
224+
for i := 0; i < NITER; i++ {
225+
t := time.Now()
226+
fn()
227+
elapsed := time.Since(t).Seconds()
228+
if elapsed < tmin {
229+
tmin = elapsed
230+
}
227231
}
232+
print_perf(name, tmin)
228233
}
229234

235+
// run benchmarks
236+
230237
func main() {
231-
for _, bm := range benchmarks {
232-
seconds, err := runBenchmarkFor(bm.fn)
233-
if err != nil {
234-
log.Fatalf("%s %s", bm.name, err)
238+
n := 20
239+
sink = &n // prevent constant propagation of fib argument
240+
if fib(n) != 6765 {
241+
panic("unexpected value for fib(20)")
242+
}
243+
timeit("recursion_fibonacci", func() {
244+
fib(n)
245+
})
246+
247+
timeit("parse_integers", func() {
248+
for k := 0; k < 1000; k++ {
249+
n := rnd.Uint32()
250+
m, _ := strconv.ParseUint(strconv.FormatUint(uint64(n), 16), 16, 32)
251+
if uint32(m) != n {
252+
panic("incorrect value for m")
253+
}
235254
}
236-
print_perf(bm.name, seconds)
255+
})
256+
257+
if mandelperf() != 14791 {
258+
panic("unexpected value for mandelperf")
237259
}
238-
}
260+
timeit("userfunc_mandelbrot", func() {
261+
mandelperf()
262+
})
263+
264+
timeit("recursion_quicksort", func() {
265+
lst := make([]float64, 5000)
266+
for k := range lst {
267+
lst[k] = rnd.Float64()
268+
}
269+
qsort_kernel(lst, 0, len(lst)-1)
270+
})
239271

240-
func runBenchmarkFor(fn func(*testing.B)) (seconds float64, err error) {
241-
bm := testing.Benchmark(fn)
242-
if (bm.N == 0) {
243-
return 0, errors.New("failed")
272+
if math.Abs(pisum()-1.644834071848065) >= 1e-6 {
273+
panic("pi_sum out of range")
244274
}
245-
return bm.T.Seconds() / float64(bm.N), nil
246-
}
275+
timeit("iteration_pi_sum", func() {
276+
pisum()
277+
})
247278

248-
var benchmarks = []struct {
249-
name string
250-
fn func(*testing.B)
251-
}{
252-
{
253-
name: "recursion_fibonacci",
254-
fn: func(b *testing.B) {
255-
n := 20
256-
sink = &n // prevent constant propagation of fib argument
257-
for i := 0; i < b.N; i++ {
258-
if fib(n) != 6765 {
259-
b.Fatal("unexpected value for fib(20)")
260-
}
261-
}
262-
},
263-
},
264-
265-
{
266-
name: "parse_integers",
267-
fn: func(b *testing.B) {
268-
for i := 0; i < b.N; i++ {
269-
for k := 0; k < 1000; k++ {
270-
n := rnd.Uint32()
271-
m, _ := strconv.ParseUint(strconv.FormatUint(uint64(n), 16), 16, 32)
272-
if uint32(m) != n {
273-
b.Fatal("incorrect value for m")
274-
}
275-
}
276-
}
277-
},
278-
},
279-
280-
{
281-
name: "userfunc_mandelbrot",
282-
fn: func(b *testing.B) {
283-
for i := 0; i < b.N; i++ {
284-
if mandelperf() != 14791 {
285-
b.Fatal("unexpected value for mandelperf")
286-
}
287-
}
288-
},
289-
},
290-
291-
{
292-
name: "print_to_file",
293-
fn: func(b *testing.B) {
294-
for i := 0; i < b.N; i++ {
295-
printfd(100000)
296-
}
297-
},
298-
},
299-
300-
{
301-
name: "recursion_quicksort",
302-
fn: func(b *testing.B) {
303-
lst := make([]float64, 5000)
304-
b.ResetTimer()
305-
for i := 0; i < b.N; i++ {
306-
for k := range lst {
307-
lst[k] = rnd.Float64()
308-
}
309-
qsort_kernel(lst, 0, len(lst)-1)
310-
}
311-
},
312-
},
313-
314-
{
315-
name: "iteration_pi_sum",
316-
fn: func(b *testing.B) {
317-
for i := 0; i < b.N; i++ {
318-
if math.Abs(pisum()-1.644834071848065) >= 1e-6 {
319-
b.Fatal("pi_sum out of range")
320-
}
321-
}
322-
},
323-
},
324-
325-
{
326-
name: "matrix_statistics",
327-
fn: func(b *testing.B) {
328-
for i := 0; i < b.N; i++ {
329-
c1, c2 := randmatstat(1000)
330-
assert(b, 0.5 < c1)
331-
assert(b, c1 < 1.0)
332-
assert(b, 0.5 < c2)
333-
assert(b, c2 < 1.0)
334-
}
335-
},
336-
},
337-
338-
{
339-
name: "matrix_multiply",
340-
fn: func(b *testing.B) {
341-
for i := 0; i < b.N; i++ {
342-
c := randmatmul(1000)
343-
assert(b, c.At(0, 0) >= 0)
344-
}
345-
},
346-
},
279+
c1, c2 := randmatstat(1000)
280+
if !(0.5 < c1 && c1 < 1.0 && 0.5 < c2 && c2 < 1.0) {
281+
panic("randmatstat out of range")
282+
}
283+
timeit("matrix_statistics", func() {
284+
randmatstat(1000)
285+
})
286+
287+
timeit("matrix_multiply", func() {
288+
c := randmatmul(1000)
289+
if c.At(0, 0) < 0 {
290+
panic("unexpected negative value")
291+
}
292+
})
293+
294+
timeit("print_to_file", func() {
295+
printfd(100000)
296+
})
347297
}

lua/perf.lua

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,16 @@ local function elapsed(f)
3939
return t1 - t0, val1, val2
4040
end
4141

42+
local NITER = 5
43+
4244
local function timeit(f, name, check)
43-
local t, k, s = 1/0, 0, now_ms()
44-
while true do
45-
k = k + 1
45+
local t = 1/0
46+
for k = 1, NITER do
4647
local tx, val1, val2 = elapsed(f)
4748
t = min(t, tx)
4849
if check then
4950
check(val1, val2)
5051
end
51-
if k > 5 and (now_ms() - s) >= 2000 then break end
5252
end
5353
io.write(format('lua,%s,%g\n', name, t))
5454
end

0 commit comments

Comments
 (0)