Writing inaccurate benchmarks
Not resetting or pausing the timer
// BenchmarkFoo measures foo after a one-time setup step. The timer is reset
// once setup has returned, so the cost of setup is not part of the result.
func BenchmarkFoo(b *testing.B) {
	setup()

	// Discard whatever time was accumulated before the measured loop.
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		foo()
	}
}
// BenchmarkFoo measures foo when setup has to run before every single call.
// The timer is paused around setup so that only foo contributes to the result.
func BenchmarkFoo(b *testing.B) {
	for n := 0; n < b.N; n++ {
		// Keep the per-iteration setup out of the measurement.
		b.StopTimer()
		setup()
		b.StartTimer()

		foo()
	}
}
Micro-benchmarks
// BenchmarkAtomicStoreInt32 measures the cost of a single atomic store into
// a 32-bit integer.
func BenchmarkAtomicStoreInt32(b *testing.B) {
	var target int32
	for n := 0; n < b.N; n++ {
		atomic.StoreInt32(&target, 1)
	}
}
// BenchmarkAtomicStoreInt64 measures the cost of a single atomic store into
// a 64-bit integer.
func BenchmarkAtomicStoreInt64(b *testing.B) {
	var target int64
	for n := 0; n < b.N; n++ {
		atomic.StoreInt64(&target, 1)
	}
}
Run the benchmark multiple times and evaluate statistics using benchstat
go test -bench=. -count=10 | tee stats.txt
benchstat stats.txt
Compiler optimizations
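This function is small enough that the compiler is likely to inline it; if its result is then never used, the call can be optimized away entirely, leaving the benchmark measuring an empty loop.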
// Bit masks used by popcnt.
const (
	m1  = 0x5555555555555555 // every other bit set: 0101...
	m2  = 0x3333333333333333 // alternating pairs of bits: 0011...
	m4  = 0x0f0f0f0f0f0f0f0f // low nibble of every byte
	h01 = 0x0101010101010101 // lowest bit of every byte
)

// popcnt returns the number of bits set to 1 in x.
func popcnt(x uint64) uint64 {
	// Count set bits within each 2-bit group.
	pairs := x - ((x >> 1) & m1)
	// Add neighbouring 2-bit counts into 4-bit groups.
	nibbles := (pairs & m2) + ((pairs >> 2) & m2)
	// Add neighbouring 4-bit counts into per-byte counts.
	perByte := (nibbles + (nibbles >> 4)) & m4
	// The multiplication accumulates all byte counts into the top byte.
	return (perByte * h01) >> 56
}
We can prevent the compiler from optimizing the call away by assigning each result to a local variable inside the loop and then assigning that variable to a package-level variable once the loop has finished.
// global receives the final benchmark result so that the value computed in
// the loop is observably used outside the function.
var global uint64

// BenchmarkPopcnt measures popcnt, called with the loop counter as input.
func BenchmarkPopcnt(b *testing.B) {
	var result uint64
	for n := 0; n < b.N; n++ {
		// Keep each result in a local variable inside the loop.
		result = popcnt(uint64(n))
	}
	// Publish the last result to the package-level variable.
	global = result
}
Observer effect
Mistake
Reusing the same resources across iterations lets the CPU keep the data in its caches, so we end up measuring a warm-cache run rather than how the function behaves on fresh data.
// rows is the number of matrix rows passed to createMatrix512.
const rows = 1000

// res receives the final sum so that the value computed in the benchmark
// loop is used outside the function.
var res int64

// BenchmarkCalculateSum512 measures calculateSum512 on a matrix that is
// built once, before the timer is reset, and then passed unchanged to every
// iteration.
func BenchmarkCalculateSum512(b *testing.B) {
	var sum int64
	s := createMatrix512(rows)
	// Exclude the matrix construction from the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// we keep reusing the same matrix
		sum = calculateSum512(s)
	}
	res = sum
}
Fix
// BenchmarkCalculateSum512 measures calculateSum512 on a matrix that is
// rebuilt for every iteration. The timer is paused while the matrix is
// created, so only the call to calculateSum512 is measured.
func BenchmarkCalculateSum512(b *testing.B) {
	var total int64
	for n := 0; n < b.N; n++ {
		// Build a fresh matrix with the timer stopped so that no iteration
		// receives the matrix used by the previous one.
		b.StopTimer()
		matrix := createMatrix512(rows)
		b.StartTimer()

		total = calculateSum512(matrix)
	}
	// Store the last sum in the package-level variable.
	res = total
}