diff --git a/docs/src/index.md b/docs/src/index.md index a3da1678..f300aa56 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -33,7 +33,7 @@ For quick sanity checks, one can use the [`@btime` macro](https://github.com/Jul ```julia julia> @btime sin(x) setup=(x=rand()) - 4.361 ns (0 allocations: 0 bytes) + min 3.041 ns, mean 3.487 ns (0 allocations) 0.49587200950472454 ``` @@ -45,16 +45,16 @@ If the expression you want to benchmark depends on external variables, you shoul Essentially, any interpolated variable `$x` or expression `$(...)` is "pre-computed" before benchmarking begins: ```julia -julia> A = rand(3,3); +julia> A = rand(8,8); julia> @btime inv($A); # we interpolate the global variable A with $A - 1.191 μs (10 allocations: 2.31 KiB) + min 951.000 ns, mean 1.349 μs (4 allocations, 4.81 KiB) -julia> @btime inv($(rand(3,3))); # interpolation: the rand(3,3) call occurs before benchmarking - 1.192 μs (10 allocations: 2.31 KiB) +julia> @btime inv($(rand(8,8))); # interpolation: the rand() call occurs before benchmarking + min 930.542 ns, mean 1.363 μs (4 allocations, 4.81 KiB) -julia> @btime inv(rand(3,3)); # the rand(3,3) call is included in the benchmark time - 1.295 μs (11 allocations: 2.47 KiB) +julia> @btime inv(rand(8,8)); # the rand() call is included in the benchmark time + min 1.092 μs, mean 1.572 μs (5 allocations, 5.38 KiB) ``` Sometimes, interpolating variables into very simple expressions can give the compiler more information than you intended, causing it to "cheat" the benchmark by hoisting the calculation out of the benchmark code @@ -63,13 +63,13 @@ julia> a = 1; b = 2 2 julia> @btime $a + $b - 0.024 ns (0 allocations: 0 bytes) + min 0.875 ns, mean 0.950 ns (0 allocations) 3 ``` As a rule of thumb, if a benchmark reports that it took less than a nanosecond to perform, this hoisting probably occured. You can avoid this by referencing and dereferencing the interpolated variables ```julia julia> @btime $(Ref(a))[] + $(Ref(b))[] - 1.277 ns (0 allocations: 0 bytes) + min 0.875 ns, mean 0.953 ns (0 allocations) 3 ``` diff --git a/docs/src/manual.md b/docs/src/manual.md index 8fd5105a..e62fad74 100644 --- a/docs/src/manual.md +++ b/docs/src/manual.md @@ -65,12 +65,15 @@ before returning the value of the expression, while `@belapsed` returns the minimum time in seconds. ```julia -julia> @btime sin(1) - 13.612 ns (0 allocations: 0 bytes) -0.8414709848078965 +julia> @btime sin.(0:2) + min 25.687 ns, mean 28.266 ns (1 allocation, 80 bytes) +3-element Vector{Float64}: + 0.0 + 0.8414709848078965 + 0.9092974268256817 -julia> @belapsed sin(1) -1.3614228456913828e-8 +julia> @belapsed sin.(0:2) +2.5727911646586344e-8 ``` ### Benchmark `Parameters` @@ -280,13 +283,13 @@ julia> a = 1; b = 2 2 julia> @btime $a + $b - 0.024 ns (0 allocations: 0 bytes) + min 0.833 ns, mean 0.944 ns (0 allocations) 3 ``` in this case julia was able to use the properties of `+(::Int, ::Int)` to know that it could safely replace `$a + $b` with `3` at compile time. We can stop the optimizer from doing this by referencing and dereferencing the interpolated variables ```julia julia> @btime $(Ref(a))[] + $(Ref(b))[] - 1.277 ns (0 allocations: 0 bytes) + min 0.875 ns, mean 0.948 ns (0 allocations) 3 ``` diff --git a/src/execution.jl b/src/execution.jl index d2346a66..44404d71 100644 --- a/src/execution.jl +++ b/src/execution.jl @@ -554,15 +554,31 @@ this executes an expression, printing the time it took to execute and the memory allocated before returning the value of the expression. -Unlike `@time`, it uses the `@benchmark` -macro, and accepts all of the same additional -parameters as `@benchmark`. The printed time -is the *minimum* elapsed time measured during the benchmark. +However, it uses the [`@benchmark`](@ref) to evaluate +`expression` many times, and accepts all of the same +additional parameters as `@benchmark`. It prints both +the minimum and the mean elapsed time, plus information +about allocations (if any) and time spent on +memory garbage collection (GC). + +# Examples +``` +julia> @btime log(x[]) setup=(x=Ref(2.0)) + min 3.666 ns, mean 3.772 ns (0 allocations) +0.6931471805599453 + +julia> @btime sum(log, \$(fill(2.0, 1000))) + min 3.391 μs, mean 3.441 μs (0 allocations) +693.1471805599322 + +julia> @btime rand(1000); + min 724.432 ns, mean 1.462 μs (1 allocation, 7.94 KiB. GC mean 352 ns, 24.11%) +``` """ macro btime(args...) _, params = prunekwargs(args...) bench, trial, result = gensym(), gensym(), gensym() - trialmin, trialallocs = gensym(), gensym() + trialmin, trialmean, trialallocs = gensym(), gensym(), gensym() tune_phase = hasevals(params) ? :() : :($BenchmarkTools.tune!($bench)) return esc(quote local $bench = $BenchmarkTools.@benchmarkable $(args...) @@ -570,12 +586,18 @@ macro btime(args...) $tune_phase local $trial, $result = $BenchmarkTools.run_result($bench) local $trialmin = $BenchmarkTools.minimum($trial) + local $trialmean = $BenchmarkTools.mean($trial) local $trialallocs = $BenchmarkTools.allocs($trialmin) - println(" ", - $BenchmarkTools.prettytime($BenchmarkTools.time($trialmin)), - " (", $trialallocs , " allocation", - $trialallocs == 1 ? "" : "s", ": ", - $BenchmarkTools.prettymemory($BenchmarkTools.memory($trialmin)), ")") + $print(" min ") + $printstyled($BenchmarkTools.prettytime($BenchmarkTools.time($trialmin)); bold=true) + $print(", ") + $print("mean ") + $printstyled($BenchmarkTools.prettytime($BenchmarkTools.time($trialmean)); bold=true) + $print(" (", $trialallocs , " allocation", $trialallocs == 1 ? "" : "s") + if $trialallocs != 0 + $print(", ", $prettymemory($memory($trialmin))) + end + $println(")") $result end) end diff --git a/src/trials.jl b/src/trials.jl index 639ee794..3b9bcbf0 100644 --- a/src/trials.jl +++ b/src/trials.jl @@ -253,7 +253,7 @@ function prettydiff(p) return string(diff >= 0.0 ? "+" : "", @sprintf("%.2f", diff * 100), "%") end -function prettytime(t) +function prettytime(t; short=false) if t < 1e3 value, units = t, "ns" elseif t < 1e6 @@ -263,7 +263,11 @@ function prettytime(t) else value, units = t / 1e9, "s" end - return string(@sprintf("%.3f", value), " ", units) + if short + string(@sprintf("%.0f", value), " ", units) # "%.3g" also OK, always 3 numbers + else + string(@sprintf("%.3f", value), " ", units) + end end function prettymemory(b) diff --git a/test/ExecutionTests.jl b/test/ExecutionTests.jl index 7b71c716..626dbbeb 100644 --- a/test/ExecutionTests.jl +++ b/test/ExecutionTests.jl @@ -227,6 +227,7 @@ str = String(take!(io)) let fname = tempname() try + # simple function, zero allocations ret = open(fname, "w") do f redirect_stdout(f) do x = 1 @@ -238,7 +239,21 @@ let fname = tempname() end s = read(fname, String) try - @test occursin(r"[0-9.]+ \w*s \([0-9]* allocations?: [0-9]+ bytes\)", s) + @test occursin(r"min [0-9.]+ \w*s, mean [0-9.]+ \w*s \(0 allocations\)", s) + catch + println(stderr, "@btime output didn't match ", repr(s)) + rethrow() + end + # function which allocates + ret2 = open(fname, "w") do f + redirect_stdout(f) do + y = @btime(sum(log, ones(100))) + @test y ≈ 0 + end + end + s2 = read(fname, String) + try + @test occursin(r", mean [0-9.]+ \w*s \([0-9]* allocations?, [0-9]+ bytes", s2) catch println(stderr, "@btime output didn't match ", repr(s)) rethrow()