FixedSizeArrays.jl Showcase #62
Replies: 4 comments 3 replies
-
Adding vectors of
|
Beta Was this translation helpful? Give feedback.
-
Reshaping
|
Beta Was this translation helpful? Give feedback.
-
Reduced allocations in a function with side effects
julia> using FixedSizeArrays, BenchmarkTools, Random
julia> function f(T=Vector{Float64})
v = T(undef, 8)
rand!(v)
return length(v)
end
f (generic function with 2 methods)
julia> @code_llvm debuginfo=:none f(Vector{Float64})
; Function Signature: f(Type{Array{Float64, 1}})
define i64 @julia_f_1471() local_unnamed_addr #0 {
top:
%gcframe1 = alloca [3 x ptr], align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe1, i8 0, i64 24, i1 true)
%thread_ptr = call ptr asm "movq %fs:0, $0", "=r"() #12
%tls_ppgcstack = getelementptr inbounds i8, ptr %thread_ptr, i64 -8
%tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
store i64 4, ptr %gcframe1, align 8
%frame.prev = getelementptr inbounds nuw i8, ptr %gcframe1, i64 8
%task.gcstack = load ptr, ptr %tls_pgcstack, align 8
store ptr %task.gcstack, ptr %frame.prev, align 8
store ptr %gcframe1, ptr %tls_pgcstack, align 8
%ptls_field = getelementptr inbounds nuw i8, ptr %tls_pgcstack, i64 16
%ptls_load = load ptr, ptr %ptls_field, align 8
%"Memory{Float64}[]" = call noalias nonnull align 8 dereferenceable(96) ptr @ijl_gc_small_alloc(ptr %ptls_load, i32 600, i32 96, i64 133044644450816) #8
%"Memory{Float64}[].tag_addr" = getelementptr inbounds i8, ptr %"Memory{Float64}[]", i64 -8
store atomic i64 133044644450816, ptr %"Memory{Float64}[].tag_addr" unordered, align 8
%memory_ptr = getelementptr inbounds nuw i8, ptr %"Memory{Float64}[]", i64 8
%memory_data = getelementptr inbounds nuw i8, ptr %"Memory{Float64}[]", i64 16
store ptr %memory_data, ptr %memory_ptr, align 8
store i64 8, ptr %"Memory{Float64}[]", align 8
%gc_slot_addr_0 = getelementptr inbounds nuw i8, ptr %gcframe1, i64 16
store ptr %"Memory{Float64}[]", ptr %gc_slot_addr_0, align 8
%ptls_load16 = load ptr, ptr %ptls_field, align 8
%"new::Array" = call noalias nonnull align 8 dereferenceable(32) ptr @ijl_gc_small_alloc(ptr %ptls_load16, i32 408, i32 32, i64 133044570275280) #8
%"new::Array.tag_addr" = getelementptr inbounds i8, ptr %"new::Array", i64 -8
store atomic i64 133044570275280, ptr %"new::Array.tag_addr" unordered, align 8
%0 = getelementptr inbounds nuw i8, ptr %"new::Array", i64 8
store ptr %memory_data, ptr %"new::Array", align 8
store ptr %"Memory{Float64}[]", ptr %0, align 8
%"new::Array.size_ptr" = getelementptr inbounds nuw i8, ptr %"new::Array", i64 16
store i64 8, ptr %"new::Array.size_ptr", align 8
store ptr %"new::Array", ptr %gc_slot_addr_0, align 8
%1 = call i64 @j_xoshiro_bulk_simd_1475(ptr nonnull %memory_data, i64 signext 64)
%2 = sub i64 64, %1
%3 = getelementptr i8, ptr %memory_data, i64 %1
%4 = icmp eq i64 %1, 64
br i1 %4, label %L26, label %L24
L24: ; preds = %top
store ptr %"new::Array", ptr %gc_slot_addr_0, align 8
call void @j_xoshiro_bulk_nosimd_1476(ptr %3, i64 signext %2)
br label %L26
L26: ; preds = %L24, %top
%"new::Array.size6.0.copyload" = load i64, ptr %"new::Array.size_ptr", align 8
%frame.prev20 = load ptr, ptr %frame.prev, align 8
store ptr %frame.prev20, ptr %tls_pgcstack, align 8
ret i64 %"new::Array.size6.0.copyload"
}
julia> @code_llvm debuginfo=:none f(FixedSizeVectorDefault{Float64})
; Function Signature: f(Type{FixedSizeArrays.FixedSizeArray{Float64, 1, Memory{Float64}}})
define i64 @julia_f_1488() local_unnamed_addr #0 {
top:
%"Memory{Float64}[]" = alloca [80 x i8], align 16
call void @llvm.lifetime.start.p0(i64 80, ptr nonnull %"Memory{Float64}[]")
%0 = getelementptr inbounds nuw i8, ptr %"Memory{Float64}[]", i64 16
call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(80) %0, i8 0, i64 64, i1 false)
%memory_ptr = getelementptr inbounds nuw i8, ptr %"Memory{Float64}[]", i64 8
store ptr %0, ptr %memory_ptr, align 8
store i64 8, ptr %"Memory{Float64}[]", align 8
%1 = call i64 @j_xoshiro_bulk_simd_1491(ptr nonnull %0, i64 signext 64)
%2 = icmp eq i64 %1, 64
br i1 %2, label %L14, label %L13
L13: ; preds = %top
%3 = sub i64 64, %1
%4 = getelementptr i8, ptr %0, i64 %1
call void @j_xoshiro_bulk_nosimd_1492(ptr %4, i64 signext %3)
br label %L14
L14: ; preds = %L13, %top
ret i64 8
}
julia> @benchmark f(Vector{Float64})
BenchmarkTools.Trial: 10000 samples with 995 evaluations per sample.
Range (min … max): 29.652 ns … 3.009 μs ┊ GC (min … max): 0.00% … 97.53%
Time (median): 31.515 ns ┊ GC (median): 0.00%
Time (mean ± σ): 34.936 ns ± 44.664 ns ┊ GC (mean ± σ): 7.26% ± 8.08%
███
▃███▇▃▄▄▅▄▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂ ▃
29.7 ns Histogram: frequency by time 64.5 ns <
Memory estimate: 128 bytes, allocs estimate: 2.
julia> @benchmark f(FixedSizeVectorDefault{Float64})
BenchmarkTools.Trial: 10000 samples with 996 evaluations per sample.
Range (min … max): 22.383 ns … 39.769 ns ┊ GC (min … max): 0.00% … 0.00%
Time (median): 23.932 ns ┊ GC (median): 0.00%
Time (mean ± σ): 24.145 ns ± 0.913 ns ┊ GC (mean ± σ): 0.00% ± 0.00%
▁▂▃▃▄▅▆▇███▇▇▇▆▆▅▄▃▃▂▁ ▁▂▂▂▁▁▁ ▃
▆▇▇▆█████████████████████████▆▆▇▇███████████▇█▆▇▇▆██▇▇▇▇▆▆▆ █
22.4 ns Histogram: log(frequency) by time 27.9 ns <
Memory estimate: 0 bytes, allocs estimate: 0.
julia> versioninfo()
Julia Version 1.13.0-DEV.596
Commit 7df60f480df (2025-05-14 21:36 UTC)
Build Info:
Official https://julialang.org release
Platform Info:
OS: Linux (x86_64-linux-gnu)
CPU: 192 × AMD Ryzen Threadripper PRO 7995WX 96-Cores
WORD_SIZE: 64
LLVM: libLLVM-20.1.2 (ORCJIT, znver4)
GC: Built with stock GC
Threads: 1 default, 1 interactive, 1 GC (on 192 virtual cores)
The return value is fully statically inferred, and the memory allocations are fully removed (at least for length < 250; otherwise there are still fewer of them than with `Vector`).
|
Beta Was this translation helpful? Give feedback.
-
Creating a
|
Beta Was this translation helpful? Give feedback.
Uh oh!
There was an error while loading. Please reload this page.
-
I'm starting this discussion to collect examples of how `FixedSizeArray` can be better (smaller generated code and/or faster) than the standard `Array`. I'll start with the example in the `README.md`.
Simple function which can constant-propagate the size of the array
julia> code_llvm(h)
julia> versioninfo()
Julia Version 1.12.0-DEV.1082
Commit 58c7186d19* (2024-08-22 02:53 UTC)
Platform Info:
OS: macOS (arm64-apple-darwin23.4.0)
CPU: 8 × Apple M1
WORD_SIZE: 64
LLVM: libLLVM-18.1.7 (ORCJIT, apple-m1)
Threads: 1 default, 0 interactive, 1 GC (on 4 virtual cores)
Beta Was this translation helpful? Give feedback.
All reactions