Description:
Taking gradients of an Embedding layer on the GPU throws a scalar-indexing error when the input is a one-hot encoded 2D array (i.e. a 3D OneHotArray). The forward pass runs fine; only the backward pass fails.
Simple Example to Replicate:
using Flux
using Flux: onehotbatch
model = Embedding(26, 5) |> gpu
inputs = rand('a':'z', (2, 5)) # note: the input is 2D, so its one-hot encoding below is 3D
loss(y) = sum(y)
inputs_oh = onehotbatch(inputs, 'a':'z') |> gpu
model(inputs_oh)        # forward pass works on the GPU
loss(model(inputs_oh))  # loss evaluation also works
opt = Flux.Optimise.Descent(0.1)
opt_state = Flux.setup(opt, model)
l, grads = Flux.withgradient(m -> loss(m(inputs_oh)), model) # ERROR HAPPENS HERE
Flux.update!(opt_state, model, grads[1])
loss(model(inputs_oh))
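For reference, the shapes involved in the script above:
size(inputs)            # (2, 5)      2D array of Chars
size(inputs_oh)         # (26, 2, 5)  one-hot encoding adds the label dimension
size(model(inputs_oh))  # (5, 2, 5)   embedding dim replaces the label dimension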
Error Log:
ERROR: LoadError: Scalar indexing is disallowed.
Invocation of getindex resulted in scalar indexing of a GPU array.
This is typically caused by calling an iterating implementation of a method.
Such implementations *do not* execute on the GPU, but very slowly on the CPU,
and therefore are only permitted from the REPL for prototyping purposes.
If you did intend to index this array, annotate the caller with @allowscalar.
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:33
[2] assertscalar(op::String)
@ GPUArraysCore ~/.julia/packages/GPUArraysCore/lojQM/src/GPUArraysCore.jl:87
[3] getindex(::CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, ::Int64, ::Int64)
@ GPUArrays ~/.julia/packages/GPUArrays/fqD8z/src/host/indexing.jl:9
[4] _generic_matmatmul!(C::Matrix{Float32}, tA::Char, tB::Char, A::CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, B::Base.ReshapedArray{Bool, 2, OneHotArrays.OneHotArray{UInt32, 2, 3, CUDA.CuArray{UInt32, 2, CUDA.Mem.DeviceBuffer}}, Tuple{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64}, Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64}}}, _add::LinearAlgebra.MulAddMul{true, true, Bool, Bool})
@ LinearAlgebra /opt/julias/julia-1.7.2/share/julia/stdlib/v1.7/LinearAlgebra/src/matmul.jl:830
[5] generic_matmatmul!(C::Matrix{Float32}, tA::Char, tB::Char, A::CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, B::Base.ReshapedArray{Bool, 2, OneHotArrays.OneHotArray{UInt32, 2, 3, CUDA.CuArray{UInt32, 2, CUDA.Mem.DeviceBuffer}}, Tuple{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64}, Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64}}}, _add::LinearAlgebra.MulAddMul{true, true, Bool, Bool})
@ LinearAlgebra /opt/julias/julia-1.7.2/share/julia/stdlib/v1.7/LinearAlgebra/src/matmul.jl:798
[6] mul!
@ /opt/julias/julia-1.7.2/share/julia/stdlib/v1.7/LinearAlgebra/src/matmul.jl:478 [inlined]
[7] mul!
@ /opt/julias/julia-1.7.2/share/julia/stdlib/v1.7/LinearAlgebra/src/matmul.jl:275 [inlined]
[8] *
@ /opt/julias/julia-1.7.2/share/julia/stdlib/v1.7/LinearAlgebra/src/matmul.jl:153 [inlined]
[9] #1471
@ ~/.julia/packages/ChainRules/RZYEu/src/rulesets/Base/arraymath.jl:36 [inlined]
[10] unthunk
@ ~/.julia/packages/ChainRulesCore/C73ay/src/tangent_types/thunks.jl:204 [inlined]
[11] wrap_chainrules_output
@ ~/.julia/packages/Zygote/AS0Go/src/compiler/chainrules.jl:105 [inlined]
[12] map
@ ./tuple.jl:223 [inlined]
[13] wrap_chainrules_output
@ ~/.julia/packages/Zygote/AS0Go/src/compiler/chainrules.jl:106 [inlined]
[14] ZBack
@ ~/.julia/packages/Zygote/AS0Go/src/compiler/chainrules.jl:206 [inlined]
[15] Pullback
@ ~/.julia/packages/Flux/v79Am/src/layers/basic.jl:701 [inlined]
[16] Pullback
@ ~/.julia/packages/Flux/v79Am/src/layers/basic.jl:702 [inlined]
[17] (::typeof(∂(λ)))(Δ::CUDA.CuArray{Float32, 3, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/AS0Go/src/compiler/interface2.jl:0
[18] Pullback
@ ~/Projects/MakeMore/src/bugrep.jl:23 [inlined]
[19] (::Zygote.var"#60#61"{typeof(∂(#1))})(Δ::Float32)
@ Zygote ~/.julia/packages/Zygote/AS0Go/src/compiler/interface.jl:45
[20] withgradient(f::Function, args::Flux.Embedding{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}})
@ Zygote ~/.julia/packages/Zygote/AS0Go/src/compiler/interface.jl:133
[21] top-level scope
@ ~/Projects/MakeMore/src/bugrep.jl:23
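Reading the trace, frames [4]-[9] suggest the failure is in the pullback of * (ChainRules' matmul rule at arraymath.jl:36): the weight gradient is computed as Δ * B', where B is the 3D OneHotArray that Embedding reshaped to 2D (visible in frame [4] as a Base.ReshapedArray wrapper). That wrapped array apparently has no GPU-specialized multiplication method, and the destination C is a plain CPU Matrix{Float32}, so LinearAlgebra falls back to generic_matmatmul! and scalar-indexes the CuArray. Below is my own untested attempt to isolate that product outside Flux/Zygote (shapes taken from the script above; depending on the OneHotArrays version, reshape may or may not produce the same ReshapedArray wrapper seen in the trace):

using Flux, CUDA, OneHotArrays
CUDA.allowscalar(false)

oh  = onehotbatch(rand('a':'z', (2, 5)), 'a':'z') |> gpu  # 26×2×5 OneHotArray
oh2 = reshape(oh, 26, :)          # 2D view, as produced inside Embedding (frame [4])
Δ   = CUDA.rand(Float32, 5, 10)   # stand-in for the incoming gradient
Δ * transpose(oh2)                # should hit generic_matmatmul! -> scalar indexing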
Workaround:
Replacing inputs_oh above with an integer array, where the characters have been mapped to 1:26, makes everything run on the GPU with no errors. To test, swap in the line below:
# inputs_oh = onehotbatch(inputs, 'a':'z') |> gpu
inputs_oh = rand(1:26, (2, 5)) |> gpu
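For completeness, here is the same training step with the actual character-to-integer mapping (rather than random indices); a sketch assuming the model, loss, and opt_state defined above:

inputs_int = (inputs .- 'a') .+ 1 |> gpu  # map 'a':'z' to 1:26 (Char minus Char is an Int)
l, grads = Flux.withgradient(m -> loss(m(inputs_int)), model)  # runs without the error
Flux.update!(opt_state, model, grads[1])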
Useful Background Information:
- Flux v0.13.11
- OneHotArrays v0.2.3
- Julia Info
Julia Version 1.7.2
Commit bf53498635 (2022-02-06 15:21 UTC)
Platform Info:
OS: Linux (x86_64-pc-linux-gnu)
CPU: Intel(R) Core(TM) i9-10900 CPU @ 2.80GHz
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-12.0.1 (ORCJIT, skylake)
Environment:
JULIA_NUM_THREADS = 4
JULIA_ERROR_COLOR = red
- CUDA Info
NVIDIA-SMI 515.86.01, Driver Version: 515.86.01, CUDA Version: 11.7 (nvidia-smi header; device rows truncated)