diff --git a/src/GPUArrays.jl b/src/GPUArrays.jl
index c3516c96..a29eb271 100644
--- a/src/GPUArrays.jl
+++ b/src/GPUArrays.jl
@@ -18,9 +18,10 @@ include("convolution.jl")
 include("testsuite/testsuite.jl")
 include("jlbackend.jl")
 include("random.jl")
+include("pool.jl")
 
 export GPUArray, gpu_call, thread_blocks_heuristic, global_size, synchronize_threads
-export linear_index, @linearidx, @cartesianidx, convolution!, device, synchronize
+export linear_index, @linearidx, @cartesianidx, convolution!, device, synchronize, maxpool2d
 export JLArray
 
 end # module
diff --git a/src/pool.jl b/src/pool.jl
new file mode 100644
index 00000000..7503a4f0
--- /dev/null
+++ b/src/pool.jl
@@ -0,0 +1,72 @@
+"""
+    maxpool2d_kernel(state, A, out, Asize, pool, stride, outSize)
+
+Kernel body used by `maxpool2d`; each invocation handles one linear index.
+
+The index from `linear_index(state)` is unpacked against `Asize`. Invocations
+whose coordinates fall outside `outSize` return without writing. Every other
+invocation scans the `pool` x `pool` window of `A` that starts at the strided
+position of its coordinates and stores the largest value found at the matching
+position of `out`. `A` and `out` are addressed through linear offsets.
+"""
+function maxpool2d_kernel(state, A::AbstractArray{T}, out, Asize, pool, stride, outSize) where T
+    ilin = linear_index(state)
+    idx = GPUArrays.gpu_ind2sub(Asize, ilin)
+    if (idx[1] > outSize[1] || idx[2] > outSize[2] || idx[3] > outSize[3] || idx[4] > outSize[4])
+        return
+    end
+
+    # Linear offset of the window's first element, computed once and reused.
+    curr_pos = ((idx[1] - 1) * stride) + Asize[1] * (idx[2] - 1) * stride +
+        (Asize[1] * Asize[2]) * (idx[3] - 1) +
+        (Asize[1] * Asize[2] * Asize[3]) * (idx[4] - 1) + 1
+    temp_max = A[curr_pos]
+
+    for j in 1:pool
+        for i in 1:pool
+            m = A[curr_pos]
+            if (m > temp_max)
+                temp_max = m
+            end
+            curr_pos += 1
+        end
+        # Step from the end of this window column to the start of the next.
+        curr_pos += Asize[1] - pool
+    end
+    out[(idx[1] - 1) + outSize[1] * (idx[2] - 1) + (outSize[1] * outSize[2]) * (idx[3] - 1) + (outSize[1] * outSize[2] * outSize[3]) * (idx[4] - 1) + 1] = temp_max
+    return
+end
+
+
+"""
+    maxpool2d(a, pool; stride = pool, pad = 0)
+
+Max-pool dimensions 1 and 2 of the 4-dimensional array `a` with a square
+`pool` x `pool` window moved in steps of `stride`.
+
+`a` is copied into the middle of a `zeros` array that is `pad` cells larger on
+each side of dimensions 1 and 2, so the border values take part in the maximum.
+The result has `(n - pool) ÷ stride + 1` entries along each pooled dimension of
+the padded array (`n` being its size there) and keeps dimensions 3 and 4 of `a`.
+
+Throws an `ArgumentError` when `pool` exceeds the padded size of dimension 1 or 2.
+"""
+function maxpool2d(a, pool::T; stride = pool, pad = 0) where T <: Integer
+    a2 = size.((a,), (1, 2))
+    # `÷` truncates toward zero, so a window slightly larger than the padded
+    # input would still give an output of size 1 and let the kernel read
+    # outside the array.
+    all(a2 .+ 2pad .>= pool) ||
+        throw(ArgumentError("pool ($pool) is larger than the padded input"))
+    b = zeros(typeof(a), (a2 .+ 2pad)..., size(a, 3), size(a, 4))
+    apad = a2 .+ pad
+    b[pad + 1 : apad[1], pad + 1 : apad[2], :, :] = a
+    outdims = ((size(b) .- pool) .÷ stride) .+ 1
+    out = similar(b, (outdims[1], outdims[2], size(b, 3), size(b, 4)))
+    # The kernel is handed every size as UInt32.
+    sizes = map(x-> UInt32.(x), (size(b), pool, stride, size(out)))
+    gpu_call(maxpool2d_kernel, b, (b, out, sizes...))
+    return out
+end
+
+
diff --git a/src/testsuite/pool.jl b/src/testsuite/pool.jl
new file mode 100644
index 00000000..7649e04e
--- /dev/null
+++ b/src/testsuite/pool.jl
@@ -0,0 +1,60 @@
+using GPUArrays.TestSuite, Base.Test, Flux
+# NOTE(review): this file is included from src/, but Flux is only listed in
+# test/REQUIRE -- confirm that loading the package does not need Flux.
+
+# Compare GPUArrays.maxpool2d on `Typ` arrays with the `maxpool` in scope
+# (presumably Flux's -- confirm). Complex element types are skipped.
+function run_pool(Typ)
+    for ET in supported_eltypes()
+        T = Typ{ET}
+        if (ET == Complex{Float32} || ET == Complex{Float64})
+            continue
+        end
+        @testset "$ET" begin
+            @testset "maxpool with padding" begin
+                pool = 3
+                pad = 3
+
+                a = rand(ET, 9,9,3,1)
+                # Reference input: `a` surrounded by `pad` zeros on each side.
+                b = zeros(eltype(a), size(a,1) + pad * 2, size(a,2) + pad * 2, size(a,3), size(a,4))
+                b[pad + 1 : pad + size(a,1), pad + 1 : pad + size(a,2), :, :] = a
+                out1 = maxpool(b, (3, 3))
+
+                a = T(a)
+                GPUArrays.allowslow(true)
+                out2 = GPUArrays.maxpool2d(a, pool, pad = pad)
+
+                @test out1 ≈ out2
+            end
+
+            @testset "maxpool without padding" begin
+                pool = 3
+
+                a = rand(ET, 9,9,3,1)
+                out1 = maxpool(a, (3, 3))
+
+                a = T(a)
+                GPUArrays.allowslow(true)
+                out2 = GPUArrays.maxpool2d(a, pool)
+
+                @test out1 ≈ out2
+            end
+
+
+            @testset "maxpool with full kernel" begin
+                pool = 9
+                stride = 1
+
+                a = rand(ET, 9,9,3,1)
+                out1 = maxpool(a, (9, 9))
+
+                a = T(a)
+                GPUArrays.allowslow(true)
+                out2 = GPUArrays.maxpool2d(a, pool, stride = stride)
+
+                @test out1 ≈ out2
+            end
+        end
+    end
+end
diff --git a/src/testsuite/testsuite.jl b/src/testsuite/testsuite.jl
index 20fa2ea0..26f5cfa1 100644
--- a/src/testsuite/testsuite.jl
+++ b/src/testsuite/testsuite.jl
@@ -42,6 +42,7 @@ include("base.jl")
 include("indexing.jl")
 # include("vector.jl")
 include("random.jl")
+include("pool.jl")
 
 function supported_eltypes()
     (Float32, Float64, Int32, Int64, Complex64, Complex128)
@@ -62,6 +63,7 @@ function run_tests(Typ)
     run_mapreduce(Typ)
     run_indexing(Typ)
     run_random(Typ)
+    run_pool(Typ)
 end
 
 export against_base, run_tests, supported_eltypes
diff --git a/test/REQUIRE b/test/REQUIRE
new file mode 100644
index 00000000..db2d53f9
--- /dev/null
+++ b/test/REQUIRE
@@ -0,0 +1,2 @@
+Flux
+CUDAnative