From ce38b313aa9fa0442050bf4629effbb22628e45c Mon Sep 17 00:00:00 2001 From: Billy Moses Date: Sat, 15 Jun 2024 13:07:15 -0400 Subject: [PATCH 1/5] Re-enable ci for amd math fns --- test/amdgpu.jl | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/test/amdgpu.jl b/test/amdgpu.jl index da826efcc4..9c9b097422 100644 --- a/test/amdgpu.jl +++ b/test/amdgpu.jl @@ -38,15 +38,15 @@ function grad_exp_kernel(A, dA) return nothing end -# @testset "exp_kernel" begin -# A = AMDGPU.ones(64,) -# @roc groupsize=length(A) exp_kernel(A) -# A = AMDGPU.ones(64,) -# dA = similar(A) -# dA .= 1 -# @roc groupsize=length(A) grad_exp_kernel(A, dA) -# @test all(dA .== exp(1.f0)) -# end +@testset "exp_kernel" begin + A = AMDGPU.ones(64,) + @roc groupsize=length(A) exp_kernel(A) + A = AMDGPU.ones(64,) + dA = similar(A) + dA .= 1 + @roc groupsize=length(A) grad_exp_kernel(A, dA) + @test all(dA .== exp(1.f0)) +end function cos_kernel(A) i = workitemIdx().x @@ -61,12 +61,12 @@ function grad_cos_kernel(A, dA) return nothing end -# @testset "cos_kernel" begin -# A = AMDGPU.ones(64,) -# @roc groupsize=length(A) cos_kernel(A) -# A = AMDGPU.ones(64,) -# dA = similar(A) -# dA .= 1 -# @roc groupsize=length(A) grad_cos_kernel(A, dA) -# @test all(dA .≈ -sin(1.f0)) -# end +@testset "cos_kernel" begin + A = AMDGPU.ones(64,) + @roc groupsize=length(A) cos_kernel(A) + A = AMDGPU.ones(64,) + dA = similar(A) + dA .= 1 + @roc groupsize=length(A) grad_cos_kernel(A, dA) + @test all(dA .≈ -sin(1.f0)) +end From 3ee3d0cf020d6b407f9f5765557fd7b1cb9ff9e9 Mon Sep 17 00:00:00 2001 From: Billy Moses Date: Sat, 15 Jun 2024 13:21:51 -0400 Subject: [PATCH 2/5] better amd gpu errs --- src/compiler.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler.jl b/src/compiler.jl index 830232d4ab..6a75e61577 100644 --- a/src/compiler.jl +++ b/src/compiler.jl @@ -1632,7 +1632,7 @@ function emit_error(B::LLVM.IRBuilder, orig, string) string*=sprint(io->Base.show_backtrace(io, bt)) end - ct = if occursin("ptx", LLVM.triple(mod)) + ct = if occursin("ptx", LLVM.triple(mod)) || occursin("amdgcn", LLVM.triple(mod)) GPUCompiler.emit_exception!(B, string, orig) else call!(B, funcT, func, LLVM.Value[globalstring_ptr!(B, string)]) From 4eda9667829b07cfefe0af6bb787103bbddbb207 Mon Sep 17 00:00:00 2001 From: Billy Moses Date: Sat, 15 Jun 2024 13:26:22 -0400 Subject: [PATCH 3/5] print --- test/amdgpu.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/amdgpu.jl b/test/amdgpu.jl index 9c9b097422..09d120e246 100644 --- a/test/amdgpu.jl +++ b/test/amdgpu.jl @@ -38,6 +38,8 @@ function grad_exp_kernel(A, dA) return nothing end +Enzyme.API.printall!(true) + @testset "exp_kernel" begin A = AMDGPU.ones(64,) @roc groupsize=length(A) exp_kernel(A) From c0cd8fb022be765b6bb5fbbbe05c4c3ba92e5da4 Mon Sep 17 00:00:00 2001 From: Billy Moses Date: Sat, 15 Jun 2024 19:09:02 -0400 Subject: [PATCH 4/5] amd intrs --- src/compiler.jl | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/compiler.jl b/src/compiler.jl index 6a75e61577..18be62e92a 100644 --- a/src/compiler.jl +++ b/src/compiler.jl @@ -5932,6 +5932,46 @@ function GPUCompiler.codegen(output::Symbol, job::CompilerJob{<:EnzymeTarget}; end end end + if parent_job.config.target isa GPUCompiler.GCNCompilerTarget + arg1 = ("acos", "acosh", "asin", + "asinh", "atan2", "atan", + "atanh", "cbrt", "ceil", + "copysign", "cos", "native_cos", + "cosh", "cospi", "i0", + "i1", "erfc", "erfcinv", + "erfcx", "erf", "erfinv", + "exp10", "native_exp10", "exp2", + "exp", "native_exp", "expm1", + "fabs", "fdim", "floor", + "fma", "fmax", "fmin", + "fmod", "frexp", "hypot", + "ilogb", "isfinite", "isinf", + "isnan", "j0", "j1", + "ldexp", "lgamma", "log10", + "native_log10", "log1p", "log2", + "log2", "logb", "log", + "native_log", "modf", "nearbyint", + "nextafter", "len3", "len4", + "ncdf", "ncdfinv", "pow", + "pown", "rcbrt", "remainder", + "remquo", "rhypot", "rint", + "rlen3", "rlen4", "round", + "rsqrt", "scalb", "scalbn", + "signbit", "sincos", "sincospi", + "sin", "native_sin", "sinh", + "sinpi", "sqrt", "native_sqrt", + "tan", "tanh", "tgamma", + "trunc", "y0", "y1") + for n in arg1, (T, pf, lpf) in ((LLVM.DoubleType(), "", "f64"), (LLVM.FloatType(), "f", "f32")) + fname = "__ocml_"*n*"_"*lpf + if !haskey(functions(mod), fname) + FT = LLVM.FunctionType(T, [T], vararg=false) + wrapper_f = LLVM.Function(mod, fname, FT) + llname = "llvm."*n*"."*lpf + push!(function_attributes(wrapper_f), StringAttribute("implements", llname)) + end + end + end end for (name, fnty) in fnsToInject for (T, JT, pf) in ((LLVM.DoubleType(), Float64, ""), (LLVM.FloatType(), Float32, "f")) From 403caef932b2258960f6a4118bdcb17d042cc643 Mon Sep 17 00:00:00 2001 From: William Moses Date: Tue, 18 Jun 2024 09:54:28 -0400 Subject: [PATCH 5/5] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 9441aa6ef1..3f273b5d12 100644 --- a/Project.toml +++ b/Project.toml @@ -30,7 +30,7 @@ EnzymeStaticArraysExt = "StaticArrays" CEnum = "0.4, 0.5" ChainRulesCore = "1" EnzymeCore = "0.7.5" -Enzyme_jll = "0.0.122" +Enzyme_jll = "0.0.123" GPUCompiler = "0.21, 0.22, 0.23, 0.24, 0.25, 0.26" LLVM = "6.1, 7" ObjectFile = "0.4"