diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index d4229b4eb..a047715b9 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -40,6 +40,21 @@ steps:
       NNLIB_TEST_CUDA: true
     timeout_in_minutes: 60
 
+  - label: "Benchmarks"
+    plugins:
+      - JuliaCI/julia#v1:
+          version: 1
+    env:
+      JULIA_NUM_THREADS: 4
+    command:
+      - julia --project=benchmark -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
+      - julia --project=benchmark benchmark/runbenchmarks.jl
+      - printf '%b\n' "$(cat benchmark/report.md)" | buildkite-agent annotate --style 'info'
+    agents:
+      queue: "juliagpu"
+    if: build.pull_request.labels includes "benchmark"
+    timeout_in_minutes: 30
+
   # - label: "GPU julia nightly"
   #   plugins:
   #     - JuliaCI/julia#v1:
diff --git a/.gitignore b/.gitignore
index d8b90e638..ac7124fbc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,6 @@ deps.jl
 .vscode/
 /Manifest.toml
 lib/NNlibCUDA/Manifest.toml
+benchmark/Manifest.toml
+benchmark/*.json
+benchmark/report.md
diff --git a/benchmark/Project.toml b/benchmark/Project.toml
new file mode 100644
index 000000000..1a9bb9eff
--- /dev/null
+++ b/benchmark/Project.toml
@@ -0,0 +1,14 @@
+[deps]
+ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
+BenchmarkCI = "20533458-34a3-403d-a444-e18f38190b5b"
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
+PkgBenchmark = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d"
+
+[compat]
+# No compat bounds for NNlib because we may test breaking versions
+ArgParse = "1"
+BenchmarkCI = "0.1"
+BenchmarkTools = "1.3"
+PkgBenchmark = "0.2"
+julia = "1.6"
diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl
new file mode 100644
index 000000000..53bf64ba5
--- /dev/null
+++ b/benchmark/benchmarks.jl
@@ -0,0 +1,13 @@
+using BenchmarkTools
+using NNlib
+
+const SUITE = BenchmarkGroup()
+
+SUITE["activations"] = BenchmarkGroup()
+
+x = rand(64, 64)
+
+for f in NNlib.ACTIVATIONS
+    act = @eval($f)
+    SUITE["activations"][string(f)] = @benchmarkable $act.($x)
+end
diff --git a/benchmark/runbenchmarks.jl b/benchmark/runbenchmarks.jl
new file mode 100644
index 000000000..a40fd1631
--- /dev/null
+++ b/benchmark/runbenchmarks.jl
@@ -0,0 +1,65 @@
+# Adapted from
+# https://github.com/kul-forbes/ProximalOperators.jl/tree/master/benchmark
+using ArgParse
+using PkgBenchmark
+using BenchmarkCI: displayjudgement, printresultmd, CIResult
+using Markdown
+
+function markdown_report(judgement)
+    md = sprint(printresultmd, CIResult(judgement = judgement))
+    md = replace(md, ":x:" => "❌")
+    md = replace(md, ":white_check_mark:" => "✅")
+    return md
+end
+
+function parse_commandline()
+    s = ArgParseSettings()
+
+    @add_arg_table! s begin
+        "--target"
+            help = "the branch/commit/tag to use as target"
+            default = "HEAD"
+        "--baseline"
+            help = "the branch/commit/tag to use as baseline"
+            default = "master"
+        "--retune"
+            help = "force re-tuning (ignore existing tuning data)"
+            action = :store_true
+    end
+
+    return parse_args(s)
+end
+
+function main()
+    parsed_args = parse_commandline()
+
+    mkconfig(; kwargs...) =
+        BenchmarkConfig(
+            env = Dict(
+                "JULIA_NUM_THREADS" => get(ENV, "JULIA_NUM_THREADS", "1"),
+            );
+            kwargs...
+        )
+
+    target = parsed_args["target"]
+    group_target = benchmarkpkg(
+        dirname(@__DIR__),
+        mkconfig(id = target),
+        resultfile = joinpath(@__DIR__, "result-$(target).json"),
+        retune = parsed_args["retune"],
+    )
+
+    baseline = parsed_args["baseline"]
+    group_baseline = benchmarkpkg(
+        dirname(@__DIR__),
+        mkconfig(id = baseline),
+        resultfile = joinpath(@__DIR__, "result-$(baseline).json"),
+    )
+
+    judgement = judge(group_target, group_baseline)
+    report_md = markdown_report(judgement)
+    write(joinpath(@__DIR__, "report.md"), report_md)
+    display(Markdown.parse(report_md))
+end
+
+main()
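
Usage sketch: to reproduce the CI comparison locally, the same commands the
Buildkite step runs should work from the repository root. The --target and
--baseline flags shown are the ArgParse defaults from runbenchmarks.jl, so
they can be omitted:

    julia --project=benchmark -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
    julia --project=benchmark benchmark/runbenchmarks.jl --target HEAD --baseline master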
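
Further benchmark groups slot into the same SUITE tree that
benchmark/benchmarks.jl builds for "activations". A hypothetical sketch (the
"softmax" group name and the 64x64 size are illustrative only, not part of
this patch; softmax and logsoftmax are existing NNlib exports):

    # Hypothetical addition to benchmark/benchmarks.jl; assumes the
    # `using BenchmarkTools, NNlib` and `const SUITE` setup already in that file.
    SUITE["softmax"] = BenchmarkGroup()
    let y = rand(64, 64)
        SUITE["softmax"]["softmax"] = @benchmarkable softmax($y)
        SUITE["softmax"]["logsoftmax"] = @benchmarkable logsoftmax($y)
    end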