From 46697cc54c2189920742cc38c1787fa394f16d20 Mon Sep 17 00:00:00 2001 From: nick evans Date: Mon, 28 Apr 2025 17:14:55 -0400 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Improve=20`SequenceSet#xor?= =?UTF-8?q?`=20performance=20by=20~2x?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Obviously, the performance improvement is highly dependent on what data you're using, whether YJIT is enabled, etc. I saw results ranging from 1.7x faster to 2.6x faster. The benchmark script is included. For a benchmark run using sets with 10k members: ``` new impl 79.061 (± 8.9%) i/s (12.65 ms/i) - 392.000 in 5.004322s old impl 32.736 (±15.3%) i/s (30.55 ms/i) - 162.000 in 5.052839s ``` The old implementation was ~2.42x slower. For a benchmark run using very sparse sets with 100 members: ``` new impl 4.295k (±13.5%) i/s (232.81 μs/i) - 21.476k in 5.102536s old impl 2.459k (±11.3%) i/s (406.69 μs/i) - 12.095k in 5.000148s ``` This time, the old implementation was ~1.75x slower. I have some other (much bigger) PRs that should give even bigger performance improvements, but this is simple and effective. 
--- Gemfile | 1 + benchmarks/seqset-ops.rb | 36 ++++++++++++++++++++++++++++++++++++ lib/net/imap/sequence_set.rb | 4 +++- 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100755 benchmarks/seqset-ops.rb diff --git a/Gemfile b/Gemfile index 4b1dfca8a..8ad4f5515 100644 --- a/Gemfile +++ b/Gemfile @@ -15,6 +15,7 @@ gem "test-unit" gem "test-unit-ruby-core", git: "https://github.com/ruby/test-unit-ruby-core" gem "benchmark-driver", require: false +gem "benchmark-ips", require: false group :test do gem "simplecov", require: false diff --git a/benchmarks/seqset-ops.rb b/benchmarks/seqset-ops.rb new file mode 100755 index 000000000..832026cf6 --- /dev/null +++ b/benchmarks/seqset-ops.rb @@ -0,0 +1,36 @@ +#!/usr/bin/env ruby +require "benchmark/ips" +require "net/imap" + +warmup = 1.0 +time = 5.0 +size_a = 10_000 +size_b = 10_000 +max_a = 14_000 +max_b = 14_000 + +SeqSet = Net::IMAP::SequenceSet +a = SeqSet[Array.new(size_a) { rand(1..max_a) }] +b = SeqSet[Array.new(size_b) { rand(1..max_b) }] + +puts ?=*72 +puts "SequenceSet XOR implementations" +Benchmark.ips do |x| + x.config(warmup:, time:) + + # the original was missing the "a.dup", so it crashed or mutated a! + x.report("a ^ b") do a.dup ^ b end + x.report("new (a - b) | (b - a)") do + SeqSet.new(a).subtract(b).merge(SeqSet.new(b).subtract(a)) + end + x.report("dup (a - b) | (b - a)") do + a.dup.subtract(b).merge(b.dup.subtract(a)) + end + x.report("(a.dup | b).subtract(a & b)") do (a.dup | b).subtract(a & b) end + x.report("dup (a | b) - (a & b)") do a.dup.merge(b).subtract(a & b) end + + x.report("(a - b) | (b - a)") do (a - b) | (b - a) end + x.report("(a | b) - (a & b)") do (a | b) - (a & b) end + + x.compare! +end diff --git a/lib/net/imap/sequence_set.rb b/lib/net/imap/sequence_set.rb index 2fd5b695b..d340603c8 100644 --- a/lib/net/imap/sequence_set.rb +++ b/lib/net/imap/sequence_set.rb @@ -702,7 +702,9 @@ def &(other) # # (seqset ^ other) is equivalent to ((seqset | other) - # (seqset & other)). 
- def ^(other) remain_frozen (dup | other).subtract(self & other) end + def ^(other) + remain_frozen dup.subtract(other).merge(SequenceSet.new(other).subtract(self)) + end alias xor :^ # :call-seq: