Skip to content

Commit ca74b40

Browse files
committed
[GR-30782] GSoC: Use a compact hash table for RubyHash instead of the buckets strategy (#3172)
PullRequest: truffleruby/4078
2 parents 58f96a4 + 07e1d83 commit ca74b40

23 files changed

+981
-191
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Compatibility:
2424

2525
Performance:
2626

27+
* Change the `Hash` representation from traditional buckets to a "compact hash table" for improved locality, performance and memory footprint (#3172, @moste00).
2728
* Optimize calls with `ruby2_keywords` forwarding by deciding it per call site instead of per callee thanks to [my fix in CRuby 3.2](https://bugs.ruby-lang.org/issues/18625) (@eregon).
2829
* Optimize feature loading when require is called with an absolute path to a .rb file (@rwstauner).
2930

bench/micro/hash/buckets-lookup.rb

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# truffleruby_primitives: true
2+
3+
# Copyright (c) 2023 Oracle and/or its affiliates. All rights reserved. This
4+
# code is released under a tri EPL/GPL/LGPL license. You can use it,
5+
# redistribute it and/or modify it under the terms of the:
6+
#
7+
# Eclipse Public License version 2.0, or
8+
# GNU General Public License version 2, or
9+
# GNU Lesser General Public License version 2.1.
10+
11+
# Benchmarks looking up keys
12+
13+
max = 400_000 # > 0.75*(524288 + 21) (cf. BucketsHashStore)
14+
hash = { a: 1, b: 2, c: 3, d: 4 } # big enough to skip the packed strategy and start as a big hash (buckets or compact, per BIG_HASH_STRATEGY_IS_BUCKETS)
15+
max.times { |i|
16+
hash[i] = i
17+
}
18+
19+
benchmark 'core-hash-buckets-lookup' do
20+
1000.times do |i|
21+
Primitive.blackhole(hash[i])
22+
end
23+
end

bench/micro/hash/each-buckets.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,6 @@
1313
if RUBY_ENGINE == 'truffleruby'
1414
hash = { a: 1, b: 2, c: 3, d: 4, e: 5, f: 6, g: 7, h: 8, i: 9, j: 10 }
1515
benchmark 'core-hash-each-buckets' do
16-
hash.each { |k, v| Primitive.blackhole(v) }
16+
hash.each { |k, v| Primitive.blackhole(k); Primitive.blackhole(v) }
1717
end
1818
end

spec/ruby/core/hash/delete_spec.rb

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,21 +24,35 @@
2424
it "allows removing a key while iterating" do
2525
h = { a: 1, b: 2 }
2626
visited = []
27-
h.each_pair { |k,v|
27+
h.each_pair { |k, v|
2828
visited << k
2929
h.delete(k)
3030
}
3131
visited.should == [:a, :b]
3232
h.should == {}
3333
end
3434

35+
it "allows removing a key while iterating for big hashes" do
36+
h = { a: 1, b: 2, c: 3, d: 4, e: 5, f: 6, g: 7, h: 8, i: 9, j: 10,
37+
k: 11, l: 12, m: 13, n: 14, o: 15, p: 16, q: 17, r: 18, s: 19, t: 20,
38+
u: 21, v: 22, w: 23, x: 24, y: 25, z: 26 }
39+
visited = []
40+
h.each_pair { |k, v|
41+
visited << k
42+
h.delete(k)
43+
}
44+
visited.should == [:a, :b, :c, :d, :e, :f, :g, :h, :i, :j, :k, :l, :m,
45+
:n, :o, :p, :q, :r, :s, :t, :u, :v, :w, :x, :y, :z]
46+
h.should == {}
47+
end
48+
3549
it "accepts keys with private #hash method" do
3650
key = HashSpecs::KeyWithPrivateHash.new
3751
{ key => 5 }.delete(key).should == 5
3852
end
3953

4054
it "raises a FrozenError if called on a frozen instance" do
41-
-> { HashSpecs.frozen_hash.delete("foo") }.should raise_error(FrozenError)
55+
-> { HashSpecs.frozen_hash.delete("foo") }.should raise_error(FrozenError)
4256
-> { HashSpecs.empty_frozen_hash.delete("foo") }.should raise_error(FrozenError)
4357
end
4458
end

spec/ruby/core/hash/rehash_spec.rb

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,36 @@ def k1.hash; 1; end
7777
h.keys.should_not.include? [1]
7878
end
7979

80+
it "iterates keys in insertion order" do
81+
key = Class.new do
82+
attr_reader :name
83+
84+
def initialize(name)
85+
@name = name
86+
end
87+
88+
def hash
89+
123
90+
end
91+
end
92+
93+
a, b, c, d = key.new('a'), key.new('b'), key.new('c'), key.new('d')
94+
h = { a => 1, b => 2, c => 3, d => 4 }
95+
h.size.should == 4
96+
97+
key.class_exec do
98+
def eql?(other)
99+
true
100+
end
101+
end
102+
103+
h.rehash
104+
h.size.should == 1
105+
k, v = h.first
106+
k.name.should == 'a'
107+
v.should == 4
108+
end
109+
80110
it "raises a FrozenError if called on a frozen instance" do
81111
-> { HashSpecs.frozen_hash.rehash }.should raise_error(FrozenError)
82112
-> { HashSpecs.empty_frozen_hash.rehash }.should raise_error(FrozenError)

src/main/java/org/truffleruby/core/array/ArrayNodes.java

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -909,46 +909,46 @@ Object equalNotArray(RubyArray a, Object b) {
909909
@ImportStatic(ArrayGuards.class)
910910
public abstract static class EqlNode extends PrimitiveArrayArgumentsNode {
911911

912-
@Child private SameOrEqlNode eqlNode = SameOrEqlNode.create();
913-
914912
@Specialization(
915913
guards = { "stores.accepts(bStore)", "stores.isPrimitive(aStore)" },
916914
limit = "storageStrategyLimit()")
917-
boolean eqlSamePrimitiveType(RubyArray a, RubyArray b,
915+
static boolean eqlSamePrimitiveType(RubyArray a, RubyArray b,
918916
@Bind("a.getStore()") Object aStore,
919917
@Bind("b.getStore()") Object bStore,
920918
@CachedLibrary("aStore") ArrayStoreLibrary stores,
921-
@Cached ConditionProfile sameProfile,
922-
@Cached IntValueProfile arraySizeProfile,
923-
@Cached ConditionProfile sameSizeProfile,
924-
@Cached BranchProfile trueProfile,
925-
@Cached BranchProfile falseProfile,
926-
@Cached LoopConditionProfile loopProfile) {
919+
@Cached SameOrEqlNode eqlNode,
920+
@Cached InlinedConditionProfile sameProfile,
921+
@Cached InlinedIntValueProfile arraySizeProfile,
922+
@Cached InlinedConditionProfile sameSizeProfile,
923+
@Cached InlinedBranchProfile trueProfile,
924+
@Cached InlinedBranchProfile falseProfile,
925+
@Cached InlinedLoopConditionProfile loopProfile,
926+
@Bind("$node") Node node) {
927927

928-
if (sameProfile.profile(a == b)) {
928+
if (sameProfile.profile(node, a == b)) {
929929
return true;
930930
}
931931

932-
final int size = arraySizeProfile.profile(a.size);
932+
final int size = arraySizeProfile.profile(node, a.size);
933933

934-
if (!sameSizeProfile.profile(size == b.size)) {
934+
if (!sameSizeProfile.profile(node, size == b.size)) {
935935
return false;
936936
}
937937

938938
int i = 0;
939939
try {
940-
for (; loopProfile.inject(i < size); i++) {
941-
if (!eqlNode.execute(stores.read(aStore, i), stores.read(bStore, i))) {
942-
falseProfile.enter();
940+
for (; loopProfile.inject(node, i < size); i++) {
941+
if (!eqlNode.execute(node, stores.read(aStore, i), stores.read(bStore, i))) {
942+
falseProfile.enter(node);
943943
return false;
944944
}
945-
TruffleSafepoint.poll(this);
945+
TruffleSafepoint.poll(node);
946946
}
947947
} finally {
948-
profileAndReportLoopCount(loopProfile, i);
948+
profileAndReportLoopCount(node, loopProfile, i);
949949
}
950950

951-
trueProfile.enter();
951+
trueProfile.enter(node);
952952
return true;
953953
}
954954

src/main/java/org/truffleruby/core/hash/CompareHashKeysNode.java

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,23 @@
99
*/
1010
package org.truffleruby.core.hash;
1111

12+
import com.oracle.truffle.api.dsl.GenerateCached;
13+
import com.oracle.truffle.api.dsl.GenerateInline;
14+
import org.truffleruby.core.basicobject.ReferenceEqualNode;
15+
import org.truffleruby.core.kernel.KernelNodes.SameOrEqlNode;
16+
import org.truffleruby.language.RubyBaseNode;
17+
1218
import com.oracle.truffle.api.dsl.Cached;
1319
import com.oracle.truffle.api.dsl.GenerateUncached;
1420
import com.oracle.truffle.api.dsl.Specialization;
1521
import com.oracle.truffle.api.nodes.Node;
16-
import org.truffleruby.core.basicobject.ReferenceEqualNode;
17-
import org.truffleruby.core.kernel.KernelNodes.SameOrEqlNode;
18-
import org.truffleruby.language.RubyBaseNode;
1922

23+
@GenerateInline
24+
@GenerateCached(false)
2025
@GenerateUncached
2126
public abstract class CompareHashKeysNode extends RubyBaseNode {
2227

23-
public static CompareHashKeysNode getUncached() {
24-
return CompareHashKeysNodeGen.getUncached();
25-
}
26-
27-
public abstract boolean execute(boolean compareByIdentity, Object key, int hashed,
28+
public abstract boolean execute(Node node, boolean compareByIdentity, Object key, int hashed,
2829
Object otherKey, int otherHashed);
2930

3031
/** Checks if the two keys are the same object, which is used by both modes (by identity or not) of lookup. Enables
@@ -37,14 +38,35 @@ public static boolean referenceEqualKeys(Node node, ReferenceEqualNode refEqual,
3738
}
3839

3940
@Specialization(guards = "compareByIdentity")
40-
boolean refEquals(boolean compareByIdentity, Object key, int hashed, Object otherKey, int otherHashed,
41+
static boolean refEquals(
42+
Node node, boolean compareByIdentity, Object key, int hashed, Object otherKey, int otherHashed,
4143
@Cached ReferenceEqualNode refEqual) {
42-
return refEqual.execute(this, key, otherKey);
44+
return refEqual.execute(node, key, otherKey);
4345
}
4446

4547
@Specialization(guards = "!compareByIdentity")
46-
boolean same(boolean compareByIdentity, Object key, int hashed, Object otherKey, int otherHashed,
48+
static boolean same(Node node, boolean compareByIdentity, Object key, int hashed, Object otherKey, int otherHashed,
4749
@Cached SameOrEqlNode same) {
48-
return hashed == otherHashed && same.execute(key, otherKey);
50+
return hashed == otherHashed && same.execute(node, key, otherKey);
51+
}
52+
53+
@GenerateInline
54+
@GenerateCached(false)
55+
@GenerateUncached
56+
public abstract static class AssumingEqualHashes extends RubyBaseNode {
57+
58+
public abstract boolean execute(Node node, boolean compareByIdentity, Object key, Object otherKey);
59+
60+
@Specialization(guards = "compareByIdentity")
61+
static boolean refEquals(Node node, boolean compareByIdentity, Object key, Object otherKey,
62+
@Cached ReferenceEqualNode refEqual) {
63+
return refEqual.execute(node, key, otherKey);
64+
}
65+
66+
@Specialization(guards = "!compareByIdentity")
67+
static boolean same(Node node, boolean compareByIdentity, Object key, Object otherKey,
68+
@Cached SameOrEqlNode same) {
69+
return same.execute(node, key, otherKey);
70+
}
4971
}
5072
}

src/main/java/org/truffleruby/core/hash/HashLiteralNode.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@
99
*/
1010
package org.truffleruby.core.hash;
1111

12+
import org.truffleruby.RubyLanguage;
1213
import org.truffleruby.core.hash.library.BucketsHashStore;
14+
import org.truffleruby.core.hash.library.CompactHashStore;
1315
import org.truffleruby.core.hash.library.EmptyHashStore;
1416
import org.truffleruby.core.hash.library.PackedHashStoreLibrary;
1517
import org.truffleruby.core.hash.library.PackedHashStoreLibraryFactory;
@@ -28,13 +30,19 @@ protected HashLiteralNode(RubyNode[] keyValues) {
2830
this.keyValues = keyValues;
2931
}
3032

31-
public static HashLiteralNode create(RubyNode[] keyValues) {
33+
protected int getNumberOfEntries() {
34+
return keyValues.length >> 1;
35+
}
36+
37+
public static HashLiteralNode create(RubyNode[] keyValues, RubyLanguage language) {
3238
if (keyValues.length == 0) {
3339
return new EmptyHashStore.EmptyHashLiteralNode();
3440
} else if (keyValues.length <= PackedHashStoreLibrary.MAX_ENTRIES * 2) {
3541
return PackedHashStoreLibraryFactory.SmallHashLiteralNodeGen.create(keyValues);
3642
} else {
37-
return new BucketsHashStore.GenericHashLiteralNode(keyValues);
43+
return language.options.BIG_HASH_STRATEGY_IS_BUCKETS
44+
? new BucketsHashStore.BucketHashLiteralNode(keyValues)
45+
: new CompactHashStore.CompactHashLiteralNode(keyValues);
3846
}
3947
}
4048

src/main/java/org/truffleruby/core/hash/RubyHash.java

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,20 @@
1111

1212
import java.util.Set;
1313

14+
import org.truffleruby.RubyContext;
15+
import org.truffleruby.collections.PEBiFunction;
16+
import org.truffleruby.core.hash.library.BucketsHashStore;
17+
import org.truffleruby.core.hash.library.CompactHashStore;
18+
import org.truffleruby.core.hash.library.HashStoreLibrary;
19+
import org.truffleruby.core.klass.RubyClass;
20+
import org.truffleruby.interop.ForeignToRubyNode;
21+
import org.truffleruby.language.Nil;
22+
import org.truffleruby.language.RubyDynamicObject;
23+
import org.truffleruby.language.dispatch.DispatchNode;
24+
import org.truffleruby.language.objects.IsFrozenNode;
25+
import org.truffleruby.language.objects.ObjectGraph;
26+
import org.truffleruby.language.objects.ObjectGraphNode;
27+
1428
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
1529
import com.oracle.truffle.api.dsl.Bind;
1630
import com.oracle.truffle.api.dsl.Cached;
@@ -26,20 +40,7 @@
2640
import com.oracle.truffle.api.library.ExportMessage;
2741
import com.oracle.truffle.api.nodes.Node;
2842
import com.oracle.truffle.api.object.Shape;
29-
3043
import com.oracle.truffle.api.profiles.InlinedConditionProfile;
31-
import org.truffleruby.RubyContext;
32-
import org.truffleruby.collections.PEBiFunction;
33-
import org.truffleruby.core.hash.library.BucketsHashStore;
34-
import org.truffleruby.core.hash.library.HashStoreLibrary;
35-
import org.truffleruby.core.klass.RubyClass;
36-
import org.truffleruby.interop.ForeignToRubyNode;
37-
import org.truffleruby.language.Nil;
38-
import org.truffleruby.language.RubyDynamicObject;
39-
import org.truffleruby.language.dispatch.DispatchNode;
40-
import org.truffleruby.language.objects.IsFrozenNode;
41-
import org.truffleruby.language.objects.ObjectGraph;
42-
import org.truffleruby.language.objects.ObjectGraphNode;
4344

4445
@ExportLibrary(InteropLibrary.class)
4546
@ImportStatic(HashGuards.class)
@@ -86,6 +87,8 @@ public String toString() {
8687
public void getAdjacentObjects(Set<Object> reachable) {
8788
if (store instanceof BucketsHashStore) {
8889
((BucketsHashStore) store).getAdjacentObjects(reachable);
90+
} else if (store instanceof CompactHashStore) {
91+
((CompactHashStore) store).getAdjacentObjects(reachable);
8992
} else {
9093
ObjectGraph.addProperty(reachable, store);
9194
}

0 commit comments

Comments
 (0)