Skip to content

Avoid excessive splitting of nodes caused by a unique proc being created per call when a method reference would suffice. #3529

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 28 additions & 27 deletions src/main/ruby/truffleruby/core/splitter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def split(string, pattern, limit, &orig_block)
# To simplify the code, we present a single block
# that either calls user (origin) block or adds a substring to the resulting array
# See motivation: https://github.com/oracle/truffleruby/pull/2052#issuecomment-663449395
block = orig_block || result.method(:<<).to_proc
callable = orig_block || result.method(:<<)

return (orig_block ? string : result) if string.empty?

Expand All @@ -57,7 +57,7 @@ def split(string, pattern, limit, &orig_block)
if limit == 1
dup_string = string.dup

block.call(dup_string)
callable.call(dup_string)
return orig_block ? dup_string : result
end

Expand All @@ -72,47 +72,48 @@ def split(string, pattern, limit, &orig_block)
# See motivation: https://github.com/oracle/truffleruby/pull/2052#issuecomment-663494235
return Primitive.string_awk_split string, awk_limit, orig_block
elsif Primitive.is_a?(pattern, Regexp)
split_type_regexp(string, pattern, limit, block)
split_type_regexp(string, pattern, limit, callable)
else
pattern = StringValue(pattern)

valid_encoding?(string)
valid_encoding?(pattern)

if pattern.empty?
split_type_chars(string, limit, block)
split_type_chars(string, limit, callable)
else
split_type_string(string, pattern, limit, block)
split_type_string(string, pattern, limit, callable)
end
end

orig_block ? string : result
end
Truffle::Graal.always_split(instance_method(:split))

private

def valid_encoding?(string)
raise ArgumentError, "invalid byte sequence in #{string.encoding.name}" unless string.valid_encoding?
end

def split_type_chars(string, limit, block)
def split_type_chars(string, limit, callable)
if limit > 0
last = string.size > (limit - 1) ? string[(limit - 1)..-1] : empty_string(string)

string.each_char.each_with_index do |char, index|
break if index == limit - 1
block.call(char)
callable.call(char)
end

block.call(last)
callable.call(last)
else
string.each_char(&block)
string.each_char { |c| callable.call(c) }

block.call(empty_string(string)) if tail_empty?(limit)
callable.call(empty_string(string)) if tail_empty?(limit)
end
end

def split_type_string(string, pattern, limit, block)
def split_type_string(string, pattern, limit, callable)
pos = 0
empty_count = 0
limited = limit > 0
Expand All @@ -128,24 +129,24 @@ def split_type_string(string, pattern, limit, block)
break unless nxt

match_size = nxt - pos
empty_count = add_substring(string, ret, string.byteslice(pos, match_size), empty_count, block)
empty_count = add_substring(string, ret, string.byteslice(pos, match_size), empty_count, callable)

pos = nxt + pat_size
count += 1
end

# No more separators, but we need to grab the last part still.
empty_count = add_substring(string, ret, string.byteslice(pos, str_size - pos), empty_count, block)
empty_count = add_substring(string, ret, string.byteslice(pos, str_size - pos), empty_count, callable)

if tail_empty?(limit)
add_empty(string, ret, empty_count, block)
add_empty(string, ret, empty_count, callable)
end
end

def split_type_regexp(string, pattern, limit, block)
def split_type_regexp(string, pattern, limit, callable)
# Handle // as a special case.
if pattern.source.empty?
return split_type_chars(string, limit, block)
return split_type_chars(string, limit, callable)
end

start = 0
Expand All @@ -164,12 +165,12 @@ def split_type_regexp(string, pattern, limit, block)

unless collapsed && (Primitive.match_data_byte_begin(match, 0) == last_match_end)
substring = Truffle::RegexpOperations.pre_match_from(match, last_match_end)
empty_count = add_substring(string, ret, substring, empty_count, block)
empty_count = add_substring(string, ret, substring, empty_count, callable)

# length > 1 means there are captures
if match.length > 1
match.captures.compact.each do |capture|
empty_count = add_substring(string, ret, capture, empty_count, block)
empty_count = add_substring(string, ret, capture, empty_count, callable)
end
end

Expand All @@ -186,31 +187,31 @@ def split_type_regexp(string, pattern, limit, block)
end

if last_match
empty_count = add_substring(string, ret, last_match.post_match, empty_count, block)
empty_count = add_substring(string, ret, last_match.post_match, empty_count, callable)
elsif ret.empty?
empty_count = add_substring(string, ret, string.dup, empty_count, block)
empty_count = add_substring(string, ret, string.dup, empty_count, callable)
end

if tail_empty?(limit)
add_empty(string, ret, empty_count, block)
add_empty(string, ret, empty_count, callable)
end

block ? string : ret
callable ? string : ret
end


def add_substring(string, array, substring, empty_count, block)
def add_substring(string, array, substring, empty_count, callable)
return empty_count + 1 if substring.length == 0 # remember another one empty match

add_empty(string, array, empty_count, block)
add_empty(string, array, empty_count, callable)

block.call(substring)
callable.call(substring)

0 # always release all empties when we get non empty substring
end

def add_empty(string, array, count, block)
count.times { block.call(empty_string(string)) }
def add_empty(string, array, count, callable)
count.times { callable.call(empty_string(string)) }
end

def empty_string(original)
Expand Down
1 change: 1 addition & 0 deletions src/main/ruby/truffleruby/core/string.rb
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ def scan(pattern, &block)
def split(pattern = nil, limit = undefined, &block)
Truffle::Splitter.split(Primitive.dup_as_string_instance(self), pattern, limit, &block)
end
Truffle::Graal.always_split(instance_method(:split))

def squeeze(*strings)
str = Primitive.dup_as_string_instance(self)
Expand Down