Skip to content

Commit e80ffdd

Browse files
naitohtompngkou
authored
Improve using // in XPath performance (#249)
When using `//` in XPath, the deeper the tag hierarchy, the slower it becomes due to the namespace acquisition process. Caching namespace information improves performance when using `//` with XPath. ## Benchmark (Comparison with rexml 3.4.1) ``` $ benchmark-driver benchmark/xpath.yaml Calculating ------------------------------------- rexml 3.4.1 master 3.4.1(YJIT) master(YJIT) REXML::XPath.match(REXML::Document.new(xml), 'a//a') 29.215 234.909 108.945 492.410 i/s - 100.000 times in 3.422925s 0.425697s 0.917898s 0.203083s Comparison: REXML::XPath.match(REXML::Document.new(xml), 'a//a') master(YJIT): 492.4 i/s master: 234.9 i/s - 2.10x slower 3.4.1(YJIT): 108.9 i/s - 4.52x slower rexml 3.4.1: 29.2 i/s - 16.85x slower ``` - YJIT=ON : 4.52x faster - YJIT=OFF : 8.04x faster --------- Co-authored-by: tomoya ishida <tomoyapenguin@gmail.com> Co-authored-by: Sutou Kouhei <kou@clear-code.com>
1 parent cd575a1 commit e80ffdd

File tree

7 files changed

+106
-37
lines changed

7 files changed

+106
-37
lines changed

benchmark/xpath.yaml

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
loop_count: 100
2+
contexts:
3+
- gems:
4+
rexml: 3.2.6
5+
require: false
6+
prelude: require 'rexml'
7+
- name: master
8+
prelude: |
9+
$LOAD_PATH.unshift(File.expand_path("lib"))
10+
require 'rexml'
11+
- name: 3.2.6(YJIT)
12+
gems:
13+
rexml: 3.2.6
14+
require: false
15+
prelude: |
16+
require 'rexml'
17+
RubyVM::YJIT.enable
18+
- name: master(YJIT)
19+
prelude: |
20+
$LOAD_PATH.unshift(File.expand_path("lib"))
21+
require 'rexml'
22+
RubyVM::YJIT.enable
23+
24+
prelude: |
25+
require 'rexml/document'
26+
27+
DEPTH = 100
28+
xml = '<a>' * DEPTH + '</a>' * DEPTH
29+
doc = REXML::Document.new(xml)
30+
31+
benchmark:
32+
"REXML::XPath.match(REXML::Document.new(xml), 'a//a')" : REXML::XPath.match(doc, "a//a")

lib/rexml/attribute.rb

+4
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,10 @@ def xpath
206206
path += "/@#{self.expanded_name}"
207207
return path
208208
end
209+
210+
def document
211+
@element&.document
212+
end
209213
end
210214
end
211215
#vim:ts=2 sw=2 noexpandtab:

lib/rexml/document.rb

+14
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,20 @@ def document
448448
end
449449

450450
private
451+
452+
attr_accessor :namespaces_cache
453+
454+
# New document level cache is created and available in this block.
455+
# This API is thread unsafe. Users can't change this document in this block.
456+
def enable_cache
457+
@namespaces_cache = {}
458+
begin
459+
yield
460+
ensure
461+
@namespaces_cache = nil
462+
end
463+
end
464+
451465
def build( source )
452466
Parsers::TreeParser.new( source, self ).parse
453467
end

lib/rexml/element.rb

+17-16
Original file line numberDiff line numberDiff line change
@@ -589,10 +589,12 @@ def prefixes
589589
# d.elements['//c'].namespaces # => {"x"=>"1", "y"=>"2", "z"=>"3"}
590590
#
591591
def namespaces
592-
namespaces = {}
593-
namespaces = parent.namespaces if parent
594-
namespaces = namespaces.merge( attributes.namespaces )
595-
return namespaces
592+
namespaces_cache = document&.__send__(:namespaces_cache)
593+
if namespaces_cache
594+
namespaces_cache[self] ||= calculate_namespaces
595+
else
596+
calculate_namespaces
597+
end
596598
end
597599

598600
# :call-seq:
@@ -619,17 +621,9 @@ def namespace(prefix=nil)
619621
if prefix.nil?
620622
prefix = prefix()
621623
end
622-
if prefix == ''
623-
prefix = "xmlns"
624-
else
625-
prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
626-
end
627-
ns = nil
628-
target = self
629-
while ns.nil? and target
630-
ns = target.attributes[prefix]
631-
target = target.parent
632-
end
624+
prefix = (prefix == '') ? 'xmlns' : prefix.delete_prefix("xmlns:")
625+
ns = namespaces[prefix]
626+
633627
ns = '' if ns.nil? and prefix == 'xmlns'
634628
return ns
635629
end
@@ -1516,8 +1510,15 @@ def write(output=$stdout, indent=-1, transitive=false, ie_hack=false)
15161510
formatter.write( self, output )
15171511
end
15181512

1519-
15201513
private
1514+
def calculate_namespaces
1515+
if parent
1516+
parent.namespaces.merge(attributes.namespaces)
1517+
else
1518+
attributes.namespaces
1519+
end
1520+
end
1521+
15211522
def __to_xpath_helper node
15221523
rv = node.expanded_name.clone
15231524
if node.parent

lib/rexml/xpath_parser.rb

+12-15
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,15 @@ def variables=( vars={} )
7878

7979
def parse path, node
8080
path_stack = @parser.parse( path )
81-
match( path_stack, node )
81+
if node.is_a?(Array)
82+
Kernel.warn("REXML::XPath.each, REXML::XPath.first, REXML::XPath.match dropped support for nodeset...", uplevel: 1)
83+
return [] if node.empty?
84+
node = node.first
85+
end
86+
87+
node.document.__send__(:enable_cache) do
88+
match( path_stack, node )
89+
end
8290
end
8391

8492
def get_first path, node
@@ -137,11 +145,6 @@ def first( path_stack, node )
137145

138146

139147
def match(path_stack, node)
140-
if node.is_a?(Array)
141-
Kernel.warn("REXML::XPath.each, REXML::XPath.first, REXML::XPath.match dropped support for nodeset...", uplevel: 1)
142-
return [] if node.empty?
143-
node = node.first
144-
end
145148
nodeset = [XPathNode.new(node, position: 1)]
146149
result = expr(path_stack, nodeset)
147150
case result
@@ -494,24 +497,18 @@ def node_test(path_stack, nodesets, any_type: :element)
494497
if strict?
495498
raw_node.name == name and raw_node.namespace == ""
496499
else
497-
# FIXME: This DOUBLES the time XPath searches take
498-
ns = get_namespace(raw_node, prefix)
499-
raw_node.name == name and raw_node.namespace == ns
500+
raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix)
500501
end
501502
else
502-
# FIXME: This DOUBLES the time XPath searches take
503-
ns = get_namespace(raw_node, prefix)
504-
raw_node.name == name and raw_node.namespace == ns
503+
raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix)
505504
end
506505
when :attribute
507506
if prefix.nil?
508507
raw_node.name == name
509508
elsif prefix.empty?
510509
raw_node.name == name and raw_node.namespace == ""
511510
else
512-
# FIXME: This DOUBLES the time XPath searches take
513-
ns = get_namespace(raw_node.element, prefix)
514-
raw_node.name == name and raw_node.namespace == ns
511+
raw_node.name == name and raw_node.namespace == get_namespace(raw_node.element, prefix)
515512
end
516513
else
517514
false

test/test_core.rb

+17-6
Original file line numberDiff line numberDiff line change
@@ -653,18 +653,23 @@ def test_namespace
653653
assert_equal "<sean:blah>Some text</sean:blah>", out
654654
end
655655

656-
657656
def test_add_namespace
658657
e = Element.new 'a'
658+
assert_equal("", e.namespace)
659+
assert_nil(e.namespace('foo'))
659660
e.add_namespace 'someuri'
660661
e.add_namespace 'foo', 'otheruri'
661662
e.add_namespace 'xmlns:bar', 'thirduri'
662-
assert_equal 'someuri', e.attributes['xmlns']
663-
assert_equal 'otheruri', e.attributes['xmlns:foo']
664-
assert_equal 'thirduri', e.attributes['xmlns:bar']
663+
assert_equal("someuri", e.namespace)
664+
assert_equal("otheruri", e.namespace('foo'))
665+
assert_equal("otheruri", e.namespace('xmlns:foo'))
666+
assert_equal("thirduri", e.namespace('bar'))
667+
assert_equal("thirduri", e.namespace('xmlns:bar'))
668+
assert_equal('someuri', e.attributes['xmlns'])
669+
assert_equal('otheruri', e.attributes['xmlns:foo'])
670+
assert_equal('thirduri', e.attributes['xmlns:bar'])
665671
end
666672

667-
668673
def test_big_documentation
669674
d = File.open(fixture_path("documentation.xml")) {|f| Document.new f }
670675
assert_equal "Sean Russell", d.elements["documentation/head/author"].text.tr("\n\t", " ").squeeze(" ")
@@ -764,9 +769,15 @@ def test_attributes_each
764769

765770
def test_delete_namespace
766771
doc = Document.new "<a xmlns='1' xmlns:x='2'/>"
772+
assert_equal("1", doc.root.namespace)
773+
assert_equal("2", doc.root.namespace('x'))
774+
assert_equal("2", doc.root.namespace('xmlns:x'))
767775
doc.root.delete_namespace
768776
doc.root.delete_namespace 'x'
769-
assert_equal "<a/>", doc.to_s
777+
assert_equal("<a/>", doc.to_s)
778+
assert_equal("", doc.root.namespace)
779+
assert_nil(doc.root.namespace('x'))
780+
assert_nil(doc.root.namespace('xmlns:x'))
770781
end
771782

772783
def test_each_element_with_attribute

test/xpath/test_base.rb

+10
Original file line numberDiff line numberDiff line change
@@ -1193,6 +1193,16 @@ def test_namespaces_0
11931193
assert_equal( 1, XPath.match( d, "//x:*" ).size )
11941194
end
11951195

1196+
def test_namespaces_cache
1197+
doc = Document.new("<a xmlns='1'><b/></a>")
1198+
assert_equal("<b/>", XPath.first(doc, "//b[namespace-uri()='1']").to_s)
1199+
assert_nil(XPath.first(doc, "//b[namespace-uri()='']"))
1200+
1201+
doc.root.delete_namespace
1202+
assert_nil(XPath.first(doc, "//b[namespace-uri()='1']"))
1203+
assert_equal("<b/>", XPath.first(doc, "//b[namespace-uri()='']").to_s)
1204+
end
1205+
11961206
def test_ticket_71
11971207
doc = Document.new(%Q{<root xmlns:ns1="xyz" xmlns:ns2="123"><element ns1:attrname="foo" ns2:attrname="bar"/></root>})
11981208
el = doc.root.elements[1]

0 commit comments

Comments
 (0)