Skip to content

Commit a9cfd67

Browse files
committed
Merge branch 'wip-i255-single-match'
2 parents e1f2d60 + b3953ef commit a9cfd67

File tree

4 files changed

+156
-1
lines changed

4 files changed

+156
-1
lines changed

bench_traversal_test.go

+20
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ package goquery
22

33
import (
44
"testing"
5+
6+
"github.com/andybalholm/cascadia"
57
)
68

79
func BenchmarkFind(b *testing.B) {
@@ -800,3 +802,21 @@ func BenchmarkClosestNodes(b *testing.B) {
800802
b.Fatalf("want 2, got %d", n)
801803
}
802804
}
805+
806+
func BenchmarkSingleMatcher(b *testing.B) {
807+
doc := Doc()
808+
multi := cascadia.MustCompile(`div`)
809+
single := SingleMatcher(multi)
810+
b.ResetTimer()
811+
812+
b.Run("multi", func(b *testing.B) {
813+
for i := 0; i < b.N; i++ {
814+
_ = doc.FindMatcher(multi)
815+
}
816+
})
817+
b.Run("single", func(b *testing.B) {
818+
for i := 0; i < b.N; i++ {
819+
_ = doc.FindMatcher(single)
820+
}
821+
})
822+
}

example_test.go

+28
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,31 @@ func ExampleNewDocumentFromReader_string() {
8080

8181
// Output: Header
8282
}
83+
84+
func ExampleSingle() {
85+
html := `
86+
<html>
87+
<body>
88+
<div>1</div>
89+
<div>2</div>
90+
<div>3</div>
91+
</body>
92+
</html>
93+
`
94+
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
95+
if err != nil {
96+
log.Fatal(err)
97+
}
98+
99+
// By default, the selector string selects all matching nodes
100+
multiSel := doc.Find("div")
101+
fmt.Println(multiSel.Text())
102+
103+
// Using goquery.Single, only the first match is selected
104+
singleSel := doc.FindMatcher(goquery.Single("div"))
105+
fmt.Println(singleSel.Text())
106+
107+
// Output:
108+
// 123
109+
// 1
110+
}

type.go

+63-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import (
77
"net/url"
88

99
"github.com/andybalholm/cascadia"
10-
1110
"golang.org/x/net/html"
1211
)
1312

@@ -122,6 +121,45 @@ type Matcher interface {
122121
Filter([]*html.Node) []*html.Node
123122
}
124123

124+
// Single compiles a selector string to a Matcher that stops after the first
125+
// match is found.
126+
//
127+
// By default, Selection.Find and other functions that accept a selector string
128+
// to select nodes will use all matches corresponding to that selector. By
129+
// using the Matcher returned by Single, at most the first match will be
130+
// selected.
131+
//
132+
// For example, these two statements are semantically equivalent:
133+
//
134+
// sel1 := doc.Find("a").First()
135+
// sel2 := doc.FindMatcher(goquery.Single("a"))
136+
//
137+
// The one using Single is optimized to be potentially much faster on large
138+
// documents.
139+
//
140+
// Only the behaviour of the MatchAll method of the Matcher interface is
141+
// altered compared to standard Matchers. This means that the single-selection
142+
// property of the Matcher only applies for Selection methods where the Matcher
143+
// is used to select nodes, not to filter or check if a node matches the
144+
// Matcher - in those cases, the behaviour of the Matcher is unchanged (e.g.
145+
// FilterMatcher(Single("div")) will still result in a Selection with multiple
146+
// "div"s if there were many "div"s in the Selection to begin with).
147+
func Single(selector string) Matcher {
148+
return singleMatcher{compileMatcher(selector)}
149+
}
150+
151+
// SingleMatcher returns a Matcher that matches the same nodes as m, but that stops
152+
// after the first match is found.
153+
//
154+
// See the documentation of function Single for more details.
155+
func SingleMatcher(m Matcher) Matcher {
156+
if _, ok := m.(singleMatcher); ok {
157+
// m is already a singleMatcher
158+
return m
159+
}
160+
return singleMatcher{m}
161+
}
162+
125163
// compileMatcher compiles the selector string s and returns
126164
// the corresponding Matcher. If s is an invalid selector string,
127165
// it returns a Matcher that fails all matches.
@@ -133,6 +171,30 @@ func compileMatcher(s string) Matcher {
133171
return cs
134172
}
135173

174+
type singleMatcher struct {
175+
Matcher
176+
}
177+
178+
func (m singleMatcher) MatchAll(n *html.Node) []*html.Node {
179+
// Optimized version - stops finding at the first match (cascadia-compiled
180+
// matchers all use this code path).
181+
if mm, ok := m.Matcher.(interface{ MatchFirst(*html.Node) *html.Node }); ok {
182+
node := mm.MatchFirst(n)
183+
if node == nil {
184+
return nil
185+
}
186+
return []*html.Node{node}
187+
}
188+
189+
// Fallback version, for e.g. test mocks that don't provide the MatchFirst
190+
// method.
191+
nodes := m.Matcher.MatchAll(n)
192+
if len(nodes) > 0 {
193+
return nodes[:1:1]
194+
}
195+
return nil
196+
}
197+
136198
// invalidMatcher is a Matcher that always fails to match.
137199
type invalidMatcher struct{}
138200

type_test.go

+45
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"strings"
88
"testing"
99

10+
"github.com/andybalholm/cascadia"
1011
"golang.org/x/net/html"
1112
)
1213

@@ -208,3 +209,47 @@ func TestIssue103(t *testing.T) {
208209
}
209210
t.Log(text)
210211
}
212+
213+
func TestSingle(t *testing.T) {
214+
data := `
215+
<html>
216+
<body>
217+
<div class="b">1</div>
218+
<div class="a">2</div>
219+
<div class="a">3</div>
220+
<p class="b">4</p>
221+
</body>
222+
</html>
223+
`
224+
doc, err := NewDocumentFromReader(strings.NewReader(data))
225+
if err != nil {
226+
t.Fatal(err)
227+
}
228+
229+
text := doc.FindMatcher(Single("div")).Text()
230+
if text != "1" {
231+
t.Fatalf("want %q, got %q", "1", text)
232+
}
233+
234+
// Verify semantic equivalence
235+
sel1 := doc.Find("div").First()
236+
sel2 := doc.FindMatcher(Single("div"))
237+
if sel1.Text() != sel2.Text() {
238+
t.Fatalf("want sel1 to equal sel2")
239+
}
240+
241+
// Here, the Single has no effect as the selector is used to filter
242+
// from the existing selection, not to find nodes in the document.
243+
divs := doc.Find("div")
244+
text = divs.FilterMatcher(Single(".a")).Text()
245+
if text != "23" {
246+
t.Fatalf("want %q, got %q", "23", text)
247+
}
248+
249+
classA := cascadia.MustCompile(".a")
250+
classB := cascadia.MustCompile(".b")
251+
text = doc.FindMatcher(classB).AddMatcher(SingleMatcher(classA)).Text()
252+
if text != "142" {
253+
t.Fatalf("want %q, got %q", "142", text)
254+
}
255+
}

0 commit comments

Comments
 (0)