Skip to content

Commit

Permalink
Abbreviation matcher
Browse files Browse the repository at this point in the history
  • Loading branch information
sergei-dyshel committed Sep 6, 2023
1 parent a294acc commit 4f215b6
Show file tree
Hide file tree
Showing 8 changed files with 660 additions and 4 deletions.
58 changes: 58 additions & 0 deletions README.markdown
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
This is a fork of the popular command-line fuzzy finder
[fzf](https://github.com/junegunn/fzf) that implements so-called
*abbreviation* (or *acronym*) fuzzy matching.

Unlike "normal" fuzzy matching, this method does not allow pattern letters to
match at any position of the searched text, but only at word beginnings. In
other words, the search query must constitute a valid abbreviation (or acronym)
of some words from the searched text.

For example, `foo bar` will be matched by `fb` or `fob` but not by `fr` or
`far`.

Please see the README of my other project
[vim-abbrev-matcher](https://github.com/vim-scripts/vim-abbrev-matcher) for
expanded reasoning and the benefits of this approach. In short, this type of
search filters candidates much better and faster than normal fuzzy search, which
is beneficial in some use cases, such as:
- When searching command-line histories you might prefer to turn sorting off in
order to see filtered candidates in original (chronological) order. In this
case abbreviation matching narrows candidates much faster.
- Fuzzy searchers often provide some sorting heuristic which scores each match
and tries to give you the "best" matches first. This heuristic may fail for your
specific use case and give you a large number of unrelated results.
Abbreviation matching also uses such a heuristic, but it produces far fewer
results, so there is a better chance that the needed element will be among the
displayed results even when it has a bad score.

# Installation

```
go get github.com/sergei-dyshel/fzf-abbrev
```

# Usage

`fzf-abbrev` may be used as a drop-in replacement for `fzf`. In that case just
copy the compiled binary over `~/.fzf/bin/fzf`. In order to get "normal" fuzzy
matching, start your query with `#`.

There is one new command-line option, `--abbrev`, which takes a comma-separated
list of options that control the new matcher's behavior:
- `no-default` brings back normal fuzzy matching as default behavior, so
`fzf-abbrev --abbrev=no-default` works the same way as original `fzf`.
- `fast` makes matching faster, but the resulting scoring may not be optimal. Use
this option when candidate sorting is off (e.g. for filtering histories).
- `file-paths` optimizes scoring heuristic to match file paths. In this case it
will prefer matching file names over matching file directories.

# Scoring

Basically similar to
[vim-abbrev-matcher](https://github.com/vim-scripts/vim-abbrev-matcher#ranking).
The weights are slightly different though.

# Development status

This is a work in progress, but the current version is already pretty stable and
usable.
120 changes: 120 additions & 0 deletions src/algo/abbrev/abbrev_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
package abbrev

import (
"testing"
)

// logMatch runs checkMatch for pattern against input and returns its result
// unchanged. When the result is non-nil it also logs the pattern, the score
// and the input as rendered by markPosInStr for the matched positions.
func logMatch(tb testing.TB, input, pattern string) *result {
	match := checkMatch([]rune(input), []rune(pattern))
	if match == nil {
		// Nothing to log when there is no result.
		return nil
	}
	marked := markPosInStr(input, match.pos)
	tb.Logf("pattern: '%s' score: %d input: '%s'", pattern, match.score, marked)
	return match
}

// assertMatch fails the test immediately unless pattern matches input.
// On success it returns the non-nil match result so callers can inspect it
// (for example to compare scores).
func assertMatch(tb testing.TB, input, pattern string) (res *result) {
	// Mark as a helper so a failure is reported at the caller's line
	// instead of inside this function.
	tb.Helper()

	res = logMatch(tb, input, pattern)
	if res == nil {
		tb.Fatalf("'%s' should match '%s'", input, pattern)
	}
	return res
}

// assertNoMatch fails the test immediately if pattern matches input.
// It accepts testing.TB (like assertMatch) so it can be used from both tests
// and benchmarks. The returned result is nil whenever the function returns
// normally, because a non-nil result aborts the test via Fatalf.
func assertNoMatch(t testing.TB, input, pattern string) (res *result) {
	// Mark as a helper so a failure is reported at the caller's line
	// instead of inside this function.
	t.Helper()

	res = logMatch(t, input, pattern)
	if res != nil {
		t.Fatalf("'%s' should not match '%s'", input, pattern)
	}
	return res
}

// assertMatchMany asserts, in order, that every one of patterns matches
// input. The test is aborted at the first pattern that does not match.
func assertMatchMany(t *testing.T, input string, patterns ...string) {
	// Mark as a helper so failures point at the test that called us.
	t.Helper()

	for _, pattern := range patterns {
		assertMatch(t, input, pattern)
	}
}

// assertNoMatchMany asserts, in order, that none of patterns matches input.
// The test is aborted at the first pattern that does match.
func assertNoMatchMany(t *testing.T, input string, patterns ...string) {
	// Mark as a helper so failures point at the test that called us.
	t.Helper()

	for _, pattern := range patterns {
		assertNoMatch(t, input, pattern)
	}
}

// assertBetterMatch asserts that pattern matches both inputs and that input1
// ranks before input2, i.e. that input1's score is strictly lower than
// input2's. Equal scores are treated as a failure.
func assertBetterMatch(t *testing.T, input1, input2 string, pattern string) {
	// Mark as a helper so failures point at the test that called us.
	t.Helper()

	res1 := assertMatch(t, input1, pattern)
	res2 := assertMatch(t, input2, pattern)
	if res1.score >= res2.score {
		t.Fatalf("'%s' (score %d) should come before '%s' (score %d) when matching '%s'",
			input1, res1.score, input2, res2.score, pattern)
	}
}

// assertMatchOrder asserts that inputs are listed from best to worst match
// for pattern: every adjacent pair is checked with assertBetterMatch. With
// fewer than two inputs there is nothing to compare and the call is a no-op.
func assertMatchOrder(t *testing.T, pattern string, inputs ...string) {
	// Mark as a helper so failures point at the test that called us.
	t.Helper()

	for i := 0; i < len(inputs)-1; i++ {
		assertBetterMatch(t, inputs[i], inputs[i+1], pattern)
	}
}

// TestSimple checks basic match / no-match decisions, after calling
// Opts.Default(), for camel-case, space-separated and digit-containing inputs.
func TestSimple(t *testing.T) {
	Opts.Default()

	cases := []struct {
		input     string
		wantMatch bool
		patterns  []string
	}{
		{"FooBarQux", true, []string{"fbq", "foob"}},
		{"FooBarQux", false, []string{"fbx", "foobr"}},
		{"foo bar qux", true, []string{"fbq", "foob"}},
		{"foo19bar", true, []string{"f1", "f19", "f1b", "foo1"}},
	}
	for _, tc := range cases {
		if tc.wantMatch {
			assertMatchMany(t, tc.input, tc.patterns...)
			continue
		}
		assertNoMatchMany(t, tc.input, tc.patterns...)
	}
}

// TestScoring checks, after calling Opts.Default(), that for the pattern
// "fbq" the inputs below are ranked in the listed order (best match first).
func TestScoring(t *testing.T) {
	Opts.Default()

	ranked := []string{"FooBarQux", "Foo BarQux", "Foo Bar Qux"}
	assertMatchOrder(t, "fbq", ranked...)
	// TODO: add an ordering case for the pattern "For" (e.g. against
	// "ForLoop"); the original draft of that assertion was left unfinished.
}

// TestFilePaths checks ranking for the pattern "fbq" after the options have
// been set with Opts.Parse("file-paths"): the inputs below must be ranked in
// the listed order (best match first).
func TestFilePaths(t *testing.T) {
	Opts.Parse("file-paths")

	ranked := []string{"foo_bar_qux", "foo/bar_qux", "foo/bar/qux"}
	assertMatchOrder(t, "fbq", ranked...)
}

// TestSkippedStart checks, after calling Opts.Default(), how inputs whose
// match does not begin at the start of the text are ranked for the pattern
// "fbq": the inputs below must be ranked in the listed order (best first).
func TestSkippedStart(t *testing.T) {
	Opts.Default()

	ranked := []string{
		"foo_bar_qux",
		"some_fbq",
		"some_fb_qux",
		"some_foo_bar_qux",
	}
	assertMatchOrder(t, "fbq", ranked...)
}

// benchmarkMatch measures checkMatch for a single input/pattern pair.
// Both strings are converted to rune slices once, up front, so that only the
// checkMatch calls themselves are repeated b.N times.
func benchmarkMatch(b *testing.B, input string, pattern string) {
	inputRunes := []rune(input)
	patternRunes := []rune(pattern)

	// Exclude the one-time rune conversions above from the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		checkMatch(inputRunes, patternRunes)
	}
}

// benchmarkMatchMany runs benchmarkMatch as a separate sub-benchmark for each
// pattern against the same input; each sub-benchmark is named after its
// pattern.
func benchmarkMatchMany(b *testing.B, input string, patterns ...string) {
	for idx := range patterns {
		current := patterns[idx]
		run := func(sub *testing.B) {
			benchmarkMatch(sub, input, current)
		}
		b.Run(current, run)
	}
}
// BenchmarkLongLine benchmarks matching several patterns against a long
// `./configure` command line, with Opts.FirstMatchOnly set to true.
func BenchmarkLongLine(b *testing.B) {
	Opts.FirstMatchOnly = true

	// The command line is assembled from one fragment per flag; every
	// fragment except the last ends with a separating space.
	const commandLine = "./configure --with-features=huge " +
		"--enable-multibyte " +
		"--enable-rubyinterp=yes " +
		"--enable-pythoninterp=yes " +
		"--with-python-config-dir=/usr/lib/python2.7/config " +
		"--enable-python3interp=yes " +
		"--with-python3-config-dir=/usr/lib/python3.5/config " +
		"--enable-perlinterp=yes " +
		"--enable-luainterp=yes " +
		"--enable-gui=gtk2 " +
		"--enable-cscope " +
		"--prefix=/usr/local"

	queries := []string{"cohuge", "enagtk2", "cwfh"}
	benchmarkMatchMany(b, commandLine, queries...)
}

// BenchmarkVeryLongLine benchmarks checkMatch (via benchmarkMatch) for the
// pattern "e8slashmany" against a very long input made of several `rg`
// command lines, with Opts.FirstMatchOnly set to true.
func BenchmarkVeryLongLine(b *testing.B) {
Opts.FirstMatchOnly = true
// The input is a raw string literal: the `\n` and `\b` sequences inside it
// are literal backslash sequences, not escapes, and the literal contains one
// real line break before its final segment.
input := `rg --hidden --heading --line-number --color ansi --colors path:none --colors line:none --colors match:fg:red --colors match:style:nobold --ignore-case -g E8/**/.git -g !/home/sergei/e8/code/qux-E8/**/.svn -g !/home/sergei/e8/code/qux-E8/**/.hg -g !/home/sergei/e8/code/qux-E8/**/CVS -g !/home/sergei/e8/code/qux-E8/**/.DS_Store -g !/home/sergei/e8/code/qux-E8/**/node_modules -g !/home/sergei/e8/code/qux-E8/**/bower_components -g !/home/sergei/e8/code/qux-E8/.vscode --max-filesize 17179869184 --no-ignore-parent --follow --regexp \binclude.*nvme\b -- /home/sergei/e8/code/qux-E8\n: 1524066773:0;rg --hidden --heading --line-number --color ansi --colors path:none --colors line:none --colors match:fg:red --colors match:style:nobold --ignore-case -g E8/**/.git -g E8/**/.svn -g !/home/sergei/e8/code/qux-E8/**/.hg -g !/home/sergei/e8/code/qux-E8/**/CVS -g !/home/sergei/e8/code/qux-E8/**/.DS_Store -g !/home/sergei/e8/code/qux-E8/**/node_modules -g !/home/sergei/e8/code/qux-E8/**/bower_components -g !/home/sergei/e8/code/qux-E8/.vscode --max-filesize 17179869184 --no-ignore-parent --follow --regexp \binclude.*nvme\b -- /home/sergei/e8/code/qux-E8\n: 1524066839:0;rg --hidden --heading --line-number --color ansi --colors path:none --colors line:none --colors match:fg:red --colors match:style:nobold --ignore-case -g "E8/**/.git" -g "E8/**/.svn" -g "E8/**/.hg" -g "!/home/sergei/e8/code/qux-E8/**/CVS" -g "!/home/sergei/e8/code/qux-E8/**/.DS_Store" -g "!/home/sergei/e8/code/qux-E8/**/node_modules" -g "!/home/sergei/e8/code/qux-E8/**/bower_components" -g "!/home/sergei/e8/code/qux-E8/.vscode" --max-filesize 17179869184 --no-ignore-parent --follow --regexp "\binclud.*nvme\b" -- /home/sergei/e8/code/qux-E8\n: 1524066903:0;rg --hidden --heading --line-number --color ansi --colors path:none --colors line:none --colors match:fg:red --colors match:style:nobold --ignore-case --max-filesize 17179869184 --follow --regexp "\binclud.*nvme\b" -- /home/sergei/e8/code/qux-E8\n: 
1524066987:0;cd ../../`
benchmarkMatch(b, input, "e8slashmany")
}
Loading

0 comments on commit 4f215b6

Please sign in to comment.