Skip to content

Commit

Permalink
bug fixes and unit tests for single index implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
CascadingRadium committed Nov 29, 2024
1 parent 784f45b commit f19cedc
Show file tree
Hide file tree
Showing 7 changed files with 517 additions and 21 deletions.
42 changes: 28 additions & 14 deletions search/query/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"fmt"
"io"
"log"
"strings"

"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/mapping"
Expand Down Expand Up @@ -487,16 +488,21 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno
}
case *FuzzyQuery:
field, source := resolveFieldAndSource(q.FieldVal)
fuzziness := q.Fuzziness
if q.autoFuzzy {
fuzziness = searcher.GetAutoFuzziness(q.Term)
}
if source != "" {
return addFuzzySynonymsForTerm(ctx, source, field, q.Term, q.Fuzziness, q.Prefix, r, rv)
return addFuzzySynonymsForTerm(ctx, source, field, q.Term, fuzziness, q.Prefix, r, rv)
}
case *MatchQuery, *MatchPhraseQuery:
var analyzerName, matchString, fieldVal string
var fuzziness, prefix int
var autoFuzzy bool
if mq, ok := q.(*MatchQuery); ok {
analyzerName, fieldVal, matchString, fuzziness, prefix = mq.Analyzer, mq.FieldVal, mq.Match, mq.Fuzziness, mq.Prefix
analyzerName, fieldVal, matchString, fuzziness, prefix, autoFuzzy = mq.Analyzer, mq.FieldVal, mq.Match, mq.Fuzziness, mq.Prefix, mq.autoFuzzy
} else if mpq, ok := q.(*MatchPhraseQuery); ok {
analyzerName, fieldVal, matchString, fuzziness = mpq.Analyzer, mpq.FieldVal, mpq.MatchPhrase, mpq.Fuzziness
analyzerName, fieldVal, matchString, fuzziness, autoFuzzy = mpq.Analyzer, mpq.FieldVal, mpq.MatchPhrase, mpq.Fuzziness, mpq.autoFuzzy
}
field, source := resolveFieldAndSource(fieldVal)
if source != "" {
Expand All @@ -506,6 +512,9 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno
}
tokens := analyzer.Analyze([]byte(matchString))
for _, token := range tokens {
if autoFuzzy {
fuzziness = searcher.GetAutoFuzziness(string(token.Term))
}
rv, err = addFuzzySynonymsForTerm(ctx, source, field, string(token.Term), fuzziness, prefix, r, rv)
if err != nil {
return nil, err
Expand All @@ -514,10 +523,12 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno
}
case *MultiPhraseQuery, *PhraseQuery:
var fieldVal string
var fuzziness int
var autoFuzzy bool
if mpq, ok := q.(*MultiPhraseQuery); ok {
fieldVal = mpq.FieldVal
fieldVal, fuzziness, autoFuzzy = mpq.FieldVal, mpq.Fuzziness, mpq.autoFuzzy
} else if pq, ok := q.(*PhraseQuery); ok {
fieldVal = pq.FieldVal
fieldVal, fuzziness, autoFuzzy = pq.FieldVal, pq.Fuzziness, pq.autoFuzzy
}
field, source := resolveFieldAndSource(fieldVal)
if source != "" {
Expand All @@ -531,7 +542,10 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno
}
for _, term := range terms {
var err error
rv, err = addSynonymsForTerm(ctx, source, term, field, r, rv)
if autoFuzzy {
fuzziness = searcher.GetAutoFuzziness(term)
}
rv, err = addFuzzySynonymsForTerm(ctx, source, field, term, fuzziness, 0, r, rv)
if err != nil {
return nil, err
}
Expand All @@ -552,12 +566,12 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno
case *RegexpQuery:
field, source := resolveFieldAndSource(q.FieldVal)
if source != "" {
return addRegexpSynonymsForTerm(ctx, source, field, q.Regexp, r, rv)
return addRegexpSynonymsForTerm(ctx, source, field, strings.TrimPrefix(q.Regexp, "^"), r, rv)
}
case *TermQuery:
field, source := resolveFieldAndSource(q.FieldVal)
if source != "" {
return addSynonymsForTerm(ctx, source, q.Term, field, r, rv)
return addSynonymsForTerm(ctx, source, field, q.Term, r, rv)
}
case *WildcardQuery:
field, source := resolveFieldAndSource(q.FieldVal)
Expand Down Expand Up @@ -594,7 +608,7 @@ func addRegexpSynonymsForTerm(ctx context.Context, src, field, term string,
return nil, err
}
for _, term := range regexpTerms {
rv, err = addSynonymsForTerm(ctx, src, term, field, r, rv)
rv, err = addSynonymsForTerm(ctx, src, field, term, r, rv)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -628,7 +642,7 @@ func addPrefixSynonymsForTerm(ctx context.Context, src, field, term string,
return nil, err
}
for _, term := range prefixTerms {
rv, err = addSynonymsForTerm(ctx, src, term, field, r, rv)
rv, err = addSynonymsForTerm(ctx, src, field, term, r, rv)
if err != nil {
return nil, err
}
Expand All @@ -641,7 +655,7 @@ func addPrefixSynonymsForTerm(ctx context.Context, src, field, term string,
func addFuzzySynonymsForTerm(ctx context.Context, src, field, term string, fuzziness, prefix int,
r index.SynonymReader, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) {
if fuzziness == 0 {
return addSynonymsForTerm(ctx, src, term, field, r, rv)
return addSynonymsForTerm(ctx, src, field, term, r, rv)
}
if ir, ok := r.(index.IndexReaderFuzzy); ok {
if fuzziness > searcher.MaxFuzziness {
Expand Down Expand Up @@ -677,7 +691,7 @@ func addFuzzySynonymsForTerm(ctx context.Context, src, field, term string, fuzzi
return nil, err
}
for _, term := range fuzzyTerms {
rv, err = addSynonymsForTerm(ctx, src, term, field, r, rv)
rv, err = addSynonymsForTerm(ctx, src, field, term, r, rv)
if err != nil {
return nil, err
}
Expand All @@ -689,8 +703,8 @@ func addFuzzySynonymsForTerm(ctx context.Context, src, field, term string, fuzzi

// addSynonymsForTerm finds synonyms for the given term and adds them to the
// provided map.
func addSynonymsForTerm(ctx context.Context, src, term, field string, r index.SynonymReader,
rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) {
func addSynonymsForTerm(ctx context.Context, src, field, term string,
r index.SynonymReader, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) {

termBytes := []byte(term)
termReader, err := r.SynonymTermReader(ctx, src, termBytes)
Expand Down
22 changes: 17 additions & 5 deletions search/searcher/search_fuzzy.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,11 @@ func NewFuzzySearcher(ctx context.Context, indexReader index.IndexReader, term s
// since the fuzzy candidate terms are not collected
// for a term search, and the only candidate term is
// the term itself
fuzzyTermMatches := ctx.Value(search.FuzzyMatchPhraseKey)
if fuzzyTermMatches != nil {
fuzzyTermMatches.(map[string][]string)[term] = []string{term}
if ctx != nil {
fuzzyTermMatches := ctx.Value(search.FuzzyMatchPhraseKey)
if fuzzyTermMatches != nil {
fuzzyTermMatches.(map[string][]string)[term] = []string{term}
}
}
return NewTermSearcher(ctx, indexReader, term, field, boost, options)
}
Expand Down Expand Up @@ -94,12 +96,22 @@ func NewFuzzySearcher(ctx context.Context, indexReader index.IndexReader, term s
fuzzyTermMatches.(map[string][]string)[term] = candidates
}
}
// check if the candidates are empty or have one term which is the term itself
if len(candidates) == 0 || (len(candidates) == 1 && candidates[0] == term) {
if ctx != nil {
fuzzyTermMatches := ctx.Value(search.FuzzyMatchPhraseKey)
if fuzzyTermMatches != nil {
fuzzyTermMatches.(map[string][]string)[term] = []string{term}
}
}
return NewTermSearcher(ctx, indexReader, term, field, boost, options)
}

return NewMultiTermSearcherBoosted(ctx, indexReader, candidates, field,
boost, editDistances, options, true)
}

func getAutoFuzziness(term string) int {
func GetAutoFuzziness(term string) int {
termLength := len(term)
if termLength > AutoFuzzinessHighThreshold {
return MaxFuzziness
Expand All @@ -111,7 +123,7 @@ func getAutoFuzziness(term string) int {

func NewAutoFuzzySearcher(ctx context.Context, indexReader index.IndexReader, term string,
prefix int, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) {
return NewFuzzySearcher(ctx, indexReader, term, prefix, getAutoFuzziness(term), field, boost, options)
return NewFuzzySearcher(ctx, indexReader, term, prefix, GetAutoFuzziness(term), field, boost, options)
}

type fuzzyCandidates struct {
Expand Down
37 changes: 37 additions & 0 deletions search/searcher/search_phrase.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,40 @@ func NewMultiPhraseSearcher(ctx context.Context, indexReader index.IndexReader,
}
}

if fts, ok := ctx.Value(search.FieldTermSynonymMapKey).(search.FieldTermSynonymMap); ok {
if ts, exists := fts[field]; exists {
if fuzzinessEnabled {
for term, fuzzyTerms := range fuzzyTermMatches {
fuzzySynonymTerms := make([]string, 0, len(fuzzyTerms))
if s, found := ts[term]; found {
fuzzySynonymTerms = append(fuzzySynonymTerms, s...)
}
for _, fuzzyTerm := range fuzzyTerms {
if fuzzyTerm == term {
continue
}
if s, found := ts[fuzzyTerm]; found {
fuzzySynonymTerms = append(fuzzySynonymTerms, s...)
}
}
if len(fuzzySynonymTerms) > 0 {
fuzzyTermMatches[term] = append(fuzzyTermMatches[term], fuzzySynonymTerms...)
}
}
} else {
for _, termPos := range terms {
for _, term := range termPos {
if s, found := ts[term]; found {
if fuzzyTermMatches == nil {
fuzzyTermMatches = make(map[string][]string)
}
fuzzyTermMatches[term] = s
}
}
}
}
}
}
mustSearcher, err := NewConjunctionSearcher(ctx, indexReader, termPositionSearchers, options)
if err != nil {
// close any searchers already opened
Expand Down Expand Up @@ -337,6 +371,9 @@ func (s *PhraseSearcher) expandFuzzyMatches(tlm search.TermLocationMap, expanded
for term, fuzzyMatches := range s.fuzzyTermMatches {
locations := tlm[term]
for _, fuzzyMatch := range fuzzyMatches {
if fuzzyMatch == term {
continue
}
locations = append(locations, tlm[fuzzyMatch]...)
}
expandedTlm[term] = locations
Expand Down
4 changes: 4 additions & 0 deletions search/searcher/search_regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ func NewRegexpStringSearcher(ctx context.Context, indexReader index.IndexReader,
if err != nil {
return nil, err
}
// check if the candidateTerms are empty or have one term which is the term itself
if len(candidateTerms) == 0 || (len(candidateTerms) == 1 && candidateTerms[0] == pattern) {
return NewTermSearcher(ctx, indexReader, pattern, field, boost, options)
}

return NewMultiTermSearcher(ctx, indexReader, candidateTerms, field, boost,
options, true)
Expand Down
57 changes: 55 additions & 2 deletions search/searcher/search_term.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,23 @@ type TermSearcher struct {
tfd index.TermFieldDoc
}

func NewTermSearcher(ctx context.Context, indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
func NewTermSearcher(ctx context.Context, indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) {
if isTermQuery(ctx) {
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Term)
}
return NewTermSearcherBytes(ctx, indexReader, []byte(term), field, boost, options)
}

func NewTermSearcherBytes(ctx context.Context, indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
func NewTermSearcherBytes(ctx context.Context, indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) {
if ctx != nil {
if fts, ok := ctx.Value(search.FieldTermSynonymMapKey).(search.FieldTermSynonymMap); ok {
if ts, exists := fts[field]; exists {
if s, found := ts[string(term)]; found {
return NewSynonymSearcher(ctx, indexReader, term, s, field, boost, options)
}
}
}
}
needFreqNorm := options.Score != "none"
reader, err := indexReader.TermFieldReader(ctx, term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors)
if err != nil {
Expand All @@ -69,6 +78,50 @@ func newTermSearcherFromReader(indexReader index.IndexReader, reader index.TermF
}, nil
}

// NewSynonymSearcher returns a searcher that matches documents containing
// term or any of the supplied synonyms in field.
//
// A term searcher is opened for term using boost, and one more for every
// synonym using half of boost. All of them are then handed to
// NewDisjunctionSearcher (with 0 as its fourth argument — presumably the
// minimum number of clauses that must match; confirm against that function).
//
// If opening a synonym searcher or building the disjunction fails, every
// searcher opened so far is closed and the error is returned.
func NewSynonymSearcher(ctx context.Context, indexReader index.IndexReader, term []byte, synonyms []string, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) {
	// Frequency/norm information is only requested when scoring is enabled.
	needFreqNorm := options.Score != "none"

	// open builds a term searcher for t, weighted by b.
	open := func(t []byte, b float64) (search.Searcher, error) {
		tfr, err := indexReader.TermFieldReader(ctx, t, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors)
		if err != nil {
			return nil, err
		}
		return newTermSearcherFromReader(indexReader, tfr, t, field, b, options)
	}

	// The original term always comes first and keeps the full boost.
	primary, err := open(term, boost)
	if err != nil {
		return nil, err
	}

	// Constituents of the disjunction: the term itself plus its synonyms.
	constituents := make([]search.Searcher, 0, len(synonyms)+1)
	constituents = append(constituents, primary)

	// closeAll releases everything opened so far; used on error paths only.
	closeAll := func() {
		for i := range constituents {
			if constituents[i] == nil {
				continue
			}
			_ = constituents[i].Close()
		}
	}

	// Synonyms contribute with half the weight of the original term.
	synonymBoost := boost / 2.0
	for i := range synonyms {
		alt, err := open([]byte(synonyms[i]), synonymBoost)
		if err != nil {
			closeAll()
			return nil, err
		}
		constituents = append(constituents, alt)
	}

	disjunction, err := NewDisjunctionSearcher(ctx, indexReader, constituents, 0, options)
	if err != nil {
		closeAll()
		return nil, err
	}
	return disjunction, nil
}

func (s *TermSearcher) Size() int {
return reflectStaticSizeTermSearcher + size.SizeOfPtr +
s.reader.Size() +
Expand Down
4 changes: 4 additions & 0 deletions search/searcher/search_term_prefix.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ func NewTermPrefixSearcher(ctx context.Context, indexReader index.IndexReader, p
reportIOStats(ctx, fieldDict.BytesRead())
search.RecordSearchCost(ctx, search.AddM, fieldDict.BytesRead())
}
// check if the terms are empty or have one term which is the prefix itself
if len(terms) == 0 || (len(terms) == 1 && terms[0] == prefix) {
return NewTermSearcher(ctx, indexReader, prefix, field, boost, options)
}

return NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, true)
}
Loading

0 comments on commit f19cedc

Please sign in to comment.