From f19cedc6f703c4255938e71e7c9bbdb87b031213 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Fri, 29 Nov 2024 12:53:33 +0530 Subject: [PATCH] bug fixes and unit tests for single index implementation --- search/query/query.go | 42 ++- search/searcher/search_fuzzy.go | 22 +- search/searcher/search_phrase.go | 37 +++ search/searcher/search_regexp.go | 4 + search/searcher/search_term.go | 57 +++- search/searcher/search_term_prefix.go | 4 + search_test.go | 372 ++++++++++++++++++++++++++ 7 files changed, 517 insertions(+), 21 deletions(-) diff --git a/search/query/query.go b/search/query/query.go index 22f1293ee..c908bbc54 100644 --- a/search/query/query.go +++ b/search/query/query.go @@ -20,6 +20,7 @@ import ( "fmt" "io" "log" + "strings" "github.com/blevesearch/bleve/v2/analysis" "github.com/blevesearch/bleve/v2/mapping" @@ -487,16 +488,21 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno } case *FuzzyQuery: field, source := resolveFieldAndSource(q.FieldVal) + fuzziness := q.Fuzziness + if q.autoFuzzy { + fuzziness = searcher.GetAutoFuzziness(q.Term) + } if source != "" { - return addFuzzySynonymsForTerm(ctx, source, field, q.Term, q.Fuzziness, q.Prefix, r, rv) + return addFuzzySynonymsForTerm(ctx, source, field, q.Term, fuzziness, q.Prefix, r, rv) } case *MatchQuery, *MatchPhraseQuery: var analyzerName, matchString, fieldVal string var fuzziness, prefix int + var autoFuzzy bool if mq, ok := q.(*MatchQuery); ok { - analyzerName, fieldVal, matchString, fuzziness, prefix = mq.Analyzer, mq.FieldVal, mq.Match, mq.Fuzziness, mq.Prefix + analyzerName, fieldVal, matchString, fuzziness, prefix, autoFuzzy = mq.Analyzer, mq.FieldVal, mq.Match, mq.Fuzziness, mq.Prefix, mq.autoFuzzy } else if mpq, ok := q.(*MatchPhraseQuery); ok { - analyzerName, fieldVal, matchString, fuzziness = mpq.Analyzer, mpq.FieldVal, mpq.MatchPhrase, mpq.Fuzziness + analyzerName, fieldVal, matchString, fuzziness, autoFuzzy = mpq.Analyzer, mpq.FieldVal, mpq.MatchPhrase, mpq.Fuzziness, mpq.autoFuzzy } field, source := resolveFieldAndSource(fieldVal) if source != "" { @@ -506,6 +512,9 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno } tokens := analyzer.Analyze([]byte(matchString)) for _, token := range tokens { + if autoFuzzy { + fuzziness = searcher.GetAutoFuzziness(string(token.Term)) + } rv, err = addFuzzySynonymsForTerm(ctx, source, field, string(token.Term), fuzziness, prefix, r, rv) if err != nil { return nil, err @@ -514,10 +523,12 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno } case *MultiPhraseQuery, *PhraseQuery: var fieldVal string + var fuzziness int + var autoFuzzy bool if mpq, ok := q.(*MultiPhraseQuery); ok { - fieldVal = mpq.FieldVal + fieldVal, fuzziness, autoFuzzy = mpq.FieldVal, mpq.Fuzziness, mpq.autoFuzzy } else if pq, ok := q.(*PhraseQuery); ok { - fieldVal = pq.FieldVal + fieldVal, fuzziness, autoFuzzy = pq.FieldVal, pq.Fuzziness, pq.autoFuzzy } field, source := resolveFieldAndSource(fieldVal) if source != "" { @@ -531,7 +542,10 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno } for _, term := range terms { var err error - rv, err = addSynonymsForTerm(ctx, source, term, field, r, rv) + if autoFuzzy { + fuzziness = searcher.GetAutoFuzziness(term) + } + rv, err = addFuzzySynonymsForTerm(ctx, source, field, term, fuzziness, 0, r, rv) if err != nil { return nil, err } @@ -552,12 +566,12 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno case *RegexpQuery: field, source := resolveFieldAndSource(q.FieldVal) if source != "" { - return addRegexpSynonymsForTerm(ctx, source, field, q.Regexp, r, rv) + return addRegexpSynonymsForTerm(ctx, source, field, strings.TrimPrefix(q.Regexp, "^"), r, rv) } case *TermQuery: field, source := resolveFieldAndSource(q.FieldVal) if source != "" { - return addSynonymsForTerm(ctx, source, q.Term, field, r, rv) + return addSynonymsForTerm(ctx, source, field, q.Term, r, rv) } case *WildcardQuery: field, source := resolveFieldAndSource(q.FieldVal) @@ -594,7 +608,7 @@ func addRegexpSynonymsForTerm(ctx context.Context, src, field, term string, return nil, err } for _, term := range regexpTerms { - rv, err = addSynonymsForTerm(ctx, src, term, field, r, rv) + rv, err = addSynonymsForTerm(ctx, src, field, term, r, rv) if err != nil { return nil, err } @@ -628,7 +642,7 @@ func addPrefixSynonymsForTerm(ctx context.Context, src, field, term string, return nil, err } for _, term := range prefixTerms { - rv, err = addSynonymsForTerm(ctx, src, term, field, r, rv) + rv, err = addSynonymsForTerm(ctx, src, field, term, r, rv) if err != nil { return nil, err } @@ -641,7 +655,7 @@ func addPrefixSynonymsForTerm(ctx context.Context, src, field, term string, func addFuzzySynonymsForTerm(ctx context.Context, src, field, term string, fuzziness, prefix int, r index.SynonymReader, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { if fuzziness == 0 { - return addSynonymsForTerm(ctx, src, term, field, r, rv) + return addSynonymsForTerm(ctx, src, field, term, r, rv) } if ir, ok := r.(index.IndexReaderFuzzy); ok { if fuzziness > searcher.MaxFuzziness { @@ -677,7 +691,7 @@ func addFuzzySynonymsForTerm(ctx context.Context, src, field, term string, fuzzi return nil, err } for _, term := range fuzzyTerms { - rv, err = addSynonymsForTerm(ctx, src, term, field, r, rv) + rv, err = addSynonymsForTerm(ctx, src, field, term, r, rv) if err != nil { return nil, err } @@ -689,8 +703,8 @@ func addFuzzySynonymsForTerm(ctx context.Context, src, field, term string, fuzzi // addSynonymsForTerm finds synonyms for the given term and adds them to the // provided map. -func addSynonymsForTerm(ctx context.Context, src, term, field string, r index.SynonymReader, - rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { +func addSynonymsForTerm(ctx context.Context, src, field, term string, + r index.SynonymReader, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { termBytes := []byte(term) termReader, err := r.SynonymTermReader(ctx, src, termBytes) diff --git a/search/searcher/search_fuzzy.go b/search/searcher/search_fuzzy.go index 6c29f845d..d0e5c1ec5 100644 --- a/search/searcher/search_fuzzy.go +++ b/search/searcher/search_fuzzy.go @@ -55,9 +55,11 @@ func NewFuzzySearcher(ctx context.Context, indexReader index.IndexReader, term s // since the fuzzy candidate terms are not collected // for a term search, and the only candidate term is // the term itself - fuzzyTermMatches := ctx.Value(search.FuzzyMatchPhraseKey) - if fuzzyTermMatches != nil { - fuzzyTermMatches.(map[string][]string)[term] = []string{term} + if ctx != nil { + fuzzyTermMatches := ctx.Value(search.FuzzyMatchPhraseKey) + if fuzzyTermMatches != nil { + fuzzyTermMatches.(map[string][]string)[term] = []string{term} + } } return NewTermSearcher(ctx, indexReader, term, field, boost, options) } @@ -94,12 +96,22 @@ func NewFuzzySearcher(ctx context.Context, indexReader index.IndexReader, term s fuzzyTermMatches.(map[string][]string)[term] = candidates } } + // check if the candidates are empty or have one term which is the term itself + if len(candidates) == 0 || (len(candidates) == 1 && candidates[0] == term) { + if ctx != nil { + fuzzyTermMatches := ctx.Value(search.FuzzyMatchPhraseKey) + if fuzzyTermMatches != nil { + fuzzyTermMatches.(map[string][]string)[term] = []string{term} + } + } + return NewTermSearcher(ctx, indexReader, term, field, boost, options) + } return NewMultiTermSearcherBoosted(ctx, indexReader, candidates, field, boost, editDistances, options, true) } -func getAutoFuzziness(term string) int { +func GetAutoFuzziness(term string) int { termLength := len(term) if termLength > AutoFuzzinessHighThreshold { return MaxFuzziness @@ -111,7 +123,7 @@ func getAutoFuzziness(term string) int { func NewAutoFuzzySearcher(ctx context.Context, indexReader index.IndexReader, term string, prefix int, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) { - return NewFuzzySearcher(ctx, indexReader, term, prefix, getAutoFuzziness(term), field, boost, options) + return NewFuzzySearcher(ctx, indexReader, term, prefix, GetAutoFuzziness(term), field, boost, options) } type fuzzyCandidates struct { diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index bf24b465a..9c2ff7d5f 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -164,6 +164,40 @@ func NewMultiPhraseSearcher(ctx context.Context, indexReader index.IndexReader, } } + if fts, ok := ctx.Value(search.FieldTermSynonymMapKey).(search.FieldTermSynonymMap); ok { + if ts, exists := fts[field]; exists { + if fuzzinessEnabled { + for term, fuzzyTerms := range fuzzyTermMatches { + fuzzySynonymTerms := make([]string, 0, len(fuzzyTerms)) + if s, found := ts[term]; found { + fuzzySynonymTerms = append(fuzzySynonymTerms, s...) + } + for _, fuzzyTerm := range fuzzyTerms { + if fuzzyTerm == term { + continue + } + if s, found := ts[fuzzyTerm]; found { + fuzzySynonymTerms = append(fuzzySynonymTerms, s...) + } + } + if len(fuzzySynonymTerms) > 0 { + fuzzyTermMatches[term] = append(fuzzyTermMatches[term], fuzzySynonymTerms...) + } + } + } else { + for _, termPos := range terms { + for _, term := range termPos { + if s, found := ts[term]; found { + if fuzzyTermMatches == nil { + fuzzyTermMatches = make(map[string][]string) + } + fuzzyTermMatches[term] = s + } + } + } + } + } + } mustSearcher, err := NewConjunctionSearcher(ctx, indexReader, termPositionSearchers, options) if err != nil { // close any searchers already opened @@ -337,6 +371,9 @@ func (s *PhraseSearcher) expandFuzzyMatches(tlm search.TermLocationMap, expanded for term, fuzzyMatches := range s.fuzzyTermMatches { locations := tlm[term] for _, fuzzyMatch := range fuzzyMatches { + if fuzzyMatch == term { + continue + } locations = append(locations, tlm[fuzzyMatch]...) } expandedTlm[term] = locations diff --git a/search/searcher/search_regexp.go b/search/searcher/search_regexp.go index b88133e31..74caf0703 100644 --- a/search/searcher/search_regexp.go +++ b/search/searcher/search_regexp.go @@ -68,6 +68,10 @@ func NewRegexpStringSearcher(ctx context.Context, indexReader index.IndexReader, if err != nil { return nil, err } + // check if the candidateTerms are empty or have one term which is the term itself + if len(candidateTerms) == 0 || (len(candidateTerms) == 1 && candidateTerms[0] == pattern) { + return NewTermSearcher(ctx, indexReader, pattern, field, boost, options) + } return NewMultiTermSearcher(ctx, indexReader, candidateTerms, field, boost, options, true) diff --git a/search/searcher/search_term.go b/search/searcher/search_term.go index cd794ea32..c519d8d51 100644 --- a/search/searcher/search_term.go +++ b/search/searcher/search_term.go @@ -38,14 +38,23 @@ type TermSearcher struct { tfd index.TermFieldDoc } -func NewTermSearcher(ctx context.Context, indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { +func NewTermSearcher(ctx context.Context, indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) { if isTermQuery(ctx) { ctx = context.WithValue(ctx, search.QueryTypeKey, search.Term) } return NewTermSearcherBytes(ctx, indexReader, []byte(term), field, boost, options) } -func NewTermSearcherBytes(ctx context.Context, indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { +func NewTermSearcherBytes(ctx context.Context, indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) { + if ctx != nil { + if fts, ok := ctx.Value(search.FieldTermSynonymMapKey).(search.FieldTermSynonymMap); ok { + if ts, exists := fts[field]; exists { + if s, found := ts[string(term)]; found { + return NewSynonymSearcher(ctx, indexReader, term, s, field, boost, options) + } + } + } + } needFreqNorm := options.Score != "none" reader, err := indexReader.TermFieldReader(ctx, term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors) if err != nil { @@ -69,6 +78,50 @@ func newTermSearcherFromReader(indexReader index.IndexReader, reader index.TermF }, nil } +func NewSynonymSearcher(ctx context.Context, indexReader index.IndexReader, term []byte, synonyms []string, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) { + createTermSearcher := func(term []byte, boostVal float64) (search.Searcher, error) { + needFreqNorm := options.Score != "none" + reader, err := indexReader.TermFieldReader(ctx, term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors) + if err != nil { + return nil, err + } + return newTermSearcherFromReader(indexReader, reader, term, field, boostVal, options) + } + // create a searcher for the term itself + termSearcher, err := createTermSearcher(term, boost) + if err != nil { + return nil, err + } + // constituent searchers of the disjunction + qsearchers := make([]search.Searcher, 0, len(synonyms)+1) + // helper method to close all the searchers we've created + // in case of an error + qsearchersClose := func() { + for _, searcher := range qsearchers { + if searcher != nil { + _ = searcher.Close() + } + } + } + qsearchers = append(qsearchers, termSearcher) + // create a searcher for each synonym + for _, synonym := range synonyms { + synonymSearcher, err := createTermSearcher([]byte(synonym), boost/2.0) + if err != nil { + qsearchersClose() + return nil, err + } + qsearchers = append(qsearchers, synonymSearcher) + } + // create a disjunction searcher + rv, err := NewDisjunctionSearcher(ctx, indexReader, qsearchers, 0, options) + if err != nil { + qsearchersClose() + return nil, err + } + return rv, nil +} + func (s *TermSearcher) Size() int { return reflectStaticSizeTermSearcher + size.SizeOfPtr + s.reader.Size() + diff --git a/search/searcher/search_term_prefix.go b/search/searcher/search_term_prefix.go index dc16e4864..3b05e5a8d 100644 --- a/search/searcher/search_term_prefix.go +++ b/search/searcher/search_term_prefix.go @@ -52,6 +52,10 @@ func NewTermPrefixSearcher(ctx context.Context, indexReader index.IndexReader, p reportIOStats(ctx, fieldDict.BytesRead()) search.RecordSearchCost(ctx, search.AddM, fieldDict.BytesRead()) } + // check if the terms are empty or have one term which is the prefix itself + if len(terms) == 0 || (len(terms) == 1 && terms[0] == prefix) { + return NewTermSearcher(ctx, indexReader, prefix, field, boost, options) + } return NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, true) } diff --git a/search_test.go b/search_test.go index bdfb2fd42..cd1346936 100644 --- a/search_test.go +++ b/search_test.go @@ -19,6 +19,7 @@ import ( "encoding/json" "fmt" "math" + "math/rand" "reflect" "sort" "strconv" @@ -41,6 +42,7 @@ import ( "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds" "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/nanoseconds" "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/seconds" + "github.com/blevesearch/bleve/v2/analysis/lang/en" "github.com/blevesearch/bleve/v2/analysis/token/length" "github.com/blevesearch/bleve/v2/analysis/token/lowercase" "github.com/blevesearch/bleve/v2/analysis/token/shingle" @@ -3929,3 +3931,373 @@ func TestSynonymTermReader(t *testing.T) { } } } + +func TestSynonymSearchQueries(t *testing.T) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + synonymCollection := "collection1" + + synonymSourceName := "english" + + analyzer := en.AnalyzerName + + textField := mapping.NewTextFieldMapping() + textField.Analyzer = analyzer + textField.SynonymSource = synonymSourceName + + imap := mapping.NewIndexMapping() + imap.DefaultMapping.AddFieldMappingsAt("text", textField) + imap.AddSynonymSource(synonymSourceName, synonymCollection, analyzer) + err := imap.Validate() + if err != nil { + t.Fatal(err) + } + + idx, err := New(tmpIndexPath, imap) + if err != nil { + t.Fatal(err) + } + defer func() { + err = idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + documents := map[string]map[string]interface{}{ + "doc1": { + "text": `The hardworking employee consistently strives to exceed expectations. + His industrious nature makes him a valuable asset to any team. + His conscientious attention to detail ensures that projects are completed efficiently and accurately. + He remains persistent even in the face of challenges.`, + }, + "doc2": { + "text": `The tranquil surroundings of the retreat provide a perfect escape from the hustle and bustle of city life. + Guests enjoy the peaceful atmosphere, which is perfect for relaxation and rejuvenation. + The calm environment offers the ideal place to meditate and connect with nature. + Even the most stressed individuals find themselves feeling relaxed and at ease.`, + }, + "doc3": { + "text": `The house was burned down, leaving only a charred shell behind. + The intense heat of the flames caused the walls to warp and the roof to cave in. + The seared remains of the furniture told the story of the blaze. + The incinerated remains left little more than ashes to remember what once was.`, + }, + "doc4": { + "text": `The faithful dog followed its owner everywhere, always loyal and steadfast. + It was devoted to protecting its family, and its reliable nature meant it could always be trusted. + In the face of danger, the dog remained calm, knowing its role was to stay vigilant. + Its trustworthy companionship provided comfort and security.`, + }, + "doc5": { + "text": `The lively market is bustling with activity from morning to night. + The dynamic energy of the crowd fills the air as vendors sell their wares. + Shoppers wander from stall to stall, captivated by the vibrant colors and energetic atmosphere. + This place is alive with movement and life.`, + }, + "doc6": { + "text": `In moments of crisis, bravery shines through. + It takes valor to step forward when others are afraid to act. + Heroes are defined by their guts and nerve, taking risks to protect others. + Boldness in the face of danger is what sets them apart.`, + }, + "doc7": { + "text": `Innovation is the driving force behind progress in every industry. + The company fosters an environment of invention, encouraging creativity at every level. + The focus on novelty and improvement means that ideas are always evolving. + The development of new solutions is at the core of the company's mission.`, + }, + "doc8": { + "text": `The blazing sunset cast a radiant glow over the horizon, painting the sky with hues of red and orange. + The intense heat of the day gave way to a fiery display of color. + As the sun set, the glowing light illuminated the landscape, creating a breathtaking scene. + The fiery sky was a sight to behold.`, + }, + "doc9": { + "text": `The fertile soil of the valley makes it perfect for farming. + The productive land yields abundant crops year after year. + Farmers rely on the rich, fruitful ground to sustain their livelihoods. + The area is known for its plentiful harvests, supporting both local communities and export markets.`, + }, + "doc10": { + "text": `The arid desert is a vast, dry expanse with little water or vegetation. + The barren landscape stretches as far as the eye can see, offering little respite from the scorching sun. + The desolate environment is unforgiving to those who venture too far without preparation. + The parched earth cracks under the heat, creating a harsh, unyielding terrain.`, + }, + "doc11": { + "text": `The fox is known for its cunning and intelligence. + As a predator, it relies on its sharp instincts to outwit its prey. + Its vulpine nature makes it both mysterious and fascinating. + The fox's ability to hunt with precision and stealth is what makes it such a formidable hunter.`, + }, + "doc12": { + "text": `The dog is often considered man's best friend due to its loyal nature. + As a companion, the hound provides both protection and affection. + The puppy quickly becomes a member of the family, always by your side. + Its playful energy and unshakable loyalty make it a beloved pet.`, + }, + "doc13": { + "text": `He worked tirelessly through the night, always persistent in his efforts. + His industrious approach to problem-solving kept the project moving forward. + No matter how difficult the task, he remained focused, always giving his best. + His dedication paid off when the project was completed ahead of schedule.`, + }, + "doc14": { + "text": `The river flowed calmly through the valley, its peaceful current offering a sense of tranquility. + Fishermen relaxed by the banks, enjoying the calm waters that reflected the sky above. + The tranquil nature of the river made it a perfect spot for meditation. + As the day ended, the river's quiet flow brought a sense of peace.`, + }, + "doc15": { + "text": `After the fire, all that was left was the charred remains of what once was. + The seared walls of the house told a tragic story. + The intensity of the blaze had burned everything in its path, leaving only the smoldering wreckage behind. + The incinerated objects could not be salvaged, and the damage was beyond repair.`, + }, + "doc16": { + "text": `The devoted employee always went above and beyond to complete his tasks. + His steadfast commitment to the company made him a valuable team member. + He was reliable, never failing to meet deadlines. + His trustworthiness earned him the respect of his colleagues, and was considered an + ingenious expert in his field.`, + }, + "doc17": { + "text": `The city is vibrant, full of life and energy. + The dynamic pace of the streets reflects the diverse culture of its inhabitants. + People from all walks of life contribute to the energetic atmosphere. + The city's lively spirit can be felt in every corner, from the bustling markets to the lively festivals.`, + }, + "doc18": { + "text": `In a moment of uncertainty, he made a bold decision that would change his life forever. + It took courage and nerve to take the leap, but his bravery paid off. + The guts to face the unknown allowed him to achieve something remarkable. + Being an bright scholar, the skill he demonstrated inspired those around him.`, + }, + "doc19": { + "text": `Innovation is often born from necessity, and the lightbulb is a prime example. + Thomas Edison's invention changed the world, offering a new way to see the night. + The creativity involved in developing such a groundbreaking product sparked a wave of + novelty in the scientific community. This improvement in technology continues to shape the modern world. + He was a clever academic and a smart researcher.`, + }, + "doc20": { + "text": `The fiery volcano erupted with a force that shook the earth. Its radiant lava flowed down the sides, + illuminating the night sky. The intense heat from the eruption could be felt miles away, as the + glowing lava burned everything in its path. The fiery display was both terrifying and mesmerizing.`, + }, + } + + synonymDocuments := map[string]*SynonymDefinition{ + "synDoc1": { + Synonyms: []string{"hardworking", "industrious", "conscientious", "persistent", "focused", "devoted"}, + }, + "synDoc2": { + Synonyms: []string{"tranquil", "peaceful", "calm", "relaxed", "unruffled"}, + }, + "synDoc3": { + Synonyms: []string{"burned", "charred", "seared", "incinerated", "singed"}, + }, + "synDoc4": { + Synonyms: []string{"faithful", "steadfast", "devoted", "reliable", "trustworthy"}, + }, + "synDoc5": { + Synonyms: []string{"lively", "dynamic", "energetic", "vivid", "vibrating"}, + }, + "synDoc6": { + Synonyms: []string{"bravery", "valor", "guts", "nerve", "boldness"}, + }, + "synDoc7": { + Input: []string{"innovation"}, + Synonyms: []string{"invention", "creativity", "novelty", "improvement", "development"}, + }, + "synDoc8": { + Input: []string{"blazing"}, + Synonyms: []string{"intense", "radiant", "burning", "fiery", "glowing"}, + }, + "synDoc9": { + Input: []string{"fertile"}, + Synonyms: []string{"productive", "fruitful", "rich", "abundant", "plentiful"}, + }, + "synDoc10": { + Input: []string{"arid"}, + Synonyms: []string{"dry", "barren", "desolate", "parched", "unfertile"}, + }, + "synDoc11": { + Input: []string{"fox"}, + Synonyms: []string{"vulpine", "canine", "predator", "hunter", "pursuer"}, + }, + "synDoc12": { + Input: []string{"dog"}, + Synonyms: []string{"canine", "hound", "puppy", "pup", "companion"}, + }, + "synDoc13": { + Synonyms: []string{"researcher", "scientist", "scholar", "academic", "expert"}, + }, + "synDoc14": { + Synonyms: []string{"bright", "clever", "ingenious", "sharp", "astute", "smart"}, + }, + } + + // Combine both maps into a slice of map entries (as they both have similar structure) + var combinedDocIDs []string + for id := range synonymDocuments { + combinedDocIDs = append(combinedDocIDs, id) + } + for id := range documents { + combinedDocIDs = append(combinedDocIDs, id) + } + rand.Shuffle(len(combinedDocIDs), func(i, j int) { + combinedDocIDs[i], combinedDocIDs[j] = combinedDocIDs[j], combinedDocIDs[i] + }) + + // Function to create batches of 5 + createDocBatches := func(docs []string, batchSize int) [][]string { + var batches [][]string + for i := 0; i < len(docs); i += batchSize { + end := i + batchSize + if end > len(docs) { + end = len(docs) + } + batches = append(batches, docs[i:end]) + } + return batches + } + // Create batches of 5 documents + var batchSize = 5 + docBatches := createDocBatches(combinedDocIDs, batchSize) + if len(docBatches) == 0 { + t.Fatal("expected batches") + } + totalDocs := 0 + for _, batch := range docBatches { + totalDocs += len(batch) + } + if totalDocs != len(combinedDocIDs) { + t.Fatalf("expected %d documents, got %d", len(combinedDocIDs), totalDocs) + } + + var batches []*Batch + for _, docBatch := range docBatches { + batch := idx.NewBatch() + for _, docID := range docBatch { + if synDef, ok := synonymDocuments[docID]; ok { + err := batch.IndexSynonym(docID, synonymCollection, synDef) + if err != nil { + t.Fatal(err) + } + } else { + err := batch.Index(docID, documents[docID]) + if err != nil { + t.Fatal(err) + } + } + } + batches = append(batches, batch) + } + for _, batch := range batches { + err = idx.Batch(batch) + if err != nil { + t.Fatal(err) + } + } + + type testStruct struct { + query string + expectHits []string + } + + testQueries := []testStruct{ + { + query: `{ + "match": "hardworking employee", + "field": "text" + }`, + expectHits: []string{"doc1", "doc13", "doc16", "doc4", "doc7"}, + }, + { + query: `{ + "match": "Hardwork and industrius efforts bring lovely and tranqual moments, with a glazing blow of valour.", + "field": "text", + "fuzziness": "auto" + }`, + expectHits: []string{ + "doc1", "doc13", "doc14", "doc15", "doc16", + "doc17", "doc18", "doc2", "doc20", "doc3", + "doc4", "doc5", "doc6", "doc7", "doc8", "doc9", + }, + }, + { + query: `{ + "prefix": "in", + "field": "text" + }`, + expectHits: []string{ + "doc1", "doc11", "doc13", "doc15", "doc16", + "doc17", "doc18", "doc19", "doc2", "doc20", + "doc3", "doc4", "doc7", "doc8", + }, + }, + { + query: `{ + "prefix": "vivid", + "field": "text" + }`, + expectHits: []string{ + "doc17", "doc5", + }, + }, + { + query: `{ + "match_phrase": "smart academic", + "field": "text" + }`, + expectHits: []string{"doc16", "doc18", "doc19"}, + }, + { + query: `{ + "match_phrase": "smrat acedemic", + "field": "text", + "fuzziness": "auto" + }`, + expectHits: []string{"doc16", "doc18", "doc19"}, + }, + { + query: `{ + "wildcard": "br*", + "field": "text" + }`, + expectHits: []string{"doc11", "doc14", "doc16", "doc18", "doc19", "doc6", "doc8"}, + }, + } + + for _, dtq := range testQueries { + q, err := query.ParseQuery([]byte(dtq.query)) + if err != nil { + t.Fatal(err) + } + sr := NewSearchRequest(q) + sr.Highlight = NewHighlightWithStyle(ansi.Name) + sr.SortBy([]string{"_id"}) + sr.Fields = []string{"*"} + sr.Size = 30 + sr.Explain = true + + res, err := idx.Search(sr) + if err != nil { + t.Fatal(err) + } + if len(res.Hits) != len(dtq.expectHits) { + t.Fatalf("expected %d hits, got %d", len(dtq.expectHits), len(res.Hits)) + } + // sort the expected hits to match the order of the search results + sort.Strings(dtq.expectHits) + for i, hit := range res.Hits { + if hit.ID != dtq.expectHits[i] { + t.Fatalf("expected docID %s, got %s", dtq.expectHits[i], hit.ID) + } + } + } + +}