Added tests for the new default scorer

vladvildanov · vladvildanov · commit a5ee0145a554 · 2025-01-24T15:15:00.000+02:00
diff --git a/redis/commands/search/query.py b/redis/commands/search/query.py
@@ -179,6 +179,8 @@ def scorer(self, scorer: str) -> "Query":
         Use a different scoring function to evaluate document relevance.
         Default is `TFIDF`.
 
+        Since Redis 8.0, the default scorer is `BM25STD`.
+
         :param scorer: The scoring function to use
                        (e.g. `TFIDF.DOCNORM` or `BM25`)
         """
diff --git a/tests/test_asyncio/test_search.py b/tests/test_asyncio/test_search.py
@@ -341,6 +341,7 @@ async def test_client(decoded_r: redis.Redis):
 
 @pytest.mark.redismod
 @pytest.mark.onlynoncluster
+@skip_if_server_version_gte("7.9.0")
 async def test_scores(decoded_r: redis.Redis):
     await decoded_r.ft().create_index((TextField("txt"),))
 
@@ -361,6 +362,29 @@ async def test_scores(decoded_r: redis.Redis):
         assert "doc1" == res["results"][1]["id"]
 
 
+@pytest.mark.redismod
+@pytest.mark.onlynoncluster
+@skip_if_server_version_lt("7.9.0")
+async def test_scores_with_new_default_scorer(decoded_r: redis.Redis):
+    await decoded_r.ft().create_index((TextField("txt"),))
+
+    await decoded_r.hset("doc1", mapping={"txt": "foo baz"})
+    await decoded_r.hset("doc2", mapping={"txt": "foo bar"})
+
+    q = Query("foo ~bar").with_scores()
+    res = await decoded_r.ft().search(q)
+    if is_resp2_connection(decoded_r):
+        assert 2 == res.total
+        assert "doc2" == res.docs[0].id
+        assert 0.87 == pytest.approx(res.docs[0].score, 0.01)
+        assert "doc1" == res.docs[1].id
+    else:
+        assert 2 == res["total_results"]
+        assert "doc2" == res["results"][0]["id"]
+        assert 0.87 == pytest.approx(res["results"][0]["score"], 0.01)
+        assert "doc1" == res["results"][1]["id"]
+
+
 @pytest.mark.redismod
 async def test_stopwords(decoded_r: redis.Redis):
     stopwords = ["foo", "bar", "baz"]
@@ -1029,6 +1053,7 @@ async def test_phonetic_matcher(decoded_r: redis.Redis):
 @pytest.mark.onlynoncluster
 # NOTE(imalinovskyi): This test contains hardcoded scores valid only for RediSearch 2.8+
 @skip_ifmodversion_lt("2.8.0", "search")
+@skip_if_server_version_gte("7.9.0")
 async def test_scorer(decoded_r: redis.Redis):
     await decoded_r.ft().create_index((TextField("description"),))
 
@@ -1087,6 +1112,69 @@ async def test_scorer(decoded_r: redis.Redis):
         assert 0.0 == res["results"][0]["score"]
 
 
+@pytest.mark.redismod
+@pytest.mark.onlynoncluster
+# NOTE(imalinovskyi): This test contains hardcoded scores valid only for RediSearch 2.8+
+@skip_ifmodversion_lt("2.8.0", "search")
+@skip_if_server_version_lt("7.9.0")
+async def test_scorer_with_new_default_scorer(decoded_r: redis.Redis):
+    await decoded_r.ft().create_index((TextField("description"),))
+
+    await decoded_r.hset(
+        "doc1", mapping={"description": "The quick brown fox jumps over the lazy dog"}
+    )
+    await decoded_r.hset(
+        "doc2",
+        mapping={
+            "description": "Quick alice was beginning to get very tired of sitting by her quick sister on the bank, and of having nothing to do."  # noqa
+        },
+    )
+
+    if is_resp2_connection(decoded_r):
+        # default scorer is BM25STD
+        res = await decoded_r.ft().search(Query("quick").with_scores())
+        assert 0.23 == pytest.approx(res.docs[0].score, 0.05)
+        res = await decoded_r.ft().search(Query("quick").scorer("TFIDF").with_scores())
+        assert 1.0 == res.docs[0].score
+        res = await decoded_r.ft().search(
+            Query("quick").scorer("TFIDF.DOCNORM").with_scores()
+        )
+        assert 0.14285714285714285 == res.docs[0].score
+        res = await decoded_r.ft().search(Query("quick").scorer("BM25").with_scores())
+        assert 0.22471909420069797 == res.docs[0].score
+        res = await decoded_r.ft().search(Query("quick").scorer("DISMAX").with_scores())
+        assert 2.0 == res.docs[0].score
+        res = await decoded_r.ft().search(
+            Query("quick").scorer("DOCSCORE").with_scores()
+        )
+        assert 1.0 == res.docs[0].score
+        res = await decoded_r.ft().search(
+            Query("quick").scorer("HAMMING").with_scores()
+        )
+        assert 0.0 == res.docs[0].score
+    else:
+        res = await decoded_r.ft().search(Query("quick").with_scores())
+        assert 0.23 == pytest.approx(res["results"][0]["score"], 0.05)
+        res = await decoded_r.ft().search(Query("quick").scorer("TFIDF").with_scores())
+        assert 1.0 == res["results"][0]["score"]
+        res = await decoded_r.ft().search(
+            Query("quick").scorer("TFIDF.DOCNORM").with_scores()
+        )
+        assert 0.14285714285714285 == res["results"][0]["score"]
+        res = await decoded_r.ft().search(Query("quick").scorer("BM25").with_scores())
+        assert 0.22471909420069797 == res["results"][0]["score"]
+        res = await decoded_r.ft().search(Query("quick").scorer("DISMAX").with_scores())
+        assert 2.0 == res["results"][0]["score"]
+        res = await decoded_r.ft().search(
+            Query("quick").scorer("DOCSCORE").with_scores()
+        )
+        assert 1.0 == res["results"][0]["score"]
+        res = await decoded_r.ft().search(
+            Query("quick").scorer("HAMMING").with_scores()
+        )
+        assert 0.0 == res["results"][0]["score"]
+
+
 @pytest.mark.redismod
 async def test_get(decoded_r: redis.Redis):
     await decoded_r.ft().create_index((TextField("f1"), TextField("f2")))
diff --git a/tests/test_search.py b/tests/test_search.py
@@ -314,6 +314,7 @@ def test_client(client):
 
 @pytest.mark.redismod
 @pytest.mark.onlynoncluster
+@skip_if_server_version_gte("7.9.0")
 def test_scores(client):
     client.ft().create_index((TextField("txt"),))
 
@@ -334,6 +335,29 @@ def test_scores(client):
         assert "doc1" == res["results"][1]["id"]
 
 
+@pytest.mark.redismod
+@pytest.mark.onlynoncluster
+@skip_if_server_version_lt("7.9.0")
+def test_scores_with_new_default_scorer(client):
+    client.ft().create_index((TextField("txt"),))
+
+    client.hset("doc1", mapping={"txt": "foo baz"})
+    client.hset("doc2", mapping={"txt": "foo bar"})
+
+    q = Query("foo ~bar").with_scores()
+    res = client.ft().search(q)
+    if is_resp2_connection(client):
+        assert 2 == res.total
+        assert "doc2" == res.docs[0].id
+        assert 0.87 == pytest.approx(res.docs[0].score, 0.01)
+        assert "doc1" == res.docs[1].id
+    else:
+        assert 2 == res["total_results"]
+        assert "doc2" == res["results"][0]["id"]
+        assert 0.87 == pytest.approx(res["results"][0]["score"], 0.01)
+        assert "doc1" == res["results"][1]["id"]
+
+
 @pytest.mark.redismod
 def test_stopwords(client):
     client.ft().create_index((TextField("txt"),), stopwords=["foo", "bar", "baz"])
@@ -936,6 +960,7 @@ def test_phonetic_matcher(client):
 @pytest.mark.onlynoncluster
 # NOTE(imalinovskyi): This test contains hardcoded scores valid only for RediSearch 2.8+
 @skip_ifmodversion_lt("2.8.0", "search")
+@skip_if_server_version_gte("7.9.0")
 def test_scorer(client):
     client.ft().create_index((TextField("description"),))
 
@@ -982,6 +1007,55 @@ def test_scorer(client):
         assert 0.0 == res["results"][0]["score"]
 
 
+@pytest.mark.redismod
+@pytest.mark.onlynoncluster
+@skip_if_server_version_lt("7.9.0")
+def test_scorer_with_new_default_scorer(client):
+    client.ft().create_index((TextField("description"),))
+
+    client.hset(
+        "doc1", mapping={"description": "The quick brown fox jumps over the lazy dog"}
+    )
+    client.hset(
+        "doc2",
+        mapping={
+            "description": "Quick alice was beginning to get very tired of sitting by her quick sister on the bank, and of having nothing to do."  # noqa
+        },
+    )
+
+    # default scorer is BM25STD
+    if is_resp2_connection(client):
+        res = client.ft().search(Query("quick").with_scores())
+        assert 0.23 == pytest.approx(res.docs[0].score, 0.05)
+        res = client.ft().search(Query("quick").scorer("TFIDF").with_scores())
+        assert 1.0 == res.docs[0].score
+        res = client.ft().search(Query("quick").scorer("TFIDF.DOCNORM").with_scores())
+        assert 0.14285714285714285 == res.docs[0].score
+        res = client.ft().search(Query("quick").scorer("BM25").with_scores())
+        assert 0.22471909420069797 == res.docs[0].score
+        res = client.ft().search(Query("quick").scorer("DISMAX").with_scores())
+        assert 2.0 == res.docs[0].score
+        res = client.ft().search(Query("quick").scorer("DOCSCORE").with_scores())
+        assert 1.0 == res.docs[0].score
+        res = client.ft().search(Query("quick").scorer("HAMMING").with_scores())
+        assert 0.0 == res.docs[0].score
+    else:
+        res = client.ft().search(Query("quick").with_scores())
+        assert 0.23 == pytest.approx(res["results"][0]["score"], 0.05)
+        res = client.ft().search(Query("quick").scorer("TFIDF").with_scores())
+        assert 1.0 == res["results"][0]["score"]
+        res = client.ft().search(Query("quick").scorer("TFIDF.DOCNORM").with_scores())
+        assert 0.14285714285714285 == res["results"][0]["score"]
+        res = client.ft().search(Query("quick").scorer("BM25").with_scores())
+        assert 0.22471909420069797 == res["results"][0]["score"]
+        res = client.ft().search(Query("quick").scorer("DISMAX").with_scores())
+        assert 2.0 == res["results"][0]["score"]
+        res = client.ft().search(Query("quick").scorer("DOCSCORE").with_scores())
+        assert 1.0 == res["results"][0]["score"]
+        res = client.ft().search(Query("quick").scorer("HAMMING").with_scores())
+        assert 0.0 == res["results"][0]["score"]
+
+
 @pytest.mark.redismod
 def test_get(client):
     client.ft().create_index((TextField("f1"), TextField("f2")))
@@ -2605,9 +2679,8 @@ def test_search_missing_fields(client):
         },
     )
 
-    with pytest.raises(redis.exceptions.ResponseError) as e:
+    with pytest.raises(redis.exceptions.ResponseError):
         client.ft().search(Query("ismissing(@title)").return_field("id").no_content())
-    assert "to be defined with 'INDEXMISSING'" in e.value.args[0]
 
     res = client.ft().search(
         Query("ismissing(@features)").return_field("id").no_content()