From 35c6cf51d80d47561b6ae48fd526017377ff764a Mon Sep 17 00:00:00 2001 From: Kerem Kurban Date: Mon, 2 Dec 2024 13:48:19 +0100 Subject: [PATCH 1/4] fix whitespace issue for author suggestions --- src/scholarag/app/routers/suggestions.py | 4 +- tests/app/test_suggestions.py | 61 +++++++++++++++++++++++- 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/src/scholarag/app/routers/suggestions.py b/src/scholarag/app/routers/suggestions.py index c8aa5c5..394200a 100644 --- a/src/scholarag/app/routers/suggestions.py +++ b/src/scholarag/app/routers/suggestions.py @@ -178,10 +178,10 @@ async def author_suggestion( # Match case insensitive. Sadly aggregate queries don't support the CASE_INSENSITIVE flag # The /i flag is not supported either in regex queries. start = time.time() + regex_pattern = "".join( - [f"[{char.lower()}{char.upper()}]" for char in request.name] + [f"[{char.lower()}{char.upper()}]" if char != " " else r"\s+" for char in request.name] ) - pattern = re.compile(re.escape(request.name), re.IGNORECASE) # Regex query to partially match author name with a keyword field. diff --git a/tests/app/test_suggestions.py b/tests/app/test_suggestions.py index b38e805..88014a3 100644 --- a/tests/app/test_suggestions.py +++ b/tests/app/test_suggestions.py @@ -7,7 +7,7 @@ from scholarag.app.main import app from app.dependencies_overrides import override_ds_client - +from unittest.mock import AsyncMock def test_article_type(app_client): """Test the author suggestion endpoint.""" @@ -684,3 +684,62 @@ async def test_journal_duplicates(get_testing_async_ds_client): assert set(d.keys()) == expected_keys assert d["eissn"] == expected_result["eissn"] assert d["print_issn"] == expected_result["print_issn"] + + + +@pytest.mark.asyncio +async def test_author_suggestion_with_spaces(): + # Override the get_settings dependency + test_settings = Settings( + db={ + "db_type": "elasticsearch", + "index_paragraphs": "pmc_paragraphs2", + "host": "localhost", + "port": 9200, + "user": "test_user", + "password": "test_password" + } + ) + app.dependency_overrides[get_settings] = lambda: test_settings + + # Mock the get_ds_client dependency + mock_ds_client = AsyncMock() + mock_ds_client.search.return_value = { + "hits": { + "total": {"value": 1, "relation": "eq"}, + "hits": [ + { + "_source": { + "authors": ["Jing Yuan"] + } + } + ] + } + } + app.dependency_overrides[get_ds_client] = lambda: mock_ds_client + + async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: + # Test with a name containing spaces + response_with_spaces = await client.get( + "/suggestions/author", + params={"name": "jing yuan", "limit": 100} + ) + assert response_with_spaces.status_code != 500, "Request with spaces should not return 500" + + # Test with a name without spaces + response_without_spaces = await client.get( + "/suggestions/author", + params={"name": "jing", "limit": 100} + ) + assert response_without_spaces.status_code == 200, "Request without spaces should return 200" + + # Optionally, check the response content + response_with_spaces_json = response_with_spaces.json() + response_without_spaces_json = response_without_spaces.json() + + # Ensure the responses are as expected + assert isinstance(response_with_spaces_json, list), "Response should be a list" + assert isinstance(response_without_spaces_json, list), "Response should be a list" + + # Clean up dependency overrides + app.dependency_overrides.clear() \ No newline at end of file From 13fbacdd15e6137157f2ff8d394084921b960765 Mon Sep 17 00:00:00 2001 From: Kerem Kurban Date: Mon, 2 Dec 2024 15:02:31 +0100 Subject: [PATCH 2/4] update changelog and lint --- CHANGELOG.md | 5 +++ src/scholarag/app/routers/suggestions.py | 5 ++- tests/app/test_suggestions.py | 41 ++++++++++++------------ 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dad9800..af7f6ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Fixed +- Fix whitespace issue in author suggestion + ## [0.0.7] - 30.10.2024 ### Added diff --git a/src/scholarag/app/routers/suggestions.py b/src/scholarag/app/routers/suggestions.py index 394200a..a20d8b2 100644 --- a/src/scholarag/app/routers/suggestions.py +++ b/src/scholarag/app/routers/suggestions.py @@ -180,7 +180,10 @@ async def author_suggestion( start = time.time() regex_pattern = "".join( - [f"[{char.lower()}{char.upper()}]" if char != " " else r"\s+" for char in request.name] + [ + f"[{char.lower()}{char.upper()}]" if char != " " else r"\s+" + for char in request.name + ] ) pattern = re.compile(re.escape(request.name), re.IGNORECASE) diff --git a/tests/app/test_suggestions.py b/tests/app/test_suggestions.py index 88014a3..ab82d03 100644 --- a/tests/app/test_suggestions.py +++ b/tests/app/test_suggestions.py @@ -1,13 +1,15 @@ """Tests for the suggestions endpoints.""" +from unittest.mock import AsyncMock + import pytest from httpx import ASGITransport, AsyncClient + +from app.dependencies_overrides import override_ds_client from scholarag.app.config import Settings from scholarag.app.dependencies import ErrorCode, get_ds_client, get_settings from scholarag.app.main import app -from app.dependencies_overrides import override_ds_client -from unittest.mock import AsyncMock def test_article_type(app_client): """Test the author suggestion endpoint.""" @@ -686,7 +688,6 @@ async def test_journal_duplicates(get_testing_async_ds_client): assert d["print_issn"] == expected_result["print_issn"] - @pytest.mark.asyncio async def test_author_suggestion_with_spaces(): # Override the get_settings dependency @@ -697,7 +698,7 @@ async def test_author_suggestion_with_spaces(): "host": "localhost", "port": 9200, "user": "test_user", - "password": "test_password" + "password": "test_password", } ) app.dependency_overrides[get_settings] = lambda: test_settings @@ -707,31 +708,29 @@ async def test_author_suggestion_with_spaces(): mock_ds_client.search.return_value = { "hits": { "total": {"value": 1, "relation": "eq"}, - "hits": [ - { - "_source": { - "authors": ["Jing Yuan"] - } - } - ] + "hits": [{"_source": {"authors": ["Jing Yuan"]}}], } } app.dependency_overrides[get_ds_client] = lambda: mock_ds_client - async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client: + async with AsyncClient( + transport=ASGITransport(app=app), base_url="http://test" + ) as client: # Test with a name containing spaces response_with_spaces = await client.get( - "/suggestions/author", - params={"name": "jing yuan", "limit": 100} + "/suggestions/author", params={"name": "jing yuan", "limit": 100} ) - assert response_with_spaces.status_code != 500, "Request with spaces should not return 500" + assert ( + response_with_spaces.status_code != 500 + ), "Request with spaces should not return 500" # Test with a name without spaces response_without_spaces = await client.get( - "/suggestions/author", - params={"name": "jing", "limit": 100} + "/suggestions/author", params={"name": "jing", "limit": 100} ) - assert response_without_spaces.status_code == 200, "Request without spaces should return 200" + assert ( + response_without_spaces.status_code == 200 + ), "Request without spaces should return 200" # Optionally, check the response content response_with_spaces_json = response_with_spaces.json() @@ -739,7 +738,9 @@ async def test_author_suggestion_with_spaces(): # Ensure the responses are as expected assert isinstance(response_with_spaces_json, list), "Response should be a list" - assert isinstance(response_without_spaces_json, list), "Response should be a list" + assert isinstance( + response_without_spaces_json, list + ), "Response should be a list" # Clean up dependency overrides - app.dependency_overrides.clear() \ No newline at end of file + app.dependency_overrides.clear() From 7feb35598be1951239d1dafe8bb009725e0cc38b Mon Sep 17 00:00:00 2001 From: Nicolas Frank Date: Thu, 5 Dec 2024 11:31:17 +0100 Subject: [PATCH 3/4] Fix analysed author suggestion --- src/scholarag/app/routers/suggestions.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/scholarag/app/routers/suggestions.py b/src/scholarag/app/routers/suggestions.py index a20d8b2..64b05e9 100644 --- a/src/scholarag/app/routers/suggestions.py +++ b/src/scholarag/app/routers/suggestions.py @@ -180,15 +180,12 @@ async def author_suggestion( start = time.time() regex_pattern = "".join( - [ - f"[{char.lower()}{char.upper()}]" if char != " " else r"\s+" - for char in request.name - ] + [f"[{char.lower()}{char.upper()}]" for char in request.name] ) pattern = re.compile(re.escape(request.name), re.IGNORECASE) # Regex query to partially match author name with a keyword field. - query = {"regexp": {"authors": f".*{regex_pattern}.*"}} + query = {"regexp": {"authors.keyword": f".*{regex_pattern}.*"}} kwargs = {"_source": ["authors"]} # The size here is a bit arbitrary, but in theory it should ensure that # There is enough authors to fill the user's request. If not, people should From a3e239aa5847da3cd515e854072b964238f84f75 Mon Sep 17 00:00:00 2001 From: Nicolas Frank Date: Thu, 5 Dec 2024 11:34:56 +0100 Subject: [PATCH 4/4] linting --- tests/app/test_suggestions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/app/test_suggestions.py b/tests/app/test_suggestions.py index ab82d03..716b664 100644 --- a/tests/app/test_suggestions.py +++ b/tests/app/test_suggestions.py @@ -4,12 +4,12 @@ import pytest from httpx import ASGITransport, AsyncClient - -from app.dependencies_overrides import override_ds_client from scholarag.app.config import Settings from scholarag.app.dependencies import ErrorCode, get_ds_client, get_settings from scholarag.app.main import app +from app.dependencies_overrides import override_ds_client + def test_article_type(app_client): """Test the author suggestion endpoint."""