Merge pull request #66 from john0isaac/add-playwright-tests

pamelafox · web-flow · commit 964ee33011d2 · 2024-07-31T18:04:30.000-07:00
Add Playwright Tests
diff --git a/.github/workflows/app-tests.yaml b/.github/workflows/app-tests.yaml
@@ -73,4 +73,16 @@ jobs:
         - name: Run MyPy
           run: python3 -m mypy .
         - name: Run Pytest
-          run: python3 -m pytest
+          run: python3 -m pytest -s -vv --cov --cov-fail-under=85
+        - name: Run E2E tests with Playwright
+          id: e2e
+          if: runner.os != 'Windows'
+          run: |
+            playwright install chromium --with-deps
+            python3 -m pytest tests/e2e.py --tracing=retain-on-failure
+        - name: Upload test artifacts
+          if: ${{ failure() && steps.e2e.conclusion == 'failure' }}
+          uses: actions/upload-artifact@v4
+          with:
+            name: playwright-traces${{ matrix.python_version }}
+            path: test-results
diff --git a/.gitignore b/.gitignore
@@ -146,3 +146,5 @@ npm-debug.log*
 node_modules
 static/
 
+# Playwright test trace
+test-results/
diff --git a/pyproject.toml b/pyproject.toml
@@ -11,9 +11,9 @@ python_version = 3.12
 exclude = [".venv/*"]
 
 [tool.pytest.ini_options]
-addopts = "-ra --cov"
+addopts = "-ra"
 testpaths = ["tests"]
-pythonpath = ['src']
+pythonpath = ['src/backend']
 filterwarnings = ["ignore::DeprecationWarning"]
 
 [[tool.mypy.overrides]]
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -1,12 +1,14 @@
 -r src/backend/requirements.txt
 ruff
 mypy
+types-requests
 pre-commit
 pip-tools
 pip-compile-cross-platform
+playwright
 pytest
-pytest-cov
 pytest-asyncio
+pytest-cov
+pytest-playwright
 pytest-snapshot
-mypy
 locust
diff --git a/src/backend/fastapi_app/api_models.py b/src/backend/fastapi_app/api_models.py
@@ -37,6 +37,7 @@ class ChatRequestContext(BaseModel):
 class ChatRequest(BaseModel):
     messages: list[ChatCompletionMessageParam]
     context: ChatRequestContext
+    sessionState: Any | None = None
 
 
 class ThoughtStep(BaseModel):
@@ -54,13 +55,13 @@ class RAGContext(BaseModel):
 class RetrievalResponse(BaseModel):
     message: Message
     context: RAGContext
-    session_state: Any | None = None
+    sessionState: Any | None = None
 
 
 class RetrievalResponseDelta(BaseModel):
     delta: Message | None = None
     context: RAGContext | None = None
-    session_state: Any | None = None
+    sessionState: Any | None = None
 
 
 class ItemPublic(BaseModel):
diff --git a/src/backend/fastapi_app/dependencies.py b/src/backend/fastapi_app/dependencies.py
@@ -29,8 +29,8 @@ class FastAPIAppContext(BaseModel):
     openai_chat_model: str
     openai_embed_model: str
     openai_embed_dimensions: int
-    openai_chat_deployment: str
-    openai_embed_deployment: str
+    openai_chat_deployment: str | None
+    openai_embed_deployment: str | None
 
 
 async def common_parameters():
@@ -51,10 +51,10 @@ async def common_parameters():
         openai_chat_deployment = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT", "gpt-35-turbo")
         openai_chat_model = os.getenv("AZURE_OPENAI_CHAT_MODEL", "gpt-35-turbo")
     elif OPENAI_CHAT_HOST == "ollama":
-        openai_chat_deployment = "phi3:3.8b"
+        openai_chat_deployment = None
         openai_chat_model = os.getenv("OLLAMA_CHAT_MODEL", "phi3:3.8b")
     else:
-        openai_chat_deployment = "gpt-3.5-turbo"
+        openai_chat_deployment = None
         openai_chat_model = os.getenv("OPENAICOM_CHAT_MODEL", "gpt-3.5-turbo")
     return FastAPIAppContext(
         openai_chat_model=openai_chat_model,
diff --git a/src/frontend/src/pages/chat/Chat.tsx b/src/frontend/src/pages/chat/Chat.tsx
@@ -108,7 +108,8 @@ const Chat = () => {
                         prompt_template: promptTemplate.length === 0 ? undefined : promptTemplate,
                         temperature: temperature
                     }
-                }
+                },
+                sessionState: answers.length ? answers[answers.length - 1][1].sessionState : null
             };
             const chatClient: AIChatProtocolClient = new AIChatProtocolClient("/chat");
             if (shouldStream) {
diff --git a/tests/e2e.py b/tests/e2e.py
@@ -0,0 +1,183 @@
+import socket
+import time
+from collections.abc import Generator
+from contextlib import closing
+from multiprocessing import Process
+
+import pytest
+import requests
+import uvicorn
+from playwright.sync_api import Page, Route, expect
+
+import fastapi_app as app
+
+expect.set_options(timeout=10_000)
+
+
+def wait_for_server_ready(url: str, timeout: float = 10.0, check_interval: float = 0.5) -> bool:
+    """Make requests to provided url until it responds without error."""
+    conn_error = None
+    for _ in range(int(timeout / check_interval)):
+        try:
+            requests.get(url)
+        except requests.ConnectionError as exc:
+            time.sleep(check_interval)
+            conn_error = str(exc)
+        else:
+            return True
+    raise RuntimeError(conn_error)
+
+
+@pytest.fixture(scope="session")
+def free_port() -> int:
+    """Returns a free port for the test server to bind."""
+    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
+        s.bind(("", 0))
+        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        return s.getsockname()[1]
+
+
+def run_server(port: int):
+    uvicorn.run(app.create_app(testing=True), port=port)
+
+
+@pytest.fixture()
+def live_server_url(mock_session_env, free_port: int) -> Generator[str, None, None]:
+    proc = Process(target=run_server, args=(free_port,), daemon=True)
+    proc.start()
+    url = f"http://localhost:{free_port}/"
+    wait_for_server_ready(url, timeout=10.0, check_interval=0.5)
+    yield url
+    proc.kill()
+
+
+def test_home(page: Page, live_server_url: str):
+    page.goto(live_server_url)
+    expect(page).to_have_title("RAG on PostgreSQL")
+
+
+def test_chat(page: Page, live_server_url: str):
+    # Set up a mock route to the /chat/stream endpoint with streaming results
+    def handle(route: Route):
+        # Assert that sessionState is specified in the request (None for now)
+        if route.request.post_data_json:
+            session_state = route.request.post_data_json["sessionState"]
+            assert session_state is None
+        # Read the JSONL from our snapshot results and return as the response
+        f = open(
+            "tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines"
+        )
+        jsonl = f.read()
+        f.close()
+        route.fulfill(body=jsonl, status=200, headers={"Transfer-encoding": "Chunked"})
+
+    page.route("*/**/chat/stream", handle)
+
+    # Check initial page state
+    page.goto(live_server_url)
+    expect(page).to_have_title("RAG on PostgreSQL")
+    expect(page.get_by_role("heading", name="Product chat")).to_be_visible()
+    expect(page.get_by_role("button", name="Clear chat")).to_be_disabled()
+    expect(page.get_by_role("button", name="Developer settings")).to_be_enabled()
+
+    # Ask a question and wait for the message to appear
+    page.get_by_placeholder("Type a new question (e.g. does my plan cover annual eye exams?)").click()
+    page.get_by_placeholder("Type a new question (e.g. does my plan cover annual eye exams?)").fill(
+        "Whats the dental plan?"
+    )
+    page.get_by_role("button", name="Ask question button").click()
+
+    expect(page.get_by_text("Whats the dental plan?")).to_be_visible()
+    expect(page.get_by_text("The capital of France is Paris.")).to_be_visible()
+    expect(page.get_by_role("button", name="Clear chat")).to_be_enabled()
+
+    # Show the thought process
+    page.get_by_label("Show thought process").click()
+    expect(page.get_by_title("Thought process")).to_be_visible()
+    expect(page.get_by_text("Prompt to generate search arguments")).to_be_visible()
+
+    # Clear the chat
+    page.get_by_role("button", name="Clear chat").click()
+    expect(page.get_by_text("Whats the dental plan?")).not_to_be_visible()
+    expect(page.get_by_text("The capital of France is Paris.")).not_to_be_visible()
+    expect(page.get_by_role("button", name="Clear chat")).to_be_disabled()
+
+
+def test_chat_customization(page: Page, live_server_url: str):
+    # Set up a mock route to the /chat endpoint
+    def handle(route: Route):
+        if route.request.post_data_json:
+            overrides = route.request.post_data_json["context"]["overrides"]
+            assert overrides["use_advanced_flow"] is False
+            assert overrides["retrieval_mode"] == "vectors"
+            assert overrides["top"] == 1
+            assert overrides["prompt_template"] == "You are a cat and only talk about tuna."
+
+        # Read the JSON from our snapshot results and return as the response
+        f = open("tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json")
+        json = f.read()
+        f.close()
+        route.fulfill(body=json, status=200)
+
+    page.route("*/**/chat", handle)
+
+    # Check initial page state
+    page.goto(live_server_url)
+    expect(page).to_have_title("RAG on PostgreSQL")
+
+    # Customize all the settings
+    page.get_by_role("button", name="Developer settings").click()
+    page.get_by_text(
+        "Use advanced flow with query rewriting and filter formulation. Not compatible with Ollama models."
+    ).click()
+    page.get_by_label("Retrieve this many matching rows:").click()
+    page.get_by_label("Retrieve this many matching rows:").fill("1")
+    page.get_by_text("Vectors + Text (Hybrid)").click()
+    page.get_by_role("option", name="Vectors", exact=True).click()
+    page.get_by_label("Override prompt template").click()
+    page.get_by_label("Override prompt template").fill("You are a cat and only talk about tuna.")
+
+    page.get_by_text("Stream chat completion responses").click()
+    page.locator("button").filter(has_text="Close").click()
+
+    # Ask a question and wait for the message to appear
+    page.get_by_placeholder("Type a new question (e.g. does my plan cover annual eye exams?)").click()
+    page.get_by_placeholder("Type a new question (e.g. does my plan cover annual eye exams?)").fill(
+        "Whats the dental plan?"
+    )
+    page.get_by_role("button", name="Ask question button").click()
+
+    expect(page.get_by_text("Whats the dental plan?")).to_be_visible()
+    expect(page.get_by_text("The capital of France is Paris.")).to_be_visible()
+    expect(page.get_by_role("button", name="Clear chat")).to_be_enabled()
+
+
+def test_chat_nonstreaming(page: Page, live_server_url: str):
+    # Set up a mock route to the (non-streaming) /chat endpoint
+    def handle(route: Route):
+        # Read the JSON from our snapshot results and return as the response
+        f = open("tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json")
+        json = f.read()
+        f.close()
+        route.fulfill(body=json, status=200)
+
+    page.route("*/**/chat", handle)
+
+    # Check initial page state
+    page.goto(live_server_url)
+    expect(page).to_have_title("RAG on PostgreSQL")
+    expect(page.get_by_role("button", name="Developer settings")).to_be_enabled()
+    page.get_by_role("button", name="Developer settings").click()
+    page.get_by_text("Stream chat completion responses").click()
+    page.locator("button").filter(has_text="Close").click()
+
+    # Ask a question and wait for the message to appear
+    page.get_by_placeholder("Type a new question (e.g. does my plan cover annual eye exams?)").click()
+    page.get_by_placeholder("Type a new question (e.g. does my plan cover annual eye exams?)").fill(
+        "Whats the dental plan?"
+    )
+    page.get_by_label("Ask question button").click()
+
+    expect(page.get_by_text("Whats the dental plan?")).to_be_visible()
+    expect(page.get_by_text("The capital of France is Paris.")).to_be_visible()
+    expect(page.get_by_role("button", name="Clear chat")).to_be_enabled()
diff --git a/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json b/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json
@@ -64,5 +64,5 @@
         ],
         "followup_questions": null
     },
-    "session_state": null
+    "sessionState": null
 }
diff --git a/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines b/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines
@@ -1,2 +1,2 @@
-{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Prompt to generate search arguments","description":["{'role': 'system', 'content': 'Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching database rows.\\nYou have access to an Azure PostgreSQL database with an items table that has columns for title, description, brand, price, and type.\\nGenerate a search query based on the conversation and the new question.\\nIf the question is not in English, translate the question to English before generating the search query.\\nIf you cannot generate a search query, return the original user question.\\nDO NOT return anything besides the query.'}","{'role': 'user', 'content': 'What is the capital of France?'}"],"props":{"model":"gpt-35-turbo","deployment":"gpt-35-turbo"}},{"title":"Search using generated search arguments","description":"The capital of France is Paris. [Benefit_Options-2.pdf].","props":{"top":1,"vector_search":true,"text_search":true,"filters":[]}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. 
With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":["{'role': 'system', 'content': \"Assistant helps customers with questions about products.\\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\\nAnswer ONLY with the product details listed in the products.\\nIf there isn't enough information below, say you don't know.\\nDo not generate answers that don't use the sources below.\\nEach product has an ID in brackets followed by colon and the product details.\\nAlways include the product ID for each product you use in the response.\\nUse square brackets to reference the source, for example [52].\\nDon't combine citations, list each product separately, for example [27][51].\"}","{'role': 'user', 'content': \"What is the capital of France?\\n\\nSources:\\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear\\n\\n\"}"],"props":{"model":"gpt-35-turbo","deployment":"gpt-35-turbo"}}],"followup_questions":null},"session_state":null}
-{"delta":{"content":"The capital of France is Paris. [Benefit_Options-2.pdf].","role":"assistant"},"context":null,"session_state":null}
+{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Prompt to generate search arguments","description":["{'role': 'system', 'content': 'Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching database rows.\\nYou have access to an Azure PostgreSQL database with an items table that has columns for title, description, brand, price, and type.\\nGenerate a search query based on the conversation and the new question.\\nIf the question is not in English, translate the question to English before generating the search query.\\nIf you cannot generate a search query, return the original user question.\\nDO NOT return anything besides the query.'}","{'role': 'user', 'content': 'What is the capital of France?'}"],"props":{"model":"gpt-35-turbo","deployment":"gpt-35-turbo"}},{"title":"Search using generated search arguments","description":"The capital of France is Paris. [Benefit_Options-2.pdf].","props":{"top":1,"vector_search":true,"text_search":true,"filters":[]}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. 
With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":["{'role': 'system', 'content': \"Assistant helps customers with questions about products.\\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\\nAnswer ONLY with the product details listed in the products.\\nIf there isn't enough information below, say you don't know.\\nDo not generate answers that don't use the sources below.\\nEach product has an ID in brackets followed by colon and the product details.\\nAlways include the product ID for each product you use in the response.\\nUse square brackets to reference the source, for example [52].\\nDon't combine citations, list each product separately, for example [27][51].\"}","{'role': 'user', 'content': \"What is the capital of France?\\n\\nSources:\\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear\\n\\n\"}"],"props":{"model":"gpt-35-turbo","deployment":"gpt-35-turbo"}}],"followup_questions":null},"sessionState":null}
+{"delta":{"content":"The capital of France is Paris. [Benefit_Options-2.pdf].","role":"assistant"},"context":null,"sessionState":null}
diff --git a/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json b/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json
@@ -52,5 +52,5 @@
         ],
         "followup_questions": null
     },
-    "session_state": null
+    "sessionState": null
 }
diff --git a/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines b/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines

Original file line number	Diff line number	Diff line change
`@@ -64,5 +64,5 @@`
`64`	`64`	`],`
`65`	`65`	`"followup_questions": null`
`66`	`66`	`},`
`67`		`- "session_state": null`
	`67`	`+ "sessionState": null`
`68`	`68`	`}`
Original file line number	Diff line number	Diff line change
`@@ -52,5 +52,5 @@`
`52`	`52`	`],`
`53`	`53`	`"followup_questions": null`
`54`	`54`	`},`
`55`		`- "session_state": null`
	`55`	`+ "sessionState": null`
`56`	`56`	`}`