Skip to content

Commit 580feec

Browse files
Add unit test (#165)
1 parent c17067a commit 580feec

File tree

1 file changed

+225
-0
lines changed

1 file changed

+225
-0
lines changed
Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
# Copyright (c) Microsoft Corporation. All rights reserved.
2+
# Licensed under the MIT License.
3+
import json
4+
import pytest
5+
6+
import function_app
7+
from function_app import (
8+
layout_analysis,
9+
figure_analysis,
10+
layout_and_figure_merger,
11+
mark_up_cleaner,
12+
semantic_text_chunker,
13+
FigureAnalysis,
14+
LayoutAndFigureMerger,
15+
MarkUpCleaner,
16+
)
17+
18+
19+
# A simple dummy HttpRequest-like object that mimics what
20+
# azure.functions.HttpRequest provides.
21+
class DummyRequest:
    """Lightweight stand-in for ``azure.functions.HttpRequest`` used in tests.

    Exposes only ``method``, ``url``, ``headers``, ``get_json()`` and
    ``get_body()``.

    Args:
        method: HTTP verb of the simulated request.
        url: Request URL or route.
        headers: Mapping of header names to values; ``None`` yields a new
            empty dict.
        body: Raw request payload as bytes.
    """

    def __init__(self, method="POST", url="/", headers=None, body=b""):
        self.method = method
        self.url = url
        # Build a new dict per instance rather than sharing a mutable default.
        self.headers = {} if headers is None else headers
        self._body = body

    def get_json(self):
        """Decode the body as UTF-8 and parse it as JSON.

        Raises:
            ValueError: If the body is not valid UTF-8 or not valid JSON
                (``UnicodeDecodeError`` and ``json.JSONDecodeError`` are
                both ``ValueError`` subclasses).
        """
        return json.loads(self._body.decode("utf8"))

    def get_body(self):
        """Return the raw request body bytes unchanged."""
        return self._body
33+
34+
35+
# ----------------------------
36+
# Tests for layout_analysis
37+
# ----------------------------
38+
@pytest.mark.asyncio
async def test_layout_analysis_valid(monkeypatch):
    """A well-formed request returns 200 and the header flags arrive as booleans."""

    async def fake_process_layout_analysis(value, page_wise, extract_figures):
        # Echo every argument back so the assertions below can inspect them.
        return dict(
            processed=True,
            value=value,
            page_wise=page_wise,
            extract_figures=extract_figures,
        )

    # Replace the real processor on the function_app module with the stub.
    monkeypatch.setattr(
        function_app, "process_layout_analysis", fake_process_layout_analysis
    )

    payload = json.dumps({"values": [{"id": "1", "data": "test data"}]})
    req = DummyRequest(
        method="POST",
        url="/layout_analysis",
        headers={"chunk_by_page": "true", "extract_figures": "false"},
        body=payload.encode("utf8"),
    )

    resp = await layout_analysis(req)
    assert resp.status_code == 200

    raw = resp.get_body()
    decoded = json.loads(raw.decode("utf8"))
    # The stub's output must appear under "values".
    assert "values" in decoded
    first = decoded["values"][0]
    assert first["processed"] is True
    # String headers should have been converted to real booleans.
    assert first["page_wise"] is True
    assert first["extract_figures"] is False
74+
75+
76+
@pytest.mark.asyncio
async def test_layout_analysis_invalid_json():
    """A request whose JSON cannot be parsed is answered with HTTP 400."""

    class UnparseableRequest:
        """Request double whose ``get_json`` always raises ``ValueError``."""

        def __init__(self):
            self.headers = {}

        def get_json(self):
            raise ValueError("Invalid JSON")

    resp = await layout_analysis(UnparseableRequest())

    # Invalid JSON must be rejected with a 400.
    assert resp.status_code == 400
    # The error body should mention the expected payload type.
    message = resp.get_body().decode("utf8")
    assert "Custom Skill Payload" in message
92+
93+
94+
# ----------------------------
95+
# Tests for figure_analysis
96+
# ----------------------------
97+
@pytest.mark.asyncio
async def test_figure_analysis_valid(monkeypatch):
    """A valid request returns 200 and the stubbed analysis result."""

    async def fake_analyse(self, value):
        return dict(fig_analyse=True, value=value)

    # Stub out FigureAnalysis.analyse with the fake above.
    monkeypatch.setattr(FigureAnalysis, "analyse", fake_analyse)

    payload = json.dumps({"values": [{"id": "1", "data": "test data"}]})
    req = DummyRequest(
        method="POST",
        url="/figure_analysis",
        headers={},
        body=payload.encode("utf8"),
    )

    resp = await figure_analysis(req)
    assert resp.status_code == 200

    raw = resp.get_body()
    decoded = json.loads(raw.decode("utf8"))
    assert "values" in decoded
    first = decoded["values"][0]
    assert first["fig_analyse"] is True
120+
121+
122+
# ----------------------------
123+
# Tests for layout_and_figure_merger
124+
# ----------------------------
125+
@pytest.mark.asyncio
async def test_layout_and_figure_merger_valid(monkeypatch):
    """A valid request returns 200 and the stubbed merge result."""

    async def fake_merge(self, value):
        return dict(merged=True, value=value)

    # Stub out LayoutAndFigureMerger.merge with the fake above.
    monkeypatch.setattr(LayoutAndFigureMerger, "merge", fake_merge)

    payload = json.dumps({"values": [{"id": "1", "data": "test data"}]})
    req = DummyRequest(
        method="POST",
        url="/layout_and_figure_merger",
        headers={},
        body=payload.encode("utf8"),
    )

    resp = await layout_and_figure_merger(req)
    assert resp.status_code == 200

    raw = resp.get_body()
    decoded = json.loads(raw.decode("utf8"))
    assert "values" in decoded
    first = decoded["values"][0]
    assert first["merged"] is True
147+
148+
149+
# ----------------------------
150+
# Tests for mark_up_cleaner
151+
# ----------------------------
152+
@pytest.mark.asyncio
async def test_mark_up_cleaner_valid(monkeypatch):
    """A valid request returns 200 and the stubbed cleaning result."""

    async def fake_clean(self, value):
        return dict(cleaned=True, value=value)

    # Stub out MarkUpCleaner.clean with the fake above.
    monkeypatch.setattr(MarkUpCleaner, "clean", fake_clean)

    payload = json.dumps(
        {"values": [{"id": "1", "data": "some markup <b>text</b>"}]}
    )
    req = DummyRequest(
        method="POST",
        url="/mark_up_cleaner",
        headers={},
        body=payload.encode("utf8"),
    )

    resp = await mark_up_cleaner(req)
    assert resp.status_code == 200

    raw = resp.get_body()
    decoded = json.loads(raw.decode("utf8"))
    assert "values" in decoded
    first = decoded["values"][0]
    assert first["cleaned"] is True
174+
175+
176+
# ----------------------------
177+
# Tests for semantic_text_chunker
178+
# ----------------------------
179+
@pytest.mark.asyncio
async def test_semantic_text_chunker_valid(monkeypatch):
    """A valid request with chunking headers returns 200 and the stubbed result."""

    async def fake_process_semantic_text_chunker(value, processor):
        return dict(chunked=True, value=value)

    # Replace the real processor on the function_app module with the stub.
    monkeypatch.setattr(
        function_app,
        "process_semantic_text_chunker",
        fake_process_semantic_text_chunker,
    )

    chunking_headers = {
        "num_surrounding_sentences": "2",
        "similarity_threshold": "0.9",
        "max_chunk_tokens": "600",
        "min_chunk_tokens": "60",
    }
    payload = json.dumps(
        {"values": [{"id": "1", "text": "test text for chunking"}]}
    )
    req = DummyRequest(
        method="POST",
        url="/semantic_text_chunker",
        headers=chunking_headers,
        body=payload.encode("utf8"),
    )

    resp = await semantic_text_chunker(req)
    assert resp.status_code == 200

    raw = resp.get_body()
    decoded = json.loads(raw.decode("utf8"))
    assert "values" in decoded
    first = decoded["values"][0]
    assert first["chunked"] is True
211+
212+
213+
@pytest.mark.asyncio
async def test_semantic_text_chunker_invalid_json():
    """A request whose JSON cannot be parsed is answered with HTTP 400."""

    class UnparseableRequest:
        """Request double whose ``get_json`` always raises ``ValueError``."""

        def __init__(self):
            self.headers = {}

        def get_json(self):
            raise ValueError("Invalid JSON")

    resp = await semantic_text_chunker(UnparseableRequest())
    assert resp.status_code == 400

0 commit comments

Comments
 (0)