Skip to content

Commit d59164b

Browse files
authored
TST: Demonstrate that #3270 can be resolved using existing functionality (#3272)
Closes #3270.
1 parent 11b8195 commit d59164b

File tree

1 file changed

+18
-0
lines changed

1 file changed

+18
-0
lines changed

tests/test_text_extraction.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
44
The tested code might be in _page.py.
55
"""
6+
7+
import re
68
from io import BytesIO
79
from pathlib import Path
810
from unittest.mock import patch
@@ -396,3 +398,19 @@ def test_process_operation__cm_multiplication_issue():
396398
stream.set_data(content)
397399
page.replace_contents(stream)
398400
assert page.extract_text().startswith("The Crazy Ones\nOctober 14, 1998\n")
401+
402+
403+
@pytest.mark.enable_socket
def test_rotated_layout_mode(caplog):
    """
    Check layout-mode text extraction on a rotated page (issue #3270).

    The sample PDF is fetched, the rotation of its first page is transferred
    into the page content, and the page is then extracted in layout mode.
    The test passes when no log records were captured, the extracted text is
    non-empty, and the text ends with an indented ``69`` line followed by an
    indented ``UNCLASSIFIED`` line.
    """
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/19981120/rotated-page.pdf",
        name="rotated-page.pdf",
    )
    pdf = PdfReader(BytesIO(pdf_bytes))
    first_page = pdf.pages[0]

    # Apply the page rotation to the content itself before extracting.
    first_page.transfer_rotation_to_content()
    extracted = first_page.extract_text(extraction_mode="layout")

    assert not caplog.records, "No warnings should be issued"
    assert extracted, "Text matching the page rotation should be extracted"

    # The final two lines must be indented with spaces; line endings may be
    # either LF or CRLF.
    layout_tail = re.search(r"\r?\n +69\r?\n +UNCLASSIFIED$", extracted)
    assert layout_tail, "Contents should be in expected layout"

0 commit comments

Comments
 (0)