From 799630daba40fe434406bd59083e8fe736178d1e Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Thu, 15 Aug 2024 14:28:51 +0200 Subject: [PATCH] BUG: Fix sheared image (#2801) Closes #2411. --- pypdf/_xobj_image_helpers.py | 2 +- tests/test_images.py | 11 +++++++++++ tests/test_workflows.py | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py index 7a3f40d95..d870b1589 100644 --- a/pypdf/_xobj_image_helpers.py +++ b/pypdf/_xobj_image_helpers.py @@ -122,7 +122,7 @@ def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes: by = 0 bit = 8 - bits for y in range(size[1]): - if (bit != 0) and (bit != 8 - bits): + if bit != 8 - bits: by += 1 bit = 8 - bits for x in range(size[0]): diff --git a/tests/test_images.py b/tests/test_images.py index 5955bf47c..5fd7d0968 100644 --- a/tests/test_images.py +++ b/tests/test_images.py @@ -462,3 +462,14 @@ def test_extract_image_from_object(caplog): co = reader.pages[0].get_contents() co.decode_as_image() assert "does not seem to be an Image" in caplog.text + + +@pytest.mark.enable_socket() +def test_4bits_images(caplog): + url = "https://github.com/user-attachments/files/16624406/tt.pdf" + name = "iss2411.pdf" + reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) + url = "https://github.com/user-attachments/assets/53058564-9a28-4e4a-818f-a6528013d7dc" + name = "iss2411.png" + img = Image.open(BytesIO(get_data_from_url(url, name=name))) + assert image_similarity(reader.pages[0].images[1].image, img) == 1.0 diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 4407b8fd5..1125222fc 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -412,7 +412,7 @@ def test_get_metadata(url, name, expected_metadata): ("url", "name", "strict", "exception"), [ ( - "https://corpora.tika.apache.org/base/docs/govdocs1/938/938702.pdf", + "https://github.com/user-attachments/files/16624503/tika-938702.pdf", "tika-938702.pdf", False, None, # iss #1090 is now fixed