From 545a79cbcfddf95ebe0c2976e5b673c53e555967 Mon Sep 17 00:00:00 2001
From: Lucas Cimon <925560+Lucas-C@users.noreply.github.com>
Date: Tue, 12 Nov 2024 12:10:35 +0100
Subject: [PATCH] ENH: Adding support for reading .metadata.keywords (#2939)

Co-authored-by: Cimon Lucas (LCM)
---
 pypdf/_doc_common.py | 15 +++++++++++++++
 tests/test_reader.py |  2 ++
 2 files changed, 17 insertions(+)

diff --git a/pypdf/_doc_common.py b/pypdf/_doc_common.py
index 69789d712..ba71e144d 100644
--- a/pypdf/_doc_common.py
+++ b/pypdf/_doc_common.py
@@ -243,6 +243,21 @@ def modification_date_raw(self) -> Optional[str]:
         """
         return self.get(DI.MOD_DATE)
 
+    @property
+    def keywords(self) -> Optional[str]:
+        """
+        Read-only property accessing the document's keywords.
+
+        Returns a ``TextStringObject`` or ``None`` if keywords are not
+        specified.
+        """
+        return self._get_text(DI.KEYWORDS)
+
+    @property
+    def keywords_raw(self) -> Optional[str]:
+        """The "raw" version of keywords; can return a ``ByteStringObject``."""
+        return self.get(DI.KEYWORDS)
+
 
 class PdfDocCommon:
     """
diff --git a/tests/test_reader.py b/tests/test_reader.py
index b01dc1add..bcc8dcb39 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -107,6 +107,8 @@ def test_read_metadata(pdf_path, expected):
         docinfo.creation_date_raw
         docinfo.modification_date
         docinfo.modification_date_raw
+        docinfo.keywords
+        docinfo.keywords_raw
         if "/Title" in metadict:
             assert isinstance(docinfo.title, str)
             assert metadict["/Title"] == docinfo.title