diff --git a/itext.tests/itext.kernel.tests/itext/kernel/pdf/ParentTreeTest.cs b/itext.tests/itext.kernel.tests/itext/kernel/pdf/ParentTreeTest.cs
index 9cd46837df..0560d4bf1e 100644
--- a/itext.tests/itext.kernel.tests/itext/kernel/pdf/ParentTreeTest.cs
+++ b/itext.tests/itext.kernel.tests/itext/kernel/pdf/ParentTreeTest.cs
@@ -333,6 +333,108 @@ public virtual void XObjDoesntHaveStructParentTest() {
                 ));
         }
 
+        // Opens objRefNoStructParent.pdf with a CONSERVATIVE reader and checks that the dictionaries reached
+        // through ParentTree Nums entries 3, 5, 7 and 9 (entry -> K -> Obj) have no StructParent entry.
+        [NUnit.Framework.Test]
+        [LogMessage(iText.IO.Logs.IoLogMessageConstant.TAG_STRUCTURE_INIT_FAILED)]
+        public virtual void ObjRefNoStructParentNoModificationTest() {
+            String pdf = sourceFolder + "objRefNoStructParent.pdf";
+            String outPdf = destinationFolder + "objRefNoStructParentNoModification.pdf";
+            PdfReader reader = new PdfReader(pdf).SetStrictnessLevel(PdfReader.StrictnessLevel.CONSERVATIVE);
+            PdfDocument doc = new PdfDocument(reader, CompareTool.CreateTestPdfWriter(outPdf));
+            try {
+                PdfArray nums = doc.GetCatalog().GetPdfObject().GetAsDictionary(PdfName.StructTreeRoot)
+                    .GetAsDictionary(PdfName.ParentTree).GetAsArray(PdfName.Nums);
+                NUnit.Framework.Assert.IsNull(GetStructParentEntry(nums.Get(3)));
+                NUnit.Framework.Assert.IsNull(GetStructParentEntry(nums.Get(5)));
+                NUnit.Framework.Assert.IsNull(GetStructParentEntry(nums.Get(7)));
+                NUnit.Framework.Assert.IsNull(GetStructParentEntry(nums.Get(9)));
+            }
+            finally {
+                // Close the document even when an assertion above fails, so it is never left open.
+                doc.Close();
+            }
+        }
+
+        // Round-trips objRefNoStructParent.pdf through a reader whose strictness level is not changed and
+        // compares the result with the reference file; four "missed and recreated" log messages are expected.
+        [NUnit.Framework.Test]
+        [LogMessage(KernelLogMessageConstant.STRUCT_PARENT_INDEX_MISSED_AND_RECREATED, Count = 4)]
+        public virtual void ObjRefNoStructParentModificationTest() {
+            String pdf = sourceFolder + "objRefNoStructParent.pdf";
+            String outPdf = destinationFolder + "objRefNoStructParentModification.pdf";
+            String cmpPdf = sourceFolder + "cmp_objRefNoStructParentModification.pdf";
+            PdfDocument doc = new PdfDocument(new PdfReader(pdf), CompareTool.CreateTestPdfWriter(outPdf));
+            doc.Close();
+            NUnit.Framework.Assert.IsNull(new CompareTool().CompareByContent(outPdf, cmpPdf, destinationFolder, "diff"
+                ));
+        }
+
+        // Opens xObjNoStructParent.pdf with a CONSERVATIVE reader and checks that the XObject stream reached
+        // through ParentTree Nums entry 1 (first array item -> K -> Stm) has not received a struct parent index.
+        [NUnit.Framework.Test]
+        [LogMessage(iText.IO.Logs.IoLogMessageConstant.TAG_STRUCTURE_INIT_FAILED)]
+        public virtual void XObjNoStructParentNoModificationTest() {
+            String pdf = sourceFolder + "xObjNoStructParent.pdf";
+            String outPdf = destinationFolder + "xObjNoStructParentNoModification.pdf";
+            PdfReader reader = new PdfReader(pdf).SetStrictnessLevel(PdfReader.StrictnessLevel.CONSERVATIVE);
+            PdfDocument doc = new PdfDocument(reader, CompareTool.CreateTestPdfWriter(outPdf));
+            try {
+                PdfObject obj = doc.GetCatalog().GetPdfObject().GetAsDictionary(PdfName.StructTreeRoot)
+                    .GetAsDictionary(PdfName.ParentTree).GetAsArray(PdfName.Nums).Get(1);
+                PdfStream xObj = ((PdfDictionary)((PdfArray)obj).Get(0)).GetAsDictionary(PdfName.K)
+                    .GetAsStream(PdfName.Stm);
+                NUnit.Framework.Assert.IsNull(xObj.Get(PdfName.StructParent));
+                // ParentTreeHandler recreates a missing index on XObject streams under StructParents (plural),
+                // so that key is the one that shows whether the stream was modified.
+                NUnit.Framework.Assert.IsNull(xObj.Get(PdfName.StructParents));
+            }
+            finally {
+                // Close the document even when an assertion above fails, so it is never left open.
+                doc.Close();
+            }
+        }
+
+        // Round-trips xObjNoStructParent.pdf through a reader whose strictness level is not changed and
+        // compares the result with the reference file; the XObject "missed and recreated" message is expected.
+        [NUnit.Framework.Test]
+        [LogMessage(KernelLogMessageConstant.XOBJECT_STRUCT_PARENT_INDEX_MISSED_AND_RECREATED)]
+        public virtual void XObjNoStructParentModificationTest() {
+            String pdf = sourceFolder + "xObjNoStructParent.pdf";
+            String outPdf = destinationFolder + "xObjNoStructParentModification.pdf";
+            String cmpPdf = sourceFolder + "cmp_xObjNoStructParentModification.pdf";
+            PdfDocument doc = new PdfDocument(new PdfReader(pdf), CompareTool.CreateTestPdfWriter(outPdf));
+            doc.Close();
+            NUnit.Framework.Assert.IsNull(new CompareTool().CompareByContent(outPdf, cmpPdf, destinationFolder, "diff"
+                ));
+        }
+
+        // Builds a tagged document without any reader, registers an object reference whose Obj dictionary is
+        // empty (so it has no StructParent), and compares the written file with the reference file.
+        [NUnit.Framework.Test]
+        [LogMessage(KernelLogMessageConstant.STRUCT_PARENT_INDEX_MISSED_AND_RECREATED)]
+        public virtual void ObjRefNoStructParentNoReaderTest() {
+            String outPdf = destinationFolder + "objRefNoStructParentNoReader.pdf";
+            String cmpPdf = sourceFolder + "cmp_objRefNoStructParentNoReader.pdf";
+            PdfDocument pdfDoc = new PdfDocument(CompareTool.CreateTestPdfWriter(outPdf));
+            pdfDoc.SetTagged();
+            PdfPage page = pdfDoc.AddNewPage();
+            PdfDictionary mcrDic = new PdfDictionary();
+            mcrDic.Put(PdfName.Pg, page.GetPdfObject());
+            mcrDic.Put(PdfName.MCID, new PdfNumber(0));
+            mcrDic.Put(PdfName.Obj, new PdfDictionary());
+            PdfDictionary elemDic = new PdfDictionary();
+            elemDic.Put(PdfName.P, pdfDoc.GetStructTreeRoot().GetPdfObject());
+            PdfStructElem elem = new PdfStructElem(elemDic);
+            elem.MakeIndirect(pdfDoc);
+            PdfMcr mcr = new PdfObjRef(mcrDic, elem);
+            elem.AddKid(0, mcr);
+            pdfDoc.GetStructTreeRoot().AddKid(elem);
+            pdfDoc.Close();
+            NUnit.Framework.Assert.IsNull(new CompareTool().CompareByContent(outPdf, cmpPdf, destinationFolder, "diff"
+                ));
+        }
+
         [NUnit.Framework.Test]
         [LogMessage(iText.IO.Logs.IoLogMessageConstant.CREATED_ROOT_TAG_HAS_MAPPING)]
         public virtual void CopyPageWithMultipleDocumentTagsTest() {
@@ -341,6 +443,13 @@ public virtual void CopyPageWithMultipleDocumentTagsTest() {
             NUnit.Framework.Assert.DoesNotThrow(() => pdfDoc.GetTagStructureContext().NormalizeDocumentRootTag());
         }
 
+        // Returns the StructParent entry of the dictionary reached via obj -> K -> Obj.
+        // obj is cast to PdfDictionary, so callers must pass a dictionary taken from the parent tree.
+        private PdfObject GetStructParentEntry(PdfObject obj) {
+            return ((PdfDictionary)obj).GetAsDictionary(PdfName.K).GetAsDictionary(PdfName.Obj).Get(PdfName.StructParent
+                );
+        }
+
         private bool CheckParentTree(String outFileName, String cmpFileName) {
             PdfReader outReader = CompareTool.CreateOutputReader(outFileName);
             PdfDocument outDocument = new PdfDocument(outReader);
diff --git a/itext.tests/itext.kernel.tests/resources/itext/kernel/pdf/ParentTreeTest/cmp_objRefNoStructParentModification.pdf b/itext.tests/itext.kernel.tests/resources/itext/kernel/pdf/ParentTreeTest/cmp_objRefNoStructParentModification.pdf
new file mode 100644
index 0000000000..30df41ff5d
Binary files /dev/null and b/itext.tests/itext.kernel.tests/resources/itext/kernel/pdf/ParentTreeTest/cmp_objRefNoStructParentModification.pdf differ
diff --git a/itext.tests/itext.kernel.tests/resources/itext/kernel/pdf/ParentTreeTest/cmp_objRefNoStructParentNoReader.pdf b/itext.tests/itext.kernel.tests/resources/itext/kernel/pdf/ParentTreeTest/cmp_objRefNoStructParentNoReader.pdf
new file mode 100644
index 0000000000..bafe79b56f
Binary files
/dev/null and b/itext.tests/itext.kernel.tests/resources/itext/kernel/pdf/ParentTreeTest/cmp_objRefNoStructParentNoReader.pdf differ diff --git a/itext.tests/itext.kernel.tests/resources/itext/kernel/pdf/ParentTreeTest/cmp_xObjNoStructParentModification.pdf b/itext.tests/itext.kernel.tests/resources/itext/kernel/pdf/ParentTreeTest/cmp_xObjNoStructParentModification.pdf new file mode 100644 index 0000000000..cac56ff956 Binary files /dev/null and b/itext.tests/itext.kernel.tests/resources/itext/kernel/pdf/ParentTreeTest/cmp_xObjNoStructParentModification.pdf differ diff --git a/itext.tests/itext.kernel.tests/resources/itext/kernel/pdf/ParentTreeTest/objRefNoStructParent.pdf b/itext.tests/itext.kernel.tests/resources/itext/kernel/pdf/ParentTreeTest/objRefNoStructParent.pdf new file mode 100644 index 0000000000..61f550406e Binary files /dev/null and b/itext.tests/itext.kernel.tests/resources/itext/kernel/pdf/ParentTreeTest/objRefNoStructParent.pdf differ diff --git a/itext.tests/itext.kernel.tests/resources/itext/kernel/pdf/ParentTreeTest/xObjNoStructParent.pdf b/itext.tests/itext.kernel.tests/resources/itext/kernel/pdf/ParentTreeTest/xObjNoStructParent.pdf new file mode 100644 index 0000000000..c4f448cc46 Binary files /dev/null and b/itext.tests/itext.kernel.tests/resources/itext/kernel/pdf/ParentTreeTest/xObjNoStructParent.pdf differ diff --git a/itext/itext.kernel/itext/kernel/exceptions/KernelExceptionMessageConstant.cs b/itext/itext.kernel/itext/kernel/exceptions/KernelExceptionMessageConstant.cs index 973a3308e2..af3a496ace 100644 --- a/itext/itext.kernel/itext/kernel/exceptions/KernelExceptionMessageConstant.cs +++ b/itext/itext.kernel/itext/kernel/exceptions/KernelExceptionMessageConstant.cs @@ -445,10 +445,8 @@ public const String CONTENT_STREAM_MUST_NOT_INVOKE_OPERATORS_THAT_SPECIFY_COLORS public const String STREAM_SHALL_END_WITH_ENDSTREAM = "Stream shall end with endstream keyword."; - [Obsolete] - public const String 
STRUCT_PARENT_INDEX_NOT_FOUND_IN_TAGGED_OBJECT = - // Replaced with log message - "StructParent index not found in " + "tagged object."; + public const String STRUCT_PARENT_INDEX_NOT_FOUND_IN_TAGGED_OBJECT = "StructParent index not found in " + + "tagged object."; public const String STRUCTURE_ELEMENT_IN_STRUCTURE_DESTINATION_SHALL_BE_AN_INDIRECT_OBJECT = "Structure " + "element referenced by a structure destination shall be an indirect object."; @@ -544,6 +542,8 @@ public const String WHEN_ADDING_OBJECT_REFERENCE_TO_THE_TAG_TREE_IT_MUST_BE_CONN public const String XREF_STRUCTURE_SIZE_EXCEEDED_THE_LIMIT = "Xref structure contains too many elements " + "and may cause OOM exception. You can increase number of elements by setting custom " + "MemoryLimitsAwareHandler."; + public const String XOBJECT_STRUCT_PARENT_INDEX_MISSED = "XObject has no StructParents index in its stream."; + public const String TOTAL_XOBJECT_SIZE_ONE_PAGE_EXCEEDED_THE_LIMIT = "Pdf contains too many xObject elements on a page " + "and may cause OOM exception. 
You can increase page size limit by setting custom " + "MemoryLimitsAwareHandler."; diff --git a/itext/itext.kernel/itext/kernel/pdf/tagging/ParentTreeHandler.cs b/itext/itext.kernel/itext/kernel/pdf/tagging/ParentTreeHandler.cs index 304badacbb..29b5a7011e 100644 --- a/itext/itext.kernel/itext/kernel/pdf/tagging/ParentTreeHandler.cs +++ b/itext/itext.kernel/itext/kernel/pdf/tagging/ParentTreeHandler.cs @@ -165,11 +165,16 @@ private void RegisterMcr(PdfMcr mcr, bool registeringOnInit) { } } else { - maxStructParentIndex++; - xObjectToStructParentsInd.Put(stmIndRef, maxStructParentIndex); - xObjectStream.Put(PdfName.StructParents, new PdfNumber(maxStructParentIndex)); - structTreeRoot.GetPdfObject().Put(PdfName.ParentTreeNextKey, new PdfNumber(maxStructParentIndex + 1)); - LOGGER.LogWarning(KernelLogMessageConstant.XOBJECT_STRUCT_PARENT_INDEX_MISSED_AND_RECREATED); + if (IsModificationAllowed()) { + maxStructParentIndex++; + xObjectToStructParentsInd.Put(stmIndRef, maxStructParentIndex); + xObjectStream.Put(PdfName.StructParents, new PdfNumber(maxStructParentIndex)); + structTreeRoot.GetPdfObject().Put(PdfName.ParentTreeNextKey, new PdfNumber(maxStructParentIndex + 1)); + LOGGER.LogWarning(KernelLogMessageConstant.XOBJECT_STRUCT_PARENT_INDEX_MISSED_AND_RECREATED); + } + else { + throw new PdfException(KernelExceptionMessageConstant.XOBJECT_STRUCT_PARENT_INDEX_MISSED); + } } pageMcrs.PutXObjectMcr(stmIndRef, mcr); } @@ -185,11 +190,16 @@ private void RegisterMcr(PdfMcr mcr, bool registeringOnInit) { pageMcrs.PutObjectReferenceMcr(n.IntValue(), mcr); } else { - maxStructParentIndex++; - pageMcrs.PutObjectReferenceMcr(maxStructParentIndex, mcr); - obj.Put(PdfName.StructParent, new PdfNumber(maxStructParentIndex)); - structTreeRoot.GetPdfObject().Put(PdfName.ParentTreeNextKey, new PdfNumber(maxStructParentIndex + 1)); - LOGGER.LogWarning(KernelLogMessageConstant.STRUCT_PARENT_INDEX_MISSED_AND_RECREATED); + if (IsModificationAllowed()) { + maxStructParentIndex++; + 
pageMcrs.PutObjectReferenceMcr(maxStructParentIndex, mcr);
+                                obj.Put(PdfName.StructParent, new PdfNumber(maxStructParentIndex));
+                                structTreeRoot.GetPdfObject().Put(PdfName.ParentTreeNextKey, new PdfNumber(maxStructParentIndex + 1));
+                                LOGGER.LogWarning(KernelLogMessageConstant.STRUCT_PARENT_INDEX_MISSED_AND_RECREATED);
+                            }
+                            else {
+                                throw new PdfException(KernelExceptionMessageConstant.STRUCT_PARENT_INDEX_NOT_FOUND_IN_TAGGED_OBJECT);
+                            }
                         }
                     }
                     else {
@@ -251,6 +261,13 @@ public virtual void UnregisterMcr(PdfMcr mcrToUnregister) {
             }
         }
 
+        // Tells whether a missing struct parent index may be recreated: always when the document has no
+        // reader, otherwise only when CONSERVATIVE is stricter than the reader's own strictness level.
+        private bool IsModificationAllowed() {
+            PdfReader reader = this.structTreeRoot.GetDocument().GetReader();
+            return reader == null || PdfReader.StrictnessLevel.CONSERVATIVE.IsStricter(reader.GetStrictnessLevel());
+        }
+
         private void RegisterAllMcrs() {
             pageToPageMcrs = new Dictionary();
             // we create new number tree and not using parentTree, because we want parentTree to be empty
diff --git a/port-hash b/port-hash
index 0661be00c5..3bd03d7ce3 100644
--- a/port-hash
+++ b/port-hash
@@ -1 +1 @@
-66f84b927f433f9f4d9d5ff5bda6a3b6d2dec5d4
+9ad717077d0cc1a30ebd87cdd84bfecc36cf0ac4