From 12591b47b877b18cc066e730d364f25f745a0f39 Mon Sep 17 00:00:00 2001
From: Vitali Prudnikovich
Date: Mon, 1 Jul 2024 11:12:02 +0000
Subject: [PATCH] Fix SO error while iterating struct tree root
DEVSIX-8373
Autoported commit.
Original commit hash: [bba9f55da]
---
.../itext/kernel/pdf/PdfStructTreeRootTest.cs | 7 ++
.../pdf/tagutils/TagTreeIteratorTest.cs | 69 ++++++++++++++++
.../pdf/tagutils/TagTreePointerUnitTest.cs | 51 ++++++++++++
.../kernel/utils/TaggedPdfReaderToolTest.cs | 23 ++++++
.../cyclicReferences.pdf | Bin 0 -> 9616 bytes
.../cmp_cyclicReferences.xml | 4 +
.../kernel/pdf/tagging/PdfStructTreeRoot.cs | 15 ++--
.../kernel/pdf/tagutils/TagTreeIterator.cs | 74 +++++++++++++++---
.../TagTreeIteratorAvoidDuplicatesApprover.cs | 68 ++++++++++++++++
.../TagTreeIteratorElementApprover.cs | 56 +++++++++++++
.../pdf/tagutils/TagTreeIteratorFlusher.cs | 46 +++++++++++
.../kernel/pdf/tagutils/TagTreePointer.cs | 73 +++++++++++++----
.../kernel/pdf/tagutils/WaitingTagsManager.cs | 24 ++++--
.../itext/kernel/utils/TaggedPdfReaderTool.cs | 6 ++
port-hash | 2 +-
15 files changed, 479 insertions(+), 39 deletions(-)
create mode 100644 itext.tests/itext.kernel.tests/resources/itext/kernel/pdf/PdfStructTreeRootTest/cyclicReferences.pdf
create mode 100644 itext.tests/itext.kernel.tests/resources/itext/kernel/utils/TaggedPdfReaderToolTest/cmp_cyclicReferences.xml
create mode 100644 itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreeIteratorAvoidDuplicatesApprover.cs
create mode 100644 itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreeIteratorElementApprover.cs
create mode 100644 itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreeIteratorFlusher.cs
diff --git a/itext.tests/itext.kernel.tests/itext/kernel/pdf/PdfStructTreeRootTest.cs b/itext.tests/itext.kernel.tests/itext/kernel/pdf/PdfStructTreeRootTest.cs
index ac49763b6b..7caebd4d0f 100644
--- a/itext.tests/itext.kernel.tests/itext/kernel/pdf/PdfStructTreeRootTest.cs
+++ b/itext.tests/itext.kernel.tests/itext/kernel/pdf/PdfStructTreeRootTest.cs
@@ -88,5 +88,12 @@ public virtual void IdTreeIsLazyTest() {
PdfDocument readPdfDoc = new PdfDocument(r);
NUnit.Framework.Assert.IsFalse(readPdfDoc.GetStructTreeRoot().GetPdfObject().ContainsKey(PdfName.IDTree));
}
+
+ [NUnit.Framework.Test]
+ public virtual void CyclicReferencesTest() {
+ String inFile = sourceFolder + "cyclicReferences.pdf";
+ PdfDocument pdfDoc = new PdfDocument(new PdfReader(inFile), new PdfWriter(new MemoryStream()));
+ NUnit.Framework.Assert.DoesNotThrow(() => pdfDoc.Close());
+ }
}
}
diff --git a/itext.tests/itext.kernel.tests/itext/kernel/pdf/tagutils/TagTreeIteratorTest.cs b/itext.tests/itext.kernel.tests/itext/kernel/pdf/tagutils/TagTreeIteratorTest.cs
index d431109a25..92325f73d6 100644
--- a/itext.tests/itext.kernel.tests/itext/kernel/pdf/tagutils/TagTreeIteratorTest.cs
+++ b/itext.tests/itext.kernel.tests/itext/kernel/pdf/tagutils/TagTreeIteratorTest.cs
@@ -40,6 +40,28 @@ public virtual void TagTreeIteratorTagPointerNull() {
NUnit.Framework.Assert.AreEqual(e.Message, errorMessage);
}
+ [NUnit.Framework.Test]
+ public virtual void TagTreeIteratorApproverNull() {
+ String errorMessage = MessageFormatUtil.Format(KernelExceptionMessageConstant.ARG_SHOULD_NOT_BE_NULL, "approver"
+ );
+ PdfDocument doc = new PdfDocument(new PdfWriter(new ByteArrayOutputStream(), new WriterProperties()));
+ doc.SetTagged();
+ Exception e = NUnit.Framework.Assert.Catch(typeof(ArgumentException), () => new TagTreeIterator(doc.GetStructTreeRoot
+ (), null, TagTreeIterator.TreeTraversalOrder.PRE_ORDER));
+ NUnit.Framework.Assert.AreEqual(e.Message, errorMessage);
+ }
+
+ [NUnit.Framework.Test]
+ public virtual void TagTreeIteratorHandlerNull() {
+ String errorMessage = MessageFormatUtil.Format(KernelExceptionMessageConstant.ARG_SHOULD_NOT_BE_NULL, "handler"
+ );
+ PdfDocument doc = new PdfDocument(new PdfWriter(new ByteArrayOutputStream(), new WriterProperties()));
+ doc.SetTagged();
+ TagTreeIterator it = new TagTreeIterator(doc.GetStructTreeRoot());
+ Exception e = NUnit.Framework.Assert.Catch(typeof(ArgumentException), () => it.AddHandler(null));
+ NUnit.Framework.Assert.AreEqual(e.Message, errorMessage);
+ }
+
[NUnit.Framework.Test]
public virtual void TraversalWithoutElements() {
PdfDocument doc = new PdfDocument(new PdfWriter(new ByteArrayOutputStream(), new WriterProperties()));
@@ -76,6 +98,53 @@ public virtual void TraversalWithSomeElements() {
NUnit.Framework.Assert.AreEqual(PdfName.Code, handler.nodes[6].GetRole());
}
+ [NUnit.Framework.Test]
+ public virtual void PostOrderTraversal() {
+ PdfDocument doc = new PdfDocument(new PdfWriter(new ByteArrayOutputStream(), new WriterProperties()));
+ doc.SetTagged();
+ TagTreePointer tp = new TagTreePointer(doc);
+ tp.AddTag(StandardRoles.DIV);
+ tp.AddTag(StandardRoles.P);
+ tp.AddTag(StandardRoles.FIGURE);
+ tp.MoveToParent();
+ tp.AddTag(StandardRoles.DIV);
+ tp.AddTag(StandardRoles.CODE);
+ TagTreeIterator iterator = new TagTreeIterator(doc.GetStructTreeRoot(), new TagTreeIteratorElementApprover
+ (), TagTreeIterator.TreeTraversalOrder.POST_ORDER);
+ TagTreeIteratorTest.TestHandler handler = new TagTreeIteratorTest.TestHandler();
+ iterator.AddHandler(handler);
+ iterator.Traverse();
+ NUnit.Framework.Assert.AreEqual(7, handler.nodes.Count);
+ NUnit.Framework.Assert.AreEqual(PdfName.Figure, handler.nodes[0].GetRole());
+ NUnit.Framework.Assert.AreEqual(PdfName.Code, handler.nodes[1].GetRole());
+ NUnit.Framework.Assert.AreEqual(PdfName.Div, handler.nodes[2].GetRole());
+ NUnit.Framework.Assert.AreEqual(PdfName.P, handler.nodes[3].GetRole());
+ NUnit.Framework.Assert.AreEqual(PdfName.Div, handler.nodes[4].GetRole());
+ NUnit.Framework.Assert.AreEqual(PdfName.Document, handler.nodes[5].GetRole());
+ NUnit.Framework.Assert.IsNull(handler.nodes[6].GetRole());
+ }
+
+ [NUnit.Framework.Test]
+ public virtual void CyclicReferencesTraversal() {
+ PdfDocument doc = new PdfDocument(new PdfWriter(new ByteArrayOutputStream(), new WriterProperties()));
+ doc.SetTagged();
+ PdfStructElem kid1 = new PdfStructElem(doc, PdfStructTreeRoot.ConvertRoleToPdfName(StandardRoles.P));
+ PdfStructElem kid2 = new PdfStructElem(doc, PdfStructTreeRoot.ConvertRoleToPdfName(StandardRoles.DIV));
+ doc.GetStructTreeRoot().AddKid(kid1);
+ doc.GetStructTreeRoot().AddKid(kid2);
+ kid1.AddKid(kid2);
+ kid2.AddKid(kid1);
+ TagTreeIterator iterator = new TagTreeIterator(doc.GetStructTreeRoot(), new TagTreeIteratorAvoidDuplicatesApprover
+ (), TagTreeIterator.TreeTraversalOrder.POST_ORDER);
+ TagTreeIteratorTest.TestHandler handler = new TagTreeIteratorTest.TestHandler();
+ iterator.AddHandler(handler);
+ iterator.Traverse();
+ NUnit.Framework.Assert.AreEqual(3, handler.nodes.Count);
+ NUnit.Framework.Assert.AreEqual(PdfName.Div, handler.nodes[0].GetRole());
+ NUnit.Framework.Assert.AreEqual(PdfName.P, handler.nodes[1].GetRole());
+ NUnit.Framework.Assert.IsNull(handler.nodes[2].GetRole());
+ }
+
//\cond DO_NOT_DOCUMENT
internal class TestHandler : ITagTreeIteratorHandler {
//\cond DO_NOT_DOCUMENT
diff --git a/itext.tests/itext.kernel.tests/itext/kernel/pdf/tagutils/TagTreePointerUnitTest.cs b/itext.tests/itext.kernel.tests/itext/kernel/pdf/tagutils/TagTreePointerUnitTest.cs
index ad8a3e9680..5078be6d57 100644
--- a/itext.tests/itext.kernel.tests/itext/kernel/pdf/tagutils/TagTreePointerUnitTest.cs
+++ b/itext.tests/itext.kernel.tests/itext/kernel/pdf/tagutils/TagTreePointerUnitTest.cs
@@ -130,6 +130,57 @@ public virtual void CannotFlushAlreadyFlushedPageTest() {
NUnit.Framework.Assert.AreEqual(KernelExceptionMessageConstant.PAGE_ALREADY_FLUSHED, exception.Message);
}
+ [NUnit.Framework.Test]
+ public virtual void CyclicReferencesWhileLookingForRoleTest() {
+ PdfDocument doc = CreateTestDocument();
+ PdfStructElem kid1 = new PdfStructElem(doc, PdfStructTreeRoot.ConvertRoleToPdfName(StandardRoles.P));
+ PdfStructElem kid2 = new PdfStructElem(doc, PdfStructTreeRoot.ConvertRoleToPdfName(StandardRoles.DIV));
+ doc.GetStructTreeRoot().AddKid(kid1);
+ doc.GetStructTreeRoot().AddKid(kid2);
+ kid1.AddKid(kid2);
+ kid2.AddKid(kid1);
+ TagTreePointer pointer = new TagTreePointer(doc);
+ Exception exception = NUnit.Framework.Assert.Catch(typeof(PdfException), () => pointer.MoveToKid(StandardRoles
+ .FIGURE));
+ NUnit.Framework.Assert.AreEqual(KernelExceptionMessageConstant.NO_KID_WITH_SUCH_ROLE, exception.Message);
+ }
+
+ [NUnit.Framework.Test]
+ public virtual void CyclicReferencesWhileFlushingTest() {
+ PdfDocument doc = CreateTestDocument();
+ PdfStructElem kid1 = new PdfStructElem(doc, PdfStructTreeRoot.ConvertRoleToPdfName(StandardRoles.P));
+ PdfStructElem kid2 = new PdfStructElem(doc, PdfStructTreeRoot.ConvertRoleToPdfName(StandardRoles.DIV));
+ doc.GetStructTreeRoot().AddKid(kid1);
+ doc.GetStructTreeRoot().AddKid(kid2);
+ kid1.AddKid(kid2);
+ kid2.AddKid(kid1);
+ TagTreePointer pointer = new TagTreePointer(doc);
+ pointer.MoveToKid(StandardRoles.P);
+ NUnit.Framework.Assert.DoesNotThrow(() => pointer.FlushTag());
+ NUnit.Framework.Assert.IsTrue(kid1.IsFlushed());
+ NUnit.Framework.Assert.IsTrue(kid2.IsFlushed());
+ }
+
+ [NUnit.Framework.Test]
+ public virtual void CyclicReferencesWithWaitingObjectsWhileFlushingTest() {
+ PdfDocument doc = CreateTestDocument();
+ PdfStructElem kid1 = new PdfStructElem(doc, PdfStructTreeRoot.ConvertRoleToPdfName(StandardRoles.P));
+ PdfStructElem kid2 = new PdfStructElem(doc, PdfStructTreeRoot.ConvertRoleToPdfName(StandardRoles.DIV));
+ doc.GetStructTreeRoot().AddKid(kid1);
+ doc.GetStructTreeRoot().AddKid(kid2);
+ kid1.AddKid(kid2);
+ kid2.AddKid(kid1);
+ TagTreePointer pointer = new TagTreePointer(doc);
+ pointer.MoveToKid(StandardRoles.P);
+ WaitingTagsManager waitingTagsManager = pointer.GetContext().GetWaitingTagsManager();
+ Object pWaitingTagObj = new Object();
+ waitingTagsManager.AssignWaitingState(pointer, pWaitingTagObj);
+ pointer.MoveToParent().MoveToKid(StandardRoles.DIV);
+ NUnit.Framework.Assert.DoesNotThrow(() => pointer.FlushTag());
+ NUnit.Framework.Assert.IsFalse(kid1.IsFlushed());
+ NUnit.Framework.Assert.IsTrue(kid2.IsFlushed());
+ }
+
private static PdfDocument CreateTestDocument() {
PdfDocument pdfDoc = new PdfDocument(new PdfWriter(new ByteArrayOutputStream()));
pdfDoc.SetTagged();
diff --git a/itext.tests/itext.kernel.tests/itext/kernel/utils/TaggedPdfReaderToolTest.cs b/itext.tests/itext.kernel.tests/itext/kernel/utils/TaggedPdfReaderToolTest.cs
index 4e9ab1ddb7..02da1d4d86 100644
--- a/itext.tests/itext.kernel.tests/itext/kernel/utils/TaggedPdfReaderToolTest.cs
+++ b/itext.tests/itext.kernel.tests/itext/kernel/utils/TaggedPdfReaderToolTest.cs
@@ -25,6 +25,7 @@ You should have received a copy of the GNU Affero General Public License
using iText.Commons.Utils;
using iText.Kernel.Exceptions;
using iText.Kernel.Pdf;
+using iText.Kernel.Pdf.Tagging;
using iText.Test;
namespace iText.Kernel.Utils {
@@ -77,5 +78,27 @@ public virtual void NoStructTreeRootInDocTest() {
NUnit.Framework.Assert.Fail("IOException is not expected to be triggered");
}
}
+
+ [NUnit.Framework.Test]
+ public virtual void CyclicReferencesTest() {
+ String outXmlPath = DESTINATION_FOLDER + "cyclicReferences.xml";
+ String cmpXmlPath = SOURCE_FOLDER + "cmp_cyclicReferences.xml";
+ PdfDocument doc = new PdfDocument(new PdfWriter(new MemoryStream()));
+ doc.SetTagged();
+ PdfStructElem kid1 = new PdfStructElem(doc, PdfStructTreeRoot.ConvertRoleToPdfName(StandardRoles.P));
+ PdfStructElem kid2 = new PdfStructElem(doc, PdfStructTreeRoot.ConvertRoleToPdfName(StandardRoles.DIV));
+ doc.GetStructTreeRoot().AddKid(kid1);
+ doc.GetStructTreeRoot().AddKid(kid2);
+ kid1.AddKid(kid2);
+ kid2.AddKid(kid1);
+ TaggedPdfReaderTool tool = new TaggedPdfReaderTool(doc);
+ using (Stream outXml = FileUtil.GetFileOutputStream(outXmlPath)) {
+ tool.ConvertToXml(outXml, "UTF-8");
+ }
+ CompareTool compareTool = new CompareTool();
+ if (!compareTool.CompareXmls(outXmlPath, cmpXmlPath)) {
+ NUnit.Framework.Assert.Fail("Resultant xml is different.");
+ }
+ }
}
}
diff --git a/itext.tests/itext.kernel.tests/resources/itext/kernel/pdf/PdfStructTreeRootTest/cyclicReferences.pdf b/itext.tests/itext.kernel.tests/resources/itext/kernel/pdf/PdfStructTreeRootTest/cyclicReferences.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..5b6965718840d493fd2cc59f9514e476524e3aaa
GIT binary patch
literal 9616
zcmeHt2T)U6*S6g&*c;M=(h<@N9YlikF47GoK!6Yop*OLBA^}8-(lJt%F1=kv0jbiZ
z2ntf9DpgA4J3;U3E$=(?eKY^e|Iau7nSt!H_S$Pdd#$~nlasXsbS`4Wp%Q2ofsxU+
zP8K8x0wP=4v&hJRu|!8Ifda-l;;4j+1Zy&$0M;Nl*;4I57>Jyl3zb5^kyt$9JKdW!
zz~X9#dHL1)f%7E0Qo%+jSZ}b)^u%+sJ;bLD`W&`h|}<^4}l;X5L1CfuR07
z2p~V#1t8d7;Qs^x`WFKAKRfA{Nz~7^TqIk&l7JBY=hZMZf38N|lxbM+kFXn12m}yp
zJDxu`0#l+z@Sv&_JV8*f9@&wgiF1~b0X+IAXEBxZ$Ykm-hcQk3oI(lM=u{%v=^}6r
zgfB|LATS674ueAxNCX5X27wDfAVMNwO)~zM0s?}9NkSyyXt?CR6m%$LysI^VB1|*@
z;N*bN5>N>!R9s(6QAhuhwgD&;Fbff9S_f@wfK(`CS7(scWssS$qKb}&xEq1u0>Cg6
zkp_{-c%luFfCss{5S?s)=>N0``9EMtiA*7YFcJ_6sB$uD(^~c`*BOIU~TVgvV=9#WTIL$=TS&!}D?QeAWi6eTjrne4$BVKqSrOvd0fD
z2SJ%ksrP5DQQc-#FyIMmf-}_>=lBQ5!y%Fwu)eDml}UE0B*2kBT>6tm|IAH4`P#!`
z_Wg$Y$uaKwuQRQ4&4O{71!a!Eggd`EpS+N4k*u4fOxHb@D%^dIYoOSn
zD0b3tQ;0#*3Bvkb7LA%x4i0!18Em0kv`iD5e36vUb}@1}U{WtcIoB#IY&L_oAcorACe(g>Ep6x|PpX}jG2?{+nsl%6iOKdq^8ii4Ar&X;#TgP*6B#4B;o
z-LU)>!gef2y0?CGSiAr8I$MYpEi|;_sD}rC1>^Q3rA)oXYYFQ@ZY^_StFq6ZBTepE
zgqc6}w>vyD%}&fMo=@_1(jFIy9ky0+qt0)3BB1*?#;lq;qUjInQ4_q$Yof*D#j?+r
zj%qev$IjQZ+D}xM4bQ}RqsHqd>49z%Gmr1Hzn%15nH;euKk=_1d{&HAzhfk=!V+g^
zwFBWbeDG9n4Q#o8?sk1Ga%_ujU7dk|?ck_7@{;sXc2S!n2P55(roVBv3{B9@lVRCIT+nL?5H%VQMqNj+)S^CXKQ1710WH<
z)49?sXlZF_rtQ*Ja5);gPk|obeYVhcc#ynVh2-ZnesX{3Tf6IAy~k9Nqzk0&=8_&w
z9;6+-j=gz2*R2}kdnx{P=b$|6{o5km!Gg@elQ~Ow6gC77IS6#<4x^!oc2CNu(+3{Sz_mvd^
z+sCo2Xg7q4qRw=}tP{_QJ>ok9I2+^2>vv4F&v!-Pufc7>hsg7
zW##1K6GVQY6OU>*DlVK@OxE3dZcQ^cefZtntXx{y174D_VXUee>Q}ufW_>|
zbd|O?o-YfW5uB8^B?f~sQ!qv7YMPB9;`tH%gT;fjx#?k^GP0t#Sku?FEkZR9M(_K4
zPIAP6C@7_Q;#^8Q_H0o@b(yWdY)x-ddwRH=Q#Jg{#L_~R`%UE{EE&^F#ET-zvkZt|
zy5G=Eql@2~_1^w=`vJq>d~@yd;jdxYNBG}6JH2f%u%i-8T?Rrms%F0Oyj9CsP+b^p
z^Oj3pTOe#1h$x?s6ijOGO=+)?%vo$6l$%!z<%1)8mX~U7`sYsOhnkr
zMszyG^bL)sX
zVkh3j&&e<6w$9sCvve0xB{)|$C+wR^OLv7vCAt_%b8nO&V=dybdX4c1BXlvw5RC
zwb0!1ZcF+mw`!K{QsD43Rr)4Jk-06v2K#LedzmEzdwVpy>n{1k@LRLcjy84ul`a{#
zC3y!1wzeqmT{ds5A91E`zGS?(J-xhoPHMH}`vaGD<5+e5^+!X`PbF|Te8u*j3L3gp
zBcc@+7_5XNZ&|-KZkOKeFPZdBsKt<9^Z-|gy~7w5u)ve(msuE_TqbcXn4!B++sQ~z
zn^9fAq?_BQ+0wZZnIG1hok8GabibUdT5NlJ(Wqamrq_FXw(IVe+ol39QP)dqKI^9X
zs=Hka)*bIIQ~XUm*6xkTJ>I2if651L%fpAFis`*y7CRD5^4c$n7dYV1n$I_dzR!=|
z^k2BPoQpSJ5y`kvdbNwA1qNu`-xLIk4fymiUdwyQtzPSnwG$Wh*YI(P+R)YLxk0m+
zFPCIX@GhCBb|$?y6sdxqC(+w0imuX;+3vsl_O8tWu~Fn=6*S~Xp!#`99C7NsLCgAn
z;aqF5){{{GZiYY$Bl=kb%7i5*#4D5bmd_>O%MUz$?eGv;B^S^o43DR
zzJa}YwuZ||5T_}m0ZW$kfYxea-V`o
zx5aF-D*U!Hj)zSpPLVD*kWoOd#~J{f{>X}6Dv&FQm`cx4GqN;Z*(r!SPE%8$`*BDd
z?7Vcim=1fJk%V1{Bl0WmnHpt@5I;y;7t~d4kpG?k>KK=*T^E`&n8jB80!I_3QITG)
zD@A9^x&?KOe{5AgU%tL$Q-mwbpSQhS?m1j}N-pWd4vLT5otbuX}2y$rR8Z?#s*
z-9z(2J}#x#OVP95W*D8LG`%0b@l*?F3$>dVEKdj^vRG(duy~`2^RY#z^DicfBBt(6
z`SQQJW3NC@db-m3c@anh|37#&n`-eshkK8#4aCF
z1_DnSuv=2P7)vyj)1d>EY;;k;URqbA=*@)xk`L5FyTHhuH!b=^?3!9LsjgG=e8ET
zDyx{zwtz!q9iUC!5#b3b=PNlQ(Xr=biY|B7fUe`kDh0Zff%eoQ=FIXA+LX@wUTf*8
zy&UGOdl6GRIlAu`ohI_zLLaaAmU44+r+7)rI@PT1qD@^IAg*!FzC!0eD`m4Y=v;f%
zi?GLjkIMsaeO8-kEXyfG7$MEPcHxeP3*t-CWeNf{^xIE
zo{n7I-21;IRJr@kt(#fi2~s|V9|@mj5>Zpe*EpXhL3yd-Qt_RlImA-dcBi6OGYQ~wzHVSuU5!5)x4K$
zH)!5!M@#XfW+xxrrKz~i
zuk7h!y?dhRY%GlZfzNzIMs7UUqKS8XfoHZSrl&0+!AY*D@Iiv_!g}MoQriX+@56@J
zFUDt~yE%6sn3)&~$4_Kps~R{mvzKsm;e_A6js_Wt11m%acH)*F5WxT2D
z=*mxhvMsiOE}ki*2D`K(!ce-q>nB@6ppA*G|M$FlxtaV!sl_YD!f5^SKRT`S+B@CIp_jf>Oog~ZeLh2^&7`V(C2=uh-^V0<4POUVeBH_=FOf`NE(Mh==gqmOzZd4T
zTr`iEdr>K0Srtm_@vP#1r>)HIS616vq|(HtXkquT2fnUw&k-Rz+w45YJ8Us=B+jGi
zR%=*8IY~w9Rte%@{kp~s*HY>cj*gAORK%%B#??=K(IexhubH$>4$))Cp@r9$0^E9m
z?_N_%KINI==joE_l-<#oY)kK{N%tj-W**9byo||ONmvh$wB?yAp-P(qnqo3f&Gjqa
zey;xPn%KShUXW|rRi*V*HDj#NdVQK=+Cc=g%w^L{kD&f{_0B0wV{YxXR$*a5PW2Gh+kJ&$XR;V$?--fzx7Wg
z3C|C(3-AwcSo2z367;#etrQMRpAktvkhC}E3p+kCob%HWWSGZHWh$vbn2()Zw#05;
zON14srW`6>oNsf3Ey88NRu#$0o@%9{E>BjysEg^F_;9=f=u%7izO3nyK
zB2+EBeavX?;CCw(A+U{4$Hx)z7A0&T3N!L%`>nOaXcOPDPdSMvR+LKoP>+f!@z%}b+~$riG84rRD7#yoHKi<#0wKDv5J53-0(_%M!S
z^OS9xNLTy*wLKP+Sf0cRD=+Pd_AWsmG0U9C%lI|b$mg~`q#f+wqu+ZBL09JYSEV{p
zHT!&AhG(TOzt|NV6FnXjVu3!b|2V%<-x6wU^f_Cix-rsy=EL{7UPH#Hab=*G61L{4
zVQO`tn)WD*wtiI5d~B~#`n@Zwci)iN4zn|&Q3e%p4t1vXg7K`0{c}1-Z~O;DzaTn`
zE?{$MpgY3@kkX=Sc)q8h8Yh%LDpWXZeWh^q)o|mfDN8rLDJ=fps>v`iq2P<*wPitV
zC%I*e%g^gS%Gw9>ay@NrWx5Drot!a@v{4U!iDruzlE7%0IUgP+giY^cPtXPeNgDmV
zn0C|co}+!>!Ti!hT0r^!#qxcNT8BOfMP#PF{NSaCorrb_o3mqoHzad45`6oClTYKi
zvimjfT;Q}0!{1^I{vcx
z@>S!ox#O1mPSOH1qv=gIVf6w5SJ%3=4V(7U5q-Z}|wPe%G5EaZ)TPZ}myP8NMN
zDX2>IPaG%?4h#|PxYf4Mzjb->LWIM;*@q4ZE~gZ*QB$w>PSh`ysOB!m=IDXwEXDKh
z#%{Mf+I(hL`*J8#?=;@v#d=+VF}aad^`4KQohvY8HqOEr+vkx~Vz7tZuvlx2JFW4h
z!j!7Lt0Fdv^y*y2uGe`F7otr-^v2Vn?1O%+V19`p$2xOhwo`3v5mjdrtW77McB=)1
zh+16ve&uS8X7NY{3y2;Z19c!4liG)OnprhujJ!TkK(^9zMbF;Xeckksa(4M}O*x>%
zrrmJik-_`(B7n;0F&D#&qxKt#XPJYOSYRxlKy-zKK4H!`^`B~ewkNbw`x4C`=yx(B
zzhOg{YdV0d!qClHB)QzP1+ZJ#B&4v+M^|9sgbyWgENryzMd;y
zt7W87A2D7rlRAwuYYQ=Q=AFx}D#k{QH=0;az418qeBv<+o0owtmVxVh;l>8$Z=Jcb
z9{MeHEhPj!qjh9#Av!ptnAdmo>TJW4nS?juu5_c=L&hn^U11A72Mx6n()&ZOBZ26{
zjW)AZeg(@KbqZJ7opL!pimUc2RHN<2ZwF_r`7C*aEmqYUR)#+eQSIjX+6PU+fSXz0
zu%!oks+$|g+4L_OxAfuqMEcjG=7OB2#%@P2iQV+L?#b#&EH>)%^39@3|y(mo4(ubxHE<0-OJ-l>ElUSB&?S
z;d#{RZx%|3oXVP}3wBm=o1IFYMXA+#Yy5BtmC2qWw)6|*)z<{XYm6g!*IVWupbBRm
zwaW(COKosh>TqX2>vLW*``Evb`Pi%vRBNN^Xd!E%d-Tj=zs%0PgX3;^mBgVwU5T2l
zKDX+tE2j=3+tsk6s*ITQd-iIZ&v-{FYz$8BnKwo^k4R4yX&$R~(>I^-9)Gctb+%90
zdu6}h$MN!vR&+c&zcO6nmyreea3l^EJ(K()C-8o{
zo9nnOp%uL{vT=y5%VP_k5D1gX&Q8BhgY@K57fo_1K^`iM$yrzW`Nn6{317sw&T(`L
zRP(+cT5J$!3khsiiM3cyPE4r%a-j-QZed5jeaNq9MVXUqoKREqJk6ZTCVdB{-x=7?
znyfrxiM?Xkwni(BPyN<2wJ+`A^|Rx*-ZP&?1<$UXwxcT|rc%Wnrj>6#7b>y@bI?=^
z`G_b%&AL4UHX3{=8m;^+^rk$1DM)k1%;oCKA^F5^*@wIlZLYbR4U>8&s8Z1;sLR%e
z6fY$Z>PB2%pSH=^>XWg*!euOMoXZ}{eg)00tdP5peeYDbZt~&(1s|H}8}byPiA(={
z)89_U|9sQeawWL{#Twhi5_&*#^N;$7?X)CQ=g-9^e{_0K2sSLhAOs7LAK$*92oMU1
zgd;&Vpr0}*l$lOtULdD`$zTv5<@#3{1T6_%sqt8H4$$w;eYs1xIuwP?%*hsuw|~
zG8h-$6*k7YaGrBhL)2-!6ji0$Bynh%4HAQdL(mWm8ZZWA>JbEpl{Ll&fdEQ|B&{J<1gH%TZH2+0Ffb?t
zfxti!aBCYYj5Sc?VS|Ih@zyW|T+#-LfCA+l)=0dhH5BN=A*}HzG?1QWmNbBV6r3
+
+
+
diff --git a/itext/itext.kernel/itext/kernel/pdf/tagging/PdfStructTreeRoot.cs b/itext/itext.kernel/itext/kernel/pdf/tagging/PdfStructTreeRoot.cs
index 7a8d440996..7a6d371016 100644
--- a/itext/itext.kernel/itext/kernel/pdf/tagging/PdfStructTreeRoot.cs
+++ b/itext/itext.kernel/itext/kernel/pdf/tagging/PdfStructTreeRoot.cs
@@ -29,6 +29,7 @@ You should have received a copy of the GNU Affero General Public License
using iText.Kernel.Exceptions;
using iText.Kernel.Pdf;
using iText.Kernel.Pdf.Filespec;
+using iText.Kernel.Pdf.Tagutils;
namespace iText.Kernel.Pdf.Tagging {
/// Represents a wrapper-class for structure tree root dictionary.
@@ -399,7 +400,7 @@ public override void Flush() {
GetPdfObject().Put(PdfName.IDTree, this.idTree.BuildTree().MakeIndirect(GetDocument()));
}
if (!GetDocument().IsAppendMode()) {
- FlushAllKids(this);
+ iText.Kernel.Pdf.Tagging.PdfStructTreeRoot.FlushAllKids(this);
}
base.Flush();
}
@@ -597,13 +598,11 @@ protected internal override bool IsWrappedObjectMustBeIndirect() {
return true;
}
- private void FlushAllKids(IStructureNode elem) {
- foreach (IStructureNode kid in elem.GetKids()) {
- if (kid is PdfStructElem && !((PdfStructElem)kid).IsFlushed()) {
- FlushAllKids(kid);
- ((PdfStructElem)kid).Flush();
- }
- }
+ private static void FlushAllKids(iText.Kernel.Pdf.Tagging.PdfStructTreeRoot elem) {
+ TagTreeIterator iterator = new TagTreeIterator(elem, new TagTreeIteratorAvoidDuplicatesApprover(), TagTreeIterator.TreeTraversalOrder
+ .POST_ORDER);
+ iterator.AddHandler(new TagTreeIteratorFlusher());
+ iterator.Traverse();
}
private void IfKidIsStructElementAddToList(PdfObject kid, IList kids) {
diff --git a/itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreeIterator.cs b/itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreeIterator.cs
index d9ad00de10..1ccf6c8faa 100644
--- a/itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreeIterator.cs
+++ b/itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreeIterator.cs
@@ -31,28 +31,65 @@ namespace iText.Kernel.Pdf.Tagutils {
///
/// This class is used to traverse the tag tree.
///
- /// There is a possibility to add a handler that will be called for specific events during the traversal.
+ /// There is a possibility to add a handler that will be called for the elements during the traversal.
///
public class TagTreeIterator {
private readonly IStructureNode pointer;
private readonly ICollection handlerList;
+ private readonly TagTreeIteratorElementApprover approver;
+
+ private readonly TagTreeIterator.TreeTraversalOrder traversalOrder;
+
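+ /// <summary>
+ /// Creates a new instance of
+ /// <see cref="TagTreeIterator"/>.
+ /// </summary>
+ /// <remarks>
+ /// Creates a new instance of
+ /// <see cref="TagTreeIterator"/>
+ /// . It will use
+ /// <see cref="TagTreeIteratorElementApprover"/>
+ /// to filter
+ /// elements and TreeTraversalOrder.PRE_ORDER for tree traversal.
+ /// </remarks>
+ /// <param name="tagTreePointer">the tag tree pointer.</param>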
+ public TagTreeIterator(IStructureNode tagTreePointer)
+ : this(tagTreePointer, new TagTreeIteratorElementApprover(), TagTreeIterator.TreeTraversalOrder.PRE_ORDER) {
+ }
+
///
/// Creates a new instance of
/// .
///
/// the tag tree pointer.
- public TagTreeIterator(IStructureNode tagTreePointer) {
+ /// <param name="approver">
+ /// a filter that will be called to let iterator know whether some particular element
+ /// should be traversed or not.
+ /// </param>
+ /// <param name="traversalOrder">an order in which the tree will be traversed.</param>
+ public TagTreeIterator(IStructureNode tagTreePointer, TagTreeIteratorElementApprover approver, TagTreeIterator.TreeTraversalOrder
+ traversalOrder) {
if (tagTreePointer == null) {
throw new ArgumentException(MessageFormatUtil.Format(KernelExceptionMessageConstant.ARG_SHOULD_NOT_BE_NULL
, "tagTreepointer"));
}
+ if (approver == null) {
+ throw new ArgumentException(MessageFormatUtil.Format(KernelExceptionMessageConstant.ARG_SHOULD_NOT_BE_NULL
+ , "approver"));
+ }
+ // TreeTraversalOrder is an enum (value type), so "== null" is always false; reject undeclared values instead.
+ if (!Enum.IsDefined(typeof(TagTreeIterator.TreeTraversalOrder), traversalOrder)) {
+ throw new ArgumentException(MessageFormatUtil.Format(KernelExceptionMessageConstant.ARG_SHOULD_NOT_BE_NULL, "traversalOrder"));
+ }
this.pointer = tagTreePointer;
+ this.traversalOrder = traversalOrder;
handlerList = new HashSet();
+ this.approver = approver;
}
- /// Adds a handler that will be called for specific events during the traversal.
+ /// Adds a handler that will be called for the elements during the traversal.
/// the handler.
///
/// this
@@ -60,6 +97,10 @@ public TagTreeIterator(IStructureNode tagTreePointer) {
/// instance.
///
public virtual iText.Kernel.Pdf.Tagutils.TagTreeIterator AddHandler(ITagTreeIteratorHandler handler) {
+ if (handler == null) {
+ throw new ArgumentException(MessageFormatUtil.Format(KernelExceptionMessageConstant.ARG_SHOULD_NOT_BE_NULL
+ , "handler"));
+ }
this.handlerList.Add(handler);
return this;
}
@@ -71,22 +112,37 @@ public virtual iText.Kernel.Pdf.Tagutils.TagTreeIterator AddHandler(ITagTreeIter
/// Make sure the correct handlers are added before calling this method.
///
public virtual void Traverse() {
- Traverse(this.pointer, this.handlerList);
+ Traverse(this.pointer);
}
- private static void Traverse(IStructureNode elem, ICollection handlerList) {
- if (elem == null) {
+ private void Traverse(IStructureNode elem) {
+ if (!approver.Approve(elem)) {
return;
}
- foreach (ITagTreeIteratorHandler handler in handlerList) {
- handler.NextElement(elem);
+ if (traversalOrder == TagTreeIterator.TreeTraversalOrder.PRE_ORDER) {
+ foreach (ITagTreeIteratorHandler handler in handlerList) {
+ handler.NextElement(elem);
+ }
}
IList kids = elem.GetKids();
if (kids != null) {
foreach (IStructureNode kid in kids) {
- Traverse(kid, handlerList);
+ Traverse(kid);
+ }
+ }
+ if (traversalOrder == TagTreeIterator.TreeTraversalOrder.POST_ORDER) {
+ foreach (ITagTreeIteratorHandler handler in handlerList) {
+ handler.NextElement(elem);
}
}
}
+
+ /// <summary>Tree traversal order enum.</summary>
+ public enum TreeTraversalOrder {
+ /// <summary>Preorder traversal: handlers are called for an element before its kids are traversed.</summary>
+ PRE_ORDER,
+ /// <summary>Postorder traversal: handlers are called for an element after its kids are traversed.</summary>
+ POST_ORDER
+ }
}
}
diff --git a/itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreeIteratorAvoidDuplicatesApprover.cs b/itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreeIteratorAvoidDuplicatesApprover.cs
new file mode 100644
index 0000000000..b3688c6bd5
--- /dev/null
+++ b/itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreeIteratorAvoidDuplicatesApprover.cs
@@ -0,0 +1,68 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2024 Apryse Group NV
+Authors: Apryse Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using System.Collections.Generic;
+using iText.Kernel.Pdf;
+using iText.Kernel.Pdf.Tagging;
+
+namespace iText.Kernel.Pdf.Tagutils {
+ /// <summary>
+ /// Element checker for
+ /// <see cref="TagTreeIterator"/>.
+ /// </summary>
+ /// <remarks>
+ /// Element checker for
+ /// <see cref="TagTreeIterator"/>.
+ /// It is used to check whether specific element should be traversed.
+ /// It doesn't approve elements which have been traversed before.
+ /// </remarks>
+ public class TagTreeIteratorAvoidDuplicatesApprover : TagTreeIteratorElementApprover {
+ private readonly ICollection<PdfObject> processedObjects = new HashSet<PdfObject>();
+
+ /// <summary>
+ /// Creates a new instance of
+ /// <see cref="TagTreeIteratorAvoidDuplicatesApprover"/>
+ /// </summary>
+ public TagTreeIteratorAvoidDuplicatesApprover()
+ : base() {
+ }
+
+ /// <summary>Approves the struct tree root always and any other struct element only the first time it is seen.</summary>
+ public override bool Approve(IStructureNode elem) {
+ if (elem is PdfStructTreeRoot) {
+ return true;
+ }
+ if (!base.Approve(elem) || !(elem is PdfStructElem)) {
+ return false;
+ }
+ PdfObject obj = ((PdfStructElem)elem).GetPdfObject();
+ bool isProcessed = processedObjects.Contains(obj);
+ if (isProcessed) {
+ return false;
+ }
+ else {
+ processedObjects.Add(obj);
+ return true;
+ }
+ }
+ }
+}
diff --git a/itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreeIteratorElementApprover.cs b/itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreeIteratorElementApprover.cs
new file mode 100644
index 0000000000..5ff992fd1e
--- /dev/null
+++ b/itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreeIteratorElementApprover.cs
@@ -0,0 +1,56 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2024 Apryse Group NV
+Authors: Apryse Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using iText.Kernel.Pdf.Tagging;
+
+namespace iText.Kernel.Pdf.Tagutils {
+ /// <summary>
+ /// Element checker for
+ /// <see cref="TagTreeIterator"/>.
+ /// </summary>
+ /// <remarks>
+ /// Element checker for
+ /// <see cref="TagTreeIterator"/>.
+ /// It is used to check whether specific element should be traversed.
+ /// </remarks>
+ public class TagTreeIteratorElementApprover {
+ /// <summary>
+ /// Creates a new instance of
+ /// <see cref="TagTreeIteratorElementApprover"/>
+ /// </summary>
+ public TagTreeIteratorElementApprover() {
+ }
+
+ // Empty constructor
+ /// <summary>Checks whether the element should be traversed.</summary>
+ /// <param name="elem">the element to check</param>
+ /// <returns>
+ ///
+ /// <see langword="true"/>
+ /// if the element should be traversed (this base implementation approves every non-null element),
+ /// false otherwise
+ /// </returns>
+ public virtual bool Approve(IStructureNode elem) {
+ return elem != null;
+ }
+ }
+}
diff --git a/itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreeIteratorFlusher.cs b/itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreeIteratorFlusher.cs
new file mode 100644
index 0000000000..faac173d70
--- /dev/null
+++ b/itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreeIteratorFlusher.cs
@@ -0,0 +1,46 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2024 Apryse Group NV
+Authors: Apryse Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using iText.Kernel.Pdf.Tagging;
+
+namespace iText.Kernel.Pdf.Tagutils {
+ /// <summary>
+ /// Class that flushes struct elements while iterating over struct tree root with
+ /// <see cref="TagTreeIterator"/>.
+ /// </summary>
+ public class TagTreeIteratorFlusher : ITagTreeIteratorHandler {
+ /// <summary>
+ /// Creates a new instance of
+ /// <see cref="TagTreeIteratorFlusher"/>
+ /// </summary>
+ public TagTreeIteratorFlusher() {
+ }
+
+ // Empty constructor
+ /// <summary>Flushes the element if it is a struct element that has not been flushed yet.</summary>
+ public virtual void NextElement(IStructureNode elem) {
+ if (elem is PdfStructElem && !((PdfStructElem)elem).IsFlushed()) {
+ ((PdfStructElem)elem).Flush();
+ }
+ }
+ }
+}
diff --git a/itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreePointer.cs b/itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreePointer.cs
index 4a02187854..b22458e775 100644
--- a/itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreePointer.cs
+++ b/itext/itext.kernel/itext/kernel/pdf/tagutils/TagTreePointer.cs
@@ -761,22 +761,19 @@ public virtual iText.Kernel.Pdf.Tagutils.TagTreePointer MoveToKid(int n, String
if (MCR_MARKER.Equals(role)) {
throw new PdfException(KernelExceptionMessageConstant.CANNOT_MOVE_TO_MARKED_CONTENT_REFERENCE);
}
- IList descendants = new List(GetCurrentStructElem().GetKids());
- int k = 0;
- for (int i = 0; i < descendants.Count; ++i) {
- if (descendants[i] == null || descendants[i] is PdfMcr) {
- continue;
- }
- String descendantRole = descendants[i].GetRole().GetValue();
- if (descendantRole.Equals(role) && k++ == n) {
- SetCurrentStructElem((PdfStructElem)descendants[i]);
- return this;
- }
- else {
- descendants.AddAll(descendants[i].GetKids());
- }
+ TagTreePointer.RoleFinderHandler handler = new TagTreePointer.RoleFinderHandler(n, role);
+ TagTreePointer.TagTreeIteratorApproverWithStop approver = new TagTreePointer.TagTreeIteratorApproverWithStop
+ (handler);
+ TagTreeIterator iterator = new TagTreeIterator(GetCurrentStructElem(), approver, TagTreeIterator.TreeTraversalOrder
+ .PRE_ORDER);
+ iterator.AddHandler(handler);
+ iterator.Traverse();
+ PdfStructElem elem = handler.GetFoundElement();
+ if (elem == null) {
+ throw new PdfException(KernelExceptionMessageConstant.NO_KID_WITH_SUCH_ROLE);
}
- throw new PdfException(KernelExceptionMessageConstant.NO_KID_WITH_SUCH_ROLE);
+ SetCurrentStructElem(elem);
+ return this;
}
/// Gets current tag kids roles.
@@ -1131,5 +1128,51 @@ private void ThrowExceptionIfCurrentPageIsNotInited() {
throw new PdfException(KernelExceptionMessageConstant.PAGE_IS_NOT_SET_FOR_THE_PDF_TAG_STRUCTURE);
}
}
+
+ private class RoleFinderHandler : ITagTreeIteratorHandler {
+ private readonly int n;
+ // Handler that remembers the (n+1)-th visited element whose role equals the requested role.
+ private readonly String role;
+ // Number of role matches counted so far.
+ private int foundIdx = 0;
+ // The matching element; stays null until the match is found.
+ private PdfStructElem foundElem;
+
+//\cond DO_NOT_DOCUMENT
+ internal RoleFinderHandler(int n, String role) {
+ this.n = n;
+ this.role = role;
+ }
+//\endcond
+ // Elements visited after a match and elements without a role (GetRole() can return null) are ignored.
+ public virtual void NextElement(IStructureNode elem) {
+ if (foundElem != null || elem.GetRole() == null) {
+ return;
+ }
+ String descendantRole = elem.GetRole().GetValue();
+ if (descendantRole.Equals(role) && foundIdx++ == n) {
+ foundElem = (PdfStructElem)elem;
+ }
+ }
+ // Returns the matching element, or null when no match has been found.
+ public virtual PdfStructElem GetFoundElement() {
+ return foundElem;
+ }
+ }
+
+ [System.ObsoleteAttribute(@"change ITagTreeIteratorHandler#nextElement to return boolean showing whether the iteration should be continued. It will allow to get rid of this ugly workaround."
+ )]
+ private class TagTreeIteratorApproverWithStop : TagTreeIteratorAvoidDuplicatesApprover {
+ private readonly TagTreePointer.RoleFinderHandler handler;
+
+ public TagTreeIteratorApproverWithStop(TagTreePointer.RoleFinderHandler handler)
+ : base() {
+ this.handler = handler;
+ }
+
+ public override bool Approve(IStructureNode elem) {
+ return base.Approve(elem) && handler.GetFoundElement() == null;
+ }
+ }
}
}
diff --git a/itext/itext.kernel/itext/kernel/pdf/tagutils/WaitingTagsManager.cs b/itext/itext.kernel/itext/kernel/pdf/tagutils/WaitingTagsManager.cs
index 1797fff24c..18afe508f4 100644
--- a/itext/itext.kernel/itext/kernel/pdf/tagutils/WaitingTagsManager.cs
+++ b/itext/itext.kernel/itext/kernel/pdf/tagutils/WaitingTagsManager.cs
@@ -222,12 +222,10 @@ private void FlushStructElementAndItKids(PdfStructElem elem) {
if (waitingTagToAssociatedObj.ContainsKey(elem.GetPdfObject())) {
return;
}
- foreach (IStructureNode kid in elem.GetKids()) {
- if (kid is PdfStructElem) {
- FlushStructElementAndItKids((PdfStructElem)kid);
- }
- }
- elem.Flush();
+ TagTreeIterator iterator = new TagTreeIterator(elem, new WaitingTagsManager.WaitingTagsApprover(waitingTagToAssociatedObj
+ .Keys), TagTreeIterator.TreeTraversalOrder.POST_ORDER);
+ iterator.AddHandler(new TagTreeIteratorFlusher());
+ iterator.Traverse();
}
private void RemoveWaitingStateAndFlushIfParentFlushed(PdfStructElem structElem) {
@@ -239,5 +237,19 @@ private void RemoveWaitingStateAndFlushIfParentFlushed(PdfStructElem structElem)
}
}
}
+
+ private class WaitingTagsApprover : TagTreeIteratorAvoidDuplicatesApprover {
+ private readonly ICollection<PdfDictionary> waitingTags;
+ // Rejects struct elements listed in waitingTags, so the iterator visits neither them nor their kids.
+ public WaitingTagsApprover(ICollection<PdfDictionary> waitingTags)
+ : base() {
+ this.waitingTags = waitingTags;
+ }
+
+ public override bool Approve(IStructureNode elem) {
+ return base.Approve(elem) && elem is PdfStructElem && (waitingTags == null || !waitingTags.Contains(((PdfStructElem
+ )elem).GetPdfObject()));
+ }
+ }
}
}
diff --git a/itext/itext.kernel/itext/kernel/utils/TaggedPdfReaderTool.cs b/itext/itext.kernel/itext/kernel/utils/TaggedPdfReaderTool.cs
index c65335e8e6..ba4c328e15 100644
--- a/itext/itext.kernel/itext/kernel/utils/TaggedPdfReaderTool.cs
+++ b/itext/itext.kernel/itext/kernel/utils/TaggedPdfReaderTool.cs
@@ -46,6 +46,8 @@ public class TaggedPdfReaderTool {
protected internal IDictionary> parsedTags = new Dictionary>();
+ private readonly ICollection<PdfDictionary> inspectedStructTreeElems = new HashSet<PdfDictionary>();
+
///
/// Constructs a
///
@@ -118,6 +120,10 @@ protected internal virtual void InspectKid(IStructureNode kid) {
try {
if (kid is PdfStructElem) {
PdfStructElem structElemKid = (PdfStructElem)kid;
+ if (inspectedStructTreeElems.Contains(structElemKid.GetPdfObject())) {
+ return;
+ }
+ inspectedStructTreeElems.Add(structElemKid.GetPdfObject());
PdfName s = structElemKid.GetRole();
String tagN = s.GetValue();
String tag = FixTagName(tagN);
diff --git a/port-hash b/port-hash
index 3f601ca095..69ab66ba16 100644
--- a/port-hash
+++ b/port-hash
@@ -1 +1 @@
-1f62986b9a995607ee9143809bba30e9ab4cf893
+bba9f55da2ea35bf2b2f5bc3da9e49c2414a7285