diff --git a/tagged-pdf-generation/pom.xml b/tagged-pdf-generation/pom.xml
new file mode 100644
index 0000000..a73f3b2
--- /dev/null
+++ b/tagged-pdf-generation/pom.xml
@@ -0,0 +1,68 @@
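+<?xml version="1.0" encoding="UTF-8"?>
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>verapdf-tools</artifactId>
+        <groupId>org.verapdf</groupId>
+        <version>1.0-SNAPSHOT</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>org.verapdf</groupId>
+
+    <artifactId>tagged-pdf-generation</artifactId>
+    <version>1.0-SNAPSHOT</version>
+
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <maven.compiler.source>1.8</maven.compiler.source>
+        <maven.compiler.target>1.8</maven.compiler.target>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.pdfbox</groupId>
+            <artifactId>pdfbox</artifactId>
+            <version>3.0.2</version>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.11</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <artifactId>maven-compiler-plugin</artifactId>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <configuration>
+                    <archive>
+                        <manifest>
+                            <mainClass>org.verapdf.tools.TaggedPDFGenerator</mainClass>
+                        </manifest>
+                    </archive>
+                    <descriptorRefs>
+                        <descriptorRef>jar-with-dependencies</descriptorRef>
+                    </descriptorRefs>
+                    <appendAssemblyId>false</appendAssemblyId>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>make-assembly</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
+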
diff --git a/tagged-pdf-generation/src/main/java/org/verapdf/tools/PDStructureTreeRootAccess.java b/tagged-pdf-generation/src/main/java/org/verapdf/tools/PDStructureTreeRootAccess.java
new file mode 100644
index 0000000..9d54a67
--- /dev/null
+++ b/tagged-pdf-generation/src/main/java/org/verapdf/tools/PDStructureTreeRootAccess.java
@@ -0,0 +1,27 @@
+package org.verapdf.tools;
+
+import org.apache.pdfbox.cos.COSInteger;
+import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureTreeRoot;
+import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
+
+/**
+ * Structure tree root that also accepts marked content as a direct kid.
+ *
+ * <p>The kid is recorded as the integer MCID of the marked content.
+ */
+public class PDStructureTreeRootAccess extends PDStructureTreeRoot {
+    /** Delegates to the default {@link PDStructureTreeRoot} constructor. */
+    public PDStructureTreeRootAccess() {
+    }
+
+    /**
+     * Appends the MCID of the given marked content as a kid of this root.
+     *
+     * @param markedContent marked content whose MCID is appended; {@code null} is ignored
+     */
+    public void appendKid(PDMarkedContent markedContent) {
+        if (markedContent != null) {
+            appendKid(COSInteger.get(markedContent.getMCID()));
+        }
+    }
+}
diff --git a/tagged-pdf-generation/src/main/java/org/verapdf/tools/StructureType.java b/tagged-pdf-generation/src/main/java/org/verapdf/tools/StructureType.java
new file mode 100644
index 0000000..62dafc9
--- /dev/null
+++ b/tagged-pdf-generation/src/main/java/org/verapdf/tools/StructureType.java
@@ -0,0 +1,82 @@
+package org.verapdf.tools;
+
+/**
+ * Structure type names used by the tagged PDF generator.
+ *
+ * <p>Every constant carries the name string that {@link #string()} returns.
+ */
+public enum StructureType {
+    DOCUMENT("Document"),
+    PART("Part"),
+    DIV("Div"),
+    CAPTION("Caption"),
+    THEAD("THead"),
+    TBODY("TBody"),
+    TFOOT("TFoot"),
+    H("H"),
+    P("P"),
+    L("L"),
+    LI("LI"),
+    LBL("Lbl"),
+    LBODY("LBody"),
+    TABLE("Table"),
+    TR("TR"),
+    TH("TH"),
+    TD("TD"),
+    SPAN("Span"),
+    LINK("Link"),
+    ANNOT("Annot"),
+    RUBY("Ruby"),
+    WARICHU("Warichu"),
+    FIGURE("Figure"),
+    FORMULA("Formula"),
+    FORM("Form"),
+    RB("RB"),
+    RT("RT"),
+    RP("RP"),
+    WT("WT"),
+    WP("WP"),
+    ART("Art"),
+    SECT("Sect"),
+    BLOCK_QUOTE("BlockQuote"),
+    TOC("TOC"),
+    TOCI("TOCI"),
+    INDEX("Index"),
+    NON_STRUCT("NonStruct"),
+    PRIVATE("Private"),
+    QUOTE("Quote"),
+    NOTE("Note"),
+    REFERENCE("Reference"),
+    BIB_ENTRY("BibEntry"),
+    CODE("Code"),
+    H1("H1"),
+    H2("H2"),
+    H3("H3"),
+    H4("H4"),
+    H5("H5"),
+    H6("H6"),
+    DOCUMENT_FRAGMENT("DocumentFragment"),
+    ASIDE("Aside"),
+    TITLE("Title"),
+    FENOTE("FENote"),
+    SUB("Sub"),
+    EM("Em"),
+    STRONG("Strong"),
+    ARTIFACT("Artifact");
+
+    /** Name string bound to this constant. */
+    private final String typeName;
+
+    StructureType(String typeName) {
+        this.typeName = typeName;
+    }
+
+    /**
+     * Returns the name string bound to this constant.
+     *
+     * @return the constructor argument, e.g. {@code "Document"} for {@link #DOCUMENT}
+     */
+    public String string() {
+        return typeName;
+    }
+}
diff --git a/tagged-pdf-generation/src/main/java/org/verapdf/tools/TaggedPDFGenerator.java b/tagged-pdf-generation/src/main/java/org/verapdf/tools/TaggedPDFGenerator.java
new file mode 100644
index 0000000..b95d6e3
--- /dev/null
+++ b/tagged-pdf-generation/src/main/java/org/verapdf/tools/TaggedPDFGenerator.java
@@ -0,0 +1,271 @@
+package org.verapdf.tools;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Paths;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.pdfwriter.compress.CompressParameters;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageContentStream;
+import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode;
+import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureElement;
+import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureTreeRoot;
+import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
+import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDPropertyList;
+import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.apache.pdfbox.pdmodel.font.Standard14Fonts.FontName;
+
+/**
+ * Command-line tool that generates a fixed set of tagged PDF files with PDFBox.
+ *
+ * <p>One document nests every non-transitional structure type inside every other one; each
+ * remaining document wraps the nesting of all types in one transitional type ({@code Div},
+ * {@code NonStruct} or {@code Part}).
+ */
+public class TaggedPDFGenerator {
+    private static final Logger LOGGER = Logger.getLogger("");
+
+    /** Key of {@link #OUTPUT_FILES} that selects the "all inclusions" document. */
+    private static final String ALL_INCLUSIONS = "all_inclusions";
+
+    /** Directory created under {@code user.dir} when no existing output folder is supplied. */
+    private static final String DEFAULT_FOLDER = "generated_files";
+
+    /** Names of all structure types declared in {@link StructureType}. */
+    private static final Set<String> ALL_TYPES = new TreeSet<>();
+
+    /** {@link #ALL_TYPES} without the transitional types. */
+    private static final Set<String> NON_TRANSITIONAL_TYPES = new TreeSet<>();
+
+    /** Types that get a dedicated document in which they wrap the nested elements. */
+    private static final Set<String> TRANSITIONAL_TYPES = new TreeSet<>();
+
+    /** Maps a document key to the output file name (without the {@code .pdf} extension). */
+    private static final Map<String, String> OUTPUT_FILES = new HashMap<>();
+
+    static {
+        TRANSITIONAL_TYPES.add(StructureType.DIV.string());
+        TRANSITIONAL_TYPES.add(StructureType.NON_STRUCT.string());
+        TRANSITIONAL_TYPES.add(StructureType.PART.string());
+
+        for (StructureType type : StructureType.values()) {
+            ALL_TYPES.add(type.string());
+        }
+        NON_TRANSITIONAL_TYPES.addAll(ALL_TYPES);
+        NON_TRANSITIONAL_TYPES.removeAll(TRANSITIONAL_TYPES);
+
+        OUTPUT_FILES.put(StructureType.DIV.string(), "transitionaltag_div_test");
+        OUTPUT_FILES.put(StructureType.NON_STRUCT.string(), "transitionaltag_nonstruct_test");
+        OUTPUT_FILES.put(StructureType.PART.string(), "transitionaltag_part_test");
+        OUTPUT_FILES.put(ALL_INCLUSIONS, "all_inclusions_test");
+    }
+
+    /** MCID assigned to the next marked-content sequence; reset to 1 for every document. */
+    private int currentMCID = 1;
+
+    /** Output folder taken from the first command-line argument, or {@code null}. */
+    private String folder;
+
+    /**
+     * Generates all documents; failures are logged instead of being propagated.
+     *
+     * @param args optional: {@code args[0]} is an existing folder to write the PDFs to
+     */
+    public static void main(String[] args) {
+        TaggedPDFGenerator generator = new TaggedPDFGenerator();
+
+        try {
+            generator.run(args);
+        } catch (Exception ex) {
+            // Hand the throwable to the logger so the stack trace goes through the same channel.
+            LOGGER.log(Level.SEVERE,
+                    "Error during pdf generation: " + ex.getMessage() + ", proccess stopped.", ex);
+        }
+    }
+
+    /**
+     * Resolves the directory the PDFs are written to.
+     *
+     * @return {@link #folder} when it exists, otherwise the absolute path of
+     *         {@code generated_files} under the current working directory
+     * @throws IOException if the fallback directory cannot be created
+     */
+    private String getWorkingDir() throws IOException {
+        if (folder != null) {
+            if (new File(folder).exists()) {
+                return folder;
+            }
+            LOGGER.warning("Output folder does not exist: " + folder + ", using the default one.");
+        }
+
+        // Parent/child constructor keeps the path separator platform-neutral.
+        File fallback = new File(System.getProperty("user.dir"), DEFAULT_FOLDER);
+        fallback.mkdirs();
+        if (!fallback.isDirectory()) {
+            throw new IOException("Cannot create output directory: " + fallback.getAbsolutePath());
+        }
+
+        return fallback.getAbsolutePath();
+    }
+
+    /**
+     * Builds and saves one PDF per entry of {@link #OUTPUT_FILES}.
+     *
+     * @param args command-line arguments; the first one, if present, is the output folder
+     * @throws IOException if a document cannot be built or saved
+     */
+    private void run(String[] args) throws IOException {
+        if (args.length > 0) {
+            folder = args[0];
+        }
+
+        String parentFolder = getWorkingDir();
+
+        for (Map.Entry<String, String> entry : OUTPUT_FILES.entrySet()) {
+            String type = entry.getKey();
+            File target = Paths.get(parentFolder).resolve(entry.getValue() + ".pdf").toFile();
+
+            currentMCID = 1;
+
+            // try-with-resources closes the document even when building or saving fails.
+            try (PDDocument document = new PDDocument()) {
+                if (ALL_INCLUSIONS.equals(type)) {
+                    allInclusionsPDF(document);
+                } else {
+                    transitionalPDF(document, type);
+                }
+
+                document.save(target, CompressParameters.NO_COMPRESSION);
+            }
+        }
+    }
+
+    /**
+     * Fills the document with a single page whose structure tree holds two marked-content kids
+     * at the root and, for every non-transitional type, one populated and one empty element.
+     *
+     * @param document empty document to populate
+     * @throws IOException if writing the page content fails
+     */
+    private void allInclusionsPDF(PDDocument document) throws IOException {
+        PDPage page = new PDPage();
+        PDDocumentCatalog catalog = document.getDocumentCatalog();
+        PDStructureTreeRootAccess treeRoot = new PDStructureTreeRootAccess();
+
+        document.addPage(page);
+        catalog.setStructureTreeRoot(treeRoot);
+
+        try (PDPageContentStream content =
+                new PDPageContentStream(document, page, AppendMode.OVERWRITE, false)) {
+            treeRoot.appendKid(textContent(content, " "));
+            treeRoot.appendKid(textContent(content, " "));
+
+            for (String type : NON_TRANSITIONAL_TYPES) {
+                PDStructureElement element = new PDStructureElement(type, treeRoot);
+
+                element.appendKid(textContent(content, " "));
+                element.appendKid(textContent(content, " "));
+                appendChildPairs(element, NON_TRANSITIONAL_TYPES, page);
+
+                element.setPage(page);
+                treeRoot.appendKid(element);
+
+                // Second element of the same type, left without kids.
+                PDStructureElement emptyElement = new PDStructureElement(type, treeRoot);
+                emptyElement.setPage(page);
+                treeRoot.appendKid(emptyElement);
+            }
+        }
+    }
+
+    /**
+     * Fills the document with a single page whose structure tree holds, for every type, an
+     * element wrapping one populated and one empty element of the transitional type.
+     *
+     * @param document empty document to populate
+     * @param transitionalType structure type name placed between each element and its kids
+     * @throws IOException if writing the page content fails
+     */
+    private void transitionalPDF(PDDocument document, String transitionalType) throws IOException {
+        PDPage page = new PDPage();
+        PDDocumentCatalog catalog = document.getDocumentCatalog();
+        PDStructureTreeRoot treeRoot = new PDStructureTreeRoot();
+
+        document.addPage(page);
+        catalog.setStructureTreeRoot(treeRoot);
+
+        try (PDPageContentStream content =
+                new PDPageContentStream(document, page, AppendMode.OVERWRITE, false)) {
+            for (String type : ALL_TYPES) {
+                PDStructureElement element = new PDStructureElement(type, treeRoot);
+                PDStructureElement transitionalElement =
+                        new PDStructureElement(transitionalType, element);
+
+                transitionalElement.appendKid(textContent(content, " "));
+                transitionalElement.appendKid(textContent(content, " "));
+                appendChildPairs(transitionalElement, ALL_TYPES, page);
+
+                element.setPage(page);
+                element.appendKid(transitionalElement);
+
+                // Second transitional element, left without kids and without a page.
+                element.appendKid(new PDStructureElement(transitionalType, element));
+                treeRoot.appendKid(element);
+            }
+        }
+    }
+
+    /**
+     * Appends two child elements of every given type to the parent, each bound to the page.
+     *
+     * @param parent element receiving the children
+     * @param childTypes structure type names of the children to create
+     * @param page page set on every created child
+     */
+    private static void appendChildPairs(PDStructureElement parent, Set<String> childTypes,
+            PDPage page) {
+        for (String childType : childTypes) {
+            for (int copy = 0; copy < 2; copy++) {
+                PDStructureElement child = new PDStructureElement(childType, parent);
+                child.setPage(page);
+
+                parent.appendKid(child);
+            }
+        }
+    }
+
+    /**
+     * Writes the text as a {@code P} marked-content sequence carrying the next MCID.
+     *
+     * @param content stream of the page the text is written to
+     * @param text text to show
+     * @return marked content describing the written sequence
+     * @throws IOException if writing to the content stream fails
+     */
+    private PDMarkedContent textContent(PDPageContentStream content, String text) throws IOException {
+        content.beginText();
+        content.setFont(new PDType1Font(FontName.HELVETICA_BOLD), 14);
+
+        COSDictionary dictionary = new COSDictionary();
+        dictionary.setInt(COSName.MCID, currentMCID);
+        currentMCID++;
+
+        content.beginMarkedContent(COSName.P, PDPropertyList.create(dictionary));
+        content.showText(text);
+        content.endMarkedContent();
+        PDMarkedContent markedContent = new PDMarkedContent(COSName.P, dictionary);
+        content.endText();
+
+        return markedContent;
+    }
+}