Skip to content

Commit 1a3fe2f

Browse files
committed
Initial commit
0 parents  commit 1a3fe2f

13 files changed

+1044
-0
lines changed

.envrc

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#! /bin/sh
2+
3+
# reload when these files change
4+
watch_file flake.nix
5+
watch_file flake.lock
6+
7+
{
8+
# shell gc root dir
9+
mkdir -p "$(direnv_layout_dir)"
10+
# Load the flake's dev environment; the profile path is quoted so that a
# layout directory containing whitespace is not split into several words.
eval "$(nix print-dev-env --profile "$(direnv_layout_dir)/flake-profile")"
11+
}

.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
.direnv/
2+
target/
3+
test/tmp/
4+
result

Dockerfile

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
FROM debian:bookworm-slim
2+
3+
RUN apt-get -y update && \
4+
apt-get -y upgrade && \
5+
apt-get install -y guile-2.2 guile-json qpdf poppler-utils
6+
7+
RUN mkdir -p /opt/addon
8+
COPY src/addon.scm /opt/addon/
9+
RUN chmod 755 /opt/addon/addon.scm
10+
11+
# test
12+
RUN qpdf --version && pdftotext -h
13+
14+
# let it compile into the compile cache
15+
RUN /opt/addon/addon.scm || true
16+
ENTRYPOINT [ "/opt/addon/addon.scm" ]

LICENSE.txt

+674
Large diffs are not rendered by default.

README.md

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Rotate PDFs
2+
3+
This is a simple addon for [Docspell](https://docspell.org) that
4+
allows rotating attached PDF files.
5+
6+
7+
## Prerequisites
8+
9+
This addon supports these runners: `nix-flake`, `docker` and
10+
`trivial`.
11+
12+
It is recommended to install [nix](https://nixos.org) on the machine
13+
running joex. This makes it possible to use the `nix-flake` runner, which can
14+
build the addon with all dependencies automatically.
15+
16+
Otherwise, for the trivial runner, you need to install these tools
17+
manually: qpdf and pdftotext (might be provided by poppler-utils or
18+
xpdf)
19+
20+
21+
## Usage
22+
23+
It expects arguments as a json file to know how to rotate. It is
24+
currently very basic: you can only set the rotation angle, and it
25+
applies it to all pdfs.
26+
27+
``` json
28+
{ "degree": "90" }
29+
```
30+
31+
Have a look at the
32+
[qpdf](https://qpdf.readthedocs.io/en/stable/cli.html#option-rotate)
33+
manual for possible values.
34+
35+
36+
## Testing
37+
38+
Install [direnv](https://direnv.net/) and [nix](https://nixos.org) and
39+
allow the source root via `direnv allow`. This applies the `devShell`
40+
settings from `flake.nix`. Then build the addon:
41+
42+
```
43+
nix build
44+
```
45+
46+
Now you can run it:
47+
48+
```
49+
./result/bin/rotate-pdf-addon
50+
```
51+
52+
It will run on the test files provided in `test/` and put results in
53+
`test/tmp`.
54+
55+
For quicker turnaround you can also run the source file itself. This
56+
works because `devShell` puts all required binaries on the `PATH`.

docspell-addon.yml

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
meta:
2+
name: "rotate-pdf-addon"
3+
version: "0.1.0"
4+
description: |
5+
Rotates PDF attachments.
6+
7+
Please specify the angle for rotation via a json document.
8+
Example:
9+
10+
```
11+
{ "degree": "90" }
12+
```
13+
14+
*Note that this is applied to ALL attachments!*
15+
16+
Rotation is clockwise ("right"). The possible values for `degree` can be
17+
looked up in the [qpdf
18+
manual](https://qpdf.readthedocs.io/en/stable/cli.html#option-rotate).
19+
20+
triggers:
21+
- existing-item
22+
23+
runner:
24+
nix:
25+
enable: true
26+
27+
docker:
28+
enable: true
29+
30+
trivial:
31+
enable: true
32+
exec: src/addon.scm
33+
34+
options:
35+
networking: false
36+
collectOutput: true

flake.lock

+42
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

flake.nix

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
{
2+
description = "A docspell addon for rotating pdf files";
3+
4+
inputs = {
5+
utils.url = "github:numtide/flake-utils";
6+
7+
# Nixpkgs / NixOS version to use.
8+
nixpkgs.url = "nixpkgs/nixos-21.11";
9+
};
10+
11+
outputs = { self, nixpkgs, utils }:
12+
utils.lib.eachDefaultSystem (system:
13+
let
14+
pkgs = import nixpkgs {
15+
inherit system;
16+
overlays = [
17+
18+
];
19+
};
20+
name = "rotate-pdf-addon";
21+
in rec {
22+
packages.${name} = pkgs.callPackage ./nix/addon.nix {
23+
inherit name;
24+
};
25+
26+
defaultPackage = packages.${name};
27+
28+
apps.${name} = utils.lib.mkApp {
29+
inherit name;
30+
drv = packages.${name};
31+
};
32+
defaultApp = apps.${name};
33+
34+
devShell = pkgs.mkShell {
35+
inputsFrom = builtins.attrValues self.packages.${system};
36+
buildInputs =
37+
[ pkgs.guile
38+
pkgs.guile-json
39+
pkgs.qpdf
40+
pkgs.poppler_utils
41+
];
42+
43+
ADDON_DIR = self;
44+
TMPDIR = "/tmp";
45+
ITEM_DATA_JSON="test/item_data.json";
46+
ITEM_PDF_DIR="test/pdf";
47+
TMP_DIR="test/tmp";
48+
CACHE_DIR="test/tmp";
49+
OUTPUT_DIR="test/tmp";
50+
GUILE_WARN_DEPRECATED="detailed";
51+
};
52+
}
53+
);
54+
}

nix/addon.nix

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
{ stdenv, bash, qpdf, poppler_utils, guile, guile-json, lib, name }:
2+
3+
stdenv.mkDerivation {
4+
inherit name;
5+
src = lib.sources.cleanSource ../.;
6+
7+
buildInputs = [ guile guile-json ];
8+
9+
patchPhase = ''
10+
TARGET=src/addon.scm
11+
sed -i 's,\*qpdf\* "qpdf",\*qpdf\* "${qpdf}/bin/qpdf",g' $TARGET
12+
sed -i 's,\*pdftotext\* "pdftotext",\*pdftotext\* "${poppler_utils}/bin/pdftotext",g' $TARGET
13+
'';
14+
15+
buildPhase = ''
16+
guild compile -o ${name}.go src/addon.scm
17+
'';
18+
19+
# module name must be same as <filename>.go
20+
installPhase = ''
21+
mkdir -p $out/{bin,lib}
22+
cp ${name}.go $out/lib/
23+
24+
cat > $out/bin/${name} <<-EOF
25+
#!${bash}/bin/bash
26+
exec -a "${name}" ${guile}/bin/guile -C ${guile-json}/share/guile/ccache -C $out/lib -e '(${name}) main' -c "" "\$@"
27+
EOF
28+
chmod +x $out/bin/${name}
29+
'';
30+
}

src/addon.scm

+109
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
#!/bin/sh
2+
# Quote "$0" so a script path containing whitespace reaches guile intact.
exec guile -e '(rotate-pdf-addon) main' -s "$0" "$@"
3+
!#
4+
;; A simple addon for Docspell for rotating pdf files.
5+
;;
6+
;; It uses "qpdf" for rotating.
7+
8+
(define-module (rotate-pdf-addon)
9+
#:use-module (json)
10+
#:use-module (ice-9 rdelim)
11+
#:export (main))
12+
13+
;; Some helpers
14+
;; Write a formatted message, followed by a newline, to the error port.
;; FORMATSTR and ARGS are handed to `format` unchanged. The newline is
;; written to the error port as well: `newline` without an argument uses
;; the current output port, which is where `main` prints the JSON result.
(define* (errln formatstr . args)
  (apply format (current-error-port) formatstr args)
  (newline (current-error-port)))
17+
18+
;; Macro for executing system commands and making this program exit in
19+
;; case of failure.
20+
;; (sysexec PROGRAM ARG ...) runs PROGRAM with the given arguments via
;; `system*`. Evaluates to #t on success; otherwise it reports the program
;; name and the status returned by `system*` on the error port and exits
;; with code 1.
;;
;; The argument expressions are evaluated exactly once and kept in `cmd`,
;; so an argument with side effects is not evaluated again when building
;; the failure message. The message uses only `~a` and `~%`, which the
;; default `format` supports without (ice-9 format) being loaded.
(define-syntax sysexec
  (syntax-rules ()
    ((sysexec exp ...)
     (let* ((cmd (list exp ...))
            (rc (apply system* cmd)))
       (unless (eqv? rc EXIT_SUCCESS)
         (format (current-error-port) "> '~a …' failed with: ~a~%" (car cmd) rc)
         (exit 1))
       #t))))
28+
29+
(fluid-set! %default-port-encoding "UTF-8")
30+
31+
;; External program dependencies
32+
(define *qpdf* "qpdf")
33+
(define *pdftotext* "pdftotext")
34+
35+
;; Getting some environment variables
36+
(define *output-dir* (getenv "OUTPUT_DIR"))
37+
(define *item-data-json* (getenv "ITEM_DATA_JSON"))
38+
(define *pdf-files-dir* (getenv "ITEM_PDF_DIR"))
39+
40+
;; fail early if not in the right context
41+
(when (not *item-data-json*)
  (errln "No item data json file found.")
  (exit 1))
;; ITEM_PDF_DIR and OUTPUT_DIR are spliced into file paths further down;
;; if one of them is unset, `getenv` returns #f and the resulting paths
;; would start with "#f/". Stop here with a clear message instead.
(when (not *pdf-files-dir*)
  (errln "No pdf files directory found.")
  (exit 1))
(when (not *output-dir*)
  (errln "No output directory found.")
  (exit 1))
44+
45+
;; The user input schema
46+
(define-json-type <userinput>
47+
(degree))
48+
49+
;; The itemdata record, only the fields needed here.
50+
(define-json-type <attachment>
51+
(id)
52+
(name)
53+
(position)
54+
(pages))
55+
(define-json-type <itemdata>
56+
(id)
57+
(attachments "attachments" #(<attachment>)))
58+
59+
;; The output record, what is returned to docspell
60+
(define-json-type <itemfiles>
61+
(itemId)
62+
(textFiles)
63+
(pdfFiles))
64+
(define-json-type <output>
65+
(files "files" #(<itemfiles>)))
66+
67+
(define (load-itemdata)
68+
"Load the JSON file containing item data into the itemdata record."
69+
(scm->itemdata (call-with-input-file *item-data-json* json->scm)))
70+
71+
(define (load-user-input file)
72+
(scm->userinput (call-with-input-file file json->scm)))
73+
74+
(define (rotate-pdf file degree out txt)
  "Rotate the pdf FILE by DEGREE with qpdf, writing the result to OUT,
then extract the text of OUT into TXT with pdftotext. Progress messages
go to the error port; a failing command ends the program via `sysexec`."
  (errln "Running qpdf to rotate ~s" degree)
  (sysexec *qpdf* (format #f "--rotate=~a" degree) file out)
  (errln "Running pdftotext to extract the text from rotated file")
  (sysexec *pdftotext* out txt))
79+
80+
(define (process-file itemid degree file)
81+
"Processing a single attachment."
82+
(let* ((id (attachment-id file))
83+
(name (attachment-name file))
84+
(file (format #f "~a/~a" *pdf-files-dir* id))
85+
(out (format #f "~a/~a.pdf" *output-dir* id))
86+
(txt (format #f "~a/~a.txt" *output-dir* id)))
87+
(errln "Processing attachment ~s" name)
88+
(rotate-pdf file degree out txt)
89+
(make-itemfiles itemid
90+
`((,id . ,(format #f "~a.txt" id)))
91+
`((,id . ,(format #f "~a.pdf" id))))))
92+
93+
(define (process-all indata)
94+
(let* ((item-meta (load-itemdata))
95+
(item-id (itemdata-id item-meta))
96+
(attachs (itemdata-attachments item-meta))
97+
(degree (userinput-degree indata)))
98+
(map (lambda (file)
99+
(process-file item-id degree file))
100+
attachs)))
101+
102+
(define (main args)
  "Program entry point. ARGS is the command line: the second element must
name the JSON file holding the user input. Processes all attachments and
prints the resulting output record as JSON to standard output."
  ;; Without this guard a missing argument surfaces as an opaque
  ;; wrong-type error from `cadr`.
  (when (or (null? args) (null? (cdr args)))
    (errln "No user input json file given.")
    (exit 1))
  (let* ((infile (load-user-input (cadr args)))
         (out (make-output (process-all infile))))
    (format #t "~a" (output->json out))))
106+
107+
;; Local Variables:
108+
;; mode: scheme
109+
;; End:

test/input.json

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{ "degree": "90" }

test/item_data.json

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{ "id": "qZDnyGIAJsXr",
2+
"attachments": [
3+
{"id": "BXLaDza97A",
4+
"name": "my-file.converted.pdf",
5+
"position": 0,
6+
"content": "the whole text",
7+
"language": "deu",
8+
"pages": 2
9+
}
10+
]
11+
}

test/pdf/BXLaDza97A

33.6 KB
Binary file not shown.

0 commit comments

Comments
 (0)