Get into working state

- Make compatible with at least go 1.19
- Fix paths
- Add more log output
- Update README.md
- Fix runner
- Update systemd service
This commit is contained in:
David Vogel 2024-11-04 20:29:33 +01:00
parent c30814d5c3
commit c82cd72a47
6 changed files with 46 additions and 16 deletions

View File

@ -12,7 +12,7 @@ The idea is to let it watch a directory any scanner will scan into, and then thi
1. Install the project somewhere.
2. Edit [main.go](main.go) to use the correct paths to your scanner and paperless consumption directories.
3. Copy the [ocrmypdf-runner.service](service/linux/systemd/ocrmypdf-runner.service) into your paperless systemd services directory (`%HOME/.config/systemd/user/ocrmypdf-runner.service`).
4. `systemctl --user daemon-reload`
5. `systemctl --user enable ocrmypdf-runner.service`
6. `systemctl --user start ocrmypdf-runner.service`
3. Copy the [ocrmypdf-runner.service](service/linux/systemd/ocrmypdf-runner.service) into your systemd services directory (`/etc/systemd/system/ocrmypdf-runner.service`). You may have to change the user in the service file.
4. `systemctl daemon-reload`
5. `systemctl enable ocrmypdf-runner.service`
6. `systemctl start ocrmypdf-runner.service`

2
go.mod
View File

@ -1,3 +1,3 @@
module ocrmypdf-runner
go 1.23.0
go 1.19

View File

@ -7,8 +7,8 @@ import (
func init() {
runner := Runner{
InputPatterns: []string{filepath.Join(".", "input", "*.pdf")},
OutputPath: filepath.Join("."),
InputPatterns: []string{filepath.Join("/", "media", "vault", "Paperless", "Consume", "input", "*.pdf")},
OutputPath: filepath.Join("/", "media", "vault", "Paperless", "Consume", "processed"),
Interval: 5 * time.Second,
}
runner.Run()

View File

@ -1,6 +1,10 @@
package main
import "log"
func main() {
log.Printf("Starting OCRmyPDF-runner.")
// The runners will be started in some init.go file.
// Wait forever.

View File

@ -1,6 +1,7 @@
package main
import (
"errors"
"log"
"os"
"os/exec"
@ -23,33 +24,54 @@ func (r Runner) Run() {
}
go func() {
log.Printf("Starting runner polling %q every %s.", r.InputPatterns, r.Interval)
ticker := time.NewTicker(r.Interval)
for range ticker.C {
for _, inputPattern := range r.InputPatterns {
filenames, err := filepath.Glob(inputPattern)
if err != nil {
log.Panicf("Failed to get input files: %v", err)
log.Panicf("Failed to get input files: %v.", err)
}
for _, filename := range filenames {
outputPath := filepath.Join(r.OutputPath, filepath.Base(filename))
base := filepath.Base(filename)
outputPath := filepath.Join(r.OutputPath, base)
log.Printf("Starting to process %q into %q.", base, outputPath)
args := []string{
"-l deu+eng",
"-l", "deu+eng",
"--rotate-pages",
"--optimize 1",
"--optimize", "1",
"--skip-text",
}
args = append(args, filename, outputPath)
cmd := exec.Command(OCRMyPDFExecutable, args...)
if err := cmd.Run(); err != nil {
log.Panicf("Failed to run OCRmyPDF: %v", err)
if output, err := cmd.CombinedOutput(); err != nil {
log.Printf("OCRmyPDF stdout + stderr: %v.", string(output))
var exitErr *exec.ExitError
if errors.As(err, &exitErr) {
log.Printf("OCRmyPDF failed with ExitCode: %v.", exitErr)
log.Printf("We will move the document into the output directory instead.")
if err := os.Rename(filename, outputPath); err != nil {
log.Panicf("Failed to move %q to %q.", filename, outputPath)
}
log.Printf("Exec: %q", cmd)
} else {
log.Panicf("Failed to run OCRmyPDF: %v.", err)
}
} else {
// Only delete the PDF if the previous steps did succeed.
if err := os.Remove(filename); err != nil {
log.Printf("Failed to remove original document: %v.", err)
}
}
// Only delete the PDF if the previous steps did succeed.
os.Remove(filename)
log.Printf("Finished processing of %q.", base)
}
}

View File

@ -2,8 +2,12 @@
Description=A runner that watches directories and runs OCRmyPDF on files in them.
[Service]
Restart=on-failure
RestartSec=60s
WorkingDirectory=/home/paperless/ocrmypdf-runner/
ExecStart=go run .
User=paperless
Group=paperless
[Install]
WantedBy=default.target
WantedBy=multi-user.target