Get into working state

- Make compatible with at least go 1.19
- Fix paths
- Add more log output
- Update README.md
- Fix runner
- Update systemd service
This commit is contained in:
David Vogel 2024-11-04 20:29:33 +01:00
parent c30814d5c3
commit c82cd72a47
6 changed files with 46 additions and 16 deletions

View File

@ -12,7 +12,7 @@ The idea is to let it watch a directory any scanner will scan into, and then thi
1. Install the project somewhere. 1. Install the project somewhere.
2. Edit [main.go](main.go) to use the correct paths to your scanner and paperless consumption directories. 2. Edit [main.go](main.go) to use the correct paths to your scanner and paperless consumption directories.
3. Copy the [ocrmypdf-runner.service](service/linux/systemd/ocrmypdf-runner.service) into your paperless systemd services directory (`%HOME/.config/systemd/user/ocrmypdf-runner.service`). 3. Copy the [ocrmypdf-runner.service](service/linux/systemd/ocrmypdf-runner.service) into your systemd services directory (`/etc/systemd/system/ocrmypdf-runner.service`). You may have to change the user in the service file.
4. `systemctl --user daemon-reload` 4. `systemctl daemon-reload`
5. `systemctl --user enable ocrmypdf-runner.service` 5. `systemctl enable ocrmypdf-runner.service`
6. `systemctl --user start ocrmypdf-runner.service` 6. `systemctl start ocrmypdf-runner.service`

2
go.mod
View File

@ -1,3 +1,3 @@
module ocrmypdf-runner module ocrmypdf-runner
go 1.23.0 go 1.19

View File

@ -7,8 +7,8 @@ import (
func init() { func init() {
runner := Runner{ runner := Runner{
InputPatterns: []string{filepath.Join(".", "input", "*.pdf")}, InputPatterns: []string{filepath.Join("/", "media", "vault", "Paperless", "Consume", "input", "*.pdf")},
OutputPath: filepath.Join("."), OutputPath: filepath.Join("/", "media", "vault", "Paperless", "Consume", "processed"),
Interval: 5 * time.Second, Interval: 5 * time.Second,
} }
runner.Run() runner.Run()

View File

@ -1,6 +1,10 @@
package main package main
import "log"
func main() { func main() {
log.Printf("Starting OCRmyPDF-runner.")
// The runners will be started in some init.go file. // The runners will be started in some init.go file.
// Wait forever. // Wait forever.

View File

@ -1,6 +1,7 @@
package main package main
import ( import (
"errors"
"log" "log"
"os" "os"
"os/exec" "os/exec"
@ -23,33 +24,54 @@ func (r Runner) Run() {
} }
go func() { go func() {
log.Printf("Starting runner polling %q every %s.", r.InputPatterns, r.Interval)
ticker := time.NewTicker(r.Interval) ticker := time.NewTicker(r.Interval)
for range ticker.C { for range ticker.C {
for _, inputPattern := range r.InputPatterns { for _, inputPattern := range r.InputPatterns {
filenames, err := filepath.Glob(inputPattern) filenames, err := filepath.Glob(inputPattern)
if err != nil { if err != nil {
log.Panicf("Failed to get input files: %v", err) log.Panicf("Failed to get input files: %v.", err)
} }
for _, filename := range filenames { for _, filename := range filenames {
outputPath := filepath.Join(r.OutputPath, filepath.Base(filename)) base := filepath.Base(filename)
outputPath := filepath.Join(r.OutputPath, base)
log.Printf("Starting to process %q into %q.", base, outputPath)
args := []string{ args := []string{
"-l deu+eng", "-l", "deu+eng",
"--rotate-pages", "--rotate-pages",
"--optimize 1", "--optimize", "1",
"--skip-text",
} }
args = append(args, filename, outputPath) args = append(args, filename, outputPath)
cmd := exec.Command(OCRMyPDFExecutable, args...) cmd := exec.Command(OCRMyPDFExecutable, args...)
if err := cmd.Run(); err != nil { if output, err := cmd.CombinedOutput(); err != nil {
log.Panicf("Failed to run OCRmyPDF: %v", err) log.Printf("OCRmyPDF stdout + stderr: %v.", string(output))
var exitErr *exec.ExitError
if errors.As(err, &exitErr) {
log.Printf("OCRmyPDF failed with ExitCode: %v.", exitErr)
log.Printf("We will move the document into the output directory instead.")
if err := os.Rename(filename, outputPath); err != nil {
log.Panicf("Failed to move %q to %q.", filename, outputPath)
}
log.Printf("Exec: %q", cmd)
} else {
log.Panicf("Failed to run OCRmyPDF: %v.", err)
}
} else {
// Only delete the PDF if the previous steps did succeed.
if err := os.Remove(filename); err != nil {
log.Printf("Failed to remove original document: %v.", err)
}
} }
// Only delete the PDF if the previous steps did succeed. log.Printf("Finished processing of %q.", base)
os.Remove(filename)
} }
} }

View File

@ -2,8 +2,12 @@
Description=A runner that watches directories and runs OCRmyPDF on files in them. Description=A runner that watches directories and runs OCRmyPDF on files in them.
[Service] [Service]
Restart=on-failure
RestartSec=60s
WorkingDirectory=/home/paperless/ocrmypdf-runner/ WorkingDirectory=/home/paperless/ocrmypdf-runner/
ExecStart=go run . ExecStart=go run .
User=paperless
Group=paperless
[Install] [Install]
WantedBy=default.target WantedBy=multi-user.target