diff --git a/README.md b/README.md index 96cf1b2..6371bdc 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ The idea is to let it watch a directory any scanner will scan into, and then thi 1. Install the project somewhere. 2. Edit [main.go](main.go) to use the correct paths to your scanner and paperless consumption directories. -3. Copy the [ocrmypdf-runner.service](service/linux/systemd/ocrmypdf-runner.service) into your paperless systemd services directory (`%HOME/.config/systemd/user/ocrmypdf-runner.service`). -4. `systemctl --user daemon-reload` -5. `systemctl --user enable ocrmypdf-runner.service` -6. `systemctl --user start ocrmypdf-runner.service` +3. Copy the [ocrmypdf-runner.service](service/linux/systemd/ocrmypdf-runner.service) into your systemd services directory (`/etc/systemd/system/ocrmypdf-runner.service`). You may have to change the user in the service file. +4. `systemctl daemon-reload` +5. `systemctl enable ocrmypdf-runner.service` +6. `systemctl start ocrmypdf-runner.service` diff --git a/go.mod b/go.mod index 886c39a..8a1c427 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,3 @@ module ocrmypdf-runner -go 1.23.0 +go 1.19 \ No newline at end of file diff --git a/init.go b/init.go index 9105d74..6017508 100644 --- a/init.go +++ b/init.go @@ -7,8 +7,8 @@ import ( func init() { runner := Runner{ - InputPatterns: []string{filepath.Join(".", "input", "*.pdf")}, - OutputPath: filepath.Join("."), + InputPatterns: []string{filepath.Join("/", "media", "vault", "Paperless", "Consume", "input", "*.pdf")}, + OutputPath: filepath.Join("/", "media", "vault", "Paperless", "Consume", "processed"), Interval: 5 * time.Second, } runner.Run() diff --git a/main.go b/main.go index 7a72852..737674f 100644 --- a/main.go +++ b/main.go @@ -1,6 +1,10 @@ package main +import "log" + func main() { + log.Printf("Starting OCRmyPDF-runner.") + // The runners will be started in some init.go file. // Wait forever. 
diff --git a/runner.go b/runner.go index 240d86e..2d3c27b 100644 --- a/runner.go +++ b/runner.go @@ -1,6 +1,7 @@ package main import ( + "errors" "log" "os" "os/exec" @@ -23,33 +24,54 @@ func (r Runner) Run() { } go func() { + log.Printf("Starting runner polling %q every %s.", r.InputPatterns, r.Interval) + ticker := time.NewTicker(r.Interval) for range ticker.C { for _, inputPattern := range r.InputPatterns { filenames, err := filepath.Glob(inputPattern) if err != nil { - log.Panicf("Failed to get input files: %v", err) + log.Panicf("Failed to get input files: %v.", err) } for _, filename := range filenames { - outputPath := filepath.Join(r.OutputPath, filepath.Base(filename)) + base := filepath.Base(filename) + outputPath := filepath.Join(r.OutputPath, base) + + log.Printf("Starting to process %q into %q.", base, outputPath) args := []string{ - "-l deu+eng", + "-l", "deu+eng", "--rotate-pages", - "--optimize 1", + "--optimize", "1", + "--skip-text", } args = append(args, filename, outputPath) cmd := exec.Command(OCRMyPDFExecutable, args...) - if err := cmd.Run(); err != nil { - log.Panicf("Failed to run OCRmyPDF: %v", err) + if output, err := cmd.CombinedOutput(); err != nil { + log.Printf("OCRmyPDF stdout + stderr: %v.", string(output)) + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + log.Printf("OCRmyPDF failed with ExitCode: %v.", exitErr) + log.Printf("We will move the document into the output directory instead.") + if err := os.Rename(filename, outputPath); err != nil { + log.Panicf("Failed to move %q to %q.", filename, outputPath) + } + log.Printf("Exec: %q", cmd) + } else { + log.Panicf("Failed to run OCRmyPDF: %v.", err) + } + } else { + // Only delete the PDF if the previous steps did succeed. + if err := os.Remove(filename); err != nil { + log.Printf("Failed to remove original document: %v.", err) + } } - // Only delete the PDF if the previous steps did succeed. 
- os.Remove(filename) + log.Printf("Finished processing of %q.", base) } } diff --git a/service/linux/systemd/ocrmypdf-runner.service b/service/linux/systemd/ocrmypdf-runner.service index b3dc327..ec1f53b 100644 --- a/service/linux/systemd/ocrmypdf-runner.service +++ b/service/linux/systemd/ocrmypdf-runner.service @@ -2,8 +2,12 @@ Description=A runner that will watch directories and runs OCRmyPDF on files in them. [Service] +Restart=on-failure +RestartSec=60s WorkingDirectory=/home/paperless/ocrmypdf-runner/ ExecStart=go run . +User=paperless +Group=paperless [Install] -WantedBy=default.target +WantedBy=multi-user.target