mirror of
https://github.com/Dadido3/Scanyonero.git
synced 2025-06-06 17:30:00 +00:00
Initial commit
This commit is contained in:
commit
c30814d5c3
18
README.md
Normal file
18
README.md
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# OCRmyPDF runner
|
||||||
|
|
||||||
|
A very simple tool that listens for files in a directory, and runs OCRmyPDF on them.
|
||||||
|
|
||||||
|
This is needed because paperless(-ngx) will always create a copy of the document with its built-in clean-up and OCR feature.
|
||||||
|
Even external pre-consumption scripts will be run on all new documents, not just on files from the consumption directory.
|
||||||
|
So the solution is to have this watchdog/runner that only pre-processes scanned documents, and leaves everything else untouched.
|
||||||
|
|
||||||
|
The idea is to let it watch a directory any scanner will scan into, and then this runner will write the final pre-processed document into a directory paperless watches.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
1. Install the project somewhere.
|
||||||
|
2. Edit [main.go](main.go) to use the correct paths to your scanner and paperless consumption directories.
|
||||||
|
3. Copy the [ocrmypdf-runner.service](service/linux/systemd/ocrmypdf-runner.service) into your paperless user's systemd services directory (`$HOME/.config/systemd/user/ocrmypdf-runner.service`).
|
||||||
|
4. `systemctl --user daemon-reload`
|
||||||
|
5. `systemctl --user enable ocrmypdf-runner.service`
|
||||||
|
6. `systemctl --user start ocrmypdf-runner.service`
|
15
init.go
Normal file
15
init.go
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"path/filepath"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
runner := Runner{
|
||||||
|
InputPatterns: []string{filepath.Join(".", "input", "*.pdf")},
|
||||||
|
OutputPath: filepath.Join("."),
|
||||||
|
Interval: 5 * time.Second,
|
||||||
|
}
|
||||||
|
runner.Run()
|
||||||
|
}
|
8
main.go
Normal file
8
main.go
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// main keeps the process alive; it does not start any work itself.
func main() {
	// Nothing to launch here: the runners are started from an init function
	// (see init.go).

	// An empty select has no cases that can ever proceed, so this blocks the
	// main goroutine forever.
	select {}
}
|
58
runner.go
Normal file
58
runner.go
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// OCRMyPDFExecutable is the name of the OCRmyPDF command that Runner executes
// for every input file.
const OCRMyPDFExecutable = "ocrmypdf"
|
||||||
|
|
||||||
|
// Runner describes one polling job: which files to look for, where OCRmyPDF
// should write its results, and how often to look.
type Runner struct {
	// InputPatterns holds the "Glob" patterns used for searching input files.
	InputPatterns []string

	// OutputPath is the output directory passed to OCRmyPDF.
	OutputPath string

	// Interval is the time between two scans of the input patterns.
	// Run substitutes five seconds when it is left at zero.
	Interval time.Duration
}
|
||||||
|
|
||||||
|
func (r Runner) Run() {
|
||||||
|
if r.Interval == 0 {
|
||||||
|
r.Interval = 5 * time.Second
|
||||||
|
}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
ticker := time.NewTicker(r.Interval)
|
||||||
|
for range ticker.C {
|
||||||
|
|
||||||
|
for _, inputPattern := range r.InputPatterns {
|
||||||
|
filenames, err := filepath.Glob(inputPattern)
|
||||||
|
if err != nil {
|
||||||
|
log.Panicf("Failed to get input files: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, filename := range filenames {
|
||||||
|
|
||||||
|
outputPath := filepath.Join(r.OutputPath, filepath.Base(filename))
|
||||||
|
|
||||||
|
args := []string{
|
||||||
|
"-l deu+eng",
|
||||||
|
"--rotate-pages",
|
||||||
|
"--optimize 1",
|
||||||
|
}
|
||||||
|
args = append(args, filename, outputPath)
|
||||||
|
cmd := exec.Command(OCRMyPDFExecutable, args...)
|
||||||
|
|
||||||
|
if err := cmd.Run(); err != nil {
|
||||||
|
log.Panicf("Failed to run OCRmyPDF: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only delete the PDF if the previous steps did succeed.
|
||||||
|
os.Remove(filename)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
9
service/linux/systemd/ocrmypdf-runner.service
Normal file
9
service/linux/systemd/ocrmypdf-runner.service
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=A runner that will watch directories and runs OCRmyPDF on files in them.
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
WorkingDirectory=/home/paperless/ocrmypdf-runner/
|
||||||
|
ExecStart=go run .
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=default.target
|
Loading…
Reference in New Issue
Block a user