package main

import (
	"errors"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"time"
)

// OCRMyPDFExecutable is the name of the OCRmyPDF binary that gets executed
// for every input file.
const OCRMyPDFExecutable = "ocrmypdf"

// Runner periodically searches for input files and runs OCRmyPDF on each one.
type Runner struct {
	// InputPatterns are the filepath.Glob patterns used for searching input
	// files.
	InputPatterns []string
	// OutputPath is the output directory. Every result is written to
	// OutputPath joined with the base name of its input file.
	OutputPath string
	// Interval is the time between two polls. Non-positive values fall back
	// to five seconds.
	Interval time.Duration
}

// Run starts a goroutine that polls the input patterns on every tick of
// r.Interval and processes every file it finds. Run itself returns
// immediately.
//
// The goroutine has no stop mechanism. It panics (via log.Panicf) when a glob
// pattern is malformed, when OCRmyPDF cannot be run at all, or when a failed
// document cannot be moved into the output directory.
func (r Runner) Run() {
	// time.NewTicker panics for non-positive durations, so negative values
	// are replaced by the default as well, not only the zero value.
	if r.Interval <= 0 {
		r.Interval = 5 * time.Second
	}

	go func() {
		log.Printf("Starting runner polling %q every %s.", r.InputPatterns, r.Interval)

		ticker := time.NewTicker(r.Interval)
		// The loop below never ends on its own; this releases the ticker
		// when a panic unwinds the goroutine.
		defer ticker.Stop()

		for range ticker.C {
			r.pollOnce()
		}
	}()
}

// pollOnce expands every input pattern once and processes all matches in
// order.
func (r Runner) pollOnce() {
	for _, inputPattern := range r.InputPatterns {
		filenames, err := filepath.Glob(inputPattern)
		if err != nil {
			log.Panicf("Failed to get input files: %v.", err)
		}

		for _, filename := range filenames {
			r.processFile(filename)
		}
	}
}

// processFile runs OCRmyPDF on filename and writes the result into the output
// directory under the same base name.
//
// On success the input file is removed. If OCRmyPDF exits with an error, the
// untouched input is moved into the output directory instead. Any other
// failure to run the command is treated as fatal.
func (r Runner) processFile(filename string) {
	base := filepath.Base(filename)
	outputPath := filepath.Join(r.OutputPath, base)
	log.Printf("Starting to process %q into %q.", base, outputPath)

	cmd := exec.Command(OCRMyPDFExecutable,
		"-l", "deu+eng",
		"--rotate-pages",
		"--optimize", "1",
		"--skip-text",
		filename, outputPath,
	)

	output, err := cmd.CombinedOutput()
	if err != nil {
		log.Printf("OCRmyPDF stdout + stderr: %v.", string(output))
	}

	var exitErr *exec.ExitError
	switch {
	case err == nil:
		// Only delete the PDF if the previous steps did succeed.
		if err := os.Remove(filename); err != nil {
			log.Printf("Failed to remove original document: %v.", err)
		}
	case errors.As(err, &exitErr):
		// OCRmyPDF ran but reported a failure: hand the original document
		// over unprocessed.
		log.Printf("OCRmyPDF failed with ExitCode: %v.", exitErr)
		log.Printf("We will move the document into the output directory instead.")
		if err := os.Rename(filename, outputPath); err != nil {
			log.Panicf("Failed to move %q to %q: %v.", filename, outputPath, err)
		}
		log.Printf("Exec: %q", cmd)
	default:
		// The command could not be run at all.
		log.Panicf("Failed to run OCRmyPDF: %v.", err)
	}

	log.Printf("Finished processing of %q.", base)
}