Rework into FTP scanning server

- Rename to Scanyonero
- Add FTP server that ingests TIFF, PNG, JPEG or PDF files
- Add web interface to check and modify ingested files
- Rework how ocrmypdf is invoked

Basics are working, but the program is not in a usable state.
This commit is contained in:
David Vogel 2025-05-14 12:08:38 +02:00
parent c82cd72a47
commit 853a1bb58d
62 changed files with 2919 additions and 28 deletions

76
.gitignore vendored Normal file
View File

@ -0,0 +1,76 @@
# File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
# Created by https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,go
# Edit at https://www.toptal.com/developers/gitignore?templates=windows,visualstudiocode,go
### Go ###
# If you prefer the allow list template instead of the deny list, see community template:
# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
#
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib
# Test binary, built with `go test -c`
*.test
# Output of the go coverage tool, specifically when used with LiteIDE
*.out
# Dependency directories (remove the comment below to include it)
# vendor/
# Go workspace file
go.work
### VisualStudioCode ###
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
!.vscode/*.code-snippets
# Local History for Visual Studio Code
.history/
# Built Visual Studio Code Extensions
*.vsix
### VisualStudioCode Patch ###
# Ignore all local history of files
.history
.ionide
### Windows ###
# Windows thumbnail cache files
Thumbs.db
Thumbs.db:encryptable
ehthumbs.db
ehthumbs_vista.db
# Dump file
*.stackdump
# Folder config file
[Dd]esktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msix
*.msm
*.msp
# Windows shortcuts
*.lnk
# End of https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,go
# Custom rules (everything added below won't be overridden by 'Generate .gitignore File' if you use 'Update' option)

24
.vscode/launch.json vendored Normal file
View File

@ -0,0 +1,24 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Launch static/index.html",
"type": "firefox",
"request": "launch",
"reAttach": true,
"file": "${workspaceFolder}/static/index.html",
},
{
"name": "Launch Go application",
"type": "go",
"request": "launch",
"mode": "debug",
"program": "${workspaceFolder}",
"output": "${workspaceFolder}/debug_bin"
}
]
}

21
.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,21 @@
{
"cSpell.words": [
"afero",
"EXTRACTIMAGES",
"fclairamb",
"ftpserver",
"ftpserverdriver",
"ftpserverlib",
"Ingestor",
"jfif",
"Millimetres",
"neilpa",
"pdfcpu",
"Scanyonero",
"Subed",
"Typer",
"typst",
"unitless",
"xhttp"
]
}

View File

@ -1,15 +1,19 @@
# OCRmyPDF runner
# Scanyonero
A very simple tool that listens for files in a directory, and runs OCRmyPDF on them.
A tool for preprocessing scanned documents before they are sent to paperless.
Simply point your scanner to the FTP server this software provides, and it will do the rest.
This is needed as paperless(-ngx) will always create a copy of the document with its built in clean up and OCR feature.
Even external pre-consumption scripts will be run on all new documents, not just files in from consumption directory.
So the solution is to have this watchdog/runner that only pre-processes scanned documents, and leaves everything else untouched.
## Features
The idea is to let it watch a directory any scanner will scan into, and then this runner will write the final pre-processed document into a directory paperless watches.
- Built in FTP server, and therefore no need for temporary files or any other Rube Goldberg-esque setups.
- Web-app where you can preview, sort, split and merge your scans before sending them to paperless.
- Can use OCRmyPDF for adding searchable text to your scanned documents.
- Can clean up documents and make them as pristine and compact as possible.
## Usage
TODO: Update README.md
1. Install the project somewhere.
2. Edit [main.go](main.go) to use the correct paths to your scanner and paperless consumption directories.
3. Copy the [ocrmypdf-runner.service](service/linux/systemd/ocrmypdf-runner.service) into your systemd services directory (`etc/systemd/system/ocrmypdf-runner.service`). You may have to change the user in the service file.

37
document/file.go Normal file
View File

@ -0,0 +1,37 @@
package document
import (
"fmt"
"log"
"os"
)
// File contains the raw data of a file coming from a scanner.
type File struct {
Name string
Data []byte
}
// LoadFile reads a file and returns it as a File.
func LoadFile(filename string) (File, error) {
data, err := os.ReadFile(filename)
if err != nil {
return File{}, fmt.Errorf("couldn't read file: %w", err)
}
return File{
Name: filename,
Data: data,
}, nil
}
// LoadFile reads a file and returns it as a File.
// This will panic on any error.
func MustLoadFile(filename string) File {
file, err := LoadFile(filename)
if err != nil {
log.Panicf("Failed to load file: %v.", err)
}
return file
}

View File

@ -0,0 +1,56 @@
package document
import (
"Scanyonero/unit"
"regexp"
)
// IngestorRule describes a single match-and-apply rule that the Ingestor
// runs against every ingested page.
type IngestorRule struct {
	// All entries that are non nil have to match.
	Match struct {
		Name    *regexp.Regexp // When this regular expression matches with the filename/filepath then the rule is used.
		XPixels *int           // When the scanned image width in pixels matches with the given amount of pixels, the rule is used.
		YPixels *int           // When the scanned image height in pixels matches with the given amount of pixels, the rule is used.
	}

	// All non nil entries will be applied to the document pages.
	Action struct {
		MediumWidth  *unit.Millimeter // Sets the width of the medium.
		MediumHeight *unit.Millimeter // Sets the height of the medium.
		ScanOffsetX  *unit.Millimeter // Offsets the scan in the medium on the x axis.
		ScanOffsetY  *unit.Millimeter // Offsets the scan in the medium on the y axis.
	}
}
// Apply will check and apply the rule per ingested page.
//
// When any non-nil match criterion fails, the page is left untouched and
// nil is returned.
func (rule IngestorRule) Apply(ingestor Ingestor, file File, page *Page) error {
	// Bail out as soon as one of the configured match criteria doesn't hold.
	if m := rule.Match.Name; m != nil && !m.MatchString(file.Name) {
		return nil
	}
	bounds := page.Image.Bounds()
	if px := rule.Match.XPixels; px != nil && *px != bounds.Dx() {
		return nil
	}
	if py := rule.Match.YPixels; py != nil && *py != bounds.Dy() {
		return nil
	}

	// All criteria matched: apply every action that is set.
	if w := rule.Action.MediumWidth; w != nil {
		page.Dimensions.MediumSize.X = *w
	}
	if h := rule.Action.MediumHeight; h != nil {
		page.Dimensions.MediumSize.Y = *h
	}
	if ox := rule.Action.ScanOffsetX; ox != nil {
		page.Dimensions.ScanSize.Origin.X = *ox
	}
	if oy := rule.Action.ScanOffsetY; oy != nil {
		page.Dimensions.ScanSize.Origin.Y = *oy
	}

	return nil
}

138
document/ingestor.go Normal file
View File

@ -0,0 +1,138 @@
package document
import (
"Scanyonero/unit"
"bytes"
"fmt"
"image"
"path/filepath"
"strings"
"github.com/chai2010/tiff"
"github.com/pdfcpu/pdfcpu/pkg/api"
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu"
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model"
"image/jpeg"
"image/png"
)
// Ingestor contains all settings and rules for image/document file ingestion.
type Ingestor struct {
	DefaultDPI unit.PerInch   // Default/fallback dots per inch value.
	Rules      []IngestorRule // Rules are applied to every ingested page, in order.
}
// Ingest decodes the given file into one or more pages and then applies all
// ingestor rules to every resulting page.
//
// Supported file extensions are .jpg/.jpeg, .png, .tiff and .pdf; any other
// extension results in an error.
func (ingestor Ingestor) Ingest(file File) ([]Page, error) {
	ext := filepath.Ext(file.Name)

	var pages []Page

	switch strings.ToLower(ext) {
	case ".jpg", ".jpeg":
		img, err := jpeg.Decode(bytes.NewReader(file.Data))
		if err != nil {
			return nil, fmt.Errorf("failed to decode JPEG file: %w", err)
		}

		// Start with the configured fallback density.
		dimensions := unit.NewPageDimensionsFromDensity(img.Bounds().Dx(), img.Bounds().Dy(), ingestor.DefaultDPI, ingestor.DefaultDPI)
		if tag, err := decodeJFIF(bytes.NewReader(file.Data)); err == nil {
			// Get more exact density info from the file metadata.
			xDensity, yDensity := tag.Density()
			dimensions = unit.NewPageDimensionsFromDensity(img.Bounds().Dx(), img.Bounds().Dy(), xDensity, yDensity)
		}

		pages = append(pages, Page{
			Image:      img,
			Dimensions: dimensions,
		})

	case ".png":
		img, err := png.Decode(bytes.NewReader(file.Data))
		if err != nil {
			return nil, fmt.Errorf("failed to decode PNG file: %w", err)
		}

		pages = append(pages, Page{
			Image:      img,
			Dimensions: unit.NewPageDimensionsFromDensity(img.Bounds().Dx(), img.Bounds().Dy(), ingestor.DefaultDPI, ingestor.DefaultDPI),
		})
		// TODO: Read pixel density metadata from PNG file

	case ".tiff":
		// A TIFF file can contain multiple images (and sub-images); every one
		// of them becomes its own page.
		mm, _, err := tiff.DecodeAll(bytes.NewReader(file.Data))
		if err != nil {
			return nil, fmt.Errorf("failed to decode TIFF file: %w", err)
		}
		for _, m := range mm {
			for _, img := range m {
				pages = append(pages, Page{
					Image:      img,
					Dimensions: unit.NewPageDimensionsFromDensity(img.Bounds().Dx(), img.Bounds().Dy(), ingestor.DefaultDPI, ingestor.DefaultDPI),
				})
				// TODO: Read pixel density metadata from TIFF file
			}
		}

	case ".pdf":
		conf := model.NewDefaultConfiguration()
		conf.Cmd = model.EXTRACTIMAGES
		ctx, err := api.ReadValidateAndOptimize(bytes.NewReader(file.Data), conf)
		if err != nil {
			return nil, fmt.Errorf("failed to read and validate PDF file: %w", err)
		}

		boundaries, err := ctx.PageBoundaries(nil)
		if err != nil {
			return nil, fmt.Errorf("failed to get page dimensions: %w", err)
		}
		if len(boundaries) != ctx.PageCount {
			return nil, fmt.Errorf("number of retrieved page boundaries (%d) and pages (%d) differ", len(boundaries), ctx.PageCount)
		}

		// Expect exactly one embedded image per PDF page (a scanned document).
		for page := 1; page <= ctx.PageCount; page++ {
			mm, err := pdfcpu.ExtractPageImages(ctx, page, false)
			if err != nil {
				return nil, fmt.Errorf("failed to extract image from page: %w", err)
			}
			if len(mm) == 0 {
				return nil, fmt.Errorf("page %d doesn't contain any images", page)
			}
			if len(mm) > 1 {
				return nil, fmt.Errorf("page %d contains %d images, expected 1", page, len(mm))
			}

			// Page size comes from the PDF media box, not from the image.
			boundary := boundaries[page-1]
			dim := boundary.Media.Rect.Dimensions().ToMillimetres()
			dimX, dimY := unit.Millimeter(dim.Width), unit.Millimeter(dim.Height)

			// Decode only image of the page.
			for _, m := range mm {
				img, _, err := image.Decode(m)
				if err != nil {
					return nil, fmt.Errorf("failed to decode %q file: %w", ext, err)
				}

				pages = append(pages, Page{
					Image:      img,
					Dimensions: unit.NewPageDimensionsFromLengths(dimX, dimY),
				})
				break
			}
		}

	default:
		return nil, fmt.Errorf("unsupported file extension %q", ext)
	}

	// Apply all ingestor rules to every page.
	for iPage := range pages {
		page := &pages[iPage]
		for i, rule := range ingestor.Rules {
			if err := rule.Apply(ingestor, file, page); err != nil {
				return nil, fmt.Errorf("failed to apply ingestor rule %d on page %d: %w", i, iPage, err)
			}
		}
	}

	return pages, nil
}

110
document/ingestor_test.go Normal file
View File

@ -0,0 +1,110 @@
package document
import (
"Scanyonero/unit"
"path/filepath"
"regexp"
"testing"
)
// TestIngestor_Ingest checks that ingested test documents report the expected
// physical scan sizes (within 0.1 mm).
//
// NOTE(review): the test is disabled via t.SkipNow and depends on scan
// fixtures in ../test-documents — confirm whether those files are checked in.
func TestIngestor_Ingest(t *testing.T) {
	t.SkipNow()

	tests := []struct {
		name    string
		file    File
		want    []Page
		wantErr bool
	}{
		{"300 DPI Feed JPG", MustLoadFile(filepath.Join("..", "test-documents", "300 DPI Feeder.jpg")),
			[]Page{{
				Dimensions: unit.PageDimensions[unit.Millimeter]{
					ScanSize: unit.Rectangle[unit.Millimeter]{
						Size: unit.Vec2[unit.Millimeter]{X: 209.97, Y: 288.713}}},
			}}, false},
		{"300 DPI Feed PDF", MustLoadFile(filepath.Join("..", "test-documents", "300 DPI Feeder.pdf")),
			[]Page{{
				Dimensions: unit.PageDimensions[unit.Millimeter]{
					ScanSize: unit.Rectangle[unit.Millimeter]{
						Size: unit.Vec2[unit.Millimeter]{X: 209.8, Y: 288.5}}},
			}}, false},
		{"300 DPI Flat JPG", MustLoadFile(filepath.Join("..", "test-documents", "300 DPI Flatbed.jpg")),
			[]Page{{
				Dimensions: unit.PageDimensions[unit.Millimeter]{
					ScanSize: unit.Rectangle[unit.Millimeter]{
						Size: unit.Vec2[unit.Millimeter]{X: 203.2, Y: 290.83}}},
			}}, false},
		{"300 DPI Flat PDF", MustLoadFile(filepath.Join("..", "test-documents", "300 DPI Flatbed.pdf")),
			[]Page{{
				Dimensions: unit.PageDimensions[unit.Millimeter]{
					ScanSize: unit.Rectangle[unit.Millimeter]{
						Size: unit.Vec2[unit.Millimeter]{X: 203.2, Y: 290.8}}},
			}}, false},
		{"600 DPI Feed JPG", MustLoadFile(filepath.Join("..", "test-documents", "600 DPI Feeder.jpg")),
			[]Page{{
				Dimensions: unit.PageDimensions[unit.Millimeter]{
					ScanSize: unit.Rectangle[unit.Millimeter]{
						Size: unit.Vec2[unit.Millimeter]{X: 209.97, Y: 288.671}}},
			}}, false},
		{"600 DPI Feed PDF", MustLoadFile(filepath.Join("..", "test-documents", "600 DPI Feeder.pdf")),
			[]Page{{
				Dimensions: unit.PageDimensions[unit.Millimeter]{
					ScanSize: unit.Rectangle[unit.Millimeter]{
						Size: unit.Vec2[unit.Millimeter]{X: 209.8, Y: 288.0}}},
			}}, false},
		{"600 DPI Flat JPG", MustLoadFile(filepath.Join("..", "test-documents", "600 DPI Flatbed.jpg")),
			[]Page{{
				Dimensions: unit.PageDimensions[unit.Millimeter]{
					ScanSize: unit.Rectangle[unit.Millimeter]{
						Size: unit.Vec2[unit.Millimeter]{X: 203.88, Y: 290.872}}},
			}}, false},
		{"600 DPI Flat PDF", MustLoadFile(filepath.Join("..", "test-documents", "600 DPI Flatbed.pdf")),
			[]Page{{
				Dimensions: unit.PageDimensions[unit.Millimeter]{
					ScanSize: unit.Rectangle[unit.Millimeter]{
						Size: unit.Vec2[unit.Millimeter]{X: 203.7, Y: 290.8}}},
			}}, false},
	}

	// One rule that forces the medium width of all PDF files to 100 mm.
	ingestor := Ingestor{
		DefaultDPI: unit.PerInch(150),
		Rules: []IngestorRule{{
			Match: struct {
				Name    *regexp.Regexp
				XPixels *int
				YPixels *int
			}{
				Name: regexp.MustCompile(`^.*\.pdf$`),
			},
			Action: struct {
				MediumWidth  *unit.Millimeter
				MediumHeight *unit.Millimeter
				ScanOffsetX  *unit.Millimeter
				ScanOffsetY  *unit.Millimeter
			}{
				MediumWidth: &([]unit.Millimeter{100}[0]),
			},
		}},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := ingestor.Ingest(tt.file)
			if (err != nil) != tt.wantErr {
				t.Errorf("Ingestor.Ingest() error = %v, wantErr %v.", err, tt.wantErr)
				return
			}
			if len(got) != len(tt.want) {
				t.Errorf("Got %d pages, but want %d pages.", len(got), len(tt.want))
				return
			}
			for i, gotPage := range got {
				wantPage := tt.want[i]
				if !gotPage.Dimensions.ScanSize.Size.EqualWithPrecision(wantPage.Dimensions.ScanSize.Size, 0.1) {
					t.Errorf("Resulting ScanSize is %v, want %v.", gotPage.Dimensions.ScanSize.Size, wantPage.Dimensions.ScanSize.Size)
				}
			}
		})
	}
}

72
document/jfif.go Normal file
View File

@ -0,0 +1,72 @@
package document
import (
"Scanyonero/unit"
"encoding/binary"
"fmt"
"io"
"neilpa.me/go-jfif"
)
// jfifTagUnits describes the unit of the density values in a JFIF APP0 tag.
type jfifTagUnits byte

const (
	jfifTagUnitsNoUnits     jfifTagUnits = iota // Densities describe only an aspect ratio.
	jfifTagUnitsDotsPerInch                     // Densities are dots per inch.
	jfifTagUnitsDotsPerCM                       // Densities are dots per centimetre.
)

// jfifTag holds the decoded fields of a JFIF APP0 segment.
type jfifTag struct {
	Version  uint16
	Units    jfifTagUnits
	XDensity uint16
	YDensity uint16

	// Omit thumbnail width, height and pixel data.
}

// UnmarshalBinary parses the payload of a JFIF APP0 segment into j.
// The payload layout is: version (2 bytes, big endian), units (1 byte),
// x density (2 bytes) and y density (2 bytes); any thumbnail data is ignored.
func (j *jfifTag) UnmarshalBinary(data []byte) error {
	if len(data) < 7 {
		return fmt.Errorf("JFIF tag length (%d) is smaller than expected (%d)", len(data), 7)
	}

	j.Version = binary.BigEndian.Uint16(data[:2])
	j.Units = jfifTagUnits(data[2])
	j.XDensity = binary.BigEndian.Uint16(data[3:5])
	j.YDensity = binary.BigEndian.Uint16(data[5:7])
	return nil
}
// Density returns the number of pixels per length unit.
//
// For unitless tags (aspect ratio only) both results are nil.
func (j *jfifTag) Density() (x, y unit.Density) {
	switch j.Units {
	case jfifTagUnitsDotsPerInch:
		return unit.PerInch(j.XDensity), unit.PerInch(j.YDensity)
	case jfifTagUnitsDotsPerCM:
		// Convert dots per centimetre to dots per millimetre.
		return unit.PerMillimeter(j.XDensity) / 10, unit.PerMillimeter(j.YDensity) / 10
	}

	return nil, nil
}
// decodeJFIF scans the JPEG stream r for a JFIF APP0 segment and returns its
// decoded tag. An error is returned when the stream can't be parsed or when
// no JFIF tag is present.
func decodeJFIF(r io.Reader) (jfifTag, error) {
	segments, err := jfif.DecodeSegments(r)
	if err != nil {
		return jfifTag{}, fmt.Errorf("failed to decode JPEG segments: %w", err)
	}

	// Return the first JFIF APP0 segment that we come across.
	for _, segment := range segments {
		sig, data, _ := segment.AppPayload()
		if sig != jfif.SigJFIF {
			continue
		}
		var tag jfifTag
		if err := tag.UnmarshalBinary(data); err != nil {
			return jfifTag{}, fmt.Errorf("failed to unmarshal tag data: %w", err)
		}
		return tag, nil
	}

	return jfifTag{}, fmt.Errorf("couldn't find any JFIF tag")
}

12
document/page.go Normal file
View File

@ -0,0 +1,12 @@
package document
import (
"Scanyonero/unit"
"image"
)
// Page represents a single scanned page of a document.
type Page struct {
	// Image is the decoded scan; excluded from JSON serialization.
	Image image.Image `json:"-"`
	// Dimensions describes the page geometry in millimetres.
	Dimensions unit.PageDimensions[unit.Millimeter] `json:"dimensions"`
}

57
ftpserver/driver.go Normal file
View File

@ -0,0 +1,57 @@
package ftpserver
import (
"crypto/tls"
"fmt"
ftpserver "github.com/fclairamb/ftpserverlib"
"github.com/spf13/afero"
)
// driver wires the FTP server library to the Scanyonero virtual filesystem.
type driver struct {
	// User and Pass are the expected credentials. When either one is empty,
	// every login is accepted unchecked (see AuthUser).
	User, Pass string
	// ListenAddr is the address the FTP server listens on.
	ListenAddr string
	// ClientDriver is the filesystem handed to every authenticated client.
	ClientDriver afero.Fs
}
// GetSettings returns some general settings around the server setup.
func (d *driver) GetSettings() (*ftpserver.Settings, error) {
	settings := ftpserver.Settings{
		ListenAddr:          d.ListenAddr,
		DefaultTransferType: ftpserver.TransferTypeBinary,
	}
	return &settings, nil
}
// ClientConnected is called to send the very first welcome message.
func (d *driver) ClientConnected(cc ftpserver.ClientContext) (string, error) {
	const welcome = "Welcome to the Scanyonero FTP server."
	return welcome, nil
}
// ClientDisconnected is called when the user disconnects, even if he never
// authenticated. Currently a no-op.
func (d *driver) ClientDisconnected(cc ftpserver.ClientContext) {
	//log.Printf("Client %q disconnected.", cc.RemoteAddr())
}
// AuthUser authenticates the user and selects a handling driver.
func (d *driver) AuthUser(cc ftpserver.ClientContext, user, pass string) (ftpserver.ClientDriver, error) {
	// Credentials are only checked when BOTH a username and a password are
	// configured. NOTE(review): if only one of the two is set, every client
	// is accepted without any check — confirm this is intended.
	if d.User != "" && d.Pass != "" {
		if user != d.User || pass != d.Pass {
			return nil, fmt.Errorf("wrong username or password")
		}
	}

	//log.Printf("Client %q authenticated with %q and %q.", cc.RemoteAddr(), user, pass)
	return d.ClientDriver, nil
}
// GetTLSConfig returns a TLS Certificate to use.
// The certificate could frequently change if we use something like "let's encrypt".
//
// TLS is currently not implemented, so connections are plain FTP.
func (d *driver) GetTLSConfig() (*tls.Config, error) {
	return nil, fmt.Errorf("tls not implemented")
}

20
ftpserver/driver_test.go Normal file
View File

@ -0,0 +1,20 @@
package ftpserver_test
import (
"Scanyonero/ftpserver"
"log"
"testing"
)
// TestDriver manually exercises the FTP server end to end.
//
// NOTE(review): disabled via t.SkipNow; when enabled it blocks forever, as
// the file channel only closes once the server is stopped.
func TestDriver(t *testing.T) {
	t.SkipNow()

	server, err := ftpserver.NewFTPServer("", "", "127.0.0.1:21")
	if err != nil {
		log.Panicf("Failed to create FTP-Server: %v.", err)
	}

	// Log every file that clients upload.
	for file := range server.FileChan() {
		log.Printf("Received file %v with %d bytes.", file.Name, len(file.Data))
	}
}

80
ftpserver/ftp-server.go Normal file
View File

@ -0,0 +1,80 @@
package ftpserver
import (
"Scanyonero/document"
"fmt"
"sync"
ftpserverlib "github.com/fclairamb/ftpserverlib"
)
// FTPServer wraps an ftpserverlib server and exposes uploaded files as a
// channel of document.File values.
type FTPServer struct {
	// files receives every fully uploaded file; closed when the server stops.
	files chan document.File

	// ftpServer is the underlying FTP protocol server.
	ftpServer *ftpserverlib.FtpServer
}
// NewFTPServer returns a new FTP server instance.
// This will also start the server.
//
// When both user and pass are non-empty, clients must authenticate with
// those credentials; otherwise any login is accepted.
func NewFTPServer(user, pass, listenAddr string) (*FTPServer, error) {
	fs := &virtualFS{}

	driver := &driver{
		User:         user,
		Pass:         pass,
		ListenAddr:   listenAddr,
		ClientDriver: fs,
	}

	s := &FTPServer{
		files:     make(chan document.File),
		ftpServer: ftpserverlib.NewFtpServer(driver),
	}

	// Handler callback for newly uploaded files.
	// We will pass the files into the files channel.
	// The closed flag (guarded by mutex) prevents sends into the channel
	// once the server begins shutting down.
	// NOTE(review): the channel is unbuffered and the send happens while the
	// mutex is held, so a consumer that never reads FileChan() stalls uploads
	// and can block the shutdown path below on mutex.Lock — confirm intended.
	var closed bool
	var mutex sync.Mutex
	fs.Callback = func(file document.File) error {
		mutex.Lock()
		defer mutex.Unlock()
		if closed {
			return fmt.Errorf("server is closing")
		}
		s.files <- file
		return nil
	}

	// Create listener.
	if err := s.ftpServer.Listen(); err != nil {
		return nil, err
	}

	// Start listening.
	go func() {
		// NOTE(review): Serve's error is discarded — consider logging it.
		s.ftpServer.Serve()

		// We will be shutting down everything soon.
		// Ensure that no new files will be written into the files channel.
		mutex.Lock()
		closed = true
		mutex.Unlock()
		close(s.files)
	}()

	return s, nil
}
// FileChan returns the file channel where files uploaded to the FTP-Server will be sent to.
//
// The channel will automatically be closed after Stop() has been called.
func (s *FTPServer) FileChan() <-chan document.File {
	return s.files
}
// Stop shuts the FTP-Server down.
func (s *FTPServer) Stop() error {
	return s.ftpServer.Stop()
}

16
ftpserver/utils.go Normal file
View File

@ -0,0 +1,16 @@
package ftpserver
import "path/filepath"
// normalizePath cleans the given path and maps the relative results "." and
// ".." onto the filesystem root, so clients can't address anything above it.
func normalizePath(path string) string {
	cleaned := filepath.Clean(path)
	if cleaned == "." || cleaned == ".." {
		return string(filePathSeparator)
	}
	return cleaned
}

146
ftpserver/virtual-fs.go Normal file
View File

@ -0,0 +1,146 @@
package ftpserver
import (
"Scanyonero/document"
"io"
"os"
"time"
"github.com/spf13/afero"
"github.com/spf13/afero/mem"
)
// filePathSeparator is the path separator used for the virtual filesystem.
const filePathSeparator = os.PathSeparator

// virtualFile is an in-memory file that forwards its full content via a
// callback once the client closes its write handle.
type virtualFile struct {
	mem.File

	// Callback is invoked with the file's name and content on Close.
	Callback func(file document.File) error
}

// Override close behavior.
// When the file handle is closed, we will forward the file content via callback.
func (v *virtualFile) Close() error {
	if v.Callback != nil {
		file := document.File{
			Name: v.Name(),
		}

		var err error
		// Read the complete in-memory content through a fresh read-only handle.
		if file.Data, err = io.ReadAll(mem.NewReadOnlyFileHandle(v.Data())); err != nil {
			v.File.Close() // NOTE(review): this Close error is discarded in favor of the read error.
			return err
		}

		if err := v.Callback(file); err != nil {
			v.File.Close()
			return err
		}
	}

	return v.File.Close()
}

var _ afero.File = &virtualFile{}
// virtualFS is a write-only pseudo filesystem for the FTP server: created
// files are kept in memory and forwarded via Callback on close, while most
// other operations are no-ops or report that the path doesn't exist.
type virtualFS struct {
	// Callback is handed to every created virtualFile.
	Callback func(file document.File) error
}

var _ afero.Fs = &virtualFS{}

// Create creates a file in the filesystem, returning the file and an error, if any happens.
func (v *virtualFS) Create(name string) (afero.File, error) {
	name = normalizePath(name)
	//log.Printf("VirtualFS: Create: %v", name)

	file := mem.CreateFile(name)
	// NOTE(review): the handle struct is copied by value here — confirm
	// mem.FileHandle is safe to copy.
	fileHandle := *mem.NewFileHandle(file)
	return &virtualFile{File: fileHandle, Callback: v.Callback}, nil
}

// Mkdir creates a directory in the filesystem, return an error if any happens.
// Accepted but not persisted; directories are purely virtual.
func (v *virtualFS) Mkdir(name string, perm os.FileMode) error {
	//name = normalizePath(name)
	//log.Printf("VirtualFS: Mkdir: %v, %v", name, perm)
	return nil
}

// MkdirAll creates a directory path and all parents that do not exist yet.
// Accepted but not persisted; directories are purely virtual.
func (v *virtualFS) MkdirAll(path string, perm os.FileMode) error {
	//path = normalizePath(path)
	//log.Printf("VirtualFS: MkdirAll: %v, %v", path, perm)
	return nil
}

// Open opens a file, returning it or an error, if any happens.
// Every name is answered with an empty virtual directory handle.
func (v *virtualFS) Open(name string) (afero.File, error) {
	name = normalizePath(name)
	//log.Printf("VirtualFS: Open: %v", name)

	dir := mem.CreateDir(name)
	mem.SetMode(dir, os.ModeDir|0o755)
	return mem.NewReadOnlyFileHandle(dir), nil
}

// OpenFile opens a file using the given flags and the given mode.
// Flags and permissions are ignored; this always creates a fresh virtual file.
func (v *virtualFS) OpenFile(name string, flag int, perm os.FileMode) (afero.File, error) {
	name = normalizePath(name)
	//log.Printf("VirtualFS: OpenFile: %v, %v, %v", name, flag, perm)
	return v.Create(name)
}

// Remove removes a file identified by name, returning an error, if any happens.
// Always reports that the file doesn't exist.
func (v *virtualFS) Remove(name string) error {
	name = normalizePath(name)
	//log.Printf("VirtualFS: Remove: %v", name)
	return &os.PathError{Op: "remove", Path: name, Err: os.ErrNotExist}
}

// RemoveAll removes a directory path and any children it contains. It does not fail if the path does not exist (return nil).
func (v *virtualFS) RemoveAll(path string) error {
	//path = normalizePath(path)
	//log.Printf("VirtualFS: RemoveAll: %v", path)
	return nil
}

// Rename renames a file.
// Always reports that the file doesn't exist.
func (v *virtualFS) Rename(oldName, newName string) error {
	oldName = normalizePath(oldName)
	//newName = normalizePath(newName)
	//log.Printf("VirtualFS: Rename: %v -> %v", oldName, newName)
	return &os.PathError{Op: "rename", Path: oldName, Err: os.ErrNotExist}
}

// Stat returns a FileInfo describing the named file, or an error, if any happens.
// Every name is reported as an existing virtual directory.
func (v *virtualFS) Stat(name string) (os.FileInfo, error) {
	name = normalizePath(name)
	//log.Printf("VirtualFS: Stat: %v", name)

	dir := mem.CreateDir(name)
	mem.SetMode(dir, os.ModeDir|0o755)
	return mem.GetFileInfo(dir), nil
}

// Name returns the name of this FileSystem.
func (v *virtualFS) Name() string {
	return "ScanyoneroVirtualFS"
}

// Chmod changes the mode of the named file to mode.
// Always reports that the file doesn't exist.
func (v *virtualFS) Chmod(name string, mode os.FileMode) error {
	name = normalizePath(name)
	//log.Printf("VirtualFS: Chmod: %v, %v", name, mode)
	return &os.PathError{Op: "chmod", Path: name, Err: os.ErrNotExist}
}

// Chown changes the uid and gid of the named file.
// Always reports that the file doesn't exist.
func (v *virtualFS) Chown(name string, uid, gid int) error {
	name = normalizePath(name)
	//log.Printf("VirtualFS: Chown: %v, %v, %v", name, uid, gid)
	return &os.PathError{Op: "chown", Path: name, Err: os.ErrNotExist}
}

// Chtimes changes the access and modification times of the named file.
// Always reports that the file doesn't exist.
func (v *virtualFS) Chtimes(name string, atime time.Time, mtime time.Time) error {
	name = normalizePath(name)
	//log.Printf("VirtualFS: Chtimes: %v, %v, %v", name, atime, mtime)
	return &os.PathError{Op: "chtimes", Path: name, Err: os.ErrNotExist}
}

32
go.mod
View File

@ -1,3 +1,31 @@
module ocrmypdf-runner
module Scanyonero
go 1.19
go 1.23.0
toolchain go1.24.2
require (
github.com/chai2010/tiff v0.0.0-20211005095045-4ec2aa243943
github.com/fclairamb/ftpserverlib v0.25.0
github.com/gorilla/websocket v1.5.3
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646
github.com/pdfcpu/pdfcpu v0.10.2
github.com/spf13/afero v1.14.0
neilpa.me/go-jfif v0.5.0
)
require (
github.com/fclairamb/go-log v0.5.0 // indirect
github.com/hhrutter/lzw v1.0.0 // indirect
github.com/hhrutter/pkcs7 v0.2.0 // indirect
github.com/hhrutter/tiff v1.0.2 // indirect
github.com/mattn/go-runewidth v0.0.16 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
golang.org/x/crypto v0.37.0 // indirect
golang.org/x/image v0.26.0 // indirect
golang.org/x/sys v0.32.0 // indirect
golang.org/x/text v0.24.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
neilpa.me/go-x v0.2.0 // indirect
)

57
go.sum Normal file
View File

@ -0,0 +1,57 @@
github.com/chai2010/tiff v0.0.0-20211005095045-4ec2aa243943 h1:CjuhVIUiyWQZVY4rmcvm/9R+60e/Wi6LkXyHU38MqXI=
github.com/chai2010/tiff v0.0.0-20211005095045-4ec2aa243943/go.mod h1:FhMMqekobM33oGdTfbi65oQ9P7bnQ5/0EDfmleW35RE=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fclairamb/ftpserverlib v0.25.0 h1:swV2CK+WiN9KEkqkwNgGbSIfRoYDWNno41hoVtYwgfA=
github.com/fclairamb/ftpserverlib v0.25.0/go.mod h1:LIDqyiFPhjE9IuzTkntST8Sn8TaU6NRgzSvbMpdfRC4=
github.com/fclairamb/go-log v0.5.0 h1:Gz9wSamEaA6lta4IU2cjJc2xSq5sV5VYSB5w/SUHhVc=
github.com/fclairamb/go-log v0.5.0/go.mod h1:XoRO1dYezpsGmLLkZE9I+sHqpqY65p8JA+Vqblb7k40=
github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU=
github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0=
github.com/go-logfmt/logfmt v0.5.1 h1:otpy5pqBCBZ1ng9RQ0dPu4PN7ba75Y/aA+UpowDyNVA=
github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs=
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/hhrutter/lzw v1.0.0 h1:laL89Llp86W3rRs83LvKbwYRx6INE8gDn0XNb1oXtm0=
github.com/hhrutter/lzw v1.0.0/go.mod h1:2HC6DJSn/n6iAZfgM3Pg+cP1KxeWc3ezG8bBqW5+WEo=
github.com/hhrutter/pkcs7 v0.2.0 h1:i4HN2XMbGQpZRnKBLsUwO3dSckzgX142TNqY/KfXg+I=
github.com/hhrutter/pkcs7 v0.2.0/go.mod h1:aEzKz0+ZAlz7YaEMY47jDHL14hVWD6iXt0AgqgAvWgE=
github.com/hhrutter/tiff v1.0.2 h1:7H3FQQpKu/i5WaSChoD1nnJbGx4MxU5TlNqqpxw55z8=
github.com/hhrutter/tiff v1.0.2/go.mod h1:pcOeuK5loFUE7Y/WnzGw20YxUdnqjY1P0Jlcieb/cCw=
github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6Oo2LfFZAehjjQMERAvZLEDnQ=
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8=
github.com/pdfcpu/pdfcpu v0.10.2 h1:DB2dWuoq0eF0QwHjgyLirYKLTCzFOoZdmmIUSu72aL0=
github.com/pdfcpu/pdfcpu v0.10.2/go.mod h1:Q2Z3sqdRqHTdIq1mPAUl8nfAoim8p3c1ASOaQ10mCpE=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/secsy/goftp v0.0.0-20200609142545-aa2de14babf4 h1:PT+ElG/UUFMfqy5HrxJxNzj3QBOf7dZwupeVC+mG1Lo=
github.com/secsy/goftp v0.0.0-20200609142545-aa2de14babf4/go.mod h1:MnkX001NG75g3p8bhFycnyIjeQoOjGL6CEIsdE/nKSY=
github.com/spf13/afero v1.14.0 h1:9tH6MapGnn/j0eb0yIXiLjERO8RB6xIVZRDCX7PtqWA=
github.com/spf13/afero v1.14.0/go.mod h1:acJQ8t0ohCGuMN3O+Pv0V0hgMxNYDlvdk+VTfyZmbYo=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
golang.org/x/image v0.26.0 h1:4XjIFEZWQmCZi6Wv8BoxsDhRU3RVnLX04dToTDAEPlY=
golang.org/x/image v0.26.0/go.mod h1:lcxbMFAovzpnJxzXS3nyL83K27tmqtKzIJpctK8YO5c=
golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
neilpa.me/go-jfif v0.5.0 h1:MNx8t0K4ysVKywCb5O98FhN3CcYih7rAUpCehj5HgtQ=
neilpa.me/go-jfif v0.5.0/go.mod h1:EbYXM1sxsAfbRgty+I5j1Ommm6WTsLKT+KXNgC998d0=
neilpa.me/go-x v0.2.0 h1:GbLRDtAZ9MgVrwrIe3jWnHF2W40LCFA9Ng/aDbd9GVs=
neilpa.me/go-x v0.2.0/go.mod h1:aIemU+pQYLLV3dygXotHKF7SantXe5HzZR6VIjzY/4g=

View File

@ -1,6 +1,6 @@
package main
import (
/*import (
"path/filepath"
"time"
)
@ -12,4 +12,4 @@ func init() {
Interval: 5 * time.Second,
}
runner.Run()
}
}*/

7
jsconfig.json Normal file
View File

@ -0,0 +1,7 @@
{
"compilerOptions": {
"alwaysStrict": true,
"checkJs": true,
"target": "ES6",
},
}

83
main.go
View File

@ -1,12 +1,83 @@
package main
import "log"
import (
"Scanyonero/document"
"Scanyonero/ftpserver"
"Scanyonero/unit"
"log"
"net/http"
"path/filepath"
"regexp"
)
func main() {
log.Printf("Starting OCRmyPDF-runner.")
log.Printf("Starting Scanyonero.")
// The runners will be started in some init.go file.
// Wait forever.
select {}
	// Start FTP server.
	// NOTE(review): binding 127.0.0.1:21 needs elevated privileges on most
	// systems — confirm the port, or make it configurable.
	ftpServer, err := ftpserver.NewFTPServer("", "", "127.0.0.1:21")
	if err != nil {
		log.Panicf("Failed to start FTP server: %v.", err)
	}
	defer ftpServer.Stop()

	// Web and WS server.
	server := NewServer()

	// Add test documents.
	// NOTE(review): development fixtures; remove before release.
	server.Documents.Append(LoadExampleQueueEntries()...)
	server.Documents.Append(LoadExampleQueueEntries()...)
	server.Documents.Append(LoadExampleQueueEntries()...)
	server.Documents.Append(LoadExampleQueueEntries()...)

	// Ingest every file uploaded via FTP and append the resulting pages to
	// the document queue, followed by a separator entry.
	go func() {
		ingestor := document.Ingestor{
			DefaultDPI: unit.PerInch(150),
			Rules: []document.IngestorRule{{
				Match: struct {
					Name    *regexp.Regexp
					XPixels *int
					YPixels *int
				}{
					Name: regexp.MustCompile(`^.*\.pdf$`),
				},
				Action: struct {
					MediumWidth  *unit.Millimeter
					MediumHeight *unit.Millimeter
					ScanOffsetX  *unit.Millimeter
					ScanOffsetY  *unit.Millimeter
				}{
					MediumWidth: &([]unit.Millimeter{100}[0]),
				},
			}},
		}

		for file := range ftpServer.FileChan() {
			docPages, err := ingestor.Ingest(file)
			if err != nil {
				log.Printf("Failed to ingest document file %q: %v.", file.Name, err)
				continue
			}

			var entries []QueueEntry
			for _, page := range docPages {
				// Taking &page is safe here: per-iteration loop variables
				// require Go >= 1.22 semantics (go.mod declares go 1.23.0).
				entries = append(entries, QueueEntry{
					ID:             NewQueueEntryID(),
					Name:           filepath.Base(file.Name),
					QueueEntryData: QueueEntryDataPage{Page: &page},
				})
			}
			entries = append(entries, QueueEntry{ID: NewQueueEntryID(), QueueEntryData: QueueEntryDataSeparator{}})

			server.Documents.Lock()
			server.Documents.Append(entries...)
			server.Documents.Unlock()
		}
	}()

	http.Handle("/", server)
	log.Println("Server is running on port 8080.")
	if err := http.ListenAndServe(":8080", nil); err != nil {
		log.Panicf("ListenAndServe failed: %v.", err)
	}
}

13
ocrmypdf/cli-options.go Normal file
View File

@ -0,0 +1,13 @@
package ocrmypdf
// CLIOptions describes command line options that are passed to the OCRmyPDF executable.
type CLIOptions struct {
	Custom []string // Custom command line options go here.
}

// Args returns a list of CLI arguments that should be passed to the executable.
func (c *CLIOptions) Args() []string {
	// Return a fresh copy so callers can't mutate the stored options through the result.
	return append([]string(nil), c.Custom...)
}

88
ocrmypdf/cli.go Normal file
View File

@ -0,0 +1,88 @@
package ocrmypdf
import (
"bytes"
"fmt"
"io"
"os/exec"
"strings"
)
// CLI invokes the OCRmyPDF command line tool.
type CLI struct {
	// The OCRmyPDF executable path can be overridden here. Otherwise the default
	// platform specific path (ExecutablePath) will be used.
	// Special cases:
	//   - "py -m ocrmypdf": Runs OCRmyPDF through the Python launcher, i.e. executable
	//     "py" with the base arguments "-m ocrmypdf" (see executableAndArgs).
	ExecutablePath string
}
// executableAndArgs returns the executable name and its base arguments.
func (c CLI) executableAndArgs() (string, []string) {
	// An explicitly configured path takes precedence over the platform default.
	execPath := ExecutablePath
	if c.ExecutablePath != "" {
		execPath = c.ExecutablePath
	}

	// Handle special pseudo paths that expand into an executable plus base arguments.
	if execPath == "py -m ocrmypdf" {
		return "py", []string{"-m", "ocrmypdf"}
	}
	return execPath, nil
}
// VersionString returns the version string as returned by OCRmyPDF.
func (c CLI) VersionString() (string, error) {
	execPath, args := c.executableAndArgs()
	args = append(args, "--version")

	cmd := exec.Command(execPath, args...)
	var stdout, stderr bytes.Buffer
	cmd.Stdout, cmd.Stderr = &stdout, &stderr

	if err := cmd.Run(); err != nil {
		// Include OCRmyPDF's stderr output in the error when it exited with a non-zero code.
		if exitErr, ok := err.(*exec.ExitError); ok {
			return "", fmt.Errorf("OCRmyPDF stopped with exit code %v: %v", exitErr.ExitCode(), stderr.String())
		}
		return "", err
	}

	// Strip the trailing line break from the reported version.
	return strings.TrimRight(stdout.String(), "\n\r"), nil
}
// Run takes a document from input, and writes the resulting document into output.
// The options parameter is optional.
func (c CLI) Run(input io.Reader, output io.Writer, options *CLIOptions) error {
	execPath, args := c.executableAndArgs()
	if options != nil {
		args = append(args, options.Args()...)
	}
	args = append(args, "-", "-") // Read the document from stdin and write the result to stdout.

	cmd := exec.Command(execPath, args...)
	cmd.Stdin, cmd.Stdout = input, output
	var stderr bytes.Buffer
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		// Include OCRmyPDF's stderr output in the error when it exited with a non-zero code.
		if exitErr, ok := err.(*exec.ExitError); ok {
			return fmt.Errorf("OCRmyPDF stopped with exit code %v: %v", exitErr.ExitCode(), stderr.String())
		}
		return err
	}
	return nil
}

40
ocrmypdf/cli_test.go Normal file
View File

@ -0,0 +1,40 @@
package ocrmypdf_test
import (
"Scanyonero/ocrmypdf"
"os"
"path/filepath"
"testing"
)
// TestCLI_VersionString checks that a locally installed OCRmyPDF reports a
// non-empty version string. Requires OCRmyPDF to be installed on the test machine.
func TestCLI_VersionString(t *testing.T) {
	cli := ocrmypdf.CLI{}

	v, err := cli.VersionString()
	if err != nil {
		t.Fatalf("VersionString() returned error: %v", err)
	}
	if v == "" {
		t.Errorf("Returned version string is empty: %v", v)
	}
}
// TestCLI_Run runs OCRmyPDF over a test document and writes the result to
// "Output.pdf". Requires OCRmyPDF to be installed; currently skipped.
func TestCLI_Run(t *testing.T) {
	t.SkipNow()

	cli := ocrmypdf.CLI{}

	source, err := os.Open(filepath.Join(".", "..", "test-documents", "typst-example", "600 DPI Flatbed.pdf"))
	if err != nil {
		t.Fatalf("Couldn't open file: %v.", err)
	}
	defer source.Close() // Was leaked before: the handle was never closed.

	dest, err := os.Create(filepath.Join(".", "Output.pdf"))
	if err != nil {
		t.Fatalf("Couldn't create file: %v.", err)
	}
	// Close the output and surface any flush error instead of leaking the handle.
	defer func() {
		if err := dest.Close(); err != nil {
			t.Errorf("Couldn't close output file: %v.", err)
		}
	}()

	if err := cli.Run(source, dest, nil); err != nil {
		t.Fatalf("Run() returned error: %v", err)
	}
}

6
ocrmypdf/cli_unix.go Normal file
View File

@ -0,0 +1,6 @@
//go:build unix
package ocrmypdf
// The path to the OCRmyPDF executable.
// On Unix systems OCRmyPDF is expected to be available on the PATH.
var ExecutablePath = "ocrmypdf"

6
ocrmypdf/cli_windows.go Normal file
View File

@ -0,0 +1,6 @@
//go:build windows
package ocrmypdf
// The path to the OCRmyPDF executable.
// On Windows OCRmyPDF is invoked through the Python launcher; CLI.executableAndArgs
// expands this special value into executable "py" with arguments "-m ocrmypdf".
var ExecutablePath = "py -m ocrmypdf"

11
queue-entry-data-page.go Normal file
View File

@ -0,0 +1,11 @@
package main
import "Scanyonero/document"
// QueueEntryDataPage is the queue entry payload for a single scanned page.
type QueueEntryDataPage struct {
	Page *document.Page `json:"page"`
}

// QueueEntryDataType returns the type discriminator used in marshalled JSON.
func (q QueueEntryDataPage) QueueEntryDataType() string {
	return "Page"
}

View File

@ -0,0 +1,7 @@
package main
// QueueEntryDataSeparator is a queue entry payload that marks a document
// boundary between runs of scanned pages.
type QueueEntryDataSeparator struct{}

// QueueEntryDataType returns the type discriminator used in marshalled JSON.
func (q QueueEntryDataSeparator) QueueEntryDataType() string {
	return "Separator"
}

5
queue-entry-data.go Normal file
View File

@ -0,0 +1,5 @@
package main
// QueueEntryData is the payload of a queue entry (e.g. a scanned page or a separator).
type QueueEntryData interface {
	// QueueEntryDataType returns the type discriminator used in marshalled JSON.
	QueueEntryDataType() string
}

41
queue-entry.go Normal file
View File

@ -0,0 +1,41 @@
package main
import (
"encoding/json"
"fmt"
"sync/atomic"
)
// queueEntryIDCounter is the process-wide source for unique queue entry IDs.
var queueEntryIDCounter atomic.Uint32

// NewQueueEntryID returns a unique document id.
func NewQueueEntryID() QueueEntryID {
	return QueueEntryID(queueEntryIDCounter.Add(1))
}

// QueueEntryID uniquely identifies a queue entry.
// NOTE(review): the counter is a Uint32 while the ID type is int — IDs repeat
// after the counter wraps; confirm this is acceptable for the process lifetime.
type QueueEntryID int

// QueueEntry can contain a single or multiple scanned pages.
type QueueEntry struct {
	ID   QueueEntryID `json:"id"`
	Name string       `json:"name"`

	QueueEntryData // The actual payload (page or separator); see MarshalJSON.
}
// MarshalJSON marshals the entry together with an additional "type"
// discriminator field so clients can tell the payload kinds apart.
// It fails if the entry has no payload.
func (q QueueEntry) MarshalJSON() ([]byte, error) {
	if q.QueueEntryData == nil {
		return nil, fmt.Errorf("queue entry doesn't contain any data")
	}

	type embedded QueueEntry // Prevent recursion.
	// NOTE(review): encoding/json does not flatten embedded interface fields, so
	// the payload is serialized under the key "QueueEntryData" — confirm clients
	// expect that key.
	return json.Marshal(
		struct {
			embedded
			Type string `json:"type"`
		}{
			embedded: embedded(q),
			Type:     q.QueueEntryDataType(),
		},
	)
}

186
queue.go Normal file
View File

@ -0,0 +1,186 @@
package main
import (
"slices"
"sync"
)
// Queue contains a list of scanned documents.
// A user can issue operations on these entries.
//
// The list is synced between the server and clients via websockets.
type Queue struct {
	sync.Mutex // Guards Documents and listeners. Also exposed to callers; see the "must be locked" method comments.

	Documents []QueueEntry

	// listeners receive one websocket packet per queue mutation; see Broadcast.
	listeners map[chan<- ServerWebsocketPacket]struct{}
}
// RegisterListener will add the given channel c to receive updates in the form of websocket packets.
//
// UnregisterListener must be called before the channel can be closed or stopped reading from.
func (d *Queue) RegisterListener(c chan<- ServerWebsocketPacket) {
	d.Lock()
	defer d.Unlock()

	// Lazily initialize the listener set.
	if d.listeners == nil {
		d.listeners = make(map[chan<- ServerWebsocketPacket]struct{})
	}
	d.listeners[c] = struct{}{}

	// Send current document queue.
	// NOTE(review): this send happens while holding the queue lock and blocks
	// until the listener reads it — a listener that isn't consuming yet would
	// stall the whole queue. Confirm every caller starts reading before registering.
	c <- &ServerWebsocketPacketQueueReplace{Documents: d.Documents}
}
// UnregisterListener will stop the given listener from receiving updates.
// Upon return, the listener will not receive any updates from the queue.
func (d *Queue) UnregisterListener(c chan<- ServerWebsocketPacket) {
	d.Lock()
	defer d.Unlock()

	// Deleting from a nil map is a no-op, so (unlike RegisterListener) no lazy
	// initialization is needed here. The previous code allocated an empty map
	// just to delete from it.
	delete(d.listeners, c)
}
// Broadcast will send a websocket packet to all registered listeners.
//
// The Queue must be locked when calling this.
func (d *Queue) Broadcast(p ...ServerWebsocketPacket) {
	// Every listener receives the packets in the given order.
	for _, packet := range p {
		for listener := range d.listeners {
			listener <- packet
		}
	}
}
// DeleteAt removes all elements at [i:j].
//
// This will automatically limit the indices to valid ranges.
//
// The Queue must be locked when calling this.
func (d *Queue) DeleteAt(i, j int) {
	// Clamp i into [0; len) and j into [0; len].
	i = min(max(i, 0), max(len(d.Documents)-1, 0))
	j = min(max(j, 0), len(d.Documents))
	// Nothing to do for an empty or inverted range.
	if i >= j {
		return
	}

	d.Documents = slices.Delete(d.Documents, i, j)
	d.Broadcast(&ServerWebsocketPacketQueueDeleteAt{IndexA: i, IndexB: j})
}
// Delete removes the elements with the given DocumentIDs.
//
// The Queue must be locked when calling this.
func (d *Queue) Delete(ids ...QueueEntryID) {
	// Walk backwards so earlier indices stay valid while deleting.
	for i := len(d.Documents) - 1; i >= 0; i-- {
		if slices.Contains(ids, d.Documents[i].ID) {
			d.DeleteAt(i, i+1)
		}
	}
}
// InsertAt inserts the given document at index i.
//
// Documents will be shifted accordingly, valid indices are in the range of [0; len(queue)].
// This will automatically limit the index to valid ranges.
//
// The Queue must be locked when calling this.
func (d *Queue) InsertAt(i int, documents ...QueueEntry) {
	// Clamp the insertion index into [0; len].
	switch {
	case i < 0:
		i = 0
	case i > len(d.Documents):
		i = len(d.Documents)
	}

	d.Documents = slices.Insert(d.Documents, i, documents...)
	d.Broadcast(&ServerWebsocketPacketQueueInsertAt{Index: i, Documents: documents})
}
// Append will add the given documents to the end of the queue.
//
// The Queue must be locked when calling this.
func (d *Queue) Append(documents ...QueueEntry) {
	// Delegates to InsertAt, which also broadcasts the change to all listeners.
	d.InsertAt(len(d.Documents), documents...)
}
// Replace will replace the whole list of documents with the given one.
//
// The Queue must be locked when calling this.
func (d *Queue) Replace(documents ...QueueEntry) {
	// Store a copy so later changes to the caller's slice can't affect the queue.
	cloned := slices.Clone(documents)
	d.Documents = cloned

	d.Broadcast(&ServerWebsocketPacketQueueReplace{Documents: documents})
}
// ShiftAt will move the element at index i by the given offset.
//
// This will automatically limit the index and the offset to valid ranges.
//
// The Queue must be locked when calling this.
func (d *Queue) ShiftAt(i, offset int) {
	if len(d.Documents) == 0 {
		return
	}
	i = max(0, min(i, len(d.Documents)-1))              // Limit to [0; len).
	offset = max(-i, min(offset, len(d.Documents)-i-1)) // Limit to [-i; len-i-1].

	// Bubble the element towards its target index one swap at a time.
	// The previous implementation's second switch case tested the unchanging
	// `offset` instead of its loop counter (`tempOffset`), which only worked
	// because both always share the same sign; this version makes the loop
	// direction explicit.
	switch {
	case offset > 0:
		for j := i; j < i+offset; j++ {
			d.Documents[j], d.Documents[j+1] = d.Documents[j+1], d.Documents[j]
		}
	case offset < 0:
		for j := i; j > i+offset; j-- {
			d.Documents[j], d.Documents[j-1] = d.Documents[j-1], d.Documents[j]
		}
	}

	// Broadcast unconditionally (even for a zero offset), matching previous behavior.
	d.Broadcast(&ServerWebsocketPacketQueueShiftAt{Index: i, Offset: offset})
}
// Shift will move the index of all elements with the given DocumentIDs by offset.
//
// The Queue must be locked when calling this.
func (d *Queue) Shift(offset int, ids ...QueueEntryID) {
	switch {
	case offset < 0:
		// Walk forwards so that elements shifted up don't get processed twice.
		// NOTE(review): ShiftAt mutates d.Documents while this range is iterating
		// over it, and the clamped `offset` persists for all later matches —
		// confirm both are the intended "bunch up at the edge" behavior.
		for i, entry := range d.Documents {
			if slices.Contains(ids, entry.ID) {
				// Clamp the offset so the element can't be shifted past the start.
				if offset < -i {
					offset = -i
				}
				d.ShiftAt(i, offset)
			}
		}
	case offset > 0:
		// Walk backwards for downward shifts, mirroring the case above.
		for i, entry := range slices.Backward(d.Documents) {
			if slices.Contains(ids, entry.ID) {
				// Clamp the offset so the element can't be shifted past the end.
				if offset > len(d.Documents)-i-1 {
					offset = len(d.Documents) - i - 1
				}
				d.ShiftAt(i, offset)
			}
		}
	}
}
// QueueEntryByID returns the QueueEntry with the given ID.
// It returns nil if no such entry exists.
//
// The Queue must be locked when calling this.
func (d *Queue) QueueEntryByID(id QueueEntryID) *QueueEntry {
	// Index instead of ranging by value, as we need a pointer into the slice.
	for i := range d.Documents {
		if d.Documents[i].ID == id {
			return &d.Documents[i]
		}
	}
	return nil
}

View File

@ -0,0 +1,12 @@
package main
// ServerWebsocketPacket represents a websocket packet.
type ServerWebsocketPacket interface {
	// Type returns the unique packet type name used on the wire.
	Type() string
}

// serverWebsocketPacketRegistry maps packet type names to prototype instances.
// NOTE(review): the prototypes are shared; unmarshalling incoming payloads
// directly into them from multiple connections would be a data race — confirm
// that decoding always happens into fresh instances.
var serverWebsocketPacketRegistry map[string]ServerWebsocketPacket = map[string]ServerWebsocketPacket{}

// ServerWebsocketPacketRegister registers a packet prototype so incoming
// packets can be looked up by their type name.
func ServerWebsocketPacketRegister(prototype ServerWebsocketPacket) {
	serverWebsocketPacketRegistry[prototype.Type()] = prototype
}

View File

@ -0,0 +1,70 @@
package main
// ServerWebsocketPacketQueueDeleteAt represents a delete operation on a document queue list.
// The range of the deleted indices is [IndexA; IndexB).
type ServerWebsocketPacketQueueDeleteAt struct {
	IndexA int `json:"indexA"` // Starting index of the deletion range.
	IndexB int `json:"indexB"` // End index of the deletion range. This index is not included in the range.
}

func (s *ServerWebsocketPacketQueueDeleteAt) Type() string { return "QueueDeleteAt" }

// Register the packet prototype for lookup by type name.
func init() { ServerWebsocketPacketRegister(&ServerWebsocketPacketQueueDeleteAt{}) }

// ServerWebsocketPacketQueueDelete represents a delete operation on a document queue list.
type ServerWebsocketPacketQueueDelete struct {
	IDs []QueueEntryID `json:"ids"` // IDs of the documents.
}

func (s *ServerWebsocketPacketQueueDelete) Type() string { return "QueueDelete" }

func init() { ServerWebsocketPacketRegister(&ServerWebsocketPacketQueueDelete{}) }

// ServerWebsocketPacketQueueInsertAt represents an insert operation on a document queue list.
type ServerWebsocketPacketQueueInsertAt struct {
	Index     int          `json:"index"`
	Documents []QueueEntry `json:"documents"`
}

func (s *ServerWebsocketPacketQueueInsertAt) Type() string { return "QueueInsertAt" }

func init() { ServerWebsocketPacketRegister(&ServerWebsocketPacketQueueInsertAt{}) }

// ServerWebsocketPacketQueueReplace represents a replace operation on a document queue list.
type ServerWebsocketPacketQueueReplace struct {
	Documents []QueueEntry `json:"documents"`
}

func (s *ServerWebsocketPacketQueueReplace) Type() string { return "QueueReplace" }

func init() { ServerWebsocketPacketRegister(&ServerWebsocketPacketQueueReplace{}) }

// ServerWebsocketPacketQueueShiftAt represents a shift operation on a document queue list.
type ServerWebsocketPacketQueueShiftAt struct {
	Index  int `json:"index"`  // Index of the to be shifted element.
	Offset int `json:"offset"` // Shift offset.
}

func (s *ServerWebsocketPacketQueueShiftAt) Type() string { return "QueueShiftAt" }

func init() { ServerWebsocketPacketRegister(&ServerWebsocketPacketQueueShiftAt{}) }

// ServerWebsocketPacketQueueShift represents a shift operation on a document queue list.
type ServerWebsocketPacketQueueShift struct {
	IDs    []QueueEntryID `json:"ids"`    // IDs of the documents.
	Offset int            `json:"offset"` // Shift offset.
}

func (s *ServerWebsocketPacketQueueShift) Type() string { return "QueueShift" }

func init() { ServerWebsocketPacketRegister(&ServerWebsocketPacketQueueShift{}) }

// ServerWebsocketPacketQueueUpdate represents an update operation of documents in a queue list.
// The receiver should update any of the received documents in their local queue list.
type ServerWebsocketPacketQueueUpdate struct {
	Documents []QueueEntry `json:"documents"`
}

func (s *ServerWebsocketPacketQueueUpdate) Type() string { return "QueueUpdate" }

func init() { ServerWebsocketPacketRegister(&ServerWebsocketPacketQueueUpdate{}) }

101
server-websocket.go Normal file
View File

@ -0,0 +1,101 @@
package main
import (
"encoding/json"
"log"
"net/http"
"github.com/gorilla/websocket"
)
// upgrader upgrades HTTP connections to websocket connections.
// The zero value uses the library defaults, including the default origin check.
var upgrader = websocket.Upgrader{}

// handleWebSocket upgrades the request to a websocket connection, streams queue
// updates to the client, and applies queue operations the client sends back.
func (s *Server) handleWebSocket(w http.ResponseWriter, r *http.Request) {
	conn, err := upgrader.Upgrade(w, r, nil)
	if err != nil {
		log.Printf("Upgrading connection to WS failed: %v.", err)
		return
	}
	defer conn.Close()

	sendChan := make(chan ServerWebsocketPacket)
	defer close(sendChan)

	// Goroutine for sending packets.
	// It is the only writer on conn, which keeps websocket writes serialized.
	go func(sendChan <-chan ServerWebsocketPacket) {
		for packet := range sendChan {
			message := struct {
				Type    string `json:"type"`
				Payload any    `json:"payload"`
			}{
				Type:    packet.Type(),
				Payload: packet,
			}
			messageData, err := json.Marshal(message)
			if err != nil {
				log.Printf("Failed to marshal websocket packet: %v.", err)
				continue
			}
			// Log write failures instead of silently dropping them (previously ignored).
			if err := conn.WriteMessage(websocket.TextMessage, messageData); err != nil {
				log.Printf("Failed to write websocket message: %v.", err)
			}
		}
	}(sendChan)

	// Register listener on document queue updates.
	// Deferred calls run LIFO, so the listener is unregistered before sendChan is closed.
	s.Documents.RegisterListener(sendChan)
	defer s.Documents.UnregisterListener(sendChan)

	// Main loop that receives packets.
	for {
		messageType, data, err := conn.ReadMessage()
		if err != nil {
			log.Printf("Reading WS message failed: %v.", err)
			break
		}
		switch messageType {
		case websocket.CloseMessage:
			log.Printf("Connection %v closed.", conn.LocalAddr())
			return

		case websocket.TextMessage:
			var message struct {
				Type    string          `json:"type"`
				Payload json.RawMessage `json:"payload"`
			}
			if err := json.Unmarshal(data, &message); err != nil {
				log.Printf("Failed to unmarshal websocket packet from client %v: %v.", conn.LocalAddr(), err)
				return
			}

			// Decode the payload into a fresh packet instance per message.
			// The previous code unmarshalled into the shared registry prototype,
			// which is a data race between concurrent connections and can leak
			// field values from one message into the next.
			switch message.Type {
			case (&ServerWebsocketPacketQueueDelete{}).Type():
				var packet ServerWebsocketPacketQueueDelete
				if err := json.Unmarshal(message.Payload, &packet); err != nil {
					log.Printf("Failed to unmarshal websocket packet payload from client %v: %v.", conn.LocalAddr(), err)
					return
				}
				s.Documents.Lock()
				s.Documents.Delete(packet.IDs...)
				s.Documents.Unlock()

			case (&ServerWebsocketPacketQueueShift{}).Type():
				var packet ServerWebsocketPacketQueueShift
				if err := json.Unmarshal(message.Payload, &packet); err != nil {
					log.Printf("Failed to unmarshal websocket packet payload from client %v: %v.", conn.LocalAddr(), err)
					return
				}
				s.Documents.Lock()
				s.Documents.Shift(packet.Offset, packet.IDs...)
				s.Documents.Unlock()

			default:
				log.Printf("Websocket client %v sent unknown or unsupported packet type %q.", conn.LocalAddr(), message.Type)
				return
			}
		}
	}
}

151
server.go Normal file
View File

@ -0,0 +1,151 @@
package main
import (
"fmt"
"image/jpeg"
"log"
"net/http"
"strconv"
"github.com/nfnt/resize"
)
// Server serves the static web interface plus the REST and websocket API.
type Server struct {
	http.ServeMux

	Documents Queue // The queue of ingested documents; shared with all websocket clients.
}

// NewServer returns a new Server with all routes set up.
func NewServer() *Server {
	s := &Server{}

	s.Handle("/", http.FileServer(http.Dir("./static")))
	s.HandleFunc("GET /api/queue-entry-page/{id}/image", s.handleGetQueueEntryImage)
	s.HandleFunc("GET /api/queue-entry-page/{id}/preview", s.handleGetQueueEntryPreview)
	//s.HandleFunc("PUT /documents/{id}", addItem)
	//s.HandleFunc("DELETE /documents/{id}", s.handleRemoveDocument)
	s.HandleFunc("/ws", s.handleWebSocket)

	return s
}
// writeHTTPError writes the given status code and message to the response and logs the message.
func writeHTTPError(w http.ResponseWriter, status int, msg string) {
	w.WriteHeader(status)
	w.Write([]byte(msg))
	log.Print(msg)
}

// pageFromRequest parses the queue entry ID from the request path, looks the
// entry up and validates that it is a page containing image data.
// On any failure it writes an error response and returns ok == false.
//
// The caller must hold the lock on s.Documents.
func (s *Server) pageFromRequest(w http.ResponseWriter, r *http.Request) (QueueEntryDataPage, bool) {
	i, err := strconv.ParseInt(r.PathValue("id"), 10, 0)
	if err != nil {
		writeHTTPError(w, http.StatusBadRequest, fmt.Sprintf("Failed to parse document id: %v.", err))
		return QueueEntryDataPage{}, false
	}
	id := QueueEntryID(i)

	entry := s.Documents.QueueEntryByID(id)
	if entry == nil {
		writeHTTPError(w, http.StatusNotFound, fmt.Sprintf("Failed to find %T with ID %v.", entry, id))
		return QueueEntryDataPage{}, false
	}

	page, ok := entry.QueueEntryData.(QueueEntryDataPage)
	if !ok {
		writeHTTPError(w, http.StatusInternalServerError, fmt.Sprintf("Entry %d isn't a page.", entry.ID))
		return QueueEntryDataPage{}, false
	}
	if page.Page == nil {
		writeHTTPError(w, http.StatusInternalServerError, fmt.Sprintf("Entry %d doesn't contain any page data.", entry.ID))
		return QueueEntryDataPage{}, false
	}
	if page.Page.Image == nil {
		writeHTTPError(w, http.StatusInternalServerError, fmt.Sprintf("Page %d doesn't contain any image.", entry.ID))
		return QueueEntryDataPage{}, false
	}

	return page, true
}

// handleGetQueueEntryImage serves the full resolution image of a page queue entry as JPEG.
func (s *Server) handleGetQueueEntryImage(w http.ResponseWriter, r *http.Request) {
	s.Documents.Lock()
	defer s.Documents.Unlock()

	page, ok := s.pageFromRequest(w, r)
	if !ok {
		return
	}

	w.Header().Set("Content-Type", "image/jpeg")
	if err := jpeg.Encode(w, page.Page.Image, nil); err != nil {
		// The response header has already been sent at this point, so writing
		// another status code would have no effect (the previous code tried to).
		// All we can do is log the error.
		log.Printf("Failed to encode JPEG: %v.", err)
	}
}

// handleGetQueueEntryPreview serves a downscaled (512 px wide) JPEG preview of a page queue entry.
func (s *Server) handleGetQueueEntryPreview(w http.ResponseWriter, r *http.Request) {
	s.Documents.Lock()
	defer s.Documents.Unlock()

	page, ok := s.pageFromRequest(w, r)
	if !ok {
		return
	}

	// Resize image to a preview with a width of about 512 pixels.
	img := resize.Resize(512, 0, page.Page.Image, resize.Lanczos2)

	w.Header().Set("Content-Type", "image/jpeg")
	if err := jpeg.Encode(w, img, nil); err != nil {
		// Header already sent; see handleGetQueueEntryImage.
		log.Printf("Failed to encode JPEG: %v.", err)
	}
}

View File

@ -1,13 +0,0 @@
[Unit]
Description=A runner that watches directories and runs OCRmyPDF on the files in them.
[Service]
Restart=on-failure
RestartSec=60s
WorkingDirectory=/home/paperless/ocrmypdf-runner/
ExecStart=go run .
User=paperless
Group=paperless
[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,10 @@
[Unit]
Description=A server that will receive scanned documents via FTP, process them and send them to paperless.
[Service]
Restart=on-failure
RestartSec=60s
ExecStart=go run .
[Install]
WantedBy=multi-user.target

25
static/index.html Normal file
View File

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Scanyonero</title>
<link rel="stylesheet" href="styles.css">
<script type="module" src="./js/components/document-menu.js"></script>
<script type="module" src="./js/components/document-queue.js"></script>
</head>
<body>
<div id="main-container">
<document-menu id="document-menu"></document-menu>
<document-queue id="document-queue"></document-queue>
</div>
<script type="module">
import { App } from "./js/app.js";
const app = new App();
</script>
</body>
</html>

128
static/js/api.js Normal file
View File

@ -0,0 +1,128 @@
/**
 * API wraps the websocket connection to the server and redistributes incoming
 * queue packets as DOM CustomEvents ("queuedeleteat", "queueinsertat",
 * "queuereplace", "queueshiftat"). It reconnects automatically when the
 * connection drops.
 */
export class API extends EventTarget {
    constructor() {
        super();
        this.#socketOpen();
    }

    /** @type {WebSocket} */
    #socket;

    /**
     * Opens a new websocket connection, and adds all necessary listeners.
     */
    #socketOpen() {
        this.#socket = new WebSocket("ws://" + location.host + "/ws");
        this.#socket.onopen = event => {
            console.log(`Websocket connection opened.`)
        }
        this.#socket.onmessage = event => {
            /** @type {{type: string, payload: any}} */
            const data = JSON.parse(event.data);
            // Dispatch the packet payload as a CustomEvent keyed by packet type.
            // NOTE(review): there are no cases for "QueueDelete", "QueueShift" or
            // "QueueUpdate" — confirm the server never broadcasts those types.
            switch (data.type) {
                case "QueueDeleteAt": {
                    /** @type {CustomEventInit<import("./model").APIPacketQueueDeleteAt>} */
                    const eventData = { detail: data.payload };
                    this.dispatchEvent(new CustomEvent("queuedeleteat", eventData));
                    break;
                }
                case "QueueInsertAt": {
                    /** @type {CustomEventInit<import("./model").APIPacketQueueInsertAt>} */
                    const eventData = { detail: data.payload };
                    this.dispatchEvent(new CustomEvent("queueinsertat", eventData));
                    break;
                }
                case "QueueReplace": {
                    /** @type {CustomEventInit<import("./model").APIPacketQueueReplace>} */
                    const eventData = { detail: data.payload };
                    this.dispatchEvent(new CustomEvent("queuereplace", eventData));
                    break;
                }
                case "QueueShiftAt": {
                    /** @type {CustomEventInit<import("./model").APIPacketQueueShift>} */
                    const eventData = { detail: data.payload };
                    this.dispatchEvent(new CustomEvent("queueshiftat", eventData));
                    break;
                }
                default:
                    console.error(`Unknown websocket data type "${data.type}"`);
                    break;
            }
        }
        this.#socket.onclose = event => {
            // Reconnect with a fixed 1 second backoff.
            console.log(`Socket is closed. Reconnect will be attempted in 1 second. Code: ${event.code} Reason: ${event.reason} WasClean: ${event.wasClean}.`);
            setTimeout(() => {
                this.#socketOpen();
            }, 1000);
        };
        this.#socket.onerror = event => {
            // Closing here triggers onclose, which schedules the reconnect.
            console.error(`Socket encountered error: ${event}. Closing socket.`);
            this.#socket.close();
        };
    }

    /**
     * Sends a document queue delete request to the server.
     * Silently does nothing while the socket is not open.
     * @param {...number} ids Document ids.
     */
    queueDelete(...ids) {
        if (this.#socket.readyState !== WebSocket.OPEN) {
            return
        }

        /** @type {{type: string, payload: import("./model").APIPacketQueueDelete}} */
        const message = { type: "QueueDelete", payload: { ids: ids } };
        this.#socket.send(JSON.stringify(message));
    }

    /**
     * Sends a document queue shift request to the server.
     * Silently does nothing while the socket is not open.
     * @param {number} offset
     * @param {...number} ids Document ids.
     */
    queueShift(offset, ...ids) {
        if (this.#socket.readyState !== WebSocket.OPEN) {
            return
        }

        /** @type {{type: string, payload: import("./model").APIPacketQueueShift}} */
        const message = { type: "QueueShift", payload: { offset: offset, ids: ids } };
        this.#socket.send(JSON.stringify(message));
    }

    /**
     * Performs an HTTP request and resolves with the response body,
     * or rejects with {status, statusText}.
     * NOTE(review): this private method is never called inside the class and,
     * being private, can't be called from outside — dead code or work in progress?
     * @param {"GET"|"POST"|"DELETE"|"UPDATE"} method
     * @param {string} url
     */
    #ajaxRequest(method, url) {
        return new Promise(function (resolve, reject) {
            const xhr = new XMLHttpRequest();
            xhr.open(method, url);
            xhr.onload = function () {
                if (this.status >= 200 && this.status < 300) {
                    resolve(this.response);
                } else {
                    reject({
                        status: this.status,
                        statusText: this.statusText,
                    });
                }
            };
            xhr.onerror = function () {
                reject({
                    status: this.status,
                    statusText: this.statusText,
                });
            };
            xhr.send();
        });
    }
}

25
static/js/app.js Normal file
View File

@ -0,0 +1,25 @@
import { API } from './api.js';
import { DocumentMenu } from './components/document-menu.js';
import { DocumentQueue } from './components/document-queue.js';
/**
 * App is the top level application object.
 * It creates the API connection and wires it into the document menu and the
 * document queue components found in the page.
 */
export class App {
    /** @type {API} */
    #api;

    /** @type {DocumentMenu} */
    #documentMenu;

    /** @type {DocumentQueue} */
    #documentQueue;

    constructor() {
        this.#api = new API();

        // Both elements are expected to exist in index.html.
        this.#documentMenu = document.querySelector("#document-menu");
        this.#documentQueue = document.querySelector("#document-queue");

        this.#documentMenu.documentQueue = this.#documentQueue;
        this.#documentMenu.api = this.#api;
        this.#documentQueue.api = this.#api;
    }
}

View File

@ -0,0 +1,126 @@
import { API } from '../api.js';
import { LitElement, css, html } from '../vendor/lit-html/lit-all.min.js';
import { DocumentQueue } from './document-queue.js';
export class DocumentMenu extends LitElement {
static properties = {
selectionAll: { type: Boolean },
selectionIndeterminate: { type: Boolean },
};
/** @type {API} */
api;
/** @type {DocumentQueue|undefined} */
#documentQueue;
/**
* @param {DocumentQueue} documentQueue
*/
set documentQueue(documentQueue) {
this.#documentQueue = documentQueue;
this.#documentQueue.addEventListener("changeselection", /** @param {import('./document-queue.js').DocumentQueueEventChangeSelection} event */(event) => {
switch (event.detail.selectedIDs.length) {
case 0:
this.selectionAll = false; this.selectionIndeterminate = false;
break;
case event.detail.allIDs.length:
this.selectionAll = true; this.selectionIndeterminate = false;
break;
default:
this.selectionAll = false; this.selectionIndeterminate = true;
break;
}
});
}
constructor() {
super();
this.selectionAll = false;
this.selectionIndeterminate = false;
}
onCheckboxChange(event) {
switch (event.target.checked) {
case true:
this.selectionAll = true; this.selectionIndeterminate = false;
this.#documentQueue.selectAll(true);
break;
default:
this.selectionAll = false; this.selectionIndeterminate = false;
this.#documentQueue.selectAll(false);
break;
}
}
static styles = css`
:host {
display: flex;
padding: 8px;
background-color: black;
}
#select-all {
align-self: center;
margin-left: 8px;
margin-right: 16px;
width: 24px;
height: 24px;
}
#buttons {
display: flex;
flex-direction: row;
flex-wrap: wrap;
gap: 16px;
}
button {
padding: 8px;
}
`;
// @ts-ignore
render() {
return html`
<input id="select-all" type="checkbox" .checked=${this.selectionAll} .indeterminate=${this.selectionIndeterminate} @change=${this.onCheckboxChange}></input>
<div id="buttons">
<button @click=${this.onButtonUpwards} title="Shifts all selected elements upwards."></button>
<button @click=${this.onButtonDownwards} title="Shifts all selected elements downwards."></button>
<button title="Takes two stacks of single sided scans and merges them as if they were scanned from both sides.">Duplex Merge</button>
<button @click=${this.onButtonDelete}>Delete</button>
</div>
`;
}
/** @param {Event} event */
onButtonUpwards(event) {
if (this.api == undefined || this.#documentQueue == undefined) { return }
const sInfo = this.#documentQueue.selectionInfo();
this.api.queueShift(-1, ...sInfo.selectedIDs);
}
/** @param {Event} event */
onButtonDownwards(event) {
if (this.api == undefined || this.#documentQueue == undefined) { return }
const sInfo = this.#documentQueue.selectionInfo();
this.api.queueShift(1, ...sInfo.selectedIDs);
}
/** @param {Event} event */
onButtonDelete(event) {
if (this.api == undefined || this.#documentQueue == undefined) { return }
const sInfo = this.#documentQueue.selectionInfo();
this.api.queueDelete(...sInfo.selectedIDs);
}
}
customElements.define("document-menu", DocumentMenu);

View File

@ -0,0 +1,35 @@
import { API } from '../api.js';
import { LitElement, css, html, repeat } from '../vendor/lit-html/lit-all.min.js';
/**
 * DocumentQueueEntryPage renders a single page queue entry: a preview image
 * fetched from the server plus a caption.
 */
export class DocumentQueueEntryPage extends LitElement {
    static properties = {
        queueEntry: { type: Object },
        api: { type: Object, state: true },
    };

    constructor() {
        super();

        /** @type {API} */
        this.api;

        // NOTE(review): the reactive property is declared as `queueEntry`, but
        // the code (and the parent component) uses `document` — updates to
        // `document` won't trigger re-renders. Confirm which name is intended.
        /** @type {import('model').APIQueueEntry} */
        this.document;
    }

    static styles = css`
        img {
            width: 128px;
        }
    `;

    // @ts-ignore
    render() {
        return html`
            <img id="image" src=${`/api/queue-entry-page/${this.document.id}/preview`}></img>
            <span>This is a document</span>
        `;
    }
}
customElements.define("document-queue-entry-page", DocumentQueueEntryPage);

View File

@ -0,0 +1,35 @@
import { API } from '../api.js';
import { LitElement, css, html, repeat } from '../vendor/lit-html/lit-all.min.js';
/**
 * DocumentQueueEntrySeparator renders a separator queue entry as a plain
 * horizontal bar (no content, styling only).
 */
export class DocumentQueueEntrySeparator extends LitElement {
    static properties = {
        queueEntry: { type: Object },
        api: { type: Object, state: true },
    };

    constructor() {
        super();

        /** @type {API} */
        this.api;

        // NOTE(review): same property naming mismatch as in
        // DocumentQueueEntryPage — `queueEntry` is declared, `document` is used.
        /** @type {import('model').APIQueueEntry} */
        this.document;
    }

    static styles = css`
        :host {
            width: 100%;
            background: black;
        }
    `;

    // @ts-ignore
    render() {
        return html`
        `;
    }
}
customElements.define("document-queue-entry-separator", DocumentQueueEntrySeparator);

View File

@ -0,0 +1,126 @@
import { API } from '../api.js';
import { LitElement, css, html, repeat } from '../vendor/lit-html/lit-all.min.js';
import './document-queue-entry-page.js';
import './document-queue-entry-separator.js';
/** @typedef {{selected: boolean}} DocumentQueueEntryEventChangeSelectionDetails */
/** @typedef {CustomEvent<DocumentQueueEntryEventChangeSelectionDetails>} DocumentQueueEntryEventChangeSelection */
/**
 * DocumentQueueEntry wraps a single queue entry (page or separator) and adds a
 * selection checkbox, a swap button and FLIP animation support for reordering.
 */
export class DocumentQueueEntry extends LitElement {
    static properties = {
        selected: { type: Boolean },
        queueEntry: { type: Object },
        api: { type: Object, state: true },
    };

    constructor() {
        super();

        this.selected = false;

        /** @type {API} */
        this.api;

        /** @type {import('model').APIQueueEntry} */
        this.queueEntry;
    }

    static styles = css`
        :host {
            padding: 8px;
            display: flex;
            flex-direction: row;
            gap: 8px;
            background-color: rgba(0, 0, 0, 0.1);
            border-radius: 8px;
        }

        #left-bar {
            position: relative;
            display: flex;
            flex-direction: column;
            justify-content: center;
        }

        #checkbox-selected {
            align-self: center;
            width: 24px;
            height: 24px;
        }

        #button-swap {
            width: 32px;
            height: 32px;
            padding: 0px;
            position: absolute;
            bottom: 0px;
        }
    `;

    // @ts-ignore
    render() {
        // Choose the embedded element based on the entry's type discriminator.
        let embeddedElement;
        switch (this.queueEntry.type) {
            case "Page":
                embeddedElement = html`<document-queue-entry-page .document=${this.queueEntry} .api=${this.api}></document-queue-entry-page>`; break;
            case "Separator":
                embeddedElement = html`<document-queue-entry-separator .document=${this.queueEntry} .api=${this.api}></document-queue-entry-separator>`; break;
            default:
                embeddedElement = html`<span>Unsupported entry type!</span>`
        }

        return html`
            <div id="left-bar">
                <input id="checkbox-selected" type="checkbox" .checked=${this.selected} @change=${this.onCheckboxChange}></input>
                <button id="button-swap" @click=${e => this.api.queueShift(1, this.queueEntry.id)}></button>
            </div>
            ${embeddedElement}
        `;
    }

    /**
     * Forwards checkbox changes as a "changeselection" CustomEvent.
     * @param {Event} event
     */
    onCheckboxChange(event) {
        // @ts-ignore
        this.selected = event.target.checked;

        /** @type {CustomEventInit<DocumentQueueEntryEventChangeSelectionDetails>} */
        const eventData = { detail: { selected: this.selected } };
        this.dispatchEvent(new CustomEvent("changeselection", eventData));
    }

    /**
     * Used for FLIP animations.
     * Holds the element's position before a reorder; see prepareFLIP/doFLIP.
     * @type {DOMRect}
     */
    #oldBoundingClientRect;

    /** Records the element's current position. Call before reordering the DOM. */
    prepareFLIP() {
        this.#oldBoundingClientRect = this.getBoundingClientRect();
    }

    /**
     * Animates the element from its recorded position to its current one
     * (First-Last-Invert-Play). No-op if prepareFLIP wasn't called or the
     * element barely moved.
     */
    doFLIP() {
        const oldRect = this.#oldBoundingClientRect;
        if (oldRect == undefined) {
            return;
        }

        const newRect = this.getBoundingClientRect();
        const deltaX = oldRect.left - newRect.left;
        const deltaY = oldRect.top - newRect.top;

        if (Math.abs(deltaX) >= 1 || Math.abs(deltaY) >= 1) {
            this.animate([{
                transform: `translate(${deltaX}px, ${deltaY}px)`
            }, {
                transform: 'none'
            }], {
                duration: 150,
                easing: 'ease-out',
                fill: 'both',
            });
        }
    }
}
customElements.define("document-queue-entry", DocumentQueueEntry);

View File

@ -0,0 +1,169 @@
import { API } from '../api.js';
import { DocumentQueueEntry } from './document-queue-entry.js'
/** @typedef {{selectedIDs: number[], allIDs: number[]}} DocumentQueueEventChangeSelectionDetails */
/** @typedef {CustomEvent<DocumentQueueEventChangeSelectionDetails>} DocumentQueueEventChangeSelection */
// TODO: Use LitElement, and use repeat directive, which also keeps the DOM state when shuffling elements around
/**
 * List of all queued document entries.
 *
 * Mirrors the server side queue: the API events "queuedeleteat",
 * "queueinsertat", "queueshiftat" and "queuereplace" are applied to the
 * child elements of this list. Dispatches a "changeselection" event
 * ({@link DocumentQueueEventChangeSelection}) whenever the set of selected
 * entries may have changed.
 */
export class DocumentQueue extends HTMLElement {
	/** @type {API|undefined} */
	#api;
	/**
	 * Injects the API instance and subscribes to its queue events.
	 * NOTE(review): Setting this more than once adds duplicate listeners,
	 * as listeners on a previously set API instance are never removed.
	 * @param {API} api
	 */
	set api(api) {
		this.#api = api;
		this.#api.addEventListener("queuedeleteat", /** @param {import("model").APIEvents["queuedeleteat"]} event */(event) => {
			this.queueDeleteAt(event.detail.indexA, event.detail.indexB);
		});
		this.#api.addEventListener("queueinsertat", /** @param {import("model").APIEvents["queueinsertat"]} event */(event) => {
			this.queueInsertAt(event.detail.index, event.detail.documents);
		});
		this.#api.addEventListener("queueshiftat", /** @param {import("model").APIEvents["queueshiftat"]} event */(event) => {
			this.queueShiftAt(event.detail.index, event.detail.offset);
		});
		this.#api.addEventListener("queuereplace", /** @param {import("model").APIEvents["queuereplace"]} event */(event) => {
			this.queueReplace(event.detail.documents);
		});
	}
	connectedCallback() {
		this.style.display = "flex";
		this.style.gap = "8px";
		this.style.flexDirection = "column";
		this.style.padding = "8px";
	}
	/**
	 * Creates a new entry element wired up to this queue.
	 * @param {import('model').APIQueueEntry} document
	 * @returns {DocumentQueueEntry}
	 */
	#newEntry(document) {
		const entry = new DocumentQueueEntry();
		entry.api = this.#api;
		entry.queueEntry = document;
		entry.addEventListener("changeselection", e => this.updateSelection());
		return entry;
	}
	/** Records the current positions of all entries for a FLIP animation. */
	#prepareFLIPAll() {
		Array.from(this.children).forEach(/** @param {DocumentQueueEntry} entry */ entry => {
			entry.prepareFLIP();
		});
	}
	/** Animates all entries from their recorded positions to their current ones. */
	#doFLIPAll() {
		Array.from(this.children).forEach(/** @param {DocumentQueueEntry} entry */ entry => {
			entry.doFLIP();
		});
	}
	/**
	 * Collects the IDs of all entries and of all currently selected entries.
	 * @returns {DocumentQueueEventChangeSelectionDetails}
	 */
	selectionInfo() {
		const children = /** @type {DocumentQueueEntry[]} */ (Array.from(this.children));
		return {
			selectedIDs: children.filter(child => child.selected).map(child => child.queueEntry.id),
			allIDs: children.map(child => child.queueEntry.id),
		};
	}
	/** Dispatches a "changeselection" event with the current selection info. */
	updateSelection() {
		/** @type {CustomEventInit<DocumentQueueEventChangeSelectionDetails>} */
		const eventData = { detail: this.selectionInfo() };
		this.dispatchEvent(new CustomEvent("changeselection", eventData));
	}
	/**
	 * Sets the selection state of every entry in the queue.
	 * @param {boolean} state
	 */
	selectAll(state) {
		Array.from(this.children).forEach(/** @param {DocumentQueueEntry} child */ child => {
			child.selected = state;
		});
	}
	/**
	 * Deletes a range of documents.
	 * @param {number} indexA // Start index.
	 * @param {number} indexB // End index. (Not included in the range).
	 */
	queueDeleteAt(indexA, indexB) {
		// Store positions.
		this.#prepareFLIPAll();
		// `this.children` is a live collection: removing an element shifts all
		// following indices down. So always remove at indexA until the
		// requested number of entries is gone. (The previous implementation
		// incremented the index while removing, skipping every other entry.)
		const count = indexB - indexA;
		for (let i = 0; i < count && indexA < this.children.length; i++) {
			this.removeChild(this.children[indexA]);
		}
		// Start FLIP animation.
		this.#doFLIPAll();
		this.updateSelection();
	}
	/**
	 * Inserts a range of documents at the given index.
	 * @param {number} index
	 * @param {import('model').APIQueueEntry[]} documents
	 */
	queueInsertAt(index, documents) {
		// Store positions.
		this.#prepareFLIPAll();
		documents.forEach(document => {
			// Append when the list is empty or the index points past the last
			// entry; otherwise insert before the entry currently at the index.
			// (The previous condition was missing the negation and therefore
			// always appended when the list was non-empty.)
			if (!this.hasChildNodes() || this.children.length === index) {
				this.appendChild(this.#newEntry(document));
			} else {
				this.insertBefore(this.#newEntry(document), this.children[index]);
			}
			index++;
		});
		// Start FLIP animation.
		this.#doFLIPAll();
		this.updateSelection();
	}
	/**
	 * Replaces all documents currently in the list/queue.
	 * @param {import('model').APIQueueEntry[]} documents
	 */
	queueReplace(documents) {
		this.innerHTML = "";
		documents.forEach(document => {
			this.appendChild(this.#newEntry(document));
		});
		this.updateSelection();
	}
	/**
	 * Shifts a single document entry by the given offset.
	 * @param {number} index
	 * @param {number} offset
	 */
	queueShiftAt(index, offset) {
		const child = this.children[index];
		if (child === undefined) {
			return; // Ignore out of range indices.
		}
		// Store positions.
		this.#prepareFLIPAll();
		child.remove();
		const newIndex = index + offset;
		if (!this.hasChildNodes() || this.children.length === newIndex) {
			this.appendChild(child);
		} else {
			this.insertBefore(child, this.children[newIndex]);
		}
		// Start FLIP animation.
		this.#doFLIPAll();
	}
}
// Register <document-queue> as a custom element.
customElements.define("document-queue", DocumentQueue);

41
static/js/model.d.ts vendored Normal file
View File

@ -0,0 +1,41 @@
/** A single entry of the server side document queue. */
export interface APIQueueEntry {
	id: number; // Unique entry ID.
	name: string;
	type: string; // Entry type; the UI handles "Page" and "Separator".
	page: string | undefined; // NOTE(review): semantics not visible here — presumably the page content or a reference to it; confirm against the server side.
}
/** Payload addressing queue entries by their IDs for deletion. */
export type APIPacketQueueDelete = {
	ids: number[];
}
/** Payload describing the deletion of the index range [indexA, indexB). */
export type APIPacketQueueDeleteAt = {
	indexA: number; // Start index.
	indexB: number; // End index, not included in the range.
}
/** Payload describing the insertion of entries starting at the given index. */
export type APIPacketQueueInsertAt = {
	index: number;
	documents: APIQueueEntry[];
}
/** Payload replacing the whole queue content. */
export type APIPacketQueueReplace = {
	documents: APIQueueEntry[];
}
/** Payload describing the move of the entry at index by offset positions. */
export type APIPacketQueueShiftAt = {
	index: number;
	offset: number;
}
/** Payload describing the move of the entries with the given IDs by offset positions. */
export type APIPacketQueueShift = {
	ids: number[];
	offset: number;
}
/** Events dispatched by the API client when the server updates the queue. */
export type APIEvents = {
	queuedeleteat: CustomEvent<APIPacketQueueDeleteAt>;
	queueinsertat: CustomEvent<APIPacketQueueInsertAt>;
	queuereplace: CustomEvent<APIPacketQueueReplace>;
	queueshiftat: CustomEvent<APIPacketQueueShiftAt>;
}

120
static/js/vendor/lit-html/lit-all.min.js vendored Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

16
static/styles.css Normal file
View File

@ -0,0 +1,16 @@
/* Base document styles. */
body {
	font-family: sans-serif;
	margin: 0;
}
/* Full viewport flex column hosting the main UI. */
#main-container {
	height: 100vh;
	width: 100vw;
	display: flex;
	flex-direction: column;
}
/* Let the document queue fill the remaining space and scroll on its own. */
#main-container > document-queue {
	overflow-y: auto;
	flex-grow: 1;
}

3
test-documents/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
*
!.gitignore

3
unit/consts.go Normal file
View File

@ -0,0 +1,3 @@
package unit
// MillimetersPerInch is the conversion factor between inches and millimeters.
const MillimetersPerInch = 25.4

6
unit/density.go Normal file
View File

@ -0,0 +1,6 @@
package unit
// Density represents something unitless per length unit (e.g. dots per inch).
type Density interface {
	PerMillimeter() PerMillimeter
}

21
unit/inch.go Normal file
View File

@ -0,0 +1,21 @@
package unit
import "strconv"
// Inch denotes a distance, position or offset in inches.
type Inch float64

// Compile time checks that Inch implements the Unit and Length interfaces.
var _ Unit = Inch(0)
var _ Length = Inch(0)

// String returns the value followed by a non-breaking space and its unit symbol.
func (v Inch) String() string {
	return strconv.FormatFloat(float64(v), 'f', -1, 64) + "\u00A0inch"
}

// UnitSymbol returns the unit symbol of Inch.
func (Inch) UnitSymbol() string {
	return "inch"
}

// Millimeters returns the value converted into millimeters.
func (v Inch) Millimeters() Millimeter {
	return Millimeter(v * MillimetersPerInch)
}

6
unit/length.go Normal file
View File

@ -0,0 +1,6 @@
package unit
// Length is something that can represent a distance, offset or position,
// convertible into millimeters.
type Length interface {
	Millimeters() Millimeter
}

25
unit/millimeter.go Normal file
View File

@ -0,0 +1,25 @@
package unit
import "strconv"
// Millimeter denotes a distance, position or offset in millimeters.
type Millimeter float64

// Compile time checks that Millimeter implements the Unit and Length interfaces.
var _ Unit = Millimeter(0)
var _ Length = Millimeter(0)

// String returns the value followed by a non-breaking space and its unit symbol.
func (v Millimeter) String() string {
	return strconv.FormatFloat(float64(v), 'f', -1, 64) + "\u00A0mm"
}

// UnitSymbol returns the unit symbol of Millimeter.
func (Millimeter) UnitSymbol() string {
	return "mm"
}

// Inches returns the value converted into inches.
func (v Millimeter) Inches() Inch {
	return Inch(v / MillimetersPerInch)
}

// Millimeters returns the value itself, implementing the Length interface.
func (v Millimeter) Millimeters() Millimeter {
	return v
}

52
unit/page-dimensions.go Normal file
View File

@ -0,0 +1,52 @@
package unit
// PageDimensions contains the page size and margins of a scanned document page.
type PageDimensions[T UnitConstraint] struct {
	MediumSize Vec2[T] `json:"mediumSize"` // The size of the page or medium.
	ScanSize Rectangle[T] `json:"scanSize"` // The size of the scanned area or image.
}

// marginlessPageDimensions returns PageDimensions whose MediumSize and
// ScanSize both equal the given size, i.e. without any margin.
func marginlessPageDimensions(size Vec2[Millimeter]) PageDimensions[Millimeter] {
	return PageDimensions[Millimeter]{
		MediumSize: size,
		ScanSize:   Rectangle[Millimeter]{Size: size},
	}
}

// NewPageDimensionsFromDensity returns page dimensions for the given resolution resX and resY, and the respective densities.
//
// The resulting PageDimensions will have equal MediumSize and ScanSize (No margin).
// The correct margin or MediumSize can be added later on.
//
// In case any density is nil, this will return zero value PageDimensions.
func NewPageDimensionsFromDensity(resX, resY int, densityX, densityY Density) PageDimensions[Millimeter] {
	if densityX == nil || densityY == nil {
		return PageDimensions[Millimeter]{}
	}

	// Size in mm = pixel count divided by density (pixels per mm).
	return marginlessPageDimensions(Vec2[Millimeter]{
		X: Millimeter(resX) / Millimeter(densityX.PerMillimeter()),
		Y: Millimeter(resY) / Millimeter(densityY.PerMillimeter()),
	})
}

// NewPageDimensionsFromLengths returns page dimensions for the given lengths.
//
// The resulting PageDimensions will have equal MediumSize and ScanSize (No margin).
// The correct margin or MediumSize can be added later on.
//
// In case any length is nil, this will return zero value PageDimensions.
func NewPageDimensionsFromLengths(x, y Length) PageDimensions[Millimeter] {
	if x == nil || y == nil {
		return PageDimensions[Millimeter]{}
	}

	return marginlessPageDimensions(Vec2[Millimeter]{X: x.Millimeters(), Y: y.Millimeters()})
}

21
unit/per-inch.go Normal file
View File

@ -0,0 +1,21 @@
package unit
import "strconv"
// PerInch denotes a density of something unitless per inch.
type PerInch float64

// Compile time checks that PerInch implements the Unit and Density interfaces.
var _ Unit = PerInch(0)
var _ Density = PerInch(0)

// String returns the value followed by its unit symbol.
func (v PerInch) String() string {
	return strconv.FormatFloat(float64(v), 'f', -1, 64) + "/inch"
}

// UnitSymbol returns the unit symbol of PerInch.
func (PerInch) UnitSymbol() string {
	return "/inch"
}

// PerMillimeter returns the density converted into something per millimeter.
func (v PerInch) PerMillimeter() PerMillimeter {
	return PerMillimeter(v / MillimetersPerInch)
}

25
unit/per-millimeter.go Normal file
View File

@ -0,0 +1,25 @@
package unit
import "strconv"
// PerMillimeter denotes a density of something unitless per millimeter.
type PerMillimeter float64

// Compile time checks that PerMillimeter implements the Unit and Density interfaces.
var _ Unit = PerMillimeter(0)
var _ Density = PerMillimeter(0)

// String returns the value followed by its unit symbol.
func (v PerMillimeter) String() string {
	return strconv.FormatFloat(float64(v), 'f', -1, 64) + "/mm"
}

// UnitSymbol returns the unit symbol of PerMillimeter.
func (PerMillimeter) UnitSymbol() string {
	return "/mm"
}

// PerInch returns the density converted into something per inch.
func (v PerMillimeter) PerInch() PerInch {
	return PerInch(v * MillimetersPerInch)
}

// PerMillimeter returns the value itself, implementing the Density interface.
func (v PerMillimeter) PerMillimeter() PerMillimeter {
	return v
}

6
unit/rectangle.go Normal file
View File

@ -0,0 +1,6 @@
package unit
// Rectangle describes an axis aligned rectangle by its origin and size.
type Rectangle[T UnitConstraint] struct {
	Origin Vec2[T] // The offset of the rectangle.
	Size Vec2[T] // The dimensions of the rectangle.
}

12
unit/unit.go Normal file
View File

@ -0,0 +1,12 @@
package unit
// Unit is implemented by all unit types and provides their unit symbol.
type Unit interface {
	UnitSymbol() string
}

// UnitConstraint can be used as generic type constraints for Unit.
type UnitConstraint interface {
	Unit
	~float64
}

10
unit/unitless.go Normal file
View File

@ -0,0 +1,10 @@
package unit
// Unitless is a "Unit" for something without dimension.
type Unitless float64

// Compile time check that Unitless implements the Unit interface.
var _ Unit = Unitless(0)

// UnitSymbol returns the (empty) unit symbol of Unitless.
func (Unitless) UnitSymbol() string {
	return ""
}

62
unit/vec-2.go Normal file
View File

@ -0,0 +1,62 @@
package unit
import (
"math"
"strconv"
)
// Vec2 is a 2D vector of values with the unit of T.
type Vec2[T UnitConstraint] struct {
	X T `json:"x"`
	Y T `json:"y"`
}

// NewVec2FromVec2 takes an arbitrary Vec2 and returns it as a new vector with the type T.
// The components are cast as is; no unit conversion is done.
func NewVec2FromVec2[I, T UnitConstraint](a Vec2[I]) Vec2[T] {
	return Vec2[T]{T(a.X), T(a.Y)}
}

// String returns both components followed by a non-breaking space and the unit symbol of T.
func (a Vec2[T]) String() string {
	return "(" +
		strconv.FormatFloat(float64(a.X), 'f', -1, 64) + ", " +
		strconv.FormatFloat(float64(a.Y), 'f', -1, 64) +
		")\u00A0" + T.UnitSymbol(0)
}

// EqualWithPrecision reports whether a and b are component wise equal within the given precision.
func (a Vec2[T]) EqualWithPrecision(b Vec2[T], precision T) bool {
	if math.Abs(float64(a.X-b.X)) > float64(precision) {
		return false
	}
	if math.Abs(float64(a.Y-b.Y)) > float64(precision) {
		return false
	}
	return true
}

// Unitless returns the vector with its unit stripped.
func (a Vec2[T]) Unitless() Vec2[Unitless] {
	return Vec2[Unitless]{Unitless(a.X), Unitless(a.Y)}
}

// Added returns the component wise sum of a and b.
func (a Vec2[T]) Added(b Vec2[T]) Vec2[T] {
	return Vec2[T]{a.X + b.X, a.Y + b.Y}
}

// Subed returns the component wise difference of a and b.
func (a Vec2[T]) Subed(b Vec2[T]) Vec2[T] {
	return Vec2[T]{a.X - b.X, a.Y - b.Y}
}

// Scaled returns the vector scaled by the factor s.
func (a Vec2[T]) Scaled(s float64) Vec2[T] {
	return Vec2[T]{a.X * T(s), a.Y * T(s)}
}

// Normalized returns the vector scaled to a length of 1.
//
// NOTE(review): A zero vector yields NaN components; confirm callers never pass one.
func (a Vec2[T]) Normalized() Vec2[Unitless] {
	return a.Scaled(1 / float64(a.Length())).Unitless()
}

// LengthSqr returns the squared length of the vector.
func (a Vec2[T]) LengthSqr() T {
	return a.X*a.X + a.Y*a.Y
}

// Length returns the euclidean length of the vector.
func (a Vec2[T]) Length() T {
	// Bugfix: this used math.Abs, which returned the squared length unchanged
	// (LengthSqr is never negative) instead of its square root.
	return T(math.Sqrt(float64(a.LengthSqr())))
}

26
util.go Normal file
View File

@ -0,0 +1,26 @@
package main
import (
"Scanyonero/document"
"log"
"path/filepath"
)
// LoadExampleQueueEntries ingests a bundled example document and returns its
// pages as queue entries. It panics when the example file cannot be loaded or
// ingested, as it is only meant for development/testing.
func LoadExampleQueueEntries() []QueueEntry {
	ingestor := document.Ingestor{DefaultDPI: 600}

	pages, err := ingestor.Ingest(document.MustLoadFile(filepath.Join("test-documents", "300 DPI Feeder.jpg")))
	if err != nil {
		log.Panicf("Failed to ingest document: %v", err)
	}

	var entries []QueueEntry
	for i := range pages {
		entries = append(entries, QueueEntry{
			ID: NewQueueEntryID(),
			// Point into the pages slice instead of at the loop variable:
			// with a `for _, page := range` loop, `&page` addresses a copy
			// (and, before Go 1.22, the same reused variable for every
			// iteration, making all entries reference the last page).
			QueueEntryData: QueueEntryDataPage{Page: &pages[i]},
		})
	}
	return entries
}