diff --git a/.github/workflows/cross.sh b/.github/workflows/cross.sh
index 592267d..e447b6f 100755
--- a/.github/workflows/cross.sh
+++ b/.github/workflows/cross.sh
@@ -17,11 +17,13 @@ echo aix ; GOOS=aix GOARCH=ppc64 go build .
 echo js ; GOOS=js GOARCH=wasm go build .
 echo wasip1 ; GOOS=wasip1 GOARCH=wasm go build .
 echo linux-flock ; GOOS=linux GOARCH=amd64 go build -tags sqlite3_flock .
-echo linux-noshm ; GOOS=linux GOARCH=amd64 go build -tags sqlite3_noshm .
+echo linux-dotlk ; GOOS=linux GOARCH=amd64 go build -tags sqlite3_dotlk .
 echo linux-nosys ; GOOS=linux GOARCH=amd64 go build -tags sqlite3_nosys .
 echo darwin-flock ; GOOS=darwin GOARCH=amd64 go build -tags sqlite3_flock .
-echo darwin-noshm ; GOOS=darwin GOARCH=amd64 go build -tags sqlite3_noshm .
+echo darwin-dotlk ; GOOS=darwin GOARCH=amd64 go build -tags sqlite3_dotlk .
 echo darwin-nosys ; GOOS=darwin GOARCH=amd64 go build -tags sqlite3_nosys .
+echo windows-dotlk ; GOOS=windows GOARCH=amd64 go build -tags sqlite3_dotlk .
 echo windows-nosys ; GOOS=windows GOARCH=amd64 go build -tags sqlite3_nosys .
+echo freebsd-dotlk ; GOOS=freebsd GOARCH=amd64 go build -tags sqlite3_dotlk .
 echo freebsd-nosys ; GOOS=freebsd GOARCH=amd64 go build -tags sqlite3_nosys .
-echo solaris-flock ; GOOS=solaris GOARCH=amd64 go build -tags sqlite3_flock .
\ No newline at end of file
+echo solaris-dotlk ; GOOS=solaris GOARCH=amd64 go build -tags sqlite3_dotlk .
\ No newline at end of file
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 8a50039..fda82a8 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -60,10 +60,6 @@ jobs:
         run: go test -v -tags sqlite3_dotlk ./...
         if: matrix.os == 'macos-latest'
 
-      - name: Test no shared memory
-        run: go test -v -tags sqlite3_noshm ./...
-        if: matrix.os == 'ubuntu-latest'
-
       - name: Test no locks
         run: go test -v -tags sqlite3_nosys ./...
         if: matrix.os == 'ubuntu-latest'
diff --git a/driver/example_test.go b/driver/example_test.go
index 5628ef9..1e04050 100644
--- a/driver/example_test.go
+++ b/driver/example_test.go
@@ -1,4 +1,4 @@
-//go:build (linux || darwin || windows || freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) && !sqlite3_nosys
+//go:build ((linux || darwin || windows || freebsd || openbsd || netbsd || dragonfly || illumos) && !sqlite3_nosys) || sqlite3_flock || sqlite3_dotlk
 
 package driver_test
 
diff --git a/internal/util/mmap.go b/internal/util/mmap.go
index b17c659..25d1936 100644
--- a/internal/util/mmap.go
+++ b/internal/util/mmap.go
@@ -1,4 +1,4 @@
-//go:build unix && !(sqlite3_noshm || sqlite3_nosys)
+//go:build unix && !sqlite3_nosys
 
 package util
 
diff --git a/internal/util/mmap_other.go b/internal/util/mmap_other.go
index 7eb710b..e11f953 100644
--- a/internal/util/mmap_other.go
+++ b/internal/util/mmap_other.go
@@ -1,4 +1,4 @@
-//go:build !unix || sqlite3_noshm || sqlite3_nosys
+//go:build !unix || sqlite3_nosys
 
 package util
 
diff --git a/tests/bradfitz/sql_test.go b/tests/bradfitz/sql_test.go
index bc86b0f..d7e2b3b 100644
--- a/tests/bradfitz/sql_test.go
+++ b/tests/bradfitz/sql_test.go
@@ -1,4 +1,4 @@
-//go:build (linux || darwin || windows || freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) && !sqlite3_nosys
+//go:build ((linux || darwin || windows || freebsd || openbsd || netbsd || dragonfly || illumos) && !sqlite3_nosys) || sqlite3_flock || sqlite3_dotlk
 
 package bradfitz
 
diff --git a/vfs/README.md b/vfs/README.md
index aecae6d..50e338e 100644
--- a/vfs/README.md
+++ b/vfs/README.md
@@ -15,8 +15,7 @@ The main differences are [file locking](#file-locking) and [WAL mode](#write-ahe
 
 POSIX advisory locks, which SQLite uses on Unix, are
 [broken by design](https://github.com/sqlite/sqlite/blob/b74eb0/src/os_unix.c#L1073-L1161).
-
-On Linux and macOS, this package uses
+Instead, on Linux and macOS, this package uses
 [OFD locks](https://www.gnu.org/software/libc/manual/html_node/Open-File-Description-Locks.html)
 to synchronize access to database files.
 
@@ -45,7 +44,7 @@ to check if your build supports file locking.
 
 ### Write-Ahead Logging
 
-On little-endian Unix, this package uses `mmap` to implement
+On Unix, this package may use `mmap` to implement
 [shared-memory for the WAL-index](https://sqlite.org/wal.html#implementation_of_shared_memory_for_the_wal_index),
 like SQLite.
 
@@ -54,6 +53,9 @@ a WAL database can only be accessed by a single proccess.
 Other processes that attempt to access a database locked with BSD locks,
 will fail with the [`SQLITE_PROTOCOL`](https://sqlite.org/rescode.html#protocol) error code.
 
+You can also opt into a cross-platform, in-process, memory-sharing implementation
+with the `sqlite3_dotlk` build tag.
+
 Otherwise, [WAL support is limited](https://sqlite.org/wal.html#noshm),
 and `EXCLUSIVE` locking mode must be set to create, read, and write WAL databases.
 To use `EXCLUSIVE` locking mode with the
@@ -66,7 +68,7 @@ to check if your build supports shared memory.
 
 ### Batch-Atomic Write
 
-On 64-bit Linux, this package supports
+On Linux, this package may support
 [batch-atomic writes](https://sqlite.org/cgi/src/technote/714)
 on the F2FS filesystem.
 
@@ -87,9 +89,7 @@ The implementation is compatible with SQLite's
 The VFS can be customized with a few build tags:
 - `sqlite3_flock` forces the use of BSD locks.
 - `sqlite3_dotlk` forces the use of dot-file locks.
-- `sqlite3_nosys` prevents importing [`x/sys`](https://pkg.go.dev/golang.org/x/sys);
-  disables locking _and_ shared memory on all platforms.
-- `sqlite3_noshm` disables shared memory on all platforms.
+- `sqlite3_nosys` prevents importing [`x/sys`](https://pkg.go.dev/golang.org/x/sys).
 
 > [!IMPORTANT]
 > The default configuration of this package is compatible with the standard
diff --git a/vfs/adiantum/example_test.go b/vfs/adiantum/example_test.go
index aae7ed9..d4da01a 100644
--- a/vfs/adiantum/example_test.go
+++ b/vfs/adiantum/example_test.go
@@ -1,4 +1,4 @@
-//go:build (linux || darwin || windows || freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) && !sqlite3_nosys
+//go:build ((linux || darwin || windows || freebsd || openbsd || netbsd || dragonfly || illumos) && !sqlite3_nosys) || sqlite3_flock || sqlite3_dotlk
 
 package adiantum_test
 
diff --git a/vfs/shm.go b/vfs/shm.go
index a63d414..9affac0 100644
--- a/vfs/shm.go
+++ b/vfs/shm.go
@@ -1,4 +1,4 @@
-//go:build (darwin || linux || freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) && (386 || arm || amd64 || arm64 || riscv64 || ppc64le) && !(sqlite3_dotlk || sqlite3_noshm || sqlite3_nosys)
+//go:build ((darwin || linux || freebsd || openbsd || netbsd || dragonfly || illumos) && (386 || arm || amd64 || arm64 || riscv64 || ppc64le) && !sqlite3_nosys) || sqlite3_flock || sqlite3_dotlk
 
 package vfs
 
diff --git a/vfs/shm_bsd.go b/vfs/shm_bsd.go
index c5a6aaf..079c8f4 100644
--- a/vfs/shm_bsd.go
+++ b/vfs/shm_bsd.go
@@ -1,4 +1,4 @@
-//go:build (freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) && (386 || arm || amd64 || arm64 || riscv64 || ppc64le) && !(sqlite3_dotlk || sqlite3_noshm || sqlite3_nosys)
+//go:build ((freebsd || openbsd || netbsd || dragonfly || illumos) && (386 || arm || amd64 || arm64 || riscv64 || ppc64le) && !(sqlite3_dotlk || sqlite3_nosys)) || sqlite3_flock
 
 package vfs
 
@@ -23,9 +23,9 @@ type vfsShmFile struct {
 	// +checklocks:vfsShmFilesMtx
 	refs int
 
-	// +checklocks:lockMtx
-	lock    [_SHM_NLOCK]int16
-	lockMtx sync.Mutex
+	// +checklocks:Mutex
+	lock [_SHM_NLOCK]int16
+	sync.Mutex
 }
 
 var (
@@ -54,7 +54,7 @@ func (s *vfsShm) Close() error {
 	s.shmLock(0, _SHM_NLOCK, _SHM_UNLOCK)
 
 	// Decrease reference count.
-	if s.vfsShmFile.refs > 1 {
+	if s.vfsShmFile.refs > 0 {
 		s.vfsShmFile.refs--
 		s.vfsShmFile = nil
 		return nil
@@ -119,7 +119,6 @@ func (s *vfsShm) shmOpen() (rc _ErrorCode) {
 	s.vfsShmFile = &vfsShmFile{
 		File: f,
 		info: fi,
-		refs: 1,
 	}
 	f = nil // Don't close the file.
 	for i, g := range vfsShmFiles {
@@ -174,8 +173,8 @@ func (s *vfsShm) shmMap(ctx context.Context, mod api.Module, id, size int32, ext
 }
 
 func (s *vfsShm) shmLock(offset, n int32, flags _ShmFlag) _ErrorCode {
-	s.lockMtx.Lock()
-	defer s.lockMtx.Unlock()
+	s.Lock()
+	defer s.Unlock()
 
 	switch {
 	case flags&_SHM_UNLOCK != 0:
@@ -234,8 +233,7 @@ func (s *vfsShm) shmUnmap(delete bool) {
 	for _, r := range s.regions {
 		r.Unmap()
 	}
-	clear(s.regions)
-	s.regions = s.regions[:0]
+	s.regions = nil
 
 	// Close the file.
 	if delete {
@@ -245,7 +243,7 @@ func (s *vfsShm) shmBarrier() {
-	s.lockMtx.Lock()
+	s.Lock()
 	//lint:ignore SA2001 memory barrier.
-	s.lockMtx.Unlock()
+	s.Unlock()
 }
diff --git a/vfs/shm_copy.go b/vfs/shm_copy.go
new file mode 100644
index 0000000..9f7f586
--- /dev/null
+++ b/vfs/shm_copy.go
@@ -0,0 +1,277 @@
+//go:build sqlite3_dotlk
+
+package vfs
+
+import (
+	"context"
+	"sync"
+	"unsafe"
+
+	"github.com/ncruces/go-sqlite3/internal/util"
+	"github.com/tetratelabs/wazero/api"
+)
+
+const (
+	_SHM_NLOCK     = 8
+	_WALINDEX_PGSZ = 32768
+)
+
+type vfsShmBuffer struct {
+	shared []byte // +checklocks:Mutex
+	refs   int    // +checklocks:vfsShmBuffersMtx
+
+	lock [_SHM_NLOCK]int16 // +checklocks:Mutex
+	sync.Mutex
+}
+
+var (
+	// +checklocks:vfsShmBuffersMtx
+	vfsShmBuffers    = map[string]*vfsShmBuffer{}
+	vfsShmBuffersMtx sync.Mutex
+)
+
+type vfsShm struct {
+	*vfsShmBuffer
+	mod      api.Module
+	alloc    api.Function
+	free     api.Function
+	path     string
+	shadow   []byte
+	ptrs     []uint32
+	stack    [1]uint64
+	lock     [_SHM_NLOCK]bool
+	readOnly bool
+}
+
+func (s *vfsShm) Close() error {
+	if s.vfsShmBuffer == nil {
+		return nil
+	}
+
+	vfsShmBuffersMtx.Lock()
+	defer vfsShmBuffersMtx.Unlock()
+
+	// Unlock everything.
+	s.shmLock(0, _SHM_NLOCK, _SHM_UNLOCK)
+
+	// Decrease reference count.
+	if s.vfsShmBuffer.refs > 0 {
+		s.vfsShmBuffer.refs--
+		s.vfsShmBuffer = nil
+		return nil
+	}
+
+	delete(vfsShmBuffers, s.path)
+	return nil
+}
+
+func (s *vfsShm) shmOpen() {
+	if s.vfsShmBuffer != nil {
+		return
+	}
+
+	vfsShmBuffersMtx.Lock()
+	defer vfsShmBuffersMtx.Unlock()
+
+	// Find a shared buffer, increase the reference count.
+	if g, ok := vfsShmBuffers[s.path]; ok {
+		s.vfsShmBuffer = g
+		g.refs++
+		return
+	}
+
+	// Add the new shared buffer.
+	s.vfsShmBuffer = &vfsShmBuffer{}
+	vfsShmBuffers[s.path] = s.vfsShmBuffer
+}
+
+func (s *vfsShm) shmMap(ctx context.Context, mod api.Module, id, size int32, extend bool) (uint32, _ErrorCode) {
+	if size != _WALINDEX_PGSZ {
+		return 0, _IOERR_SHMMAP
+	}
+	if s.mod == nil {
+		s.mod = mod
+		s.free = mod.ExportedFunction("sqlite3_free")
+		s.alloc = mod.ExportedFunction("sqlite3_malloc64")
+	}
+
+	s.shmOpen()
+	s.Lock()
+	defer s.Unlock()
+	defer s.shmAcquire()
+
+	n := (int(id) + 1) * int(size)
+
+	if n > len(s.shared) {
+		if !extend {
+			return 0, _OK
+		}
+		s.shared = append(s.shared, make([]byte, n-len(s.shared))...)
+	}
+
+	if n > len(s.shadow) {
+		s.shadow = append(s.shadow, make([]byte, n-len(s.shadow))...)
+	}
+
+	for int(id) >= len(s.ptrs) {
+		s.stack[0] = uint64(size)
+		if err := s.alloc.CallWithStack(ctx, s.stack[:]); err != nil {
+			panic(err)
+		}
+		if s.stack[0] == 0 {
+			panic(util.OOMErr)
+		}
+		clear(util.View(s.mod, uint32(s.stack[0]), _WALINDEX_PGSZ))
+		s.ptrs = append(s.ptrs, uint32(s.stack[0]))
+	}
+
+	return s.ptrs[id], _OK
+}
+
+func (s *vfsShm) shmLock(offset, n int32, flags _ShmFlag) _ErrorCode {
+	s.Lock()
+	defer s.Unlock()
+
+	if flags&_SHM_UNLOCK == 0 {
+		s.shmAcquire()
+	} else {
+		s.shmRelease()
+	}
+
+	switch {
+	case flags&_SHM_UNLOCK != 0:
+		for i := offset; i < offset+n; i++ {
+			if s.lock[i] {
+				if s.vfsShmBuffer.lock[i] == 0 {
+					panic(util.AssertErr())
+				}
+				if s.vfsShmBuffer.lock[i] <= 0 {
+					s.vfsShmBuffer.lock[i] = 0
+				} else {
+					s.vfsShmBuffer.lock[i]--
+				}
+				s.lock[i] = false
+			}
+		}
+	case flags&_SHM_SHARED != 0:
+		for i := offset; i < offset+n; i++ {
+			if s.lock[i] {
+				panic(util.AssertErr())
+			}
+			if s.vfsShmBuffer.lock[i]+1 <= 0 {
+				return _BUSY
+			}
+		}
+		for i := offset; i < offset+n; i++ {
+			s.vfsShmBuffer.lock[i]++
+			s.lock[i] = true
+		}
+	case flags&_SHM_EXCLUSIVE != 0:
+		for i := offset; i < offset+n; i++ {
+			if s.lock[i] {
+				panic(util.AssertErr())
+			}
+			if s.vfsShmBuffer.lock[i] != 0 {
+				return _BUSY
+			}
+		}
+		for i := offset; i < offset+n; i++ {
+			s.vfsShmBuffer.lock[i] = -1
+			s.lock[i] = true
+		}
+	default:
+		panic(util.AssertErr())
+	}
+
+	return _OK
+}
+
+func (s *vfsShm) shmUnmap(delete bool) {
+	if s.vfsShmBuffer == nil {
+		return
+	}
+	defer s.Close()
+
+	s.Lock()
+	s.shmRelease()
+	defer s.Unlock()
+
+	for _, p := range s.ptrs {
+		s.stack[0] = uint64(p)
+		if err := s.free.CallWithStack(context.Background(), s.stack[:]); err != nil {
+			panic(err)
+		}
+	}
+	s.ptrs = nil
+	s.shadow = nil
+}
+
+func (s *vfsShm) shmBarrier() {
+	s.Lock()
+	s.shmAcquire()
+	s.shmRelease()
+	s.Unlock()
+}
+
+// This looks like a safe, if inefficient, way of keeping memory in sync.
+//
+// The WAL-index file starts with a header.
+// This header starts with two 48 byte, checksummed, copies of the same information,
+// which are accessed independently between memory barriers.
+// The checkpoint information that follows uses 4 byte aligned words.
+//
+// Finally, we have the WAL-index hash tables,
+// which are only modified holding the exclusive WAL_WRITE_LOCK.
+//
+// Since all the data is either redundant+checksummed,
+// 4 byte aligned, or modified under an exclusive lock,
+// the copies below should correctly keep memory in sync.
+//
+// https://sqlite.org/walformat.html#the_wal_index_file_format
+
+// +checklocks:s.Mutex
+func (s *vfsShm) shmAcquire() {
+	// Copies modified words from shared to private memory.
+	for id, p := range s.ptrs {
+		i0 := id * _WALINDEX_PGSZ
+		i1 := i0 + _WALINDEX_PGSZ
+		shared := shmPage(s.shared[i0:i1])
+		shadow := shmPage(s.shadow[i0:i1])
+		privat := shmPage(util.View(s.mod, p, _WALINDEX_PGSZ))
+		if *shadow == *shared {
+			continue
+		}
+		for i, shared := range shared {
+			if shadow[i] != shared {
+				shadow[i] = shared
+				privat[i] = shared
+			}
+		}
+	}
+}
+
+// +checklocks:s.Mutex
+func (s *vfsShm) shmRelease() {
+	// Copies modified words from private to shared memory.
+	for id, p := range s.ptrs {
+		i0 := id * _WALINDEX_PGSZ
+		i1 := i0 + _WALINDEX_PGSZ
+		shared := shmPage(s.shared[i0:i1])
+		shadow := shmPage(s.shadow[i0:i1])
+		privat := shmPage(util.View(s.mod, p, _WALINDEX_PGSZ))
+		if *shadow == *privat {
+			continue
+		}
+		for i, privat := range privat {
+			if shadow[i] != privat {
+				shadow[i] = privat
+				shared[i] = privat
+			}
+		}
+	}
+}
+
+func shmPage(s []byte) *[_WALINDEX_PGSZ / 4]uint32 {
+	p := (*uint32)(unsafe.Pointer(unsafe.SliceData(s)))
+	return (*[_WALINDEX_PGSZ / 4]uint32)(unsafe.Slice(p, _WALINDEX_PGSZ/4))
+}
diff --git a/vfs/shm_ofd.go b/vfs/shm_ofd.go
index 8107b46..bf04755 100644
--- a/vfs/shm_ofd.go
+++ b/vfs/shm_ofd.go
@@ -1,4 +1,4 @@
-//go:build (darwin || linux) && (386 || arm || amd64 || arm64 || riscv64 || ppc64le) && !(sqlite3_flock || sqlite3_dotlk || sqlite3_noshm || sqlite3_nosys)
+//go:build (darwin || linux) && (386 || arm || amd64 || arm64 || riscv64 || ppc64le) && !(sqlite3_flock || sqlite3_dotlk || sqlite3_nosys)
 
 package vfs
 
diff --git a/vfs/shm_other.go b/vfs/shm_other.go
index 31d71c4..9394b62 100644
--- a/vfs/shm_other.go
+++ b/vfs/shm_other.go
@@ -1,4 +1,4 @@
-//go:build !(darwin || linux || freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) || !(386 || arm || amd64 || arm64 || riscv64 || ppc64le) || sqlite3_dotlk || sqlite3_noshm || sqlite3_nosys
+//go:build !(((darwin || linux || freebsd || openbsd || netbsd || dragonfly || illumos) && (386 || arm || amd64 || arm64 || riscv64 || ppc64le) && !sqlite3_nosys) || sqlite3_flock || sqlite3_dotlk)
 
 package vfs
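Usage sketch (not part of the patch): to opt into the in-process WAL-index added in vfs/shm_copy.go, a program is simply built with the new tag, e.g. go build -tags sqlite3_dotlk. The snippet below assumes this module's driver and embed packages and their documented _pragma DSN parameter; demo.db is a placeholder path.

// Build with: go build -tags sqlite3_dotlk
package main

import (
	"database/sql"
	"log"

	_ "github.com/ncruces/go-sqlite3/driver" // registers the "sqlite3" database/sql driver
	_ "github.com/ncruces/go-sqlite3/embed"  // embeds the SQLite Wasm binary
)

func main() {
	// With sqlite3_dotlk, the WAL-index lives in process memory, so WAL
	// databases work on any GOOS, shared only within the current process.
	db, err := sql.Open("sqlite3", "file:demo.db?_pragma=journal_mode(wal)")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	if _, err := db.Exec(`CREATE TABLE IF NOT EXISTS t (x)`); err != nil {
		log.Fatal(err)
	}
}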