updated to make better use of go 1.16 features
This commit is contained in:
parent
d272cafd2e
commit
2510448ed7
12
assets/assets.go
Normal file
12
assets/assets.go
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
package assets
|
||||||
|
|
||||||
|
import (
|
||||||
|
"embed"
|
||||||
|
)
|
||||||
|
|
||||||
|
//go:embed *.ico *.html *.css
|
||||||
|
var assets embed.FS
|
||||||
|
|
||||||
|
func ReadFile(name string) ([]byte, error) {
|
||||||
|
return assets.ReadFile(name)
|
||||||
|
}
|
5
go.mod
Normal file
5
go.mod
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
module git.binarythought.com/cdramey/qurl
|
||||||
|
|
||||||
|
go 1.16
|
||||||
|
|
||||||
|
require go.etcd.io/bbolt v1.3.6
|
4
go.sum
Normal file
4
go.sum
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU=
|
||||||
|
go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4=
|
||||||
|
golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d h1:L/IKR6COd7ubZrs2oTnTi73IhgqJ71c9s80WsQnh0Es=
|
||||||
|
golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
6
main.go
6
main.go
@ -10,12 +10,6 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
)
|
)
|
||||||
|
|
||||||
//go:generate go run ./togo -n Favicon_ico -i assets/favicon.ico -p static -o static/favicon_ico.go
|
|
||||||
//go:generate go run ./togo -n Index_html -i assets/index.html -p static -o static/index_html.go
|
|
||||||
//go:generate go run ./togo -n Qurl_css -i assets/qurl.css -p static -o static/qurl_css.go
|
|
||||||
//go:generate go run ./togo -n Submit_html -i assets/submit.html -p static -o static/submit_html.go
|
|
||||||
//go:generate go run ./togo -n Usage_html -i assets/usage.html -p static -o static/usage_html.go
|
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
dburl := flag.String("u", "bolt:./qurl.db", "url to database")
|
dburl := flag.String("u", "bolt:./qurl.db", "url to database")
|
||||||
lsaddr := flag.String("l", "127.0.0.1:8080", "listen address/port")
|
lsaddr := flag.String("l", "127.0.0.1:8080", "listen address/port")
|
||||||
|
@ -3,8 +3,8 @@ package pages
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"git.binarythought.com/cdramey/qurl/obj"
|
"git.binarythought.com/cdramey/qurl/obj"
|
||||||
"git.binarythought.com/cdramey/qurl/static"
|
|
||||||
"git.binarythought.com/cdramey/qurl/storage"
|
"git.binarythought.com/cdramey/qurl/storage"
|
||||||
|
"git.binarythought.com/cdramey/qurl/assets"
|
||||||
"html/template"
|
"html/template"
|
||||||
"net/http"
|
"net/http"
|
||||||
)
|
)
|
||||||
@ -68,24 +68,44 @@ func (ctx *RootHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
|||||||
|
|
||||||
func (ctx *RootHandler) Init() error {
|
func (ctx *RootHandler) Init() error {
|
||||||
// Initialize the static content object for the index page
|
// Initialize the static content object for the index page
|
||||||
ctx.index = &StaticContent{Type: "text/html", Content: static.Index_html}
|
data, err := assets.ReadFile("index.html")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
ctx.index = &StaticContent{Type: "text/html", Content: data }
|
||||||
ctx.index.Init()
|
ctx.index.Init()
|
||||||
|
|
||||||
// Initialize the static content object for the css
|
// Initialize the static content object for the css
|
||||||
ctx.css = &StaticContent{Type: "text/css", Content: static.Qurl_css}
|
data, err = assets.ReadFile("qurl.css")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
ctx.css = &StaticContent{Type: "text/css", Content: data }
|
||||||
ctx.css.Init()
|
ctx.css.Init()
|
||||||
|
|
||||||
// Initialize the static content object favicon
|
// Initialize the static content object favicon
|
||||||
ctx.favi = &StaticContent{Type: "image/x-icon", Content: static.Favicon_ico}
|
data, err = assets.ReadFile("favicon.ico")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
ctx.favi = &StaticContent{Type: "image/x-icon", Content: data}
|
||||||
ctx.favi.Init()
|
ctx.favi.Init()
|
||||||
|
|
||||||
// Initialize the api usage instructions
|
// Initialize the api usage instructions
|
||||||
ctx.usage = &StaticContent{Type: "text/html", Content: static.Usage_html}
|
data, err = assets.ReadFile("usage.html")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
ctx.usage = &StaticContent{Type: "text/html", Content: data}
|
||||||
ctx.usage.Init()
|
ctx.usage.Init()
|
||||||
|
|
||||||
// Initialize submit page template
|
// Initialize submit page template
|
||||||
|
data, err = assets.ReadFile("submit.html")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
ctx.submit = template.New("submit.html")
|
ctx.submit = template.New("submit.html")
|
||||||
_, err := ctx.submit.Parse(string(static.Submit_html))
|
_, err = ctx.submit.Parse(string(data))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
4
static/.gitignore
vendored
4
static/.gitignore
vendored
@ -1,4 +0,0 @@
|
|||||||
# Ignore everything in this directory
|
|
||||||
*
|
|
||||||
# Except this file
|
|
||||||
!.gitignore
|
|
88
togo/main.go
88
togo/main.go
@ -1,88 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"flag"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"log"
|
|
||||||
"os"
|
|
||||||
"time"
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
pkg := flag.String("p", "", "package")
|
|
||||||
name := flag.String("n", "", "const name")
|
|
||||||
inputfn := flag.String("i", "", "input file")
|
|
||||||
outputfn := flag.String("o", "", "output file")
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
if *pkg == "" {
|
|
||||||
log.Fatal("pkg required")
|
|
||||||
}
|
|
||||||
|
|
||||||
if *name == "" {
|
|
||||||
log.Fatal("name required")
|
|
||||||
}
|
|
||||||
|
|
||||||
if *inputfn == "" {
|
|
||||||
log.Fatal("input file required")
|
|
||||||
}
|
|
||||||
|
|
||||||
if *outputfn == "" {
|
|
||||||
*outputfn = *inputfn + ".go"
|
|
||||||
}
|
|
||||||
|
|
||||||
omod := fmod(*outputfn)
|
|
||||||
imod := fmod(*inputfn)
|
|
||||||
if omod.After(imod) {
|
|
||||||
log.Printf("Refusing to update %s\n", *outputfn)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
ifile, err := os.Open(*inputfn)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
defer ifile.Close()
|
|
||||||
|
|
||||||
ofile, err := os.OpenFile(*outputfn, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0660)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
defer ofile.Close()
|
|
||||||
|
|
||||||
fmt.Fprintf(ofile, "package %s\n\nvar %s = []byte{", *pkg, *name)
|
|
||||||
|
|
||||||
buf := make([]byte, 4096)
|
|
||||||
for c := 0; ; {
|
|
||||||
i, err := ifile.Read(buf)
|
|
||||||
if err != nil {
|
|
||||||
if err != io.EOF {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
for j := 0; j < i; j++ {
|
|
||||||
if (c % 13) == 0 {
|
|
||||||
fmt.Fprintf(ofile, "\n\t")
|
|
||||||
} else {
|
|
||||||
fmt.Fprintf(ofile, " ")
|
|
||||||
}
|
|
||||||
fmt.Fprintf(ofile, "0x%02x,", buf[j])
|
|
||||||
c++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fmt.Fprintf(ofile, "\n}\n")
|
|
||||||
}
|
|
||||||
|
|
||||||
func fmod(fn string) time.Time {
|
|
||||||
fi, err := os.Stat(fn)
|
|
||||||
if err != nil {
|
|
||||||
if os.IsNotExist(err) {
|
|
||||||
return time.Time{}
|
|
||||||
}
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
return fi.ModTime()
|
|
||||||
}
|
|
20
vendor/go.etcd.io/bbolt/LICENSE
generated
vendored
20
vendor/go.etcd.io/bbolt/LICENSE
generated
vendored
@ -1,20 +0,0 @@
|
|||||||
The MIT License (MIT)
|
|
||||||
|
|
||||||
Copyright (c) 2013 Ben Johnson
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
||||||
this software and associated documentation files (the "Software"), to deal in
|
|
||||||
the Software without restriction, including without limitation the rights to
|
|
||||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
|
||||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
|
||||||
subject to the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in all
|
|
||||||
copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
|
||||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
|
||||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
||||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
10
vendor/go.etcd.io/bbolt/bolt_386.go
generated
vendored
10
vendor/go.etcd.io/bbolt/bolt_386.go
generated
vendored
@ -1,10 +0,0 @@
|
|||||||
package bbolt
|
|
||||||
|
|
||||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
|
||||||
const maxMapSize = 0x7FFFFFFF // 2GB
|
|
||||||
|
|
||||||
// maxAllocSize is the size used when creating array pointers.
|
|
||||||
const maxAllocSize = 0xFFFFFFF
|
|
||||||
|
|
||||||
// Are unaligned load/stores broken on this arch?
|
|
||||||
var brokenUnaligned = false
|
|
10
vendor/go.etcd.io/bbolt/bolt_amd64.go
generated
vendored
10
vendor/go.etcd.io/bbolt/bolt_amd64.go
generated
vendored
@ -1,10 +0,0 @@
|
|||||||
package bbolt
|
|
||||||
|
|
||||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
|
||||||
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
|
|
||||||
|
|
||||||
// maxAllocSize is the size used when creating array pointers.
|
|
||||||
const maxAllocSize = 0x7FFFFFFF
|
|
||||||
|
|
||||||
// Are unaligned load/stores broken on this arch?
|
|
||||||
var brokenUnaligned = false
|
|
28
vendor/go.etcd.io/bbolt/bolt_arm.go
generated
vendored
28
vendor/go.etcd.io/bbolt/bolt_arm.go
generated
vendored
@ -1,28 +0,0 @@
|
|||||||
package bbolt
|
|
||||||
|
|
||||||
import "unsafe"
|
|
||||||
|
|
||||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
|
||||||
const maxMapSize = 0x7FFFFFFF // 2GB
|
|
||||||
|
|
||||||
// maxAllocSize is the size used when creating array pointers.
|
|
||||||
const maxAllocSize = 0xFFFFFFF
|
|
||||||
|
|
||||||
// Are unaligned load/stores broken on this arch?
|
|
||||||
var brokenUnaligned bool
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
// Simple check to see whether this arch handles unaligned load/stores
|
|
||||||
// correctly.
|
|
||||||
|
|
||||||
// ARM9 and older devices require load/stores to be from/to aligned
|
|
||||||
// addresses. If not, the lower 2 bits are cleared and that address is
|
|
||||||
// read in a jumbled up order.
|
|
||||||
|
|
||||||
// See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka15414.html
|
|
||||||
|
|
||||||
raw := [6]byte{0xfe, 0xef, 0x11, 0x22, 0x22, 0x11}
|
|
||||||
val := *(*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(&raw)) + 2))
|
|
||||||
|
|
||||||
brokenUnaligned = val != 0x11222211
|
|
||||||
}
|
|
12
vendor/go.etcd.io/bbolt/bolt_arm64.go
generated
vendored
12
vendor/go.etcd.io/bbolt/bolt_arm64.go
generated
vendored
@ -1,12 +0,0 @@
|
|||||||
// +build arm64
|
|
||||||
|
|
||||||
package bbolt
|
|
||||||
|
|
||||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
|
||||||
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
|
|
||||||
|
|
||||||
// maxAllocSize is the size used when creating array pointers.
|
|
||||||
const maxAllocSize = 0x7FFFFFFF
|
|
||||||
|
|
||||||
// Are unaligned load/stores broken on this arch?
|
|
||||||
var brokenUnaligned = false
|
|
10
vendor/go.etcd.io/bbolt/bolt_linux.go
generated
vendored
10
vendor/go.etcd.io/bbolt/bolt_linux.go
generated
vendored
@ -1,10 +0,0 @@
|
|||||||
package bbolt
|
|
||||||
|
|
||||||
import (
|
|
||||||
"syscall"
|
|
||||||
)
|
|
||||||
|
|
||||||
// fdatasync flushes written data to a file descriptor.
|
|
||||||
func fdatasync(db *DB) error {
|
|
||||||
return syscall.Fdatasync(int(db.file.Fd()))
|
|
||||||
}
|
|
12
vendor/go.etcd.io/bbolt/bolt_mips64x.go
generated
vendored
12
vendor/go.etcd.io/bbolt/bolt_mips64x.go
generated
vendored
@ -1,12 +0,0 @@
|
|||||||
// +build mips64 mips64le
|
|
||||||
|
|
||||||
package bbolt
|
|
||||||
|
|
||||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
|
||||||
const maxMapSize = 0x8000000000 // 512GB
|
|
||||||
|
|
||||||
// maxAllocSize is the size used when creating array pointers.
|
|
||||||
const maxAllocSize = 0x7FFFFFFF
|
|
||||||
|
|
||||||
// Are unaligned load/stores broken on this arch?
|
|
||||||
var brokenUnaligned = false
|
|
12
vendor/go.etcd.io/bbolt/bolt_mipsx.go
generated
vendored
12
vendor/go.etcd.io/bbolt/bolt_mipsx.go
generated
vendored
@ -1,12 +0,0 @@
|
|||||||
// +build mips mipsle
|
|
||||||
|
|
||||||
package bbolt
|
|
||||||
|
|
||||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
|
||||||
const maxMapSize = 0x40000000 // 1GB
|
|
||||||
|
|
||||||
// maxAllocSize is the size used when creating array pointers.
|
|
||||||
const maxAllocSize = 0xFFFFFFF
|
|
||||||
|
|
||||||
// Are unaligned load/stores broken on this arch?
|
|
||||||
var brokenUnaligned = false
|
|
27
vendor/go.etcd.io/bbolt/bolt_openbsd.go
generated
vendored
27
vendor/go.etcd.io/bbolt/bolt_openbsd.go
generated
vendored
@ -1,27 +0,0 @@
|
|||||||
package bbolt
|
|
||||||
|
|
||||||
import (
|
|
||||||
"syscall"
|
|
||||||
"unsafe"
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
|
||||||
msAsync = 1 << iota // perform asynchronous writes
|
|
||||||
msSync // perform synchronous writes
|
|
||||||
msInvalidate // invalidate cached data
|
|
||||||
)
|
|
||||||
|
|
||||||
func msync(db *DB) error {
|
|
||||||
_, _, errno := syscall.Syscall(syscall.SYS_MSYNC, uintptr(unsafe.Pointer(db.data)), uintptr(db.datasz), msInvalidate)
|
|
||||||
if errno != 0 {
|
|
||||||
return errno
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func fdatasync(db *DB) error {
|
|
||||||
if db.data != nil {
|
|
||||||
return msync(db)
|
|
||||||
}
|
|
||||||
return db.file.Sync()
|
|
||||||
}
|
|
12
vendor/go.etcd.io/bbolt/bolt_ppc.go
generated
vendored
12
vendor/go.etcd.io/bbolt/bolt_ppc.go
generated
vendored
@ -1,12 +0,0 @@
|
|||||||
// +build ppc
|
|
||||||
|
|
||||||
package bbolt
|
|
||||||
|
|
||||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
|
||||||
const maxMapSize = 0x7FFFFFFF // 2GB
|
|
||||||
|
|
||||||
// maxAllocSize is the size used when creating array pointers.
|
|
||||||
const maxAllocSize = 0xFFFFFFF
|
|
||||||
|
|
||||||
// Are unaligned load/stores broken on this arch?
|
|
||||||
var brokenUnaligned = false
|
|
12
vendor/go.etcd.io/bbolt/bolt_ppc64.go
generated
vendored
12
vendor/go.etcd.io/bbolt/bolt_ppc64.go
generated
vendored
@ -1,12 +0,0 @@
|
|||||||
// +build ppc64
|
|
||||||
|
|
||||||
package bbolt
|
|
||||||
|
|
||||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
|
||||||
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
|
|
||||||
|
|
||||||
// maxAllocSize is the size used when creating array pointers.
|
|
||||||
const maxAllocSize = 0x7FFFFFFF
|
|
||||||
|
|
||||||
// Are unaligned load/stores broken on this arch?
|
|
||||||
var brokenUnaligned = false
|
|
12
vendor/go.etcd.io/bbolt/bolt_ppc64le.go
generated
vendored
12
vendor/go.etcd.io/bbolt/bolt_ppc64le.go
generated
vendored
@ -1,12 +0,0 @@
|
|||||||
// +build ppc64le
|
|
||||||
|
|
||||||
package bbolt
|
|
||||||
|
|
||||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
|
||||||
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
|
|
||||||
|
|
||||||
// maxAllocSize is the size used when creating array pointers.
|
|
||||||
const maxAllocSize = 0x7FFFFFFF
|
|
||||||
|
|
||||||
// Are unaligned load/stores broken on this arch?
|
|
||||||
var brokenUnaligned = false
|
|
12
vendor/go.etcd.io/bbolt/bolt_s390x.go
generated
vendored
12
vendor/go.etcd.io/bbolt/bolt_s390x.go
generated
vendored
@ -1,12 +0,0 @@
|
|||||||
// +build s390x
|
|
||||||
|
|
||||||
package bbolt
|
|
||||||
|
|
||||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
|
||||||
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
|
|
||||||
|
|
||||||
// maxAllocSize is the size used when creating array pointers.
|
|
||||||
const maxAllocSize = 0x7FFFFFFF
|
|
||||||
|
|
||||||
// Are unaligned load/stores broken on this arch?
|
|
||||||
var brokenUnaligned = false
|
|
93
vendor/go.etcd.io/bbolt/bolt_unix.go
generated
vendored
93
vendor/go.etcd.io/bbolt/bolt_unix.go
generated
vendored
@ -1,93 +0,0 @@
|
|||||||
// +build !windows,!plan9,!solaris
|
|
||||||
|
|
||||||
package bbolt
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"syscall"
|
|
||||||
"time"
|
|
||||||
"unsafe"
|
|
||||||
)
|
|
||||||
|
|
||||||
// flock acquires an advisory lock on a file descriptor.
|
|
||||||
func flock(db *DB, exclusive bool, timeout time.Duration) error {
|
|
||||||
var t time.Time
|
|
||||||
if timeout != 0 {
|
|
||||||
t = time.Now()
|
|
||||||
}
|
|
||||||
fd := db.file.Fd()
|
|
||||||
flag := syscall.LOCK_NB
|
|
||||||
if exclusive {
|
|
||||||
flag |= syscall.LOCK_EX
|
|
||||||
} else {
|
|
||||||
flag |= syscall.LOCK_SH
|
|
||||||
}
|
|
||||||
for {
|
|
||||||
// Attempt to obtain an exclusive lock.
|
|
||||||
err := syscall.Flock(int(fd), flag)
|
|
||||||
if err == nil {
|
|
||||||
return nil
|
|
||||||
} else if err != syscall.EWOULDBLOCK {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we timed out then return an error.
|
|
||||||
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
|
|
||||||
return ErrTimeout
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for a bit and try again.
|
|
||||||
time.Sleep(flockRetryTimeout)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// funlock releases an advisory lock on a file descriptor.
|
|
||||||
func funlock(db *DB) error {
|
|
||||||
return syscall.Flock(int(db.file.Fd()), syscall.LOCK_UN)
|
|
||||||
}
|
|
||||||
|
|
||||||
// mmap memory maps a DB's data file.
|
|
||||||
func mmap(db *DB, sz int) error {
|
|
||||||
// Map the data file to memory.
|
|
||||||
b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Advise the kernel that the mmap is accessed randomly.
|
|
||||||
err = madvise(b, syscall.MADV_RANDOM)
|
|
||||||
if err != nil && err != syscall.ENOSYS {
|
|
||||||
// Ignore not implemented error in kernel because it still works.
|
|
||||||
return fmt.Errorf("madvise: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Save the original byte slice and convert to a byte array pointer.
|
|
||||||
db.dataref = b
|
|
||||||
db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
|
|
||||||
db.datasz = sz
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// munmap unmaps a DB's data file from memory.
|
|
||||||
func munmap(db *DB) error {
|
|
||||||
// Ignore the unmap if we have no mapped data.
|
|
||||||
if db.dataref == nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Unmap using the original byte slice.
|
|
||||||
err := syscall.Munmap(db.dataref)
|
|
||||||
db.dataref = nil
|
|
||||||
db.data = nil
|
|
||||||
db.datasz = 0
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// NOTE: This function is copied from stdlib because it is not available on darwin.
|
|
||||||
func madvise(b []byte, advice int) (err error) {
|
|
||||||
_, _, e1 := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), uintptr(advice))
|
|
||||||
if e1 != 0 {
|
|
||||||
err = e1
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
88
vendor/go.etcd.io/bbolt/bolt_unix_solaris.go
generated
vendored
88
vendor/go.etcd.io/bbolt/bolt_unix_solaris.go
generated
vendored
@ -1,88 +0,0 @@
|
|||||||
package bbolt
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"syscall"
|
|
||||||
"time"
|
|
||||||
"unsafe"
|
|
||||||
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
|
|
||||||
// flock acquires an advisory lock on a file descriptor.
|
|
||||||
func flock(db *DB, exclusive bool, timeout time.Duration) error {
|
|
||||||
var t time.Time
|
|
||||||
if timeout != 0 {
|
|
||||||
t = time.Now()
|
|
||||||
}
|
|
||||||
fd := db.file.Fd()
|
|
||||||
var lockType int16
|
|
||||||
if exclusive {
|
|
||||||
lockType = syscall.F_WRLCK
|
|
||||||
} else {
|
|
||||||
lockType = syscall.F_RDLCK
|
|
||||||
}
|
|
||||||
for {
|
|
||||||
// Attempt to obtain an exclusive lock.
|
|
||||||
lock := syscall.Flock_t{Type: lockType}
|
|
||||||
err := syscall.FcntlFlock(fd, syscall.F_SETLK, &lock)
|
|
||||||
if err == nil {
|
|
||||||
return nil
|
|
||||||
} else if err != syscall.EAGAIN {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we timed out then return an error.
|
|
||||||
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
|
|
||||||
return ErrTimeout
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for a bit and try again.
|
|
||||||
time.Sleep(flockRetryTimeout)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// funlock releases an advisory lock on a file descriptor.
|
|
||||||
func funlock(db *DB) error {
|
|
||||||
var lock syscall.Flock_t
|
|
||||||
lock.Start = 0
|
|
||||||
lock.Len = 0
|
|
||||||
lock.Type = syscall.F_UNLCK
|
|
||||||
lock.Whence = 0
|
|
||||||
return syscall.FcntlFlock(uintptr(db.file.Fd()), syscall.F_SETLK, &lock)
|
|
||||||
}
|
|
||||||
|
|
||||||
// mmap memory maps a DB's data file.
|
|
||||||
func mmap(db *DB, sz int) error {
|
|
||||||
// Map the data file to memory.
|
|
||||||
b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Advise the kernel that the mmap is accessed randomly.
|
|
||||||
if err := unix.Madvise(b, syscall.MADV_RANDOM); err != nil {
|
|
||||||
return fmt.Errorf("madvise: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Save the original byte slice and convert to a byte array pointer.
|
|
||||||
db.dataref = b
|
|
||||||
db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
|
|
||||||
db.datasz = sz
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// munmap unmaps a DB's data file from memory.
|
|
||||||
func munmap(db *DB) error {
|
|
||||||
// Ignore the unmap if we have no mapped data.
|
|
||||||
if db.dataref == nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Unmap using the original byte slice.
|
|
||||||
err := unix.Munmap(db.dataref)
|
|
||||||
db.dataref = nil
|
|
||||||
db.data = nil
|
|
||||||
db.datasz = 0
|
|
||||||
return err
|
|
||||||
}
|
|
141
vendor/go.etcd.io/bbolt/bolt_windows.go
generated
vendored
141
vendor/go.etcd.io/bbolt/bolt_windows.go
generated
vendored
@ -1,141 +0,0 @@
|
|||||||
package bbolt
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"syscall"
|
|
||||||
"time"
|
|
||||||
"unsafe"
|
|
||||||
)
|
|
||||||
|
|
||||||
// LockFileEx code derived from golang build filemutex_windows.go @ v1.5.1
|
|
||||||
var (
|
|
||||||
modkernel32 = syscall.NewLazyDLL("kernel32.dll")
|
|
||||||
procLockFileEx = modkernel32.NewProc("LockFileEx")
|
|
||||||
procUnlockFileEx = modkernel32.NewProc("UnlockFileEx")
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
|
||||||
// see https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx
|
|
||||||
flagLockExclusive = 2
|
|
||||||
flagLockFailImmediately = 1
|
|
||||||
|
|
||||||
// see https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382(v=vs.85).aspx
|
|
||||||
errLockViolation syscall.Errno = 0x21
|
|
||||||
)
|
|
||||||
|
|
||||||
func lockFileEx(h syscall.Handle, flags, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) {
|
|
||||||
r, _, err := procLockFileEx.Call(uintptr(h), uintptr(flags), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)))
|
|
||||||
if r == 0 {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func unlockFileEx(h syscall.Handle, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) {
|
|
||||||
r, _, err := procUnlockFileEx.Call(uintptr(h), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)), 0)
|
|
||||||
if r == 0 {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// fdatasync flushes written data to a file descriptor.
|
|
||||||
func fdatasync(db *DB) error {
|
|
||||||
return db.file.Sync()
|
|
||||||
}
|
|
||||||
|
|
||||||
// flock acquires an advisory lock on a file descriptor.
|
|
||||||
func flock(db *DB, exclusive bool, timeout time.Duration) error {
|
|
||||||
var t time.Time
|
|
||||||
if timeout != 0 {
|
|
||||||
t = time.Now()
|
|
||||||
}
|
|
||||||
var flag uint32 = flagLockFailImmediately
|
|
||||||
if exclusive {
|
|
||||||
flag |= flagLockExclusive
|
|
||||||
}
|
|
||||||
for {
|
|
||||||
// Fix for https://github.com/etcd-io/bbolt/issues/121. Use byte-range
|
|
||||||
// -1..0 as the lock on the database file.
|
|
||||||
var m1 uint32 = (1 << 32) - 1 // -1 in a uint32
|
|
||||||
err := lockFileEx(syscall.Handle(db.file.Fd()), flag, 0, 1, 0, &syscall.Overlapped{
|
|
||||||
Offset: m1,
|
|
||||||
OffsetHigh: m1,
|
|
||||||
})
|
|
||||||
|
|
||||||
if err == nil {
|
|
||||||
return nil
|
|
||||||
} else if err != errLockViolation {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we timed oumercit then return an error.
|
|
||||||
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
|
|
||||||
return ErrTimeout
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for a bit and try again.
|
|
||||||
time.Sleep(flockRetryTimeout)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// funlock releases an advisory lock on a file descriptor.
|
|
||||||
func funlock(db *DB) error {
|
|
||||||
var m1 uint32 = (1 << 32) - 1 // -1 in a uint32
|
|
||||||
err := unlockFileEx(syscall.Handle(db.file.Fd()), 0, 1, 0, &syscall.Overlapped{
|
|
||||||
Offset: m1,
|
|
||||||
OffsetHigh: m1,
|
|
||||||
})
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// mmap memory maps a DB's data file.
|
|
||||||
// Based on: https://github.com/edsrzf/mmap-go
|
|
||||||
func mmap(db *DB, sz int) error {
|
|
||||||
if !db.readOnly {
|
|
||||||
// Truncate the database to the size of the mmap.
|
|
||||||
if err := db.file.Truncate(int64(sz)); err != nil {
|
|
||||||
return fmt.Errorf("truncate: %s", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Open a file mapping handle.
|
|
||||||
sizelo := uint32(sz >> 32)
|
|
||||||
sizehi := uint32(sz) & 0xffffffff
|
|
||||||
h, errno := syscall.CreateFileMapping(syscall.Handle(db.file.Fd()), nil, syscall.PAGE_READONLY, sizelo, sizehi, nil)
|
|
||||||
if h == 0 {
|
|
||||||
return os.NewSyscallError("CreateFileMapping", errno)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create the memory map.
|
|
||||||
addr, errno := syscall.MapViewOfFile(h, syscall.FILE_MAP_READ, 0, 0, uintptr(sz))
|
|
||||||
if addr == 0 {
|
|
||||||
return os.NewSyscallError("MapViewOfFile", errno)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Close mapping handle.
|
|
||||||
if err := syscall.CloseHandle(syscall.Handle(h)); err != nil {
|
|
||||||
return os.NewSyscallError("CloseHandle", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert to a byte array.
|
|
||||||
db.data = ((*[maxMapSize]byte)(unsafe.Pointer(addr)))
|
|
||||||
db.datasz = sz
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// munmap unmaps a pointer from a file.
|
|
||||||
// Based on: https://github.com/edsrzf/mmap-go
|
|
||||||
func munmap(db *DB) error {
|
|
||||||
if db.data == nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
addr := (uintptr)(unsafe.Pointer(&db.data[0]))
|
|
||||||
if err := syscall.UnmapViewOfFile(addr); err != nil {
|
|
||||||
return os.NewSyscallError("UnmapViewOfFile", err)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
8
vendor/go.etcd.io/bbolt/boltsync_unix.go
generated
vendored
8
vendor/go.etcd.io/bbolt/boltsync_unix.go
generated
vendored
@ -1,8 +0,0 @@
|
|||||||
// +build !windows,!plan9,!linux,!openbsd
|
|
||||||
|
|
||||||
package bbolt
|
|
||||||
|
|
||||||
// fdatasync flushes written data to a file descriptor.
|
|
||||||
func fdatasync(db *DB) error {
|
|
||||||
return db.file.Sync()
|
|
||||||
}
|
|
775
vendor/go.etcd.io/bbolt/bucket.go
generated
vendored
775
vendor/go.etcd.io/bbolt/bucket.go
generated
vendored
@ -1,775 +0,0 @@
|
|||||||
package bbolt
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"fmt"
|
|
||||||
"unsafe"
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
|
||||||
// MaxKeySize is the maximum length of a key, in bytes.
|
|
||||||
MaxKeySize = 32768
|
|
||||||
|
|
||||||
// MaxValueSize is the maximum length of a value, in bytes.
|
|
||||||
MaxValueSize = (1 << 31) - 2
|
|
||||||
)
|
|
||||||
|
|
||||||
const bucketHeaderSize = int(unsafe.Sizeof(bucket{}))
|
|
||||||
|
|
||||||
const (
|
|
||||||
minFillPercent = 0.1
|
|
||||||
maxFillPercent = 1.0
|
|
||||||
)
|
|
||||||
|
|
||||||
// DefaultFillPercent is the percentage that split pages are filled.
|
|
||||||
// This value can be changed by setting Bucket.FillPercent.
|
|
||||||
const DefaultFillPercent = 0.5
|
|
||||||
|
|
||||||
// Bucket represents a collection of key/value pairs inside the database.
|
|
||||||
type Bucket struct {
|
|
||||||
*bucket
|
|
||||||
tx *Tx // the associated transaction
|
|
||||||
buckets map[string]*Bucket // subbucket cache
|
|
||||||
page *page // inline page reference
|
|
||||||
rootNode *node // materialized node for the root page.
|
|
||||||
nodes map[pgid]*node // node cache
|
|
||||||
|
|
||||||
// Sets the threshold for filling nodes when they split. By default,
|
|
||||||
// the bucket will fill to 50% but it can be useful to increase this
|
|
||||||
// amount if you know that your write workloads are mostly append-only.
|
|
||||||
//
|
|
||||||
// This is non-persisted across transactions so it must be set in every Tx.
|
|
||||||
FillPercent float64
|
|
||||||
}
|
|
||||||
|
|
||||||
// bucket represents the on-file representation of a bucket.
|
|
||||||
// This is stored as the "value" of a bucket key. If the bucket is small enough,
|
|
||||||
// then its root page can be stored inline in the "value", after the bucket
|
|
||||||
// header. In the case of inline buckets, the "root" will be 0.
|
|
||||||
type bucket struct {
|
|
||||||
root pgid // page id of the bucket's root-level page
|
|
||||||
sequence uint64 // monotonically incrementing, used by NextSequence()
|
|
||||||
}
|
|
||||||
|
|
||||||
// newBucket returns a new bucket associated with a transaction.
|
|
||||||
func newBucket(tx *Tx) Bucket {
|
|
||||||
var b = Bucket{tx: tx, FillPercent: DefaultFillPercent}
|
|
||||||
if tx.writable {
|
|
||||||
b.buckets = make(map[string]*Bucket)
|
|
||||||
b.nodes = make(map[pgid]*node)
|
|
||||||
}
|
|
||||||
return b
|
|
||||||
}
|
|
||||||
|
|
||||||
// Tx returns the tx of the bucket.
|
|
||||||
func (b *Bucket) Tx() *Tx {
|
|
||||||
return b.tx
|
|
||||||
}
|
|
||||||
|
|
||||||
// Root returns the root of the bucket.
|
|
||||||
func (b *Bucket) Root() pgid {
|
|
||||||
return b.root
|
|
||||||
}
|
|
||||||
|
|
||||||
// Writable returns whether the bucket is writable.
|
|
||||||
func (b *Bucket) Writable() bool {
|
|
||||||
return b.tx.writable
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cursor creates a cursor associated with the bucket.
|
|
||||||
// The cursor is only valid as long as the transaction is open.
|
|
||||||
// Do not use a cursor after the transaction is closed.
|
|
||||||
func (b *Bucket) Cursor() *Cursor {
|
|
||||||
// Update transaction statistics.
|
|
||||||
b.tx.stats.CursorCount++
|
|
||||||
|
|
||||||
// Allocate and return a cursor.
|
|
||||||
return &Cursor{
|
|
||||||
bucket: b,
|
|
||||||
stack: make([]elemRef, 0),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Bucket retrieves a nested bucket by name.
|
|
||||||
// Returns nil if the bucket does not exist.
|
|
||||||
// The bucket instance is only valid for the lifetime of the transaction.
|
|
||||||
func (b *Bucket) Bucket(name []byte) *Bucket {
|
|
||||||
if b.buckets != nil {
|
|
||||||
if child := b.buckets[string(name)]; child != nil {
|
|
||||||
return child
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Move cursor to key.
|
|
||||||
c := b.Cursor()
|
|
||||||
k, v, flags := c.seek(name)
|
|
||||||
|
|
||||||
// Return nil if the key doesn't exist or it is not a bucket.
|
|
||||||
if !bytes.Equal(name, k) || (flags&bucketLeafFlag) == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Otherwise create a bucket and cache it.
|
|
||||||
var child = b.openBucket(v)
|
|
||||||
if b.buckets != nil {
|
|
||||||
b.buckets[string(name)] = child
|
|
||||||
}
|
|
||||||
|
|
||||||
return child
|
|
||||||
}
|
|
||||||
|
|
||||||
// Helper method that re-interprets a sub-bucket value
|
|
||||||
// from a parent into a Bucket
|
|
||||||
func (b *Bucket) openBucket(value []byte) *Bucket {
|
|
||||||
var child = newBucket(b.tx)
|
|
||||||
|
|
||||||
// If unaligned load/stores are broken on this arch and value is
|
|
||||||
// unaligned simply clone to an aligned byte array.
|
|
||||||
unaligned := brokenUnaligned && uintptr(unsafe.Pointer(&value[0]))&3 != 0
|
|
||||||
|
|
||||||
if unaligned {
|
|
||||||
value = cloneBytes(value)
|
|
||||||
}
|
|
||||||
|
|
||||||
// If this is a writable transaction then we need to copy the bucket entry.
|
|
||||||
// Read-only transactions can point directly at the mmap entry.
|
|
||||||
if b.tx.writable && !unaligned {
|
|
||||||
child.bucket = &bucket{}
|
|
||||||
*child.bucket = *(*bucket)(unsafe.Pointer(&value[0]))
|
|
||||||
} else {
|
|
||||||
child.bucket = (*bucket)(unsafe.Pointer(&value[0]))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Save a reference to the inline page if the bucket is inline.
|
|
||||||
if child.root == 0 {
|
|
||||||
child.page = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
|
|
||||||
}
|
|
||||||
|
|
||||||
return &child
|
|
||||||
}
|
|
||||||
|
|
||||||
// CreateBucket creates a new bucket at the given key and returns the new bucket.
|
|
||||||
// Returns an error if the key already exists, if the bucket name is blank, or if the bucket name is too long.
|
|
||||||
// The bucket instance is only valid for the lifetime of the transaction.
|
|
||||||
func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
|
|
||||||
if b.tx.db == nil {
|
|
||||||
return nil, ErrTxClosed
|
|
||||||
} else if !b.tx.writable {
|
|
||||||
return nil, ErrTxNotWritable
|
|
||||||
} else if len(key) == 0 {
|
|
||||||
return nil, ErrBucketNameRequired
|
|
||||||
}
|
|
||||||
|
|
||||||
// Move cursor to correct position.
|
|
||||||
c := b.Cursor()
|
|
||||||
k, _, flags := c.seek(key)
|
|
||||||
|
|
||||||
// Return an error if there is an existing key.
|
|
||||||
if bytes.Equal(key, k) {
|
|
||||||
if (flags & bucketLeafFlag) != 0 {
|
|
||||||
return nil, ErrBucketExists
|
|
||||||
}
|
|
||||||
return nil, ErrIncompatibleValue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create empty, inline bucket.
|
|
||||||
var bucket = Bucket{
|
|
||||||
bucket: &bucket{},
|
|
||||||
rootNode: &node{isLeaf: true},
|
|
||||||
FillPercent: DefaultFillPercent,
|
|
||||||
}
|
|
||||||
var value = bucket.write()
|
|
||||||
|
|
||||||
// Insert into node.
|
|
||||||
key = cloneBytes(key)
|
|
||||||
c.node().put(key, key, value, 0, bucketLeafFlag)
|
|
||||||
|
|
||||||
// Since subbuckets are not allowed on inline buckets, we need to
|
|
||||||
// dereference the inline page, if it exists. This will cause the bucket
|
|
||||||
// to be treated as a regular, non-inline bucket for the rest of the tx.
|
|
||||||
b.page = nil
|
|
||||||
|
|
||||||
return b.Bucket(key), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// CreateBucketIfNotExists creates a new bucket if it doesn't already exist and returns a reference to it.
|
|
||||||
// Returns an error if the bucket name is blank, or if the bucket name is too long.
|
|
||||||
// The bucket instance is only valid for the lifetime of the transaction.
|
|
||||||
func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) {
|
|
||||||
child, err := b.CreateBucket(key)
|
|
||||||
if err == ErrBucketExists {
|
|
||||||
return b.Bucket(key), nil
|
|
||||||
} else if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return child, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// DeleteBucket deletes a bucket at the given key.
|
|
||||||
// Returns an error if the bucket does not exists, or if the key represents a non-bucket value.
|
|
||||||
func (b *Bucket) DeleteBucket(key []byte) error {
|
|
||||||
if b.tx.db == nil {
|
|
||||||
return ErrTxClosed
|
|
||||||
} else if !b.Writable() {
|
|
||||||
return ErrTxNotWritable
|
|
||||||
}
|
|
||||||
|
|
||||||
// Move cursor to correct position.
|
|
||||||
c := b.Cursor()
|
|
||||||
k, _, flags := c.seek(key)
|
|
||||||
|
|
||||||
// Return an error if bucket doesn't exist or is not a bucket.
|
|
||||||
if !bytes.Equal(key, k) {
|
|
||||||
return ErrBucketNotFound
|
|
||||||
} else if (flags & bucketLeafFlag) == 0 {
|
|
||||||
return ErrIncompatibleValue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Recursively delete all child buckets.
|
|
||||||
child := b.Bucket(key)
|
|
||||||
err := child.ForEach(func(k, v []byte) error {
|
|
||||||
if v == nil {
|
|
||||||
if err := child.DeleteBucket(k); err != nil {
|
|
||||||
return fmt.Errorf("delete bucket: %s", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove cached copy.
|
|
||||||
delete(b.buckets, string(key))
|
|
||||||
|
|
||||||
// Release all bucket pages to freelist.
|
|
||||||
child.nodes = nil
|
|
||||||
child.rootNode = nil
|
|
||||||
child.free()
|
|
||||||
|
|
||||||
// Delete the node if we have a matching key.
|
|
||||||
c.node().del(key)
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get retrieves the value for a key in the bucket.
|
|
||||||
// Returns a nil value if the key does not exist or if the key is a nested bucket.
|
|
||||||
// The returned value is only valid for the life of the transaction.
|
|
||||||
func (b *Bucket) Get(key []byte) []byte {
|
|
||||||
k, v, flags := b.Cursor().seek(key)
|
|
||||||
|
|
||||||
// Return nil if this is a bucket.
|
|
||||||
if (flags & bucketLeafFlag) != 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// If our target node isn't the same key as what's passed in then return nil.
|
|
||||||
if !bytes.Equal(key, k) {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return v
|
|
||||||
}
|
|
||||||
|
|
||||||
// Put sets the value for a key in the bucket.
|
|
||||||
// If the key exist then its previous value will be overwritten.
|
|
||||||
// Supplied value must remain valid for the life of the transaction.
|
|
||||||
// Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large.
|
|
||||||
func (b *Bucket) Put(key []byte, value []byte) error {
|
|
||||||
if b.tx.db == nil {
|
|
||||||
return ErrTxClosed
|
|
||||||
} else if !b.Writable() {
|
|
||||||
return ErrTxNotWritable
|
|
||||||
} else if len(key) == 0 {
|
|
||||||
return ErrKeyRequired
|
|
||||||
} else if len(key) > MaxKeySize {
|
|
||||||
return ErrKeyTooLarge
|
|
||||||
} else if int64(len(value)) > MaxValueSize {
|
|
||||||
return ErrValueTooLarge
|
|
||||||
}
|
|
||||||
|
|
||||||
// Move cursor to correct position.
|
|
||||||
c := b.Cursor()
|
|
||||||
k, _, flags := c.seek(key)
|
|
||||||
|
|
||||||
// Return an error if there is an existing key with a bucket value.
|
|
||||||
if bytes.Equal(key, k) && (flags&bucketLeafFlag) != 0 {
|
|
||||||
return ErrIncompatibleValue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Insert into node.
|
|
||||||
key = cloneBytes(key)
|
|
||||||
c.node().put(key, key, value, 0, 0)
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete removes a key from the bucket.
|
|
||||||
// If the key does not exist then nothing is done and a nil error is returned.
|
|
||||||
// Returns an error if the bucket was created from a read-only transaction.
|
|
||||||
func (b *Bucket) Delete(key []byte) error {
|
|
||||||
if b.tx.db == nil {
|
|
||||||
return ErrTxClosed
|
|
||||||
} else if !b.Writable() {
|
|
||||||
return ErrTxNotWritable
|
|
||||||
}
|
|
||||||
|
|
||||||
// Move cursor to correct position.
|
|
||||||
c := b.Cursor()
|
|
||||||
k, _, flags := c.seek(key)
|
|
||||||
|
|
||||||
// Return nil if the key doesn't exist.
|
|
||||||
if !bytes.Equal(key, k) {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return an error if there is already existing bucket value.
|
|
||||||
if (flags & bucketLeafFlag) != 0 {
|
|
||||||
return ErrIncompatibleValue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete the node if we have a matching key.
|
|
||||||
c.node().del(key)
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sequence returns the current integer for the bucket without incrementing it.
|
|
||||||
func (b *Bucket) Sequence() uint64 { return b.bucket.sequence }
|
|
||||||
|
|
||||||
// SetSequence updates the sequence number for the bucket.
|
|
||||||
func (b *Bucket) SetSequence(v uint64) error {
|
|
||||||
if b.tx.db == nil {
|
|
||||||
return ErrTxClosed
|
|
||||||
} else if !b.Writable() {
|
|
||||||
return ErrTxNotWritable
|
|
||||||
}
|
|
||||||
|
|
||||||
// Materialize the root node if it hasn't been already so that the
|
|
||||||
// bucket will be saved during commit.
|
|
||||||
if b.rootNode == nil {
|
|
||||||
_ = b.node(b.root, nil)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Increment and return the sequence.
|
|
||||||
b.bucket.sequence = v
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// NextSequence returns an autoincrementing integer for the bucket.
|
|
||||||
func (b *Bucket) NextSequence() (uint64, error) {
|
|
||||||
if b.tx.db == nil {
|
|
||||||
return 0, ErrTxClosed
|
|
||||||
} else if !b.Writable() {
|
|
||||||
return 0, ErrTxNotWritable
|
|
||||||
}
|
|
||||||
|
|
||||||
// Materialize the root node if it hasn't been already so that the
|
|
||||||
// bucket will be saved during commit.
|
|
||||||
if b.rootNode == nil {
|
|
||||||
_ = b.node(b.root, nil)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Increment and return the sequence.
|
|
||||||
b.bucket.sequence++
|
|
||||||
return b.bucket.sequence, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// ForEach executes a function for each key/value pair in a bucket.
|
|
||||||
// If the provided function returns an error then the iteration is stopped and
|
|
||||||
// the error is returned to the caller. The provided function must not modify
|
|
||||||
// the bucket; this will result in undefined behavior.
|
|
||||||
func (b *Bucket) ForEach(fn func(k, v []byte) error) error {
|
|
||||||
if b.tx.db == nil {
|
|
||||||
return ErrTxClosed
|
|
||||||
}
|
|
||||||
c := b.Cursor()
|
|
||||||
for k, v := c.First(); k != nil; k, v = c.Next() {
|
|
||||||
if err := fn(k, v); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Stat returns stats on a bucket.
|
|
||||||
func (b *Bucket) Stats() BucketStats {
|
|
||||||
var s, subStats BucketStats
|
|
||||||
pageSize := b.tx.db.pageSize
|
|
||||||
s.BucketN += 1
|
|
||||||
if b.root == 0 {
|
|
||||||
s.InlineBucketN += 1
|
|
||||||
}
|
|
||||||
b.forEachPage(func(p *page, depth int) {
|
|
||||||
if (p.flags & leafPageFlag) != 0 {
|
|
||||||
s.KeyN += int(p.count)
|
|
||||||
|
|
||||||
// used totals the used bytes for the page
|
|
||||||
used := pageHeaderSize
|
|
||||||
|
|
||||||
if p.count != 0 {
|
|
||||||
// If page has any elements, add all element headers.
|
|
||||||
used += leafPageElementSize * int(p.count-1)
|
|
||||||
|
|
||||||
// Add all element key, value sizes.
|
|
||||||
// The computation takes advantage of the fact that the position
|
|
||||||
// of the last element's key/value equals to the total of the sizes
|
|
||||||
// of all previous elements' keys and values.
|
|
||||||
// It also includes the last element's header.
|
|
||||||
lastElement := p.leafPageElement(p.count - 1)
|
|
||||||
used += int(lastElement.pos + lastElement.ksize + lastElement.vsize)
|
|
||||||
}
|
|
||||||
|
|
||||||
if b.root == 0 {
|
|
||||||
// For inlined bucket just update the inline stats
|
|
||||||
s.InlineBucketInuse += used
|
|
||||||
} else {
|
|
||||||
// For non-inlined bucket update all the leaf stats
|
|
||||||
s.LeafPageN++
|
|
||||||
s.LeafInuse += used
|
|
||||||
s.LeafOverflowN += int(p.overflow)
|
|
||||||
|
|
||||||
// Collect stats from sub-buckets.
|
|
||||||
// Do that by iterating over all element headers
|
|
||||||
// looking for the ones with the bucketLeafFlag.
|
|
||||||
for i := uint16(0); i < p.count; i++ {
|
|
||||||
e := p.leafPageElement(i)
|
|
||||||
if (e.flags & bucketLeafFlag) != 0 {
|
|
||||||
// For any bucket element, open the element value
|
|
||||||
// and recursively call Stats on the contained bucket.
|
|
||||||
subStats.Add(b.openBucket(e.value()).Stats())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (p.flags & branchPageFlag) != 0 {
|
|
||||||
s.BranchPageN++
|
|
||||||
lastElement := p.branchPageElement(p.count - 1)
|
|
||||||
|
|
||||||
// used totals the used bytes for the page
|
|
||||||
// Add header and all element headers.
|
|
||||||
used := pageHeaderSize + (branchPageElementSize * int(p.count-1))
|
|
||||||
|
|
||||||
// Add size of all keys and values.
|
|
||||||
// Again, use the fact that last element's position equals to
|
|
||||||
// the total of key, value sizes of all previous elements.
|
|
||||||
used += int(lastElement.pos + lastElement.ksize)
|
|
||||||
s.BranchInuse += used
|
|
||||||
s.BranchOverflowN += int(p.overflow)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Keep track of maximum page depth.
|
|
||||||
if depth+1 > s.Depth {
|
|
||||||
s.Depth = (depth + 1)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
// Alloc stats can be computed from page counts and pageSize.
|
|
||||||
s.BranchAlloc = (s.BranchPageN + s.BranchOverflowN) * pageSize
|
|
||||||
s.LeafAlloc = (s.LeafPageN + s.LeafOverflowN) * pageSize
|
|
||||||
|
|
||||||
// Add the max depth of sub-buckets to get total nested depth.
|
|
||||||
s.Depth += subStats.Depth
|
|
||||||
// Add the stats for all sub-buckets
|
|
||||||
s.Add(subStats)
|
|
||||||
return s
|
|
||||||
}
|
|
||||||
|
|
||||||
// forEachPage iterates over every page in a bucket, including inline pages.
|
|
||||||
func (b *Bucket) forEachPage(fn func(*page, int)) {
|
|
||||||
// If we have an inline page then just use that.
|
|
||||||
if b.page != nil {
|
|
||||||
fn(b.page, 0)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Otherwise traverse the page hierarchy.
|
|
||||||
b.tx.forEachPage(b.root, 0, fn)
|
|
||||||
}
|
|
||||||
|
|
||||||
// forEachPageNode iterates over every page (or node) in a bucket.
|
|
||||||
// This also includes inline pages.
|
|
||||||
func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) {
|
|
||||||
// If we have an inline page or root node then just use that.
|
|
||||||
if b.page != nil {
|
|
||||||
fn(b.page, nil, 0)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
b._forEachPageNode(b.root, 0, fn)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (b *Bucket) _forEachPageNode(pgid pgid, depth int, fn func(*page, *node, int)) {
|
|
||||||
var p, n = b.pageNode(pgid)
|
|
||||||
|
|
||||||
// Execute function.
|
|
||||||
fn(p, n, depth)
|
|
||||||
|
|
||||||
// Recursively loop over children.
|
|
||||||
if p != nil {
|
|
||||||
if (p.flags & branchPageFlag) != 0 {
|
|
||||||
for i := 0; i < int(p.count); i++ {
|
|
||||||
elem := p.branchPageElement(uint16(i))
|
|
||||||
b._forEachPageNode(elem.pgid, depth+1, fn)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if !n.isLeaf {
|
|
||||||
for _, inode := range n.inodes {
|
|
||||||
b._forEachPageNode(inode.pgid, depth+1, fn)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// spill writes all the nodes for this bucket to dirty pages.
|
|
||||||
func (b *Bucket) spill() error {
|
|
||||||
// Spill all child buckets first.
|
|
||||||
for name, child := range b.buckets {
|
|
||||||
// If the child bucket is small enough and it has no child buckets then
|
|
||||||
// write it inline into the parent bucket's page. Otherwise spill it
|
|
||||||
// like a normal bucket and make the parent value a pointer to the page.
|
|
||||||
var value []byte
|
|
||||||
if child.inlineable() {
|
|
||||||
child.free()
|
|
||||||
value = child.write()
|
|
||||||
} else {
|
|
||||||
if err := child.spill(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update the child bucket header in this bucket.
|
|
||||||
value = make([]byte, unsafe.Sizeof(bucket{}))
|
|
||||||
var bucket = (*bucket)(unsafe.Pointer(&value[0]))
|
|
||||||
*bucket = *child.bucket
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip writing the bucket if there are no materialized nodes.
|
|
||||||
if child.rootNode == nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update parent node.
|
|
||||||
var c = b.Cursor()
|
|
||||||
k, _, flags := c.seek([]byte(name))
|
|
||||||
if !bytes.Equal([]byte(name), k) {
|
|
||||||
panic(fmt.Sprintf("misplaced bucket header: %x -> %x", []byte(name), k))
|
|
||||||
}
|
|
||||||
if flags&bucketLeafFlag == 0 {
|
|
||||||
panic(fmt.Sprintf("unexpected bucket header flag: %x", flags))
|
|
||||||
}
|
|
||||||
c.node().put([]byte(name), []byte(name), value, 0, bucketLeafFlag)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ignore if there's not a materialized root node.
|
|
||||||
if b.rootNode == nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Spill nodes.
|
|
||||||
if err := b.rootNode.spill(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
b.rootNode = b.rootNode.root()
|
|
||||||
|
|
||||||
// Update the root node for this bucket.
|
|
||||||
if b.rootNode.pgid >= b.tx.meta.pgid {
|
|
||||||
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid))
|
|
||||||
}
|
|
||||||
b.root = b.rootNode.pgid
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// inlineable returns true if a bucket is small enough to be written inline
|
|
||||||
// and if it contains no subbuckets. Otherwise returns false.
|
|
||||||
func (b *Bucket) inlineable() bool {
|
|
||||||
var n = b.rootNode
|
|
||||||
|
|
||||||
// Bucket must only contain a single leaf node.
|
|
||||||
if n == nil || !n.isLeaf {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Bucket is not inlineable if it contains subbuckets or if it goes beyond
|
|
||||||
// our threshold for inline bucket size.
|
|
||||||
var size = pageHeaderSize
|
|
||||||
for _, inode := range n.inodes {
|
|
||||||
size += leafPageElementSize + len(inode.key) + len(inode.value)
|
|
||||||
|
|
||||||
if inode.flags&bucketLeafFlag != 0 {
|
|
||||||
return false
|
|
||||||
} else if size > b.maxInlineBucketSize() {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns the maximum total size of a bucket to make it a candidate for inlining.
|
|
||||||
func (b *Bucket) maxInlineBucketSize() int {
|
|
||||||
return b.tx.db.pageSize / 4
|
|
||||||
}
|
|
||||||
|
|
||||||
// write allocates and writes a bucket to a byte slice.
|
|
||||||
func (b *Bucket) write() []byte {
|
|
||||||
// Allocate the appropriate size.
|
|
||||||
var n = b.rootNode
|
|
||||||
var value = make([]byte, bucketHeaderSize+n.size())
|
|
||||||
|
|
||||||
// Write a bucket header.
|
|
||||||
var bucket = (*bucket)(unsafe.Pointer(&value[0]))
|
|
||||||
*bucket = *b.bucket
|
|
||||||
|
|
||||||
// Convert byte slice to a fake page and write the root node.
|
|
||||||
var p = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
|
|
||||||
n.write(p)
|
|
||||||
|
|
||||||
return value
|
|
||||||
}
|
|
||||||
|
|
||||||
// rebalance attempts to balance all nodes.
|
|
||||||
func (b *Bucket) rebalance() {
|
|
||||||
for _, n := range b.nodes {
|
|
||||||
n.rebalance()
|
|
||||||
}
|
|
||||||
for _, child := range b.buckets {
|
|
||||||
child.rebalance()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// node creates a node from a page and associates it with a given parent.
|
|
||||||
func (b *Bucket) node(pgid pgid, parent *node) *node {
|
|
||||||
_assert(b.nodes != nil, "nodes map expected")
|
|
||||||
|
|
||||||
// Retrieve node if it's already been created.
|
|
||||||
if n := b.nodes[pgid]; n != nil {
|
|
||||||
return n
|
|
||||||
}
|
|
||||||
|
|
||||||
// Otherwise create a node and cache it.
|
|
||||||
n := &node{bucket: b, parent: parent}
|
|
||||||
if parent == nil {
|
|
||||||
b.rootNode = n
|
|
||||||
} else {
|
|
||||||
parent.children = append(parent.children, n)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use the inline page if this is an inline bucket.
|
|
||||||
var p = b.page
|
|
||||||
if p == nil {
|
|
||||||
p = b.tx.page(pgid)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read the page into the node and cache it.
|
|
||||||
n.read(p)
|
|
||||||
b.nodes[pgid] = n
|
|
||||||
|
|
||||||
// Update statistics.
|
|
||||||
b.tx.stats.NodeCount++
|
|
||||||
|
|
||||||
return n
|
|
||||||
}
|
|
||||||
|
|
||||||
// free recursively frees all pages in the bucket.
|
|
||||||
func (b *Bucket) free() {
|
|
||||||
if b.root == 0 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
var tx = b.tx
|
|
||||||
b.forEachPageNode(func(p *page, n *node, _ int) {
|
|
||||||
if p != nil {
|
|
||||||
tx.db.freelist.free(tx.meta.txid, p)
|
|
||||||
} else {
|
|
||||||
n.free()
|
|
||||||
}
|
|
||||||
})
|
|
||||||
b.root = 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// dereference removes all references to the old mmap.
|
|
||||||
func (b *Bucket) dereference() {
|
|
||||||
if b.rootNode != nil {
|
|
||||||
b.rootNode.root().dereference()
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, child := range b.buckets {
|
|
||||||
child.dereference()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// pageNode returns the in-memory node, if it exists.
|
|
||||||
// Otherwise returns the underlying page.
|
|
||||||
func (b *Bucket) pageNode(id pgid) (*page, *node) {
|
|
||||||
// Inline buckets have a fake page embedded in their value so treat them
|
|
||||||
// differently. We'll return the rootNode (if available) or the fake page.
|
|
||||||
if b.root == 0 {
|
|
||||||
if id != 0 {
|
|
||||||
panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id))
|
|
||||||
}
|
|
||||||
if b.rootNode != nil {
|
|
||||||
return nil, b.rootNode
|
|
||||||
}
|
|
||||||
return b.page, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check the node cache for non-inline buckets.
|
|
||||||
if b.nodes != nil {
|
|
||||||
if n := b.nodes[id]; n != nil {
|
|
||||||
return nil, n
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Finally lookup the page from the transaction if no node is materialized.
|
|
||||||
return b.tx.page(id), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// BucketStats records statistics about resources used by a bucket.
|
|
||||||
type BucketStats struct {
|
|
||||||
// Page count statistics.
|
|
||||||
BranchPageN int // number of logical branch pages
|
|
||||||
BranchOverflowN int // number of physical branch overflow pages
|
|
||||||
LeafPageN int // number of logical leaf pages
|
|
||||||
LeafOverflowN int // number of physical leaf overflow pages
|
|
||||||
|
|
||||||
// Tree statistics.
|
|
||||||
KeyN int // number of keys/value pairs
|
|
||||||
Depth int // number of levels in B+tree
|
|
||||||
|
|
||||||
// Page size utilization.
|
|
||||||
BranchAlloc int // bytes allocated for physical branch pages
|
|
||||||
BranchInuse int // bytes actually used for branch data
|
|
||||||
LeafAlloc int // bytes allocated for physical leaf pages
|
|
||||||
LeafInuse int // bytes actually used for leaf data
|
|
||||||
|
|
||||||
// Bucket statistics
|
|
||||||
BucketN int // total number of buckets including the top bucket
|
|
||||||
InlineBucketN int // total number on inlined buckets
|
|
||||||
InlineBucketInuse int // bytes used for inlined buckets (also accounted for in LeafInuse)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *BucketStats) Add(other BucketStats) {
|
|
||||||
s.BranchPageN += other.BranchPageN
|
|
||||||
s.BranchOverflowN += other.BranchOverflowN
|
|
||||||
s.LeafPageN += other.LeafPageN
|
|
||||||
s.LeafOverflowN += other.LeafOverflowN
|
|
||||||
s.KeyN += other.KeyN
|
|
||||||
if s.Depth < other.Depth {
|
|
||||||
s.Depth = other.Depth
|
|
||||||
}
|
|
||||||
s.BranchAlloc += other.BranchAlloc
|
|
||||||
s.BranchInuse += other.BranchInuse
|
|
||||||
s.LeafAlloc += other.LeafAlloc
|
|
||||||
s.LeafInuse += other.LeafInuse
|
|
||||||
|
|
||||||
s.BucketN += other.BucketN
|
|
||||||
s.InlineBucketN += other.InlineBucketN
|
|
||||||
s.InlineBucketInuse += other.InlineBucketInuse
|
|
||||||
}
|
|
||||||
|
|
||||||
// cloneBytes returns a copy of a given slice.
|
|
||||||
func cloneBytes(v []byte) []byte {
|
|
||||||
var clone = make([]byte, len(v))
|
|
||||||
copy(clone, v)
|
|
||||||
return clone
|
|
||||||
}
|
|
396
vendor/go.etcd.io/bbolt/cursor.go
generated
vendored
396
vendor/go.etcd.io/bbolt/cursor.go
generated
vendored
@ -1,396 +0,0 @@
|
|||||||
package bbolt
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"fmt"
|
|
||||||
"sort"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Cursor represents an iterator that can traverse over all key/value pairs in a bucket in sorted order.
|
|
||||||
// Cursors see nested buckets with value == nil.
|
|
||||||
// Cursors can be obtained from a transaction and are valid as long as the transaction is open.
|
|
||||||
//
|
|
||||||
// Keys and values returned from the cursor are only valid for the life of the transaction.
|
|
||||||
//
|
|
||||||
// Changing data while traversing with a cursor may cause it to be invalidated
|
|
||||||
// and return unexpected keys and/or values. You must reposition your cursor
|
|
||||||
// after mutating data.
|
|
||||||
type Cursor struct {
|
|
||||||
bucket *Bucket
|
|
||||||
stack []elemRef
|
|
||||||
}
|
|
||||||
|
|
||||||
// Bucket returns the bucket that this cursor was created from.
|
|
||||||
func (c *Cursor) Bucket() *Bucket {
|
|
||||||
return c.bucket
|
|
||||||
}
|
|
||||||
|
|
||||||
// First moves the cursor to the first item in the bucket and returns its key and value.
|
|
||||||
// If the bucket is empty then a nil key and value are returned.
|
|
||||||
// The returned key and value are only valid for the life of the transaction.
|
|
||||||
func (c *Cursor) First() (key []byte, value []byte) {
|
|
||||||
_assert(c.bucket.tx.db != nil, "tx closed")
|
|
||||||
c.stack = c.stack[:0]
|
|
||||||
p, n := c.bucket.pageNode(c.bucket.root)
|
|
||||||
c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
|
|
||||||
c.first()
|
|
||||||
|
|
||||||
// If we land on an empty page then move to the next value.
|
|
||||||
// https://github.com/boltdb/bolt/issues/450
|
|
||||||
if c.stack[len(c.stack)-1].count() == 0 {
|
|
||||||
c.next()
|
|
||||||
}
|
|
||||||
|
|
||||||
k, v, flags := c.keyValue()
|
|
||||||
if (flags & uint32(bucketLeafFlag)) != 0 {
|
|
||||||
return k, nil
|
|
||||||
}
|
|
||||||
return k, v
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// Last moves the cursor to the last item in the bucket and returns its key and value.
|
|
||||||
// If the bucket is empty then a nil key and value are returned.
|
|
||||||
// The returned key and value are only valid for the life of the transaction.
|
|
||||||
func (c *Cursor) Last() (key []byte, value []byte) {
|
|
||||||
_assert(c.bucket.tx.db != nil, "tx closed")
|
|
||||||
c.stack = c.stack[:0]
|
|
||||||
p, n := c.bucket.pageNode(c.bucket.root)
|
|
||||||
ref := elemRef{page: p, node: n}
|
|
||||||
ref.index = ref.count() - 1
|
|
||||||
c.stack = append(c.stack, ref)
|
|
||||||
c.last()
|
|
||||||
k, v, flags := c.keyValue()
|
|
||||||
if (flags & uint32(bucketLeafFlag)) != 0 {
|
|
||||||
return k, nil
|
|
||||||
}
|
|
||||||
return k, v
|
|
||||||
}
|
|
||||||
|
|
||||||
// Next moves the cursor to the next item in the bucket and returns its key and value.
|
|
||||||
// If the cursor is at the end of the bucket then a nil key and value are returned.
|
|
||||||
// The returned key and value are only valid for the life of the transaction.
|
|
||||||
func (c *Cursor) Next() (key []byte, value []byte) {
|
|
||||||
_assert(c.bucket.tx.db != nil, "tx closed")
|
|
||||||
k, v, flags := c.next()
|
|
||||||
if (flags & uint32(bucketLeafFlag)) != 0 {
|
|
||||||
return k, nil
|
|
||||||
}
|
|
||||||
return k, v
|
|
||||||
}
|
|
||||||
|
|
||||||
// Prev moves the cursor to the previous item in the bucket and returns its key and value.
|
|
||||||
// If the cursor is at the beginning of the bucket then a nil key and value are returned.
|
|
||||||
// The returned key and value are only valid for the life of the transaction.
|
|
||||||
func (c *Cursor) Prev() (key []byte, value []byte) {
|
|
||||||
_assert(c.bucket.tx.db != nil, "tx closed")
|
|
||||||
|
|
||||||
// Attempt to move back one element until we're successful.
|
|
||||||
// Move up the stack as we hit the beginning of each page in our stack.
|
|
||||||
for i := len(c.stack) - 1; i >= 0; i-- {
|
|
||||||
elem := &c.stack[i]
|
|
||||||
if elem.index > 0 {
|
|
||||||
elem.index--
|
|
||||||
break
|
|
||||||
}
|
|
||||||
c.stack = c.stack[:i]
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we've hit the end then return nil.
|
|
||||||
if len(c.stack) == 0 {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Move down the stack to find the last element of the last leaf under this branch.
|
|
||||||
c.last()
|
|
||||||
k, v, flags := c.keyValue()
|
|
||||||
if (flags & uint32(bucketLeafFlag)) != 0 {
|
|
||||||
return k, nil
|
|
||||||
}
|
|
||||||
return k, v
|
|
||||||
}
|
|
||||||
|
|
||||||
// Seek moves the cursor to a given key and returns it.
|
|
||||||
// If the key does not exist then the next key is used. If no keys
|
|
||||||
// follow, a nil key is returned.
|
|
||||||
// The returned key and value are only valid for the life of the transaction.
|
|
||||||
func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) {
|
|
||||||
k, v, flags := c.seek(seek)
|
|
||||||
|
|
||||||
// If we ended up after the last element of a page then move to the next one.
|
|
||||||
if ref := &c.stack[len(c.stack)-1]; ref.index >= ref.count() {
|
|
||||||
k, v, flags = c.next()
|
|
||||||
}
|
|
||||||
|
|
||||||
if k == nil {
|
|
||||||
return nil, nil
|
|
||||||
} else if (flags & uint32(bucketLeafFlag)) != 0 {
|
|
||||||
return k, nil
|
|
||||||
}
|
|
||||||
return k, v
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete removes the current key/value under the cursor from the bucket.
|
|
||||||
// Delete fails if current key/value is a bucket or if the transaction is not writable.
|
|
||||||
func (c *Cursor) Delete() error {
|
|
||||||
if c.bucket.tx.db == nil {
|
|
||||||
return ErrTxClosed
|
|
||||||
} else if !c.bucket.Writable() {
|
|
||||||
return ErrTxNotWritable
|
|
||||||
}
|
|
||||||
|
|
||||||
key, _, flags := c.keyValue()
|
|
||||||
// Return an error if current value is a bucket.
|
|
||||||
if (flags & bucketLeafFlag) != 0 {
|
|
||||||
return ErrIncompatibleValue
|
|
||||||
}
|
|
||||||
c.node().del(key)
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// seek moves the cursor to a given key and returns it.
// If the key does not exist then the next key is used.
// Unlike the exported Seek, the caller is responsible for handling a
// cursor left past the end of a page (see Cursor.Seek).
func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) {
	_assert(c.bucket.tx.db != nil, "tx closed")

	// Start from root page/node and traverse to correct page.
	// The stack is rebuilt from scratch on every seek.
	c.stack = c.stack[:0]
	c.search(seek, c.bucket.root)

	// If this is a bucket then return a nil value.
	return c.keyValue()
}
|
|
||||||
|
|
||||||
// first moves the cursor to the first leaf element under the last page in the stack.
// It repeatedly pushes the 0th child of the current branch until a leaf is on top.
func (c *Cursor) first() {
	for {
		// Exit when we hit a leaf page.
		var ref = &c.stack[len(c.stack)-1]
		if ref.isLeaf() {
			break
		}

		// Keep adding pages pointing to the first element to the stack.
		var pgid pgid
		if ref.node != nil {
			// Prefer the in-memory node over the mmap'd page when present.
			pgid = ref.node.inodes[ref.index].pgid
		} else {
			pgid = ref.page.branchPageElement(uint16(ref.index)).pgid
		}
		p, n := c.bucket.pageNode(pgid)
		c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
	}
}
|
|
||||||
|
|
||||||
// last moves the cursor to the last leaf element under the last page in the stack.
// It repeatedly pushes the highest-indexed child of the current branch until a
// leaf is on top.
func (c *Cursor) last() {
	for {
		// Exit when we hit a leaf page.
		ref := &c.stack[len(c.stack)-1]
		if ref.isLeaf() {
			break
		}

		// Keep adding pages pointing to the last element in the stack.
		var pgid pgid
		if ref.node != nil {
			// Prefer the in-memory node over the mmap'd page when present.
			pgid = ref.node.inodes[ref.index].pgid
		} else {
			pgid = ref.page.branchPageElement(uint16(ref.index)).pgid
		}
		p, n := c.bucket.pageNode(pgid)

		// Point at the final element of the child before descending into it.
		var nextRef = elemRef{page: p, node: n}
		nextRef.index = nextRef.count() - 1
		c.stack = append(c.stack, nextRef)
	}
}
|
|
||||||
|
|
||||||
// next moves to the next leaf element and returns the key and value.
// If the cursor is at the last leaf element then it stays there and returns nil.
func (c *Cursor) next() (key []byte, value []byte, flags uint32) {
	for {
		// Attempt to move over one element until we're successful.
		// Move up the stack as we hit the end of each page in our stack.
		var i int
		for i = len(c.stack) - 1; i >= 0; i-- {
			elem := &c.stack[i]
			if elem.index < elem.count()-1 {
				elem.index++
				break
			}
		}

		// If we've hit the root page then stop and return. This will leave the
		// cursor on the last element of the last page.
		if i == -1 {
			return nil, nil, 0
		}

		// Otherwise start from where we left off in the stack and find the
		// first element of the first leaf page.
		c.stack = c.stack[:i+1]
		c.first()

		// If this is an empty page then restart and move back up the stack.
		// https://github.com/boltdb/bolt/issues/450
		if c.stack[len(c.stack)-1].count() == 0 {
			continue
		}

		return c.keyValue()
	}
}
|
|
||||||
|
|
||||||
// search recursively performs a binary search against a given page/node until it finds a given key.
|
|
||||||
func (c *Cursor) search(key []byte, pgid pgid) {
|
|
||||||
p, n := c.bucket.pageNode(pgid)
|
|
||||||
if p != nil && (p.flags&(branchPageFlag|leafPageFlag)) == 0 {
|
|
||||||
panic(fmt.Sprintf("invalid page type: %d: %x", p.id, p.flags))
|
|
||||||
}
|
|
||||||
e := elemRef{page: p, node: n}
|
|
||||||
c.stack = append(c.stack, e)
|
|
||||||
|
|
||||||
// If we're on a leaf page/node then find the specific node.
|
|
||||||
if e.isLeaf() {
|
|
||||||
c.nsearch(key)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if n != nil {
|
|
||||||
c.searchNode(key, n)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
c.searchPage(key, p)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *Cursor) searchNode(key []byte, n *node) {
|
|
||||||
var exact bool
|
|
||||||
index := sort.Search(len(n.inodes), func(i int) bool {
|
|
||||||
// TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
|
|
||||||
// sort.Search() finds the lowest index where f() != -1 but we need the highest index.
|
|
||||||
ret := bytes.Compare(n.inodes[i].key, key)
|
|
||||||
if ret == 0 {
|
|
||||||
exact = true
|
|
||||||
}
|
|
||||||
return ret != -1
|
|
||||||
})
|
|
||||||
if !exact && index > 0 {
|
|
||||||
index--
|
|
||||||
}
|
|
||||||
c.stack[len(c.stack)-1].index = index
|
|
||||||
|
|
||||||
// Recursively search to the next page.
|
|
||||||
c.search(key, n.inodes[index].pgid)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *Cursor) searchPage(key []byte, p *page) {
|
|
||||||
// Binary search for the correct range.
|
|
||||||
inodes := p.branchPageElements()
|
|
||||||
|
|
||||||
var exact bool
|
|
||||||
index := sort.Search(int(p.count), func(i int) bool {
|
|
||||||
// TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
|
|
||||||
// sort.Search() finds the lowest index where f() != -1 but we need the highest index.
|
|
||||||
ret := bytes.Compare(inodes[i].key(), key)
|
|
||||||
if ret == 0 {
|
|
||||||
exact = true
|
|
||||||
}
|
|
||||||
return ret != -1
|
|
||||||
})
|
|
||||||
if !exact && index > 0 {
|
|
||||||
index--
|
|
||||||
}
|
|
||||||
c.stack[len(c.stack)-1].index = index
|
|
||||||
|
|
||||||
// Recursively search to the next page.
|
|
||||||
c.search(key, inodes[index].pgid)
|
|
||||||
}
|
|
||||||
|
|
||||||
// nsearch searches the leaf node on the top of the stack for a key.
|
|
||||||
func (c *Cursor) nsearch(key []byte) {
|
|
||||||
e := &c.stack[len(c.stack)-1]
|
|
||||||
p, n := e.page, e.node
|
|
||||||
|
|
||||||
// If we have a node then search its inodes.
|
|
||||||
if n != nil {
|
|
||||||
index := sort.Search(len(n.inodes), func(i int) bool {
|
|
||||||
return bytes.Compare(n.inodes[i].key, key) != -1
|
|
||||||
})
|
|
||||||
e.index = index
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we have a page then search its leaf elements.
|
|
||||||
inodes := p.leafPageElements()
|
|
||||||
index := sort.Search(int(p.count), func(i int) bool {
|
|
||||||
return bytes.Compare(inodes[i].key(), key) != -1
|
|
||||||
})
|
|
||||||
e.index = index
|
|
||||||
}
|
|
||||||
|
|
||||||
// keyValue returns the key and value of the current leaf element.
// The returned flags distinguish plain values from nested buckets.
func (c *Cursor) keyValue() ([]byte, []byte, uint32) {
	ref := &c.stack[len(c.stack)-1]

	// If the cursor is pointing to the end of page/node then return nil.
	if ref.count() == 0 || ref.index >= ref.count() {
		return nil, nil, 0
	}

	// Retrieve value from node.
	// The in-memory node, when present, takes precedence over the page.
	if ref.node != nil {
		inode := &ref.node.inodes[ref.index]
		return inode.key, inode.value, inode.flags
	}

	// Or retrieve value from page.
	elem := ref.page.leafPageElement(uint16(ref.index))
	return elem.key(), elem.value(), elem.flags
}
|
|
||||||
|
|
||||||
// node returns the node that the cursor is currently positioned on,
// materializing in-memory nodes along the path from the root if needed.
func (c *Cursor) node() *node {
	_assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack")

	// If the top of the stack is a leaf node then just return it.
	if ref := &c.stack[len(c.stack)-1]; ref.node != nil && ref.isLeaf() {
		return ref.node
	}

	// Start from root and traverse down the hierarchy.
	var n = c.stack[0].node
	if n == nil {
		// No materialized root yet; create one from the root page.
		n = c.bucket.node(c.stack[0].page.id, nil)
	}
	// Follow the recorded child index at every branch level.
	for _, ref := range c.stack[:len(c.stack)-1] {
		_assert(!n.isLeaf, "expected branch node")
		n = n.childAt(int(ref.index))
	}
	_assert(n.isLeaf, "expected leaf node")
	return n
}
|
|
||||||
|
|
||||||
// elemRef represents a reference to an element on a given page/node.
type elemRef struct {
	page  *page // mmap'd page backing this level of the cursor stack
	node  *node // in-memory node; when non-nil it takes precedence over page (see isLeaf/count)
	index int   // position of the referenced element within the page/node
}
|
|
||||||
|
|
||||||
// isLeaf returns whether the ref is pointing at a leaf page/node.
|
|
||||||
func (r *elemRef) isLeaf() bool {
|
|
||||||
if r.node != nil {
|
|
||||||
return r.node.isLeaf
|
|
||||||
}
|
|
||||||
return (r.page.flags & leafPageFlag) != 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// count returns the number of inodes or page elements.
|
|
||||||
func (r *elemRef) count() int {
|
|
||||||
if r.node != nil {
|
|
||||||
return len(r.node.inodes)
|
|
||||||
}
|
|
||||||
return int(r.page.count)
|
|
||||||
}
|
|
1138
vendor/go.etcd.io/bbolt/db.go
generated
vendored
1138
vendor/go.etcd.io/bbolt/db.go
generated
vendored
File diff suppressed because it is too large
Load Diff
44
vendor/go.etcd.io/bbolt/doc.go
generated
vendored
44
vendor/go.etcd.io/bbolt/doc.go
generated
vendored
@ -1,44 +0,0 @@
|
|||||||
/*
|
|
||||||
Package bbolt implements a low-level key/value store in pure Go. It supports
|
|
||||||
fully serializable transactions, ACID semantics, and lock-free MVCC with
|
|
||||||
multiple readers and a single writer. Bolt can be used for projects that
|
|
||||||
want a simple data store without the need to add large dependencies such as
|
|
||||||
Postgres or MySQL.
|
|
||||||
|
|
||||||
Bolt is a single-level, zero-copy, B+tree data store. This means that Bolt is
|
|
||||||
optimized for fast read access and does not require recovery in the event of a
|
|
||||||
system crash. Transactions which have not finished committing will simply be
|
|
||||||
rolled back in the event of a crash.
|
|
||||||
|
|
||||||
The design of Bolt is based on Howard Chu's LMDB database project.
|
|
||||||
|
|
||||||
Bolt currently works on Windows, Mac OS X, and Linux.
|
|
||||||
|
|
||||||
|
|
||||||
Basics
|
|
||||||
|
|
||||||
There are only a few types in Bolt: DB, Bucket, Tx, and Cursor. The DB is
|
|
||||||
a collection of buckets and is represented by a single file on disk. A bucket is
|
|
||||||
a collection of unique keys that are associated with values.
|
|
||||||
|
|
||||||
Transactions provide either read-only or read-write access to the database.
|
|
||||||
Read-only transactions can retrieve key/value pairs and can use Cursors to
|
|
||||||
iterate over the dataset sequentially. Read-write transactions can create and
|
|
||||||
delete buckets and can insert and remove keys. Only one read-write transaction
|
|
||||||
is allowed at a time.
|
|
||||||
|
|
||||||
|
|
||||||
Caveats
|
|
||||||
|
|
||||||
The database uses a read-only, memory-mapped data file to ensure that
|
|
||||||
applications cannot corrupt the database, however, this means that keys and
|
|
||||||
values returned from Bolt cannot be changed. Writing to a read-only byte slice
|
|
||||||
will cause Go to panic.
|
|
||||||
|
|
||||||
Keys and values retrieved from the database are only valid for the life of
|
|
||||||
the transaction. When used outside the transaction, these byte slices can
|
|
||||||
point to different data or can point to invalid memory which will cause a panic.
|
|
||||||
|
|
||||||
|
|
||||||
*/
|
|
||||||
package bbolt
|
|
71
vendor/go.etcd.io/bbolt/errors.go
generated
vendored
71
vendor/go.etcd.io/bbolt/errors.go
generated
vendored
@ -1,71 +0,0 @@
|
|||||||
package bbolt
|
|
||||||
|
|
||||||
import "errors"
|
|
||||||
|
|
||||||
// These errors can be returned when opening or calling methods on a DB.
|
|
||||||
var (
|
|
||||||
// ErrDatabaseNotOpen is returned when a DB instance is accessed before it
|
|
||||||
// is opened or after it is closed.
|
|
||||||
ErrDatabaseNotOpen = errors.New("database not open")
|
|
||||||
|
|
||||||
// ErrDatabaseOpen is returned when opening a database that is
|
|
||||||
// already open.
|
|
||||||
ErrDatabaseOpen = errors.New("database already open")
|
|
||||||
|
|
||||||
// ErrInvalid is returned when both meta pages on a database are invalid.
|
|
||||||
// This typically occurs when a file is not a bolt database.
|
|
||||||
ErrInvalid = errors.New("invalid database")
|
|
||||||
|
|
||||||
// ErrVersionMismatch is returned when the data file was created with a
|
|
||||||
// different version of Bolt.
|
|
||||||
ErrVersionMismatch = errors.New("version mismatch")
|
|
||||||
|
|
||||||
// ErrChecksum is returned when either meta page checksum does not match.
|
|
||||||
ErrChecksum = errors.New("checksum error")
|
|
||||||
|
|
||||||
// ErrTimeout is returned when a database cannot obtain an exclusive lock
|
|
||||||
// on the data file after the timeout passed to Open().
|
|
||||||
ErrTimeout = errors.New("timeout")
|
|
||||||
)
|
|
||||||
|
|
||||||
// These errors can occur when beginning or committing a Tx.
|
|
||||||
var (
|
|
||||||
// ErrTxNotWritable is returned when performing a write operation on a
|
|
||||||
// read-only transaction.
|
|
||||||
ErrTxNotWritable = errors.New("tx not writable")
|
|
||||||
|
|
||||||
// ErrTxClosed is returned when committing or rolling back a transaction
|
|
||||||
// that has already been committed or rolled back.
|
|
||||||
ErrTxClosed = errors.New("tx closed")
|
|
||||||
|
|
||||||
// ErrDatabaseReadOnly is returned when a mutating transaction is started on a
|
|
||||||
// read-only database.
|
|
||||||
ErrDatabaseReadOnly = errors.New("database is in read-only mode")
|
|
||||||
)
|
|
||||||
|
|
||||||
// These errors can occur when putting or deleting a value or a bucket.
|
|
||||||
var (
|
|
||||||
// ErrBucketNotFound is returned when trying to access a bucket that has
|
|
||||||
// not been created yet.
|
|
||||||
ErrBucketNotFound = errors.New("bucket not found")
|
|
||||||
|
|
||||||
// ErrBucketExists is returned when creating a bucket that already exists.
|
|
||||||
ErrBucketExists = errors.New("bucket already exists")
|
|
||||||
|
|
||||||
// ErrBucketNameRequired is returned when creating a bucket with a blank name.
|
|
||||||
ErrBucketNameRequired = errors.New("bucket name required")
|
|
||||||
|
|
||||||
// ErrKeyRequired is returned when inserting a zero-length key.
|
|
||||||
ErrKeyRequired = errors.New("key required")
|
|
||||||
|
|
||||||
// ErrKeyTooLarge is returned when inserting a key that is larger than MaxKeySize.
|
|
||||||
ErrKeyTooLarge = errors.New("key too large")
|
|
||||||
|
|
||||||
// ErrValueTooLarge is returned when inserting a value that is larger than MaxValueSize.
|
|
||||||
ErrValueTooLarge = errors.New("value too large")
|
|
||||||
|
|
||||||
// ErrIncompatibleValue is returned when trying to create or delete a bucket
|
|
||||||
// on an existing non-bucket key or when trying to create or delete a
|
|
||||||
// non-bucket key on an existing bucket key.
|
|
||||||
ErrIncompatibleValue = errors.New("incompatible value")
|
|
||||||
)
|
|
333
vendor/go.etcd.io/bbolt/freelist.go
generated
vendored
333
vendor/go.etcd.io/bbolt/freelist.go
generated
vendored
@ -1,333 +0,0 @@
|
|||||||
package bbolt
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"sort"
|
|
||||||
"unsafe"
|
|
||||||
)
|
|
||||||
|
|
||||||
// txPending holds a list of pgids and corresponding allocation txns
// that are pending to be freed.
type txPending struct {
	ids              []pgid
	alloctx          []txid // txids allocating the ids
	lastReleaseBegin txid   // beginning txid of last matching releaseRange
}

// freelist represents a list of all pages that are available for allocation.
// It also tracks pages that have been freed but are still in use by open transactions.
type freelist struct {
	ids     []pgid              // all free and available free page ids.
	allocs  map[pgid]txid       // mapping of txid that allocated a pgid.
	pending map[txid]*txPending // mapping of soon-to-be free page ids by tx.
	cache   map[pgid]bool       // fast lookup of all free and pending page ids.
}

// newFreelist returns an empty, initialized freelist.
// ids is deliberately left nil: a nil slice is a valid empty freelist.
func newFreelist() *freelist {
	return &freelist{
		allocs:  make(map[pgid]txid),
		pending: make(map[txid]*txPending),
		cache:   make(map[pgid]bool),
	}
}
|
|
||||||
|
|
||||||
// size returns the size of the page after serialization.
func (f *freelist) size() int {
	n := f.count()
	if n >= 0xFFFF {
		// The first element will be used to store the count. See freelist.write.
		n++
	}
	return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * n)
}

// count returns count of pages on the freelist (free plus pending).
func (f *freelist) count() int {
	return f.free_count() + f.pending_count()
}

// free_count returns count of free pages.
func (f *freelist) free_count() int {
	return len(f.ids)
}

// pending_count returns count of pending pages across all transactions.
func (f *freelist) pending_count() int {
	var count int
	for _, txp := range f.pending {
		count += len(txp.ids)
	}
	return count
}
|
|
||||||
|
|
||||||
// copyall copies into dst a list of all free ids and all pending ids in one sorted list.
// f.count returns the minimum length required for dst.
func (f *freelist) copyall(dst []pgid) {
	// Gather pending ids from every transaction, sort them, then merge
	// with the already-sorted free list directly into dst.
	m := make(pgids, 0, f.pending_count())
	for _, txp := range f.pending {
		m = append(m, txp.ids...)
	}
	sort.Sort(m)
	mergepgids(dst, f.ids, m)
}
|
|
||||||
|
|
||||||
// allocate returns the starting page id of a contiguous list of pages of a given size.
// If a contiguous block cannot be found then 0 is returned.
// The allocation is recorded in f.allocs against txid so a later rollback
// can undo it (see freelist.rollback).
func (f *freelist) allocate(txid txid, n int) pgid {
	if len(f.ids) == 0 {
		return 0
	}

	// initial is the first id of the current contiguous run; previd is the
	// previously visited id used to detect gaps. f.ids is kept sorted.
	var initial, previd pgid
	for i, id := range f.ids {
		if id <= 1 {
			// Pages 0 and 1 must never appear on the freelist (see free()).
			panic(fmt.Sprintf("invalid page allocation: %d", id))
		}

		// Reset initial page if this is not contiguous.
		if previd == 0 || id-previd != 1 {
			initial = id
		}

		// If we found a contiguous block then remove it and return it.
		if (id-initial)+1 == pgid(n) {
			// If we're allocating off the beginning then take the fast path
			// and just adjust the existing slice. This will use extra memory
			// temporarily but the append() in free() will realloc the slice
			// as is necessary.
			if (i + 1) == n {
				f.ids = f.ids[i+1:]
			} else {
				copy(f.ids[i-n+1:], f.ids[i+1:])
				f.ids = f.ids[:len(f.ids)-n]
			}

			// Remove from the free cache.
			for i := pgid(0); i < pgid(n); i++ {
				delete(f.cache, initial+i)
			}
			f.allocs[initial] = txid
			return initial
		}

		previd = id
	}
	return 0
}
|
|
||||||
|
|
||||||
// free releases a page and its overflow for a given transaction id.
// If the page is already free then a panic will occur.
// Pages are not returned to f.ids immediately; they are parked on the
// per-tx pending list until release/releaseRange moves them over.
func (f *freelist) free(txid txid, p *page) {
	if p.id <= 1 {
		panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id))
	}

	// Free page and all its overflow pages.
	txp := f.pending[txid]
	if txp == nil {
		// Lazily create the pending record for this transaction.
		txp = &txPending{}
		f.pending[txid] = txp
	}
	allocTxid, ok := f.allocs[p.id]
	if ok {
		delete(f.allocs, p.id)
	} else if (p.flags & freelistPageFlag) != 0 {
		// Freelist is always allocated by prior tx.
		allocTxid = txid - 1
	}

	for id := p.id; id <= p.id+pgid(p.overflow); id++ {
		// Verify that page is not already free.
		if f.cache[id] {
			panic(fmt.Sprintf("page %d already freed", id))
		}
		// Add to the freelist and cache.
		txp.ids = append(txp.ids, id)
		txp.alloctx = append(txp.alloctx, allocTxid)
		f.cache[id] = true
	}
}
|
|
||||||
|
|
||||||
// release moves all page ids for a transaction id (or older) to the freelist.
func (f *freelist) release(txid txid) {
	m := make(pgids, 0)
	for tid, txp := range f.pending {
		if tid <= txid {
			// Move transaction's pending pages to the available freelist.
			// Don't remove from the cache since the page is still free.
			m = append(m, txp.ids...)
			delete(f.pending, tid)
		}
	}
	// Merge requires both inputs sorted; f.ids is already sorted.
	sort.Sort(m)
	f.ids = pgids(f.ids).merge(m)
}
|
|
||||||
|
|
||||||
// releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
func (f *freelist) releaseRange(begin, end txid) {
	if begin > end {
		return
	}
	var m pgids
	for tid, txp := range f.pending {
		if tid < begin || tid > end {
			continue
		}
		// Don't recompute freed pages if ranges haven't updated.
		if txp.lastReleaseBegin == begin {
			continue
		}
		for i := 0; i < len(txp.ids); i++ {
			if atx := txp.alloctx[i]; atx < begin || atx > end {
				continue
			}
			// Swap-remove: overwrite slot i with the last entry, shrink both
			// parallel slices, then step i back so the moved entry is also
			// examined on the next iteration.
			m = append(m, txp.ids[i])
			txp.ids[i] = txp.ids[len(txp.ids)-1]
			txp.ids = txp.ids[:len(txp.ids)-1]
			txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1]
			txp.alloctx = txp.alloctx[:len(txp.alloctx)-1]
			i--
		}
		txp.lastReleaseBegin = begin
		if len(txp.ids) == 0 {
			delete(f.pending, tid)
		}
	}
	// Merge requires both inputs sorted; f.ids is already sorted.
	sort.Sort(m)
	f.ids = pgids(f.ids).merge(m)
}
|
|
||||||
|
|
||||||
// rollback removes the pages from a given pending tx.
func (f *freelist) rollback(txid txid) {
	// Remove page ids from cache.
	txp := f.pending[txid]
	if txp == nil {
		return
	}
	var m pgids
	for i, pgid := range txp.ids {
		delete(f.cache, pgid)
		tx := txp.alloctx[i]
		if tx == 0 {
			// No recorded allocator; nothing further to restore.
			continue
		}
		if tx != txid {
			// Pending free aborted; restore page back to alloc list.
			f.allocs[pgid] = tx
		} else {
			// Freed page was allocated by this txn; OK to throw away.
			m = append(m, pgid)
		}
	}
	// Remove pages from pending list and mark as free if allocated by txid.
	delete(f.pending, txid)
	sort.Sort(m)
	f.ids = pgids(f.ids).merge(m)
}
|
|
||||||
|
|
||||||
// freed returns whether a given page is in the free list
// (either free or pending, since the cache covers both).
func (f *freelist) freed(pgid pgid) bool {
	return f.cache[pgid]
}

// read initializes the freelist from a freelist page.
func (f *freelist) read(p *page) {
	if (p.flags & freelistPageFlag) == 0 {
		panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ()))
	}
	// If the page.count is at the max uint16 value (64k) then it's considered
	// an overflow and the size of the freelist is stored as the first element.
	idx, count := 0, int(p.count)
	if count == 0xFFFF {
		idx = 1
		count = int(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0])
	}

	// Copy the list of page ids from the freelist.
	if count == 0 {
		f.ids = nil
	} else {
		// Copy out of the mmap'd region so f.ids survives remapping.
		ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx : idx+count]
		f.ids = make([]pgid, len(ids))
		copy(f.ids, ids)

		// Make sure they're sorted.
		sort.Sort(pgids(f.ids))
	}

	// Rebuild the page cache.
	f.reindex()
}

// readIDs initializes the freelist from a given list of ids.
// The slice is retained, not copied.
func (f *freelist) readIDs(ids []pgid) {
	f.ids = ids
	f.reindex()
}
|
|
||||||
|
|
||||||
// write writes the page ids onto a freelist page. All free and pending ids are
// saved to disk since in the event of a program crash, all pending ids will
// become free.
func (f *freelist) write(p *page) error {
	// Combine the old free pgids and pgids waiting on an open transaction.

	// Update the header flag.
	p.flags |= freelistPageFlag

	// The page.count can only hold up to 64k elements so if we overflow that
	// number then we handle it by putting the size in the first element.
	lenids := f.count()
	if lenids == 0 {
		p.count = uint16(lenids)
	} else if lenids < 0xFFFF {
		p.count = uint16(lenids)
		f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:])
	} else {
		// Overflow: sentinel count, real length stored as element 0,
		// ids follow from element 1 (mirrors freelist.read).
		p.count = 0xFFFF
		((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(lenids)
		f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:])
	}

	return nil
}
|
|
||||||
|
|
||||||
// reload reads the freelist from a page and filters out pending items,
// so that ids already parked on open transactions are not double-listed.
func (f *freelist) reload(p *page) {
	f.read(p)

	// Build a cache of only pending pages.
	pcache := make(map[pgid]bool)
	for _, txp := range f.pending {
		for _, pendingID := range txp.ids {
			pcache[pendingID] = true
		}
	}

	// Check each page in the freelist and build a new available freelist
	// with any pages not in the pending lists.
	var a []pgid
	for _, id := range f.ids {
		if !pcache[id] {
			a = append(a, id)
		}
	}
	f.ids = a

	// Once the available list is rebuilt then rebuild the free cache so that
	// it includes the available and pending free pages.
	f.reindex()
}

// reindex rebuilds the free cache based on available and pending free lists.
func (f *freelist) reindex() {
	f.cache = make(map[pgid]bool, len(f.ids))
	for _, id := range f.ids {
		f.cache[id] = true
	}
	for _, txp := range f.pending {
		for _, pendingID := range txp.ids {
			f.cache[pendingID] = true
		}
	}
}
|
|
604
vendor/go.etcd.io/bbolt/node.go
generated
vendored
604
vendor/go.etcd.io/bbolt/node.go
generated
vendored
@ -1,604 +0,0 @@
|
|||||||
package bbolt
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"fmt"
|
|
||||||
"sort"
|
|
||||||
"unsafe"
|
|
||||||
)
|
|
||||||
|
|
||||||
// node represents an in-memory, deserialized page.
type node struct {
	bucket     *Bucket // owning bucket; used to resolve child nodes (see childAt)
	isLeaf     bool    // leaf vs. branch, derived from the page flags in read()
	unbalanced bool    // set by del(); node needs rebalancing
	spilled    bool    // NOTE(review): presumably set once the node is written back — confirm in spill code
	key        []byte  // first key; used to locate this node in its parent when spilling
	pgid       pgid    // id of the page this node was read from
	parent     *node   // nil for the root node
	children   nodes
	inodes     inodes // sorted in-memory elements of this node
}

// root returns the top-level node this node is attached to,
// walking parent pointers recursively.
func (n *node) root() *node {
	if n.parent == nil {
		return n
	}
	return n.parent.root()
}

// minKeys returns the minimum number of inodes this node should have
// (leaves may hold a single key; branches need at least two children).
func (n *node) minKeys() int {
	if n.isLeaf {
		return 1
	}
	return 2
}
|
|
||||||
|
|
||||||
// size returns the size of the node after serialization.
func (n *node) size() int {
	sz, elsz := pageHeaderSize, n.pageElementSize()
	for i := 0; i < len(n.inodes); i++ {
		item := &n.inodes[i]
		sz += elsz + len(item.key) + len(item.value)
	}
	return sz
}

// sizeLessThan returns true if the node is less than a given size.
// This is an optimization to avoid calculating a large node when we only need
// to know if it fits inside a certain page size.
func (n *node) sizeLessThan(v int) bool {
	sz, elsz := pageHeaderSize, n.pageElementSize()
	for i := 0; i < len(n.inodes); i++ {
		item := &n.inodes[i]
		sz += elsz + len(item.key) + len(item.value)
		if sz >= v {
			// Early exit as soon as the running total crosses v.
			return false
		}
	}
	return true
}

// pageElementSize returns the size of each page element based on the type of node.
func (n *node) pageElementSize() int {
	if n.isLeaf {
		return leafPageElementSize
	}
	return branchPageElementSize
}
|
|
||||||
|
|
||||||
// childAt returns the child node at a given index.
// Only valid on branch nodes; calling it on a leaf panics.
func (n *node) childAt(index int) *node {
	if n.isLeaf {
		panic(fmt.Sprintf("invalid childAt(%d) on a leaf node", index))
	}
	return n.bucket.node(n.inodes[index].pgid, n)
}

// childIndex returns the index of a given child node,
// located by binary search on the child's first key.
func (n *node) childIndex(child *node) int {
	index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, child.key) != -1 })
	return index
}

// numChildren returns the number of children.
func (n *node) numChildren() int {
	return len(n.inodes)
}
|
|
||||||
|
|
||||||
// nextSibling returns the next node with the same parent,
// or nil for the root or the last child.
func (n *node) nextSibling() *node {
	if n.parent == nil {
		return nil
	}
	index := n.parent.childIndex(n)
	if index >= n.parent.numChildren()-1 {
		return nil
	}
	return n.parent.childAt(index + 1)
}

// prevSibling returns the previous node with the same parent,
// or nil for the root or the first child.
func (n *node) prevSibling() *node {
	if n.parent == nil {
		return nil
	}
	index := n.parent.childIndex(n)
	if index == 0 {
		return nil
	}
	return n.parent.childAt(index - 1)
}
|
|
||||||
|
|
||||||
// put inserts a key/value.
// The slot is located by oldKey; newKey is what actually gets stored,
// which allows a key to be replaced in place.
func (n *node) put(oldKey, newKey, value []byte, pgid pgid, flags uint32) {
	if pgid >= n.bucket.tx.meta.pgid {
		panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", pgid, n.bucket.tx.meta.pgid))
	} else if len(oldKey) <= 0 {
		panic("put: zero-length old key")
	} else if len(newKey) <= 0 {
		panic("put: zero-length new key")
	}

	// Find insertion index.
	index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 })

	// Add capacity and shift nodes if we don't have an exact match and need to insert.
	exact := (len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey))
	if !exact {
		n.inodes = append(n.inodes, inode{})
		copy(n.inodes[index+1:], n.inodes[index:])
	}

	// Fill (or overwrite) the slot at index.
	inode := &n.inodes[index]
	inode.flags = flags
	inode.key = newKey
	inode.value = value
	inode.pgid = pgid
	_assert(len(inode.key) > 0, "put: zero-length inode key")
}
|
|
||||||
|
|
||||||
// del removes a key from the node.
|
|
||||||
func (n *node) del(key []byte) {
|
|
||||||
// Find index of key.
|
|
||||||
index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, key) != -1 })
|
|
||||||
|
|
||||||
// Exit if the key isn't found.
|
|
||||||
if index >= len(n.inodes) || !bytes.Equal(n.inodes[index].key, key) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete inode from the node.
|
|
||||||
n.inodes = append(n.inodes[:index], n.inodes[index+1:]...)
|
|
||||||
|
|
||||||
// Mark the node as needing rebalancing.
|
|
||||||
n.unbalanced = true
|
|
||||||
}
|
|
||||||
|
|
||||||
// read initializes the node from a page.
|
|
||||||
func (n *node) read(p *page) {
|
|
||||||
n.pgid = p.id
|
|
||||||
n.isLeaf = ((p.flags & leafPageFlag) != 0)
|
|
||||||
n.inodes = make(inodes, int(p.count))
|
|
||||||
|
|
||||||
for i := 0; i < int(p.count); i++ {
|
|
||||||
inode := &n.inodes[i]
|
|
||||||
if n.isLeaf {
|
|
||||||
elem := p.leafPageElement(uint16(i))
|
|
||||||
inode.flags = elem.flags
|
|
||||||
inode.key = elem.key()
|
|
||||||
inode.value = elem.value()
|
|
||||||
} else {
|
|
||||||
elem := p.branchPageElement(uint16(i))
|
|
||||||
inode.pgid = elem.pgid
|
|
||||||
inode.key = elem.key()
|
|
||||||
}
|
|
||||||
_assert(len(inode.key) > 0, "read: zero-length inode key")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Save first key so we can find the node in the parent when we spill.
|
|
||||||
if len(n.inodes) > 0 {
|
|
||||||
n.key = n.inodes[0].key
|
|
||||||
_assert(len(n.key) > 0, "read: zero-length node key")
|
|
||||||
} else {
|
|
||||||
n.key = nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// write writes the items onto one or more pages.
|
|
||||||
func (n *node) write(p *page) {
|
|
||||||
// Initialize page.
|
|
||||||
if n.isLeaf {
|
|
||||||
p.flags |= leafPageFlag
|
|
||||||
} else {
|
|
||||||
p.flags |= branchPageFlag
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(n.inodes) >= 0xFFFF {
|
|
||||||
panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.id))
|
|
||||||
}
|
|
||||||
p.count = uint16(len(n.inodes))
|
|
||||||
|
|
||||||
// Stop here if there are no items to write.
|
|
||||||
if p.count == 0 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Loop over each item and write it to the page.
|
|
||||||
b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[n.pageElementSize()*len(n.inodes):]
|
|
||||||
for i, item := range n.inodes {
|
|
||||||
_assert(len(item.key) > 0, "write: zero-length inode key")
|
|
||||||
|
|
||||||
// Write the page element.
|
|
||||||
if n.isLeaf {
|
|
||||||
elem := p.leafPageElement(uint16(i))
|
|
||||||
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
|
|
||||||
elem.flags = item.flags
|
|
||||||
elem.ksize = uint32(len(item.key))
|
|
||||||
elem.vsize = uint32(len(item.value))
|
|
||||||
} else {
|
|
||||||
elem := p.branchPageElement(uint16(i))
|
|
||||||
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
|
|
||||||
elem.ksize = uint32(len(item.key))
|
|
||||||
elem.pgid = item.pgid
|
|
||||||
_assert(elem.pgid != p.id, "write: circular dependency occurred")
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the length of key+value is larger than the max allocation size
|
|
||||||
// then we need to reallocate the byte array pointer.
|
|
||||||
//
|
|
||||||
// See: https://github.com/boltdb/bolt/pull/335
|
|
||||||
klen, vlen := len(item.key), len(item.value)
|
|
||||||
if len(b) < klen+vlen {
|
|
||||||
b = (*[maxAllocSize]byte)(unsafe.Pointer(&b[0]))[:]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write data for the element to the end of the page.
|
|
||||||
copy(b[0:], item.key)
|
|
||||||
b = b[klen:]
|
|
||||||
copy(b[0:], item.value)
|
|
||||||
b = b[vlen:]
|
|
||||||
}
|
|
||||||
|
|
||||||
// DEBUG ONLY: n.dump()
|
|
||||||
}
|
|
||||||
|
|
||||||
// split breaks up a node into multiple smaller nodes, if appropriate.
|
|
||||||
// This should only be called from the spill() function.
|
|
||||||
func (n *node) split(pageSize int) []*node {
|
|
||||||
var nodes []*node
|
|
||||||
|
|
||||||
node := n
|
|
||||||
for {
|
|
||||||
// Split node into two.
|
|
||||||
a, b := node.splitTwo(pageSize)
|
|
||||||
nodes = append(nodes, a)
|
|
||||||
|
|
||||||
// If we can't split then exit the loop.
|
|
||||||
if b == nil {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set node to b so it gets split on the next iteration.
|
|
||||||
node = b
|
|
||||||
}
|
|
||||||
|
|
||||||
return nodes
|
|
||||||
}
|
|
||||||
|
|
||||||
// splitTwo breaks up a node into two smaller nodes, if appropriate.
|
|
||||||
// This should only be called from the split() function.
|
|
||||||
func (n *node) splitTwo(pageSize int) (*node, *node) {
|
|
||||||
// Ignore the split if the page doesn't have at least enough nodes for
|
|
||||||
// two pages or if the nodes can fit in a single page.
|
|
||||||
if len(n.inodes) <= (minKeysPerPage*2) || n.sizeLessThan(pageSize) {
|
|
||||||
return n, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Determine the threshold before starting a new node.
|
|
||||||
var fillPercent = n.bucket.FillPercent
|
|
||||||
if fillPercent < minFillPercent {
|
|
||||||
fillPercent = minFillPercent
|
|
||||||
} else if fillPercent > maxFillPercent {
|
|
||||||
fillPercent = maxFillPercent
|
|
||||||
}
|
|
||||||
threshold := int(float64(pageSize) * fillPercent)
|
|
||||||
|
|
||||||
// Determine split position and sizes of the two pages.
|
|
||||||
splitIndex, _ := n.splitIndex(threshold)
|
|
||||||
|
|
||||||
// Split node into two separate nodes.
|
|
||||||
// If there's no parent then we'll need to create one.
|
|
||||||
if n.parent == nil {
|
|
||||||
n.parent = &node{bucket: n.bucket, children: []*node{n}}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create a new node and add it to the parent.
|
|
||||||
next := &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent}
|
|
||||||
n.parent.children = append(n.parent.children, next)
|
|
||||||
|
|
||||||
// Split inodes across two nodes.
|
|
||||||
next.inodes = n.inodes[splitIndex:]
|
|
||||||
n.inodes = n.inodes[:splitIndex]
|
|
||||||
|
|
||||||
// Update the statistics.
|
|
||||||
n.bucket.tx.stats.Split++
|
|
||||||
|
|
||||||
return n, next
|
|
||||||
}
|
|
||||||
|
|
||||||
// splitIndex finds the position where a page will fill a given threshold.
|
|
||||||
// It returns the index as well as the size of the first page.
|
|
||||||
// This is only be called from split().
|
|
||||||
func (n *node) splitIndex(threshold int) (index, sz int) {
|
|
||||||
sz = pageHeaderSize
|
|
||||||
|
|
||||||
// Loop until we only have the minimum number of keys required for the second page.
|
|
||||||
for i := 0; i < len(n.inodes)-minKeysPerPage; i++ {
|
|
||||||
index = i
|
|
||||||
inode := n.inodes[i]
|
|
||||||
elsize := n.pageElementSize() + len(inode.key) + len(inode.value)
|
|
||||||
|
|
||||||
// If we have at least the minimum number of keys and adding another
|
|
||||||
// node would put us over the threshold then exit and return.
|
|
||||||
if i >= minKeysPerPage && sz+elsize > threshold {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add the element size to the total size.
|
|
||||||
sz += elsize
|
|
||||||
}
|
|
||||||
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// spill writes the nodes to dirty pages and splits nodes as it goes.
|
|
||||||
// Returns an error if dirty pages cannot be allocated.
|
|
||||||
func (n *node) spill() error {
|
|
||||||
var tx = n.bucket.tx
|
|
||||||
if n.spilled {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Spill child nodes first. Child nodes can materialize sibling nodes in
|
|
||||||
// the case of split-merge so we cannot use a range loop. We have to check
|
|
||||||
// the children size on every loop iteration.
|
|
||||||
sort.Sort(n.children)
|
|
||||||
for i := 0; i < len(n.children); i++ {
|
|
||||||
if err := n.children[i].spill(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// We no longer need the child list because it's only used for spill tracking.
|
|
||||||
n.children = nil
|
|
||||||
|
|
||||||
// Split nodes into appropriate sizes. The first node will always be n.
|
|
||||||
var nodes = n.split(tx.db.pageSize)
|
|
||||||
for _, node := range nodes {
|
|
||||||
// Add node's page to the freelist if it's not new.
|
|
||||||
if node.pgid > 0 {
|
|
||||||
tx.db.freelist.free(tx.meta.txid, tx.page(node.pgid))
|
|
||||||
node.pgid = 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allocate contiguous space for the node.
|
|
||||||
p, err := tx.allocate((node.size() + tx.db.pageSize - 1) / tx.db.pageSize)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write the node.
|
|
||||||
if p.id >= tx.meta.pgid {
|
|
||||||
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.id, tx.meta.pgid))
|
|
||||||
}
|
|
||||||
node.pgid = p.id
|
|
||||||
node.write(p)
|
|
||||||
node.spilled = true
|
|
||||||
|
|
||||||
// Insert into parent inodes.
|
|
||||||
if node.parent != nil {
|
|
||||||
var key = node.key
|
|
||||||
if key == nil {
|
|
||||||
key = node.inodes[0].key
|
|
||||||
}
|
|
||||||
|
|
||||||
node.parent.put(key, node.inodes[0].key, nil, node.pgid, 0)
|
|
||||||
node.key = node.inodes[0].key
|
|
||||||
_assert(len(node.key) > 0, "spill: zero-length node key")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update the statistics.
|
|
||||||
tx.stats.Spill++
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the root node split and created a new root then we need to spill that
|
|
||||||
// as well. We'll clear out the children to make sure it doesn't try to respill.
|
|
||||||
if n.parent != nil && n.parent.pgid == 0 {
|
|
||||||
n.children = nil
|
|
||||||
return n.parent.spill()
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// rebalance attempts to combine the node with sibling nodes if the node fill
|
|
||||||
// size is below a threshold or if there are not enough keys.
|
|
||||||
func (n *node) rebalance() {
|
|
||||||
if !n.unbalanced {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
n.unbalanced = false
|
|
||||||
|
|
||||||
// Update statistics.
|
|
||||||
n.bucket.tx.stats.Rebalance++
|
|
||||||
|
|
||||||
// Ignore if node is above threshold (25%) and has enough keys.
|
|
||||||
var threshold = n.bucket.tx.db.pageSize / 4
|
|
||||||
if n.size() > threshold && len(n.inodes) > n.minKeys() {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Root node has special handling.
|
|
||||||
if n.parent == nil {
|
|
||||||
// If root node is a branch and only has one node then collapse it.
|
|
||||||
if !n.isLeaf && len(n.inodes) == 1 {
|
|
||||||
// Move root's child up.
|
|
||||||
child := n.bucket.node(n.inodes[0].pgid, n)
|
|
||||||
n.isLeaf = child.isLeaf
|
|
||||||
n.inodes = child.inodes[:]
|
|
||||||
n.children = child.children
|
|
||||||
|
|
||||||
// Reparent all child nodes being moved.
|
|
||||||
for _, inode := range n.inodes {
|
|
||||||
if child, ok := n.bucket.nodes[inode.pgid]; ok {
|
|
||||||
child.parent = n
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove old child.
|
|
||||||
child.parent = nil
|
|
||||||
delete(n.bucket.nodes, child.pgid)
|
|
||||||
child.free()
|
|
||||||
}
|
|
||||||
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// If node has no keys then just remove it.
|
|
||||||
if n.numChildren() == 0 {
|
|
||||||
n.parent.del(n.key)
|
|
||||||
n.parent.removeChild(n)
|
|
||||||
delete(n.bucket.nodes, n.pgid)
|
|
||||||
n.free()
|
|
||||||
n.parent.rebalance()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
_assert(n.parent.numChildren() > 1, "parent must have at least 2 children")
|
|
||||||
|
|
||||||
// Destination node is right sibling if idx == 0, otherwise left sibling.
|
|
||||||
var target *node
|
|
||||||
var useNextSibling = (n.parent.childIndex(n) == 0)
|
|
||||||
if useNextSibling {
|
|
||||||
target = n.nextSibling()
|
|
||||||
} else {
|
|
||||||
target = n.prevSibling()
|
|
||||||
}
|
|
||||||
|
|
||||||
// If both this node and the target node are too small then merge them.
|
|
||||||
if useNextSibling {
|
|
||||||
// Reparent all child nodes being moved.
|
|
||||||
for _, inode := range target.inodes {
|
|
||||||
if child, ok := n.bucket.nodes[inode.pgid]; ok {
|
|
||||||
child.parent.removeChild(child)
|
|
||||||
child.parent = n
|
|
||||||
child.parent.children = append(child.parent.children, child)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy over inodes from target and remove target.
|
|
||||||
n.inodes = append(n.inodes, target.inodes...)
|
|
||||||
n.parent.del(target.key)
|
|
||||||
n.parent.removeChild(target)
|
|
||||||
delete(n.bucket.nodes, target.pgid)
|
|
||||||
target.free()
|
|
||||||
} else {
|
|
||||||
// Reparent all child nodes being moved.
|
|
||||||
for _, inode := range n.inodes {
|
|
||||||
if child, ok := n.bucket.nodes[inode.pgid]; ok {
|
|
||||||
child.parent.removeChild(child)
|
|
||||||
child.parent = target
|
|
||||||
child.parent.children = append(child.parent.children, child)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy over inodes to target and remove node.
|
|
||||||
target.inodes = append(target.inodes, n.inodes...)
|
|
||||||
n.parent.del(n.key)
|
|
||||||
n.parent.removeChild(n)
|
|
||||||
delete(n.bucket.nodes, n.pgid)
|
|
||||||
n.free()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Either this node or the target node was deleted from the parent so rebalance it.
|
|
||||||
n.parent.rebalance()
|
|
||||||
}
|
|
||||||
|
|
||||||
// removes a node from the list of in-memory children.
|
|
||||||
// This does not affect the inodes.
|
|
||||||
func (n *node) removeChild(target *node) {
|
|
||||||
for i, child := range n.children {
|
|
||||||
if child == target {
|
|
||||||
n.children = append(n.children[:i], n.children[i+1:]...)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// dereference causes the node to copy all its inode key/value references to heap memory.
|
|
||||||
// This is required when the mmap is reallocated so inodes are not pointing to stale data.
|
|
||||||
func (n *node) dereference() {
|
|
||||||
if n.key != nil {
|
|
||||||
key := make([]byte, len(n.key))
|
|
||||||
copy(key, n.key)
|
|
||||||
n.key = key
|
|
||||||
_assert(n.pgid == 0 || len(n.key) > 0, "dereference: zero-length node key on existing node")
|
|
||||||
}
|
|
||||||
|
|
||||||
for i := range n.inodes {
|
|
||||||
inode := &n.inodes[i]
|
|
||||||
|
|
||||||
key := make([]byte, len(inode.key))
|
|
||||||
copy(key, inode.key)
|
|
||||||
inode.key = key
|
|
||||||
_assert(len(inode.key) > 0, "dereference: zero-length inode key")
|
|
||||||
|
|
||||||
value := make([]byte, len(inode.value))
|
|
||||||
copy(value, inode.value)
|
|
||||||
inode.value = value
|
|
||||||
}
|
|
||||||
|
|
||||||
// Recursively dereference children.
|
|
||||||
for _, child := range n.children {
|
|
||||||
child.dereference()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update statistics.
|
|
||||||
n.bucket.tx.stats.NodeDeref++
|
|
||||||
}
|
|
||||||
|
|
||||||
// free adds the node's underlying page to the freelist.
|
|
||||||
func (n *node) free() {
|
|
||||||
if n.pgid != 0 {
|
|
||||||
n.bucket.tx.db.freelist.free(n.bucket.tx.meta.txid, n.bucket.tx.page(n.pgid))
|
|
||||||
n.pgid = 0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// dump writes the contents of the node to STDERR for debugging purposes.
|
|
||||||
/*
|
|
||||||
func (n *node) dump() {
|
|
||||||
// Write node header.
|
|
||||||
var typ = "branch"
|
|
||||||
if n.isLeaf {
|
|
||||||
typ = "leaf"
|
|
||||||
}
|
|
||||||
warnf("[NODE %d {type=%s count=%d}]", n.pgid, typ, len(n.inodes))
|
|
||||||
|
|
||||||
// Write out abbreviated version of each item.
|
|
||||||
for _, item := range n.inodes {
|
|
||||||
if n.isLeaf {
|
|
||||||
if item.flags&bucketLeafFlag != 0 {
|
|
||||||
bucket := (*bucket)(unsafe.Pointer(&item.value[0]))
|
|
||||||
warnf("+L %08x -> (bucket root=%d)", trunc(item.key, 4), bucket.root)
|
|
||||||
} else {
|
|
||||||
warnf("+L %08x -> %08x", trunc(item.key, 4), trunc(item.value, 4))
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
warnf("+B %08x -> pgid=%d", trunc(item.key, 4), item.pgid)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
warn("")
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
type nodes []*node
|
|
||||||
|
|
||||||
func (s nodes) Len() int { return len(s) }
|
|
||||||
func (s nodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
|
||||||
func (s nodes) Less(i, j int) bool { return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1 }
|
|
||||||
|
|
||||||
// inode represents an internal node inside of a node.
|
|
||||||
// It can be used to point to elements in a page or point
|
|
||||||
// to an element which hasn't been added to a page yet.
|
|
||||||
type inode struct {
|
|
||||||
flags uint32
|
|
||||||
pgid pgid
|
|
||||||
key []byte
|
|
||||||
value []byte
|
|
||||||
}
|
|
||||||
|
|
||||||
type inodes []inode
|
|
197
vendor/go.etcd.io/bbolt/page.go
generated
vendored
197
vendor/go.etcd.io/bbolt/page.go
generated
vendored
@ -1,197 +0,0 @@
|
|||||||
package bbolt
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"sort"
|
|
||||||
"unsafe"
|
|
||||||
)
|
|
||||||
|
|
||||||
const pageHeaderSize = int(unsafe.Offsetof(((*page)(nil)).ptr))
|
|
||||||
|
|
||||||
const minKeysPerPage = 2
|
|
||||||
|
|
||||||
const branchPageElementSize = int(unsafe.Sizeof(branchPageElement{}))
|
|
||||||
const leafPageElementSize = int(unsafe.Sizeof(leafPageElement{}))
|
|
||||||
|
|
||||||
const (
|
|
||||||
branchPageFlag = 0x01
|
|
||||||
leafPageFlag = 0x02
|
|
||||||
metaPageFlag = 0x04
|
|
||||||
freelistPageFlag = 0x10
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
|
||||||
bucketLeafFlag = 0x01
|
|
||||||
)
|
|
||||||
|
|
||||||
type pgid uint64
|
|
||||||
|
|
||||||
type page struct {
|
|
||||||
id pgid
|
|
||||||
flags uint16
|
|
||||||
count uint16
|
|
||||||
overflow uint32
|
|
||||||
ptr uintptr
|
|
||||||
}
|
|
||||||
|
|
||||||
// typ returns a human readable page type string used for debugging.
|
|
||||||
func (p *page) typ() string {
|
|
||||||
if (p.flags & branchPageFlag) != 0 {
|
|
||||||
return "branch"
|
|
||||||
} else if (p.flags & leafPageFlag) != 0 {
|
|
||||||
return "leaf"
|
|
||||||
} else if (p.flags & metaPageFlag) != 0 {
|
|
||||||
return "meta"
|
|
||||||
} else if (p.flags & freelistPageFlag) != 0 {
|
|
||||||
return "freelist"
|
|
||||||
}
|
|
||||||
return fmt.Sprintf("unknown<%02x>", p.flags)
|
|
||||||
}
|
|
||||||
|
|
||||||
// meta returns a pointer to the metadata section of the page.
|
|
||||||
func (p *page) meta() *meta {
|
|
||||||
return (*meta)(unsafe.Pointer(&p.ptr))
|
|
||||||
}
|
|
||||||
|
|
||||||
// leafPageElement retrieves the leaf node by index
|
|
||||||
func (p *page) leafPageElement(index uint16) *leafPageElement {
|
|
||||||
n := &((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[index]
|
|
||||||
return n
|
|
||||||
}
|
|
||||||
|
|
||||||
// leafPageElements retrieves a list of leaf nodes.
|
|
||||||
func (p *page) leafPageElements() []leafPageElement {
|
|
||||||
if p.count == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return ((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[:]
|
|
||||||
}
|
|
||||||
|
|
||||||
// branchPageElement retrieves the branch node by index
|
|
||||||
func (p *page) branchPageElement(index uint16) *branchPageElement {
|
|
||||||
return &((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[index]
|
|
||||||
}
|
|
||||||
|
|
||||||
// branchPageElements retrieves a list of branch nodes.
|
|
||||||
func (p *page) branchPageElements() []branchPageElement {
|
|
||||||
if p.count == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return ((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[:]
|
|
||||||
}
|
|
||||||
|
|
||||||
// dump writes n bytes of the page to STDERR as hex output.
|
|
||||||
func (p *page) hexdump(n int) {
|
|
||||||
buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:n]
|
|
||||||
fmt.Fprintf(os.Stderr, "%x\n", buf)
|
|
||||||
}
|
|
||||||
|
|
||||||
type pages []*page
|
|
||||||
|
|
||||||
func (s pages) Len() int { return len(s) }
|
|
||||||
func (s pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
|
||||||
func (s pages) Less(i, j int) bool { return s[i].id < s[j].id }
|
|
||||||
|
|
||||||
// branchPageElement represents a node on a branch page.
|
|
||||||
type branchPageElement struct {
|
|
||||||
pos uint32
|
|
||||||
ksize uint32
|
|
||||||
pgid pgid
|
|
||||||
}
|
|
||||||
|
|
||||||
// key returns a byte slice of the node key.
|
|
||||||
func (n *branchPageElement) key() []byte {
|
|
||||||
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
|
|
||||||
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize]
|
|
||||||
}
|
|
||||||
|
|
||||||
// leafPageElement represents a node on a leaf page.
|
|
||||||
type leafPageElement struct {
|
|
||||||
flags uint32
|
|
||||||
pos uint32
|
|
||||||
ksize uint32
|
|
||||||
vsize uint32
|
|
||||||
}
|
|
||||||
|
|
||||||
// key returns a byte slice of the node key.
|
|
||||||
func (n *leafPageElement) key() []byte {
|
|
||||||
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
|
|
||||||
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize:n.ksize]
|
|
||||||
}
|
|
||||||
|
|
||||||
// value returns a byte slice of the node value.
|
|
||||||
func (n *leafPageElement) value() []byte {
|
|
||||||
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
|
|
||||||
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize:n.vsize]
|
|
||||||
}
|
|
||||||
|
|
||||||
// PageInfo represents human readable information about a page.
|
|
||||||
type PageInfo struct {
|
|
||||||
ID int
|
|
||||||
Type string
|
|
||||||
Count int
|
|
||||||
OverflowCount int
|
|
||||||
}
|
|
||||||
|
|
||||||
type pgids []pgid
|
|
||||||
|
|
||||||
func (s pgids) Len() int { return len(s) }
|
|
||||||
func (s pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
|
||||||
func (s pgids) Less(i, j int) bool { return s[i] < s[j] }
|
|
||||||
|
|
||||||
// merge returns the sorted union of a and b.
|
|
||||||
func (a pgids) merge(b pgids) pgids {
|
|
||||||
// Return the opposite slice if one is nil.
|
|
||||||
if len(a) == 0 {
|
|
||||||
return b
|
|
||||||
}
|
|
||||||
if len(b) == 0 {
|
|
||||||
return a
|
|
||||||
}
|
|
||||||
merged := make(pgids, len(a)+len(b))
|
|
||||||
mergepgids(merged, a, b)
|
|
||||||
return merged
|
|
||||||
}
|
|
||||||
|
|
||||||
// mergepgids copies the sorted union of a and b into dst.
|
|
||||||
// If dst is too small, it panics.
|
|
||||||
func mergepgids(dst, a, b pgids) {
|
|
||||||
if len(dst) < len(a)+len(b) {
|
|
||||||
panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b)))
|
|
||||||
}
|
|
||||||
// Copy in the opposite slice if one is nil.
|
|
||||||
if len(a) == 0 {
|
|
||||||
copy(dst, b)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if len(b) == 0 {
|
|
||||||
copy(dst, a)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Merged will hold all elements from both lists.
|
|
||||||
merged := dst[:0]
|
|
||||||
|
|
||||||
// Assign lead to the slice with a lower starting value, follow to the higher value.
|
|
||||||
lead, follow := a, b
|
|
||||||
if b[0] < a[0] {
|
|
||||||
lead, follow = b, a
|
|
||||||
}
|
|
||||||
|
|
||||||
// Continue while there are elements in the lead.
|
|
||||||
for len(lead) > 0 {
|
|
||||||
// Merge largest prefix of lead that is ahead of follow[0].
|
|
||||||
n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] })
|
|
||||||
merged = append(merged, lead[:n]...)
|
|
||||||
if n >= len(lead) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
// Swap lead and follow.
|
|
||||||
lead, follow = follow, lead[n:]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Append what's left in follow.
|
|
||||||
_ = append(merged, follow...)
|
|
||||||
}
|
|
707
vendor/go.etcd.io/bbolt/tx.go
generated
vendored
707
vendor/go.etcd.io/bbolt/tx.go
generated
vendored
@ -1,707 +0,0 @@
|
|||||||
package bbolt
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"os"
|
|
||||||
"sort"
|
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
"unsafe"
|
|
||||||
)
|
|
||||||
|
|
||||||
// txid represents the internal transaction identifier.
|
|
||||||
type txid uint64
|
|
||||||
|
|
||||||
// Tx represents a read-only or read/write transaction on the database.
|
|
||||||
// Read-only transactions can be used for retrieving values for keys and creating cursors.
|
|
||||||
// Read/write transactions can create and remove buckets and create and remove keys.
|
|
||||||
//
|
|
||||||
// IMPORTANT: You must commit or rollback transactions when you are done with
|
|
||||||
// them. Pages can not be reclaimed by the writer until no more transactions
|
|
||||||
// are using them. A long running read transaction can cause the database to
|
|
||||||
// quickly grow.
|
|
||||||
type Tx struct {
|
|
||||||
writable bool
|
|
||||||
managed bool
|
|
||||||
db *DB
|
|
||||||
meta *meta
|
|
||||||
root Bucket
|
|
||||||
pages map[pgid]*page
|
|
||||||
stats TxStats
|
|
||||||
commitHandlers []func()
|
|
||||||
|
|
||||||
// WriteFlag specifies the flag for write-related methods like WriteTo().
|
|
||||||
// Tx opens the database file with the specified flag to copy the data.
|
|
||||||
//
|
|
||||||
// By default, the flag is unset, which works well for mostly in-memory
|
|
||||||
// workloads. For databases that are much larger than available RAM,
|
|
||||||
// set the flag to syscall.O_DIRECT to avoid trashing the page cache.
|
|
||||||
WriteFlag int
|
|
||||||
}
|
|
||||||
|
|
||||||
// init initializes the transaction.
|
|
||||||
func (tx *Tx) init(db *DB) {
|
|
||||||
tx.db = db
|
|
||||||
tx.pages = nil
|
|
||||||
|
|
||||||
// Copy the meta page since it can be changed by the writer.
|
|
||||||
tx.meta = &meta{}
|
|
||||||
db.meta().copy(tx.meta)
|
|
||||||
|
|
||||||
// Copy over the root bucket.
|
|
||||||
tx.root = newBucket(tx)
|
|
||||||
tx.root.bucket = &bucket{}
|
|
||||||
*tx.root.bucket = tx.meta.root
|
|
||||||
|
|
||||||
// Increment the transaction id and add a page cache for writable transactions.
|
|
||||||
if tx.writable {
|
|
||||||
tx.pages = make(map[pgid]*page)
|
|
||||||
tx.meta.txid += txid(1)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ID returns the transaction id.
|
|
||||||
func (tx *Tx) ID() int {
|
|
||||||
return int(tx.meta.txid)
|
|
||||||
}
|
|
||||||
|
|
||||||
// DB returns a reference to the database that created the transaction.
|
|
||||||
func (tx *Tx) DB() *DB {
|
|
||||||
return tx.db
|
|
||||||
}
|
|
||||||
|
|
||||||
// Size returns current database size in bytes as seen by this transaction.
|
|
||||||
func (tx *Tx) Size() int64 {
|
|
||||||
return int64(tx.meta.pgid) * int64(tx.db.pageSize)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Writable returns whether the transaction can perform write operations.
|
|
||||||
func (tx *Tx) Writable() bool {
|
|
||||||
return tx.writable
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cursor creates a cursor associated with the root bucket.
|
|
||||||
// All items in the cursor will return a nil value because all root bucket keys point to buckets.
|
|
||||||
// The cursor is only valid as long as the transaction is open.
|
|
||||||
// Do not use a cursor after the transaction is closed.
|
|
||||||
func (tx *Tx) Cursor() *Cursor {
|
|
||||||
return tx.root.Cursor()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Stats retrieves a copy of the current transaction statistics.
|
|
||||||
func (tx *Tx) Stats() TxStats {
|
|
||||||
return tx.stats
|
|
||||||
}
|
|
||||||
|
|
||||||
// Bucket retrieves a bucket by name.
|
|
||||||
// Returns nil if the bucket does not exist.
|
|
||||||
// The bucket instance is only valid for the lifetime of the transaction.
|
|
||||||
func (tx *Tx) Bucket(name []byte) *Bucket {
|
|
||||||
return tx.root.Bucket(name)
|
|
||||||
}
|
|
||||||
|
|
||||||
// CreateBucket creates a new bucket.
|
|
||||||
// Returns an error if the bucket already exists, if the bucket name is blank, or if the bucket name is too long.
|
|
||||||
// The bucket instance is only valid for the lifetime of the transaction.
|
|
||||||
func (tx *Tx) CreateBucket(name []byte) (*Bucket, error) {
|
|
||||||
return tx.root.CreateBucket(name)
|
|
||||||
}
|
|
||||||
|
|
||||||
// CreateBucketIfNotExists creates a new bucket if it doesn't already exist.
|
|
||||||
// Returns an error if the bucket name is blank, or if the bucket name is too long.
|
|
||||||
// The bucket instance is only valid for the lifetime of the transaction.
|
|
||||||
func (tx *Tx) CreateBucketIfNotExists(name []byte) (*Bucket, error) {
|
|
||||||
return tx.root.CreateBucketIfNotExists(name)
|
|
||||||
}
|
|
||||||
|
|
||||||
// DeleteBucket deletes a bucket.
|
|
||||||
// Returns an error if the bucket cannot be found or if the key represents a non-bucket value.
|
|
||||||
func (tx *Tx) DeleteBucket(name []byte) error {
|
|
||||||
return tx.root.DeleteBucket(name)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ForEach executes a function for each bucket in the root.
|
|
||||||
// If the provided function returns an error then the iteration is stopped and
|
|
||||||
// the error is returned to the caller.
|
|
||||||
func (tx *Tx) ForEach(fn func(name []byte, b *Bucket) error) error {
|
|
||||||
return tx.root.ForEach(func(k, v []byte) error {
|
|
||||||
return fn(k, tx.root.Bucket(k))
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// OnCommit adds a handler function to be executed after the transaction successfully commits.
|
|
||||||
func (tx *Tx) OnCommit(fn func()) {
|
|
||||||
tx.commitHandlers = append(tx.commitHandlers, fn)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Commit writes all changes to disk and updates the meta page.
|
|
||||||
// Returns an error if a disk write error occurs, or if Commit is
|
|
||||||
// called on a read-only transaction.
|
|
||||||
func (tx *Tx) Commit() error {
|
|
||||||
_assert(!tx.managed, "managed tx commit not allowed")
|
|
||||||
if tx.db == nil {
|
|
||||||
return ErrTxClosed
|
|
||||||
} else if !tx.writable {
|
|
||||||
return ErrTxNotWritable
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO(benbjohnson): Use vectorized I/O to write out dirty pages.
|
|
||||||
|
|
||||||
// Rebalance nodes which have had deletions.
|
|
||||||
var startTime = time.Now()
|
|
||||||
tx.root.rebalance()
|
|
||||||
if tx.stats.Rebalance > 0 {
|
|
||||||
tx.stats.RebalanceTime += time.Since(startTime)
|
|
||||||
}
|
|
||||||
|
|
||||||
// spill data onto dirty pages.
|
|
||||||
startTime = time.Now()
|
|
||||||
if err := tx.root.spill(); err != nil {
|
|
||||||
tx.rollback()
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
tx.stats.SpillTime += time.Since(startTime)
|
|
||||||
|
|
||||||
// Free the old root bucket.
|
|
||||||
tx.meta.root.root = tx.root.root
|
|
||||||
|
|
||||||
// Free the old freelist because commit writes out a fresh freelist.
|
|
||||||
if tx.meta.freelist != pgidNoFreelist {
|
|
||||||
tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist))
|
|
||||||
}
|
|
||||||
|
|
||||||
if !tx.db.NoFreelistSync {
|
|
||||||
err := tx.commitFreelist()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
tx.meta.freelist = pgidNoFreelist
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write dirty pages to disk.
|
|
||||||
startTime = time.Now()
|
|
||||||
if err := tx.write(); err != nil {
|
|
||||||
tx.rollback()
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// If strict mode is enabled then perform a consistency check.
|
|
||||||
// Only the first consistency error is reported in the panic.
|
|
||||||
if tx.db.StrictMode {
|
|
||||||
ch := tx.Check()
|
|
||||||
var errs []string
|
|
||||||
for {
|
|
||||||
err, ok := <-ch
|
|
||||||
if !ok {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
errs = append(errs, err.Error())
|
|
||||||
}
|
|
||||||
if len(errs) > 0 {
|
|
||||||
panic("check fail: " + strings.Join(errs, "\n"))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write meta to disk.
|
|
||||||
if err := tx.writeMeta(); err != nil {
|
|
||||||
tx.rollback()
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
tx.stats.WriteTime += time.Since(startTime)
|
|
||||||
|
|
||||||
// Finalize the transaction.
|
|
||||||
tx.close()
|
|
||||||
|
|
||||||
// Execute commit handlers now that the locks have been removed.
|
|
||||||
for _, fn := range tx.commitHandlers {
|
|
||||||
fn()
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// commitFreelist writes the in-memory freelist to freshly allocated pages and
// records the new freelist location in the transaction's meta page. On any
// failure the transaction is rolled back before returning the error.
func (tx *Tx) commitFreelist() error {
	// Allocate new pages for the new free list. This will overestimate
	// the size of the freelist but not underestimate the size (which would be bad).
	opgid := tx.meta.pgid
	p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1)
	if err != nil {
		tx.rollback()
		return err
	}
	if err := tx.db.freelist.write(p); err != nil {
		tx.rollback()
		return err
	}
	tx.meta.freelist = p.id
	// If the high water mark has moved up then attempt to grow the database.
	// opgid was captured before the allocation so growth by that allocation
	// is detected here.
	if tx.meta.pgid > opgid {
		if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil {
			tx.rollback()
			return err
		}
	}

	return nil
}
|
|
||||||
|
|
||||||
// Rollback closes the transaction and ignores all previous updates. Read-only
|
|
||||||
// transactions must be rolled back and not committed.
|
|
||||||
func (tx *Tx) Rollback() error {
|
|
||||||
_assert(!tx.managed, "managed tx rollback not allowed")
|
|
||||||
if tx.db == nil {
|
|
||||||
return ErrTxClosed
|
|
||||||
}
|
|
||||||
tx.rollback()
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// rollback discards the transaction's changes without the managed-transaction
// assertion performed by the exported Rollback. It is a no-op on an
// already-closed transaction (tx.db == nil).
func (tx *Tx) rollback() {
	if tx.db == nil {
		return
	}
	if tx.writable {
		// Undo any frees recorded by this transaction, then reload the
		// freelist from the page referenced by the database's current meta.
		tx.db.freelist.rollback(tx.meta.txid)
		tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist))
	}
	tx.close()
}
|
|
||||||
|
|
||||||
// close releases the resources held by the transaction and clears all of its
// references so further use fails fast. For writable transactions it releases
// the database writer lock and merges per-tx statistics into the DB totals;
// for read-only transactions it deregisters itself from the database.
func (tx *Tx) close() {
	if tx.db == nil {
		return
	}
	if tx.writable {
		// Grab freelist stats.
		// Read these before releasing the writer lock below.
		var freelistFreeN = tx.db.freelist.free_count()
		var freelistPendingN = tx.db.freelist.pending_count()
		var freelistAlloc = tx.db.freelist.size()

		// Remove transaction ref & writer lock.
		tx.db.rwtx = nil
		tx.db.rwlock.Unlock()

		// Merge statistics.
		tx.db.statlock.Lock()
		tx.db.stats.FreePageN = freelistFreeN
		tx.db.stats.PendingPageN = freelistPendingN
		tx.db.stats.FreeAlloc = (freelistFreeN + freelistPendingN) * tx.db.pageSize
		tx.db.stats.FreelistInuse = freelistAlloc
		tx.db.stats.TxStats.add(&tx.stats)
		tx.db.statlock.Unlock()
	} else {
		tx.db.removeTx(tx)
	}

	// Clear all references.
	tx.db = nil
	tx.meta = nil
	tx.root = Bucket{tx: tx}
	tx.pages = nil
}
|
|
||||||
|
|
||||||
// Copy writes the entire database to a writer.
|
|
||||||
// This function exists for backwards compatibility.
|
|
||||||
//
|
|
||||||
// Deprecated; Use WriteTo() instead.
|
|
||||||
func (tx *Tx) Copy(w io.Writer) error {
|
|
||||||
_, err := tx.WriteTo(w)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// WriteTo writes the entire database to a writer.
// If err == nil then exactly tx.Size() bytes will be written into the writer.
func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
	// Attempt to open reader with WriteFlag
	f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0)
	if err != nil {
		return 0, err
	}
	defer func() {
		// Surface the Close error only if nothing else failed first.
		// (err is a named result, so this updates the returned value.)
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()

	// Generate a meta page. We use the same page data for both meta pages.
	buf := make([]byte, tx.db.pageSize)
	page := (*page)(unsafe.Pointer(&buf[0]))
	page.flags = metaPageFlag
	*page.meta() = *tx.meta

	// Write meta 0.
	page.id = 0
	page.meta().checksum = page.meta().sum64()
	nn, err := w.Write(buf)
	n += int64(nn)
	if err != nil {
		return n, fmt.Errorf("meta 0 copy: %s", err)
	}

	// Write meta 1 with a lower transaction id.
	// NOTE(review): presumably this makes meta 0 the newer meta in the copy
	// so it is selected on open — confirm against the meta-selection logic.
	page.id = 1
	page.meta().txid -= 1
	page.meta().checksum = page.meta().sum64()
	nn, err = w.Write(buf)
	n += int64(nn)
	if err != nil {
		return n, fmt.Errorf("meta 1 copy: %s", err)
	}

	// Move past the meta pages in the file.
	if _, err := f.Seek(int64(tx.db.pageSize*2), io.SeekStart); err != nil {
		return n, fmt.Errorf("seek: %s", err)
	}

	// Copy data pages.
	wn, err := io.CopyN(w, f, tx.Size()-int64(tx.db.pageSize*2))
	n += wn
	if err != nil {
		return n, err
	}

	return n, nil
}
|
|
||||||
|
|
||||||
// CopyFile copies the entire database to file at the given path.
|
|
||||||
// A reader transaction is maintained during the copy so it is safe to continue
|
|
||||||
// using the database while a copy is in progress.
|
|
||||||
func (tx *Tx) CopyFile(path string, mode os.FileMode) error {
|
|
||||||
f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
err = tx.Copy(f)
|
|
||||||
if err != nil {
|
|
||||||
_ = f.Close()
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return f.Close()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check performs several consistency checks on the database for this transaction.
|
|
||||||
// An error is returned if any inconsistency is found.
|
|
||||||
//
|
|
||||||
// It can be safely run concurrently on a writable transaction. However, this
|
|
||||||
// incurs a high cost for large databases and databases with a lot of subbuckets
|
|
||||||
// because of caching. This overhead can be removed if running on a read-only
|
|
||||||
// transaction, however, it is not safe to execute other writer transactions at
|
|
||||||
// the same time.
|
|
||||||
func (tx *Tx) Check() <-chan error {
|
|
||||||
ch := make(chan error)
|
|
||||||
go tx.check(ch)
|
|
||||||
return ch
|
|
||||||
}
|
|
||||||
|
|
||||||
// check runs the consistency scan and sends every inconsistency found to ch.
// It closes ch when the scan is complete, which is the caller's completion
// signal.
func (tx *Tx) check(ch chan error) {
	// Force loading free list if opened in ReadOnly mode.
	tx.db.loadFreelist()

	// Check if any pages are double freed.
	freed := make(map[pgid]bool)
	all := make([]pgid, tx.db.freelist.count())
	tx.db.freelist.copyall(all)
	for _, id := range all {
		if freed[id] {
			ch <- fmt.Errorf("page %d: already freed", id)
		}
		freed[id] = true
	}

	// Track every reachable page.
	reachable := make(map[pgid]*page)
	reachable[0] = tx.page(0) // meta0
	reachable[1] = tx.page(1) // meta1
	if tx.meta.freelist != pgidNoFreelist {
		// The freelist page and all of its overflow pages count as reachable.
		for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ {
			reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist)
		}
	}

	// Recursively check buckets.
	tx.checkBucket(&tx.root, reachable, freed, ch)

	// Ensure all pages below high water mark are either reachable or freed.
	for i := pgid(0); i < tx.meta.pgid; i++ {
		_, isReachable := reachable[i]
		if !isReachable && !freed[i] {
			ch <- fmt.Errorf("page %d: unreachable unfreed", int(i))
		}
	}

	// Close the channel to signal completion.
	close(ch)
}
|
|
||||||
|
|
||||||
// checkBucket verifies that every page reachable from bucket b is in bounds,
// referenced exactly once, not on the freelist, and of a valid type, then
// recurses into each child bucket. reachable and freed are accumulators
// shared across the entire scan; errors are reported on ch.
func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bool, ch chan error) {
	// Ignore inline buckets.
	// (Inline buckets have no page of their own; b.root == 0 marks them.)
	if b.root == 0 {
		return
	}

	// Check every page used by this bucket.
	b.tx.forEachPage(b.root, 0, func(p *page, _ int) {
		if p.id > tx.meta.pgid {
			ch <- fmt.Errorf("page %d: out of bounds: %d", int(p.id), int(b.tx.meta.pgid))
		}

		// Ensure each page is only referenced once.
		// Overflow pages are attributed to their head page p.
		for i := pgid(0); i <= pgid(p.overflow); i++ {
			var id = p.id + i
			if _, ok := reachable[id]; ok {
				ch <- fmt.Errorf("page %d: multiple references", int(id))
			}
			reachable[id] = p
		}

		// We should only encounter un-freed leaf and branch pages.
		if freed[p.id] {
			ch <- fmt.Errorf("page %d: reachable freed", int(p.id))
		} else if (p.flags&branchPageFlag) == 0 && (p.flags&leafPageFlag) == 0 {
			ch <- fmt.Errorf("page %d: invalid type: %s", int(p.id), p.typ())
		}
	})

	// Check each bucket within this bucket.
	// ForEach cannot fail here because the callback always returns nil.
	_ = b.ForEach(func(k, v []byte) error {
		if child := b.Bucket(k); child != nil {
			tx.checkBucket(child, reachable, freed, ch)
		}
		return nil
	})
}
|
|
||||||
|
|
||||||
// allocate returns a contiguous block of memory starting at a given page.
// The returned page is registered in the transaction's dirty-page cache so
// that later tx.page lookups find it before consulting the mmap.
func (tx *Tx) allocate(count int) (*page, error) {
	p, err := tx.db.allocate(tx.meta.txid, count)
	if err != nil {
		return nil, err
	}

	// Save to our page cache.
	tx.pages[p.id] = p

	// Update statistics.
	tx.stats.PageCount += count
	tx.stats.PageAlloc += count * tx.db.pageSize

	return p, nil
}
|
|
||||||
|
|
||||||
// write writes any dirty pages to disk.
// Pages are written in ascending id order, the file is synced (unless
// disabled), and single-page buffers are zeroed and returned to the pool.
func (tx *Tx) write() error {
	// Sort pages by id.
	pages := make(pages, 0, len(tx.pages))
	for _, p := range tx.pages {
		pages = append(pages, p)
	}
	// Clear out page cache early.
	tx.pages = make(map[pgid]*page)
	sort.Sort(pages)

	// Write pages to disk in order.
	for _, p := range pages {
		// Total byte size of this page including its overflow pages.
		size := (int(p.overflow) + 1) * tx.db.pageSize
		offset := int64(p.id) * int64(tx.db.pageSize)

		// Write out page in "max allocation" sized chunks.
		ptr := (*[maxAllocSize]byte)(unsafe.Pointer(p))
		for {
			// Limit our write to our max allocation size.
			sz := size
			if sz > maxAllocSize-1 {
				sz = maxAllocSize - 1
			}

			// Write chunk to disk.
			buf := ptr[:sz]
			if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
				return err
			}

			// Update statistics.
			tx.stats.Write++

			// Exit inner for loop if we've written all the chunks.
			size -= sz
			if size == 0 {
				break
			}

			// Otherwise move offset forward and move pointer to next chunk.
			offset += int64(sz)
			ptr = (*[maxAllocSize]byte)(unsafe.Pointer(&ptr[sz]))
		}
	}

	// Ignore file sync if flag is set on DB.
	if !tx.db.NoSync || IgnoreNoSync {
		if err := fdatasync(tx.db); err != nil {
			return err
		}
	}

	// Put small pages back to page pool.
	for _, p := range pages {
		// Ignore page sizes over 1 page.
		// These are allocated using make() instead of the page pool.
		if int(p.overflow) != 0 {
			continue
		}

		buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:tx.db.pageSize]

		// Zero before pooling so reused buffers carry no stale data.
		// See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1
		for i := range buf {
			buf[i] = 0
		}
		tx.db.pagePool.Put(buf)
	}

	return nil
}
|
|
||||||
|
|
||||||
// writeMeta writes the meta to the disk.
// The meta is serialized into a temporary page-sized buffer and written at
// the offset derived from the page id; the write is then synced unless
// syncing is disabled.
func (tx *Tx) writeMeta() error {
	// Create a temporary buffer for the meta page.
	buf := make([]byte, tx.db.pageSize)
	p := tx.db.pageInBuffer(buf, 0)
	// NOTE(review): meta.write presumably sets p.id to the target meta slot,
	// which determines the write offset below — confirm in meta.write.
	tx.meta.write(p)

	// Write the meta page to file.
	if _, err := tx.db.ops.writeAt(buf, int64(p.id)*int64(tx.db.pageSize)); err != nil {
		return err
	}
	if !tx.db.NoSync || IgnoreNoSync {
		if err := fdatasync(tx.db); err != nil {
			return err
		}
	}

	// Update statistics.
	tx.stats.Write++

	return nil
}
|
|
||||||
|
|
||||||
// page returns a reference to the page with a given id.
|
|
||||||
// If page has been written to then a temporary buffered page is returned.
|
|
||||||
func (tx *Tx) page(id pgid) *page {
|
|
||||||
// Check the dirty pages first.
|
|
||||||
if tx.pages != nil {
|
|
||||||
if p, ok := tx.pages[id]; ok {
|
|
||||||
return p
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Otherwise return directly from the mmap.
|
|
||||||
return tx.db.page(id)
|
|
||||||
}
|
|
||||||
|
|
||||||
// forEachPage iterates over every page within a given page and executes a function.
|
|
||||||
func (tx *Tx) forEachPage(pgid pgid, depth int, fn func(*page, int)) {
|
|
||||||
p := tx.page(pgid)
|
|
||||||
|
|
||||||
// Execute function.
|
|
||||||
fn(p, depth)
|
|
||||||
|
|
||||||
// Recursively loop over children.
|
|
||||||
if (p.flags & branchPageFlag) != 0 {
|
|
||||||
for i := 0; i < int(p.count); i++ {
|
|
||||||
elem := p.branchPageElement(uint16(i))
|
|
||||||
tx.forEachPage(elem.pgid, depth+1, fn)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Page returns page information for a given page number.
|
|
||||||
// This is only safe for concurrent use when used by a writable transaction.
|
|
||||||
func (tx *Tx) Page(id int) (*PageInfo, error) {
|
|
||||||
if tx.db == nil {
|
|
||||||
return nil, ErrTxClosed
|
|
||||||
} else if pgid(id) >= tx.meta.pgid {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build the page info.
|
|
||||||
p := tx.db.page(pgid(id))
|
|
||||||
info := &PageInfo{
|
|
||||||
ID: id,
|
|
||||||
Count: int(p.count),
|
|
||||||
OverflowCount: int(p.overflow),
|
|
||||||
}
|
|
||||||
|
|
||||||
// Determine the type (or if it's free).
|
|
||||||
if tx.db.freelist.freed(pgid(id)) {
|
|
||||||
info.Type = "free"
|
|
||||||
} else {
|
|
||||||
info.Type = p.typ()
|
|
||||||
}
|
|
||||||
|
|
||||||
return info, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// TxStats represents statistics about the actions performed by the transaction.
// Instances are combined with add and compared with Sub.
type TxStats struct {
	// Page statistics.
	PageCount int // number of page allocations
	PageAlloc int // total bytes allocated

	// Cursor statistics.
	CursorCount int // number of cursors created

	// Node statistics
	NodeCount int // number of node allocations
	NodeDeref int // number of node dereferences

	// Rebalance statistics.
	Rebalance     int           // number of node rebalances
	RebalanceTime time.Duration // total time spent rebalancing

	// Split/Spill statistics.
	Split     int           // number of nodes split
	Spill     int           // number of nodes spilled
	SpillTime time.Duration // total time spent spilling

	// Write statistics.
	Write     int           // number of writes performed
	WriteTime time.Duration // total time spent writing to disk
}
|
|
||||||
|
|
||||||
// add accumulates every counter from other into s. It is used when a
// transaction closes to fold its per-tx stats into the database-wide totals.
func (s *TxStats) add(other *TxStats) {
	s.PageCount += other.PageCount
	s.PageAlloc += other.PageAlloc
	s.CursorCount += other.CursorCount
	s.NodeCount += other.NodeCount
	s.NodeDeref += other.NodeDeref
	s.Rebalance += other.Rebalance
	s.RebalanceTime += other.RebalanceTime
	s.Split += other.Split
	s.Spill += other.Spill
	s.SpillTime += other.SpillTime
	s.Write += other.Write
	s.WriteTime += other.WriteTime
}
|
|
||||||
|
|
||||||
// Sub calculates and returns the difference between two sets of transaction stats.
|
|
||||||
// This is useful when obtaining stats at two different points and time and
|
|
||||||
// you need the performance counters that occurred within that time span.
|
|
||||||
func (s *TxStats) Sub(other *TxStats) TxStats {
|
|
||||||
var diff TxStats
|
|
||||||
diff.PageCount = s.PageCount - other.PageCount
|
|
||||||
diff.PageAlloc = s.PageAlloc - other.PageAlloc
|
|
||||||
diff.CursorCount = s.CursorCount - other.CursorCount
|
|
||||||
diff.NodeCount = s.NodeCount - other.NodeCount
|
|
||||||
diff.NodeDeref = s.NodeDeref - other.NodeDeref
|
|
||||||
diff.Rebalance = s.Rebalance - other.Rebalance
|
|
||||||
diff.RebalanceTime = s.RebalanceTime - other.RebalanceTime
|
|
||||||
diff.Split = s.Split - other.Split
|
|
||||||
diff.Spill = s.Spill - other.Spill
|
|
||||||
diff.SpillTime = s.SpillTime - other.SpillTime
|
|
||||||
diff.Write = s.Write - other.Write
|
|
||||||
diff.WriteTime = s.WriteTime - other.WriteTime
|
|
||||||
return diff
|
|
||||||
}
|
|
Loading…
Reference in New Issue
Block a user