Initial commit
This commit is contained in:
parent
669da74e7e
commit
208e13d2a8
80
load.go
Normal file
80
load.go
Normal file
@ -0,0 +1,80 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"qurl/qurl"
|
||||
"qurl/storage"
|
||||
"time"
|
||||
)
|
||||
|
||||
type qurljson struct {
|
||||
ID uint64 `json:"id"`
|
||||
IP string `json:"ip"`
|
||||
Browser string `json:"browser"`
|
||||
URL string `json:"url"`
|
||||
Date struct {
|
||||
Date int64 `json:"$date"`
|
||||
} `json:"date"`
|
||||
}
|
||||
|
||||
func loadjson(stor storage.Storage, filename string) error {
|
||||
file, err := os.Open(filename)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error opening %s: %s", filename, err.Error())
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
var qj []qurljson
|
||||
decoder := json.NewDecoder(file)
|
||||
err = decoder.Decode(&qj)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error parsing %s: %s", filename, err.Error())
|
||||
}
|
||||
|
||||
fmt.Printf("Parsing %d qurls.. \n", len(qj))
|
||||
var max uint64 = 0
|
||||
var count uint64 = 0
|
||||
for _, e := range qj {
|
||||
if e.ID > max {
|
||||
max = e.ID
|
||||
}
|
||||
|
||||
qurl := &qurl.QURL{
|
||||
ID: e.ID,
|
||||
URL: e.URL,
|
||||
}
|
||||
|
||||
if e.Date.Date > 0 {
|
||||
qurl.Created = time.Unix((e.Date.Date / 1000), 0)
|
||||
}
|
||||
|
||||
if e.IP != "" {
|
||||
qurl.IP = net.ParseIP(e.IP)
|
||||
}
|
||||
|
||||
if e.Browser != "" {
|
||||
qurl.Browser = e.Browser
|
||||
}
|
||||
|
||||
err := stor.AddQURL(qurl)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error adding qurl: %s", err.Error())
|
||||
}
|
||||
|
||||
count++
|
||||
if (count % 100) == 0 {
|
||||
fmt.Printf("*")
|
||||
}
|
||||
}
|
||||
|
||||
err = stor.SetQURLSequence(max)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error setting sequence: %s", err.Error())
|
||||
}
|
||||
|
||||
fmt.Printf("\nDone!\n")
|
||||
return nil
|
||||
}
|
29
main.go
Normal file
29
main.go
Normal file
@ -0,0 +1,29 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"qurl/storage"
|
||||
)
|
||||
|
||||
func main() {
|
||||
dburl := flag.String("u", "bolt:./qurl.db", "url to database")
|
||||
jsonfile := flag.String("j", "", "path to json to load into database")
|
||||
flag.Parse()
|
||||
|
||||
stor, err := storage.NewStorage(*dburl)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Database connection error: %s", err.Error())
|
||||
return
|
||||
}
|
||||
defer stor.Shutdown()
|
||||
|
||||
if *jsonfile != "" {
|
||||
err := loadjson(stor, *jsonfile)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Load error: %s", err.Error())
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
59
qurl/qurl.go
Normal file
59
qurl/qurl.go
Normal file
@ -0,0 +1,59 @@
|
||||
package qurl
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
type QURL struct {
|
||||
ID uint64
|
||||
URL string
|
||||
Created time.Time
|
||||
IP []byte
|
||||
Browser string
|
||||
}
|
||||
|
||||
const alpha = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
|
||||
const alphalen = uint64(len(alpha))
|
||||
|
||||
func ToString(v uint64) string {
|
||||
var r strings.Builder
|
||||
var i uint64
|
||||
for i = v; i >= alphalen; i = (i / alphalen) - 1 {
|
||||
r.WriteByte(alpha[i%alphalen])
|
||||
}
|
||||
r.WriteByte(alpha[i])
|
||||
return r.String()
|
||||
}
|
||||
|
||||
func FromString(s string) (uint64, error) {
|
||||
var r uint64
|
||||
vlen := uint64(len(s))
|
||||
|
||||
for i := uint64(0); i < vlen; i++ {
|
||||
x := strings.IndexByte(alpha, s[i])
|
||||
if x == -1 {
|
||||
return 0, fmt.Errorf("Invalid character [%s]", s[i])
|
||||
}
|
||||
|
||||
if i == 0 {
|
||||
r += uint64(x)
|
||||
} else {
|
||||
r += (uint64(x+1) * pow(alphalen, i))
|
||||
}
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
|
||||
func pow(a, b uint64) uint64 {
|
||||
p := uint64(1)
|
||||
for b > 0 {
|
||||
if b&1 != 0 {
|
||||
p *= a
|
||||
}
|
||||
b >>= 1
|
||||
a *= a
|
||||
}
|
||||
return p
|
||||
}
|
28
storage/bolt/bolt.go
Normal file
28
storage/bolt/bolt.go
Normal file
@ -0,0 +1,28 @@
|
||||
package bolt
|
||||
|
||||
import (
|
||||
bolt "go.etcd.io/bbolt"
|
||||
"net/url"
|
||||
"time"
|
||||
)
|
||||
|
||||
type BoltStorage struct {
|
||||
DB *bolt.DB
|
||||
}
|
||||
|
||||
func (stor *BoltStorage) Shutdown() {
|
||||
stor.DB.Close()
|
||||
}
|
||||
|
||||
func New(u *url.URL) (*BoltStorage, error) {
|
||||
path := u.Opaque
|
||||
if u.Path != "" {
|
||||
path = u.Path
|
||||
}
|
||||
|
||||
db, err := bolt.Open(path, 0600, &bolt.Options{Timeout: 3 * time.Second})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &BoltStorage{DB: db}, nil
|
||||
}
|
188
storage/bolt/qurl.go
Normal file
188
storage/bolt/qurl.go
Normal file
@ -0,0 +1,188 @@
|
||||
package bolt
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"qurl/qurl"
|
||||
// "bytes"
|
||||
// "fmt"
|
||||
)
|
||||
|
||||
var (
|
||||
QURLBucket = []byte{0x00}
|
||||
ReverseBucket = []byte{0x01}
|
||||
|
||||
URLField = []byte{0x00}
|
||||
CreatedField = []byte{0x01}
|
||||
IPField = []byte{0x02}
|
||||
BrowserField = []byte{0x03}
|
||||
)
|
||||
|
||||
func (stor *BoltStorage) AddQURL(qurl *qurl.QURL) error {
|
||||
tx, err := stor.DB.Begin(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer tx.Rollback()
|
||||
|
||||
rb, err := tx.CreateBucketIfNotExists(QURLBucket)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Populate the ID from the sequence if we don't have one
|
||||
if qurl.ID == 0 {
|
||||
qurl.ID, err = rb.NextSequence()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Create a byte array from the ID
|
||||
bid := make([]byte, 8)
|
||||
binary.BigEndian.PutUint64(bid, qurl.ID)
|
||||
|
||||
// Add an entry into the reverse indexed bucket for quickly
|
||||
// determining if a URL is already in the database
|
||||
ab, err := tx.CreateBucketIfNotExists(ReverseBucket)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = ab.Put([]byte(qurl.URL), bid)
|
||||
|
||||
qb, err := rb.CreateBucketIfNotExists(bid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Write the ID to URL
|
||||
err = qb.Put(URLField, []byte(qurl.URL))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !qurl.Created.IsZero() {
|
||||
// Create byte array from the Created date
|
||||
bdt, err := qurl.Created.MarshalBinary()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Write the Created date
|
||||
err = qb.Put(CreatedField, bdt)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if qurl.IP != nil {
|
||||
err = qb.Put(IPField, qurl.IP)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if len(qurl.Browser) > 0 {
|
||||
err = qb.Put(BrowserField, []byte(qurl.Browser))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := tx.Commit(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (stor *BoltStorage) SetQURLSequence(seq uint64) error {
|
||||
tx, err := stor.DB.Begin(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer tx.Rollback()
|
||||
|
||||
qb, err := tx.CreateBucketIfNotExists(QURLBucket)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
qb.SetSequence(seq)
|
||||
|
||||
if err := tx.Commit(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (stor *BoltStorage) GetQURLByURL(u string) (*qurl.QURL, error) {
|
||||
tx, err := stor.DB.Begin(false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer tx.Rollback()
|
||||
|
||||
ab := tx.Bucket(ReverseBucket)
|
||||
if ab == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
bid := ab.Get([]byte(u))
|
||||
if bid == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
rb := tx.Bucket(QURLBucket)
|
||||
if rb == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
qb := rb.Bucket(bid)
|
||||
if qb == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
qurl := &qurl.QURL{ ID: binary.BigEndian.Uint64(bid) }
|
||||
|
||||
qu := qb.Get(URLField)
|
||||
if qu != nil {
|
||||
qurl.URL = string(qu)
|
||||
}
|
||||
|
||||
return qurl, nil
|
||||
}
|
||||
|
||||
/*
|
||||
func (stor *BoltStorage) GetQURLByURL(u string) (*qurl.QURL, error) {
|
||||
tx, err := stor.DB.Begin(false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer tx.Rollback()
|
||||
|
||||
rb := tx.Bucket(QURLBucket)
|
||||
if rb == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
bu := []byte(u)
|
||||
rc := rb.Cursor()
|
||||
for k, _ := rc.First(); k != nil; k, _ = rc.Next() {
|
||||
qb := rb.Bucket(k)
|
||||
if qb == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
qu := qb.Get(URLField)
|
||||
if bytes.Equal(bu, qu) {
|
||||
qurl := &qurl.QURL{
|
||||
ID: binary.BigEndian.Uint64(k),
|
||||
URL: string(qu),
|
||||
}
|
||||
return qurl, nil
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
*/
|
43
storage/storage.go
Normal file
43
storage/storage.go
Normal file
@ -0,0 +1,43 @@
|
||||
package storage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/url"
|
||||
"qurl/qurl"
|
||||
"qurl/storage/bolt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type Storage interface {
|
||||
AddQURL(*qurl.QURL) error
|
||||
// GetQURL(uint64) (*qurl.QURL, error)
|
||||
GetQURLByURL(string) (*qurl.QURL, error)
|
||||
SetQURLSequence(uint64) error
|
||||
Shutdown()
|
||||
}
|
||||
|
||||
func NewStorage(su string) (Storage, error) {
|
||||
u, err := url.Parse(su)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if u.Scheme == "" {
|
||||
return nil, fmt.Errorf("URL must include a scheme")
|
||||
}
|
||||
|
||||
var stor Storage
|
||||
|
||||
switch strings.ToLower(u.Scheme) {
|
||||
case "bolt", "boltdb", "bbolt":
|
||||
stor, err = bolt.New(u)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("Unsupported URL scheme")
|
||||
}
|
||||
|
||||
return stor, nil
|
||||
}
|
20
vendor/go.etcd.io/bbolt/LICENSE
generated
vendored
Normal file
20
vendor/go.etcd.io/bbolt/LICENSE
generated
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2013 Ben Johnson
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
10
vendor/go.etcd.io/bbolt/bolt_386.go
generated
vendored
Normal file
10
vendor/go.etcd.io/bbolt/bolt_386.go
generated
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
package bbolt
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0x7FFFFFFF // 2GB
|
||||
|
||||
// maxAllocSize is the size used when creating array pointers.
|
||||
const maxAllocSize = 0xFFFFFFF
|
||||
|
||||
// Are unaligned load/stores broken on this arch?
|
||||
var brokenUnaligned = false
|
10
vendor/go.etcd.io/bbolt/bolt_amd64.go
generated
vendored
Normal file
10
vendor/go.etcd.io/bbolt/bolt_amd64.go
generated
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
package bbolt
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
|
||||
|
||||
// maxAllocSize is the size used when creating array pointers.
|
||||
const maxAllocSize = 0x7FFFFFFF
|
||||
|
||||
// Are unaligned load/stores broken on this arch?
|
||||
var brokenUnaligned = false
|
28
vendor/go.etcd.io/bbolt/bolt_arm.go
generated
vendored
Normal file
28
vendor/go.etcd.io/bbolt/bolt_arm.go
generated
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
package bbolt
|
||||
|
||||
import "unsafe"
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0x7FFFFFFF // 2GB
|
||||
|
||||
// maxAllocSize is the size used when creating array pointers.
|
||||
const maxAllocSize = 0xFFFFFFF
|
||||
|
||||
// Are unaligned load/stores broken on this arch?
|
||||
var brokenUnaligned bool
|
||||
|
||||
func init() {
|
||||
// Simple check to see whether this arch handles unaligned load/stores
|
||||
// correctly.
|
||||
|
||||
// ARM9 and older devices require load/stores to be from/to aligned
|
||||
// addresses. If not, the lower 2 bits are cleared and that address is
|
||||
// read in a jumbled up order.
|
||||
|
||||
// See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka15414.html
|
||||
|
||||
raw := [6]byte{0xfe, 0xef, 0x11, 0x22, 0x22, 0x11}
|
||||
val := *(*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(&raw)) + 2))
|
||||
|
||||
brokenUnaligned = val != 0x11222211
|
||||
}
|
12
vendor/go.etcd.io/bbolt/bolt_arm64.go
generated
vendored
Normal file
12
vendor/go.etcd.io/bbolt/bolt_arm64.go
generated
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
// +build arm64
|
||||
|
||||
package bbolt
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
|
||||
|
||||
// maxAllocSize is the size used when creating array pointers.
|
||||
const maxAllocSize = 0x7FFFFFFF
|
||||
|
||||
// Are unaligned load/stores broken on this arch?
|
||||
var brokenUnaligned = false
|
10
vendor/go.etcd.io/bbolt/bolt_linux.go
generated
vendored
Normal file
10
vendor/go.etcd.io/bbolt/bolt_linux.go
generated
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// fdatasync flushes written data to a file descriptor.
|
||||
func fdatasync(db *DB) error {
|
||||
return syscall.Fdatasync(int(db.file.Fd()))
|
||||
}
|
12
vendor/go.etcd.io/bbolt/bolt_mips64x.go
generated
vendored
Normal file
12
vendor/go.etcd.io/bbolt/bolt_mips64x.go
generated
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
// +build mips64 mips64le
|
||||
|
||||
package bbolt
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0x8000000000 // 512GB
|
||||
|
||||
// maxAllocSize is the size used when creating array pointers.
|
||||
const maxAllocSize = 0x7FFFFFFF
|
||||
|
||||
// Are unaligned load/stores broken on this arch?
|
||||
var brokenUnaligned = false
|
12
vendor/go.etcd.io/bbolt/bolt_mipsx.go
generated
vendored
Normal file
12
vendor/go.etcd.io/bbolt/bolt_mipsx.go
generated
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
// +build mips mipsle
|
||||
|
||||
package bbolt
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0x40000000 // 1GB
|
||||
|
||||
// maxAllocSize is the size used when creating array pointers.
|
||||
const maxAllocSize = 0xFFFFFFF
|
||||
|
||||
// Are unaligned load/stores broken on this arch?
|
||||
var brokenUnaligned = false
|
27
vendor/go.etcd.io/bbolt/bolt_openbsd.go
generated
vendored
Normal file
27
vendor/go.etcd.io/bbolt/bolt_openbsd.go
generated
vendored
Normal file
@ -0,0 +1,27 @@
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const (
|
||||
msAsync = 1 << iota // perform asynchronous writes
|
||||
msSync // perform synchronous writes
|
||||
msInvalidate // invalidate cached data
|
||||
)
|
||||
|
||||
func msync(db *DB) error {
|
||||
_, _, errno := syscall.Syscall(syscall.SYS_MSYNC, uintptr(unsafe.Pointer(db.data)), uintptr(db.datasz), msInvalidate)
|
||||
if errno != 0 {
|
||||
return errno
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func fdatasync(db *DB) error {
|
||||
if db.data != nil {
|
||||
return msync(db)
|
||||
}
|
||||
return db.file.Sync()
|
||||
}
|
12
vendor/go.etcd.io/bbolt/bolt_ppc.go
generated
vendored
Normal file
12
vendor/go.etcd.io/bbolt/bolt_ppc.go
generated
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
// +build ppc
|
||||
|
||||
package bbolt
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0x7FFFFFFF // 2GB
|
||||
|
||||
// maxAllocSize is the size used when creating array pointers.
|
||||
const maxAllocSize = 0xFFFFFFF
|
||||
|
||||
// Are unaligned load/stores broken on this arch?
|
||||
var brokenUnaligned = false
|
12
vendor/go.etcd.io/bbolt/bolt_ppc64.go
generated
vendored
Normal file
12
vendor/go.etcd.io/bbolt/bolt_ppc64.go
generated
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
// +build ppc64
|
||||
|
||||
package bbolt
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
|
||||
|
||||
// maxAllocSize is the size used when creating array pointers.
|
||||
const maxAllocSize = 0x7FFFFFFF
|
||||
|
||||
// Are unaligned load/stores broken on this arch?
|
||||
var brokenUnaligned = false
|
12
vendor/go.etcd.io/bbolt/bolt_ppc64le.go
generated
vendored
Normal file
12
vendor/go.etcd.io/bbolt/bolt_ppc64le.go
generated
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
// +build ppc64le
|
||||
|
||||
package bbolt
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
|
||||
|
||||
// maxAllocSize is the size used when creating array pointers.
|
||||
const maxAllocSize = 0x7FFFFFFF
|
||||
|
||||
// Are unaligned load/stores broken on this arch?
|
||||
var brokenUnaligned = false
|
12
vendor/go.etcd.io/bbolt/bolt_s390x.go
generated
vendored
Normal file
12
vendor/go.etcd.io/bbolt/bolt_s390x.go
generated
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
// +build s390x
|
||||
|
||||
package bbolt
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
|
||||
|
||||
// maxAllocSize is the size used when creating array pointers.
|
||||
const maxAllocSize = 0x7FFFFFFF
|
||||
|
||||
// Are unaligned load/stores broken on this arch?
|
||||
var brokenUnaligned = false
|
93
vendor/go.etcd.io/bbolt/bolt_unix.go
generated
vendored
Normal file
93
vendor/go.etcd.io/bbolt/bolt_unix.go
generated
vendored
Normal file
@ -0,0 +1,93 @@
|
||||
// +build !windows,!plan9,!solaris
|
||||
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"syscall"
|
||||
"time"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// flock acquires an advisory lock on a file descriptor.
|
||||
func flock(db *DB, exclusive bool, timeout time.Duration) error {
|
||||
var t time.Time
|
||||
if timeout != 0 {
|
||||
t = time.Now()
|
||||
}
|
||||
fd := db.file.Fd()
|
||||
flag := syscall.LOCK_NB
|
||||
if exclusive {
|
||||
flag |= syscall.LOCK_EX
|
||||
} else {
|
||||
flag |= syscall.LOCK_SH
|
||||
}
|
||||
for {
|
||||
// Attempt to obtain an exclusive lock.
|
||||
err := syscall.Flock(int(fd), flag)
|
||||
if err == nil {
|
||||
return nil
|
||||
} else if err != syscall.EWOULDBLOCK {
|
||||
return err
|
||||
}
|
||||
|
||||
// If we timed out then return an error.
|
||||
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
|
||||
return ErrTimeout
|
||||
}
|
||||
|
||||
// Wait for a bit and try again.
|
||||
time.Sleep(flockRetryTimeout)
|
||||
}
|
||||
}
|
||||
|
||||
// funlock releases an advisory lock on a file descriptor.
|
||||
func funlock(db *DB) error {
|
||||
return syscall.Flock(int(db.file.Fd()), syscall.LOCK_UN)
|
||||
}
|
||||
|
||||
// mmap memory maps a DB's data file.
|
||||
func mmap(db *DB, sz int) error {
|
||||
// Map the data file to memory.
|
||||
b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Advise the kernel that the mmap is accessed randomly.
|
||||
err = madvise(b, syscall.MADV_RANDOM)
|
||||
if err != nil && err != syscall.ENOSYS {
|
||||
// Ignore not implemented error in kernel because it still works.
|
||||
return fmt.Errorf("madvise: %s", err)
|
||||
}
|
||||
|
||||
// Save the original byte slice and convert to a byte array pointer.
|
||||
db.dataref = b
|
||||
db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
|
||||
db.datasz = sz
|
||||
return nil
|
||||
}
|
||||
|
||||
// munmap unmaps a DB's data file from memory.
|
||||
func munmap(db *DB) error {
|
||||
// Ignore the unmap if we have no mapped data.
|
||||
if db.dataref == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Unmap using the original byte slice.
|
||||
err := syscall.Munmap(db.dataref)
|
||||
db.dataref = nil
|
||||
db.data = nil
|
||||
db.datasz = 0
|
||||
return err
|
||||
}
|
||||
|
||||
// NOTE: This function is copied from stdlib because it is not available on darwin.
|
||||
func madvise(b []byte, advice int) (err error) {
|
||||
_, _, e1 := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), uintptr(advice))
|
||||
if e1 != 0 {
|
||||
err = e1
|
||||
}
|
||||
return
|
||||
}
|
88
vendor/go.etcd.io/bbolt/bolt_unix_solaris.go
generated
vendored
Normal file
88
vendor/go.etcd.io/bbolt/bolt_unix_solaris.go
generated
vendored
Normal file
@ -0,0 +1,88 @@
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"syscall"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// flock acquires an advisory lock on a file descriptor.
|
||||
func flock(db *DB, exclusive bool, timeout time.Duration) error {
|
||||
var t time.Time
|
||||
if timeout != 0 {
|
||||
t = time.Now()
|
||||
}
|
||||
fd := db.file.Fd()
|
||||
var lockType int16
|
||||
if exclusive {
|
||||
lockType = syscall.F_WRLCK
|
||||
} else {
|
||||
lockType = syscall.F_RDLCK
|
||||
}
|
||||
for {
|
||||
// Attempt to obtain an exclusive lock.
|
||||
lock := syscall.Flock_t{Type: lockType}
|
||||
err := syscall.FcntlFlock(fd, syscall.F_SETLK, &lock)
|
||||
if err == nil {
|
||||
return nil
|
||||
} else if err != syscall.EAGAIN {
|
||||
return err
|
||||
}
|
||||
|
||||
// If we timed out then return an error.
|
||||
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
|
||||
return ErrTimeout
|
||||
}
|
||||
|
||||
// Wait for a bit and try again.
|
||||
time.Sleep(flockRetryTimeout)
|
||||
}
|
||||
}
|
||||
|
||||
// funlock releases an advisory lock on a file descriptor.
|
||||
func funlock(db *DB) error {
|
||||
var lock syscall.Flock_t
|
||||
lock.Start = 0
|
||||
lock.Len = 0
|
||||
lock.Type = syscall.F_UNLCK
|
||||
lock.Whence = 0
|
||||
return syscall.FcntlFlock(uintptr(db.file.Fd()), syscall.F_SETLK, &lock)
|
||||
}
|
||||
|
||||
// mmap memory maps a DB's data file.
|
||||
func mmap(db *DB, sz int) error {
|
||||
// Map the data file to memory.
|
||||
b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Advise the kernel that the mmap is accessed randomly.
|
||||
if err := unix.Madvise(b, syscall.MADV_RANDOM); err != nil {
|
||||
return fmt.Errorf("madvise: %s", err)
|
||||
}
|
||||
|
||||
// Save the original byte slice and convert to a byte array pointer.
|
||||
db.dataref = b
|
||||
db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
|
||||
db.datasz = sz
|
||||
return nil
|
||||
}
|
||||
|
||||
// munmap unmaps a DB's data file from memory.
|
||||
func munmap(db *DB) error {
|
||||
// Ignore the unmap if we have no mapped data.
|
||||
if db.dataref == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Unmap using the original byte slice.
|
||||
err := unix.Munmap(db.dataref)
|
||||
db.dataref = nil
|
||||
db.data = nil
|
||||
db.datasz = 0
|
||||
return err
|
||||
}
|
141
vendor/go.etcd.io/bbolt/bolt_windows.go
generated
vendored
Normal file
141
vendor/go.etcd.io/bbolt/bolt_windows.go
generated
vendored
Normal file
@ -0,0 +1,141 @@
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"syscall"
|
||||
"time"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// LockFileEx code derived from golang build filemutex_windows.go @ v1.5.1
|
||||
var (
|
||||
modkernel32 = syscall.NewLazyDLL("kernel32.dll")
|
||||
procLockFileEx = modkernel32.NewProc("LockFileEx")
|
||||
procUnlockFileEx = modkernel32.NewProc("UnlockFileEx")
|
||||
)
|
||||
|
||||
const (
|
||||
// see https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx
|
||||
flagLockExclusive = 2
|
||||
flagLockFailImmediately = 1
|
||||
|
||||
// see https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382(v=vs.85).aspx
|
||||
errLockViolation syscall.Errno = 0x21
|
||||
)
|
||||
|
||||
func lockFileEx(h syscall.Handle, flags, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) {
|
||||
r, _, err := procLockFileEx.Call(uintptr(h), uintptr(flags), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)))
|
||||
if r == 0 {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func unlockFileEx(h syscall.Handle, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) {
|
||||
r, _, err := procUnlockFileEx.Call(uintptr(h), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)), 0)
|
||||
if r == 0 {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// fdatasync flushes written data to a file descriptor.
|
||||
func fdatasync(db *DB) error {
|
||||
return db.file.Sync()
|
||||
}
|
||||
|
||||
// flock acquires an advisory lock on a file descriptor.
|
||||
func flock(db *DB, exclusive bool, timeout time.Duration) error {
|
||||
var t time.Time
|
||||
if timeout != 0 {
|
||||
t = time.Now()
|
||||
}
|
||||
var flag uint32 = flagLockFailImmediately
|
||||
if exclusive {
|
||||
flag |= flagLockExclusive
|
||||
}
|
||||
for {
|
||||
// Fix for https://github.com/etcd-io/bbolt/issues/121. Use byte-range
|
||||
// -1..0 as the lock on the database file.
|
||||
var m1 uint32 = (1 << 32) - 1 // -1 in a uint32
|
||||
err := lockFileEx(syscall.Handle(db.file.Fd()), flag, 0, 1, 0, &syscall.Overlapped{
|
||||
Offset: m1,
|
||||
OffsetHigh: m1,
|
||||
})
|
||||
|
||||
if err == nil {
|
||||
return nil
|
||||
} else if err != errLockViolation {
|
||||
return err
|
||||
}
|
||||
|
||||
// If we timed oumercit then return an error.
|
||||
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
|
||||
return ErrTimeout
|
||||
}
|
||||
|
||||
// Wait for a bit and try again.
|
||||
time.Sleep(flockRetryTimeout)
|
||||
}
|
||||
}
|
||||
|
||||
// funlock releases an advisory lock on a file descriptor.
|
||||
func funlock(db *DB) error {
|
||||
var m1 uint32 = (1 << 32) - 1 // -1 in a uint32
|
||||
err := unlockFileEx(syscall.Handle(db.file.Fd()), 0, 1, 0, &syscall.Overlapped{
|
||||
Offset: m1,
|
||||
OffsetHigh: m1,
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
// mmap memory maps a DB's data file.
|
||||
// Based on: https://github.com/edsrzf/mmap-go
|
||||
func mmap(db *DB, sz int) error {
|
||||
if !db.readOnly {
|
||||
// Truncate the database to the size of the mmap.
|
||||
if err := db.file.Truncate(int64(sz)); err != nil {
|
||||
return fmt.Errorf("truncate: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Open a file mapping handle.
|
||||
sizelo := uint32(sz >> 32)
|
||||
sizehi := uint32(sz) & 0xffffffff
|
||||
h, errno := syscall.CreateFileMapping(syscall.Handle(db.file.Fd()), nil, syscall.PAGE_READONLY, sizelo, sizehi, nil)
|
||||
if h == 0 {
|
||||
return os.NewSyscallError("CreateFileMapping", errno)
|
||||
}
|
||||
|
||||
// Create the memory map.
|
||||
addr, errno := syscall.MapViewOfFile(h, syscall.FILE_MAP_READ, 0, 0, uintptr(sz))
|
||||
if addr == 0 {
|
||||
return os.NewSyscallError("MapViewOfFile", errno)
|
||||
}
|
||||
|
||||
// Close mapping handle.
|
||||
if err := syscall.CloseHandle(syscall.Handle(h)); err != nil {
|
||||
return os.NewSyscallError("CloseHandle", err)
|
||||
}
|
||||
|
||||
// Convert to a byte array.
|
||||
db.data = ((*[maxMapSize]byte)(unsafe.Pointer(addr)))
|
||||
db.datasz = sz
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// munmap unmaps a pointer from a file.
|
||||
// Based on: https://github.com/edsrzf/mmap-go
|
||||
func munmap(db *DB) error {
|
||||
if db.data == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
addr := (uintptr)(unsafe.Pointer(&db.data[0]))
|
||||
if err := syscall.UnmapViewOfFile(addr); err != nil {
|
||||
return os.NewSyscallError("UnmapViewOfFile", err)
|
||||
}
|
||||
return nil
|
||||
}
|
8
vendor/go.etcd.io/bbolt/boltsync_unix.go
generated
vendored
Normal file
8
vendor/go.etcd.io/bbolt/boltsync_unix.go
generated
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
// +build !windows,!plan9,!linux,!openbsd
|
||||
|
||||
package bbolt
|
||||
|
||||
// fdatasync flushes written data to a file descriptor.
|
||||
func fdatasync(db *DB) error {
|
||||
return db.file.Sync()
|
||||
}
|
775
vendor/go.etcd.io/bbolt/bucket.go
generated
vendored
Normal file
775
vendor/go.etcd.io/bbolt/bucket.go
generated
vendored
Normal file
@ -0,0 +1,775 @@
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const (
|
||||
// MaxKeySize is the maximum length of a key, in bytes.
|
||||
MaxKeySize = 32768
|
||||
|
||||
// MaxValueSize is the maximum length of a value, in bytes.
|
||||
MaxValueSize = (1 << 31) - 2
|
||||
)
|
||||
|
||||
const bucketHeaderSize = int(unsafe.Sizeof(bucket{}))
|
||||
|
||||
const (
|
||||
minFillPercent = 0.1
|
||||
maxFillPercent = 1.0
|
||||
)
|
||||
|
||||
// DefaultFillPercent is the percentage that split pages are filled.
|
||||
// This value can be changed by setting Bucket.FillPercent.
|
||||
const DefaultFillPercent = 0.5
|
||||
|
||||
// Bucket represents a collection of key/value pairs inside the database.
|
||||
type Bucket struct {
|
||||
*bucket
|
||||
tx *Tx // the associated transaction
|
||||
buckets map[string]*Bucket // subbucket cache
|
||||
page *page // inline page reference
|
||||
rootNode *node // materialized node for the root page.
|
||||
nodes map[pgid]*node // node cache
|
||||
|
||||
// Sets the threshold for filling nodes when they split. By default,
|
||||
// the bucket will fill to 50% but it can be useful to increase this
|
||||
// amount if you know that your write workloads are mostly append-only.
|
||||
//
|
||||
// This is non-persisted across transactions so it must be set in every Tx.
|
||||
FillPercent float64
|
||||
}
|
||||
|
||||
// bucket represents the on-file representation of a bucket.
|
||||
// This is stored as the "value" of a bucket key. If the bucket is small enough,
|
||||
// then its root page can be stored inline in the "value", after the bucket
|
||||
// header. In the case of inline buckets, the "root" will be 0.
|
||||
type bucket struct {
|
||||
root pgid // page id of the bucket's root-level page
|
||||
sequence uint64 // monotonically incrementing, used by NextSequence()
|
||||
}
|
||||
|
||||
// newBucket returns a new bucket associated with a transaction.
|
||||
func newBucket(tx *Tx) Bucket {
|
||||
var b = Bucket{tx: tx, FillPercent: DefaultFillPercent}
|
||||
if tx.writable {
|
||||
b.buckets = make(map[string]*Bucket)
|
||||
b.nodes = make(map[pgid]*node)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// Tx returns the tx of the bucket.
|
||||
func (b *Bucket) Tx() *Tx {
|
||||
return b.tx
|
||||
}
|
||||
|
||||
// Root returns the root of the bucket.
|
||||
func (b *Bucket) Root() pgid {
|
||||
return b.root
|
||||
}
|
||||
|
||||
// Writable returns whether the bucket is writable.
|
||||
func (b *Bucket) Writable() bool {
|
||||
return b.tx.writable
|
||||
}
|
||||
|
||||
// Cursor creates a cursor associated with the bucket.
|
||||
// The cursor is only valid as long as the transaction is open.
|
||||
// Do not use a cursor after the transaction is closed.
|
||||
func (b *Bucket) Cursor() *Cursor {
|
||||
// Update transaction statistics.
|
||||
b.tx.stats.CursorCount++
|
||||
|
||||
// Allocate and return a cursor.
|
||||
return &Cursor{
|
||||
bucket: b,
|
||||
stack: make([]elemRef, 0),
|
||||
}
|
||||
}
|
||||
|
||||
// Bucket retrieves a nested bucket by name.
|
||||
// Returns nil if the bucket does not exist.
|
||||
// The bucket instance is only valid for the lifetime of the transaction.
|
||||
func (b *Bucket) Bucket(name []byte) *Bucket {
|
||||
if b.buckets != nil {
|
||||
if child := b.buckets[string(name)]; child != nil {
|
||||
return child
|
||||
}
|
||||
}
|
||||
|
||||
// Move cursor to key.
|
||||
c := b.Cursor()
|
||||
k, v, flags := c.seek(name)
|
||||
|
||||
// Return nil if the key doesn't exist or it is not a bucket.
|
||||
if !bytes.Equal(name, k) || (flags&bucketLeafFlag) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Otherwise create a bucket and cache it.
|
||||
var child = b.openBucket(v)
|
||||
if b.buckets != nil {
|
||||
b.buckets[string(name)] = child
|
||||
}
|
||||
|
||||
return child
|
||||
}
|
||||
|
||||
// Helper method that re-interprets a sub-bucket value
|
||||
// from a parent into a Bucket
|
||||
func (b *Bucket) openBucket(value []byte) *Bucket {
|
||||
var child = newBucket(b.tx)
|
||||
|
||||
// If unaligned load/stores are broken on this arch and value is
|
||||
// unaligned simply clone to an aligned byte array.
|
||||
unaligned := brokenUnaligned && uintptr(unsafe.Pointer(&value[0]))&3 != 0
|
||||
|
||||
if unaligned {
|
||||
value = cloneBytes(value)
|
||||
}
|
||||
|
||||
// If this is a writable transaction then we need to copy the bucket entry.
|
||||
// Read-only transactions can point directly at the mmap entry.
|
||||
if b.tx.writable && !unaligned {
|
||||
child.bucket = &bucket{}
|
||||
*child.bucket = *(*bucket)(unsafe.Pointer(&value[0]))
|
||||
} else {
|
||||
child.bucket = (*bucket)(unsafe.Pointer(&value[0]))
|
||||
}
|
||||
|
||||
// Save a reference to the inline page if the bucket is inline.
|
||||
if child.root == 0 {
|
||||
child.page = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
|
||||
}
|
||||
|
||||
return &child
|
||||
}
|
||||
|
||||
// CreateBucket creates a new bucket at the given key and returns the new bucket.
|
||||
// Returns an error if the key already exists, if the bucket name is blank, or if the bucket name is too long.
|
||||
// The bucket instance is only valid for the lifetime of the transaction.
|
||||
func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
|
||||
if b.tx.db == nil {
|
||||
return nil, ErrTxClosed
|
||||
} else if !b.tx.writable {
|
||||
return nil, ErrTxNotWritable
|
||||
} else if len(key) == 0 {
|
||||
return nil, ErrBucketNameRequired
|
||||
}
|
||||
|
||||
// Move cursor to correct position.
|
||||
c := b.Cursor()
|
||||
k, _, flags := c.seek(key)
|
||||
|
||||
// Return an error if there is an existing key.
|
||||
if bytes.Equal(key, k) {
|
||||
if (flags & bucketLeafFlag) != 0 {
|
||||
return nil, ErrBucketExists
|
||||
}
|
||||
return nil, ErrIncompatibleValue
|
||||
}
|
||||
|
||||
// Create empty, inline bucket.
|
||||
var bucket = Bucket{
|
||||
bucket: &bucket{},
|
||||
rootNode: &node{isLeaf: true},
|
||||
FillPercent: DefaultFillPercent,
|
||||
}
|
||||
var value = bucket.write()
|
||||
|
||||
// Insert into node.
|
||||
key = cloneBytes(key)
|
||||
c.node().put(key, key, value, 0, bucketLeafFlag)
|
||||
|
||||
// Since subbuckets are not allowed on inline buckets, we need to
|
||||
// dereference the inline page, if it exists. This will cause the bucket
|
||||
// to be treated as a regular, non-inline bucket for the rest of the tx.
|
||||
b.page = nil
|
||||
|
||||
return b.Bucket(key), nil
|
||||
}
|
||||
|
||||
// CreateBucketIfNotExists creates a new bucket if it doesn't already exist and returns a reference to it.
|
||||
// Returns an error if the bucket name is blank, or if the bucket name is too long.
|
||||
// The bucket instance is only valid for the lifetime of the transaction.
|
||||
func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) {
|
||||
child, err := b.CreateBucket(key)
|
||||
if err == ErrBucketExists {
|
||||
return b.Bucket(key), nil
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return child, nil
|
||||
}
|
||||
|
||||
// DeleteBucket deletes a bucket at the given key.
|
||||
// Returns an error if the bucket does not exists, or if the key represents a non-bucket value.
|
||||
func (b *Bucket) DeleteBucket(key []byte) error {
|
||||
if b.tx.db == nil {
|
||||
return ErrTxClosed
|
||||
} else if !b.Writable() {
|
||||
return ErrTxNotWritable
|
||||
}
|
||||
|
||||
// Move cursor to correct position.
|
||||
c := b.Cursor()
|
||||
k, _, flags := c.seek(key)
|
||||
|
||||
// Return an error if bucket doesn't exist or is not a bucket.
|
||||
if !bytes.Equal(key, k) {
|
||||
return ErrBucketNotFound
|
||||
} else if (flags & bucketLeafFlag) == 0 {
|
||||
return ErrIncompatibleValue
|
||||
}
|
||||
|
||||
// Recursively delete all child buckets.
|
||||
child := b.Bucket(key)
|
||||
err := child.ForEach(func(k, v []byte) error {
|
||||
if v == nil {
|
||||
if err := child.DeleteBucket(k); err != nil {
|
||||
return fmt.Errorf("delete bucket: %s", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Remove cached copy.
|
||||
delete(b.buckets, string(key))
|
||||
|
||||
// Release all bucket pages to freelist.
|
||||
child.nodes = nil
|
||||
child.rootNode = nil
|
||||
child.free()
|
||||
|
||||
// Delete the node if we have a matching key.
|
||||
c.node().del(key)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get retrieves the value for a key in the bucket.
|
||||
// Returns a nil value if the key does not exist or if the key is a nested bucket.
|
||||
// The returned value is only valid for the life of the transaction.
|
||||
func (b *Bucket) Get(key []byte) []byte {
|
||||
k, v, flags := b.Cursor().seek(key)
|
||||
|
||||
// Return nil if this is a bucket.
|
||||
if (flags & bucketLeafFlag) != 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// If our target node isn't the same key as what's passed in then return nil.
|
||||
if !bytes.Equal(key, k) {
|
||||
return nil
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
// Put sets the value for a key in the bucket.
|
||||
// If the key exist then its previous value will be overwritten.
|
||||
// Supplied value must remain valid for the life of the transaction.
|
||||
// Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large.
|
||||
func (b *Bucket) Put(key []byte, value []byte) error {
|
||||
if b.tx.db == nil {
|
||||
return ErrTxClosed
|
||||
} else if !b.Writable() {
|
||||
return ErrTxNotWritable
|
||||
} else if len(key) == 0 {
|
||||
return ErrKeyRequired
|
||||
} else if len(key) > MaxKeySize {
|
||||
return ErrKeyTooLarge
|
||||
} else if int64(len(value)) > MaxValueSize {
|
||||
return ErrValueTooLarge
|
||||
}
|
||||
|
||||
// Move cursor to correct position.
|
||||
c := b.Cursor()
|
||||
k, _, flags := c.seek(key)
|
||||
|
||||
// Return an error if there is an existing key with a bucket value.
|
||||
if bytes.Equal(key, k) && (flags&bucketLeafFlag) != 0 {
|
||||
return ErrIncompatibleValue
|
||||
}
|
||||
|
||||
// Insert into node.
|
||||
key = cloneBytes(key)
|
||||
c.node().put(key, key, value, 0, 0)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete removes a key from the bucket.
|
||||
// If the key does not exist then nothing is done and a nil error is returned.
|
||||
// Returns an error if the bucket was created from a read-only transaction.
|
||||
func (b *Bucket) Delete(key []byte) error {
|
||||
if b.tx.db == nil {
|
||||
return ErrTxClosed
|
||||
} else if !b.Writable() {
|
||||
return ErrTxNotWritable
|
||||
}
|
||||
|
||||
// Move cursor to correct position.
|
||||
c := b.Cursor()
|
||||
k, _, flags := c.seek(key)
|
||||
|
||||
// Return nil if the key doesn't exist.
|
||||
if !bytes.Equal(key, k) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Return an error if there is already existing bucket value.
|
||||
if (flags & bucketLeafFlag) != 0 {
|
||||
return ErrIncompatibleValue
|
||||
}
|
||||
|
||||
// Delete the node if we have a matching key.
|
||||
c.node().del(key)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Sequence returns the current integer for the bucket without incrementing it.
|
||||
func (b *Bucket) Sequence() uint64 { return b.bucket.sequence }
|
||||
|
||||
// SetSequence updates the sequence number for the bucket.
|
||||
func (b *Bucket) SetSequence(v uint64) error {
|
||||
if b.tx.db == nil {
|
||||
return ErrTxClosed
|
||||
} else if !b.Writable() {
|
||||
return ErrTxNotWritable
|
||||
}
|
||||
|
||||
// Materialize the root node if it hasn't been already so that the
|
||||
// bucket will be saved during commit.
|
||||
if b.rootNode == nil {
|
||||
_ = b.node(b.root, nil)
|
||||
}
|
||||
|
||||
// Increment and return the sequence.
|
||||
b.bucket.sequence = v
|
||||
return nil
|
||||
}
|
||||
|
||||
// NextSequence returns an autoincrementing integer for the bucket.
|
||||
func (b *Bucket) NextSequence() (uint64, error) {
|
||||
if b.tx.db == nil {
|
||||
return 0, ErrTxClosed
|
||||
} else if !b.Writable() {
|
||||
return 0, ErrTxNotWritable
|
||||
}
|
||||
|
||||
// Materialize the root node if it hasn't been already so that the
|
||||
// bucket will be saved during commit.
|
||||
if b.rootNode == nil {
|
||||
_ = b.node(b.root, nil)
|
||||
}
|
||||
|
||||
// Increment and return the sequence.
|
||||
b.bucket.sequence++
|
||||
return b.bucket.sequence, nil
|
||||
}
|
||||
|
||||
// ForEach executes a function for each key/value pair in a bucket.
|
||||
// If the provided function returns an error then the iteration is stopped and
|
||||
// the error is returned to the caller. The provided function must not modify
|
||||
// the bucket; this will result in undefined behavior.
|
||||
func (b *Bucket) ForEach(fn func(k, v []byte) error) error {
|
||||
if b.tx.db == nil {
|
||||
return ErrTxClosed
|
||||
}
|
||||
c := b.Cursor()
|
||||
for k, v := c.First(); k != nil; k, v = c.Next() {
|
||||
if err := fn(k, v); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Stat returns stats on a bucket.
|
||||
func (b *Bucket) Stats() BucketStats {
|
||||
var s, subStats BucketStats
|
||||
pageSize := b.tx.db.pageSize
|
||||
s.BucketN += 1
|
||||
if b.root == 0 {
|
||||
s.InlineBucketN += 1
|
||||
}
|
||||
b.forEachPage(func(p *page, depth int) {
|
||||
if (p.flags & leafPageFlag) != 0 {
|
||||
s.KeyN += int(p.count)
|
||||
|
||||
// used totals the used bytes for the page
|
||||
used := pageHeaderSize
|
||||
|
||||
if p.count != 0 {
|
||||
// If page has any elements, add all element headers.
|
||||
used += leafPageElementSize * int(p.count-1)
|
||||
|
||||
// Add all element key, value sizes.
|
||||
// The computation takes advantage of the fact that the position
|
||||
// of the last element's key/value equals to the total of the sizes
|
||||
// of all previous elements' keys and values.
|
||||
// It also includes the last element's header.
|
||||
lastElement := p.leafPageElement(p.count - 1)
|
||||
used += int(lastElement.pos + lastElement.ksize + lastElement.vsize)
|
||||
}
|
||||
|
||||
if b.root == 0 {
|
||||
// For inlined bucket just update the inline stats
|
||||
s.InlineBucketInuse += used
|
||||
} else {
|
||||
// For non-inlined bucket update all the leaf stats
|
||||
s.LeafPageN++
|
||||
s.LeafInuse += used
|
||||
s.LeafOverflowN += int(p.overflow)
|
||||
|
||||
// Collect stats from sub-buckets.
|
||||
// Do that by iterating over all element headers
|
||||
// looking for the ones with the bucketLeafFlag.
|
||||
for i := uint16(0); i < p.count; i++ {
|
||||
e := p.leafPageElement(i)
|
||||
if (e.flags & bucketLeafFlag) != 0 {
|
||||
// For any bucket element, open the element value
|
||||
// and recursively call Stats on the contained bucket.
|
||||
subStats.Add(b.openBucket(e.value()).Stats())
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (p.flags & branchPageFlag) != 0 {
|
||||
s.BranchPageN++
|
||||
lastElement := p.branchPageElement(p.count - 1)
|
||||
|
||||
// used totals the used bytes for the page
|
||||
// Add header and all element headers.
|
||||
used := pageHeaderSize + (branchPageElementSize * int(p.count-1))
|
||||
|
||||
// Add size of all keys and values.
|
||||
// Again, use the fact that last element's position equals to
|
||||
// the total of key, value sizes of all previous elements.
|
||||
used += int(lastElement.pos + lastElement.ksize)
|
||||
s.BranchInuse += used
|
||||
s.BranchOverflowN += int(p.overflow)
|
||||
}
|
||||
|
||||
// Keep track of maximum page depth.
|
||||
if depth+1 > s.Depth {
|
||||
s.Depth = (depth + 1)
|
||||
}
|
||||
})
|
||||
|
||||
// Alloc stats can be computed from page counts and pageSize.
|
||||
s.BranchAlloc = (s.BranchPageN + s.BranchOverflowN) * pageSize
|
||||
s.LeafAlloc = (s.LeafPageN + s.LeafOverflowN) * pageSize
|
||||
|
||||
// Add the max depth of sub-buckets to get total nested depth.
|
||||
s.Depth += subStats.Depth
|
||||
// Add the stats for all sub-buckets
|
||||
s.Add(subStats)
|
||||
return s
|
||||
}
|
||||
|
||||
// forEachPage iterates over every page in a bucket, including inline pages.
|
||||
func (b *Bucket) forEachPage(fn func(*page, int)) {
|
||||
// If we have an inline page then just use that.
|
||||
if b.page != nil {
|
||||
fn(b.page, 0)
|
||||
return
|
||||
}
|
||||
|
||||
// Otherwise traverse the page hierarchy.
|
||||
b.tx.forEachPage(b.root, 0, fn)
|
||||
}
|
||||
|
||||
// forEachPageNode iterates over every page (or node) in a bucket.
|
||||
// This also includes inline pages.
|
||||
func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) {
|
||||
// If we have an inline page or root node then just use that.
|
||||
if b.page != nil {
|
||||
fn(b.page, nil, 0)
|
||||
return
|
||||
}
|
||||
b._forEachPageNode(b.root, 0, fn)
|
||||
}
|
||||
|
||||
func (b *Bucket) _forEachPageNode(pgid pgid, depth int, fn func(*page, *node, int)) {
|
||||
var p, n = b.pageNode(pgid)
|
||||
|
||||
// Execute function.
|
||||
fn(p, n, depth)
|
||||
|
||||
// Recursively loop over children.
|
||||
if p != nil {
|
||||
if (p.flags & branchPageFlag) != 0 {
|
||||
for i := 0; i < int(p.count); i++ {
|
||||
elem := p.branchPageElement(uint16(i))
|
||||
b._forEachPageNode(elem.pgid, depth+1, fn)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if !n.isLeaf {
|
||||
for _, inode := range n.inodes {
|
||||
b._forEachPageNode(inode.pgid, depth+1, fn)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// spill writes all the nodes for this bucket to dirty pages.
|
||||
func (b *Bucket) spill() error {
|
||||
// Spill all child buckets first.
|
||||
for name, child := range b.buckets {
|
||||
// If the child bucket is small enough and it has no child buckets then
|
||||
// write it inline into the parent bucket's page. Otherwise spill it
|
||||
// like a normal bucket and make the parent value a pointer to the page.
|
||||
var value []byte
|
||||
if child.inlineable() {
|
||||
child.free()
|
||||
value = child.write()
|
||||
} else {
|
||||
if err := child.spill(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Update the child bucket header in this bucket.
|
||||
value = make([]byte, unsafe.Sizeof(bucket{}))
|
||||
var bucket = (*bucket)(unsafe.Pointer(&value[0]))
|
||||
*bucket = *child.bucket
|
||||
}
|
||||
|
||||
// Skip writing the bucket if there are no materialized nodes.
|
||||
if child.rootNode == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Update parent node.
|
||||
var c = b.Cursor()
|
||||
k, _, flags := c.seek([]byte(name))
|
||||
if !bytes.Equal([]byte(name), k) {
|
||||
panic(fmt.Sprintf("misplaced bucket header: %x -> %x", []byte(name), k))
|
||||
}
|
||||
if flags&bucketLeafFlag == 0 {
|
||||
panic(fmt.Sprintf("unexpected bucket header flag: %x", flags))
|
||||
}
|
||||
c.node().put([]byte(name), []byte(name), value, 0, bucketLeafFlag)
|
||||
}
|
||||
|
||||
// Ignore if there's not a materialized root node.
|
||||
if b.rootNode == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Spill nodes.
|
||||
if err := b.rootNode.spill(); err != nil {
|
||||
return err
|
||||
}
|
||||
b.rootNode = b.rootNode.root()
|
||||
|
||||
// Update the root node for this bucket.
|
||||
if b.rootNode.pgid >= b.tx.meta.pgid {
|
||||
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid))
|
||||
}
|
||||
b.root = b.rootNode.pgid
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// inlineable returns true if a bucket is small enough to be written inline
|
||||
// and if it contains no subbuckets. Otherwise returns false.
|
||||
func (b *Bucket) inlineable() bool {
|
||||
var n = b.rootNode
|
||||
|
||||
// Bucket must only contain a single leaf node.
|
||||
if n == nil || !n.isLeaf {
|
||||
return false
|
||||
}
|
||||
|
||||
// Bucket is not inlineable if it contains subbuckets or if it goes beyond
|
||||
// our threshold for inline bucket size.
|
||||
var size = pageHeaderSize
|
||||
for _, inode := range n.inodes {
|
||||
size += leafPageElementSize + len(inode.key) + len(inode.value)
|
||||
|
||||
if inode.flags&bucketLeafFlag != 0 {
|
||||
return false
|
||||
} else if size > b.maxInlineBucketSize() {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// Returns the maximum total size of a bucket to make it a candidate for inlining.
|
||||
func (b *Bucket) maxInlineBucketSize() int {
|
||||
return b.tx.db.pageSize / 4
|
||||
}
|
||||
|
||||
// write allocates and writes a bucket to a byte slice.
|
||||
func (b *Bucket) write() []byte {
|
||||
// Allocate the appropriate size.
|
||||
var n = b.rootNode
|
||||
var value = make([]byte, bucketHeaderSize+n.size())
|
||||
|
||||
// Write a bucket header.
|
||||
var bucket = (*bucket)(unsafe.Pointer(&value[0]))
|
||||
*bucket = *b.bucket
|
||||
|
||||
// Convert byte slice to a fake page and write the root node.
|
||||
var p = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
|
||||
n.write(p)
|
||||
|
||||
return value
|
||||
}
|
||||
|
||||
// rebalance attempts to balance all nodes.
|
||||
func (b *Bucket) rebalance() {
|
||||
for _, n := range b.nodes {
|
||||
n.rebalance()
|
||||
}
|
||||
for _, child := range b.buckets {
|
||||
child.rebalance()
|
||||
}
|
||||
}
|
||||
|
||||
// node creates a node from a page and associates it with a given parent.
|
||||
func (b *Bucket) node(pgid pgid, parent *node) *node {
|
||||
_assert(b.nodes != nil, "nodes map expected")
|
||||
|
||||
// Retrieve node if it's already been created.
|
||||
if n := b.nodes[pgid]; n != nil {
|
||||
return n
|
||||
}
|
||||
|
||||
// Otherwise create a node and cache it.
|
||||
n := &node{bucket: b, parent: parent}
|
||||
if parent == nil {
|
||||
b.rootNode = n
|
||||
} else {
|
||||
parent.children = append(parent.children, n)
|
||||
}
|
||||
|
||||
// Use the inline page if this is an inline bucket.
|
||||
var p = b.page
|
||||
if p == nil {
|
||||
p = b.tx.page(pgid)
|
||||
}
|
||||
|
||||
// Read the page into the node and cache it.
|
||||
n.read(p)
|
||||
b.nodes[pgid] = n
|
||||
|
||||
// Update statistics.
|
||||
b.tx.stats.NodeCount++
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
// free recursively frees all pages in the bucket.
|
||||
func (b *Bucket) free() {
|
||||
if b.root == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
var tx = b.tx
|
||||
b.forEachPageNode(func(p *page, n *node, _ int) {
|
||||
if p != nil {
|
||||
tx.db.freelist.free(tx.meta.txid, p)
|
||||
} else {
|
||||
n.free()
|
||||
}
|
||||
})
|
||||
b.root = 0
|
||||
}
|
||||
|
||||
// dereference removes all references to the old mmap.
|
||||
func (b *Bucket) dereference() {
|
||||
if b.rootNode != nil {
|
||||
b.rootNode.root().dereference()
|
||||
}
|
||||
|
||||
for _, child := range b.buckets {
|
||||
child.dereference()
|
||||
}
|
||||
}
|
||||
|
||||
// pageNode returns the in-memory node, if it exists.
|
||||
// Otherwise returns the underlying page.
|
||||
func (b *Bucket) pageNode(id pgid) (*page, *node) {
|
||||
// Inline buckets have a fake page embedded in their value so treat them
|
||||
// differently. We'll return the rootNode (if available) or the fake page.
|
||||
if b.root == 0 {
|
||||
if id != 0 {
|
||||
panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id))
|
||||
}
|
||||
if b.rootNode != nil {
|
||||
return nil, b.rootNode
|
||||
}
|
||||
return b.page, nil
|
||||
}
|
||||
|
||||
// Check the node cache for non-inline buckets.
|
||||
if b.nodes != nil {
|
||||
if n := b.nodes[id]; n != nil {
|
||||
return nil, n
|
||||
}
|
||||
}
|
||||
|
||||
// Finally lookup the page from the transaction if no node is materialized.
|
||||
return b.tx.page(id), nil
|
||||
}
|
||||
|
||||
// BucketStats records statistics about resources used by a bucket.
|
||||
type BucketStats struct {
|
||||
// Page count statistics.
|
||||
BranchPageN int // number of logical branch pages
|
||||
BranchOverflowN int // number of physical branch overflow pages
|
||||
LeafPageN int // number of logical leaf pages
|
||||
LeafOverflowN int // number of physical leaf overflow pages
|
||||
|
||||
// Tree statistics.
|
||||
KeyN int // number of keys/value pairs
|
||||
Depth int // number of levels in B+tree
|
||||
|
||||
// Page size utilization.
|
||||
BranchAlloc int // bytes allocated for physical branch pages
|
||||
BranchInuse int // bytes actually used for branch data
|
||||
LeafAlloc int // bytes allocated for physical leaf pages
|
||||
LeafInuse int // bytes actually used for leaf data
|
||||
|
||||
// Bucket statistics
|
||||
BucketN int // total number of buckets including the top bucket
|
||||
InlineBucketN int // total number on inlined buckets
|
||||
InlineBucketInuse int // bytes used for inlined buckets (also accounted for in LeafInuse)
|
||||
}
|
||||
|
||||
func (s *BucketStats) Add(other BucketStats) {
|
||||
s.BranchPageN += other.BranchPageN
|
||||
s.BranchOverflowN += other.BranchOverflowN
|
||||
s.LeafPageN += other.LeafPageN
|
||||
s.LeafOverflowN += other.LeafOverflowN
|
||||
s.KeyN += other.KeyN
|
||||
if s.Depth < other.Depth {
|
||||
s.Depth = other.Depth
|
||||
}
|
||||
s.BranchAlloc += other.BranchAlloc
|
||||
s.BranchInuse += other.BranchInuse
|
||||
s.LeafAlloc += other.LeafAlloc
|
||||
s.LeafInuse += other.LeafInuse
|
||||
|
||||
s.BucketN += other.BucketN
|
||||
s.InlineBucketN += other.InlineBucketN
|
||||
s.InlineBucketInuse += other.InlineBucketInuse
|
||||
}
|
||||
|
||||
// cloneBytes returns a copy of a given slice.
|
||||
func cloneBytes(v []byte) []byte {
|
||||
var clone = make([]byte, len(v))
|
||||
copy(clone, v)
|
||||
return clone
|
||||
}
|
396
vendor/go.etcd.io/bbolt/cursor.go
generated
vendored
Normal file
396
vendor/go.etcd.io/bbolt/cursor.go
generated
vendored
Normal file
@ -0,0 +1,396 @@
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// Cursor represents an iterator that can traverse over all key/value pairs in a bucket in sorted order.
|
||||
// Cursors see nested buckets with value == nil.
|
||||
// Cursors can be obtained from a transaction and are valid as long as the transaction is open.
|
||||
//
|
||||
// Keys and values returned from the cursor are only valid for the life of the transaction.
|
||||
//
|
||||
// Changing data while traversing with a cursor may cause it to be invalidated
|
||||
// and return unexpected keys and/or values. You must reposition your cursor
|
||||
// after mutating data.
|
||||
type Cursor struct {
|
||||
bucket *Bucket
|
||||
stack []elemRef
|
||||
}
|
||||
|
||||
// Bucket returns the bucket that this cursor was created from.
|
||||
func (c *Cursor) Bucket() *Bucket {
|
||||
return c.bucket
|
||||
}
|
||||
|
||||
// First moves the cursor to the first item in the bucket and returns its key and value.
|
||||
// If the bucket is empty then a nil key and value are returned.
|
||||
// The returned key and value are only valid for the life of the transaction.
|
||||
func (c *Cursor) First() (key []byte, value []byte) {
|
||||
_assert(c.bucket.tx.db != nil, "tx closed")
|
||||
c.stack = c.stack[:0]
|
||||
p, n := c.bucket.pageNode(c.bucket.root)
|
||||
c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
|
||||
c.first()
|
||||
|
||||
// If we land on an empty page then move to the next value.
|
||||
// https://github.com/boltdb/bolt/issues/450
|
||||
if c.stack[len(c.stack)-1].count() == 0 {
|
||||
c.next()
|
||||
}
|
||||
|
||||
k, v, flags := c.keyValue()
|
||||
if (flags & uint32(bucketLeafFlag)) != 0 {
|
||||
return k, nil
|
||||
}
|
||||
return k, v
|
||||
|
||||
}
|
||||
|
||||
// Last moves the cursor to the last item in the bucket and returns its key and value.
|
||||
// If the bucket is empty then a nil key and value are returned.
|
||||
// The returned key and value are only valid for the life of the transaction.
|
||||
func (c *Cursor) Last() (key []byte, value []byte) {
|
||||
_assert(c.bucket.tx.db != nil, "tx closed")
|
||||
c.stack = c.stack[:0]
|
||||
p, n := c.bucket.pageNode(c.bucket.root)
|
||||
ref := elemRef{page: p, node: n}
|
||||
ref.index = ref.count() - 1
|
||||
c.stack = append(c.stack, ref)
|
||||
c.last()
|
||||
k, v, flags := c.keyValue()
|
||||
if (flags & uint32(bucketLeafFlag)) != 0 {
|
||||
return k, nil
|
||||
}
|
||||
return k, v
|
||||
}
|
||||
|
||||
// Next moves the cursor to the next item in the bucket and returns its key and value.
|
||||
// If the cursor is at the end of the bucket then a nil key and value are returned.
|
||||
// The returned key and value are only valid for the life of the transaction.
|
||||
func (c *Cursor) Next() (key []byte, value []byte) {
|
||||
_assert(c.bucket.tx.db != nil, "tx closed")
|
||||
k, v, flags := c.next()
|
||||
if (flags & uint32(bucketLeafFlag)) != 0 {
|
||||
return k, nil
|
||||
}
|
||||
return k, v
|
||||
}
|
||||
|
||||
// Prev moves the cursor to the previous item in the bucket and returns its key and value.
|
||||
// If the cursor is at the beginning of the bucket then a nil key and value are returned.
|
||||
// The returned key and value are only valid for the life of the transaction.
|
||||
func (c *Cursor) Prev() (key []byte, value []byte) {
|
||||
_assert(c.bucket.tx.db != nil, "tx closed")
|
||||
|
||||
// Attempt to move back one element until we're successful.
|
||||
// Move up the stack as we hit the beginning of each page in our stack.
|
||||
for i := len(c.stack) - 1; i >= 0; i-- {
|
||||
elem := &c.stack[i]
|
||||
if elem.index > 0 {
|
||||
elem.index--
|
||||
break
|
||||
}
|
||||
c.stack = c.stack[:i]
|
||||
}
|
||||
|
||||
// If we've hit the end then return nil.
|
||||
if len(c.stack) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Move down the stack to find the last element of the last leaf under this branch.
|
||||
c.last()
|
||||
k, v, flags := c.keyValue()
|
||||
if (flags & uint32(bucketLeafFlag)) != 0 {
|
||||
return k, nil
|
||||
}
|
||||
return k, v
|
||||
}
|
||||
|
||||
// Seek moves the cursor to a given key and returns it.
|
||||
// If the key does not exist then the next key is used. If no keys
|
||||
// follow, a nil key is returned.
|
||||
// The returned key and value are only valid for the life of the transaction.
|
||||
func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) {
|
||||
k, v, flags := c.seek(seek)
|
||||
|
||||
// If we ended up after the last element of a page then move to the next one.
|
||||
if ref := &c.stack[len(c.stack)-1]; ref.index >= ref.count() {
|
||||
k, v, flags = c.next()
|
||||
}
|
||||
|
||||
if k == nil {
|
||||
return nil, nil
|
||||
} else if (flags & uint32(bucketLeafFlag)) != 0 {
|
||||
return k, nil
|
||||
}
|
||||
return k, v
|
||||
}
|
||||
|
||||
// Delete removes the current key/value under the cursor from the bucket.
|
||||
// Delete fails if current key/value is a bucket or if the transaction is not writable.
|
||||
func (c *Cursor) Delete() error {
|
||||
if c.bucket.tx.db == nil {
|
||||
return ErrTxClosed
|
||||
} else if !c.bucket.Writable() {
|
||||
return ErrTxNotWritable
|
||||
}
|
||||
|
||||
key, _, flags := c.keyValue()
|
||||
// Return an error if current value is a bucket.
|
||||
if (flags & bucketLeafFlag) != 0 {
|
||||
return ErrIncompatibleValue
|
||||
}
|
||||
c.node().del(key)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// seek moves the cursor to a given key and returns it.
|
||||
// If the key does not exist then the next key is used.
|
||||
func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) {
|
||||
_assert(c.bucket.tx.db != nil, "tx closed")
|
||||
|
||||
// Start from root page/node and traverse to correct page.
|
||||
c.stack = c.stack[:0]
|
||||
c.search(seek, c.bucket.root)
|
||||
|
||||
// If this is a bucket then return a nil value.
|
||||
return c.keyValue()
|
||||
}
|
||||
|
||||
// first moves the cursor to the first leaf element under the last page in the stack.
|
||||
func (c *Cursor) first() {
|
||||
for {
|
||||
// Exit when we hit a leaf page.
|
||||
var ref = &c.stack[len(c.stack)-1]
|
||||
if ref.isLeaf() {
|
||||
break
|
||||
}
|
||||
|
||||
// Keep adding pages pointing to the first element to the stack.
|
||||
var pgid pgid
|
||||
if ref.node != nil {
|
||||
pgid = ref.node.inodes[ref.index].pgid
|
||||
} else {
|
||||
pgid = ref.page.branchPageElement(uint16(ref.index)).pgid
|
||||
}
|
||||
p, n := c.bucket.pageNode(pgid)
|
||||
c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
|
||||
}
|
||||
}
|
||||
|
||||
// last moves the cursor to the last leaf element under the last page in the stack.
|
||||
func (c *Cursor) last() {
|
||||
for {
|
||||
// Exit when we hit a leaf page.
|
||||
ref := &c.stack[len(c.stack)-1]
|
||||
if ref.isLeaf() {
|
||||
break
|
||||
}
|
||||
|
||||
// Keep adding pages pointing to the last element in the stack.
|
||||
var pgid pgid
|
||||
if ref.node != nil {
|
||||
pgid = ref.node.inodes[ref.index].pgid
|
||||
} else {
|
||||
pgid = ref.page.branchPageElement(uint16(ref.index)).pgid
|
||||
}
|
||||
p, n := c.bucket.pageNode(pgid)
|
||||
|
||||
var nextRef = elemRef{page: p, node: n}
|
||||
nextRef.index = nextRef.count() - 1
|
||||
c.stack = append(c.stack, nextRef)
|
||||
}
|
||||
}
|
||||
|
||||
// next moves to the next leaf element and returns the key and value.
|
||||
// If the cursor is at the last leaf element then it stays there and returns nil.
|
||||
func (c *Cursor) next() (key []byte, value []byte, flags uint32) {
|
||||
for {
|
||||
// Attempt to move over one element until we're successful.
|
||||
// Move up the stack as we hit the end of each page in our stack.
|
||||
var i int
|
||||
for i = len(c.stack) - 1; i >= 0; i-- {
|
||||
elem := &c.stack[i]
|
||||
if elem.index < elem.count()-1 {
|
||||
elem.index++
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// If we've hit the root page then stop and return. This will leave the
|
||||
// cursor on the last element of the last page.
|
||||
if i == -1 {
|
||||
return nil, nil, 0
|
||||
}
|
||||
|
||||
// Otherwise start from where we left off in the stack and find the
|
||||
// first element of the first leaf page.
|
||||
c.stack = c.stack[:i+1]
|
||||
c.first()
|
||||
|
||||
// If this is an empty page then restart and move back up the stack.
|
||||
// https://github.com/boltdb/bolt/issues/450
|
||||
if c.stack[len(c.stack)-1].count() == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
return c.keyValue()
|
||||
}
|
||||
}
|
||||
|
||||
// search recursively performs a binary search against a given page/node until it finds a given key.
|
||||
func (c *Cursor) search(key []byte, pgid pgid) {
|
||||
p, n := c.bucket.pageNode(pgid)
|
||||
if p != nil && (p.flags&(branchPageFlag|leafPageFlag)) == 0 {
|
||||
panic(fmt.Sprintf("invalid page type: %d: %x", p.id, p.flags))
|
||||
}
|
||||
e := elemRef{page: p, node: n}
|
||||
c.stack = append(c.stack, e)
|
||||
|
||||
// If we're on a leaf page/node then find the specific node.
|
||||
if e.isLeaf() {
|
||||
c.nsearch(key)
|
||||
return
|
||||
}
|
||||
|
||||
if n != nil {
|
||||
c.searchNode(key, n)
|
||||
return
|
||||
}
|
||||
c.searchPage(key, p)
|
||||
}
|
||||
|
||||
func (c *Cursor) searchNode(key []byte, n *node) {
|
||||
var exact bool
|
||||
index := sort.Search(len(n.inodes), func(i int) bool {
|
||||
// TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
|
||||
// sort.Search() finds the lowest index where f() != -1 but we need the highest index.
|
||||
ret := bytes.Compare(n.inodes[i].key, key)
|
||||
if ret == 0 {
|
||||
exact = true
|
||||
}
|
||||
return ret != -1
|
||||
})
|
||||
if !exact && index > 0 {
|
||||
index--
|
||||
}
|
||||
c.stack[len(c.stack)-1].index = index
|
||||
|
||||
// Recursively search to the next page.
|
||||
c.search(key, n.inodes[index].pgid)
|
||||
}
|
||||
|
||||
func (c *Cursor) searchPage(key []byte, p *page) {
|
||||
// Binary search for the correct range.
|
||||
inodes := p.branchPageElements()
|
||||
|
||||
var exact bool
|
||||
index := sort.Search(int(p.count), func(i int) bool {
|
||||
// TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
|
||||
// sort.Search() finds the lowest index where f() != -1 but we need the highest index.
|
||||
ret := bytes.Compare(inodes[i].key(), key)
|
||||
if ret == 0 {
|
||||
exact = true
|
||||
}
|
||||
return ret != -1
|
||||
})
|
||||
if !exact && index > 0 {
|
||||
index--
|
||||
}
|
||||
c.stack[len(c.stack)-1].index = index
|
||||
|
||||
// Recursively search to the next page.
|
||||
c.search(key, inodes[index].pgid)
|
||||
}
|
||||
|
||||
// nsearch searches the leaf node on the top of the stack for a key.
|
||||
func (c *Cursor) nsearch(key []byte) {
|
||||
e := &c.stack[len(c.stack)-1]
|
||||
p, n := e.page, e.node
|
||||
|
||||
// If we have a node then search its inodes.
|
||||
if n != nil {
|
||||
index := sort.Search(len(n.inodes), func(i int) bool {
|
||||
return bytes.Compare(n.inodes[i].key, key) != -1
|
||||
})
|
||||
e.index = index
|
||||
return
|
||||
}
|
||||
|
||||
// If we have a page then search its leaf elements.
|
||||
inodes := p.leafPageElements()
|
||||
index := sort.Search(int(p.count), func(i int) bool {
|
||||
return bytes.Compare(inodes[i].key(), key) != -1
|
||||
})
|
||||
e.index = index
|
||||
}
|
||||
|
||||
// keyValue returns the key and value of the current leaf element.
|
||||
func (c *Cursor) keyValue() ([]byte, []byte, uint32) {
|
||||
ref := &c.stack[len(c.stack)-1]
|
||||
|
||||
// If the cursor is pointing to the end of page/node then return nil.
|
||||
if ref.count() == 0 || ref.index >= ref.count() {
|
||||
return nil, nil, 0
|
||||
}
|
||||
|
||||
// Retrieve value from node.
|
||||
if ref.node != nil {
|
||||
inode := &ref.node.inodes[ref.index]
|
||||
return inode.key, inode.value, inode.flags
|
||||
}
|
||||
|
||||
// Or retrieve value from page.
|
||||
elem := ref.page.leafPageElement(uint16(ref.index))
|
||||
return elem.key(), elem.value(), elem.flags
|
||||
}
|
||||
|
||||
// node returns the node that the cursor is currently positioned on.
|
||||
func (c *Cursor) node() *node {
|
||||
_assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack")
|
||||
|
||||
// If the top of the stack is a leaf node then just return it.
|
||||
if ref := &c.stack[len(c.stack)-1]; ref.node != nil && ref.isLeaf() {
|
||||
return ref.node
|
||||
}
|
||||
|
||||
// Start from root and traverse down the hierarchy.
|
||||
var n = c.stack[0].node
|
||||
if n == nil {
|
||||
n = c.bucket.node(c.stack[0].page.id, nil)
|
||||
}
|
||||
for _, ref := range c.stack[:len(c.stack)-1] {
|
||||
_assert(!n.isLeaf, "expected branch node")
|
||||
n = n.childAt(int(ref.index))
|
||||
}
|
||||
_assert(n.isLeaf, "expected leaf node")
|
||||
return n
|
||||
}
|
||||
|
||||
// elemRef represents a reference to an element on a given page/node.
|
||||
type elemRef struct {
|
||||
page *page
|
||||
node *node
|
||||
index int
|
||||
}
|
||||
|
||||
// isLeaf returns whether the ref is pointing at a leaf page/node.
|
||||
func (r *elemRef) isLeaf() bool {
|
||||
if r.node != nil {
|
||||
return r.node.isLeaf
|
||||
}
|
||||
return (r.page.flags & leafPageFlag) != 0
|
||||
}
|
||||
|
||||
// count returns the number of inodes or page elements.
|
||||
func (r *elemRef) count() int {
|
||||
if r.node != nil {
|
||||
return len(r.node.inodes)
|
||||
}
|
||||
return int(r.page.count)
|
||||
}
|
1138
vendor/go.etcd.io/bbolt/db.go
generated
vendored
Normal file
1138
vendor/go.etcd.io/bbolt/db.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
44
vendor/go.etcd.io/bbolt/doc.go
generated
vendored
Normal file
44
vendor/go.etcd.io/bbolt/doc.go
generated
vendored
Normal file
@ -0,0 +1,44 @@
|
||||
/*
|
||||
package bbolt implements a low-level key/value store in pure Go. It supports
|
||||
fully serializable transactions, ACID semantics, and lock-free MVCC with
|
||||
multiple readers and a single writer. Bolt can be used for projects that
|
||||
want a simple data store without the need to add large dependencies such as
|
||||
Postgres or MySQL.
|
||||
|
||||
Bolt is a single-level, zero-copy, B+tree data store. This means that Bolt is
|
||||
optimized for fast read access and does not require recovery in the event of a
|
||||
system crash. Transactions which have not finished committing will simply be
|
||||
rolled back in the event of a crash.
|
||||
|
||||
The design of Bolt is based on Howard Chu's LMDB database project.
|
||||
|
||||
Bolt currently works on Windows, Mac OS X, and Linux.
|
||||
|
||||
|
||||
Basics
|
||||
|
||||
There are only a few types in Bolt: DB, Bucket, Tx, and Cursor. The DB is
|
||||
a collection of buckets and is represented by a single file on disk. A bucket is
|
||||
a collection of unique keys that are associated with values.
|
||||
|
||||
Transactions provide either read-only or read-write access to the database.
|
||||
Read-only transactions can retrieve key/value pairs and can use Cursors to
|
||||
iterate over the dataset sequentially. Read-write transactions can create and
|
||||
delete buckets and can insert and remove keys. Only one read-write transaction
|
||||
is allowed at a time.
|
||||
|
||||
|
||||
Caveats
|
||||
|
||||
The database uses a read-only, memory-mapped data file to ensure that
|
||||
applications cannot corrupt the database, however, this means that keys and
|
||||
values returned from Bolt cannot be changed. Writing to a read-only byte slice
|
||||
will cause Go to panic.
|
||||
|
||||
Keys and values retrieved from the database are only valid for the life of
|
||||
the transaction. When used outside the transaction, these byte slices can
|
||||
point to different data or can point to invalid memory which will cause a panic.
|
||||
|
||||
|
||||
*/
|
||||
package bbolt
|
71
vendor/go.etcd.io/bbolt/errors.go
generated
vendored
Normal file
71
vendor/go.etcd.io/bbolt/errors.go
generated
vendored
Normal file
@ -0,0 +1,71 @@
|
||||
package bbolt
|
||||
|
||||
import "errors"
|
||||
|
||||
// These errors can be returned when opening or calling methods on a DB.
|
||||
var (
|
||||
// ErrDatabaseNotOpen is returned when a DB instance is accessed before it
|
||||
// is opened or after it is closed.
|
||||
ErrDatabaseNotOpen = errors.New("database not open")
|
||||
|
||||
// ErrDatabaseOpen is returned when opening a database that is
|
||||
// already open.
|
||||
ErrDatabaseOpen = errors.New("database already open")
|
||||
|
||||
// ErrInvalid is returned when both meta pages on a database are invalid.
|
||||
// This typically occurs when a file is not a bolt database.
|
||||
ErrInvalid = errors.New("invalid database")
|
||||
|
||||
// ErrVersionMismatch is returned when the data file was created with a
|
||||
// different version of Bolt.
|
||||
ErrVersionMismatch = errors.New("version mismatch")
|
||||
|
||||
// ErrChecksum is returned when either meta page checksum does not match.
|
||||
ErrChecksum = errors.New("checksum error")
|
||||
|
||||
// ErrTimeout is returned when a database cannot obtain an exclusive lock
|
||||
// on the data file after the timeout passed to Open().
|
||||
ErrTimeout = errors.New("timeout")
|
||||
)
|
||||
|
||||
// These errors can occur when beginning or committing a Tx.
|
||||
var (
|
||||
// ErrTxNotWritable is returned when performing a write operation on a
|
||||
// read-only transaction.
|
||||
ErrTxNotWritable = errors.New("tx not writable")
|
||||
|
||||
// ErrTxClosed is returned when committing or rolling back a transaction
|
||||
// that has already been committed or rolled back.
|
||||
ErrTxClosed = errors.New("tx closed")
|
||||
|
||||
// ErrDatabaseReadOnly is returned when a mutating transaction is started on a
|
||||
// read-only database.
|
||||
ErrDatabaseReadOnly = errors.New("database is in read-only mode")
|
||||
)
|
||||
|
||||
// These errors can occur when putting or deleting a value or a bucket.
|
||||
var (
|
||||
// ErrBucketNotFound is returned when trying to access a bucket that has
|
||||
// not been created yet.
|
||||
ErrBucketNotFound = errors.New("bucket not found")
|
||||
|
||||
// ErrBucketExists is returned when creating a bucket that already exists.
|
||||
ErrBucketExists = errors.New("bucket already exists")
|
||||
|
||||
// ErrBucketNameRequired is returned when creating a bucket with a blank name.
|
||||
ErrBucketNameRequired = errors.New("bucket name required")
|
||||
|
||||
// ErrKeyRequired is returned when inserting a zero-length key.
|
||||
ErrKeyRequired = errors.New("key required")
|
||||
|
||||
// ErrKeyTooLarge is returned when inserting a key that is larger than MaxKeySize.
|
||||
ErrKeyTooLarge = errors.New("key too large")
|
||||
|
||||
// ErrValueTooLarge is returned when inserting a value that is larger than MaxValueSize.
|
||||
ErrValueTooLarge = errors.New("value too large")
|
||||
|
||||
// ErrIncompatibleValue is returned when trying create or delete a bucket
|
||||
// on an existing non-bucket key or when trying to create or delete a
|
||||
// non-bucket key on an existing bucket key.
|
||||
ErrIncompatibleValue = errors.New("incompatible value")
|
||||
)
|
333
vendor/go.etcd.io/bbolt/freelist.go
generated
vendored
Normal file
333
vendor/go.etcd.io/bbolt/freelist.go
generated
vendored
Normal file
@ -0,0 +1,333 @@
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// txPending holds a list of pgids and corresponding allocation txns
|
||||
// that are pending to be freed.
|
||||
type txPending struct {
|
||||
ids []pgid
|
||||
alloctx []txid // txids allocating the ids
|
||||
lastReleaseBegin txid // beginning txid of last matching releaseRange
|
||||
}
|
||||
|
||||
// freelist represents a list of all pages that are available for allocation.
|
||||
// It also tracks pages that have been freed but are still in use by open transactions.
|
||||
type freelist struct {
|
||||
ids []pgid // all free and available free page ids.
|
||||
allocs map[pgid]txid // mapping of txid that allocated a pgid.
|
||||
pending map[txid]*txPending // mapping of soon-to-be free page ids by tx.
|
||||
cache map[pgid]bool // fast lookup of all free and pending page ids.
|
||||
}
|
||||
|
||||
// newFreelist returns an empty, initialized freelist.
|
||||
func newFreelist() *freelist {
|
||||
return &freelist{
|
||||
allocs: make(map[pgid]txid),
|
||||
pending: make(map[txid]*txPending),
|
||||
cache: make(map[pgid]bool),
|
||||
}
|
||||
}
|
||||
|
||||
// size returns the size of the page after serialization.
|
||||
func (f *freelist) size() int {
|
||||
n := f.count()
|
||||
if n >= 0xFFFF {
|
||||
// The first element will be used to store the count. See freelist.write.
|
||||
n++
|
||||
}
|
||||
return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * n)
|
||||
}
|
||||
|
||||
// count returns count of pages on the freelist
|
||||
func (f *freelist) count() int {
|
||||
return f.free_count() + f.pending_count()
|
||||
}
|
||||
|
||||
// free_count returns count of free pages
|
||||
func (f *freelist) free_count() int {
|
||||
return len(f.ids)
|
||||
}
|
||||
|
||||
// pending_count returns count of pending pages
|
||||
func (f *freelist) pending_count() int {
|
||||
var count int
|
||||
for _, txp := range f.pending {
|
||||
count += len(txp.ids)
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
// copyall copies into dst a list of all free ids and all pending ids in one sorted list.
|
||||
// f.count returns the minimum length required for dst.
|
||||
func (f *freelist) copyall(dst []pgid) {
|
||||
m := make(pgids, 0, f.pending_count())
|
||||
for _, txp := range f.pending {
|
||||
m = append(m, txp.ids...)
|
||||
}
|
||||
sort.Sort(m)
|
||||
mergepgids(dst, f.ids, m)
|
||||
}
|
||||
|
||||
// allocate returns the starting page id of a contiguous list of pages of a given size.
|
||||
// If a contiguous block cannot be found then 0 is returned.
|
||||
func (f *freelist) allocate(txid txid, n int) pgid {
|
||||
if len(f.ids) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
var initial, previd pgid
|
||||
for i, id := range f.ids {
|
||||
if id <= 1 {
|
||||
panic(fmt.Sprintf("invalid page allocation: %d", id))
|
||||
}
|
||||
|
||||
// Reset initial page if this is not contiguous.
|
||||
if previd == 0 || id-previd != 1 {
|
||||
initial = id
|
||||
}
|
||||
|
||||
// If we found a contiguous block then remove it and return it.
|
||||
if (id-initial)+1 == pgid(n) {
|
||||
// If we're allocating off the beginning then take the fast path
|
||||
// and just adjust the existing slice. This will use extra memory
|
||||
// temporarily but the append() in free() will realloc the slice
|
||||
// as is necessary.
|
||||
if (i + 1) == n {
|
||||
f.ids = f.ids[i+1:]
|
||||
} else {
|
||||
copy(f.ids[i-n+1:], f.ids[i+1:])
|
||||
f.ids = f.ids[:len(f.ids)-n]
|
||||
}
|
||||
|
||||
// Remove from the free cache.
|
||||
for i := pgid(0); i < pgid(n); i++ {
|
||||
delete(f.cache, initial+i)
|
||||
}
|
||||
f.allocs[initial] = txid
|
||||
return initial
|
||||
}
|
||||
|
||||
previd = id
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// free releases a page and its overflow for a given transaction id.
|
||||
// If the page is already free then a panic will occur.
|
||||
func (f *freelist) free(txid txid, p *page) {
|
||||
if p.id <= 1 {
|
||||
panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id))
|
||||
}
|
||||
|
||||
// Free page and all its overflow pages.
|
||||
txp := f.pending[txid]
|
||||
if txp == nil {
|
||||
txp = &txPending{}
|
||||
f.pending[txid] = txp
|
||||
}
|
||||
allocTxid, ok := f.allocs[p.id]
|
||||
if ok {
|
||||
delete(f.allocs, p.id)
|
||||
} else if (p.flags & freelistPageFlag) != 0 {
|
||||
// Freelist is always allocated by prior tx.
|
||||
allocTxid = txid - 1
|
||||
}
|
||||
|
||||
for id := p.id; id <= p.id+pgid(p.overflow); id++ {
|
||||
// Verify that page is not already free.
|
||||
if f.cache[id] {
|
||||
panic(fmt.Sprintf("page %d already freed", id))
|
||||
}
|
||||
// Add to the freelist and cache.
|
||||
txp.ids = append(txp.ids, id)
|
||||
txp.alloctx = append(txp.alloctx, allocTxid)
|
||||
f.cache[id] = true
|
||||
}
|
||||
}
|
||||
|
||||
// release moves all page ids for a transaction id (or older) to the freelist.
|
||||
func (f *freelist) release(txid txid) {
|
||||
m := make(pgids, 0)
|
||||
for tid, txp := range f.pending {
|
||||
if tid <= txid {
|
||||
// Move transaction's pending pages to the available freelist.
|
||||
// Don't remove from the cache since the page is still free.
|
||||
m = append(m, txp.ids...)
|
||||
delete(f.pending, tid)
|
||||
}
|
||||
}
|
||||
sort.Sort(m)
|
||||
f.ids = pgids(f.ids).merge(m)
|
||||
}
|
||||
|
||||
// releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
|
||||
func (f *freelist) releaseRange(begin, end txid) {
|
||||
if begin > end {
|
||||
return
|
||||
}
|
||||
var m pgids
|
||||
for tid, txp := range f.pending {
|
||||
if tid < begin || tid > end {
|
||||
continue
|
||||
}
|
||||
// Don't recompute freed pages if ranges haven't updated.
|
||||
if txp.lastReleaseBegin == begin {
|
||||
continue
|
||||
}
|
||||
for i := 0; i < len(txp.ids); i++ {
|
||||
if atx := txp.alloctx[i]; atx < begin || atx > end {
|
||||
continue
|
||||
}
|
||||
m = append(m, txp.ids[i])
|
||||
txp.ids[i] = txp.ids[len(txp.ids)-1]
|
||||
txp.ids = txp.ids[:len(txp.ids)-1]
|
||||
txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1]
|
||||
txp.alloctx = txp.alloctx[:len(txp.alloctx)-1]
|
||||
i--
|
||||
}
|
||||
txp.lastReleaseBegin = begin
|
||||
if len(txp.ids) == 0 {
|
||||
delete(f.pending, tid)
|
||||
}
|
||||
}
|
||||
sort.Sort(m)
|
||||
f.ids = pgids(f.ids).merge(m)
|
||||
}
|
||||
|
||||
// rollback removes the pages from a given pending tx.
|
||||
func (f *freelist) rollback(txid txid) {
|
||||
// Remove page ids from cache.
|
||||
txp := f.pending[txid]
|
||||
if txp == nil {
|
||||
return
|
||||
}
|
||||
var m pgids
|
||||
for i, pgid := range txp.ids {
|
||||
delete(f.cache, pgid)
|
||||
tx := txp.alloctx[i]
|
||||
if tx == 0 {
|
||||
continue
|
||||
}
|
||||
if tx != txid {
|
||||
// Pending free aborted; restore page back to alloc list.
|
||||
f.allocs[pgid] = tx
|
||||
} else {
|
||||
// Freed page was allocated by this txn; OK to throw away.
|
||||
m = append(m, pgid)
|
||||
}
|
||||
}
|
||||
// Remove pages from pending list and mark as free if allocated by txid.
|
||||
delete(f.pending, txid)
|
||||
sort.Sort(m)
|
||||
f.ids = pgids(f.ids).merge(m)
|
||||
}
|
||||
|
||||
// freed returns whether a given page is in the free list.
|
||||
func (f *freelist) freed(pgid pgid) bool {
|
||||
return f.cache[pgid]
|
||||
}
|
||||
|
||||
// read initializes the freelist from a freelist page.
|
||||
func (f *freelist) read(p *page) {
|
||||
if (p.flags & freelistPageFlag) == 0 {
|
||||
panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ()))
|
||||
}
|
||||
// If the page.count is at the max uint16 value (64k) then it's considered
|
||||
// an overflow and the size of the freelist is stored as the first element.
|
||||
idx, count := 0, int(p.count)
|
||||
if count == 0xFFFF {
|
||||
idx = 1
|
||||
count = int(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0])
|
||||
}
|
||||
|
||||
// Copy the list of page ids from the freelist.
|
||||
if count == 0 {
|
||||
f.ids = nil
|
||||
} else {
|
||||
ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx : idx+count]
|
||||
f.ids = make([]pgid, len(ids))
|
||||
copy(f.ids, ids)
|
||||
|
||||
// Make sure they're sorted.
|
||||
sort.Sort(pgids(f.ids))
|
||||
}
|
||||
|
||||
// Rebuild the page cache.
|
||||
f.reindex()
|
||||
}
|
||||
|
||||
// read initializes the freelist from a given list of ids.
|
||||
func (f *freelist) readIDs(ids []pgid) {
|
||||
f.ids = ids
|
||||
f.reindex()
|
||||
}
|
||||
|
||||
// write writes the page ids onto a freelist page. All free and pending ids are
|
||||
// saved to disk since in the event of a program crash, all pending ids will
|
||||
// become free.
|
||||
func (f *freelist) write(p *page) error {
|
||||
// Combine the old free pgids and pgids waiting on an open transaction.
|
||||
|
||||
// Update the header flag.
|
||||
p.flags |= freelistPageFlag
|
||||
|
||||
// The page.count can only hold up to 64k elements so if we overflow that
|
||||
// number then we handle it by putting the size in the first element.
|
||||
lenids := f.count()
|
||||
if lenids == 0 {
|
||||
p.count = uint16(lenids)
|
||||
} else if lenids < 0xFFFF {
|
||||
p.count = uint16(lenids)
|
||||
f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:])
|
||||
} else {
|
||||
p.count = 0xFFFF
|
||||
((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(lenids)
|
||||
f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:])
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// reload reads the freelist from a page and filters out pending items.
|
||||
func (f *freelist) reload(p *page) {
|
||||
f.read(p)
|
||||
|
||||
// Build a cache of only pending pages.
|
||||
pcache := make(map[pgid]bool)
|
||||
for _, txp := range f.pending {
|
||||
for _, pendingID := range txp.ids {
|
||||
pcache[pendingID] = true
|
||||
}
|
||||
}
|
||||
|
||||
// Check each page in the freelist and build a new available freelist
|
||||
// with any pages not in the pending lists.
|
||||
var a []pgid
|
||||
for _, id := range f.ids {
|
||||
if !pcache[id] {
|
||||
a = append(a, id)
|
||||
}
|
||||
}
|
||||
f.ids = a
|
||||
|
||||
// Once the available list is rebuilt then rebuild the free cache so that
|
||||
// it includes the available and pending free pages.
|
||||
f.reindex()
|
||||
}
|
||||
|
||||
// reindex rebuilds the free cache based on available and pending free lists.
|
||||
func (f *freelist) reindex() {
|
||||
f.cache = make(map[pgid]bool, len(f.ids))
|
||||
for _, id := range f.ids {
|
||||
f.cache[id] = true
|
||||
}
|
||||
for _, txp := range f.pending {
|
||||
for _, pendingID := range txp.ids {
|
||||
f.cache[pendingID] = true
|
||||
}
|
||||
}
|
||||
}
|
604
vendor/go.etcd.io/bbolt/node.go
generated
vendored
Normal file
604
vendor/go.etcd.io/bbolt/node.go
generated
vendored
Normal file
@ -0,0 +1,604 @@
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"sort"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// node represents an in-memory, deserialized page.
|
||||
type node struct {
|
||||
bucket *Bucket
|
||||
isLeaf bool
|
||||
unbalanced bool
|
||||
spilled bool
|
||||
key []byte
|
||||
pgid pgid
|
||||
parent *node
|
||||
children nodes
|
||||
inodes inodes
|
||||
}
|
||||
|
||||
// root returns the top-level node this node is attached to.
|
||||
func (n *node) root() *node {
|
||||
if n.parent == nil {
|
||||
return n
|
||||
}
|
||||
return n.parent.root()
|
||||
}
|
||||
|
||||
// minKeys returns the minimum number of inodes this node should have.
|
||||
func (n *node) minKeys() int {
|
||||
if n.isLeaf {
|
||||
return 1
|
||||
}
|
||||
return 2
|
||||
}
|
||||
|
||||
// size returns the size of the node after serialization.
|
||||
func (n *node) size() int {
|
||||
sz, elsz := pageHeaderSize, n.pageElementSize()
|
||||
for i := 0; i < len(n.inodes); i++ {
|
||||
item := &n.inodes[i]
|
||||
sz += elsz + len(item.key) + len(item.value)
|
||||
}
|
||||
return sz
|
||||
}
|
||||
|
||||
// sizeLessThan returns true if the node is less than a given size.
|
||||
// This is an optimization to avoid calculating a large node when we only need
|
||||
// to know if it fits inside a certain page size.
|
||||
func (n *node) sizeLessThan(v int) bool {
|
||||
sz, elsz := pageHeaderSize, n.pageElementSize()
|
||||
for i := 0; i < len(n.inodes); i++ {
|
||||
item := &n.inodes[i]
|
||||
sz += elsz + len(item.key) + len(item.value)
|
||||
if sz >= v {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// pageElementSize returns the size of each page element based on the type of node.
|
||||
func (n *node) pageElementSize() int {
|
||||
if n.isLeaf {
|
||||
return leafPageElementSize
|
||||
}
|
||||
return branchPageElementSize
|
||||
}
|
||||
|
||||
// childAt returns the child node at a given index.
|
||||
func (n *node) childAt(index int) *node {
|
||||
if n.isLeaf {
|
||||
panic(fmt.Sprintf("invalid childAt(%d) on a leaf node", index))
|
||||
}
|
||||
return n.bucket.node(n.inodes[index].pgid, n)
|
||||
}
|
||||
|
||||
// childIndex returns the index of a given child node.
|
||||
func (n *node) childIndex(child *node) int {
|
||||
index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, child.key) != -1 })
|
||||
return index
|
||||
}
|
||||
|
||||
// numChildren returns the number of children.
|
||||
func (n *node) numChildren() int {
|
||||
return len(n.inodes)
|
||||
}
|
||||
|
||||
// nextSibling returns the next node with the same parent.
|
||||
func (n *node) nextSibling() *node {
|
||||
if n.parent == nil {
|
||||
return nil
|
||||
}
|
||||
index := n.parent.childIndex(n)
|
||||
if index >= n.parent.numChildren()-1 {
|
||||
return nil
|
||||
}
|
||||
return n.parent.childAt(index + 1)
|
||||
}
|
||||
|
||||
// prevSibling returns the previous node with the same parent.
|
||||
func (n *node) prevSibling() *node {
|
||||
if n.parent == nil {
|
||||
return nil
|
||||
}
|
||||
index := n.parent.childIndex(n)
|
||||
if index == 0 {
|
||||
return nil
|
||||
}
|
||||
return n.parent.childAt(index - 1)
|
||||
}
|
||||
|
||||
// put inserts a key/value.
|
||||
func (n *node) put(oldKey, newKey, value []byte, pgid pgid, flags uint32) {
|
||||
if pgid >= n.bucket.tx.meta.pgid {
|
||||
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", pgid, n.bucket.tx.meta.pgid))
|
||||
} else if len(oldKey) <= 0 {
|
||||
panic("put: zero-length old key")
|
||||
} else if len(newKey) <= 0 {
|
||||
panic("put: zero-length new key")
|
||||
}
|
||||
|
||||
// Find insertion index.
|
||||
index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 })
|
||||
|
||||
// Add capacity and shift nodes if we don't have an exact match and need to insert.
|
||||
exact := (len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey))
|
||||
if !exact {
|
||||
n.inodes = append(n.inodes, inode{})
|
||||
copy(n.inodes[index+1:], n.inodes[index:])
|
||||
}
|
||||
|
||||
inode := &n.inodes[index]
|
||||
inode.flags = flags
|
||||
inode.key = newKey
|
||||
inode.value = value
|
||||
inode.pgid = pgid
|
||||
_assert(len(inode.key) > 0, "put: zero-length inode key")
|
||||
}
|
||||
|
||||
// del removes a key from the node.
|
||||
func (n *node) del(key []byte) {
|
||||
// Find index of key.
|
||||
index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, key) != -1 })
|
||||
|
||||
// Exit if the key isn't found.
|
||||
if index >= len(n.inodes) || !bytes.Equal(n.inodes[index].key, key) {
|
||||
return
|
||||
}
|
||||
|
||||
// Delete inode from the node.
|
||||
n.inodes = append(n.inodes[:index], n.inodes[index+1:]...)
|
||||
|
||||
// Mark the node as needing rebalancing.
|
||||
n.unbalanced = true
|
||||
}
|
||||
|
||||
// read initializes the node from a page.
|
||||
func (n *node) read(p *page) {
|
||||
n.pgid = p.id
|
||||
n.isLeaf = ((p.flags & leafPageFlag) != 0)
|
||||
n.inodes = make(inodes, int(p.count))
|
||||
|
||||
for i := 0; i < int(p.count); i++ {
|
||||
inode := &n.inodes[i]
|
||||
if n.isLeaf {
|
||||
elem := p.leafPageElement(uint16(i))
|
||||
inode.flags = elem.flags
|
||||
inode.key = elem.key()
|
||||
inode.value = elem.value()
|
||||
} else {
|
||||
elem := p.branchPageElement(uint16(i))
|
||||
inode.pgid = elem.pgid
|
||||
inode.key = elem.key()
|
||||
}
|
||||
_assert(len(inode.key) > 0, "read: zero-length inode key")
|
||||
}
|
||||
|
||||
// Save first key so we can find the node in the parent when we spill.
|
||||
if len(n.inodes) > 0 {
|
||||
n.key = n.inodes[0].key
|
||||
_assert(len(n.key) > 0, "read: zero-length node key")
|
||||
} else {
|
||||
n.key = nil
|
||||
}
|
||||
}
|
||||
|
||||
// write writes the items onto one or more pages.
|
||||
func (n *node) write(p *page) {
|
||||
// Initialize page.
|
||||
if n.isLeaf {
|
||||
p.flags |= leafPageFlag
|
||||
} else {
|
||||
p.flags |= branchPageFlag
|
||||
}
|
||||
|
||||
if len(n.inodes) >= 0xFFFF {
|
||||
panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.id))
|
||||
}
|
||||
p.count = uint16(len(n.inodes))
|
||||
|
||||
// Stop here if there are no items to write.
|
||||
if p.count == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Loop over each item and write it to the page.
|
||||
b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[n.pageElementSize()*len(n.inodes):]
|
||||
for i, item := range n.inodes {
|
||||
_assert(len(item.key) > 0, "write: zero-length inode key")
|
||||
|
||||
// Write the page element.
|
||||
if n.isLeaf {
|
||||
elem := p.leafPageElement(uint16(i))
|
||||
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
|
||||
elem.flags = item.flags
|
||||
elem.ksize = uint32(len(item.key))
|
||||
elem.vsize = uint32(len(item.value))
|
||||
} else {
|
||||
elem := p.branchPageElement(uint16(i))
|
||||
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
|
||||
elem.ksize = uint32(len(item.key))
|
||||
elem.pgid = item.pgid
|
||||
_assert(elem.pgid != p.id, "write: circular dependency occurred")
|
||||
}
|
||||
|
||||
// If the length of key+value is larger than the max allocation size
|
||||
// then we need to reallocate the byte array pointer.
|
||||
//
|
||||
// See: https://github.com/boltdb/bolt/pull/335
|
||||
klen, vlen := len(item.key), len(item.value)
|
||||
if len(b) < klen+vlen {
|
||||
b = (*[maxAllocSize]byte)(unsafe.Pointer(&b[0]))[:]
|
||||
}
|
||||
|
||||
// Write data for the element to the end of the page.
|
||||
copy(b[0:], item.key)
|
||||
b = b[klen:]
|
||||
copy(b[0:], item.value)
|
||||
b = b[vlen:]
|
||||
}
|
||||
|
||||
// DEBUG ONLY: n.dump()
|
||||
}
|
||||
|
||||
// split breaks up a node into multiple smaller nodes, if appropriate.
|
||||
// This should only be called from the spill() function.
|
||||
func (n *node) split(pageSize int) []*node {
|
||||
var nodes []*node
|
||||
|
||||
node := n
|
||||
for {
|
||||
// Split node into two.
|
||||
a, b := node.splitTwo(pageSize)
|
||||
nodes = append(nodes, a)
|
||||
|
||||
// If we can't split then exit the loop.
|
||||
if b == nil {
|
||||
break
|
||||
}
|
||||
|
||||
// Set node to b so it gets split on the next iteration.
|
||||
node = b
|
||||
}
|
||||
|
||||
return nodes
|
||||
}
|
||||
|
||||
// splitTwo breaks up a node into two smaller nodes, if appropriate.
|
||||
// This should only be called from the split() function.
|
||||
func (n *node) splitTwo(pageSize int) (*node, *node) {
|
||||
// Ignore the split if the page doesn't have at least enough nodes for
|
||||
// two pages or if the nodes can fit in a single page.
|
||||
if len(n.inodes) <= (minKeysPerPage*2) || n.sizeLessThan(pageSize) {
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// Determine the threshold before starting a new node.
|
||||
var fillPercent = n.bucket.FillPercent
|
||||
if fillPercent < minFillPercent {
|
||||
fillPercent = minFillPercent
|
||||
} else if fillPercent > maxFillPercent {
|
||||
fillPercent = maxFillPercent
|
||||
}
|
||||
threshold := int(float64(pageSize) * fillPercent)
|
||||
|
||||
// Determine split position and sizes of the two pages.
|
||||
splitIndex, _ := n.splitIndex(threshold)
|
||||
|
||||
// Split node into two separate nodes.
|
||||
// If there's no parent then we'll need to create one.
|
||||
if n.parent == nil {
|
||||
n.parent = &node{bucket: n.bucket, children: []*node{n}}
|
||||
}
|
||||
|
||||
// Create a new node and add it to the parent.
|
||||
next := &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent}
|
||||
n.parent.children = append(n.parent.children, next)
|
||||
|
||||
// Split inodes across two nodes.
|
||||
next.inodes = n.inodes[splitIndex:]
|
||||
n.inodes = n.inodes[:splitIndex]
|
||||
|
||||
// Update the statistics.
|
||||
n.bucket.tx.stats.Split++
|
||||
|
||||
return n, next
|
||||
}
|
||||
|
||||
// splitIndex finds the position where a page will fill a given threshold.
|
||||
// It returns the index as well as the size of the first page.
|
||||
// This is only be called from split().
|
||||
func (n *node) splitIndex(threshold int) (index, sz int) {
|
||||
sz = pageHeaderSize
|
||||
|
||||
// Loop until we only have the minimum number of keys required for the second page.
|
||||
for i := 0; i < len(n.inodes)-minKeysPerPage; i++ {
|
||||
index = i
|
||||
inode := n.inodes[i]
|
||||
elsize := n.pageElementSize() + len(inode.key) + len(inode.value)
|
||||
|
||||
// If we have at least the minimum number of keys and adding another
|
||||
// node would put us over the threshold then exit and return.
|
||||
if i >= minKeysPerPage && sz+elsize > threshold {
|
||||
break
|
||||
}
|
||||
|
||||
// Add the element size to the total size.
|
||||
sz += elsize
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// spill writes the nodes to dirty pages and splits nodes as it goes.
|
||||
// Returns an error if dirty pages cannot be allocated.
|
||||
func (n *node) spill() error {
|
||||
var tx = n.bucket.tx
|
||||
if n.spilled {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Spill child nodes first. Child nodes can materialize sibling nodes in
|
||||
// the case of split-merge so we cannot use a range loop. We have to check
|
||||
// the children size on every loop iteration.
|
||||
sort.Sort(n.children)
|
||||
for i := 0; i < len(n.children); i++ {
|
||||
if err := n.children[i].spill(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// We no longer need the child list because it's only used for spill tracking.
|
||||
n.children = nil
|
||||
|
||||
// Split nodes into appropriate sizes. The first node will always be n.
|
||||
var nodes = n.split(tx.db.pageSize)
|
||||
for _, node := range nodes {
|
||||
// Add node's page to the freelist if it's not new.
|
||||
if node.pgid > 0 {
|
||||
tx.db.freelist.free(tx.meta.txid, tx.page(node.pgid))
|
||||
node.pgid = 0
|
||||
}
|
||||
|
||||
// Allocate contiguous space for the node.
|
||||
p, err := tx.allocate((node.size() + tx.db.pageSize - 1) / tx.db.pageSize)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Write the node.
|
||||
if p.id >= tx.meta.pgid {
|
||||
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.id, tx.meta.pgid))
|
||||
}
|
||||
node.pgid = p.id
|
||||
node.write(p)
|
||||
node.spilled = true
|
||||
|
||||
// Insert into parent inodes.
|
||||
if node.parent != nil {
|
||||
var key = node.key
|
||||
if key == nil {
|
||||
key = node.inodes[0].key
|
||||
}
|
||||
|
||||
node.parent.put(key, node.inodes[0].key, nil, node.pgid, 0)
|
||||
node.key = node.inodes[0].key
|
||||
_assert(len(node.key) > 0, "spill: zero-length node key")
|
||||
}
|
||||
|
||||
// Update the statistics.
|
||||
tx.stats.Spill++
|
||||
}
|
||||
|
||||
// If the root node split and created a new root then we need to spill that
|
||||
// as well. We'll clear out the children to make sure it doesn't try to respill.
|
||||
if n.parent != nil && n.parent.pgid == 0 {
|
||||
n.children = nil
|
||||
return n.parent.spill()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// rebalance attempts to combine the node with sibling nodes if the node fill
|
||||
// size is below a threshold or if there are not enough keys.
|
||||
func (n *node) rebalance() {
|
||||
if !n.unbalanced {
|
||||
return
|
||||
}
|
||||
n.unbalanced = false
|
||||
|
||||
// Update statistics.
|
||||
n.bucket.tx.stats.Rebalance++
|
||||
|
||||
// Ignore if node is above threshold (25%) and has enough keys.
|
||||
var threshold = n.bucket.tx.db.pageSize / 4
|
||||
if n.size() > threshold && len(n.inodes) > n.minKeys() {
|
||||
return
|
||||
}
|
||||
|
||||
// Root node has special handling.
|
||||
if n.parent == nil {
|
||||
// If root node is a branch and only has one node then collapse it.
|
||||
if !n.isLeaf && len(n.inodes) == 1 {
|
||||
// Move root's child up.
|
||||
child := n.bucket.node(n.inodes[0].pgid, n)
|
||||
n.isLeaf = child.isLeaf
|
||||
n.inodes = child.inodes[:]
|
||||
n.children = child.children
|
||||
|
||||
// Reparent all child nodes being moved.
|
||||
for _, inode := range n.inodes {
|
||||
if child, ok := n.bucket.nodes[inode.pgid]; ok {
|
||||
child.parent = n
|
||||
}
|
||||
}
|
||||
|
||||
// Remove old child.
|
||||
child.parent = nil
|
||||
delete(n.bucket.nodes, child.pgid)
|
||||
child.free()
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// If node has no keys then just remove it.
|
||||
if n.numChildren() == 0 {
|
||||
n.parent.del(n.key)
|
||||
n.parent.removeChild(n)
|
||||
delete(n.bucket.nodes, n.pgid)
|
||||
n.free()
|
||||
n.parent.rebalance()
|
||||
return
|
||||
}
|
||||
|
||||
_assert(n.parent.numChildren() > 1, "parent must have at least 2 children")
|
||||
|
||||
// Destination node is right sibling if idx == 0, otherwise left sibling.
|
||||
var target *node
|
||||
var useNextSibling = (n.parent.childIndex(n) == 0)
|
||||
if useNextSibling {
|
||||
target = n.nextSibling()
|
||||
} else {
|
||||
target = n.prevSibling()
|
||||
}
|
||||
|
||||
// If both this node and the target node are too small then merge them.
|
||||
if useNextSibling {
|
||||
// Reparent all child nodes being moved.
|
||||
for _, inode := range target.inodes {
|
||||
if child, ok := n.bucket.nodes[inode.pgid]; ok {
|
||||
child.parent.removeChild(child)
|
||||
child.parent = n
|
||||
child.parent.children = append(child.parent.children, child)
|
||||
}
|
||||
}
|
||||
|
||||
// Copy over inodes from target and remove target.
|
||||
n.inodes = append(n.inodes, target.inodes...)
|
||||
n.parent.del(target.key)
|
||||
n.parent.removeChild(target)
|
||||
delete(n.bucket.nodes, target.pgid)
|
||||
target.free()
|
||||
} else {
|
||||
// Reparent all child nodes being moved.
|
||||
for _, inode := range n.inodes {
|
||||
if child, ok := n.bucket.nodes[inode.pgid]; ok {
|
||||
child.parent.removeChild(child)
|
||||
child.parent = target
|
||||
child.parent.children = append(child.parent.children, child)
|
||||
}
|
||||
}
|
||||
|
||||
// Copy over inodes to target and remove node.
|
||||
target.inodes = append(target.inodes, n.inodes...)
|
||||
n.parent.del(n.key)
|
||||
n.parent.removeChild(n)
|
||||
delete(n.bucket.nodes, n.pgid)
|
||||
n.free()
|
||||
}
|
||||
|
||||
// Either this node or the target node was deleted from the parent so rebalance it.
|
||||
n.parent.rebalance()
|
||||
}
|
||||
|
||||
// removes a node from the list of in-memory children.
|
||||
// This does not affect the inodes.
|
||||
func (n *node) removeChild(target *node) {
|
||||
for i, child := range n.children {
|
||||
if child == target {
|
||||
n.children = append(n.children[:i], n.children[i+1:]...)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// dereference causes the node to copy all its inode key/value references to heap memory.
|
||||
// This is required when the mmap is reallocated so inodes are not pointing to stale data.
|
||||
func (n *node) dereference() {
|
||||
if n.key != nil {
|
||||
key := make([]byte, len(n.key))
|
||||
copy(key, n.key)
|
||||
n.key = key
|
||||
_assert(n.pgid == 0 || len(n.key) > 0, "dereference: zero-length node key on existing node")
|
||||
}
|
||||
|
||||
for i := range n.inodes {
|
||||
inode := &n.inodes[i]
|
||||
|
||||
key := make([]byte, len(inode.key))
|
||||
copy(key, inode.key)
|
||||
inode.key = key
|
||||
_assert(len(inode.key) > 0, "dereference: zero-length inode key")
|
||||
|
||||
value := make([]byte, len(inode.value))
|
||||
copy(value, inode.value)
|
||||
inode.value = value
|
||||
}
|
||||
|
||||
// Recursively dereference children.
|
||||
for _, child := range n.children {
|
||||
child.dereference()
|
||||
}
|
||||
|
||||
// Update statistics.
|
||||
n.bucket.tx.stats.NodeDeref++
|
||||
}
|
||||
|
||||
// free adds the node's underlying page to the freelist.
|
||||
func (n *node) free() {
|
||||
if n.pgid != 0 {
|
||||
n.bucket.tx.db.freelist.free(n.bucket.tx.meta.txid, n.bucket.tx.page(n.pgid))
|
||||
n.pgid = 0
|
||||
}
|
||||
}
|
||||
|
||||
// dump writes the contents of the node to STDERR for debugging purposes.
|
||||
/*
|
||||
func (n *node) dump() {
|
||||
// Write node header.
|
||||
var typ = "branch"
|
||||
if n.isLeaf {
|
||||
typ = "leaf"
|
||||
}
|
||||
warnf("[NODE %d {type=%s count=%d}]", n.pgid, typ, len(n.inodes))
|
||||
|
||||
// Write out abbreviated version of each item.
|
||||
for _, item := range n.inodes {
|
||||
if n.isLeaf {
|
||||
if item.flags&bucketLeafFlag != 0 {
|
||||
bucket := (*bucket)(unsafe.Pointer(&item.value[0]))
|
||||
warnf("+L %08x -> (bucket root=%d)", trunc(item.key, 4), bucket.root)
|
||||
} else {
|
||||
warnf("+L %08x -> %08x", trunc(item.key, 4), trunc(item.value, 4))
|
||||
}
|
||||
} else {
|
||||
warnf("+B %08x -> pgid=%d", trunc(item.key, 4), item.pgid)
|
||||
}
|
||||
}
|
||||
warn("")
|
||||
}
|
||||
*/
|
||||
|
||||
type nodes []*node
|
||||
|
||||
func (s nodes) Len() int { return len(s) }
|
||||
func (s nodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||
func (s nodes) Less(i, j int) bool { return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1 }
|
||||
|
||||
// inode represents an internal node inside of a node.
|
||||
// It can be used to point to elements in a page or point
|
||||
// to an element which hasn't been added to a page yet.
|
||||
type inode struct {
|
||||
flags uint32
|
||||
pgid pgid
|
||||
key []byte
|
||||
value []byte
|
||||
}
|
||||
|
||||
type inodes []inode
|
197
vendor/go.etcd.io/bbolt/page.go
generated
vendored
Normal file
197
vendor/go.etcd.io/bbolt/page.go
generated
vendored
Normal file
@ -0,0 +1,197 @@
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"sort"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const pageHeaderSize = int(unsafe.Offsetof(((*page)(nil)).ptr))
|
||||
|
||||
const minKeysPerPage = 2
|
||||
|
||||
const branchPageElementSize = int(unsafe.Sizeof(branchPageElement{}))
|
||||
const leafPageElementSize = int(unsafe.Sizeof(leafPageElement{}))
|
||||
|
||||
const (
|
||||
branchPageFlag = 0x01
|
||||
leafPageFlag = 0x02
|
||||
metaPageFlag = 0x04
|
||||
freelistPageFlag = 0x10
|
||||
)
|
||||
|
||||
const (
|
||||
bucketLeafFlag = 0x01
|
||||
)
|
||||
|
||||
type pgid uint64
|
||||
|
||||
type page struct {
|
||||
id pgid
|
||||
flags uint16
|
||||
count uint16
|
||||
overflow uint32
|
||||
ptr uintptr
|
||||
}
|
||||
|
||||
// typ returns a human readable page type string used for debugging.
|
||||
func (p *page) typ() string {
|
||||
if (p.flags & branchPageFlag) != 0 {
|
||||
return "branch"
|
||||
} else if (p.flags & leafPageFlag) != 0 {
|
||||
return "leaf"
|
||||
} else if (p.flags & metaPageFlag) != 0 {
|
||||
return "meta"
|
||||
} else if (p.flags & freelistPageFlag) != 0 {
|
||||
return "freelist"
|
||||
}
|
||||
return fmt.Sprintf("unknown<%02x>", p.flags)
|
||||
}
|
||||
|
||||
// meta returns a pointer to the metadata section of the page.
|
||||
func (p *page) meta() *meta {
|
||||
return (*meta)(unsafe.Pointer(&p.ptr))
|
||||
}
|
||||
|
||||
// leafPageElement retrieves the leaf node by index
|
||||
func (p *page) leafPageElement(index uint16) *leafPageElement {
|
||||
n := &((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[index]
|
||||
return n
|
||||
}
|
||||
|
||||
// leafPageElements retrieves a list of leaf nodes.
|
||||
func (p *page) leafPageElements() []leafPageElement {
|
||||
if p.count == 0 {
|
||||
return nil
|
||||
}
|
||||
return ((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[:]
|
||||
}
|
||||
|
||||
// branchPageElement retrieves the branch node by index
|
||||
func (p *page) branchPageElement(index uint16) *branchPageElement {
|
||||
return &((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[index]
|
||||
}
|
||||
|
||||
// branchPageElements retrieves a list of branch nodes.
|
||||
func (p *page) branchPageElements() []branchPageElement {
|
||||
if p.count == 0 {
|
||||
return nil
|
||||
}
|
||||
return ((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[:]
|
||||
}
|
||||
|
||||
// dump writes n bytes of the page to STDERR as hex output.
|
||||
func (p *page) hexdump(n int) {
|
||||
buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:n]
|
||||
fmt.Fprintf(os.Stderr, "%x\n", buf)
|
||||
}
|
||||
|
||||
type pages []*page
|
||||
|
||||
func (s pages) Len() int { return len(s) }
|
||||
func (s pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||
func (s pages) Less(i, j int) bool { return s[i].id < s[j].id }
|
||||
|
||||
// branchPageElement represents a node on a branch page.
|
||||
type branchPageElement struct {
|
||||
pos uint32
|
||||
ksize uint32
|
||||
pgid pgid
|
||||
}
|
||||
|
||||
// key returns a byte slice of the node key.
|
||||
func (n *branchPageElement) key() []byte {
|
||||
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
|
||||
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize]
|
||||
}
|
||||
|
||||
// leafPageElement represents a node on a leaf page.
|
||||
type leafPageElement struct {
|
||||
flags uint32
|
||||
pos uint32
|
||||
ksize uint32
|
||||
vsize uint32
|
||||
}
|
||||
|
||||
// key returns a byte slice of the node key.
|
||||
func (n *leafPageElement) key() []byte {
|
||||
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
|
||||
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize:n.ksize]
|
||||
}
|
||||
|
||||
// value returns a byte slice of the node value.
|
||||
func (n *leafPageElement) value() []byte {
|
||||
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
|
||||
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize:n.vsize]
|
||||
}
|
||||
|
||||
// PageInfo represents human readable information about a page.
|
||||
type PageInfo struct {
|
||||
ID int
|
||||
Type string
|
||||
Count int
|
||||
OverflowCount int
|
||||
}
|
||||
|
||||
type pgids []pgid
|
||||
|
||||
func (s pgids) Len() int { return len(s) }
|
||||
func (s pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||
func (s pgids) Less(i, j int) bool { return s[i] < s[j] }
|
||||
|
||||
// merge returns the sorted union of a and b.
|
||||
func (a pgids) merge(b pgids) pgids {
|
||||
// Return the opposite slice if one is nil.
|
||||
if len(a) == 0 {
|
||||
return b
|
||||
}
|
||||
if len(b) == 0 {
|
||||
return a
|
||||
}
|
||||
merged := make(pgids, len(a)+len(b))
|
||||
mergepgids(merged, a, b)
|
||||
return merged
|
||||
}
|
||||
|
||||
// mergepgids copies the sorted union of a and b into dst.
|
||||
// If dst is too small, it panics.
|
||||
func mergepgids(dst, a, b pgids) {
|
||||
if len(dst) < len(a)+len(b) {
|
||||
panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b)))
|
||||
}
|
||||
// Copy in the opposite slice if one is nil.
|
||||
if len(a) == 0 {
|
||||
copy(dst, b)
|
||||
return
|
||||
}
|
||||
if len(b) == 0 {
|
||||
copy(dst, a)
|
||||
return
|
||||
}
|
||||
|
||||
// Merged will hold all elements from both lists.
|
||||
merged := dst[:0]
|
||||
|
||||
// Assign lead to the slice with a lower starting value, follow to the higher value.
|
||||
lead, follow := a, b
|
||||
if b[0] < a[0] {
|
||||
lead, follow = b, a
|
||||
}
|
||||
|
||||
// Continue while there are elements in the lead.
|
||||
for len(lead) > 0 {
|
||||
// Merge largest prefix of lead that is ahead of follow[0].
|
||||
n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] })
|
||||
merged = append(merged, lead[:n]...)
|
||||
if n >= len(lead) {
|
||||
break
|
||||
}
|
||||
|
||||
// Swap lead and follow.
|
||||
lead, follow = follow, lead[n:]
|
||||
}
|
||||
|
||||
// Append what's left in follow.
|
||||
_ = append(merged, follow...)
|
||||
}
|
707
vendor/go.etcd.io/bbolt/tx.go
generated
vendored
Normal file
707
vendor/go.etcd.io/bbolt/tx.go
generated
vendored
Normal file
@ -0,0 +1,707 @@
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// txid represents the internal transaction identifier.
|
||||
type txid uint64
|
||||
|
||||
// Tx represents a read-only or read/write transaction on the database.
|
||||
// Read-only transactions can be used for retrieving values for keys and creating cursors.
|
||||
// Read/write transactions can create and remove buckets and create and remove keys.
|
||||
//
|
||||
// IMPORTANT: You must commit or rollback transactions when you are done with
|
||||
// them. Pages can not be reclaimed by the writer until no more transactions
|
||||
// are using them. A long running read transaction can cause the database to
|
||||
// quickly grow.
|
||||
type Tx struct {
|
||||
writable bool
|
||||
managed bool
|
||||
db *DB
|
||||
meta *meta
|
||||
root Bucket
|
||||
pages map[pgid]*page
|
||||
stats TxStats
|
||||
commitHandlers []func()
|
||||
|
||||
// WriteFlag specifies the flag for write-related methods like WriteTo().
|
||||
// Tx opens the database file with the specified flag to copy the data.
|
||||
//
|
||||
// By default, the flag is unset, which works well for mostly in-memory
|
||||
// workloads. For databases that are much larger than available RAM,
|
||||
// set the flag to syscall.O_DIRECT to avoid trashing the page cache.
|
||||
WriteFlag int
|
||||
}
|
||||
|
||||
// init initializes the transaction.
|
||||
func (tx *Tx) init(db *DB) {
|
||||
tx.db = db
|
||||
tx.pages = nil
|
||||
|
||||
// Copy the meta page since it can be changed by the writer.
|
||||
tx.meta = &meta{}
|
||||
db.meta().copy(tx.meta)
|
||||
|
||||
// Copy over the root bucket.
|
||||
tx.root = newBucket(tx)
|
||||
tx.root.bucket = &bucket{}
|
||||
*tx.root.bucket = tx.meta.root
|
||||
|
||||
// Increment the transaction id and add a page cache for writable transactions.
|
||||
if tx.writable {
|
||||
tx.pages = make(map[pgid]*page)
|
||||
tx.meta.txid += txid(1)
|
||||
}
|
||||
}
|
||||
|
||||
// ID returns the transaction id.
|
||||
func (tx *Tx) ID() int {
|
||||
return int(tx.meta.txid)
|
||||
}
|
||||
|
||||
// DB returns a reference to the database that created the transaction.
|
||||
func (tx *Tx) DB() *DB {
|
||||
return tx.db
|
||||
}
|
||||
|
||||
// Size returns current database size in bytes as seen by this transaction.
|
||||
func (tx *Tx) Size() int64 {
|
||||
return int64(tx.meta.pgid) * int64(tx.db.pageSize)
|
||||
}
|
||||
|
||||
// Writable returns whether the transaction can perform write operations.
|
||||
func (tx *Tx) Writable() bool {
|
||||
return tx.writable
|
||||
}
|
||||
|
||||
// Cursor creates a cursor associated with the root bucket.
|
||||
// All items in the cursor will return a nil value because all root bucket keys point to buckets.
|
||||
// The cursor is only valid as long as the transaction is open.
|
||||
// Do not use a cursor after the transaction is closed.
|
||||
func (tx *Tx) Cursor() *Cursor {
|
||||
return tx.root.Cursor()
|
||||
}
|
||||
|
||||
// Stats retrieves a copy of the current transaction statistics.
|
||||
func (tx *Tx) Stats() TxStats {
|
||||
return tx.stats
|
||||
}
|
||||
|
||||
// Bucket retrieves a bucket by name.
|
||||
// Returns nil if the bucket does not exist.
|
||||
// The bucket instance is only valid for the lifetime of the transaction.
|
||||
func (tx *Tx) Bucket(name []byte) *Bucket {
|
||||
return tx.root.Bucket(name)
|
||||
}
|
||||
|
||||
// CreateBucket creates a new bucket.
|
||||
// Returns an error if the bucket already exists, if the bucket name is blank, or if the bucket name is too long.
|
||||
// The bucket instance is only valid for the lifetime of the transaction.
|
||||
func (tx *Tx) CreateBucket(name []byte) (*Bucket, error) {
|
||||
return tx.root.CreateBucket(name)
|
||||
}
|
||||
|
||||
// CreateBucketIfNotExists creates a new bucket if it doesn't already exist.
|
||||
// Returns an error if the bucket name is blank, or if the bucket name is too long.
|
||||
// The bucket instance is only valid for the lifetime of the transaction.
|
||||
func (tx *Tx) CreateBucketIfNotExists(name []byte) (*Bucket, error) {
|
||||
return tx.root.CreateBucketIfNotExists(name)
|
||||
}
|
||||
|
||||
// DeleteBucket deletes a bucket.
|
||||
// Returns an error if the bucket cannot be found or if the key represents a non-bucket value.
|
||||
func (tx *Tx) DeleteBucket(name []byte) error {
|
||||
return tx.root.DeleteBucket(name)
|
||||
}
|
||||
|
||||
// ForEach executes a function for each bucket in the root.
|
||||
// If the provided function returns an error then the iteration is stopped and
|
||||
// the error is returned to the caller.
|
||||
func (tx *Tx) ForEach(fn func(name []byte, b *Bucket) error) error {
|
||||
return tx.root.ForEach(func(k, v []byte) error {
|
||||
return fn(k, tx.root.Bucket(k))
|
||||
})
|
||||
}
|
||||
|
||||
// OnCommit adds a handler function to be executed after the transaction successfully commits.
|
||||
func (tx *Tx) OnCommit(fn func()) {
|
||||
tx.commitHandlers = append(tx.commitHandlers, fn)
|
||||
}
|
||||
|
||||
// Commit writes all changes to disk and updates the meta page.
|
||||
// Returns an error if a disk write error occurs, or if Commit is
|
||||
// called on a read-only transaction.
|
||||
func (tx *Tx) Commit() error {
|
||||
_assert(!tx.managed, "managed tx commit not allowed")
|
||||
if tx.db == nil {
|
||||
return ErrTxClosed
|
||||
} else if !tx.writable {
|
||||
return ErrTxNotWritable
|
||||
}
|
||||
|
||||
// TODO(benbjohnson): Use vectorized I/O to write out dirty pages.
|
||||
|
||||
// Rebalance nodes which have had deletions.
|
||||
var startTime = time.Now()
|
||||
tx.root.rebalance()
|
||||
if tx.stats.Rebalance > 0 {
|
||||
tx.stats.RebalanceTime += time.Since(startTime)
|
||||
}
|
||||
|
||||
// spill data onto dirty pages.
|
||||
startTime = time.Now()
|
||||
if err := tx.root.spill(); err != nil {
|
||||
tx.rollback()
|
||||
return err
|
||||
}
|
||||
tx.stats.SpillTime += time.Since(startTime)
|
||||
|
||||
// Free the old root bucket.
|
||||
tx.meta.root.root = tx.root.root
|
||||
|
||||
// Free the old freelist because commit writes out a fresh freelist.
|
||||
if tx.meta.freelist != pgidNoFreelist {
|
||||
tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist))
|
||||
}
|
||||
|
||||
if !tx.db.NoFreelistSync {
|
||||
err := tx.commitFreelist()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
tx.meta.freelist = pgidNoFreelist
|
||||
}
|
||||
|
||||
// Write dirty pages to disk.
|
||||
startTime = time.Now()
|
||||
if err := tx.write(); err != nil {
|
||||
tx.rollback()
|
||||
return err
|
||||
}
|
||||
|
||||
// If strict mode is enabled then perform a consistency check.
|
||||
// Only the first consistency error is reported in the panic.
|
||||
if tx.db.StrictMode {
|
||||
ch := tx.Check()
|
||||
var errs []string
|
||||
for {
|
||||
err, ok := <-ch
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
errs = append(errs, err.Error())
|
||||
}
|
||||
if len(errs) > 0 {
|
||||
panic("check fail: " + strings.Join(errs, "\n"))
|
||||
}
|
||||
}
|
||||
|
||||
// Write meta to disk.
|
||||
if err := tx.writeMeta(); err != nil {
|
||||
tx.rollback()
|
||||
return err
|
||||
}
|
||||
tx.stats.WriteTime += time.Since(startTime)
|
||||
|
||||
// Finalize the transaction.
|
||||
tx.close()
|
||||
|
||||
// Execute commit handlers now that the locks have been removed.
|
||||
for _, fn := range tx.commitHandlers {
|
||||
fn()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (tx *Tx) commitFreelist() error {
|
||||
// Allocate new pages for the new free list. This will overestimate
|
||||
// the size of the freelist but not underestimate the size (which would be bad).
|
||||
opgid := tx.meta.pgid
|
||||
p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1)
|
||||
if err != nil {
|
||||
tx.rollback()
|
||||
return err
|
||||
}
|
||||
if err := tx.db.freelist.write(p); err != nil {
|
||||
tx.rollback()
|
||||
return err
|
||||
}
|
||||
tx.meta.freelist = p.id
|
||||
// If the high water mark has moved up then attempt to grow the database.
|
||||
if tx.meta.pgid > opgid {
|
||||
if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil {
|
||||
tx.rollback()
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Rollback closes the transaction and ignores all previous updates. Read-only
|
||||
// transactions must be rolled back and not committed.
|
||||
func (tx *Tx) Rollback() error {
|
||||
_assert(!tx.managed, "managed tx rollback not allowed")
|
||||
if tx.db == nil {
|
||||
return ErrTxClosed
|
||||
}
|
||||
tx.rollback()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (tx *Tx) rollback() {
|
||||
if tx.db == nil {
|
||||
return
|
||||
}
|
||||
if tx.writable {
|
||||
tx.db.freelist.rollback(tx.meta.txid)
|
||||
tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist))
|
||||
}
|
||||
tx.close()
|
||||
}
|
||||
|
||||
func (tx *Tx) close() {
|
||||
if tx.db == nil {
|
||||
return
|
||||
}
|
||||
if tx.writable {
|
||||
// Grab freelist stats.
|
||||
var freelistFreeN = tx.db.freelist.free_count()
|
||||
var freelistPendingN = tx.db.freelist.pending_count()
|
||||
var freelistAlloc = tx.db.freelist.size()
|
||||
|
||||
// Remove transaction ref & writer lock.
|
||||
tx.db.rwtx = nil
|
||||
tx.db.rwlock.Unlock()
|
||||
|
||||
// Merge statistics.
|
||||
tx.db.statlock.Lock()
|
||||
tx.db.stats.FreePageN = freelistFreeN
|
||||
tx.db.stats.PendingPageN = freelistPendingN
|
||||
tx.db.stats.FreeAlloc = (freelistFreeN + freelistPendingN) * tx.db.pageSize
|
||||
tx.db.stats.FreelistInuse = freelistAlloc
|
||||
tx.db.stats.TxStats.add(&tx.stats)
|
||||
tx.db.statlock.Unlock()
|
||||
} else {
|
||||
tx.db.removeTx(tx)
|
||||
}
|
||||
|
||||
// Clear all references.
|
||||
tx.db = nil
|
||||
tx.meta = nil
|
||||
tx.root = Bucket{tx: tx}
|
||||
tx.pages = nil
|
||||
}
|
||||
|
||||
// Copy writes the entire database to a writer.
|
||||
// This function exists for backwards compatibility.
|
||||
//
|
||||
// Deprecated; Use WriteTo() instead.
|
||||
func (tx *Tx) Copy(w io.Writer) error {
|
||||
_, err := tx.WriteTo(w)
|
||||
return err
|
||||
}
|
||||
|
||||
// WriteTo writes the entire database to a writer.
|
||||
// If err == nil then exactly tx.Size() bytes will be written into the writer.
|
||||
func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
|
||||
// Attempt to open reader with WriteFlag
|
||||
f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer func() {
|
||||
if cerr := f.Close(); err == nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
// Generate a meta page. We use the same page data for both meta pages.
|
||||
buf := make([]byte, tx.db.pageSize)
|
||||
page := (*page)(unsafe.Pointer(&buf[0]))
|
||||
page.flags = metaPageFlag
|
||||
*page.meta() = *tx.meta
|
||||
|
||||
// Write meta 0.
|
||||
page.id = 0
|
||||
page.meta().checksum = page.meta().sum64()
|
||||
nn, err := w.Write(buf)
|
||||
n += int64(nn)
|
||||
if err != nil {
|
||||
return n, fmt.Errorf("meta 0 copy: %s", err)
|
||||
}
|
||||
|
||||
// Write meta 1 with a lower transaction id.
|
||||
page.id = 1
|
||||
page.meta().txid -= 1
|
||||
page.meta().checksum = page.meta().sum64()
|
||||
nn, err = w.Write(buf)
|
||||
n += int64(nn)
|
||||
if err != nil {
|
||||
return n, fmt.Errorf("meta 1 copy: %s", err)
|
||||
}
|
||||
|
||||
// Move past the meta pages in the file.
|
||||
if _, err := f.Seek(int64(tx.db.pageSize*2), io.SeekStart); err != nil {
|
||||
return n, fmt.Errorf("seek: %s", err)
|
||||
}
|
||||
|
||||
// Copy data pages.
|
||||
wn, err := io.CopyN(w, f, tx.Size()-int64(tx.db.pageSize*2))
|
||||
n += wn
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// CopyFile copies the entire database to file at the given path.
|
||||
// A reader transaction is maintained during the copy so it is safe to continue
|
||||
// using the database while a copy is in progress.
|
||||
func (tx *Tx) CopyFile(path string, mode os.FileMode) error {
|
||||
f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = tx.Copy(f)
|
||||
if err != nil {
|
||||
_ = f.Close()
|
||||
return err
|
||||
}
|
||||
return f.Close()
|
||||
}
|
||||
|
||||
// Check performs several consistency checks on the database for this transaction.
|
||||
// An error is returned if any inconsistency is found.
|
||||
//
|
||||
// It can be safely run concurrently on a writable transaction. However, this
|
||||
// incurs a high cost for large databases and databases with a lot of subbuckets
|
||||
// because of caching. This overhead can be removed if running on a read-only
|
||||
// transaction, however, it is not safe to execute other writer transactions at
|
||||
// the same time.
|
||||
func (tx *Tx) Check() <-chan error {
|
||||
ch := make(chan error)
|
||||
go tx.check(ch)
|
||||
return ch
|
||||
}
|
||||
|
||||
func (tx *Tx) check(ch chan error) {
|
||||
// Force loading free list if opened in ReadOnly mode.
|
||||
tx.db.loadFreelist()
|
||||
|
||||
// Check if any pages are double freed.
|
||||
freed := make(map[pgid]bool)
|
||||
all := make([]pgid, tx.db.freelist.count())
|
||||
tx.db.freelist.copyall(all)
|
||||
for _, id := range all {
|
||||
if freed[id] {
|
||||
ch <- fmt.Errorf("page %d: already freed", id)
|
||||
}
|
||||
freed[id] = true
|
||||
}
|
||||
|
||||
// Track every reachable page.
|
||||
reachable := make(map[pgid]*page)
|
||||
reachable[0] = tx.page(0) // meta0
|
||||
reachable[1] = tx.page(1) // meta1
|
||||
if tx.meta.freelist != pgidNoFreelist {
|
||||
for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ {
|
||||
reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist)
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively check buckets.
|
||||
tx.checkBucket(&tx.root, reachable, freed, ch)
|
||||
|
||||
// Ensure all pages below high water mark are either reachable or freed.
|
||||
for i := pgid(0); i < tx.meta.pgid; i++ {
|
||||
_, isReachable := reachable[i]
|
||||
if !isReachable && !freed[i] {
|
||||
ch <- fmt.Errorf("page %d: unreachable unfreed", int(i))
|
||||
}
|
||||
}
|
||||
|
||||
// Close the channel to signal completion.
|
||||
close(ch)
|
||||
}
|
||||
|
||||
func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bool, ch chan error) {
|
||||
// Ignore inline buckets.
|
||||
if b.root == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Check every page used by this bucket.
|
||||
b.tx.forEachPage(b.root, 0, func(p *page, _ int) {
|
||||
if p.id > tx.meta.pgid {
|
||||
ch <- fmt.Errorf("page %d: out of bounds: %d", int(p.id), int(b.tx.meta.pgid))
|
||||
}
|
||||
|
||||
// Ensure each page is only referenced once.
|
||||
for i := pgid(0); i <= pgid(p.overflow); i++ {
|
||||
var id = p.id + i
|
||||
if _, ok := reachable[id]; ok {
|
||||
ch <- fmt.Errorf("page %d: multiple references", int(id))
|
||||
}
|
||||
reachable[id] = p
|
||||
}
|
||||
|
||||
// We should only encounter un-freed leaf and branch pages.
|
||||
if freed[p.id] {
|
||||
ch <- fmt.Errorf("page %d: reachable freed", int(p.id))
|
||||
} else if (p.flags&branchPageFlag) == 0 && (p.flags&leafPageFlag) == 0 {
|
||||
ch <- fmt.Errorf("page %d: invalid type: %s", int(p.id), p.typ())
|
||||
}
|
||||
})
|
||||
|
||||
// Check each bucket within this bucket.
|
||||
_ = b.ForEach(func(k, v []byte) error {
|
||||
if child := b.Bucket(k); child != nil {
|
||||
tx.checkBucket(child, reachable, freed, ch)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
// allocate returns a contiguous block of memory starting at a given page.
|
||||
func (tx *Tx) allocate(count int) (*page, error) {
|
||||
p, err := tx.db.allocate(tx.meta.txid, count)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Save to our page cache.
|
||||
tx.pages[p.id] = p
|
||||
|
||||
// Update statistics.
|
||||
tx.stats.PageCount += count
|
||||
tx.stats.PageAlloc += count * tx.db.pageSize
|
||||
|
||||
return p, nil
|
||||
}
|
||||
|
||||
// write writes any dirty pages to disk.
|
||||
func (tx *Tx) write() error {
|
||||
// Sort pages by id.
|
||||
pages := make(pages, 0, len(tx.pages))
|
||||
for _, p := range tx.pages {
|
||||
pages = append(pages, p)
|
||||
}
|
||||
// Clear out page cache early.
|
||||
tx.pages = make(map[pgid]*page)
|
||||
sort.Sort(pages)
|
||||
|
||||
// Write pages to disk in order.
|
||||
for _, p := range pages {
|
||||
size := (int(p.overflow) + 1) * tx.db.pageSize
|
||||
offset := int64(p.id) * int64(tx.db.pageSize)
|
||||
|
||||
// Write out page in "max allocation" sized chunks.
|
||||
ptr := (*[maxAllocSize]byte)(unsafe.Pointer(p))
|
||||
for {
|
||||
// Limit our write to our max allocation size.
|
||||
sz := size
|
||||
if sz > maxAllocSize-1 {
|
||||
sz = maxAllocSize - 1
|
||||
}
|
||||
|
||||
// Write chunk to disk.
|
||||
buf := ptr[:sz]
|
||||
if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Update statistics.
|
||||
tx.stats.Write++
|
||||
|
||||
// Exit inner for loop if we've written all the chunks.
|
||||
size -= sz
|
||||
if size == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
// Otherwise move offset forward and move pointer to next chunk.
|
||||
offset += int64(sz)
|
||||
ptr = (*[maxAllocSize]byte)(unsafe.Pointer(&ptr[sz]))
|
||||
}
|
||||
}
|
||||
|
||||
// Ignore file sync if flag is set on DB.
|
||||
if !tx.db.NoSync || IgnoreNoSync {
|
||||
if err := fdatasync(tx.db); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Put small pages back to page pool.
|
||||
for _, p := range pages {
|
||||
// Ignore page sizes over 1 page.
|
||||
// These are allocated using make() instead of the page pool.
|
||||
if int(p.overflow) != 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:tx.db.pageSize]
|
||||
|
||||
// See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1
|
||||
for i := range buf {
|
||||
buf[i] = 0
|
||||
}
|
||||
tx.db.pagePool.Put(buf)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeMeta writes the meta to the disk.
|
||||
func (tx *Tx) writeMeta() error {
|
||||
// Create a temporary buffer for the meta page.
|
||||
buf := make([]byte, tx.db.pageSize)
|
||||
p := tx.db.pageInBuffer(buf, 0)
|
||||
tx.meta.write(p)
|
||||
|
||||
// Write the meta page to file.
|
||||
if _, err := tx.db.ops.writeAt(buf, int64(p.id)*int64(tx.db.pageSize)); err != nil {
|
||||
return err
|
||||
}
|
||||
if !tx.db.NoSync || IgnoreNoSync {
|
||||
if err := fdatasync(tx.db); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Update statistics.
|
||||
tx.stats.Write++
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// page returns a reference to the page with a given id.
|
||||
// If page has been written to then a temporary buffered page is returned.
|
||||
func (tx *Tx) page(id pgid) *page {
|
||||
// Check the dirty pages first.
|
||||
if tx.pages != nil {
|
||||
if p, ok := tx.pages[id]; ok {
|
||||
return p
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise return directly from the mmap.
|
||||
return tx.db.page(id)
|
||||
}
|
||||
|
||||
// forEachPage iterates over every page within a given page and executes a function.
|
||||
func (tx *Tx) forEachPage(pgid pgid, depth int, fn func(*page, int)) {
|
||||
p := tx.page(pgid)
|
||||
|
||||
// Execute function.
|
||||
fn(p, depth)
|
||||
|
||||
// Recursively loop over children.
|
||||
if (p.flags & branchPageFlag) != 0 {
|
||||
for i := 0; i < int(p.count); i++ {
|
||||
elem := p.branchPageElement(uint16(i))
|
||||
tx.forEachPage(elem.pgid, depth+1, fn)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Page returns page information for a given page number.
|
||||
// This is only safe for concurrent use when used by a writable transaction.
|
||||
func (tx *Tx) Page(id int) (*PageInfo, error) {
|
||||
if tx.db == nil {
|
||||
return nil, ErrTxClosed
|
||||
} else if pgid(id) >= tx.meta.pgid {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Build the page info.
|
||||
p := tx.db.page(pgid(id))
|
||||
info := &PageInfo{
|
||||
ID: id,
|
||||
Count: int(p.count),
|
||||
OverflowCount: int(p.overflow),
|
||||
}
|
||||
|
||||
// Determine the type (or if it's free).
|
||||
if tx.db.freelist.freed(pgid(id)) {
|
||||
info.Type = "free"
|
||||
} else {
|
||||
info.Type = p.typ()
|
||||
}
|
||||
|
||||
return info, nil
|
||||
}
|
||||
|
||||
// TxStats represents statistics about the actions performed by the transaction.
|
||||
type TxStats struct {
|
||||
// Page statistics.
|
||||
PageCount int // number of page allocations
|
||||
PageAlloc int // total bytes allocated
|
||||
|
||||
// Cursor statistics.
|
||||
CursorCount int // number of cursors created
|
||||
|
||||
// Node statistics
|
||||
NodeCount int // number of node allocations
|
||||
NodeDeref int // number of node dereferences
|
||||
|
||||
// Rebalance statistics.
|
||||
Rebalance int // number of node rebalances
|
||||
RebalanceTime time.Duration // total time spent rebalancing
|
||||
|
||||
// Split/Spill statistics.
|
||||
Split int // number of nodes split
|
||||
Spill int // number of nodes spilled
|
||||
SpillTime time.Duration // total time spent spilling
|
||||
|
||||
// Write statistics.
|
||||
Write int // number of writes performed
|
||||
WriteTime time.Duration // total time spent writing to disk
|
||||
}
|
||||
|
||||
func (s *TxStats) add(other *TxStats) {
|
||||
s.PageCount += other.PageCount
|
||||
s.PageAlloc += other.PageAlloc
|
||||
s.CursorCount += other.CursorCount
|
||||
s.NodeCount += other.NodeCount
|
||||
s.NodeDeref += other.NodeDeref
|
||||
s.Rebalance += other.Rebalance
|
||||
s.RebalanceTime += other.RebalanceTime
|
||||
s.Split += other.Split
|
||||
s.Spill += other.Spill
|
||||
s.SpillTime += other.SpillTime
|
||||
s.Write += other.Write
|
||||
s.WriteTime += other.WriteTime
|
||||
}
|
||||
|
||||
// Sub calculates and returns the difference between two sets of transaction stats.
|
||||
// This is useful when obtaining stats at two different points and time and
|
||||
// you need the performance counters that occurred within that time span.
|
||||
func (s *TxStats) Sub(other *TxStats) TxStats {
|
||||
var diff TxStats
|
||||
diff.PageCount = s.PageCount - other.PageCount
|
||||
diff.PageAlloc = s.PageAlloc - other.PageAlloc
|
||||
diff.CursorCount = s.CursorCount - other.CursorCount
|
||||
diff.NodeCount = s.NodeCount - other.NodeCount
|
||||
diff.NodeDeref = s.NodeDeref - other.NodeDeref
|
||||
diff.Rebalance = s.Rebalance - other.Rebalance
|
||||
diff.RebalanceTime = s.RebalanceTime - other.RebalanceTime
|
||||
diff.Split = s.Split - other.Split
|
||||
diff.Spill = s.Spill - other.Spill
|
||||
diff.SpillTime = s.SpillTime - other.SpillTime
|
||||
diff.Write = s.Write - other.Write
|
||||
diff.WriteTime = s.WriteTime - other.WriteTime
|
||||
return diff
|
||||
}
|
Loading…
Reference in New Issue
Block a user