Initial commit
This commit is contained in:
parent
669da74e7e
commit
208e13d2a8
80
load.go
Normal file
80
load.go
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net"
|
||||||
|
"os"
|
||||||
|
"qurl/qurl"
|
||||||
|
"qurl/storage"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// qurljson mirrors one element of the JSON array read by loadjson. The struct
// tags bind each field to its JSON key.
type qurljson struct {
	ID      uint64 `json:"id"`      // copied to qurl.QURL.ID; also used to find the largest imported ID
	IP      string `json:"ip"`      // textual address; parsed with net.ParseIP when non-empty
	Browser string `json:"browser"` // copied to qurl.QURL.Browser when non-empty
	URL     string `json:"url"`     // copied to qurl.QURL.URL
	Date    struct {
		// Date is read from the "$date" key. loadjson handles it as a
		// Unix timestamp in milliseconds (it is divided by 1000 to obtain
		// seconds); values <= 0 are treated as "no date".
		Date int64 `json:"$date"`
	} `json:"date"`
}
|
||||||
|
|
||||||
|
func loadjson(stor storage.Storage, filename string) error {
|
||||||
|
file, err := os.Open(filename)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("Error opening %s: %s", filename, err.Error())
|
||||||
|
}
|
||||||
|
defer file.Close()
|
||||||
|
|
||||||
|
var qj []qurljson
|
||||||
|
decoder := json.NewDecoder(file)
|
||||||
|
err = decoder.Decode(&qj)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("Error parsing %s: %s", filename, err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Parsing %d qurls.. \n", len(qj))
|
||||||
|
var max uint64 = 0
|
||||||
|
var count uint64 = 0
|
||||||
|
for _, e := range qj {
|
||||||
|
if e.ID > max {
|
||||||
|
max = e.ID
|
||||||
|
}
|
||||||
|
|
||||||
|
qurl := &qurl.QURL{
|
||||||
|
ID: e.ID,
|
||||||
|
URL: e.URL,
|
||||||
|
}
|
||||||
|
|
||||||
|
if e.Date.Date > 0 {
|
||||||
|
qurl.Created = time.Unix((e.Date.Date / 1000), 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
if e.IP != "" {
|
||||||
|
qurl.IP = net.ParseIP(e.IP)
|
||||||
|
}
|
||||||
|
|
||||||
|
if e.Browser != "" {
|
||||||
|
qurl.Browser = e.Browser
|
||||||
|
}
|
||||||
|
|
||||||
|
err := stor.AddQURL(qurl)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("Error adding qurl: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
count++
|
||||||
|
if (count % 100) == 0 {
|
||||||
|
fmt.Printf("*")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
err = stor.SetQURLSequence(max)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("Error setting sequence: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("\nDone!\n")
|
||||||
|
return nil
|
||||||
|
}
|
29
main.go
Normal file
29
main.go
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"qurl/storage"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
dburl := flag.String("u", "bolt:./qurl.db", "url to database")
|
||||||
|
jsonfile := flag.String("j", "", "path to json to load into database")
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
stor, err := storage.NewStorage(*dburl)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Database connection error: %s", err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer stor.Shutdown()
|
||||||
|
|
||||||
|
if *jsonfile != "" {
|
||||||
|
err := loadjson(stor, *jsonfile)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Load error: %s", err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
59
qurl/qurl.go
Normal file
59
qurl/qurl.go
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
package qurl
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// QURL is the record type exchanged with the storage layer.
type QURL struct {
	ID      uint64    // numeric identifier of the record
	URL     string    // the URL belonging to the record
	Created time.Time // creation time; the zero value means "not set"
	IP      []byte    // raw address bytes; nil means "not set"
	Browser string    // browser description; empty means "not set"
}
|
||||||
|
|
||||||
|
const (
	// alpha is the digit alphabet shared by ToString and FromString: the
	// byte at index d represents the digit value d.
	alpha = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

	// alphalen is the number of digits in alpha, i.e. the numeric base (62).
	alphalen = uint64(len(alpha))
)

// ToString encodes v as a string over alpha.
//
// The least significant digit is written first. Every digit after the first
// is stored reduced by one (the quotient is decremented on each step), which
// makes the encoding one-to-one: 0..61 map to "0".."z", 62 maps to "00",
// 63 to "10", and so on. FromString is the inverse.
func ToString(v uint64) string {
	var sb strings.Builder
	rest := v
	for rest >= alphalen {
		sb.WriteByte(alpha[rest%alphalen])
		rest = rest/alphalen - 1 // rest/alphalen >= 1 here, so this cannot underflow
	}
	sb.WriteByte(alpha[rest])
	return sb.String()
}

// FromString decodes a string produced by ToString back into its value.
//
// The empty string decodes to 0. An error is returned when s contains a byte
// that is not part of alpha, or when the decoded value does not fit into a
// uint64 (previously such input silently wrapped around).
func FromString(s string) (uint64, error) {
	const maxUint64 = ^uint64(0)

	var (
		result uint64
		place  uint64 = 1 // alphalen**i for the digit at index i
	)
	for i := 0; i < len(s); i++ {
		idx := strings.IndexByte(alpha, s[i])
		if idx == -1 {
			// %c, not %s: s[i] is a byte and %s would print "%!s(uint8=N)".
			return 0, fmt.Errorf("Invalid character [%c]", s[i])
		}
		digit := uint64(idx)

		// The first digit is stored as is.
		if i == 0 {
			result = digit
			continue
		}

		// Advance to the next place value, refusing to wrap around.
		if place > maxUint64/alphalen {
			return 0, fmt.Errorf("value %q overflows uint64", s)
		}
		place *= alphalen

		// Later digits were stored reduced by one (see ToString).
		digit++

		// digit*place must fit into what is left below maxUint64.
		if digit > (maxUint64-result)/place {
			return 0, fmt.Errorf("value %q overflows uint64", s)
		}
		result += digit * place
	}
	return result, nil
}

// pow returns a raised to the power b, computed by repeated squaring. The
// result wraps around silently on uint64 overflow.
//
// FromString no longer calls it; it is kept in case other code in the package
// still does.
func pow(a, b uint64) uint64 {
	p := uint64(1)
	for b > 0 {
		if b&1 != 0 {
			p *= a
		}
		b >>= 1
		a *= a
	}
	return p
}
|
28
storage/bolt/bolt.go
Normal file
28
storage/bolt/bolt.go
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
package bolt
|
||||||
|
|
||||||
|
import (
|
||||||
|
bolt "go.etcd.io/bbolt"
|
||||||
|
"net/url"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// BoltStorage is a storage backend that keeps its data in a bbolt database.
type BoltStorage struct {
	// DB is the open database handle. New sets it and Shutdown closes it.
	DB *bolt.DB
}
|
||||||
|
|
||||||
|
// Shutdown closes the underlying database. The error returned by Close is
// discarded because the method has no return value.
func (stor *BoltStorage) Shutdown() {
	stor.DB.Close()
}
|
||||||
|
|
||||||
|
func New(u *url.URL) (*BoltStorage, error) {
|
||||||
|
path := u.Opaque
|
||||||
|
if u.Path != "" {
|
||||||
|
path = u.Path
|
||||||
|
}
|
||||||
|
|
||||||
|
db, err := bolt.Open(path, 0600, &bolt.Options{Timeout: 3 * time.Second})
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &BoltStorage{DB: db}, nil
|
||||||
|
}
|
188
storage/bolt/qurl.go
Normal file
188
storage/bolt/qurl.go
Normal file
@ -0,0 +1,188 @@
|
|||||||
|
package bolt
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/binary"
|
||||||
|
"qurl/qurl"
|
||||||
|
// "bytes"
|
||||||
|
// "fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
	// QURLBucket names the top-level bucket that holds one nested bucket per
	// QURL, keyed by the 8-byte big-endian ID. Its sequence supplies new IDs.
	QURLBucket = []byte{0x00}
	// ReverseBucket names the top-level bucket that maps a URL to the 8-byte
	// big-endian ID of its QURL.
	ReverseBucket = []byte{0x01}

	// Keys used inside each per-QURL bucket.
	URLField     = []byte{0x00} // the URL as raw bytes
	CreatedField = []byte{0x01} // Created, encoded with time.Time.MarshalBinary
	IPField      = []byte{0x02} // the raw IP bytes
	BrowserField = []byte{0x03} // the Browser string as raw bytes
)
|
||||||
|
|
||||||
|
func (stor *BoltStorage) AddQURL(qurl *qurl.QURL) error {
|
||||||
|
tx, err := stor.DB.Begin(true)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer tx.Rollback()
|
||||||
|
|
||||||
|
rb, err := tx.CreateBucketIfNotExists(QURLBucket)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Populate the ID from the sequence if we don't have one
|
||||||
|
if qurl.ID == 0 {
|
||||||
|
qurl.ID, err = rb.NextSequence()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a byte array from the ID
|
||||||
|
bid := make([]byte, 8)
|
||||||
|
binary.BigEndian.PutUint64(bid, qurl.ID)
|
||||||
|
|
||||||
|
// Add an entry into the reverse indexed bucket for quickly
|
||||||
|
// determining if a URL is already in the database
|
||||||
|
ab, err := tx.CreateBucketIfNotExists(ReverseBucket)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = ab.Put([]byte(qurl.URL), bid)
|
||||||
|
|
||||||
|
qb, err := rb.CreateBucketIfNotExists(bid)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write the ID to URL
|
||||||
|
err = qb.Put(URLField, []byte(qurl.URL))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if !qurl.Created.IsZero() {
|
||||||
|
// Create byte array from the Created date
|
||||||
|
bdt, err := qurl.Created.MarshalBinary()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write the Created date
|
||||||
|
err = qb.Put(CreatedField, bdt)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if qurl.IP != nil {
|
||||||
|
err = qb.Put(IPField, qurl.IP)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(qurl.Browser) > 0 {
|
||||||
|
err = qb.Put(BrowserField, []byte(qurl.Browser))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := tx.Commit(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (stor *BoltStorage) SetQURLSequence(seq uint64) error {
|
||||||
|
tx, err := stor.DB.Begin(true)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer tx.Rollback()
|
||||||
|
|
||||||
|
qb, err := tx.CreateBucketIfNotExists(QURLBucket)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
qb.SetSequence(seq)
|
||||||
|
|
||||||
|
if err := tx.Commit(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (stor *BoltStorage) GetQURLByURL(u string) (*qurl.QURL, error) {
|
||||||
|
tx, err := stor.DB.Begin(false)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer tx.Rollback()
|
||||||
|
|
||||||
|
ab := tx.Bucket(ReverseBucket)
|
||||||
|
if ab == nil {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
bid := ab.Get([]byte(u))
|
||||||
|
if bid == nil {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
rb := tx.Bucket(QURLBucket)
|
||||||
|
if rb == nil {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
qb := rb.Bucket(bid)
|
||||||
|
if qb == nil {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
qurl := &qurl.QURL{ ID: binary.BigEndian.Uint64(bid) }
|
||||||
|
|
||||||
|
qu := qb.Get(URLField)
|
||||||
|
if qu != nil {
|
||||||
|
qurl.URL = string(qu)
|
||||||
|
}
|
||||||
|
|
||||||
|
return qurl, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
func (stor *BoltStorage) GetQURLByURL(u string) (*qurl.QURL, error) {
|
||||||
|
tx, err := stor.DB.Begin(false)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer tx.Rollback()
|
||||||
|
|
||||||
|
rb := tx.Bucket(QURLBucket)
|
||||||
|
if rb == nil {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
bu := []byte(u)
|
||||||
|
rc := rb.Cursor()
|
||||||
|
for k, _ := rc.First(); k != nil; k, _ = rc.Next() {
|
||||||
|
qb := rb.Bucket(k)
|
||||||
|
if qb == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
qu := qb.Get(URLField)
|
||||||
|
if bytes.Equal(bu, qu) {
|
||||||
|
qurl := &qurl.QURL{
|
||||||
|
ID: binary.BigEndian.Uint64(k),
|
||||||
|
URL: string(qu),
|
||||||
|
}
|
||||||
|
return qurl, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
*/
|
43
storage/storage.go
Normal file
43
storage/storage.go
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
package storage
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/url"
|
||||||
|
"qurl/qurl"
|
||||||
|
"qurl/storage/bolt"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Storage is the persistence interface implemented by the backends that
// NewStorage can return.
type Storage interface {
	// AddQURL stores the given QURL.
	AddQURL(*qurl.QURL) error
	// GetQURL(uint64) (*qurl.QURL, error)
	// GetQURLByURL looks a QURL up by its URL. The bolt backend returns
	// (nil, nil) when no entry exists for the URL.
	GetQURLByURL(string) (*qurl.QURL, error)
	// SetQURLSequence sets the backend's ID sequence to the given value.
	SetQURLSequence(uint64) error
	// Shutdown closes the backend.
	Shutdown()
}
|
||||||
|
|
||||||
|
func NewStorage(su string) (Storage, error) {
|
||||||
|
u, err := url.Parse(su)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if u.Scheme == "" {
|
||||||
|
return nil, fmt.Errorf("URL must include a scheme")
|
||||||
|
}
|
||||||
|
|
||||||
|
var stor Storage
|
||||||
|
|
||||||
|
switch strings.ToLower(u.Scheme) {
|
||||||
|
case "bolt", "boltdb", "bbolt":
|
||||||
|
stor, err = bolt.New(u)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("Unsupported URL scheme")
|
||||||
|
}
|
||||||
|
|
||||||
|
return stor, nil
|
||||||
|
}
|
20
vendor/go.etcd.io/bbolt/LICENSE
generated
vendored
Normal file
20
vendor/go.etcd.io/bbolt/LICENSE
generated
vendored
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
The MIT License (MIT)
|
||||||
|
|
||||||
|
Copyright (c) 2013 Ben Johnson
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
|
the Software without restriction, including without limitation the rights to
|
||||||
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
10
vendor/go.etcd.io/bbolt/bolt_386.go
generated
vendored
Normal file
10
vendor/go.etcd.io/bbolt/bolt_386.go
generated
vendored
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
package bbolt
|
||||||
|
|
||||||
|
// maxMapSize represents the largest mmap size supported by Bolt. It is also
// the length of the array type that the mapped data pointer is converted to.
const maxMapSize = 0x7FFFFFFF // 2GB

// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0xFFFFFFF

// brokenUnaligned reports whether unaligned load/stores are broken on this
// arch.
var brokenUnaligned = false
|
10
vendor/go.etcd.io/bbolt/bolt_amd64.go
generated
vendored
Normal file
10
vendor/go.etcd.io/bbolt/bolt_amd64.go
generated
vendored
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
package bbolt
|
||||||
|
|
||||||
|
// maxMapSize represents the largest mmap size supported by Bolt. It is also
// the length of the array type that the mapped data pointer is converted to.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB

// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF

// brokenUnaligned reports whether unaligned load/stores are broken on this
// arch.
var brokenUnaligned = false
|
28
vendor/go.etcd.io/bbolt/bolt_arm.go
generated
vendored
Normal file
28
vendor/go.etcd.io/bbolt/bolt_arm.go
generated
vendored
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
package bbolt
|
||||||
|
|
||||||
|
import "unsafe"
|
||||||
|
|
||||||
|
// maxMapSize represents the largest mmap size supported by Bolt. It is also
// the length of the array type that the mapped data pointer is converted to.
const maxMapSize = 0x7FFFFFFF // 2GB

// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0xFFFFFFF

// brokenUnaligned reports whether unaligned load/stores are broken on this
// arch. It is determined at start-up by init.
var brokenUnaligned bool
|
||||||
|
|
||||||
|
// init probes whether this CPU performs unaligned loads correctly and records
// the outcome in brokenUnaligned.
func init() {
	// Simple check to see whether this arch handles unaligned load/stores
	// correctly.

	// ARM9 and older devices require load/stores to be from/to aligned
	// addresses. If not, the lower 2 bits are cleared and that address is
	// read in a jumbled up order.

	// See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka15414.html

	raw := [6]byte{0xfe, 0xef, 0x11, 0x22, 0x22, 0x11}
	// Load a uint32 from the address 2 bytes past the start of raw.
	val := *(*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(&raw)) + 2))

	// Bytes 2..5 are 0x11 0x22 0x22 0x11, which form 0x11222211 in either
	// byte order; any other value means the load was not performed correctly.
	brokenUnaligned = val != 0x11222211
}
|
12
vendor/go.etcd.io/bbolt/bolt_arm64.go
generated
vendored
Normal file
12
vendor/go.etcd.io/bbolt/bolt_arm64.go
generated
vendored
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
// +build arm64
|
||||||
|
|
||||||
|
package bbolt
|
||||||
|
|
||||||
|
// maxMapSize represents the largest mmap size supported by Bolt. It is also
// the length of the array type that the mapped data pointer is converted to.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB

// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF

// brokenUnaligned reports whether unaligned load/stores are broken on this
// arch.
var brokenUnaligned = false
|
10
vendor/go.etcd.io/bbolt/bolt_linux.go
generated
vendored
Normal file
10
vendor/go.etcd.io/bbolt/bolt_linux.go
generated
vendored
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
package bbolt
|
||||||
|
|
||||||
|
import (
|
||||||
|
"syscall"
|
||||||
|
)
|
||||||
|
|
||||||
|
// fdatasync flushes written data to a file descriptor.
// It calls syscall.Fdatasync on the descriptor of db.file and returns the
// result.
func fdatasync(db *DB) error {
	return syscall.Fdatasync(int(db.file.Fd()))
}
|
12
vendor/go.etcd.io/bbolt/bolt_mips64x.go
generated
vendored
Normal file
12
vendor/go.etcd.io/bbolt/bolt_mips64x.go
generated
vendored
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
// +build mips64 mips64le
|
||||||
|
|
||||||
|
package bbolt
|
||||||
|
|
||||||
|
// maxMapSize represents the largest mmap size supported by Bolt. It is also
// the length of the array type that the mapped data pointer is converted to.
const maxMapSize = 0x8000000000 // 512GB

// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF

// brokenUnaligned reports whether unaligned load/stores are broken on this
// arch.
var brokenUnaligned = false
|
12
vendor/go.etcd.io/bbolt/bolt_mipsx.go
generated
vendored
Normal file
12
vendor/go.etcd.io/bbolt/bolt_mipsx.go
generated
vendored
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
// +build mips mipsle
|
||||||
|
|
||||||
|
package bbolt
|
||||||
|
|
||||||
|
// maxMapSize represents the largest mmap size supported by Bolt. It is also
// the length of the array type that the mapped data pointer is converted to.
const maxMapSize = 0x40000000 // 1GB

// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0xFFFFFFF

// brokenUnaligned reports whether unaligned load/stores are broken on this
// arch.
var brokenUnaligned = false
|
27
vendor/go.etcd.io/bbolt/bolt_openbsd.go
generated
vendored
Normal file
27
vendor/go.etcd.io/bbolt/bolt_openbsd.go
generated
vendored
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
package bbolt
|
||||||
|
|
||||||
|
import (
|
||||||
|
"syscall"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Flag bits passed to the msync system call (1, 2 and 4 respectively).
const (
	msAsync      = 1 << iota // perform asynchronous writes
	msSync                   // perform synchronous writes
	msInvalidate             // invalidate cached data
)
|
||||||
|
|
||||||
|
// msync issues the msync system call with the msInvalidate flag for the
// db.datasz bytes starting at db.data. It returns the errno reported by the
// system call, or nil when the call succeeded.
func msync(db *DB) error {
	_, _, errno := syscall.Syscall(syscall.SYS_MSYNC, uintptr(unsafe.Pointer(db.data)), uintptr(db.datasz), msInvalidate)
	if errno != 0 {
		return errno
	}
	return nil
}
|
||||||
|
|
||||||
|
// fdatasync synchronizes the database. When the data file is currently
// mapped (db.data is non-nil) it delegates to msync; otherwise it calls Sync
// on db.file.
func fdatasync(db *DB) error {
	if db.data != nil {
		return msync(db)
	}
	return db.file.Sync()
}
|
12
vendor/go.etcd.io/bbolt/bolt_ppc.go
generated
vendored
Normal file
12
vendor/go.etcd.io/bbolt/bolt_ppc.go
generated
vendored
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
// +build ppc
|
||||||
|
|
||||||
|
package bbolt
|
||||||
|
|
||||||
|
// maxMapSize represents the largest mmap size supported by Bolt. It is also
// the length of the array type that the mapped data pointer is converted to.
const maxMapSize = 0x7FFFFFFF // 2GB

// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0xFFFFFFF

// brokenUnaligned reports whether unaligned load/stores are broken on this
// arch.
var brokenUnaligned = false
|
12
vendor/go.etcd.io/bbolt/bolt_ppc64.go
generated
vendored
Normal file
12
vendor/go.etcd.io/bbolt/bolt_ppc64.go
generated
vendored
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
// +build ppc64
|
||||||
|
|
||||||
|
package bbolt
|
||||||
|
|
||||||
|
// maxMapSize represents the largest mmap size supported by Bolt. It is also
// the length of the array type that the mapped data pointer is converted to.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB

// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF

// brokenUnaligned reports whether unaligned load/stores are broken on this
// arch.
var brokenUnaligned = false
|
12
vendor/go.etcd.io/bbolt/bolt_ppc64le.go
generated
vendored
Normal file
12
vendor/go.etcd.io/bbolt/bolt_ppc64le.go
generated
vendored
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
// +build ppc64le
|
||||||
|
|
||||||
|
package bbolt
|
||||||
|
|
||||||
|
// maxMapSize represents the largest mmap size supported by Bolt. It is also
// the length of the array type that the mapped data pointer is converted to.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB

// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF

// brokenUnaligned reports whether unaligned load/stores are broken on this
// arch.
var brokenUnaligned = false
|
12
vendor/go.etcd.io/bbolt/bolt_s390x.go
generated
vendored
Normal file
12
vendor/go.etcd.io/bbolt/bolt_s390x.go
generated
vendored
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
// +build s390x
|
||||||
|
|
||||||
|
package bbolt
|
||||||
|
|
||||||
|
// maxMapSize represents the largest mmap size supported by Bolt. It is also
// the length of the array type that the mapped data pointer is converted to.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB

// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF

// brokenUnaligned reports whether unaligned load/stores are broken on this
// arch.
var brokenUnaligned = false
|
93
vendor/go.etcd.io/bbolt/bolt_unix.go
generated
vendored
Normal file
93
vendor/go.etcd.io/bbolt/bolt_unix.go
generated
vendored
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
// +build !windows,!plan9,!solaris
|
||||||
|
|
||||||
|
package bbolt
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"syscall"
|
||||||
|
"time"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// flock acquires an advisory lock on a file descriptor.
//
// exclusive selects LOCK_EX; otherwise LOCK_SH is requested. The lock is
// always requested with LOCK_NB and the request is repeated every
// flockRetryTimeout while it fails with EWOULDBLOCK. With a non-zero timeout,
// ErrTimeout is returned once the time spent waiting exceeds
// timeout-flockRetryTimeout; with a zero timeout the loop retries without a
// time limit. Any other error from Flock is returned immediately.
func flock(db *DB, exclusive bool, timeout time.Duration) error {
	var t time.Time
	if timeout != 0 {
		t = time.Now()
	}
	fd := db.file.Fd()
	flag := syscall.LOCK_NB
	if exclusive {
		flag |= syscall.LOCK_EX
	} else {
		flag |= syscall.LOCK_SH
	}
	for {
		// Attempt to obtain the requested (exclusive or shared) lock
		// without blocking.
		err := syscall.Flock(int(fd), flag)
		if err == nil {
			return nil
		} else if err != syscall.EWOULDBLOCK {
			return err
		}

		// If we timed out then return an error.
		if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
			return ErrTimeout
		}

		// Wait for a bit and try again.
		time.Sleep(flockRetryTimeout)
	}
}
|
||||||
|
|
||||||
|
// funlock releases an advisory lock on a file descriptor.
// It calls Flock with LOCK_UN on the descriptor of db.file and returns the
// result.
func funlock(db *DB) error {
	return syscall.Flock(int(db.file.Fd()), syscall.LOCK_UN)
}
|
||||||
|
|
||||||
|
// mmap memory maps a DB's data file.
//
// The first sz bytes of db.file are mapped read-only with MAP_SHARED plus
// db.MmapFlags. On success the mapping is recorded in db.dataref (the slice
// returned by Mmap), db.data (the same memory as an array pointer) and
// db.datasz.
func mmap(db *DB, sz int) error {
	// Map the data file to memory.
	b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
	if err != nil {
		return err
	}

	// Advise the kernel that the mmap is accessed randomly.
	err = madvise(b, syscall.MADV_RANDOM)
	if err != nil && err != syscall.ENOSYS {
		// Ignore not implemented error in kernel because it still works.
		// NOTE(review): on this error path b is not unmapped before
		// returning - confirm whether that is intended.
		return fmt.Errorf("madvise: %s", err)
	}

	// Save the original byte slice and convert to a byte array pointer.
	db.dataref = b
	db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
	db.datasz = sz
	return nil
}
|
||||||
|
|
||||||
|
// munmap unmaps a DB's data file from memory.
//
// It is a no-op when nothing is mapped. Otherwise db.dataref, db.data and
// db.datasz are reset regardless of whether Munmap succeeded, and the error
// from Munmap is returned.
func munmap(db *DB) error {
	// Ignore the unmap if we have no mapped data.
	if db.dataref == nil {
		return nil
	}

	// Unmap using the original byte slice.
	err := syscall.Munmap(db.dataref)
	db.dataref = nil
	db.data = nil
	db.datasz = 0
	return err
}
|
||||||
|
|
||||||
|
// madvise issues the madvise system call for the memory backing b with the
// given advice value. It returns the errno reported by the call, or nil.
// b must not be empty, because the address of b[0] is taken.
//
// NOTE: This function is copied from stdlib because it is not available on darwin.
func madvise(b []byte, advice int) (err error) {
	_, _, e1 := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), uintptr(advice))
	if e1 != 0 {
		err = e1
	}
	return
}
|
88
vendor/go.etcd.io/bbolt/bolt_unix_solaris.go
generated
vendored
Normal file
88
vendor/go.etcd.io/bbolt/bolt_unix_solaris.go
generated
vendored
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
package bbolt
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"syscall"
|
||||||
|
"time"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// flock acquires an advisory lock on a file descriptor.
//
// The lock is taken with fcntl(F_SETLK): a write lock (F_WRLCK) when
// exclusive is set and a read lock (F_RDLCK) otherwise. The request is
// repeated every flockRetryTimeout while it fails with EAGAIN. With a
// non-zero timeout, ErrTimeout is returned once the time spent waiting
// exceeds timeout-flockRetryTimeout; with a zero timeout the loop retries
// without a time limit. Any other error is returned immediately.
func flock(db *DB, exclusive bool, timeout time.Duration) error {
	var t time.Time
	if timeout != 0 {
		t = time.Now()
	}
	fd := db.file.Fd()
	var lockType int16
	if exclusive {
		lockType = syscall.F_WRLCK
	} else {
		lockType = syscall.F_RDLCK
	}
	for {
		// Attempt to obtain the requested (write or read) lock without
		// blocking.
		lock := syscall.Flock_t{Type: lockType}
		err := syscall.FcntlFlock(fd, syscall.F_SETLK, &lock)
		if err == nil {
			return nil
		} else if err != syscall.EAGAIN {
			return err
		}

		// If we timed out then return an error.
		if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
			return ErrTimeout
		}

		// Wait for a bit and try again.
		time.Sleep(flockRetryTimeout)
	}
}
|
||||||
|
|
||||||
|
// funlock releases an advisory lock on a file descriptor.
// It issues fcntl(F_SETLK) with an F_UNLCK request whose Start, Len and
// Whence are all 0, and returns the result.
func funlock(db *DB) error {
	var lock syscall.Flock_t
	lock.Start = 0
	lock.Len = 0
	lock.Type = syscall.F_UNLCK
	lock.Whence = 0
	return syscall.FcntlFlock(uintptr(db.file.Fd()), syscall.F_SETLK, &lock)
}
|
||||||
|
|
||||||
|
// mmap memory maps a DB's data file.
//
// The first sz bytes of db.file are mapped read-only with MAP_SHARED plus
// db.MmapFlags. On success the mapping is recorded in db.dataref (the slice
// returned by Mmap), db.data (the same memory as an array pointer) and
// db.datasz.
func mmap(db *DB, sz int) error {
	// Map the data file to memory.
	b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
	if err != nil {
		return err
	}

	// Advise the kernel that the mmap is accessed randomly.
	// NOTE(review): on this error path b is not unmapped before returning -
	// confirm whether that is intended.
	if err := unix.Madvise(b, syscall.MADV_RANDOM); err != nil {
		return fmt.Errorf("madvise: %s", err)
	}

	// Save the original byte slice and convert to a byte array pointer.
	db.dataref = b
	db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
	db.datasz = sz
	return nil
}
|
||||||
|
|
||||||
|
// munmap unmaps a DB's data file from memory.
//
// It is a no-op when nothing is mapped. Otherwise db.dataref, db.data and
// db.datasz are reset regardless of whether Munmap succeeded, and the error
// from Munmap is returned.
func munmap(db *DB) error {
	// Ignore the unmap if we have no mapped data.
	if db.dataref == nil {
		return nil
	}

	// Unmap using the original byte slice.
	err := unix.Munmap(db.dataref)
	db.dataref = nil
	db.data = nil
	db.datasz = 0
	return err
}
|
141
vendor/go.etcd.io/bbolt/bolt_windows.go
generated
vendored
Normal file
141
vendor/go.etcd.io/bbolt/bolt_windows.go
generated
vendored
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
package bbolt
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"syscall"
|
||||||
|
"time"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// LockFileEx code derived from golang build filemutex_windows.go @ v1.5.1
//
// Lazily loaded handles for kernel32.dll and the two procedures called by
// lockFileEx and unlockFileEx.
var (
	modkernel32      = syscall.NewLazyDLL("kernel32.dll")
	procLockFileEx   = modkernel32.NewProc("LockFileEx")
	procUnlockFileEx = modkernel32.NewProc("UnlockFileEx")
)
|
||||||
|
|
||||||
|
const (
	// Flag bits for LockFileEx, combined by flock.
	// see https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx
	flagLockExclusive       = 2
	flagLockFailImmediately = 1

	// errLockViolation is the error flock treats as "lock currently held,
	// retry later".
	// see https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382(v=vs.85).aspx
	errLockViolation syscall.Errno = 0x21
)
|
||||||
|
|
||||||
|
// lockFileEx calls the LockFileEx procedure of kernel32.dll with the given
// arguments. A zero result from the call is treated as failure and the
// accompanying error is returned; otherwise nil is returned.
func lockFileEx(h syscall.Handle, flags, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) {
	r, _, err := procLockFileEx.Call(uintptr(h), uintptr(flags), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)))
	if r == 0 {
		return err
	}
	return nil
}
|
||||||
|
|
||||||
|
// unlockFileEx calls the UnlockFileEx procedure of kernel32.dll with the
// given arguments. A zero result from the call is treated as failure and the
// accompanying error is returned; otherwise nil is returned.
func unlockFileEx(h syscall.Handle, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) {
	r, _, err := procUnlockFileEx.Call(uintptr(h), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)), 0)
	if r == 0 {
		return err
	}
	return nil
}
|
||||||
|
|
||||||
|
// fdatasync flushes written data to a file descriptor.
// It calls Sync on db.file and returns the result.
func fdatasync(db *DB) error {
	return db.file.Sync()
}
|
||||||
|
|
||||||
|
// flock acquires an advisory lock on a file descriptor.
//
// The lock is requested through lockFileEx with flagLockFailImmediately,
// plus flagLockExclusive when exclusive is set. The request is repeated every
// flockRetryTimeout while it fails with errLockViolation. With a non-zero
// timeout, ErrTimeout is returned once the time spent waiting exceeds
// timeout-flockRetryTimeout; with a zero timeout the loop retries without a
// time limit. Any other error is returned immediately.
func flock(db *DB, exclusive bool, timeout time.Duration) error {
	var t time.Time
	if timeout != 0 {
		t = time.Now()
	}
	var flag uint32 = flagLockFailImmediately
	if exclusive {
		flag |= flagLockExclusive
	}
	for {
		// Fix for https://github.com/etcd-io/bbolt/issues/121. Use byte-range
		// -1..0 as the lock on the database file.
		var m1 uint32 = (1 << 32) - 1 // -1 in a uint32
		err := lockFileEx(syscall.Handle(db.file.Fd()), flag, 0, 1, 0, &syscall.Overlapped{
			Offset:     m1,
			OffsetHigh: m1,
		})

		if err == nil {
			return nil
		} else if err != errLockViolation {
			return err
		}

		// If we timed out then return an error.
		if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
			return ErrTimeout
		}

		// Wait for a bit and try again.
		time.Sleep(flockRetryTimeout)
	}
}
|
||||||
|
|
||||||
|
// funlock releases an advisory lock on a file descriptor.
// It unlocks the same byte range that flock locks (one byte at the offset
// whose low and high words are both 0xFFFFFFFF) and returns the result of
// unlockFileEx.
func funlock(db *DB) error {
	var m1 uint32 = (1 << 32) - 1 // -1 in a uint32
	err := unlockFileEx(syscall.Handle(db.file.Fd()), 0, 1, 0, &syscall.Overlapped{
		Offset:     m1,
		OffsetHigh: m1,
	})
	return err
}
|
||||||
|
|
||||||
|
// mmap memory maps a DB's data file.
|
||||||
|
// Based on: https://github.com/edsrzf/mmap-go
|
||||||
|
func mmap(db *DB, sz int) error {
|
||||||
|
if !db.readOnly {
|
||||||
|
// Truncate the database to the size of the mmap.
|
||||||
|
if err := db.file.Truncate(int64(sz)); err != nil {
|
||||||
|
return fmt.Errorf("truncate: %s", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open a file mapping handle.
|
||||||
|
sizelo := uint32(sz >> 32)
|
||||||
|
sizehi := uint32(sz) & 0xffffffff
|
||||||
|
h, errno := syscall.CreateFileMapping(syscall.Handle(db.file.Fd()), nil, syscall.PAGE_READONLY, sizelo, sizehi, nil)
|
||||||
|
if h == 0 {
|
||||||
|
return os.NewSyscallError("CreateFileMapping", errno)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the memory map.
|
||||||
|
addr, errno := syscall.MapViewOfFile(h, syscall.FILE_MAP_READ, 0, 0, uintptr(sz))
|
||||||
|
if addr == 0 {
|
||||||
|
return os.NewSyscallError("MapViewOfFile", errno)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close mapping handle.
|
||||||
|
if err := syscall.CloseHandle(syscall.Handle(h)); err != nil {
|
||||||
|
return os.NewSyscallError("CloseHandle", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert to a byte array.
|
||||||
|
db.data = ((*[maxMapSize]byte)(unsafe.Pointer(addr)))
|
||||||
|
db.datasz = sz
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// munmap unmaps a pointer from a file.
|
||||||
|
// Based on: https://github.com/edsrzf/mmap-go
|
||||||
|
func munmap(db *DB) error {
|
||||||
|
if db.data == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
addr := (uintptr)(unsafe.Pointer(&db.data[0]))
|
||||||
|
if err := syscall.UnmapViewOfFile(addr); err != nil {
|
||||||
|
return os.NewSyscallError("UnmapViewOfFile", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
8
vendor/go.etcd.io/bbolt/boltsync_unix.go
generated
vendored
Normal file
8
vendor/go.etcd.io/bbolt/boltsync_unix.go
generated
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
// +build !windows,!plan9,!linux,!openbsd
|
||||||
|
|
||||||
|
package bbolt
|
||||||
|
|
||||||
|
// fdatasync flushes written data to a file descriptor.
|
||||||
|
func fdatasync(db *DB) error {
|
||||||
|
return db.file.Sync()
|
||||||
|
}
|
775
vendor/go.etcd.io/bbolt/bucket.go
generated
vendored
Normal file
775
vendor/go.etcd.io/bbolt/bucket.go
generated
vendored
Normal file
@ -0,0 +1,775 @@
|
|||||||
|
package bbolt
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Size limits for keys and values.
const (
	// MaxKeySize is the maximum length of a key, in bytes.
	MaxKeySize = 32768

	// MaxValueSize is the maximum length of a value, in bytes.
	MaxValueSize = 1<<31 - 2
)
|
||||||
|
|
||||||
|
// bucketHeaderSize is the size, in bytes, of the bucket struct as reported
// by unsafe.Sizeof.
const bucketHeaderSize = int(unsafe.Sizeof(bucket{}))
|
||||||
|
|
||||||
|
// minFillPercent is the lower fill-percentage constant.
// NOTE(review): presumably the lower clamp for Bucket.FillPercent; the code
// that uses it is not in this file.
const minFillPercent = 0.1

// maxFillPercent is the upper fill-percentage constant.
// NOTE(review): presumably the upper clamp for Bucket.FillPercent; the code
// that uses it is not in this file.
const maxFillPercent = 1.0

// DefaultFillPercent is the percentage that split pages are filled.
// This value can be changed by setting Bucket.FillPercent.
const DefaultFillPercent = 0.5
|
||||||
|
|
||||||
|
// Bucket represents a collection of key/value pairs inside the database.
|
||||||
|
type Bucket struct {
|
||||||
|
*bucket
|
||||||
|
tx *Tx // the associated transaction
|
||||||
|
buckets map[string]*Bucket // subbucket cache
|
||||||
|
page *page // inline page reference
|
||||||
|
rootNode *node // materialized node for the root page.
|
||||||
|
nodes map[pgid]*node // node cache
|
||||||
|
|
||||||
|
// Sets the threshold for filling nodes when they split. By default,
|
||||||
|
// the bucket will fill to 50% but it can be useful to increase this
|
||||||
|
// amount if you know that your write workloads are mostly append-only.
|
||||||
|
//
|
||||||
|
// This is non-persisted across transactions so it must be set in every Tx.
|
||||||
|
FillPercent float64
|
||||||
|
}
|
||||||
|
|
||||||
|
// bucket represents the on-file representation of a bucket.
|
||||||
|
// This is stored as the "value" of a bucket key. If the bucket is small enough,
|
||||||
|
// then its root page can be stored inline in the "value", after the bucket
|
||||||
|
// header. In the case of inline buckets, the "root" will be 0.
|
||||||
|
type bucket struct {
|
||||||
|
root pgid // page id of the bucket's root-level page
|
||||||
|
sequence uint64 // monotonically incrementing, used by NextSequence()
|
||||||
|
}
|
||||||
|
|
||||||
|
// newBucket returns a new bucket associated with a transaction.
|
||||||
|
func newBucket(tx *Tx) Bucket {
|
||||||
|
var b = Bucket{tx: tx, FillPercent: DefaultFillPercent}
|
||||||
|
if tx.writable {
|
||||||
|
b.buckets = make(map[string]*Bucket)
|
||||||
|
b.nodes = make(map[pgid]*node)
|
||||||
|
}
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tx returns the tx of the bucket.
|
||||||
|
func (b *Bucket) Tx() *Tx {
|
||||||
|
return b.tx
|
||||||
|
}
|
||||||
|
|
||||||
|
// Root returns the root of the bucket.
|
||||||
|
func (b *Bucket) Root() pgid {
|
||||||
|
return b.root
|
||||||
|
}
|
||||||
|
|
||||||
|
// Writable returns whether the bucket is writable.
|
||||||
|
func (b *Bucket) Writable() bool {
|
||||||
|
return b.tx.writable
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cursor creates a cursor associated with the bucket.
|
||||||
|
// The cursor is only valid as long as the transaction is open.
|
||||||
|
// Do not use a cursor after the transaction is closed.
|
||||||
|
func (b *Bucket) Cursor() *Cursor {
|
||||||
|
// Update transaction statistics.
|
||||||
|
b.tx.stats.CursorCount++
|
||||||
|
|
||||||
|
// Allocate and return a cursor.
|
||||||
|
return &Cursor{
|
||||||
|
bucket: b,
|
||||||
|
stack: make([]elemRef, 0),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bucket retrieves a nested bucket by name.
|
||||||
|
// Returns nil if the bucket does not exist.
|
||||||
|
// The bucket instance is only valid for the lifetime of the transaction.
|
||||||
|
func (b *Bucket) Bucket(name []byte) *Bucket {
|
||||||
|
if b.buckets != nil {
|
||||||
|
if child := b.buckets[string(name)]; child != nil {
|
||||||
|
return child
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Move cursor to key.
|
||||||
|
c := b.Cursor()
|
||||||
|
k, v, flags := c.seek(name)
|
||||||
|
|
||||||
|
// Return nil if the key doesn't exist or it is not a bucket.
|
||||||
|
if !bytes.Equal(name, k) || (flags&bucketLeafFlag) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise create a bucket and cache it.
|
||||||
|
var child = b.openBucket(v)
|
||||||
|
if b.buckets != nil {
|
||||||
|
b.buckets[string(name)] = child
|
||||||
|
}
|
||||||
|
|
||||||
|
return child
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper method that re-interprets a sub-bucket value
|
||||||
|
// from a parent into a Bucket
|
||||||
|
func (b *Bucket) openBucket(value []byte) *Bucket {
|
||||||
|
var child = newBucket(b.tx)
|
||||||
|
|
||||||
|
// If unaligned load/stores are broken on this arch and value is
|
||||||
|
// unaligned simply clone to an aligned byte array.
|
||||||
|
unaligned := brokenUnaligned && uintptr(unsafe.Pointer(&value[0]))&3 != 0
|
||||||
|
|
||||||
|
if unaligned {
|
||||||
|
value = cloneBytes(value)
|
||||||
|
}
|
||||||
|
|
||||||
|
// If this is a writable transaction then we need to copy the bucket entry.
|
||||||
|
// Read-only transactions can point directly at the mmap entry.
|
||||||
|
if b.tx.writable && !unaligned {
|
||||||
|
child.bucket = &bucket{}
|
||||||
|
*child.bucket = *(*bucket)(unsafe.Pointer(&value[0]))
|
||||||
|
} else {
|
||||||
|
child.bucket = (*bucket)(unsafe.Pointer(&value[0]))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save a reference to the inline page if the bucket is inline.
|
||||||
|
if child.root == 0 {
|
||||||
|
child.page = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
|
||||||
|
}
|
||||||
|
|
||||||
|
return &child
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateBucket creates a new bucket at the given key and returns the new bucket.
|
||||||
|
// Returns an error if the key already exists, if the bucket name is blank, or if the bucket name is too long.
|
||||||
|
// The bucket instance is only valid for the lifetime of the transaction.
|
||||||
|
func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
|
||||||
|
if b.tx.db == nil {
|
||||||
|
return nil, ErrTxClosed
|
||||||
|
} else if !b.tx.writable {
|
||||||
|
return nil, ErrTxNotWritable
|
||||||
|
} else if len(key) == 0 {
|
||||||
|
return nil, ErrBucketNameRequired
|
||||||
|
}
|
||||||
|
|
||||||
|
// Move cursor to correct position.
|
||||||
|
c := b.Cursor()
|
||||||
|
k, _, flags := c.seek(key)
|
||||||
|
|
||||||
|
// Return an error if there is an existing key.
|
||||||
|
if bytes.Equal(key, k) {
|
||||||
|
if (flags & bucketLeafFlag) != 0 {
|
||||||
|
return nil, ErrBucketExists
|
||||||
|
}
|
||||||
|
return nil, ErrIncompatibleValue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create empty, inline bucket.
|
||||||
|
var bucket = Bucket{
|
||||||
|
bucket: &bucket{},
|
||||||
|
rootNode: &node{isLeaf: true},
|
||||||
|
FillPercent: DefaultFillPercent,
|
||||||
|
}
|
||||||
|
var value = bucket.write()
|
||||||
|
|
||||||
|
// Insert into node.
|
||||||
|
key = cloneBytes(key)
|
||||||
|
c.node().put(key, key, value, 0, bucketLeafFlag)
|
||||||
|
|
||||||
|
// Since subbuckets are not allowed on inline buckets, we need to
|
||||||
|
// dereference the inline page, if it exists. This will cause the bucket
|
||||||
|
// to be treated as a regular, non-inline bucket for the rest of the tx.
|
||||||
|
b.page = nil
|
||||||
|
|
||||||
|
return b.Bucket(key), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateBucketIfNotExists creates a new bucket if it doesn't already exist and returns a reference to it.
|
||||||
|
// Returns an error if the bucket name is blank, or if the bucket name is too long.
|
||||||
|
// The bucket instance is only valid for the lifetime of the transaction.
|
||||||
|
func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) {
|
||||||
|
child, err := b.CreateBucket(key)
|
||||||
|
if err == ErrBucketExists {
|
||||||
|
return b.Bucket(key), nil
|
||||||
|
} else if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return child, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteBucket deletes a bucket at the given key.
|
||||||
|
// Returns an error if the bucket does not exists, or if the key represents a non-bucket value.
|
||||||
|
func (b *Bucket) DeleteBucket(key []byte) error {
|
||||||
|
if b.tx.db == nil {
|
||||||
|
return ErrTxClosed
|
||||||
|
} else if !b.Writable() {
|
||||||
|
return ErrTxNotWritable
|
||||||
|
}
|
||||||
|
|
||||||
|
// Move cursor to correct position.
|
||||||
|
c := b.Cursor()
|
||||||
|
k, _, flags := c.seek(key)
|
||||||
|
|
||||||
|
// Return an error if bucket doesn't exist or is not a bucket.
|
||||||
|
if !bytes.Equal(key, k) {
|
||||||
|
return ErrBucketNotFound
|
||||||
|
} else if (flags & bucketLeafFlag) == 0 {
|
||||||
|
return ErrIncompatibleValue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recursively delete all child buckets.
|
||||||
|
child := b.Bucket(key)
|
||||||
|
err := child.ForEach(func(k, v []byte) error {
|
||||||
|
if v == nil {
|
||||||
|
if err := child.DeleteBucket(k); err != nil {
|
||||||
|
return fmt.Errorf("delete bucket: %s", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove cached copy.
|
||||||
|
delete(b.buckets, string(key))
|
||||||
|
|
||||||
|
// Release all bucket pages to freelist.
|
||||||
|
child.nodes = nil
|
||||||
|
child.rootNode = nil
|
||||||
|
child.free()
|
||||||
|
|
||||||
|
// Delete the node if we have a matching key.
|
||||||
|
c.node().del(key)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get retrieves the value for a key in the bucket.
|
||||||
|
// Returns a nil value if the key does not exist or if the key is a nested bucket.
|
||||||
|
// The returned value is only valid for the life of the transaction.
|
||||||
|
func (b *Bucket) Get(key []byte) []byte {
|
||||||
|
k, v, flags := b.Cursor().seek(key)
|
||||||
|
|
||||||
|
// Return nil if this is a bucket.
|
||||||
|
if (flags & bucketLeafFlag) != 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// If our target node isn't the same key as what's passed in then return nil.
|
||||||
|
if !bytes.Equal(key, k) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
|
||||||
|
// Put sets the value for a key in the bucket.
|
||||||
|
// If the key exist then its previous value will be overwritten.
|
||||||
|
// Supplied value must remain valid for the life of the transaction.
|
||||||
|
// Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large.
|
||||||
|
func (b *Bucket) Put(key []byte, value []byte) error {
|
||||||
|
if b.tx.db == nil {
|
||||||
|
return ErrTxClosed
|
||||||
|
} else if !b.Writable() {
|
||||||
|
return ErrTxNotWritable
|
||||||
|
} else if len(key) == 0 {
|
||||||
|
return ErrKeyRequired
|
||||||
|
} else if len(key) > MaxKeySize {
|
||||||
|
return ErrKeyTooLarge
|
||||||
|
} else if int64(len(value)) > MaxValueSize {
|
||||||
|
return ErrValueTooLarge
|
||||||
|
}
|
||||||
|
|
||||||
|
// Move cursor to correct position.
|
||||||
|
c := b.Cursor()
|
||||||
|
k, _, flags := c.seek(key)
|
||||||
|
|
||||||
|
// Return an error if there is an existing key with a bucket value.
|
||||||
|
if bytes.Equal(key, k) && (flags&bucketLeafFlag) != 0 {
|
||||||
|
return ErrIncompatibleValue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Insert into node.
|
||||||
|
key = cloneBytes(key)
|
||||||
|
c.node().put(key, key, value, 0, 0)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete removes a key from the bucket.
|
||||||
|
// If the key does not exist then nothing is done and a nil error is returned.
|
||||||
|
// Returns an error if the bucket was created from a read-only transaction.
|
||||||
|
func (b *Bucket) Delete(key []byte) error {
|
||||||
|
if b.tx.db == nil {
|
||||||
|
return ErrTxClosed
|
||||||
|
} else if !b.Writable() {
|
||||||
|
return ErrTxNotWritable
|
||||||
|
}
|
||||||
|
|
||||||
|
// Move cursor to correct position.
|
||||||
|
c := b.Cursor()
|
||||||
|
k, _, flags := c.seek(key)
|
||||||
|
|
||||||
|
// Return nil if the key doesn't exist.
|
||||||
|
if !bytes.Equal(key, k) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return an error if there is already existing bucket value.
|
||||||
|
if (flags & bucketLeafFlag) != 0 {
|
||||||
|
return ErrIncompatibleValue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete the node if we have a matching key.
|
||||||
|
c.node().del(key)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sequence returns the current integer for the bucket without incrementing it.
|
||||||
|
func (b *Bucket) Sequence() uint64 { return b.bucket.sequence }
|
||||||
|
|
||||||
|
// SetSequence updates the sequence number for the bucket.
|
||||||
|
func (b *Bucket) SetSequence(v uint64) error {
|
||||||
|
if b.tx.db == nil {
|
||||||
|
return ErrTxClosed
|
||||||
|
} else if !b.Writable() {
|
||||||
|
return ErrTxNotWritable
|
||||||
|
}
|
||||||
|
|
||||||
|
// Materialize the root node if it hasn't been already so that the
|
||||||
|
// bucket will be saved during commit.
|
||||||
|
if b.rootNode == nil {
|
||||||
|
_ = b.node(b.root, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Increment and return the sequence.
|
||||||
|
b.bucket.sequence = v
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// NextSequence returns an autoincrementing integer for the bucket.
|
||||||
|
func (b *Bucket) NextSequence() (uint64, error) {
|
||||||
|
if b.tx.db == nil {
|
||||||
|
return 0, ErrTxClosed
|
||||||
|
} else if !b.Writable() {
|
||||||
|
return 0, ErrTxNotWritable
|
||||||
|
}
|
||||||
|
|
||||||
|
// Materialize the root node if it hasn't been already so that the
|
||||||
|
// bucket will be saved during commit.
|
||||||
|
if b.rootNode == nil {
|
||||||
|
_ = b.node(b.root, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Increment and return the sequence.
|
||||||
|
b.bucket.sequence++
|
||||||
|
return b.bucket.sequence, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ForEach executes a function for each key/value pair in a bucket.
|
||||||
|
// If the provided function returns an error then the iteration is stopped and
|
||||||
|
// the error is returned to the caller. The provided function must not modify
|
||||||
|
// the bucket; this will result in undefined behavior.
|
||||||
|
func (b *Bucket) ForEach(fn func(k, v []byte) error) error {
|
||||||
|
if b.tx.db == nil {
|
||||||
|
return ErrTxClosed
|
||||||
|
}
|
||||||
|
c := b.Cursor()
|
||||||
|
for k, v := c.First(); k != nil; k, v = c.Next() {
|
||||||
|
if err := fn(k, v); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stat returns stats on a bucket.
|
||||||
|
func (b *Bucket) Stats() BucketStats {
|
||||||
|
var s, subStats BucketStats
|
||||||
|
pageSize := b.tx.db.pageSize
|
||||||
|
s.BucketN += 1
|
||||||
|
if b.root == 0 {
|
||||||
|
s.InlineBucketN += 1
|
||||||
|
}
|
||||||
|
b.forEachPage(func(p *page, depth int) {
|
||||||
|
if (p.flags & leafPageFlag) != 0 {
|
||||||
|
s.KeyN += int(p.count)
|
||||||
|
|
||||||
|
// used totals the used bytes for the page
|
||||||
|
used := pageHeaderSize
|
||||||
|
|
||||||
|
if p.count != 0 {
|
||||||
|
// If page has any elements, add all element headers.
|
||||||
|
used += leafPageElementSize * int(p.count-1)
|
||||||
|
|
||||||
|
// Add all element key, value sizes.
|
||||||
|
// The computation takes advantage of the fact that the position
|
||||||
|
// of the last element's key/value equals to the total of the sizes
|
||||||
|
// of all previous elements' keys and values.
|
||||||
|
// It also includes the last element's header.
|
||||||
|
lastElement := p.leafPageElement(p.count - 1)
|
||||||
|
used += int(lastElement.pos + lastElement.ksize + lastElement.vsize)
|
||||||
|
}
|
||||||
|
|
||||||
|
if b.root == 0 {
|
||||||
|
// For inlined bucket just update the inline stats
|
||||||
|
s.InlineBucketInuse += used
|
||||||
|
} else {
|
||||||
|
// For non-inlined bucket update all the leaf stats
|
||||||
|
s.LeafPageN++
|
||||||
|
s.LeafInuse += used
|
||||||
|
s.LeafOverflowN += int(p.overflow)
|
||||||
|
|
||||||
|
// Collect stats from sub-buckets.
|
||||||
|
// Do that by iterating over all element headers
|
||||||
|
// looking for the ones with the bucketLeafFlag.
|
||||||
|
for i := uint16(0); i < p.count; i++ {
|
||||||
|
e := p.leafPageElement(i)
|
||||||
|
if (e.flags & bucketLeafFlag) != 0 {
|
||||||
|
// For any bucket element, open the element value
|
||||||
|
// and recursively call Stats on the contained bucket.
|
||||||
|
subStats.Add(b.openBucket(e.value()).Stats())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (p.flags & branchPageFlag) != 0 {
|
||||||
|
s.BranchPageN++
|
||||||
|
lastElement := p.branchPageElement(p.count - 1)
|
||||||
|
|
||||||
|
// used totals the used bytes for the page
|
||||||
|
// Add header and all element headers.
|
||||||
|
used := pageHeaderSize + (branchPageElementSize * int(p.count-1))
|
||||||
|
|
||||||
|
// Add size of all keys and values.
|
||||||
|
// Again, use the fact that last element's position equals to
|
||||||
|
// the total of key, value sizes of all previous elements.
|
||||||
|
used += int(lastElement.pos + lastElement.ksize)
|
||||||
|
s.BranchInuse += used
|
||||||
|
s.BranchOverflowN += int(p.overflow)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Keep track of maximum page depth.
|
||||||
|
if depth+1 > s.Depth {
|
||||||
|
s.Depth = (depth + 1)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// Alloc stats can be computed from page counts and pageSize.
|
||||||
|
s.BranchAlloc = (s.BranchPageN + s.BranchOverflowN) * pageSize
|
||||||
|
s.LeafAlloc = (s.LeafPageN + s.LeafOverflowN) * pageSize
|
||||||
|
|
||||||
|
// Add the max depth of sub-buckets to get total nested depth.
|
||||||
|
s.Depth += subStats.Depth
|
||||||
|
// Add the stats for all sub-buckets
|
||||||
|
s.Add(subStats)
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
// forEachPage iterates over every page in a bucket, including inline pages.
|
||||||
|
func (b *Bucket) forEachPage(fn func(*page, int)) {
|
||||||
|
// If we have an inline page then just use that.
|
||||||
|
if b.page != nil {
|
||||||
|
fn(b.page, 0)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise traverse the page hierarchy.
|
||||||
|
b.tx.forEachPage(b.root, 0, fn)
|
||||||
|
}
|
||||||
|
|
||||||
|
// forEachPageNode iterates over every page (or node) in a bucket.
|
||||||
|
// This also includes inline pages.
|
||||||
|
func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) {
|
||||||
|
// If we have an inline page or root node then just use that.
|
||||||
|
if b.page != nil {
|
||||||
|
fn(b.page, nil, 0)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
b._forEachPageNode(b.root, 0, fn)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *Bucket) _forEachPageNode(pgid pgid, depth int, fn func(*page, *node, int)) {
|
||||||
|
var p, n = b.pageNode(pgid)
|
||||||
|
|
||||||
|
// Execute function.
|
||||||
|
fn(p, n, depth)
|
||||||
|
|
||||||
|
// Recursively loop over children.
|
||||||
|
if p != nil {
|
||||||
|
if (p.flags & branchPageFlag) != 0 {
|
||||||
|
for i := 0; i < int(p.count); i++ {
|
||||||
|
elem := p.branchPageElement(uint16(i))
|
||||||
|
b._forEachPageNode(elem.pgid, depth+1, fn)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if !n.isLeaf {
|
||||||
|
for _, inode := range n.inodes {
|
||||||
|
b._forEachPageNode(inode.pgid, depth+1, fn)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// spill writes all the nodes for this bucket to dirty pages.
|
||||||
|
func (b *Bucket) spill() error {
|
||||||
|
// Spill all child buckets first.
|
||||||
|
for name, child := range b.buckets {
|
||||||
|
// If the child bucket is small enough and it has no child buckets then
|
||||||
|
// write it inline into the parent bucket's page. Otherwise spill it
|
||||||
|
// like a normal bucket and make the parent value a pointer to the page.
|
||||||
|
var value []byte
|
||||||
|
if child.inlineable() {
|
||||||
|
child.free()
|
||||||
|
value = child.write()
|
||||||
|
} else {
|
||||||
|
if err := child.spill(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the child bucket header in this bucket.
|
||||||
|
value = make([]byte, unsafe.Sizeof(bucket{}))
|
||||||
|
var bucket = (*bucket)(unsafe.Pointer(&value[0]))
|
||||||
|
*bucket = *child.bucket
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip writing the bucket if there are no materialized nodes.
|
||||||
|
if child.rootNode == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update parent node.
|
||||||
|
var c = b.Cursor()
|
||||||
|
k, _, flags := c.seek([]byte(name))
|
||||||
|
if !bytes.Equal([]byte(name), k) {
|
||||||
|
panic(fmt.Sprintf("misplaced bucket header: %x -> %x", []byte(name), k))
|
||||||
|
}
|
||||||
|
if flags&bucketLeafFlag == 0 {
|
||||||
|
panic(fmt.Sprintf("unexpected bucket header flag: %x", flags))
|
||||||
|
}
|
||||||
|
c.node().put([]byte(name), []byte(name), value, 0, bucketLeafFlag)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ignore if there's not a materialized root node.
|
||||||
|
if b.rootNode == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Spill nodes.
|
||||||
|
if err := b.rootNode.spill(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
b.rootNode = b.rootNode.root()
|
||||||
|
|
||||||
|
// Update the root node for this bucket.
|
||||||
|
if b.rootNode.pgid >= b.tx.meta.pgid {
|
||||||
|
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid))
|
||||||
|
}
|
||||||
|
b.root = b.rootNode.pgid
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// inlineable returns true if a bucket is small enough to be written inline
|
||||||
|
// and if it contains no subbuckets. Otherwise returns false.
|
||||||
|
func (b *Bucket) inlineable() bool {
|
||||||
|
var n = b.rootNode
|
||||||
|
|
||||||
|
// Bucket must only contain a single leaf node.
|
||||||
|
if n == nil || !n.isLeaf {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bucket is not inlineable if it contains subbuckets or if it goes beyond
|
||||||
|
// our threshold for inline bucket size.
|
||||||
|
var size = pageHeaderSize
|
||||||
|
for _, inode := range n.inodes {
|
||||||
|
size += leafPageElementSize + len(inode.key) + len(inode.value)
|
||||||
|
|
||||||
|
if inode.flags&bucketLeafFlag != 0 {
|
||||||
|
return false
|
||||||
|
} else if size > b.maxInlineBucketSize() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the maximum total size of a bucket to make it a candidate for inlining.
|
||||||
|
func (b *Bucket) maxInlineBucketSize() int {
|
||||||
|
return b.tx.db.pageSize / 4
|
||||||
|
}
|
||||||
|
|
||||||
|
// write allocates and writes a bucket to a byte slice.
|
||||||
|
func (b *Bucket) write() []byte {
|
||||||
|
// Allocate the appropriate size.
|
||||||
|
var n = b.rootNode
|
||||||
|
var value = make([]byte, bucketHeaderSize+n.size())
|
||||||
|
|
||||||
|
// Write a bucket header.
|
||||||
|
var bucket = (*bucket)(unsafe.Pointer(&value[0]))
|
||||||
|
*bucket = *b.bucket
|
||||||
|
|
||||||
|
// Convert byte slice to a fake page and write the root node.
|
||||||
|
var p = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
|
||||||
|
n.write(p)
|
||||||
|
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
|
||||||
|
// rebalance attempts to balance all nodes.
|
||||||
|
func (b *Bucket) rebalance() {
|
||||||
|
for _, n := range b.nodes {
|
||||||
|
n.rebalance()
|
||||||
|
}
|
||||||
|
for _, child := range b.buckets {
|
||||||
|
child.rebalance()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// node creates a node from a page and associates it with a given parent.
|
||||||
|
func (b *Bucket) node(pgid pgid, parent *node) *node {
|
||||||
|
_assert(b.nodes != nil, "nodes map expected")
|
||||||
|
|
||||||
|
// Retrieve node if it's already been created.
|
||||||
|
if n := b.nodes[pgid]; n != nil {
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise create a node and cache it.
|
||||||
|
n := &node{bucket: b, parent: parent}
|
||||||
|
if parent == nil {
|
||||||
|
b.rootNode = n
|
||||||
|
} else {
|
||||||
|
parent.children = append(parent.children, n)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use the inline page if this is an inline bucket.
|
||||||
|
var p = b.page
|
||||||
|
if p == nil {
|
||||||
|
p = b.tx.page(pgid)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read the page into the node and cache it.
|
||||||
|
n.read(p)
|
||||||
|
b.nodes[pgid] = n
|
||||||
|
|
||||||
|
// Update statistics.
|
||||||
|
b.tx.stats.NodeCount++
|
||||||
|
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
|
||||||
|
// free recursively frees all pages in the bucket.
|
||||||
|
func (b *Bucket) free() {
|
||||||
|
if b.root == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var tx = b.tx
|
||||||
|
b.forEachPageNode(func(p *page, n *node, _ int) {
|
||||||
|
if p != nil {
|
||||||
|
tx.db.freelist.free(tx.meta.txid, p)
|
||||||
|
} else {
|
||||||
|
n.free()
|
||||||
|
}
|
||||||
|
})
|
||||||
|
b.root = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// dereference removes all references to the old mmap.
|
||||||
|
func (b *Bucket) dereference() {
|
||||||
|
if b.rootNode != nil {
|
||||||
|
b.rootNode.root().dereference()
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, child := range b.buckets {
|
||||||
|
child.dereference()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// pageNode returns the in-memory node, if it exists.
|
||||||
|
// Otherwise returns the underlying page.
|
||||||
|
func (b *Bucket) pageNode(id pgid) (*page, *node) {
|
||||||
|
// Inline buckets have a fake page embedded in their value so treat them
|
||||||
|
// differently. We'll return the rootNode (if available) or the fake page.
|
||||||
|
if b.root == 0 {
|
||||||
|
if id != 0 {
|
||||||
|
panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id))
|
||||||
|
}
|
||||||
|
if b.rootNode != nil {
|
||||||
|
return nil, b.rootNode
|
||||||
|
}
|
||||||
|
return b.page, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check the node cache for non-inline buckets.
|
||||||
|
if b.nodes != nil {
|
||||||
|
if n := b.nodes[id]; n != nil {
|
||||||
|
return nil, n
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Finally lookup the page from the transaction if no node is materialized.
|
||||||
|
return b.tx.page(id), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// BucketStats records statistics about resources used by a bucket.
type BucketStats struct {
	// Page count statistics.

	// BranchPageN is the number of logical branch pages.
	BranchPageN int
	// BranchOverflowN is the number of physical branch overflow pages.
	BranchOverflowN int
	// LeafPageN is the number of logical leaf pages.
	LeafPageN int
	// LeafOverflowN is the number of physical leaf overflow pages.
	LeafOverflowN int

	// Tree statistics.

	// KeyN is the number of keys/value pairs.
	KeyN int
	// Depth is the number of levels in the B+tree.
	Depth int

	// Page size utilization.

	// BranchAlloc is the bytes allocated for physical branch pages.
	BranchAlloc int
	// BranchInuse is the bytes actually used for branch data.
	BranchInuse int
	// LeafAlloc is the bytes allocated for physical leaf pages.
	LeafAlloc int
	// LeafInuse is the bytes actually used for leaf data.
	LeafInuse int

	// Bucket statistics.

	// BucketN is the total number of buckets including the top bucket.
	BucketN int
	// InlineBucketN is the total number of inlined buckets.
	InlineBucketN int
	// InlineBucketInuse is the bytes used for inlined buckets (also
	// accounted for in LeafInuse).
	InlineBucketInuse int
}
|
||||||
|
|
||||||
|
func (s *BucketStats) Add(other BucketStats) {
|
||||||
|
s.BranchPageN += other.BranchPageN
|
||||||
|
s.BranchOverflowN += other.BranchOverflowN
|
||||||
|
s.LeafPageN += other.LeafPageN
|
||||||
|
s.LeafOverflowN += other.LeafOverflowN
|
||||||
|
s.KeyN += other.KeyN
|
||||||
|
if s.Depth < other.Depth {
|
||||||
|
s.Depth = other.Depth
|
||||||
|
}
|
||||||
|
s.BranchAlloc += other.BranchAlloc
|
||||||
|
s.BranchInuse += other.BranchInuse
|
||||||
|
s.LeafAlloc += other.LeafAlloc
|
||||||
|
s.LeafInuse += other.LeafInuse
|
||||||
|
|
||||||
|
s.BucketN += other.BucketN
|
||||||
|
s.InlineBucketN += other.InlineBucketN
|
||||||
|
s.InlineBucketInuse += other.InlineBucketInuse
|
||||||
|
}
|
||||||
|
|
||||||
|
// cloneBytes returns a copy of a given slice. The result never shares
// storage with v and is non-nil even when v is nil or empty.
func cloneBytes(v []byte) []byte {
	return append(make([]byte, 0, len(v)), v...)
}
|
396
vendor/go.etcd.io/bbolt/cursor.go
generated
vendored
Normal file
396
vendor/go.etcd.io/bbolt/cursor.go
generated
vendored
Normal file
@ -0,0 +1,396 @@
|
|||||||
|
package bbolt
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Cursor represents an iterator that can traverse over all key/value pairs in a bucket in sorted order.
|
||||||
|
// Cursors see nested buckets with value == nil.
|
||||||
|
// Cursors can be obtained from a transaction and are valid as long as the transaction is open.
|
||||||
|
//
|
||||||
|
// Keys and values returned from the cursor are only valid for the life of the transaction.
|
||||||
|
//
|
||||||
|
// Changing data while traversing with a cursor may cause it to be invalidated
|
||||||
|
// and return unexpected keys and/or values. You must reposition your cursor
|
||||||
|
// after mutating data.
|
||||||
|
type Cursor struct {
|
||||||
|
bucket *Bucket
|
||||||
|
stack []elemRef
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bucket returns the bucket that this cursor was created from.
|
||||||
|
func (c *Cursor) Bucket() *Bucket {
|
||||||
|
return c.bucket
|
||||||
|
}
|
||||||
|
|
||||||
|
// First moves the cursor to the first item in the bucket and returns its key and value.
|
||||||
|
// If the bucket is empty then a nil key and value are returned.
|
||||||
|
// The returned key and value are only valid for the life of the transaction.
|
||||||
|
func (c *Cursor) First() (key []byte, value []byte) {
|
||||||
|
_assert(c.bucket.tx.db != nil, "tx closed")
|
||||||
|
c.stack = c.stack[:0]
|
||||||
|
p, n := c.bucket.pageNode(c.bucket.root)
|
||||||
|
c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
|
||||||
|
c.first()
|
||||||
|
|
||||||
|
// If we land on an empty page then move to the next value.
|
||||||
|
// https://github.com/boltdb/bolt/issues/450
|
||||||
|
if c.stack[len(c.stack)-1].count() == 0 {
|
||||||
|
c.next()
|
||||||
|
}
|
||||||
|
|
||||||
|
k, v, flags := c.keyValue()
|
||||||
|
if (flags & uint32(bucketLeafFlag)) != 0 {
|
||||||
|
return k, nil
|
||||||
|
}
|
||||||
|
return k, v
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Last moves the cursor to the last item in the bucket and returns its key and value.
|
||||||
|
// If the bucket is empty then a nil key and value are returned.
|
||||||
|
// The returned key and value are only valid for the life of the transaction.
|
||||||
|
func (c *Cursor) Last() (key []byte, value []byte) {
|
||||||
|
_assert(c.bucket.tx.db != nil, "tx closed")
|
||||||
|
c.stack = c.stack[:0]
|
||||||
|
p, n := c.bucket.pageNode(c.bucket.root)
|
||||||
|
ref := elemRef{page: p, node: n}
|
||||||
|
ref.index = ref.count() - 1
|
||||||
|
c.stack = append(c.stack, ref)
|
||||||
|
c.last()
|
||||||
|
k, v, flags := c.keyValue()
|
||||||
|
if (flags & uint32(bucketLeafFlag)) != 0 {
|
||||||
|
return k, nil
|
||||||
|
}
|
||||||
|
return k, v
|
||||||
|
}
|
||||||
|
|
||||||
|
// Next moves the cursor to the next item in the bucket and returns its key and value.
|
||||||
|
// If the cursor is at the end of the bucket then a nil key and value are returned.
|
||||||
|
// The returned key and value are only valid for the life of the transaction.
|
||||||
|
func (c *Cursor) Next() (key []byte, value []byte) {
|
||||||
|
_assert(c.bucket.tx.db != nil, "tx closed")
|
||||||
|
k, v, flags := c.next()
|
||||||
|
if (flags & uint32(bucketLeafFlag)) != 0 {
|
||||||
|
return k, nil
|
||||||
|
}
|
||||||
|
return k, v
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prev moves the cursor to the previous item in the bucket and returns its key and value.
|
||||||
|
// If the cursor is at the beginning of the bucket then a nil key and value are returned.
|
||||||
|
// The returned key and value are only valid for the life of the transaction.
|
||||||
|
func (c *Cursor) Prev() (key []byte, value []byte) {
|
||||||
|
_assert(c.bucket.tx.db != nil, "tx closed")
|
||||||
|
|
||||||
|
// Attempt to move back one element until we're successful.
|
||||||
|
// Move up the stack as we hit the beginning of each page in our stack.
|
||||||
|
for i := len(c.stack) - 1; i >= 0; i-- {
|
||||||
|
elem := &c.stack[i]
|
||||||
|
if elem.index > 0 {
|
||||||
|
elem.index--
|
||||||
|
break
|
||||||
|
}
|
||||||
|
c.stack = c.stack[:i]
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we've hit the end then return nil.
|
||||||
|
if len(c.stack) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Move down the stack to find the last element of the last leaf under this branch.
|
||||||
|
c.last()
|
||||||
|
k, v, flags := c.keyValue()
|
||||||
|
if (flags & uint32(bucketLeafFlag)) != 0 {
|
||||||
|
return k, nil
|
||||||
|
}
|
||||||
|
return k, v
|
||||||
|
}
|
||||||
|
|
||||||
|
// Seek moves the cursor to a given key and returns it.
|
||||||
|
// If the key does not exist then the next key is used. If no keys
|
||||||
|
// follow, a nil key is returned.
|
||||||
|
// The returned key and value are only valid for the life of the transaction.
|
||||||
|
func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) {
|
||||||
|
k, v, flags := c.seek(seek)
|
||||||
|
|
||||||
|
// If we ended up after the last element of a page then move to the next one.
|
||||||
|
if ref := &c.stack[len(c.stack)-1]; ref.index >= ref.count() {
|
||||||
|
k, v, flags = c.next()
|
||||||
|
}
|
||||||
|
|
||||||
|
if k == nil {
|
||||||
|
return nil, nil
|
||||||
|
} else if (flags & uint32(bucketLeafFlag)) != 0 {
|
||||||
|
return k, nil
|
||||||
|
}
|
||||||
|
return k, v
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete removes the current key/value under the cursor from the bucket.
|
||||||
|
// Delete fails if current key/value is a bucket or if the transaction is not writable.
|
||||||
|
func (c *Cursor) Delete() error {
|
||||||
|
if c.bucket.tx.db == nil {
|
||||||
|
return ErrTxClosed
|
||||||
|
} else if !c.bucket.Writable() {
|
||||||
|
return ErrTxNotWritable
|
||||||
|
}
|
||||||
|
|
||||||
|
key, _, flags := c.keyValue()
|
||||||
|
// Return an error if current value is a bucket.
|
||||||
|
if (flags & bucketLeafFlag) != 0 {
|
||||||
|
return ErrIncompatibleValue
|
||||||
|
}
|
||||||
|
c.node().del(key)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// seek moves the cursor to a given key and returns it.
|
||||||
|
// If the key does not exist then the next key is used.
|
||||||
|
func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) {
|
||||||
|
_assert(c.bucket.tx.db != nil, "tx closed")
|
||||||
|
|
||||||
|
// Start from root page/node and traverse to correct page.
|
||||||
|
c.stack = c.stack[:0]
|
||||||
|
c.search(seek, c.bucket.root)
|
||||||
|
|
||||||
|
// If this is a bucket then return a nil value.
|
||||||
|
return c.keyValue()
|
||||||
|
}
|
||||||
|
|
||||||
|
// first moves the cursor to the first leaf element under the last page in the stack.
|
||||||
|
func (c *Cursor) first() {
|
||||||
|
for {
|
||||||
|
// Exit when we hit a leaf page.
|
||||||
|
var ref = &c.stack[len(c.stack)-1]
|
||||||
|
if ref.isLeaf() {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Keep adding pages pointing to the first element to the stack.
|
||||||
|
var pgid pgid
|
||||||
|
if ref.node != nil {
|
||||||
|
pgid = ref.node.inodes[ref.index].pgid
|
||||||
|
} else {
|
||||||
|
pgid = ref.page.branchPageElement(uint16(ref.index)).pgid
|
||||||
|
}
|
||||||
|
p, n := c.bucket.pageNode(pgid)
|
||||||
|
c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// last moves the cursor to the last leaf element under the last page in the stack.
|
||||||
|
func (c *Cursor) last() {
|
||||||
|
for {
|
||||||
|
// Exit when we hit a leaf page.
|
||||||
|
ref := &c.stack[len(c.stack)-1]
|
||||||
|
if ref.isLeaf() {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Keep adding pages pointing to the last element in the stack.
|
||||||
|
var pgid pgid
|
||||||
|
if ref.node != nil {
|
||||||
|
pgid = ref.node.inodes[ref.index].pgid
|
||||||
|
} else {
|
||||||
|
pgid = ref.page.branchPageElement(uint16(ref.index)).pgid
|
||||||
|
}
|
||||||
|
p, n := c.bucket.pageNode(pgid)
|
||||||
|
|
||||||
|
var nextRef = elemRef{page: p, node: n}
|
||||||
|
nextRef.index = nextRef.count() - 1
|
||||||
|
c.stack = append(c.stack, nextRef)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// next moves to the next leaf element and returns the key and value.
|
||||||
|
// If the cursor is at the last leaf element then it stays there and returns nil.
|
||||||
|
func (c *Cursor) next() (key []byte, value []byte, flags uint32) {
|
||||||
|
for {
|
||||||
|
// Attempt to move over one element until we're successful.
|
||||||
|
// Move up the stack as we hit the end of each page in our stack.
|
||||||
|
var i int
|
||||||
|
for i = len(c.stack) - 1; i >= 0; i-- {
|
||||||
|
elem := &c.stack[i]
|
||||||
|
if elem.index < elem.count()-1 {
|
||||||
|
elem.index++
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we've hit the root page then stop and return. This will leave the
|
||||||
|
// cursor on the last element of the last page.
|
||||||
|
if i == -1 {
|
||||||
|
return nil, nil, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise start from where we left off in the stack and find the
|
||||||
|
// first element of the first leaf page.
|
||||||
|
c.stack = c.stack[:i+1]
|
||||||
|
c.first()
|
||||||
|
|
||||||
|
// If this is an empty page then restart and move back up the stack.
|
||||||
|
// https://github.com/boltdb/bolt/issues/450
|
||||||
|
if c.stack[len(c.stack)-1].count() == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
return c.keyValue()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// search recursively performs a binary search against a given page/node until it finds a given key.
|
||||||
|
func (c *Cursor) search(key []byte, pgid pgid) {
|
||||||
|
p, n := c.bucket.pageNode(pgid)
|
||||||
|
if p != nil && (p.flags&(branchPageFlag|leafPageFlag)) == 0 {
|
||||||
|
panic(fmt.Sprintf("invalid page type: %d: %x", p.id, p.flags))
|
||||||
|
}
|
||||||
|
e := elemRef{page: p, node: n}
|
||||||
|
c.stack = append(c.stack, e)
|
||||||
|
|
||||||
|
// If we're on a leaf page/node then find the specific node.
|
||||||
|
if e.isLeaf() {
|
||||||
|
c.nsearch(key)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if n != nil {
|
||||||
|
c.searchNode(key, n)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
c.searchPage(key, p)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Cursor) searchNode(key []byte, n *node) {
|
||||||
|
var exact bool
|
||||||
|
index := sort.Search(len(n.inodes), func(i int) bool {
|
||||||
|
// TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
|
||||||
|
// sort.Search() finds the lowest index where f() != -1 but we need the highest index.
|
||||||
|
ret := bytes.Compare(n.inodes[i].key, key)
|
||||||
|
if ret == 0 {
|
||||||
|
exact = true
|
||||||
|
}
|
||||||
|
return ret != -1
|
||||||
|
})
|
||||||
|
if !exact && index > 0 {
|
||||||
|
index--
|
||||||
|
}
|
||||||
|
c.stack[len(c.stack)-1].index = index
|
||||||
|
|
||||||
|
// Recursively search to the next page.
|
||||||
|
c.search(key, n.inodes[index].pgid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Cursor) searchPage(key []byte, p *page) {
|
||||||
|
// Binary search for the correct range.
|
||||||
|
inodes := p.branchPageElements()
|
||||||
|
|
||||||
|
var exact bool
|
||||||
|
index := sort.Search(int(p.count), func(i int) bool {
|
||||||
|
// TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
|
||||||
|
// sort.Search() finds the lowest index where f() != -1 but we need the highest index.
|
||||||
|
ret := bytes.Compare(inodes[i].key(), key)
|
||||||
|
if ret == 0 {
|
||||||
|
exact = true
|
||||||
|
}
|
||||||
|
return ret != -1
|
||||||
|
})
|
||||||
|
if !exact && index > 0 {
|
||||||
|
index--
|
||||||
|
}
|
||||||
|
c.stack[len(c.stack)-1].index = index
|
||||||
|
|
||||||
|
// Recursively search to the next page.
|
||||||
|
c.search(key, inodes[index].pgid)
|
||||||
|
}
|
||||||
|
|
||||||
|
// nsearch searches the leaf node on the top of the stack for a key.
|
||||||
|
func (c *Cursor) nsearch(key []byte) {
|
||||||
|
e := &c.stack[len(c.stack)-1]
|
||||||
|
p, n := e.page, e.node
|
||||||
|
|
||||||
|
// If we have a node then search its inodes.
|
||||||
|
if n != nil {
|
||||||
|
index := sort.Search(len(n.inodes), func(i int) bool {
|
||||||
|
return bytes.Compare(n.inodes[i].key, key) != -1
|
||||||
|
})
|
||||||
|
e.index = index
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we have a page then search its leaf elements.
|
||||||
|
inodes := p.leafPageElements()
|
||||||
|
index := sort.Search(int(p.count), func(i int) bool {
|
||||||
|
return bytes.Compare(inodes[i].key(), key) != -1
|
||||||
|
})
|
||||||
|
e.index = index
|
||||||
|
}
|
||||||
|
|
||||||
|
// keyValue returns the key and value of the current leaf element.
|
||||||
|
func (c *Cursor) keyValue() ([]byte, []byte, uint32) {
|
||||||
|
ref := &c.stack[len(c.stack)-1]
|
||||||
|
|
||||||
|
// If the cursor is pointing to the end of page/node then return nil.
|
||||||
|
if ref.count() == 0 || ref.index >= ref.count() {
|
||||||
|
return nil, nil, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Retrieve value from node.
|
||||||
|
if ref.node != nil {
|
||||||
|
inode := &ref.node.inodes[ref.index]
|
||||||
|
return inode.key, inode.value, inode.flags
|
||||||
|
}
|
||||||
|
|
||||||
|
// Or retrieve value from page.
|
||||||
|
elem := ref.page.leafPageElement(uint16(ref.index))
|
||||||
|
return elem.key(), elem.value(), elem.flags
|
||||||
|
}
|
||||||
|
|
||||||
|
// node returns the node that the cursor is currently positioned on.
|
||||||
|
func (c *Cursor) node() *node {
|
||||||
|
_assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack")
|
||||||
|
|
||||||
|
// If the top of the stack is a leaf node then just return it.
|
||||||
|
if ref := &c.stack[len(c.stack)-1]; ref.node != nil && ref.isLeaf() {
|
||||||
|
return ref.node
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start from root and traverse down the hierarchy.
|
||||||
|
var n = c.stack[0].node
|
||||||
|
if n == nil {
|
||||||
|
n = c.bucket.node(c.stack[0].page.id, nil)
|
||||||
|
}
|
||||||
|
for _, ref := range c.stack[:len(c.stack)-1] {
|
||||||
|
_assert(!n.isLeaf, "expected branch node")
|
||||||
|
n = n.childAt(int(ref.index))
|
||||||
|
}
|
||||||
|
_assert(n.isLeaf, "expected leaf node")
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
|
||||||
|
// elemRef represents a reference to an element on a given page/node.
|
||||||
|
type elemRef struct {
|
||||||
|
page *page
|
||||||
|
node *node
|
||||||
|
index int
|
||||||
|
}
|
||||||
|
|
||||||
|
// isLeaf returns whether the ref is pointing at a leaf page/node.
|
||||||
|
func (r *elemRef) isLeaf() bool {
|
||||||
|
if r.node != nil {
|
||||||
|
return r.node.isLeaf
|
||||||
|
}
|
||||||
|
return (r.page.flags & leafPageFlag) != 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// count returns the number of inodes or page elements.
|
||||||
|
func (r *elemRef) count() int {
|
||||||
|
if r.node != nil {
|
||||||
|
return len(r.node.inodes)
|
||||||
|
}
|
||||||
|
return int(r.page.count)
|
||||||
|
}
|
1138
vendor/go.etcd.io/bbolt/db.go
generated
vendored
Normal file
1138
vendor/go.etcd.io/bbolt/db.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
44
vendor/go.etcd.io/bbolt/doc.go
generated
vendored
Normal file
44
vendor/go.etcd.io/bbolt/doc.go
generated
vendored
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
/*
|
||||||
|
package bbolt implements a low-level key/value store in pure Go. It supports
|
||||||
|
fully serializable transactions, ACID semantics, and lock-free MVCC with
|
||||||
|
multiple readers and a single writer. Bolt can be used for projects that
|
||||||
|
want a simple data store without the need to add large dependencies such as
|
||||||
|
Postgres or MySQL.
|
||||||
|
|
||||||
|
Bolt is a single-level, zero-copy, B+tree data store. This means that Bolt is
|
||||||
|
optimized for fast read access and does not require recovery in the event of a
|
||||||
|
system crash. Transactions which have not finished committing will simply be
|
||||||
|
rolled back in the event of a crash.
|
||||||
|
|
||||||
|
The design of Bolt is based on Howard Chu's LMDB database project.
|
||||||
|
|
||||||
|
Bolt currently works on Windows, Mac OS X, and Linux.
|
||||||
|
|
||||||
|
|
||||||
|
Basics
|
||||||
|
|
||||||
|
There are only a few types in Bolt: DB, Bucket, Tx, and Cursor. The DB is
|
||||||
|
a collection of buckets and is represented by a single file on disk. A bucket is
|
||||||
|
a collection of unique keys that are associated with values.
|
||||||
|
|
||||||
|
Transactions provide either read-only or read-write access to the database.
|
||||||
|
Read-only transactions can retrieve key/value pairs and can use Cursors to
|
||||||
|
iterate over the dataset sequentially. Read-write transactions can create and
|
||||||
|
delete buckets and can insert and remove keys. Only one read-write transaction
|
||||||
|
is allowed at a time.
|
||||||
|
|
||||||
|
|
||||||
|
Caveats
|
||||||
|
|
||||||
|
The database uses a read-only, memory-mapped data file to ensure that
|
||||||
|
applications cannot corrupt the database, however, this means that keys and
|
||||||
|
values returned from Bolt cannot be changed. Writing to a read-only byte slice
|
||||||
|
will cause Go to panic.
|
||||||
|
|
||||||
|
Keys and values retrieved from the database are only valid for the life of
|
||||||
|
the transaction. When used outside the transaction, these byte slices can
|
||||||
|
point to different data or can point to invalid memory which will cause a panic.
|
||||||
|
|
||||||
|
|
||||||
|
*/
|
||||||
|
package bbolt
|
71
vendor/go.etcd.io/bbolt/errors.go
generated
vendored
Normal file
71
vendor/go.etcd.io/bbolt/errors.go
generated
vendored
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
package bbolt
|
||||||
|
|
||||||
|
import "errors"
|
||||||
|
|
||||||
|
// Errors that may be returned when opening a DB or calling its methods.
var (
	// ErrDatabaseNotOpen reports that a DB instance was used before being
	// opened or after being closed.
	ErrDatabaseNotOpen = errors.New("database not open")

	// ErrDatabaseOpen reports an attempt to open a database that is
	// already open.
	ErrDatabaseOpen = errors.New("database already open")

	// ErrInvalid reports that both meta pages of a database are invalid,
	// which typically means the file is not a bolt database.
	ErrInvalid = errors.New("invalid database")

	// ErrVersionMismatch reports that the data file was created with a
	// different version of Bolt.
	ErrVersionMismatch = errors.New("version mismatch")

	// ErrChecksum reports that either meta page checksum does not match.
	ErrChecksum = errors.New("checksum error")

	// ErrTimeout reports that an exclusive lock on the data file could not
	// be obtained within the timeout passed to Open().
	ErrTimeout = errors.New("timeout")
)
|
||||||
|
|
||||||
|
// Errors that may occur when beginning or committing a Tx.
var (
	// ErrTxNotWritable reports a write operation attempted on a read-only
	// transaction.
	ErrTxNotWritable = errors.New("tx not writable")

	// ErrTxClosed reports a commit or rollback of a transaction that has
	// already been committed or rolled back.
	ErrTxClosed = errors.New("tx closed")

	// ErrDatabaseReadOnly reports that a mutating transaction was started
	// on a read-only database.
	ErrDatabaseReadOnly = errors.New("database is in read-only mode")
)
|
||||||
|
|
||||||
|
// Errors that may occur when putting or deleting a value or a bucket.
var (
	// ErrBucketNotFound reports access to a bucket that has not been
	// created yet.
	ErrBucketNotFound = errors.New("bucket not found")

	// ErrBucketExists reports creation of a bucket that already exists.
	ErrBucketExists = errors.New("bucket already exists")

	// ErrBucketNameRequired reports creation of a bucket with a blank name.
	ErrBucketNameRequired = errors.New("bucket name required")

	// ErrKeyRequired reports insertion of a zero-length key.
	ErrKeyRequired = errors.New("key required")

	// ErrKeyTooLarge reports insertion of a key larger than MaxKeySize.
	ErrKeyTooLarge = errors.New("key too large")

	// ErrValueTooLarge reports insertion of a value larger than MaxValueSize.
	ErrValueTooLarge = errors.New("value too large")

	// ErrIncompatibleValue reports an attempt to create or delete a bucket
	// on an existing non-bucket key, or to create or delete a non-bucket
	// key on an existing bucket key.
	ErrIncompatibleValue = errors.New("incompatible value")
)
|
333
vendor/go.etcd.io/bbolt/freelist.go
generated
vendored
Normal file
333
vendor/go.etcd.io/bbolt/freelist.go
generated
vendored
Normal file
@ -0,0 +1,333 @@
|
|||||||
|
package bbolt
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// txPending holds a list of pgids and corresponding allocation txns
|
||||||
|
// that are pending to be freed.
|
||||||
|
type txPending struct {
|
||||||
|
ids []pgid
|
||||||
|
alloctx []txid // txids allocating the ids
|
||||||
|
lastReleaseBegin txid // beginning txid of last matching releaseRange
|
||||||
|
}
|
||||||
|
|
||||||
|
// freelist represents a list of all pages that are available for allocation.
|
||||||
|
// It also tracks pages that have been freed but are still in use by open transactions.
|
||||||
|
type freelist struct {
|
||||||
|
ids []pgid // all free and available free page ids.
|
||||||
|
allocs map[pgid]txid // mapping of txid that allocated a pgid.
|
||||||
|
pending map[txid]*txPending // mapping of soon-to-be free page ids by tx.
|
||||||
|
cache map[pgid]bool // fast lookup of all free and pending page ids.
|
||||||
|
}
|
||||||
|
|
||||||
|
// newFreelist returns an empty, initialized freelist.
|
||||||
|
func newFreelist() *freelist {
|
||||||
|
return &freelist{
|
||||||
|
allocs: make(map[pgid]txid),
|
||||||
|
pending: make(map[txid]*txPending),
|
||||||
|
cache: make(map[pgid]bool),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// size returns the size of the page after serialization.
|
||||||
|
func (f *freelist) size() int {
|
||||||
|
n := f.count()
|
||||||
|
if n >= 0xFFFF {
|
||||||
|
// The first element will be used to store the count. See freelist.write.
|
||||||
|
n++
|
||||||
|
}
|
||||||
|
return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * n)
|
||||||
|
}
|
||||||
|
|
||||||
|
// count returns count of pages on the freelist
|
||||||
|
func (f *freelist) count() int {
|
||||||
|
return f.free_count() + f.pending_count()
|
||||||
|
}
|
||||||
|
|
||||||
|
// free_count returns count of free pages
|
||||||
|
func (f *freelist) free_count() int {
|
||||||
|
return len(f.ids)
|
||||||
|
}
|
||||||
|
|
||||||
|
// pending_count returns count of pending pages
|
||||||
|
func (f *freelist) pending_count() int {
|
||||||
|
var count int
|
||||||
|
for _, txp := range f.pending {
|
||||||
|
count += len(txp.ids)
|
||||||
|
}
|
||||||
|
return count
|
||||||
|
}
|
||||||
|
|
||||||
|
// copyall copies into dst a list of all free ids and all pending ids in one sorted list.
|
||||||
|
// f.count returns the minimum length required for dst.
|
||||||
|
func (f *freelist) copyall(dst []pgid) {
|
||||||
|
m := make(pgids, 0, f.pending_count())
|
||||||
|
for _, txp := range f.pending {
|
||||||
|
m = append(m, txp.ids...)
|
||||||
|
}
|
||||||
|
sort.Sort(m)
|
||||||
|
mergepgids(dst, f.ids, m)
|
||||||
|
}
|
||||||
|
|
||||||
|
// allocate returns the starting page id of a contiguous list of pages of a given size.
|
||||||
|
// If a contiguous block cannot be found then 0 is returned.
|
||||||
|
func (f *freelist) allocate(txid txid, n int) pgid {
|
||||||
|
if len(f.ids) == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
var initial, previd pgid
|
||||||
|
for i, id := range f.ids {
|
||||||
|
if id <= 1 {
|
||||||
|
panic(fmt.Sprintf("invalid page allocation: %d", id))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset initial page if this is not contiguous.
|
||||||
|
if previd == 0 || id-previd != 1 {
|
||||||
|
initial = id
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we found a contiguous block then remove it and return it.
|
||||||
|
if (id-initial)+1 == pgid(n) {
|
||||||
|
// If we're allocating off the beginning then take the fast path
|
||||||
|
// and just adjust the existing slice. This will use extra memory
|
||||||
|
// temporarily but the append() in free() will realloc the slice
|
||||||
|
// as is necessary.
|
||||||
|
if (i + 1) == n {
|
||||||
|
f.ids = f.ids[i+1:]
|
||||||
|
} else {
|
||||||
|
copy(f.ids[i-n+1:], f.ids[i+1:])
|
||||||
|
f.ids = f.ids[:len(f.ids)-n]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove from the free cache.
|
||||||
|
for i := pgid(0); i < pgid(n); i++ {
|
||||||
|
delete(f.cache, initial+i)
|
||||||
|
}
|
||||||
|
f.allocs[initial] = txid
|
||||||
|
return initial
|
||||||
|
}
|
||||||
|
|
||||||
|
previd = id
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// free releases a page and its overflow for a given transaction id.
|
||||||
|
// If the page is already free then a panic will occur.
|
||||||
|
func (f *freelist) free(txid txid, p *page) {
|
||||||
|
if p.id <= 1 {
|
||||||
|
panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Free page and all its overflow pages.
|
||||||
|
txp := f.pending[txid]
|
||||||
|
if txp == nil {
|
||||||
|
txp = &txPending{}
|
||||||
|
f.pending[txid] = txp
|
||||||
|
}
|
||||||
|
allocTxid, ok := f.allocs[p.id]
|
||||||
|
if ok {
|
||||||
|
delete(f.allocs, p.id)
|
||||||
|
} else if (p.flags & freelistPageFlag) != 0 {
|
||||||
|
// Freelist is always allocated by prior tx.
|
||||||
|
allocTxid = txid - 1
|
||||||
|
}
|
||||||
|
|
||||||
|
for id := p.id; id <= p.id+pgid(p.overflow); id++ {
|
||||||
|
// Verify that page is not already free.
|
||||||
|
if f.cache[id] {
|
||||||
|
panic(fmt.Sprintf("page %d already freed", id))
|
||||||
|
}
|
||||||
|
// Add to the freelist and cache.
|
||||||
|
txp.ids = append(txp.ids, id)
|
||||||
|
txp.alloctx = append(txp.alloctx, allocTxid)
|
||||||
|
f.cache[id] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// release moves all page ids for a transaction id (or older) to the freelist.
|
||||||
|
func (f *freelist) release(txid txid) {
|
||||||
|
m := make(pgids, 0)
|
||||||
|
for tid, txp := range f.pending {
|
||||||
|
if tid <= txid {
|
||||||
|
// Move transaction's pending pages to the available freelist.
|
||||||
|
// Don't remove from the cache since the page is still free.
|
||||||
|
m = append(m, txp.ids...)
|
||||||
|
delete(f.pending, tid)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sort.Sort(m)
|
||||||
|
f.ids = pgids(f.ids).merge(m)
|
||||||
|
}
|
||||||
|
|
||||||
|
// releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
|
||||||
|
func (f *freelist) releaseRange(begin, end txid) {
|
||||||
|
if begin > end {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var m pgids
|
||||||
|
for tid, txp := range f.pending {
|
||||||
|
if tid < begin || tid > end {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Don't recompute freed pages if ranges haven't updated.
|
||||||
|
if txp.lastReleaseBegin == begin {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
for i := 0; i < len(txp.ids); i++ {
|
||||||
|
if atx := txp.alloctx[i]; atx < begin || atx > end {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
m = append(m, txp.ids[i])
|
||||||
|
txp.ids[i] = txp.ids[len(txp.ids)-1]
|
||||||
|
txp.ids = txp.ids[:len(txp.ids)-1]
|
||||||
|
txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1]
|
||||||
|
txp.alloctx = txp.alloctx[:len(txp.alloctx)-1]
|
||||||
|
i--
|
||||||
|
}
|
||||||
|
txp.lastReleaseBegin = begin
|
||||||
|
if len(txp.ids) == 0 {
|
||||||
|
delete(f.pending, tid)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sort.Sort(m)
|
||||||
|
f.ids = pgids(f.ids).merge(m)
|
||||||
|
}
|
||||||
|
|
||||||
|
// rollback removes the pages from a given pending tx.
|
||||||
|
func (f *freelist) rollback(txid txid) {
|
||||||
|
// Remove page ids from cache.
|
||||||
|
txp := f.pending[txid]
|
||||||
|
if txp == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var m pgids
|
||||||
|
for i, pgid := range txp.ids {
|
||||||
|
delete(f.cache, pgid)
|
||||||
|
tx := txp.alloctx[i]
|
||||||
|
if tx == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if tx != txid {
|
||||||
|
// Pending free aborted; restore page back to alloc list.
|
||||||
|
f.allocs[pgid] = tx
|
||||||
|
} else {
|
||||||
|
// Freed page was allocated by this txn; OK to throw away.
|
||||||
|
m = append(m, pgid)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Remove pages from pending list and mark as free if allocated by txid.
|
||||||
|
delete(f.pending, txid)
|
||||||
|
sort.Sort(m)
|
||||||
|
f.ids = pgids(f.ids).merge(m)
|
||||||
|
}
|
||||||
|
|
||||||
|
// freed returns whether a given page is in the free list.
|
||||||
|
func (f *freelist) freed(pgid pgid) bool {
|
||||||
|
return f.cache[pgid]
|
||||||
|
}
|
||||||
|
|
||||||
|
// read initializes the freelist from a freelist page.
|
||||||
|
func (f *freelist) read(p *page) {
|
||||||
|
if (p.flags & freelistPageFlag) == 0 {
|
||||||
|
panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ()))
|
||||||
|
}
|
||||||
|
// If the page.count is at the max uint16 value (64k) then it's considered
|
||||||
|
// an overflow and the size of the freelist is stored as the first element.
|
||||||
|
idx, count := 0, int(p.count)
|
||||||
|
if count == 0xFFFF {
|
||||||
|
idx = 1
|
||||||
|
count = int(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy the list of page ids from the freelist.
|
||||||
|
if count == 0 {
|
||||||
|
f.ids = nil
|
||||||
|
} else {
|
||||||
|
ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx : idx+count]
|
||||||
|
f.ids = make([]pgid, len(ids))
|
||||||
|
copy(f.ids, ids)
|
||||||
|
|
||||||
|
// Make sure they're sorted.
|
||||||
|
sort.Sort(pgids(f.ids))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rebuild the page cache.
|
||||||
|
f.reindex()
|
||||||
|
}
|
||||||
|
|
||||||
|
// read initializes the freelist from a given list of ids.
|
||||||
|
func (f *freelist) readIDs(ids []pgid) {
|
||||||
|
f.ids = ids
|
||||||
|
f.reindex()
|
||||||
|
}
|
||||||
|
|
||||||
|
// write writes the page ids onto a freelist page. All free and pending ids are
|
||||||
|
// saved to disk since in the event of a program crash, all pending ids will
|
||||||
|
// become free.
|
||||||
|
func (f *freelist) write(p *page) error {
|
||||||
|
// Combine the old free pgids and pgids waiting on an open transaction.
|
||||||
|
|
||||||
|
// Update the header flag.
|
||||||
|
p.flags |= freelistPageFlag
|
||||||
|
|
||||||
|
// The page.count can only hold up to 64k elements so if we overflow that
|
||||||
|
// number then we handle it by putting the size in the first element.
|
||||||
|
lenids := f.count()
|
||||||
|
if lenids == 0 {
|
||||||
|
p.count = uint16(lenids)
|
||||||
|
} else if lenids < 0xFFFF {
|
||||||
|
p.count = uint16(lenids)
|
||||||
|
f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:])
|
||||||
|
} else {
|
||||||
|
p.count = 0xFFFF
|
||||||
|
((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(lenids)
|
||||||
|
f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:])
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// reload reads the freelist from a page and filters out pending items.
|
||||||
|
func (f *freelist) reload(p *page) {
|
||||||
|
f.read(p)
|
||||||
|
|
||||||
|
// Build a cache of only pending pages.
|
||||||
|
pcache := make(map[pgid]bool)
|
||||||
|
for _, txp := range f.pending {
|
||||||
|
for _, pendingID := range txp.ids {
|
||||||
|
pcache[pendingID] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check each page in the freelist and build a new available freelist
|
||||||
|
// with any pages not in the pending lists.
|
||||||
|
var a []pgid
|
||||||
|
for _, id := range f.ids {
|
||||||
|
if !pcache[id] {
|
||||||
|
a = append(a, id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
f.ids = a
|
||||||
|
|
||||||
|
// Once the available list is rebuilt then rebuild the free cache so that
|
||||||
|
// it includes the available and pending free pages.
|
||||||
|
f.reindex()
|
||||||
|
}
|
||||||
|
|
||||||
|
// reindex rebuilds the free cache based on available and pending free lists.
|
||||||
|
func (f *freelist) reindex() {
|
||||||
|
f.cache = make(map[pgid]bool, len(f.ids))
|
||||||
|
for _, id := range f.ids {
|
||||||
|
f.cache[id] = true
|
||||||
|
}
|
||||||
|
for _, txp := range f.pending {
|
||||||
|
for _, pendingID := range txp.ids {
|
||||||
|
f.cache[pendingID] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
604
vendor/go.etcd.io/bbolt/node.go
generated
vendored
Normal file
604
vendor/go.etcd.io/bbolt/node.go
generated
vendored
Normal file
@ -0,0 +1,604 @@
|
|||||||
|
package bbolt
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// node represents an in-memory, deserialized page.
|
||||||
|
type node struct {
|
||||||
|
bucket *Bucket
|
||||||
|
isLeaf bool
|
||||||
|
unbalanced bool
|
||||||
|
spilled bool
|
||||||
|
key []byte
|
||||||
|
pgid pgid
|
||||||
|
parent *node
|
||||||
|
children nodes
|
||||||
|
inodes inodes
|
||||||
|
}
|
||||||
|
|
||||||
|
// root returns the top-level node this node is attached to.
|
||||||
|
func (n *node) root() *node {
|
||||||
|
if n.parent == nil {
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
return n.parent.root()
|
||||||
|
}
|
||||||
|
|
||||||
|
// minKeys returns the minimum number of inodes this node should have.
|
||||||
|
func (n *node) minKeys() int {
|
||||||
|
if n.isLeaf {
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
return 2
|
||||||
|
}
|
||||||
|
|
||||||
|
// size returns the size of the node after serialization.
|
||||||
|
func (n *node) size() int {
|
||||||
|
sz, elsz := pageHeaderSize, n.pageElementSize()
|
||||||
|
for i := 0; i < len(n.inodes); i++ {
|
||||||
|
item := &n.inodes[i]
|
||||||
|
sz += elsz + len(item.key) + len(item.value)
|
||||||
|
}
|
||||||
|
return sz
|
||||||
|
}
|
||||||
|
|
||||||
|
// sizeLessThan returns true if the node is less than a given size.
|
||||||
|
// This is an optimization to avoid calculating a large node when we only need
|
||||||
|
// to know if it fits inside a certain page size.
|
||||||
|
func (n *node) sizeLessThan(v int) bool {
|
||||||
|
sz, elsz := pageHeaderSize, n.pageElementSize()
|
||||||
|
for i := 0; i < len(n.inodes); i++ {
|
||||||
|
item := &n.inodes[i]
|
||||||
|
sz += elsz + len(item.key) + len(item.value)
|
||||||
|
if sz >= v {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// pageElementSize returns the size of each page element based on the type of node.
|
||||||
|
func (n *node) pageElementSize() int {
|
||||||
|
if n.isLeaf {
|
||||||
|
return leafPageElementSize
|
||||||
|
}
|
||||||
|
return branchPageElementSize
|
||||||
|
}
|
||||||
|
|
||||||
|
// childAt returns the child node at a given index.
|
||||||
|
func (n *node) childAt(index int) *node {
|
||||||
|
if n.isLeaf {
|
||||||
|
panic(fmt.Sprintf("invalid childAt(%d) on a leaf node", index))
|
||||||
|
}
|
||||||
|
return n.bucket.node(n.inodes[index].pgid, n)
|
||||||
|
}
|
||||||
|
|
||||||
|
// childIndex returns the index of a given child node.
|
||||||
|
func (n *node) childIndex(child *node) int {
|
||||||
|
index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, child.key) != -1 })
|
||||||
|
return index
|
||||||
|
}
|
||||||
|
|
||||||
|
// numChildren returns the number of children.
|
||||||
|
func (n *node) numChildren() int {
|
||||||
|
return len(n.inodes)
|
||||||
|
}
|
||||||
|
|
||||||
|
// nextSibling returns the next node with the same parent.
|
||||||
|
func (n *node) nextSibling() *node {
|
||||||
|
if n.parent == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
index := n.parent.childIndex(n)
|
||||||
|
if index >= n.parent.numChildren()-1 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return n.parent.childAt(index + 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// prevSibling returns the previous node with the same parent.
|
||||||
|
func (n *node) prevSibling() *node {
|
||||||
|
if n.parent == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
index := n.parent.childIndex(n)
|
||||||
|
if index == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return n.parent.childAt(index - 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// put inserts a key/value.
|
||||||
|
func (n *node) put(oldKey, newKey, value []byte, pgid pgid, flags uint32) {
|
||||||
|
if pgid >= n.bucket.tx.meta.pgid {
|
||||||
|
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", pgid, n.bucket.tx.meta.pgid))
|
||||||
|
} else if len(oldKey) <= 0 {
|
||||||
|
panic("put: zero-length old key")
|
||||||
|
} else if len(newKey) <= 0 {
|
||||||
|
panic("put: zero-length new key")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find insertion index.
|
||||||
|
index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 })
|
||||||
|
|
||||||
|
// Add capacity and shift nodes if we don't have an exact match and need to insert.
|
||||||
|
exact := (len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey))
|
||||||
|
if !exact {
|
||||||
|
n.inodes = append(n.inodes, inode{})
|
||||||
|
copy(n.inodes[index+1:], n.inodes[index:])
|
||||||
|
}
|
||||||
|
|
||||||
|
inode := &n.inodes[index]
|
||||||
|
inode.flags = flags
|
||||||
|
inode.key = newKey
|
||||||
|
inode.value = value
|
||||||
|
inode.pgid = pgid
|
||||||
|
_assert(len(inode.key) > 0, "put: zero-length inode key")
|
||||||
|
}
|
||||||
|
|
||||||
|
// del removes a key from the node.
|
||||||
|
func (n *node) del(key []byte) {
|
||||||
|
// Find index of key.
|
||||||
|
index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, key) != -1 })
|
||||||
|
|
||||||
|
// Exit if the key isn't found.
|
||||||
|
if index >= len(n.inodes) || !bytes.Equal(n.inodes[index].key, key) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete inode from the node.
|
||||||
|
n.inodes = append(n.inodes[:index], n.inodes[index+1:]...)
|
||||||
|
|
||||||
|
// Mark the node as needing rebalancing.
|
||||||
|
n.unbalanced = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// read initializes the node from a page.
|
||||||
|
func (n *node) read(p *page) {
|
||||||
|
n.pgid = p.id
|
||||||
|
n.isLeaf = ((p.flags & leafPageFlag) != 0)
|
||||||
|
n.inodes = make(inodes, int(p.count))
|
||||||
|
|
||||||
|
for i := 0; i < int(p.count); i++ {
|
||||||
|
inode := &n.inodes[i]
|
||||||
|
if n.isLeaf {
|
||||||
|
elem := p.leafPageElement(uint16(i))
|
||||||
|
inode.flags = elem.flags
|
||||||
|
inode.key = elem.key()
|
||||||
|
inode.value = elem.value()
|
||||||
|
} else {
|
||||||
|
elem := p.branchPageElement(uint16(i))
|
||||||
|
inode.pgid = elem.pgid
|
||||||
|
inode.key = elem.key()
|
||||||
|
}
|
||||||
|
_assert(len(inode.key) > 0, "read: zero-length inode key")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save first key so we can find the node in the parent when we spill.
|
||||||
|
if len(n.inodes) > 0 {
|
||||||
|
n.key = n.inodes[0].key
|
||||||
|
_assert(len(n.key) > 0, "read: zero-length node key")
|
||||||
|
} else {
|
||||||
|
n.key = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// write writes the items onto one or more pages.
|
||||||
|
func (n *node) write(p *page) {
|
||||||
|
// Initialize page.
|
||||||
|
if n.isLeaf {
|
||||||
|
p.flags |= leafPageFlag
|
||||||
|
} else {
|
||||||
|
p.flags |= branchPageFlag
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(n.inodes) >= 0xFFFF {
|
||||||
|
panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.id))
|
||||||
|
}
|
||||||
|
p.count = uint16(len(n.inodes))
|
||||||
|
|
||||||
|
// Stop here if there are no items to write.
|
||||||
|
if p.count == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Loop over each item and write it to the page.
|
||||||
|
b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[n.pageElementSize()*len(n.inodes):]
|
||||||
|
for i, item := range n.inodes {
|
||||||
|
_assert(len(item.key) > 0, "write: zero-length inode key")
|
||||||
|
|
||||||
|
// Write the page element.
|
||||||
|
if n.isLeaf {
|
||||||
|
elem := p.leafPageElement(uint16(i))
|
||||||
|
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
|
||||||
|
elem.flags = item.flags
|
||||||
|
elem.ksize = uint32(len(item.key))
|
||||||
|
elem.vsize = uint32(len(item.value))
|
||||||
|
} else {
|
||||||
|
elem := p.branchPageElement(uint16(i))
|
||||||
|
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
|
||||||
|
elem.ksize = uint32(len(item.key))
|
||||||
|
elem.pgid = item.pgid
|
||||||
|
_assert(elem.pgid != p.id, "write: circular dependency occurred")
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the length of key+value is larger than the max allocation size
|
||||||
|
// then we need to reallocate the byte array pointer.
|
||||||
|
//
|
||||||
|
// See: https://github.com/boltdb/bolt/pull/335
|
||||||
|
klen, vlen := len(item.key), len(item.value)
|
||||||
|
if len(b) < klen+vlen {
|
||||||
|
b = (*[maxAllocSize]byte)(unsafe.Pointer(&b[0]))[:]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write data for the element to the end of the page.
|
||||||
|
copy(b[0:], item.key)
|
||||||
|
b = b[klen:]
|
||||||
|
copy(b[0:], item.value)
|
||||||
|
b = b[vlen:]
|
||||||
|
}
|
||||||
|
|
||||||
|
// DEBUG ONLY: n.dump()
|
||||||
|
}
|
||||||
|
|
||||||
|
// split breaks up a node into multiple smaller nodes, if appropriate.
|
||||||
|
// This should only be called from the spill() function.
|
||||||
|
func (n *node) split(pageSize int) []*node {
|
||||||
|
var nodes []*node
|
||||||
|
|
||||||
|
node := n
|
||||||
|
for {
|
||||||
|
// Split node into two.
|
||||||
|
a, b := node.splitTwo(pageSize)
|
||||||
|
nodes = append(nodes, a)
|
||||||
|
|
||||||
|
// If we can't split then exit the loop.
|
||||||
|
if b == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set node to b so it gets split on the next iteration.
|
||||||
|
node = b
|
||||||
|
}
|
||||||
|
|
||||||
|
return nodes
|
||||||
|
}
|
||||||
|
|
||||||
|
// splitTwo breaks up a node into two smaller nodes, if appropriate.
|
||||||
|
// This should only be called from the split() function.
|
||||||
|
func (n *node) splitTwo(pageSize int) (*node, *node) {
|
||||||
|
// Ignore the split if the page doesn't have at least enough nodes for
|
||||||
|
// two pages or if the nodes can fit in a single page.
|
||||||
|
if len(n.inodes) <= (minKeysPerPage*2) || n.sizeLessThan(pageSize) {
|
||||||
|
return n, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determine the threshold before starting a new node.
|
||||||
|
var fillPercent = n.bucket.FillPercent
|
||||||
|
if fillPercent < minFillPercent {
|
||||||
|
fillPercent = minFillPercent
|
||||||
|
} else if fillPercent > maxFillPercent {
|
||||||
|
fillPercent = maxFillPercent
|
||||||
|
}
|
||||||
|
threshold := int(float64(pageSize) * fillPercent)
|
||||||
|
|
||||||
|
// Determine split position and sizes of the two pages.
|
||||||
|
splitIndex, _ := n.splitIndex(threshold)
|
||||||
|
|
||||||
|
// Split node into two separate nodes.
|
||||||
|
// If there's no parent then we'll need to create one.
|
||||||
|
if n.parent == nil {
|
||||||
|
n.parent = &node{bucket: n.bucket, children: []*node{n}}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a new node and add it to the parent.
|
||||||
|
next := &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent}
|
||||||
|
n.parent.children = append(n.parent.children, next)
|
||||||
|
|
||||||
|
// Split inodes across two nodes.
|
||||||
|
next.inodes = n.inodes[splitIndex:]
|
||||||
|
n.inodes = n.inodes[:splitIndex]
|
||||||
|
|
||||||
|
// Update the statistics.
|
||||||
|
n.bucket.tx.stats.Split++
|
||||||
|
|
||||||
|
return n, next
|
||||||
|
}
|
||||||
|
|
||||||
|
// splitIndex finds the position where a page will fill a given threshold.
|
||||||
|
// It returns the index as well as the size of the first page.
|
||||||
|
// This is only be called from split().
|
||||||
|
func (n *node) splitIndex(threshold int) (index, sz int) {
|
||||||
|
sz = pageHeaderSize
|
||||||
|
|
||||||
|
// Loop until we only have the minimum number of keys required for the second page.
|
||||||
|
for i := 0; i < len(n.inodes)-minKeysPerPage; i++ {
|
||||||
|
index = i
|
||||||
|
inode := n.inodes[i]
|
||||||
|
elsize := n.pageElementSize() + len(inode.key) + len(inode.value)
|
||||||
|
|
||||||
|
// If we have at least the minimum number of keys and adding another
|
||||||
|
// node would put us over the threshold then exit and return.
|
||||||
|
if i >= minKeysPerPage && sz+elsize > threshold {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add the element size to the total size.
|
||||||
|
sz += elsize
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// spill writes the nodes to dirty pages and splits nodes as it goes.
|
||||||
|
// Returns an error if dirty pages cannot be allocated.
|
||||||
|
func (n *node) spill() error {
|
||||||
|
var tx = n.bucket.tx
|
||||||
|
if n.spilled {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Spill child nodes first. Child nodes can materialize sibling nodes in
|
||||||
|
// the case of split-merge so we cannot use a range loop. We have to check
|
||||||
|
// the children size on every loop iteration.
|
||||||
|
sort.Sort(n.children)
|
||||||
|
for i := 0; i < len(n.children); i++ {
|
||||||
|
if err := n.children[i].spill(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We no longer need the child list because it's only used for spill tracking.
|
||||||
|
n.children = nil
|
||||||
|
|
||||||
|
// Split nodes into appropriate sizes. The first node will always be n.
|
||||||
|
var nodes = n.split(tx.db.pageSize)
|
||||||
|
for _, node := range nodes {
|
||||||
|
// Add node's page to the freelist if it's not new.
|
||||||
|
if node.pgid > 0 {
|
||||||
|
tx.db.freelist.free(tx.meta.txid, tx.page(node.pgid))
|
||||||
|
node.pgid = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate contiguous space for the node.
|
||||||
|
p, err := tx.allocate((node.size() + tx.db.pageSize - 1) / tx.db.pageSize)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write the node.
|
||||||
|
if p.id >= tx.meta.pgid {
|
||||||
|
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.id, tx.meta.pgid))
|
||||||
|
}
|
||||||
|
node.pgid = p.id
|
||||||
|
node.write(p)
|
||||||
|
node.spilled = true
|
||||||
|
|
||||||
|
// Insert into parent inodes.
|
||||||
|
if node.parent != nil {
|
||||||
|
var key = node.key
|
||||||
|
if key == nil {
|
||||||
|
key = node.inodes[0].key
|
||||||
|
}
|
||||||
|
|
||||||
|
node.parent.put(key, node.inodes[0].key, nil, node.pgid, 0)
|
||||||
|
node.key = node.inodes[0].key
|
||||||
|
_assert(len(node.key) > 0, "spill: zero-length node key")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the statistics.
|
||||||
|
tx.stats.Spill++
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the root node split and created a new root then we need to spill that
|
||||||
|
// as well. We'll clear out the children to make sure it doesn't try to respill.
|
||||||
|
if n.parent != nil && n.parent.pgid == 0 {
|
||||||
|
n.children = nil
|
||||||
|
return n.parent.spill()
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// rebalance attempts to combine the node with sibling nodes if the node fill
|
||||||
|
// size is below a threshold or if there are not enough keys.
|
||||||
|
func (n *node) rebalance() {
|
||||||
|
if !n.unbalanced {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
n.unbalanced = false
|
||||||
|
|
||||||
|
// Update statistics.
|
||||||
|
n.bucket.tx.stats.Rebalance++
|
||||||
|
|
||||||
|
// Ignore if node is above threshold (25%) and has enough keys.
|
||||||
|
var threshold = n.bucket.tx.db.pageSize / 4
|
||||||
|
if n.size() > threshold && len(n.inodes) > n.minKeys() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Root node has special handling.
|
||||||
|
if n.parent == nil {
|
||||||
|
// If root node is a branch and only has one node then collapse it.
|
||||||
|
if !n.isLeaf && len(n.inodes) == 1 {
|
||||||
|
// Move root's child up.
|
||||||
|
child := n.bucket.node(n.inodes[0].pgid, n)
|
||||||
|
n.isLeaf = child.isLeaf
|
||||||
|
n.inodes = child.inodes[:]
|
||||||
|
n.children = child.children
|
||||||
|
|
||||||
|
// Reparent all child nodes being moved.
|
||||||
|
for _, inode := range n.inodes {
|
||||||
|
if child, ok := n.bucket.nodes[inode.pgid]; ok {
|
||||||
|
child.parent = n
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove old child.
|
||||||
|
child.parent = nil
|
||||||
|
delete(n.bucket.nodes, child.pgid)
|
||||||
|
child.free()
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// If node has no keys then just remove it.
|
||||||
|
if n.numChildren() == 0 {
|
||||||
|
n.parent.del(n.key)
|
||||||
|
n.parent.removeChild(n)
|
||||||
|
delete(n.bucket.nodes, n.pgid)
|
||||||
|
n.free()
|
||||||
|
n.parent.rebalance()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
_assert(n.parent.numChildren() > 1, "parent must have at least 2 children")
|
||||||
|
|
||||||
|
// Destination node is right sibling if idx == 0, otherwise left sibling.
|
||||||
|
var target *node
|
||||||
|
var useNextSibling = (n.parent.childIndex(n) == 0)
|
||||||
|
if useNextSibling {
|
||||||
|
target = n.nextSibling()
|
||||||
|
} else {
|
||||||
|
target = n.prevSibling()
|
||||||
|
}
|
||||||
|
|
||||||
|
// If both this node and the target node are too small then merge them.
|
||||||
|
if useNextSibling {
|
||||||
|
// Reparent all child nodes being moved.
|
||||||
|
for _, inode := range target.inodes {
|
||||||
|
if child, ok := n.bucket.nodes[inode.pgid]; ok {
|
||||||
|
child.parent.removeChild(child)
|
||||||
|
child.parent = n
|
||||||
|
child.parent.children = append(child.parent.children, child)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy over inodes from target and remove target.
|
||||||
|
n.inodes = append(n.inodes, target.inodes...)
|
||||||
|
n.parent.del(target.key)
|
||||||
|
n.parent.removeChild(target)
|
||||||
|
delete(n.bucket.nodes, target.pgid)
|
||||||
|
target.free()
|
||||||
|
} else {
|
||||||
|
// Reparent all child nodes being moved.
|
||||||
|
for _, inode := range n.inodes {
|
||||||
|
if child, ok := n.bucket.nodes[inode.pgid]; ok {
|
||||||
|
child.parent.removeChild(child)
|
||||||
|
child.parent = target
|
||||||
|
child.parent.children = append(child.parent.children, child)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy over inodes to target and remove node.
|
||||||
|
target.inodes = append(target.inodes, n.inodes...)
|
||||||
|
n.parent.del(n.key)
|
||||||
|
n.parent.removeChild(n)
|
||||||
|
delete(n.bucket.nodes, n.pgid)
|
||||||
|
n.free()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Either this node or the target node was deleted from the parent so rebalance it.
|
||||||
|
n.parent.rebalance()
|
||||||
|
}
|
||||||
|
|
||||||
|
// removes a node from the list of in-memory children.
|
||||||
|
// This does not affect the inodes.
|
||||||
|
func (n *node) removeChild(target *node) {
|
||||||
|
for i, child := range n.children {
|
||||||
|
if child == target {
|
||||||
|
n.children = append(n.children[:i], n.children[i+1:]...)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// dereference causes the node to copy all its inode key/value references to heap memory.
|
||||||
|
// This is required when the mmap is reallocated so inodes are not pointing to stale data.
|
||||||
|
func (n *node) dereference() {
|
||||||
|
if n.key != nil {
|
||||||
|
key := make([]byte, len(n.key))
|
||||||
|
copy(key, n.key)
|
||||||
|
n.key = key
|
||||||
|
_assert(n.pgid == 0 || len(n.key) > 0, "dereference: zero-length node key on existing node")
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range n.inodes {
|
||||||
|
inode := &n.inodes[i]
|
||||||
|
|
||||||
|
key := make([]byte, len(inode.key))
|
||||||
|
copy(key, inode.key)
|
||||||
|
inode.key = key
|
||||||
|
_assert(len(inode.key) > 0, "dereference: zero-length inode key")
|
||||||
|
|
||||||
|
value := make([]byte, len(inode.value))
|
||||||
|
copy(value, inode.value)
|
||||||
|
inode.value = value
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recursively dereference children.
|
||||||
|
for _, child := range n.children {
|
||||||
|
child.dereference()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update statistics.
|
||||||
|
n.bucket.tx.stats.NodeDeref++
|
||||||
|
}
|
||||||
|
|
||||||
|
// free adds the node's underlying page to the freelist.
|
||||||
|
func (n *node) free() {
|
||||||
|
if n.pgid != 0 {
|
||||||
|
n.bucket.tx.db.freelist.free(n.bucket.tx.meta.txid, n.bucket.tx.page(n.pgid))
|
||||||
|
n.pgid = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// dump writes the contents of the node to STDERR for debugging purposes.
|
||||||
|
/*
|
||||||
|
func (n *node) dump() {
|
||||||
|
// Write node header.
|
||||||
|
var typ = "branch"
|
||||||
|
if n.isLeaf {
|
||||||
|
typ = "leaf"
|
||||||
|
}
|
||||||
|
warnf("[NODE %d {type=%s count=%d}]", n.pgid, typ, len(n.inodes))
|
||||||
|
|
||||||
|
// Write out abbreviated version of each item.
|
||||||
|
for _, item := range n.inodes {
|
||||||
|
if n.isLeaf {
|
||||||
|
if item.flags&bucketLeafFlag != 0 {
|
||||||
|
bucket := (*bucket)(unsafe.Pointer(&item.value[0]))
|
||||||
|
warnf("+L %08x -> (bucket root=%d)", trunc(item.key, 4), bucket.root)
|
||||||
|
} else {
|
||||||
|
warnf("+L %08x -> %08x", trunc(item.key, 4), trunc(item.value, 4))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
warnf("+B %08x -> pgid=%d", trunc(item.key, 4), item.pgid)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
warn("")
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
type nodes []*node
|
||||||
|
|
||||||
|
func (s nodes) Len() int { return len(s) }
|
||||||
|
func (s nodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||||
|
func (s nodes) Less(i, j int) bool { return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1 }
|
||||||
|
|
||||||
|
// inode represents an internal node inside of a node.
|
||||||
|
// It can be used to point to elements in a page or point
|
||||||
|
// to an element which hasn't been added to a page yet.
|
||||||
|
type inode struct {
|
||||||
|
flags uint32
|
||||||
|
pgid pgid
|
||||||
|
key []byte
|
||||||
|
value []byte
|
||||||
|
}
|
||||||
|
|
||||||
|
type inodes []inode
|
197
vendor/go.etcd.io/bbolt/page.go
generated
vendored
Normal file
197
vendor/go.etcd.io/bbolt/page.go
generated
vendored
Normal file
@ -0,0 +1,197 @@
|
|||||||
|
package bbolt
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"sort"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
const pageHeaderSize = int(unsafe.Offsetof(((*page)(nil)).ptr))
|
||||||
|
|
||||||
|
const minKeysPerPage = 2
|
||||||
|
|
||||||
|
const branchPageElementSize = int(unsafe.Sizeof(branchPageElement{}))
|
||||||
|
const leafPageElementSize = int(unsafe.Sizeof(leafPageElement{}))
|
||||||
|
|
||||||
|
const (
|
||||||
|
branchPageFlag = 0x01
|
||||||
|
leafPageFlag = 0x02
|
||||||
|
metaPageFlag = 0x04
|
||||||
|
freelistPageFlag = 0x10
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
bucketLeafFlag = 0x01
|
||||||
|
)
|
||||||
|
|
||||||
|
// pgid is a page identifier.
type pgid uint64

// page is the header of a database page. The page body starts at the ptr
// field; the accessor methods reinterpret the memory at that address.
type page struct {
	// id is the page's identifier.
	id pgid

	// flags holds the *PageFlag bits describing the page type.
	flags uint16

	// count is the number of elements stored on the page.
	count uint16

	// overflow is presumably the number of additional contiguous pages used
	// by this page; confirm against the allocator.
	overflow uint32

	// ptr marks the start of the page body; only its address is used.
	ptr uintptr
}
|
||||||
|
|
||||||
|
// typ returns a human readable page type string used for debugging.
|
||||||
|
func (p *page) typ() string {
|
||||||
|
if (p.flags & branchPageFlag) != 0 {
|
||||||
|
return "branch"
|
||||||
|
} else if (p.flags & leafPageFlag) != 0 {
|
||||||
|
return "leaf"
|
||||||
|
} else if (p.flags & metaPageFlag) != 0 {
|
||||||
|
return "meta"
|
||||||
|
} else if (p.flags & freelistPageFlag) != 0 {
|
||||||
|
return "freelist"
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("unknown<%02x>", p.flags)
|
||||||
|
}
|
||||||
|
|
||||||
|
// meta returns a pointer to the metadata section of the page.
|
||||||
|
func (p *page) meta() *meta {
|
||||||
|
return (*meta)(unsafe.Pointer(&p.ptr))
|
||||||
|
}
|
||||||
|
|
||||||
|
// leafPageElement retrieves the leaf node by index
|
||||||
|
func (p *page) leafPageElement(index uint16) *leafPageElement {
|
||||||
|
n := &((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[index]
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
|
||||||
|
// leafPageElements retrieves a list of leaf nodes.
|
||||||
|
func (p *page) leafPageElements() []leafPageElement {
|
||||||
|
if p.count == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return ((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[:]
|
||||||
|
}
|
||||||
|
|
||||||
|
// branchPageElement retrieves the branch node by index
|
||||||
|
func (p *page) branchPageElement(index uint16) *branchPageElement {
|
||||||
|
return &((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[index]
|
||||||
|
}
|
||||||
|
|
||||||
|
// branchPageElements retrieves a list of branch nodes.
|
||||||
|
func (p *page) branchPageElements() []branchPageElement {
|
||||||
|
if p.count == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return ((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[:]
|
||||||
|
}
|
||||||
|
|
||||||
|
// dump writes n bytes of the page to STDERR as hex output.
|
||||||
|
func (p *page) hexdump(n int) {
|
||||||
|
buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:n]
|
||||||
|
fmt.Fprintf(os.Stderr, "%x\n", buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
type pages []*page
|
||||||
|
|
||||||
|
func (s pages) Len() int { return len(s) }
|
||||||
|
func (s pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||||
|
func (s pages) Less(i, j int) bool { return s[i].id < s[j].id }
|
||||||
|
|
||||||
|
// branchPageElement represents a node on a branch page.
|
||||||
|
type branchPageElement struct {
|
||||||
|
pos uint32
|
||||||
|
ksize uint32
|
||||||
|
pgid pgid
|
||||||
|
}
|
||||||
|
|
||||||
|
// key returns a byte slice of the node key.
|
||||||
|
func (n *branchPageElement) key() []byte {
|
||||||
|
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
|
||||||
|
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize]
|
||||||
|
}
|
||||||
|
|
||||||
|
// leafPageElement represents a node on a leaf page.
type leafPageElement struct {
	// flags carries the inode flags (for example bucketLeafFlag).
	flags uint32

	// pos is the byte offset from this element header to its key; the value
	// follows the key immediately.
	pos uint32

	// ksize is the length of the key in bytes.
	ksize uint32

	// vsize is the length of the value in bytes.
	vsize uint32
}
|
||||||
|
|
||||||
|
// key returns a byte slice of the node key.
|
||||||
|
func (n *leafPageElement) key() []byte {
|
||||||
|
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
|
||||||
|
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize:n.ksize]
|
||||||
|
}
|
||||||
|
|
||||||
|
// value returns a byte slice of the node value.
|
||||||
|
func (n *leafPageElement) value() []byte {
|
||||||
|
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
|
||||||
|
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize:n.vsize]
|
||||||
|
}
|
||||||
|
|
||||||
|
// PageInfo represents human readable information about a page.
type PageInfo struct {
	// ID is the page id.
	ID int

	// Type is the page type as a string.
	Type string

	// Count is the element count reported for the page.
	Count int

	// OverflowCount is the overflow count reported for the page.
	OverflowCount int
}
|
||||||
|
|
||||||
|
type pgids []pgid
|
||||||
|
|
||||||
|
func (s pgids) Len() int { return len(s) }
|
||||||
|
func (s pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||||
|
func (s pgids) Less(i, j int) bool { return s[i] < s[j] }
|
||||||
|
|
||||||
|
// merge returns the sorted union of a and b.
|
||||||
|
func (a pgids) merge(b pgids) pgids {
|
||||||
|
// Return the opposite slice if one is nil.
|
||||||
|
if len(a) == 0 {
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
if len(b) == 0 {
|
||||||
|
return a
|
||||||
|
}
|
||||||
|
merged := make(pgids, len(a)+len(b))
|
||||||
|
mergepgids(merged, a, b)
|
||||||
|
return merged
|
||||||
|
}
|
||||||
|
|
||||||
|
// mergepgids copies the sorted union of a and b into dst.
|
||||||
|
// If dst is too small, it panics.
|
||||||
|
func mergepgids(dst, a, b pgids) {
|
||||||
|
if len(dst) < len(a)+len(b) {
|
||||||
|
panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b)))
|
||||||
|
}
|
||||||
|
// Copy in the opposite slice if one is nil.
|
||||||
|
if len(a) == 0 {
|
||||||
|
copy(dst, b)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if len(b) == 0 {
|
||||||
|
copy(dst, a)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Merged will hold all elements from both lists.
|
||||||
|
merged := dst[:0]
|
||||||
|
|
||||||
|
// Assign lead to the slice with a lower starting value, follow to the higher value.
|
||||||
|
lead, follow := a, b
|
||||||
|
if b[0] < a[0] {
|
||||||
|
lead, follow = b, a
|
||||||
|
}
|
||||||
|
|
||||||
|
// Continue while there are elements in the lead.
|
||||||
|
for len(lead) > 0 {
|
||||||
|
// Merge largest prefix of lead that is ahead of follow[0].
|
||||||
|
n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] })
|
||||||
|
merged = append(merged, lead[:n]...)
|
||||||
|
if n >= len(lead) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Swap lead and follow.
|
||||||
|
lead, follow = follow, lead[n:]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Append what's left in follow.
|
||||||
|
_ = append(merged, follow...)
|
||||||
|
}
|
707
vendor/go.etcd.io/bbolt/tx.go
generated
vendored
Normal file
707
vendor/go.etcd.io/bbolt/tx.go
generated
vendored
Normal file
@ -0,0 +1,707 @@
|
|||||||
|
package bbolt
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// txid represents the internal transaction identifier.
|
||||||
|
type txid uint64
|
||||||
|
|
||||||
|
// Tx represents a read-only or read/write transaction on the database.
|
||||||
|
// Read-only transactions can be used for retrieving values for keys and creating cursors.
|
||||||
|
// Read/write transactions can create and remove buckets and create and remove keys.
|
||||||
|
//
|
||||||
|
// IMPORTANT: You must commit or rollback transactions when you are done with
|
||||||
|
// them. Pages can not be reclaimed by the writer until no more transactions
|
||||||
|
// are using them. A long running read transaction can cause the database to
|
||||||
|
// quickly grow.
|
||||||
|
type Tx struct {
|
||||||
|
writable bool
|
||||||
|
managed bool
|
||||||
|
db *DB
|
||||||
|
meta *meta
|
||||||
|
root Bucket
|
||||||
|
pages map[pgid]*page
|
||||||
|
stats TxStats
|
||||||
|
commitHandlers []func()
|
||||||
|
|
||||||
|
// WriteFlag specifies the flag for write-related methods like WriteTo().
|
||||||
|
// Tx opens the database file with the specified flag to copy the data.
|
||||||
|
//
|
||||||
|
// By default, the flag is unset, which works well for mostly in-memory
|
||||||
|
// workloads. For databases that are much larger than available RAM,
|
||||||
|
// set the flag to syscall.O_DIRECT to avoid trashing the page cache.
|
||||||
|
WriteFlag int
|
||||||
|
}
|
||||||
|
|
||||||
|
// init initializes the transaction.
|
||||||
|
func (tx *Tx) init(db *DB) {
|
||||||
|
tx.db = db
|
||||||
|
tx.pages = nil
|
||||||
|
|
||||||
|
// Copy the meta page since it can be changed by the writer.
|
||||||
|
tx.meta = &meta{}
|
||||||
|
db.meta().copy(tx.meta)
|
||||||
|
|
||||||
|
// Copy over the root bucket.
|
||||||
|
tx.root = newBucket(tx)
|
||||||
|
tx.root.bucket = &bucket{}
|
||||||
|
*tx.root.bucket = tx.meta.root
|
||||||
|
|
||||||
|
// Increment the transaction id and add a page cache for writable transactions.
|
||||||
|
if tx.writable {
|
||||||
|
tx.pages = make(map[pgid]*page)
|
||||||
|
tx.meta.txid += txid(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ID returns the transaction id.
|
||||||
|
func (tx *Tx) ID() int {
|
||||||
|
return int(tx.meta.txid)
|
||||||
|
}
|
||||||
|
|
||||||
|
// DB returns a reference to the database that created the transaction.
|
||||||
|
func (tx *Tx) DB() *DB {
|
||||||
|
return tx.db
|
||||||
|
}
|
||||||
|
|
||||||
|
// Size returns current database size in bytes as seen by this transaction.
|
||||||
|
func (tx *Tx) Size() int64 {
|
||||||
|
return int64(tx.meta.pgid) * int64(tx.db.pageSize)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Writable returns whether the transaction can perform write operations.
|
||||||
|
func (tx *Tx) Writable() bool {
|
||||||
|
return tx.writable
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cursor creates a cursor associated with the root bucket.
|
||||||
|
// All items in the cursor will return a nil value because all root bucket keys point to buckets.
|
||||||
|
// The cursor is only valid as long as the transaction is open.
|
||||||
|
// Do not use a cursor after the transaction is closed.
|
||||||
|
func (tx *Tx) Cursor() *Cursor {
|
||||||
|
return tx.root.Cursor()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stats retrieves a copy of the current transaction statistics.
|
||||||
|
func (tx *Tx) Stats() TxStats {
|
||||||
|
return tx.stats
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bucket retrieves a bucket by name.
|
||||||
|
// Returns nil if the bucket does not exist.
|
||||||
|
// The bucket instance is only valid for the lifetime of the transaction.
|
||||||
|
func (tx *Tx) Bucket(name []byte) *Bucket {
|
||||||
|
return tx.root.Bucket(name)
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateBucket creates a new bucket.
|
||||||
|
// Returns an error if the bucket already exists, if the bucket name is blank, or if the bucket name is too long.
|
||||||
|
// The bucket instance is only valid for the lifetime of the transaction.
|
||||||
|
func (tx *Tx) CreateBucket(name []byte) (*Bucket, error) {
|
||||||
|
return tx.root.CreateBucket(name)
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateBucketIfNotExists creates a new bucket if it doesn't already exist.
|
||||||
|
// Returns an error if the bucket name is blank, or if the bucket name is too long.
|
||||||
|
// The bucket instance is only valid for the lifetime of the transaction.
|
||||||
|
func (tx *Tx) CreateBucketIfNotExists(name []byte) (*Bucket, error) {
|
||||||
|
return tx.root.CreateBucketIfNotExists(name)
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteBucket deletes a bucket.
|
||||||
|
// Returns an error if the bucket cannot be found or if the key represents a non-bucket value.
|
||||||
|
func (tx *Tx) DeleteBucket(name []byte) error {
|
||||||
|
return tx.root.DeleteBucket(name)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ForEach executes a function for each bucket in the root.
|
||||||
|
// If the provided function returns an error then the iteration is stopped and
|
||||||
|
// the error is returned to the caller.
|
||||||
|
func (tx *Tx) ForEach(fn func(name []byte, b *Bucket) error) error {
|
||||||
|
return tx.root.ForEach(func(k, v []byte) error {
|
||||||
|
return fn(k, tx.root.Bucket(k))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// OnCommit adds a handler function to be executed after the transaction successfully commits.
|
||||||
|
func (tx *Tx) OnCommit(fn func()) {
|
||||||
|
tx.commitHandlers = append(tx.commitHandlers, fn)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Commit writes all changes to disk and updates the meta page.
|
||||||
|
// Returns an error if a disk write error occurs, or if Commit is
|
||||||
|
// called on a read-only transaction.
|
||||||
|
func (tx *Tx) Commit() error {
|
||||||
|
_assert(!tx.managed, "managed tx commit not allowed")
|
||||||
|
if tx.db == nil {
|
||||||
|
return ErrTxClosed
|
||||||
|
} else if !tx.writable {
|
||||||
|
return ErrTxNotWritable
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(benbjohnson): Use vectorized I/O to write out dirty pages.
|
||||||
|
|
||||||
|
// Rebalance nodes which have had deletions.
|
||||||
|
var startTime = time.Now()
|
||||||
|
tx.root.rebalance()
|
||||||
|
if tx.stats.Rebalance > 0 {
|
||||||
|
tx.stats.RebalanceTime += time.Since(startTime)
|
||||||
|
}
|
||||||
|
|
||||||
|
// spill data onto dirty pages.
|
||||||
|
startTime = time.Now()
|
||||||
|
if err := tx.root.spill(); err != nil {
|
||||||
|
tx.rollback()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
tx.stats.SpillTime += time.Since(startTime)
|
||||||
|
|
||||||
|
// Free the old root bucket.
|
||||||
|
tx.meta.root.root = tx.root.root
|
||||||
|
|
||||||
|
// Free the old freelist because commit writes out a fresh freelist.
|
||||||
|
if tx.meta.freelist != pgidNoFreelist {
|
||||||
|
tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist))
|
||||||
|
}
|
||||||
|
|
||||||
|
if !tx.db.NoFreelistSync {
|
||||||
|
err := tx.commitFreelist()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
tx.meta.freelist = pgidNoFreelist
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write dirty pages to disk.
|
||||||
|
startTime = time.Now()
|
||||||
|
if err := tx.write(); err != nil {
|
||||||
|
tx.rollback()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// If strict mode is enabled then perform a consistency check.
|
||||||
|
// Only the first consistency error is reported in the panic.
|
||||||
|
if tx.db.StrictMode {
|
||||||
|
ch := tx.Check()
|
||||||
|
var errs []string
|
||||||
|
for {
|
||||||
|
err, ok := <-ch
|
||||||
|
if !ok {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
errs = append(errs, err.Error())
|
||||||
|
}
|
||||||
|
if len(errs) > 0 {
|
||||||
|
panic("check fail: " + strings.Join(errs, "\n"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write meta to disk.
|
||||||
|
if err := tx.writeMeta(); err != nil {
|
||||||
|
tx.rollback()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
tx.stats.WriteTime += time.Since(startTime)
|
||||||
|
|
||||||
|
// Finalize the transaction.
|
||||||
|
tx.close()
|
||||||
|
|
||||||
|
// Execute commit handlers now that the locks have been removed.
|
||||||
|
for _, fn := range tx.commitHandlers {
|
||||||
|
fn()
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tx *Tx) commitFreelist() error {
|
||||||
|
// Allocate new pages for the new free list. This will overestimate
|
||||||
|
// the size of the freelist but not underestimate the size (which would be bad).
|
||||||
|
opgid := tx.meta.pgid
|
||||||
|
p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1)
|
||||||
|
if err != nil {
|
||||||
|
tx.rollback()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := tx.db.freelist.write(p); err != nil {
|
||||||
|
tx.rollback()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
tx.meta.freelist = p.id
|
||||||
|
// If the high water mark has moved up then attempt to grow the database.
|
||||||
|
if tx.meta.pgid > opgid {
|
||||||
|
if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil {
|
||||||
|
tx.rollback()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rollback closes the transaction and ignores all previous updates. Read-only
|
||||||
|
// transactions must be rolled back and not committed.
|
||||||
|
func (tx *Tx) Rollback() error {
|
||||||
|
_assert(!tx.managed, "managed tx rollback not allowed")
|
||||||
|
if tx.db == nil {
|
||||||
|
return ErrTxClosed
|
||||||
|
}
|
||||||
|
tx.rollback()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tx *Tx) rollback() {
|
||||||
|
if tx.db == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if tx.writable {
|
||||||
|
tx.db.freelist.rollback(tx.meta.txid)
|
||||||
|
tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist))
|
||||||
|
}
|
||||||
|
tx.close()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tx *Tx) close() {
|
||||||
|
if tx.db == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if tx.writable {
|
||||||
|
// Grab freelist stats.
|
||||||
|
var freelistFreeN = tx.db.freelist.free_count()
|
||||||
|
var freelistPendingN = tx.db.freelist.pending_count()
|
||||||
|
var freelistAlloc = tx.db.freelist.size()
|
||||||
|
|
||||||
|
// Remove transaction ref & writer lock.
|
||||||
|
tx.db.rwtx = nil
|
||||||
|
tx.db.rwlock.Unlock()
|
||||||
|
|
||||||
|
// Merge statistics.
|
||||||
|
tx.db.statlock.Lock()
|
||||||
|
tx.db.stats.FreePageN = freelistFreeN
|
||||||
|
tx.db.stats.PendingPageN = freelistPendingN
|
||||||
|
tx.db.stats.FreeAlloc = (freelistFreeN + freelistPendingN) * tx.db.pageSize
|
||||||
|
tx.db.stats.FreelistInuse = freelistAlloc
|
||||||
|
tx.db.stats.TxStats.add(&tx.stats)
|
||||||
|
tx.db.statlock.Unlock()
|
||||||
|
} else {
|
||||||
|
tx.db.removeTx(tx)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear all references.
|
||||||
|
tx.db = nil
|
||||||
|
tx.meta = nil
|
||||||
|
tx.root = Bucket{tx: tx}
|
||||||
|
tx.pages = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy writes the entire database to a writer.
|
||||||
|
// This function exists for backwards compatibility.
|
||||||
|
//
|
||||||
|
// Deprecated; Use WriteTo() instead.
|
||||||
|
func (tx *Tx) Copy(w io.Writer) error {
|
||||||
|
_, err := tx.WriteTo(w)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteTo writes the entire database to a writer.
|
||||||
|
// If err == nil then exactly tx.Size() bytes will be written into the writer.
|
||||||
|
func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
|
||||||
|
// Attempt to open reader with WriteFlag
|
||||||
|
f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
if cerr := f.Close(); err == nil {
|
||||||
|
err = cerr
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Generate a meta page. We use the same page data for both meta pages.
|
||||||
|
buf := make([]byte, tx.db.pageSize)
|
||||||
|
page := (*page)(unsafe.Pointer(&buf[0]))
|
||||||
|
page.flags = metaPageFlag
|
||||||
|
*page.meta() = *tx.meta
|
||||||
|
|
||||||
|
// Write meta 0.
|
||||||
|
page.id = 0
|
||||||
|
page.meta().checksum = page.meta().sum64()
|
||||||
|
nn, err := w.Write(buf)
|
||||||
|
n += int64(nn)
|
||||||
|
if err != nil {
|
||||||
|
return n, fmt.Errorf("meta 0 copy: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write meta 1 with a lower transaction id.
|
||||||
|
page.id = 1
|
||||||
|
page.meta().txid -= 1
|
||||||
|
page.meta().checksum = page.meta().sum64()
|
||||||
|
nn, err = w.Write(buf)
|
||||||
|
n += int64(nn)
|
||||||
|
if err != nil {
|
||||||
|
return n, fmt.Errorf("meta 1 copy: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Move past the meta pages in the file.
|
||||||
|
if _, err := f.Seek(int64(tx.db.pageSize*2), io.SeekStart); err != nil {
|
||||||
|
return n, fmt.Errorf("seek: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy data pages.
|
||||||
|
wn, err := io.CopyN(w, f, tx.Size()-int64(tx.db.pageSize*2))
|
||||||
|
n += wn
|
||||||
|
if err != nil {
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return n, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// CopyFile copies the entire database to file at the given path.
|
||||||
|
// A reader transaction is maintained during the copy so it is safe to continue
|
||||||
|
// using the database while a copy is in progress.
|
||||||
|
func (tx *Tx) CopyFile(path string, mode os.FileMode) error {
|
||||||
|
f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
err = tx.Copy(f)
|
||||||
|
if err != nil {
|
||||||
|
_ = f.Close()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return f.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check performs several consistency checks on the database for this transaction.
|
||||||
|
// An error is returned if any inconsistency is found.
|
||||||
|
//
|
||||||
|
// It can be safely run concurrently on a writable transaction. However, this
|
||||||
|
// incurs a high cost for large databases and databases with a lot of subbuckets
|
||||||
|
// because of caching. This overhead can be removed if running on a read-only
|
||||||
|
// transaction, however, it is not safe to execute other writer transactions at
|
||||||
|
// the same time.
|
||||||
|
func (tx *Tx) Check() <-chan error {
|
||||||
|
ch := make(chan error)
|
||||||
|
go tx.check(ch)
|
||||||
|
return ch
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tx *Tx) check(ch chan error) {
|
||||||
|
// Force loading free list if opened in ReadOnly mode.
|
||||||
|
tx.db.loadFreelist()
|
||||||
|
|
||||||
|
// Check if any pages are double freed.
|
||||||
|
freed := make(map[pgid]bool)
|
||||||
|
all := make([]pgid, tx.db.freelist.count())
|
||||||
|
tx.db.freelist.copyall(all)
|
||||||
|
for _, id := range all {
|
||||||
|
if freed[id] {
|
||||||
|
ch <- fmt.Errorf("page %d: already freed", id)
|
||||||
|
}
|
||||||
|
freed[id] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Track every reachable page.
|
||||||
|
reachable := make(map[pgid]*page)
|
||||||
|
reachable[0] = tx.page(0) // meta0
|
||||||
|
reachable[1] = tx.page(1) // meta1
|
||||||
|
if tx.meta.freelist != pgidNoFreelist {
|
||||||
|
for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ {
|
||||||
|
reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recursively check buckets.
|
||||||
|
tx.checkBucket(&tx.root, reachable, freed, ch)
|
||||||
|
|
||||||
|
// Ensure all pages below high water mark are either reachable or freed.
|
||||||
|
for i := pgid(0); i < tx.meta.pgid; i++ {
|
||||||
|
_, isReachable := reachable[i]
|
||||||
|
if !isReachable && !freed[i] {
|
||||||
|
ch <- fmt.Errorf("page %d: unreachable unfreed", int(i))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close the channel to signal completion.
|
||||||
|
close(ch)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bool, ch chan error) {
|
||||||
|
// Ignore inline buckets.
|
||||||
|
if b.root == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check every page used by this bucket.
|
||||||
|
b.tx.forEachPage(b.root, 0, func(p *page, _ int) {
|
||||||
|
if p.id > tx.meta.pgid {
|
||||||
|
ch <- fmt.Errorf("page %d: out of bounds: %d", int(p.id), int(b.tx.meta.pgid))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure each page is only referenced once.
|
||||||
|
for i := pgid(0); i <= pgid(p.overflow); i++ {
|
||||||
|
var id = p.id + i
|
||||||
|
if _, ok := reachable[id]; ok {
|
||||||
|
ch <- fmt.Errorf("page %d: multiple references", int(id))
|
||||||
|
}
|
||||||
|
reachable[id] = p
|
||||||
|
}
|
||||||
|
|
||||||
|
// We should only encounter un-freed leaf and branch pages.
|
||||||
|
if freed[p.id] {
|
||||||
|
ch <- fmt.Errorf("page %d: reachable freed", int(p.id))
|
||||||
|
} else if (p.flags&branchPageFlag) == 0 && (p.flags&leafPageFlag) == 0 {
|
||||||
|
ch <- fmt.Errorf("page %d: invalid type: %s", int(p.id), p.typ())
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// Check each bucket within this bucket.
|
||||||
|
_ = b.ForEach(func(k, v []byte) error {
|
||||||
|
if child := b.Bucket(k); child != nil {
|
||||||
|
tx.checkBucket(child, reachable, freed, ch)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// allocate returns a contiguous block of memory starting at a given page.
|
||||||
|
func (tx *Tx) allocate(count int) (*page, error) {
|
||||||
|
p, err := tx.db.allocate(tx.meta.txid, count)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save to our page cache.
|
||||||
|
tx.pages[p.id] = p
|
||||||
|
|
||||||
|
// Update statistics.
|
||||||
|
tx.stats.PageCount += count
|
||||||
|
tx.stats.PageAlloc += count * tx.db.pageSize
|
||||||
|
|
||||||
|
return p, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// write writes any dirty pages to disk.
|
||||||
|
func (tx *Tx) write() error {
|
||||||
|
// Sort pages by id.
|
||||||
|
pages := make(pages, 0, len(tx.pages))
|
||||||
|
for _, p := range tx.pages {
|
||||||
|
pages = append(pages, p)
|
||||||
|
}
|
||||||
|
// Clear out page cache early.
|
||||||
|
tx.pages = make(map[pgid]*page)
|
||||||
|
sort.Sort(pages)
|
||||||
|
|
||||||
|
// Write pages to disk in order.
|
||||||
|
for _, p := range pages {
|
||||||
|
size := (int(p.overflow) + 1) * tx.db.pageSize
|
||||||
|
offset := int64(p.id) * int64(tx.db.pageSize)
|
||||||
|
|
||||||
|
// Write out page in "max allocation" sized chunks.
|
||||||
|
ptr := (*[maxAllocSize]byte)(unsafe.Pointer(p))
|
||||||
|
for {
|
||||||
|
// Limit our write to our max allocation size.
|
||||||
|
sz := size
|
||||||
|
if sz > maxAllocSize-1 {
|
||||||
|
sz = maxAllocSize - 1
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write chunk to disk.
|
||||||
|
buf := ptr[:sz]
|
||||||
|
if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update statistics.
|
||||||
|
tx.stats.Write++
|
||||||
|
|
||||||
|
// Exit inner for loop if we've written all the chunks.
|
||||||
|
size -= sz
|
||||||
|
if size == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise move offset forward and move pointer to next chunk.
|
||||||
|
offset += int64(sz)
|
||||||
|
ptr = (*[maxAllocSize]byte)(unsafe.Pointer(&ptr[sz]))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ignore file sync if flag is set on DB.
|
||||||
|
if !tx.db.NoSync || IgnoreNoSync {
|
||||||
|
if err := fdatasync(tx.db); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Put small pages back to page pool.
|
||||||
|
for _, p := range pages {
|
||||||
|
// Ignore page sizes over 1 page.
|
||||||
|
// These are allocated using make() instead of the page pool.
|
||||||
|
if int(p.overflow) != 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:tx.db.pageSize]
|
||||||
|
|
||||||
|
// See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1
|
||||||
|
for i := range buf {
|
||||||
|
buf[i] = 0
|
||||||
|
}
|
||||||
|
tx.db.pagePool.Put(buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeMeta writes the meta to the disk.
|
||||||
|
func (tx *Tx) writeMeta() error {
|
||||||
|
// Create a temporary buffer for the meta page.
|
||||||
|
buf := make([]byte, tx.db.pageSize)
|
||||||
|
p := tx.db.pageInBuffer(buf, 0)
|
||||||
|
tx.meta.write(p)
|
||||||
|
|
||||||
|
// Write the meta page to file.
|
||||||
|
if _, err := tx.db.ops.writeAt(buf, int64(p.id)*int64(tx.db.pageSize)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if !tx.db.NoSync || IgnoreNoSync {
|
||||||
|
if err := fdatasync(tx.db); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update statistics.
|
||||||
|
tx.stats.Write++
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// page returns a reference to the page with a given id.
|
||||||
|
// If page has been written to then a temporary buffered page is returned.
|
||||||
|
func (tx *Tx) page(id pgid) *page {
|
||||||
|
// Check the dirty pages first.
|
||||||
|
if tx.pages != nil {
|
||||||
|
if p, ok := tx.pages[id]; ok {
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise return directly from the mmap.
|
||||||
|
return tx.db.page(id)
|
||||||
|
}
|
||||||
|
|
||||||
|
// forEachPage iterates over every page within a given page and executes a function.
|
||||||
|
func (tx *Tx) forEachPage(pgid pgid, depth int, fn func(*page, int)) {
|
||||||
|
p := tx.page(pgid)
|
||||||
|
|
||||||
|
// Execute function.
|
||||||
|
fn(p, depth)
|
||||||
|
|
||||||
|
// Recursively loop over children.
|
||||||
|
if (p.flags & branchPageFlag) != 0 {
|
||||||
|
for i := 0; i < int(p.count); i++ {
|
||||||
|
elem := p.branchPageElement(uint16(i))
|
||||||
|
tx.forEachPage(elem.pgid, depth+1, fn)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Page returns page information for a given page number.
|
||||||
|
// This is only safe for concurrent use when used by a writable transaction.
|
||||||
|
func (tx *Tx) Page(id int) (*PageInfo, error) {
|
||||||
|
if tx.db == nil {
|
||||||
|
return nil, ErrTxClosed
|
||||||
|
} else if pgid(id) >= tx.meta.pgid {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build the page info.
|
||||||
|
p := tx.db.page(pgid(id))
|
||||||
|
info := &PageInfo{
|
||||||
|
ID: id,
|
||||||
|
Count: int(p.count),
|
||||||
|
OverflowCount: int(p.overflow),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determine the type (or if it's free).
|
||||||
|
if tx.db.freelist.freed(pgid(id)) {
|
||||||
|
info.Type = "free"
|
||||||
|
} else {
|
||||||
|
info.Type = p.typ()
|
||||||
|
}
|
||||||
|
|
||||||
|
return info, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// TxStats represents statistics about the actions performed by the transaction.
type TxStats struct {
	// Page statistics.
	PageCount int // number of page allocations
	PageAlloc int // total bytes allocated

	// Cursor statistics.
	CursorCount int // number of cursors created

	// Node statistics.
	NodeCount int // number of node allocations
	NodeDeref int // number of node dereferences

	// Rebalance statistics.
	Rebalance     int           // number of node rebalances
	RebalanceTime time.Duration // total time spent rebalancing

	// Split/Spill statistics.
	Split     int           // number of nodes split
	Spill     int           // number of nodes spilled
	SpillTime time.Duration // total time spent spilling

	// Write statistics.
	Write     int           // number of writes performed
	WriteTime time.Duration // total time spent writing to disk
}

// add accumulates every counter and duration of other into s.
func (s *TxStats) add(other *TxStats) {
	s.PageCount += other.PageCount
	s.PageAlloc += other.PageAlloc
	s.CursorCount += other.CursorCount
	s.NodeCount += other.NodeCount
	s.NodeDeref += other.NodeDeref
	s.Rebalance += other.Rebalance
	s.RebalanceTime += other.RebalanceTime
	s.Split += other.Split
	s.Spill += other.Spill
	s.SpillTime += other.SpillTime
	s.Write += other.Write
	s.WriteTime += other.WriteTime
}

// Sub calculates and returns the difference between two sets of transaction
// stats (s minus other). This is useful when obtaining stats at two different
// points in time and you need the performance counters that occurred within
// that time span.
//
// A nil other is treated as an all-zero baseline, so a copy of s is returned
// instead of dereferencing the nil pointer.
func (s *TxStats) Sub(other *TxStats) TxStats {
	if other == nil {
		return *s
	}
	return TxStats{
		PageCount:     s.PageCount - other.PageCount,
		PageAlloc:     s.PageAlloc - other.PageAlloc,
		CursorCount:   s.CursorCount - other.CursorCount,
		NodeCount:     s.NodeCount - other.NodeCount,
		NodeDeref:     s.NodeDeref - other.NodeDeref,
		Rebalance:     s.Rebalance - other.Rebalance,
		RebalanceTime: s.RebalanceTime - other.RebalanceTime,
		Split:         s.Split - other.Split,
		Spill:         s.Spill - other.Spill,
		SpillTime:     s.SpillTime - other.SpillTime,
		Write:         s.Write - other.Write,
		WriteTime:     s.WriteTime - other.WriteTime,
	}
}
|
Loading…
Reference in New Issue
Block a user