a simple url shortener in Go (check it out at qurl.org)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1138 lines
30 KiB

6 years ago
  1. package bbolt
  2. import (
  3. "errors"
  4. "fmt"
  5. "hash/fnv"
  6. "log"
  7. "os"
  8. "runtime"
  9. "sort"
  10. "sync"
  11. "time"
  12. "unsafe"
  13. )
  14. // The largest step that can be taken when remapping the mmap.
  15. const maxMmapStep = 1 << 30 // 1GB
  16. // The data file format version.
  17. const version = 2
  18. // Represents a marker value to indicate that a file is a Bolt DB.
  19. const magic uint32 = 0xED0CDAED
  20. const pgidNoFreelist pgid = 0xffffffffffffffff
  21. // IgnoreNoSync specifies whether the NoSync field of a DB is ignored when
  22. // syncing changes to a file. This is required as some operating systems,
  23. // such as OpenBSD, do not have a unified buffer cache (UBC) and writes
  24. // must be synchronized using the msync(2) syscall.
  25. const IgnoreNoSync = runtime.GOOS == "openbsd"
  26. // Default values if not set in a DB instance.
  27. const (
  28. DefaultMaxBatchSize int = 1000
  29. DefaultMaxBatchDelay = 10 * time.Millisecond
  30. DefaultAllocSize = 16 * 1024 * 1024
  31. )
  32. // default page size for db is set to the OS page size.
  33. var defaultPageSize = os.Getpagesize()
  34. // The time elapsed between consecutive file locking attempts.
  35. const flockRetryTimeout = 50 * time.Millisecond
  36. // DB represents a collection of buckets persisted to a file on disk.
  37. // All data access is performed through transactions which can be obtained through the DB.
  38. // All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called.
  39. type DB struct {
  40. // When enabled, the database will perform a Check() after every commit.
  41. // A panic is issued if the database is in an inconsistent state. This
  42. // flag has a large performance impact so it should only be used for
  43. // debugging purposes.
  44. StrictMode bool
  45. // Setting the NoSync flag will cause the database to skip fsync()
  46. // calls after each commit. This can be useful when bulk loading data
  47. // into a database and you can restart the bulk load in the event of
  48. // a system failure or database corruption. Do not set this flag for
  49. // normal use.
  50. //
  51. // If the package global IgnoreNoSync constant is true, this value is
  52. // ignored. See the comment on that constant for more details.
  53. //
  54. // THIS IS UNSAFE. PLEASE USE WITH CAUTION.
  55. NoSync bool
  56. // When true, skips syncing freelist to disk. This improves the database
  57. // write performance under normal operation, but requires a full database
  58. // re-sync during recovery.
  59. NoFreelistSync bool
  60. // When true, skips the truncate call when growing the database.
  61. // Setting this to true is only safe on non-ext3/ext4 systems.
  62. // Skipping truncation avoids preallocation of hard drive space and
  63. // bypasses a truncate() and fsync() syscall on remapping.
  64. //
  65. // https://github.com/boltdb/bolt/issues/284
  66. NoGrowSync bool
  67. // If you want to read the entire database fast, you can set MmapFlag to
  68. // syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead.
  69. MmapFlags int
  70. // MaxBatchSize is the maximum size of a batch. Default value is
  71. // copied from DefaultMaxBatchSize in Open.
  72. //
  73. // If <=0, disables batching.
  74. //
  75. // Do not change concurrently with calls to Batch.
  76. MaxBatchSize int
  77. // MaxBatchDelay is the maximum delay before a batch starts.
  78. // Default value is copied from DefaultMaxBatchDelay in Open.
  79. //
  80. // If <=0, effectively disables batching.
  81. //
  82. // Do not change concurrently with calls to Batch.
  83. MaxBatchDelay time.Duration
  84. // AllocSize is the amount of space allocated when the database
  85. // needs to create new pages. This is done to amortize the cost
  86. // of truncate() and fsync() when growing the data file.
  87. AllocSize int
  88. path string
  89. file *os.File
  90. dataref []byte // mmap'ed readonly, write throws SEGV
  91. data *[maxMapSize]byte
  92. datasz int
  93. filesz int // current on disk file size
  94. meta0 *meta
  95. meta1 *meta
  96. pageSize int
  97. opened bool
  98. rwtx *Tx
  99. txs []*Tx
  100. stats Stats
  101. freelist *freelist
  102. freelistLoad sync.Once
  103. pagePool sync.Pool
  104. batchMu sync.Mutex
  105. batch *batch
  106. rwlock sync.Mutex // Allows only one writer at a time.
  107. metalock sync.Mutex // Protects meta page access.
  108. mmaplock sync.RWMutex // Protects mmap access during remapping.
  109. statlock sync.RWMutex // Protects stats access.
  110. ops struct {
  111. writeAt func(b []byte, off int64) (n int, err error)
  112. }
  113. // Read only mode.
  114. // When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately.
  115. readOnly bool
  116. }
  117. // Path returns the path to currently open database file.
  118. func (db *DB) Path() string {
  119. return db.path
  120. }
  121. // GoString returns the Go string representation of the database.
  122. func (db *DB) GoString() string {
  123. return fmt.Sprintf("bolt.DB{path:%q}", db.path)
  124. }
  125. // String returns the string representation of the database.
  126. func (db *DB) String() string {
  127. return fmt.Sprintf("DB<%q>", db.path)
  128. }
  129. // Open creates and opens a database at the given path.
  130. // If the file does not exist then it will be created automatically.
  131. // Passing in nil options will cause Bolt to open the database with the default options.
  132. func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
  133. db := &DB{
  134. opened: true,
  135. }
  136. // Set default options if no options are provided.
  137. if options == nil {
  138. options = DefaultOptions
  139. }
  140. db.NoSync = options.NoSync
  141. db.NoGrowSync = options.NoGrowSync
  142. db.MmapFlags = options.MmapFlags
  143. db.NoFreelistSync = options.NoFreelistSync
  144. // Set default values for later DB operations.
  145. db.MaxBatchSize = DefaultMaxBatchSize
  146. db.MaxBatchDelay = DefaultMaxBatchDelay
  147. db.AllocSize = DefaultAllocSize
  148. flag := os.O_RDWR
  149. if options.ReadOnly {
  150. flag = os.O_RDONLY
  151. db.readOnly = true
  152. }
  153. // Open data file and separate sync handler for metadata writes.
  154. db.path = path
  155. var err error
  156. if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil {
  157. _ = db.close()
  158. return nil, err
  159. }
  160. // Lock file so that other processes using Bolt in read-write mode cannot
  161. // use the database at the same time. This would cause corruption since
  162. // the two processes would write meta pages and free pages separately.
  163. // The database file is locked exclusively (only one process can grab the lock)
  164. // if !options.ReadOnly.
  165. // The database file is locked using the shared lock (more than one process may
  166. // hold a lock at the same time) otherwise (options.ReadOnly is set).
  167. if err := flock(db, !db.readOnly, options.Timeout); err != nil {
  168. _ = db.close()
  169. return nil, err
  170. }
  171. // Default values for test hooks
  172. db.ops.writeAt = db.file.WriteAt
  173. if db.pageSize = options.PageSize; db.pageSize == 0 {
  174. // Set the default page size to the OS page size.
  175. db.pageSize = defaultPageSize
  176. }
  177. // Initialize the database if it doesn't exist.
  178. if info, err := db.file.Stat(); err != nil {
  179. _ = db.close()
  180. return nil, err
  181. } else if info.Size() == 0 {
  182. // Initialize new files with meta pages.
  183. if err := db.init(); err != nil {
  184. // clean up file descriptor on initialization fail
  185. _ = db.close()
  186. return nil, err
  187. }
  188. } else {
  189. // Read the first meta page to determine the page size.
  190. var buf [0x1000]byte
  191. // If we can't read the page size, but can read a page, assume
  192. // it's the same as the OS or one given -- since that's how the
  193. // page size was chosen in the first place.
  194. //
  195. // If the first page is invalid and this OS uses a different
  196. // page size than what the database was created with then we
  197. // are out of luck and cannot access the database.
  198. //
  199. // TODO: scan for next page
  200. if bw, err := db.file.ReadAt(buf[:], 0); err == nil && bw == len(buf) {
  201. if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil {
  202. db.pageSize = int(m.pageSize)
  203. }
  204. } else {
  205. _ = db.close()
  206. return nil, ErrInvalid
  207. }
  208. }
  209. // Initialize page pool.
  210. db.pagePool = sync.Pool{
  211. New: func() interface{} {
  212. return make([]byte, db.pageSize)
  213. },
  214. }
  215. // Memory map the data file.
  216. if err := db.mmap(options.InitialMmapSize); err != nil {
  217. _ = db.close()
  218. return nil, err
  219. }
  220. if db.readOnly {
  221. return db, nil
  222. }
  223. db.loadFreelist()
  224. // Flush freelist when transitioning from no sync to sync so
  225. // NoFreelistSync unaware boltdb can open the db later.
  226. if !db.NoFreelistSync && !db.hasSyncedFreelist() {
  227. tx, err := db.Begin(true)
  228. if tx != nil {
  229. err = tx.Commit()
  230. }
  231. if err != nil {
  232. _ = db.close()
  233. return nil, err
  234. }
  235. }
  236. // Mark the database as opened and return.
  237. return db, nil
  238. }
  239. // loadFreelist reads the freelist if it is synced, or reconstructs it
  240. // by scanning the DB if it is not synced. It assumes there are no
  241. // concurrent accesses being made to the freelist.
  242. func (db *DB) loadFreelist() {
  243. db.freelistLoad.Do(func() {
  244. db.freelist = newFreelist()
  245. if !db.hasSyncedFreelist() {
  246. // Reconstruct free list by scanning the DB.
  247. db.freelist.readIDs(db.freepages())
  248. } else {
  249. // Read free list from freelist page.
  250. db.freelist.read(db.page(db.meta().freelist))
  251. }
  252. db.stats.FreePageN = len(db.freelist.ids)
  253. })
  254. }
  255. func (db *DB) hasSyncedFreelist() bool {
  256. return db.meta().freelist != pgidNoFreelist
  257. }
  258. // mmap opens the underlying memory-mapped file and initializes the meta references.
  259. // minsz is the minimum size that the new mmap can be.
  260. func (db *DB) mmap(minsz int) error {
  261. db.mmaplock.Lock()
  262. defer db.mmaplock.Unlock()
  263. info, err := db.file.Stat()
  264. if err != nil {
  265. return fmt.Errorf("mmap stat error: %s", err)
  266. } else if int(info.Size()) < db.pageSize*2 {
  267. return fmt.Errorf("file size too small")
  268. }
  269. // Ensure the size is at least the minimum size.
  270. var size = int(info.Size())
  271. if size < minsz {
  272. size = minsz
  273. }
  274. size, err = db.mmapSize(size)
  275. if err != nil {
  276. return err
  277. }
  278. // Dereference all mmap references before unmapping.
  279. if db.rwtx != nil {
  280. db.rwtx.root.dereference()
  281. }
  282. // Unmap existing data before continuing.
  283. if err := db.munmap(); err != nil {
  284. return err
  285. }
  286. // Memory-map the data file as a byte slice.
  287. if err := mmap(db, size); err != nil {
  288. return err
  289. }
  290. // Save references to the meta pages.
  291. db.meta0 = db.page(0).meta()
  292. db.meta1 = db.page(1).meta()
  293. // Validate the meta pages. We only return an error if both meta pages fail
  294. // validation, since meta0 failing validation means that it wasn't saved
  295. // properly -- but we can recover using meta1. And vice-versa.
  296. err0 := db.meta0.validate()
  297. err1 := db.meta1.validate()
  298. if err0 != nil && err1 != nil {
  299. return err0
  300. }
  301. return nil
  302. }
  303. // munmap unmaps the data file from memory.
  304. func (db *DB) munmap() error {
  305. if err := munmap(db); err != nil {
  306. return fmt.Errorf("unmap error: " + err.Error())
  307. }
  308. return nil
  309. }
  310. // mmapSize determines the appropriate size for the mmap given the current size
  311. // of the database. The minimum size is 32KB and doubles until it reaches 1GB.
  312. // Returns an error if the new mmap size is greater than the max allowed.
  313. func (db *DB) mmapSize(size int) (int, error) {
  314. // Double the size from 32KB until 1GB.
  315. for i := uint(15); i <= 30; i++ {
  316. if size <= 1<<i {
  317. return 1 << i, nil
  318. }
  319. }
  320. // Verify the requested size is not above the maximum allowed.
  321. if size > maxMapSize {
  322. return 0, fmt.Errorf("mmap too large")
  323. }
  324. // If larger than 1GB then grow by 1GB at a time.
  325. sz := int64(size)
  326. if remainder := sz % int64(maxMmapStep); remainder > 0 {
  327. sz += int64(maxMmapStep) - remainder
  328. }
  329. // Ensure that the mmap size is a multiple of the page size.
  330. // This should always be true since we're incrementing in MBs.
  331. pageSize := int64(db.pageSize)
  332. if (sz % pageSize) != 0 {
  333. sz = ((sz / pageSize) + 1) * pageSize
  334. }
  335. // If we've exceeded the max size then only grow up to the max size.
  336. if sz > maxMapSize {
  337. sz = maxMapSize
  338. }
  339. return int(sz), nil
  340. }
  341. // init creates a new database file and initializes its meta pages.
  342. func (db *DB) init() error {
  343. // Create two meta pages on a buffer.
  344. buf := make([]byte, db.pageSize*4)
  345. for i := 0; i < 2; i++ {
  346. p := db.pageInBuffer(buf[:], pgid(i))
  347. p.id = pgid(i)
  348. p.flags = metaPageFlag
  349. // Initialize the meta page.
  350. m := p.meta()
  351. m.magic = magic
  352. m.version = version
  353. m.pageSize = uint32(db.pageSize)
  354. m.freelist = 2
  355. m.root = bucket{root: 3}
  356. m.pgid = 4
  357. m.txid = txid(i)
  358. m.checksum = m.sum64()
  359. }
  360. // Write an empty freelist at page 3.
  361. p := db.pageInBuffer(buf[:], pgid(2))
  362. p.id = pgid(2)
  363. p.flags = freelistPageFlag
  364. p.count = 0
  365. // Write an empty leaf page at page 4.
  366. p = db.pageInBuffer(buf[:], pgid(3))
  367. p.id = pgid(3)
  368. p.flags = leafPageFlag
  369. p.count = 0
  370. // Write the buffer to our data file.
  371. if _, err := db.ops.writeAt(buf, 0); err != nil {
  372. return err
  373. }
  374. if err := fdatasync(db); err != nil {
  375. return err
  376. }
  377. return nil
  378. }
  379. // Close releases all database resources.
  380. // It will block waiting for any open transactions to finish
  381. // before closing the database and returning.
  382. func (db *DB) Close() error {
  383. db.rwlock.Lock()
  384. defer db.rwlock.Unlock()
  385. db.metalock.Lock()
  386. defer db.metalock.Unlock()
  387. db.mmaplock.Lock()
  388. defer db.mmaplock.Unlock()
  389. return db.close()
  390. }
  391. func (db *DB) close() error {
  392. if !db.opened {
  393. return nil
  394. }
  395. db.opened = false
  396. db.freelist = nil
  397. // Clear ops.
  398. db.ops.writeAt = nil
  399. // Close the mmap.
  400. if err := db.munmap(); err != nil {
  401. return err
  402. }
  403. // Close file handles.
  404. if db.file != nil {
  405. // No need to unlock read-only file.
  406. if !db.readOnly {
  407. // Unlock the file.
  408. if err := funlock(db); err != nil {
  409. log.Printf("bolt.Close(): funlock error: %s", err)
  410. }
  411. }
  412. // Close the file descriptor.
  413. if err := db.file.Close(); err != nil {
  414. return fmt.Errorf("db file close: %s", err)
  415. }
  416. db.file = nil
  417. }
  418. db.path = ""
  419. return nil
  420. }
  421. // Begin starts a new transaction.
  422. // Multiple read-only transactions can be used concurrently but only one
  423. // write transaction can be used at a time. Starting multiple write transactions
  424. // will cause the calls to block and be serialized until the current write
  425. // transaction finishes.
  426. //
  427. // Transactions should not be dependent on one another. Opening a read
  428. // transaction and a write transaction in the same goroutine can cause the
  429. // writer to deadlock because the database periodically needs to re-mmap itself
  430. // as it grows and it cannot do that while a read transaction is open.
  431. //
  432. // If a long running read transaction (for example, a snapshot transaction) is
  433. // needed, you might want to set DB.InitialMmapSize to a large enough value
  434. // to avoid potential blocking of write transaction.
  435. //
  436. // IMPORTANT: You must close read-only transactions after you are finished or
  437. // else the database will not reclaim old pages.
  438. func (db *DB) Begin(writable bool) (*Tx, error) {
  439. if writable {
  440. return db.beginRWTx()
  441. }
  442. return db.beginTx()
  443. }
  444. func (db *DB) beginTx() (*Tx, error) {
  445. // Lock the meta pages while we initialize the transaction. We obtain
  446. // the meta lock before the mmap lock because that's the order that the
  447. // write transaction will obtain them.
  448. db.metalock.Lock()
  449. // Obtain a read-only lock on the mmap. When the mmap is remapped it will
  450. // obtain a write lock so all transactions must finish before it can be
  451. // remapped.
  452. db.mmaplock.RLock()
  453. // Exit if the database is not open yet.
  454. if !db.opened {
  455. db.mmaplock.RUnlock()
  456. db.metalock.Unlock()
  457. return nil, ErrDatabaseNotOpen
  458. }
  459. // Create a transaction associated with the database.
  460. t := &Tx{}
  461. t.init(db)
  462. // Keep track of transaction until it closes.
  463. db.txs = append(db.txs, t)
  464. n := len(db.txs)
  465. // Unlock the meta pages.
  466. db.metalock.Unlock()
  467. // Update the transaction stats.
  468. db.statlock.Lock()
  469. db.stats.TxN++
  470. db.stats.OpenTxN = n
  471. db.statlock.Unlock()
  472. return t, nil
  473. }
  474. func (db *DB) beginRWTx() (*Tx, error) {
  475. // If the database was opened with Options.ReadOnly, return an error.
  476. if db.readOnly {
  477. return nil, ErrDatabaseReadOnly
  478. }
  479. // Obtain writer lock. This is released by the transaction when it closes.
  480. // This enforces only one writer transaction at a time.
  481. db.rwlock.Lock()
  482. // Once we have the writer lock then we can lock the meta pages so that
  483. // we can set up the transaction.
  484. db.metalock.Lock()
  485. defer db.metalock.Unlock()
  486. // Exit if the database is not open yet.
  487. if !db.opened {
  488. db.rwlock.Unlock()
  489. return nil, ErrDatabaseNotOpen
  490. }
  491. // Create a transaction associated with the database.
  492. t := &Tx{writable: true}
  493. t.init(db)
  494. db.rwtx = t
  495. db.freePages()
  496. return t, nil
  497. }
  498. // freePages releases any pages associated with closed read-only transactions.
  499. func (db *DB) freePages() {
  500. // Free all pending pages prior to earliest open transaction.
  501. sort.Sort(txsById(db.txs))
  502. minid := txid(0xFFFFFFFFFFFFFFFF)
  503. if len(db.txs) > 0 {
  504. minid = db.txs[0].meta.txid
  505. }
  506. if minid > 0 {
  507. db.freelist.release(minid - 1)
  508. }
  509. // Release unused txid extents.
  510. for _, t := range db.txs {
  511. db.freelist.releaseRange(minid, t.meta.txid-1)
  512. minid = t.meta.txid + 1
  513. }
  514. db.freelist.releaseRange(minid, txid(0xFFFFFFFFFFFFFFFF))
  515. // Any page both allocated and freed in an extent is safe to release.
  516. }
  517. type txsById []*Tx
  518. func (t txsById) Len() int { return len(t) }
  519. func (t txsById) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
  520. func (t txsById) Less(i, j int) bool { return t[i].meta.txid < t[j].meta.txid }
  521. // removeTx removes a transaction from the database.
  522. func (db *DB) removeTx(tx *Tx) {
  523. // Release the read lock on the mmap.
  524. db.mmaplock.RUnlock()
  525. // Use the meta lock to restrict access to the DB object.
  526. db.metalock.Lock()
  527. // Remove the transaction.
  528. for i, t := range db.txs {
  529. if t == tx {
  530. last := len(db.txs) - 1
  531. db.txs[i] = db.txs[last]
  532. db.txs[last] = nil
  533. db.txs = db.txs[:last]
  534. break
  535. }
  536. }
  537. n := len(db.txs)
  538. // Unlock the meta pages.
  539. db.metalock.Unlock()
  540. // Merge statistics.
  541. db.statlock.Lock()
  542. db.stats.OpenTxN = n
  543. db.stats.TxStats.add(&tx.stats)
  544. db.statlock.Unlock()
  545. }
  546. // Update executes a function within the context of a read-write managed transaction.
  547. // If no error is returned from the function then the transaction is committed.
  548. // If an error is returned then the entire transaction is rolled back.
  549. // Any error that is returned from the function or returned from the commit is
  550. // returned from the Update() method.
  551. //
  552. // Attempting to manually commit or rollback within the function will cause a panic.
  553. func (db *DB) Update(fn func(*Tx) error) error {
  554. t, err := db.Begin(true)
  555. if err != nil {
  556. return err
  557. }
  558. // Make sure the transaction rolls back in the event of a panic.
  559. defer func() {
  560. if t.db != nil {
  561. t.rollback()
  562. }
  563. }()
  564. // Mark as a managed tx so that the inner function cannot manually commit.
  565. t.managed = true
  566. // If an error is returned from the function then rollback and return error.
  567. err = fn(t)
  568. t.managed = false
  569. if err != nil {
  570. _ = t.Rollback()
  571. return err
  572. }
  573. return t.Commit()
  574. }
  575. // View executes a function within the context of a managed read-only transaction.
  576. // Any error that is returned from the function is returned from the View() method.
  577. //
  578. // Attempting to manually rollback within the function will cause a panic.
  579. func (db *DB) View(fn func(*Tx) error) error {
  580. t, err := db.Begin(false)
  581. if err != nil {
  582. return err
  583. }
  584. // Make sure the transaction rolls back in the event of a panic.
  585. defer func() {
  586. if t.db != nil {
  587. t.rollback()
  588. }
  589. }()
  590. // Mark as a managed tx so that the inner function cannot manually rollback.
  591. t.managed = true
  592. // If an error is returned from the function then pass it through.
  593. err = fn(t)
  594. t.managed = false
  595. if err != nil {
  596. _ = t.Rollback()
  597. return err
  598. }
  599. return t.Rollback()
  600. }
  601. // Batch calls fn as part of a batch. It behaves similar to Update,
  602. // except:
  603. //
  604. // 1. concurrent Batch calls can be combined into a single Bolt
  605. // transaction.
  606. //
  607. // 2. the function passed to Batch may be called multiple times,
  608. // regardless of whether it returns error or not.
  609. //
  610. // This means that Batch function side effects must be idempotent and
  611. // take permanent effect only after a successful return is seen in
  612. // caller.
  613. //
  614. // The maximum batch size and delay can be adjusted with DB.MaxBatchSize
  615. // and DB.MaxBatchDelay, respectively.
  616. //
  617. // Batch is only useful when there are multiple goroutines calling it.
  618. func (db *DB) Batch(fn func(*Tx) error) error {
  619. errCh := make(chan error, 1)
  620. db.batchMu.Lock()
  621. if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) {
  622. // There is no existing batch, or the existing batch is full; start a new one.
  623. db.batch = &batch{
  624. db: db,
  625. }
  626. db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger)
  627. }
  628. db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh})
  629. if len(db.batch.calls) >= db.MaxBatchSize {
  630. // wake up batch, it's ready to run
  631. go db.batch.trigger()
  632. }
  633. db.batchMu.Unlock()
  634. err := <-errCh
  635. if err == trySolo {
  636. err = db.Update(fn)
  637. }
  638. return err
  639. }
  640. type call struct {
  641. fn func(*Tx) error
  642. err chan<- error
  643. }
  644. type batch struct {
  645. db *DB
  646. timer *time.Timer
  647. start sync.Once
  648. calls []call
  649. }
  650. // trigger runs the batch if it hasn't already been run.
  651. func (b *batch) trigger() {
  652. b.start.Do(b.run)
  653. }
  654. // run performs the transactions in the batch and communicates results
  655. // back to DB.Batch.
  656. func (b *batch) run() {
  657. b.db.batchMu.Lock()
  658. b.timer.Stop()
  659. // Make sure no new work is added to this batch, but don't break
  660. // other batches.
  661. if b.db.batch == b {
  662. b.db.batch = nil
  663. }
  664. b.db.batchMu.Unlock()
  665. retry:
  666. for len(b.calls) > 0 {
  667. var failIdx = -1
  668. err := b.db.Update(func(tx *Tx) error {
  669. for i, c := range b.calls {
  670. if err := safelyCall(c.fn, tx); err != nil {
  671. failIdx = i
  672. return err
  673. }
  674. }
  675. return nil
  676. })
  677. if failIdx >= 0 {
  678. // take the failing transaction out of the batch. it's
  679. // safe to shorten b.calls here because db.batch no longer
  680. // points to us, and we hold the mutex anyway.
  681. c := b.calls[failIdx]
  682. b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1]
  683. // tell the submitter re-run it solo, continue with the rest of the batch
  684. c.err <- trySolo
  685. continue retry
  686. }
  687. // pass success, or bolt internal errors, to all callers
  688. for _, c := range b.calls {
  689. c.err <- err
  690. }
  691. break retry
  692. }
  693. }
  694. // trySolo is a special sentinel error value used for signaling that a
  695. // transaction function should be re-run. It should never be seen by
  696. // callers.
  697. var trySolo = errors.New("batch function returned an error and should be re-run solo")
  698. type panicked struct {
  699. reason interface{}
  700. }
  701. func (p panicked) Error() string {
  702. if err, ok := p.reason.(error); ok {
  703. return err.Error()
  704. }
  705. return fmt.Sprintf("panic: %v", p.reason)
  706. }
  707. func safelyCall(fn func(*Tx) error, tx *Tx) (err error) {
  708. defer func() {
  709. if p := recover(); p != nil {
  710. err = panicked{p}
  711. }
  712. }()
  713. return fn(tx)
  714. }
  715. // Sync executes fdatasync() against the database file handle.
  716. //
  717. // This is not necessary under normal operation, however, if you use NoSync
  718. // then it allows you to force the database file to sync against the disk.
  719. func (db *DB) Sync() error { return fdatasync(db) }
  720. // Stats retrieves ongoing performance stats for the database.
  721. // This is only updated when a transaction closes.
  722. func (db *DB) Stats() Stats {
  723. db.statlock.RLock()
  724. defer db.statlock.RUnlock()
  725. return db.stats
  726. }
  727. // This is for internal access to the raw data bytes from the C cursor, use
  728. // carefully, or not at all.
  729. func (db *DB) Info() *Info {
  730. return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize}
  731. }
  732. // page retrieves a page reference from the mmap based on the current page size.
  733. func (db *DB) page(id pgid) *page {
  734. pos := id * pgid(db.pageSize)
  735. return (*page)(unsafe.Pointer(&db.data[pos]))
  736. }
  737. // pageInBuffer retrieves a page reference from a given byte array based on the current page size.
  738. func (db *DB) pageInBuffer(b []byte, id pgid) *page {
  739. return (*page)(unsafe.Pointer(&b[id*pgid(db.pageSize)]))
  740. }
  741. // meta retrieves the current meta page reference.
  742. func (db *DB) meta() *meta {
  743. // We have to return the meta with the highest txid which doesn't fail
  744. // validation. Otherwise, we can cause errors when in fact the database is
  745. // in a consistent state. metaA is the one with the higher txid.
  746. metaA := db.meta0
  747. metaB := db.meta1
  748. if db.meta1.txid > db.meta0.txid {
  749. metaA = db.meta1
  750. metaB = db.meta0
  751. }
  752. // Use higher meta page if valid. Otherwise fallback to previous, if valid.
  753. if err := metaA.validate(); err == nil {
  754. return metaA
  755. } else if err := metaB.validate(); err == nil {
  756. return metaB
  757. }
  758. // This should never be reached, because both meta1 and meta0 were validated
  759. // on mmap() and we do fsync() on every write.
  760. panic("bolt.DB.meta(): invalid meta pages")
  761. }
  762. // allocate returns a contiguous block of memory starting at a given page.
  763. func (db *DB) allocate(txid txid, count int) (*page, error) {
  764. // Allocate a temporary buffer for the page.
  765. var buf []byte
  766. if count == 1 {
  767. buf = db.pagePool.Get().([]byte)
  768. } else {
  769. buf = make([]byte, count*db.pageSize)
  770. }
  771. p := (*page)(unsafe.Pointer(&buf[0]))
  772. p.overflow = uint32(count - 1)
  773. // Use pages from the freelist if they are available.
  774. if p.id = db.freelist.allocate(txid, count); p.id != 0 {
  775. return p, nil
  776. }
  777. // Resize mmap() if we're at the end.
  778. p.id = db.rwtx.meta.pgid
  779. var minsz = int((p.id+pgid(count))+1) * db.pageSize
  780. if minsz >= db.datasz {
  781. if err := db.mmap(minsz); err != nil {
  782. return nil, fmt.Errorf("mmap allocate error: %s", err)
  783. }
  784. }
  785. // Move the page id high water mark.
  786. db.rwtx.meta.pgid += pgid(count)
  787. return p, nil
  788. }
  789. // grow grows the size of the database to the given sz.
  790. func (db *DB) grow(sz int) error {
  791. // Ignore if the new size is less than available file size.
  792. if sz <= db.filesz {
  793. return nil
  794. }
  795. // If the data is smaller than the alloc size then only allocate what's needed.
  796. // Once it goes over the allocation size then allocate in chunks.
  797. if db.datasz < db.AllocSize {
  798. sz = db.datasz
  799. } else {
  800. sz += db.AllocSize
  801. }
  802. // Truncate and fsync to ensure file size metadata is flushed.
  803. // https://github.com/boltdb/bolt/issues/284
  804. if !db.NoGrowSync && !db.readOnly {
  805. if runtime.GOOS != "windows" {
  806. if err := db.file.Truncate(int64(sz)); err != nil {
  807. return fmt.Errorf("file resize error: %s", err)
  808. }
  809. }
  810. if err := db.file.Sync(); err != nil {
  811. return fmt.Errorf("file sync error: %s", err)
  812. }
  813. }
  814. db.filesz = sz
  815. return nil
  816. }
  817. func (db *DB) IsReadOnly() bool {
  818. return db.readOnly
  819. }
  820. func (db *DB) freepages() []pgid {
  821. tx, err := db.beginTx()
  822. defer func() {
  823. err = tx.Rollback()
  824. if err != nil {
  825. panic("freepages: failed to rollback tx")
  826. }
  827. }()
  828. if err != nil {
  829. panic("freepages: failed to open read only tx")
  830. }
  831. reachable := make(map[pgid]*page)
  832. nofreed := make(map[pgid]bool)
  833. ech := make(chan error)
  834. go func() {
  835. for e := range ech {
  836. panic(fmt.Sprintf("freepages: failed to get all reachable pages (%v)", e))
  837. }
  838. }()
  839. tx.checkBucket(&tx.root, reachable, nofreed, ech)
  840. close(ech)
  841. var fids []pgid
  842. for i := pgid(2); i < db.meta().pgid; i++ {
  843. if _, ok := reachable[i]; !ok {
  844. fids = append(fids, i)
  845. }
  846. }
  847. return fids
  848. }
  849. // Options represents the options that can be set when opening a database.
  850. type Options struct {
  851. // Timeout is the amount of time to wait to obtain a file lock.
  852. // When set to zero it will wait indefinitely. This option is only
  853. // available on Darwin and Linux.
  854. Timeout time.Duration
  855. // Sets the DB.NoGrowSync flag before memory mapping the file.
  856. NoGrowSync bool
  857. // Do not sync freelist to disk. This improves the database write performance
  858. // under normal operation, but requires a full database re-sync during recovery.
  859. NoFreelistSync bool
  860. // Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
  861. // grab a shared lock (UNIX).
  862. ReadOnly bool
  863. // Sets the DB.MmapFlags flag before memory mapping the file.
  864. MmapFlags int
  865. // InitialMmapSize is the initial mmap size of the database
  866. // in bytes. Read transactions won't block write transaction
  867. // if the InitialMmapSize is large enough to hold database mmap
  868. // size. (See DB.Begin for more information)
  869. //
  870. // If <=0, the initial map size is 0.
  871. // If initialMmapSize is smaller than the previous database size,
  872. // it takes no effect.
  873. InitialMmapSize int
  874. // PageSize overrides the default OS page size.
  875. PageSize int
  876. // NoSync sets the initial value of DB.NoSync. Normally this can just be
  877. // set directly on the DB itself when returned from Open(), but this option
  878. // is useful in APIs which expose Options but not the underlying DB.
  879. NoSync bool
  880. }
  881. // DefaultOptions represent the options used if nil options are passed into Open().
  882. // No timeout is used which will cause Bolt to wait indefinitely for a lock.
  883. var DefaultOptions = &Options{
  884. Timeout: 0,
  885. NoGrowSync: false,
  886. }
  887. // Stats represents statistics about the database.
  888. type Stats struct {
  889. // Freelist stats
  890. FreePageN int // total number of free pages on the freelist
  891. PendingPageN int // total number of pending pages on the freelist
  892. FreeAlloc int // total bytes allocated in free pages
  893. FreelistInuse int // total bytes used by the freelist
  894. // Transaction stats
  895. TxN int // total number of started read transactions
  896. OpenTxN int // number of currently open read transactions
  897. TxStats TxStats // global, ongoing stats.
  898. }
  899. // Sub calculates and returns the difference between two sets of database stats.
  900. // This is useful when obtaining stats at two different points and time and
  901. // you need the performance counters that occurred within that time span.
  902. func (s *Stats) Sub(other *Stats) Stats {
  903. if other == nil {
  904. return *s
  905. }
  906. var diff Stats
  907. diff.FreePageN = s.FreePageN
  908. diff.PendingPageN = s.PendingPageN
  909. diff.FreeAlloc = s.FreeAlloc
  910. diff.FreelistInuse = s.FreelistInuse
  911. diff.TxN = s.TxN - other.TxN
  912. diff.TxStats = s.TxStats.Sub(&other.TxStats)
  913. return diff
  914. }
  915. type Info struct {
  916. Data uintptr
  917. PageSize int
  918. }
  919. type meta struct {
  920. magic uint32
  921. version uint32
  922. pageSize uint32
  923. flags uint32
  924. root bucket
  925. freelist pgid
  926. pgid pgid
  927. txid txid
  928. checksum uint64
  929. }
  930. // validate checks the marker bytes and version of the meta page to ensure it matches this binary.
  931. func (m *meta) validate() error {
  932. if m.magic != magic {
  933. return ErrInvalid
  934. } else if m.version != version {
  935. return ErrVersionMismatch
  936. } else if m.checksum != 0 && m.checksum != m.sum64() {
  937. return ErrChecksum
  938. }
  939. return nil
  940. }
  941. // copy copies one meta object to another.
  942. func (m *meta) copy(dest *meta) {
  943. *dest = *m
  944. }
  945. // write writes the meta onto a page.
  946. func (m *meta) write(p *page) {
  947. if m.root.root >= m.pgid {
  948. panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
  949. } else if m.freelist >= m.pgid && m.freelist != pgidNoFreelist {
  950. // TODO: reject pgidNoFreeList if !NoFreelistSync
  951. panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
  952. }
  953. // Page id is either going to be 0 or 1 which we can determine by the transaction ID.
  954. p.id = pgid(m.txid % 2)
  955. p.flags |= metaPageFlag
  956. // Calculate the checksum.
  957. m.checksum = m.sum64()
  958. m.copy(p.meta())
  959. }
  960. // generates the checksum for the meta.
  961. func (m *meta) sum64() uint64 {
  962. var h = fnv.New64a()
  963. _, _ = h.Write((*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))[:])
  964. return h.Sum64()
  965. }
  966. // _assert will panic with a given formatted message if the given condition is false.
  967. func _assert(condition bool, msg string, v ...interface{}) {
  968. if !condition {
  969. panic(fmt.Sprintf("assertion failed: "+msg, v...))
  970. }
  971. }