Skip to content

Commit

Permalink
feat(externalMagic): Introduce external magic number (#1745)
Browse files Browse the repository at this point in the history
Badger writes a magic number in the manifest. It helps Badger decide whether its current version is compatible with the data on disk. In this case, however, Badger's internal data storage has not changed, but the data format used by Dgraph has. This causes a problem if someone starts dgraph-2109 on an older Dgraph directory. This change adds an external magic number to Badger, which helps avoid data corruption by causing a panic if Dgraph is opened on the wrong directory.

There are 8 magic bytes in the manifest.
Prior to this change:

0-4: magic text
4-8: badgerMagicNumber
After this change:

0-4: magic text
4-6: externalMagicNumber
6-8: badgerMagicNumber
  • Loading branch information
ahsanbarkati authored Sep 16, 2021
1 parent 560e319 commit 2a9a524
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 19 deletions.
8 changes: 6 additions & 2 deletions badger/cmd/info.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ type flagOptions struct {
encryptionKey string
checksumVerificationMode string
discard bool
externalMagicVersion uint16
}

var (
Expand Down Expand Up @@ -82,6 +83,8 @@ func init() {
"[none, table, block, tableAndBlock] Specifies when the db should verify checksum for SST.")
infoCmd.Flags().BoolVar(&opt.discard, "discard", false,
"Parse and print DISCARD file from value logs.")
infoCmd.Flags().Uint16Var(&opt.externalMagicVersion, "external-magic", 0,
"External magic number")
}

var infoCmd = &cobra.Command{
Expand All @@ -104,7 +107,8 @@ func handleInfo(cmd *cobra.Command, args []string) error {
WithBlockCacheSize(100 << 20).
WithIndexCacheSize(200 << 20).
WithEncryptionKey([]byte(opt.encryptionKey)).
WithChecksumVerificationMode(cvMode)
WithChecksumVerificationMode(cvMode).
WithExternalMagic(opt.externalMagicVersion)

if opt.discard {
ds, err := badger.InitDiscardStats(bopt)
Expand Down Expand Up @@ -322,7 +326,7 @@ func printInfo(dir, valueDir string) error {
fp.Close()
}
}()
manifest, truncOffset, err := badger.ReplayManifestFile(fp)
manifest, truncOffset, err := badger.ReplayManifestFile(fp, opt.externalMagicVersion)
if err != nil {
return err
}
Expand Down
51 changes: 37 additions & 14 deletions manifest.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"fmt"
"hash/crc32"
"io"
"math"
"os"
"path/filepath"
"sync"
Expand Down Expand Up @@ -79,6 +80,10 @@ type TableManifest struct {
type manifestFile struct {
fp *os.File
directory string

// The external magic number used by the application running badger.
externalMagic uint16

// We make this configurable so that unit tests can hit rewrite() code quickly
deletionsRewriteThreshold int

Expand Down Expand Up @@ -124,11 +129,12 @@ func openOrCreateManifestFile(opt Options) (
if opt.InMemory {
return &manifestFile{inMemory: true, manifest: createManifest()}, Manifest{}, nil
}
return helpOpenOrCreateManifestFile(opt.Dir, opt.ReadOnly, manifestDeletionsRewriteThreshold)
return helpOpenOrCreateManifestFile(opt.Dir, opt.ReadOnly, opt.ExternalMagicVersion,
manifestDeletionsRewriteThreshold)
}

func helpOpenOrCreateManifestFile(dir string, readOnly bool, deletionsThreshold int) (
*manifestFile, Manifest, error) {
func helpOpenOrCreateManifestFile(dir string, readOnly bool, extMagic uint16,
deletionsThreshold int) (*manifestFile, Manifest, error) {

path := filepath.Join(dir, ManifestFilename)
var flags y.Flags
Expand All @@ -144,21 +150,22 @@ func helpOpenOrCreateManifestFile(dir string, readOnly bool, deletionsThreshold
return nil, Manifest{}, fmt.Errorf("no manifest found, required for read-only db")
}
m := createManifest()
fp, netCreations, err := helpRewrite(dir, &m)
fp, netCreations, err := helpRewrite(dir, &m, extMagic)
if err != nil {
return nil, Manifest{}, err
}
y.AssertTrue(netCreations == 0)
mf := &manifestFile{
fp: fp,
directory: dir,
externalMagic: extMagic,
manifest: m.clone(),
deletionsRewriteThreshold: deletionsThreshold,
}
return mf, m, nil
}

manifest, truncOffset, err := ReplayManifestFile(fp)
manifest, truncOffset, err := ReplayManifestFile(fp, extMagic)
if err != nil {
_ = fp.Close()
return nil, Manifest{}, err
Expand All @@ -179,6 +186,7 @@ func helpOpenOrCreateManifestFile(dir string, readOnly bool, deletionsThreshold
mf := &manifestFile{
fp: fp,
directory: dir,
externalMagic: extMagic,
manifest: manifest.clone(),
deletionsRewriteThreshold: deletionsThreshold,
}
Expand Down Expand Up @@ -237,20 +245,27 @@ func (mf *manifestFile) addChanges(changesParam []*pb.ManifestChange) error {
// Has to be 4 bytes. The value can never change, ever, anyway.
var magicText = [4]byte{'B', 'd', 'g', 'r'}

// The magic version number.
const magicVersion = 8
// The magic version number. It is allocated 2 bytes, so its value must be <= math.MaxUint16.
const badgerMagicVersion = 8

func helpRewrite(dir string, m *Manifest) (*os.File, int, error) {
func helpRewrite(dir string, m *Manifest, extMagic uint16) (*os.File, int, error) {
rewritePath := filepath.Join(dir, manifestRewriteFilename)
// We explicitly sync.
fp, err := y.OpenTruncFile(rewritePath, false)
if err != nil {
return nil, 0, err
}

// magic bytes are structured as
// +---------------------+-------------------------+-----------------------+
// | magicText (4 bytes) | externalMagic (2 bytes) | badgerMagic (2 bytes) |
// +---------------------+-------------------------+-----------------------+

y.AssertTrue(badgerMagicVersion <= math.MaxUint16)
buf := make([]byte, 8)
copy(buf[0:4], magicText[:])
binary.BigEndian.PutUint32(buf[4:8], magicVersion)
binary.BigEndian.PutUint16(buf[4:6], extMagic)
binary.BigEndian.PutUint16(buf[6:8], badgerMagicVersion)

netCreations := len(m.Tables)
changes := m.asChanges()
Expand Down Expand Up @@ -305,7 +320,7 @@ func (mf *manifestFile) rewrite() error {
if err := mf.fp.Close(); err != nil {
return err
}
fp, netCreations, err := helpRewrite(mf.directory, &mf.manifest)
fp, netCreations, err := helpRewrite(mf.directory, &mf.manifest, mf.externalMagic)
if err != nil {
return err
}
Expand Down Expand Up @@ -345,7 +360,7 @@ var (
// Also, returns the last offset after a completely read manifest entry -- the file must be
// truncated at that point before further appends are made (if there is a partial entry after
// that). In normal conditions, truncOffset is the file size.
func ReplayManifestFile(fp *os.File) (Manifest, int64, error) {
func ReplayManifestFile(fp *os.File, extMagic uint16) (Manifest, int64, error) {
r := countingReader{wrapped: bufio.NewReader(fp)}

var magicBuf [8]byte
Expand All @@ -355,14 +370,22 @@ func ReplayManifestFile(fp *os.File) (Manifest, int64, error) {
if !bytes.Equal(magicBuf[0:4], magicText[:]) {
return Manifest{}, 0, errBadMagic
}
version := y.BytesToU32(magicBuf[4:8])
if version != magicVersion {

extVersion := y.BytesToU16(magicBuf[4:6])
version := y.BytesToU16(magicBuf[6:8])

if version != badgerMagicVersion {
return Manifest{}, 0,
//nolint:lll
fmt.Errorf("manifest has unsupported version: %d (we support %d).\n"+
"Please see https://github.com/dgraph-io/badger/blob/master/README.md#i-see-manifest-has-unsupported-version-x-we-support-y-error"+
" on how to fix this.",
version, magicVersion)
version, badgerMagicVersion)
}
if extVersion != extMagic {
return Manifest{}, 0,
fmt.Errorf("Cannot open DB because the external magic number doesn't match. "+
"Expected: %d, version present in manifest: %d\n", extMagic, extVersion)
}

stat, err := fp.Stat()
Expand Down
6 changes: 3 additions & 3 deletions manifest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ func TestManifestMagic(t *testing.T) {
}

func TestManifestVersion(t *testing.T) {
helpTestManifestFileCorruption(t, 4, "unsupported version")
helpTestManifestFileCorruption(t, 6, "unsupported version")
}

func TestManifestChecksum(t *testing.T) {
Expand Down Expand Up @@ -213,7 +213,7 @@ func TestManifestRewrite(t *testing.T) {
require.NoError(t, err)
defer removeDir(dir)
deletionsThreshold := 10
mf, m, err := helpOpenOrCreateManifestFile(dir, false, deletionsThreshold)
mf, m, err := helpOpenOrCreateManifestFile(dir, false, 0, deletionsThreshold)
defer func() {
if mf != nil {
mf.close()
Expand All @@ -239,7 +239,7 @@ func TestManifestRewrite(t *testing.T) {
err = mf.close()
require.NoError(t, err)
mf = nil
mf, m, err = helpOpenOrCreateManifestFile(dir, false, deletionsThreshold)
mf, m, err = helpOpenOrCreateManifestFile(dir, false, 0, deletionsThreshold)
require.NoError(t, err)
require.Equal(t, map[uint64]TableManifest{
uint64(deletionsThreshold * 3): {Level: 0},
Expand Down
11 changes: 11 additions & 0 deletions options.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ type Options struct {
// NamespaceOffset specifies the offset from where the next 8 bytes contains the namespace.
NamespaceOffset int

// Magic version used by the application using badger to ensure that it doesn't open the DB
// with incompatible data format.
ExternalMagicVersion uint16

// Transaction start and commit timestamps are managed by end-user.
// This is only useful for databases built on top of Badger (like Dgraph).
// Not recommended for most users.
Expand Down Expand Up @@ -796,6 +800,13 @@ func (opt Options) WithNamespaceOffset(offset int) Options {
return opt
}

// WithExternalMagic returns a new Options value with ExternalMagicVersion set to the given value.
// The DB would fail to start if either the internal or the external magic number fails
// validation.
//
// The receiver is passed by value, so the Options the method is called on is left unchanged;
// only the returned copy carries the new magic number.
func (opt Options) WithExternalMagic(magic uint16) Options {
opt.ExternalMagicVersion = magic
return opt
}

func (opt Options) getFileFlags() int {
var flags int
// opt.SyncWrites would be using msync to sync. All writes go through mmap.
Expand Down
12 changes: 12 additions & 0 deletions y/y.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,18 @@ func (t *Throttle) Finish() error {
return t.finishErr
}

// U16ToBytes encodes v as a freshly allocated 2-byte slice in big-endian
// order (most significant byte first).
func U16ToBytes(v uint16) []byte {
	encoded := make([]byte, 2)
	binary.BigEndian.PutUint16(encoded, v)
	return encoded
}

// BytesToU16 converts the given byte slice to uint16.
//
// The bytes are interpreted in big-endian order via binary.BigEndian.Uint16,
// making this the inverse of U16ToBytes. b is expected to hold at least two
// bytes; the length is not checked here and is left to the binary package.
func BytesToU16(b []byte) uint16 {
return binary.BigEndian.Uint16(b)
}

// U32ToBytes converts the given Uint32 to bytes
func U32ToBytes(v uint32) []byte {
var uBuf [4]byte
Expand Down

0 comments on commit 2a9a524

Please # to comment.