From 6837f6f6085080baa4410d69d216c2c69148c0e1 Mon Sep 17 00:00:00 2001 From: Ibrahim Jarif Date: Fri, 22 Nov 2019 16:55:02 +0530 Subject: [PATCH] Set level 15 as default compression level in ZSTD (#1111) The default level is 5. This PR sets the compression level to 15 which gives the best speed vs compression ratio trade-off. --- badger/options.go | 39 ++++++++++++++++++++++++++++++-------- badger/table/builder.go | 2 +- badger/table/table.go | 3 +++ badger/table/table_test.go | 9 +++++---- badger/y/zstd_cgo.go | 4 ++-- badger/y/zstd_nocgo.go | 2 +- 6 files changed, 43 insertions(+), 16 deletions(-) diff --git a/badger/options.go b/badger/options.go index b1121f399..0dbd00d07 100644 --- a/badger/options.go +++ b/badger/options.go @@ -72,9 +72,11 @@ type Options struct { ValueLogFileSize int64 ValueLogMaxEntries uint32 - NumCompactors int - CompactL0OnClose bool - LogRotatesToFlush int32 + NumCompactors int + CompactL0OnClose bool + LogRotatesToFlush int32 + ZSTDCompressionLevel int + // When set, checksum will be validated for each entry read from the value log file. VerifyValueChecksum bool @@ -128,6 +130,14 @@ func DefaultOptions(path string) Options { VerifyValueChecksum: false, Compression: defaultCompression, MaxCacheSize: 1 << 30, // 1 GB + // Benchmarking compression level against performance showed that level 15 gives + // the best speed vs ratio tradeoff. + // For a data size of 4KB we get + // Level: 3 Ratio: 2.72 Time: 24112 n/s + // Level: 10 Ratio: 2.95 Time: 75655 n/s + // Level: 15 Ratio: 4.38 Time: 239042 n/s + // See https://github.com/dgraph-io/badger/pull/1111#issue-338120757 + ZSTDCompressionLevel: 15, // Nothing to read/write value log using standard File I/O // MemoryMap to mmap() the value log files // (2^30 - 1)*2 when mmapping < 2^31 - 1, max int32. 
@@ -147,11 +157,12 @@ func DefaultOptions(path string) Options { func buildTableOptions(opt Options) table.Options { return table.Options{ - BlockSize: opt.BlockSize, - BloomFalsePositive: opt.BloomFalsePositive, - LoadingMode: opt.TableLoadingMode, - ChkMode: opt.ChecksumVerificationMode, - Compression: opt.Compression, + BlockSize: opt.BlockSize, + BloomFalsePositive: opt.BloomFalsePositive, + LoadingMode: opt.TableLoadingMode, + ChkMode: opt.ChecksumVerificationMode, + Compression: opt.Compression, + ZSTDCompressionLevel: opt.ZSTDCompressionLevel, } } @@ -532,3 +543,15 @@ func (opt Options) WithMaxCacheSize(size int64) Options { opt.MaxCacheSize = size return opt } + +// WithZSTDCompressionLevel returns a new Options value with ZSTDCompressionLevel set +// to the given value. +// +// The ZSTD compression algorithm supports 20 compression levels. Higher levels give a +// better compression ratio but compress more slowly; lower levels compress faster but +// give a worse compression ratio. +// The default value of ZSTDCompressionLevel is 15. 
+func (opt Options) WithZSTDCompressionLevel(cLevel int) Options { + opt.ZSTDCompressionLevel = cLevel + return opt +} diff --git a/badger/table/builder.go b/badger/table/builder.go index a869e48de..064b9e4fa 100644 --- a/badger/table/builder.go +++ b/badger/table/builder.go @@ -345,7 +345,7 @@ func (b *Builder) compressData(data []byte) ([]byte, error) { case options.Snappy: return snappy.Encode(nil, data), nil case options.ZSTD: - return y.ZSTDCompress(nil, data) + return y.ZSTDCompress(nil, data, b.opt.ZSTDCompressionLevel) } return nil, errors.New("Unsupported compression type") } diff --git a/badger/table/table.go b/badger/table/table.go index 0dfc9d2f0..9a9140119 100644 --- a/badger/table/table.go +++ b/badger/table/table.go @@ -69,6 +69,9 @@ type Options struct { Compression options.CompressionType Cache *ristretto.Cache + + // ZSTDCompressionLevel is the ZSTD compression level used for compressing blocks. + ZSTDCompressionLevel int } // TableInterface is useful for testing. diff --git a/badger/table/table_test.go b/badger/table/table_test.go index 56f9b3032..597595d48 100644 --- a/badger/table/table_test.go +++ b/badger/table/table_test.go @@ -46,10 +46,11 @@ func key(prefix string, i int) string { func getTestTableOptions() Options { return Options{ - Compression: options.ZSTD, - LoadingMode: options.LoadToRAM, - BlockSize: 4 * 1024, - BloomFalsePositive: 0.01, + Compression: options.ZSTD, + ZSTDCompressionLevel: 15, + LoadingMode: options.LoadToRAM, + BlockSize: 4 * 1024, + BloomFalsePositive: 0.01, } } diff --git a/badger/y/zstd_cgo.go b/badger/y/zstd_cgo.go index d0ec119ca..083b70aae 100644 --- a/badger/y/zstd_cgo.go +++ b/badger/y/zstd_cgo.go @@ -31,6 +31,6 @@ func ZSTDDecompress(dst, src []byte) ([]byte, error) { } // ZSTDCompress compresses a block using ZSTD algorithm. 
-func ZSTDCompress(dst, src []byte) ([]byte, error) { - return zstd.Compress(dst, src) +func ZSTDCompress(dst, src []byte, compressionLevel int) ([]byte, error) { + return zstd.CompressLevel(dst, src, compressionLevel) } diff --git a/badger/y/zstd_nocgo.go b/badger/y/zstd_nocgo.go index e79d51d55..5c33a9f97 100644 --- a/badger/y/zstd_nocgo.go +++ b/badger/y/zstd_nocgo.go @@ -33,6 +33,6 @@ func ZSTDDecompress(dst, src []byte) ([]byte, error) { } // ZSTDCompress compresses a block using ZSTD algorithm. -func ZSTDCompress(dst, src []byte) ([]byte, error) { +func ZSTDCompress(dst, src []byte, compressionLevel int) ([]byte, error) { return nil, errZstdCgo }