From 712659ba5fad833725b2d58458a3693154eaf26a Mon Sep 17 00:00:00 2001 From: Brant Burnett Date: Sun, 1 Dec 2024 10:27:58 -0500 Subject: [PATCH] Use VarIntEncoding.TryRead in SnappyDecompressor (#104) Simplifies the code and shows improved performance on streaming decompression for modern frameworks. BenchmarkDotNet v0.14.0, Windows 11 (10.0.26100.2314) Unknown processor .NET SDK 9.0.100 [Host] : .NET 9.0.0 (9.0.24.52809), X64 RyuJIT AVX2 Job-SXYOZE : .NET Framework 4.8.1 (4.8.9282.0), X64 RyuJIT VectorSize=256 Job-YZRTRU : .NET Framework 4.8.1 (4.8.9282.0), X64 RyuJIT VectorSize=256 Job-LUMTGV : .NET 6.0.36 (6.0.3624.51421), X64 RyuJIT AVX2 Job-WIRWZO : .NET 6.0.36 (6.0.3624.51421), X64 RyuJIT AVX2 Job-CCWZUZ : .NET 8.0.11 (8.0.1124.51707), X64 RyuJIT AVX2 Job-JRBCAZ : .NET 8.0.11 (8.0.1124.51707), X64 RyuJIT AVX2 Job-AXYWAW : .NET 9.0.0 (9.0.24.52809), X64 RyuJIT AVX2 Job-RYXATQ : .NET 9.0.0 (9.0.24.52809), X64 RyuJIT AVX2 | Method | Runtime | BuildConfiguration | ReadSize | Mean | Error | StdDev | Ratio | Rank | |----------- |------------------- |------------------- |--------- |---------:|--------:|--------:|------:|-----:| | Decompress | .NET Framework 4.8 | Previous | 16384 | 296.1 us | 1.37 us | 1.28 us | 1.00 | 1 | | Decompress | .NET Framework 4.8 | Default | 16384 | 288.4 us | 1.22 us | 1.02 us | 0.97 | 1 | | | | | | | | | | | | Decompress | .NET 6.0 | Previous | 16384 | 141.9 us | 0.53 us | 0.47 us | 1.00 | 1 | | Decompress | .NET 6.0 | Default | 16384 | 138.7 us | 0.53 us | 0.49 us | 0.98 | 1 | | | | | | | | | | | | Decompress | .NET 8.0 | Previous | 16384 | 159.5 us | 0.40 us | 0.38 us | 1.00 | 2 | | Decompress | .NET 8.0 | Default | 16384 | 124.5 us | 0.50 us | 0.47 us | 0.78 | 1 | | | | | | | | | | | | Decompress | .NET 9.0 | Previous | 16384 | 142.4 us | 0.58 us | 0.54 us | 1.00 | 2 | | Decompress | .NET 9.0 | Default | 16384 | 112.6 us | 0.37 us | 0.35 us | 0.79 | 1 | --- .../Snappier.Benchmarks.csproj | 2 + .../Internal/SnappyDecompressorTests.cs | 32 
++++ Snappier/Internal/SnappyDecompressor.cs | 160 ++++++++---------- 3 files changed, 101 insertions(+), 93 deletions(-) diff --git a/Snappier.Benchmarks/Snappier.Benchmarks.csproj b/Snappier.Benchmarks/Snappier.Benchmarks.csproj index eed0b96..b7f929e 100644 --- a/Snappier.Benchmarks/Snappier.Benchmarks.csproj +++ b/Snappier.Benchmarks/Snappier.Benchmarks.csproj @@ -43,6 +43,8 @@ + + diff --git a/Snappier.Tests/Internal/SnappyDecompressorTests.cs b/Snappier.Tests/Internal/SnappyDecompressorTests.cs index be2c900..1bc09f0 100644 --- a/Snappier.Tests/Internal/SnappyDecompressorTests.cs +++ b/Snappier.Tests/Internal/SnappyDecompressorTests.cs @@ -7,6 +7,38 @@ namespace Snappier.Tests.Internal { public class SnappyDecompressorTests { + #region Decompress + + [Fact] + public void Decompress_SplitLength_Succeeds() + { + // Arrange + + using var decompressor = new SnappyDecompressor(); + + // Requires 3 bytes to varint encode the length + var data = new byte[65536]; + using var compressed = Snappy.CompressToMemory(data); + + // Act + + decompressor.Decompress(compressed.Memory.Span.Slice(0, 1)); + Assert.True(decompressor.NeedMoreData); + decompressor.Decompress(compressed.Memory.Span.Slice(1, 1)); + Assert.True(decompressor.NeedMoreData); + decompressor.Decompress(compressed.Memory.Span.Slice(2)); + Assert.False(decompressor.NeedMoreData); + + using var result = decompressor.ExtractData(); + + // Assert + + Assert.Equal(65536, result.Memory.Length); + Assert.True(result.Memory.Span.SequenceEqual(data)); + } + + #endregion + #region DecompressAllTags [Fact] diff --git a/Snappier/Internal/SnappyDecompressor.cs b/Snappier/Internal/SnappyDecompressor.cs index 871ee3f..79dcb09 100644 --- a/Snappier/Internal/SnappyDecompressor.cs +++ b/Snappier/Internal/SnappyDecompressor.cs @@ -20,27 +20,27 @@ private struct ScratchBuffer } private ScratchBuffer _scratch; + + private Span Scratch => _scratch; #pragma warning restore CS0649 // Field is never assigned to, and will always have 
its default value #pragma warning restore IDE0044 #pragma warning restore IDE0051 #else private readonly byte[] _scratch = new byte[Constants.MaximumTagLength]; + + private Span Scratch => _scratch.AsSpan(); #endif - private uint _scratchLength = 0; + private int _scratchLength = 0; private int _remainingLiteral; - private int _uncompressedLengthShift; - private int _uncompressedLength; - public bool NeedMoreData => !AllDataDecompressed && UnreadBytes == 0; /// /// Decompress a portion of the input. /// /// Input to process. - /// Number of bytes processed from the input. /// /// The first call to this method after construction or after a call to start at the /// beginning of a new Snappy block, leading with the encoded block size. It may be called multiple times @@ -51,16 +51,17 @@ public void Decompress(ReadOnlySpan input) { if (!ExpectedLength.HasValue) { - int? readLength = ReadUncompressedLength(ref input); - if (readLength.HasValue) + OperationStatus status = TryReadUncompressedLength(input, out int bytesConsumed); + if (status == OperationStatus.InvalidData) { - ExpectedLength = readLength.GetValueOrDefault(); + ThrowHelper.ThrowInvalidOperationException("Invalid stream length"); } - else + else if (status != OperationStatus.Done) { - // Not enough data yet to process the length return; } + + input = input.Slice(bytesConsumed); } // Process any input into the write buffer @@ -88,62 +89,74 @@ public void Reset() _scratchLength = 0; _remainingLiteral = 0; - _uncompressedLength = 0; - _uncompressedLengthShift = 0; - _lookbackPosition = 0; _readPosition = 0; ExpectedLength = null; } - /// - /// Read the uncompressed length stored at the start of the compressed data. - /// - /// Input data, which should begin with the varint encoded uncompressed length. - /// The length of the compressed data, or null if the length is not yet complete. - /// - /// This variant is used when reading a stream, and will pause if there aren't enough bytes available - /// in the input. 
Subsequent calls with more data will resume processing. - /// - private int? ReadUncompressedLength(ref ReadOnlySpan input) + private OperationStatus TryReadUncompressedLength(ReadOnlySpan input, out int bytesConsumed) { - int result = _uncompressedLength; - int shift = _uncompressedLengthShift; - bool foundEnd = false; + OperationStatus status; - int i = 0; - while (input.Length > i) + if (_scratchLength > 0) { - byte c = input[i]; - i += 1; + // We have a partial length in the scratch buffer, so we need to finish reading that first + // The maximum tag length of 5 bytes is also the maximum varint length, so we can reuse _scratch - int val = c & 0x7f; - if (Helpers.LeftShiftOverflows((byte) val, shift)) - { - ThrowHelper.ThrowInvalidOperationException("Invalid stream length"); - } + // Copy the remaining bytes from the input to the scratch buffer + Span scratch = Scratch; + int toCopy = Math.Min(input.Length, scratch.Length - _scratchLength); + input.Slice(0, toCopy).CopyTo(scratch.Slice(_scratchLength)); - result |= val << shift; + status = VarIntEncoding.TryRead(scratch.Slice(0, _scratchLength + toCopy), out uint length, out int scratchBytesConsumed); - if (c < 128) + switch (status) { - foundEnd = true; - break; - } + case OperationStatus.Done: + ExpectedLength = (int)length; - shift += 7; + // The number of bytes consumed from the input is the number of bytes used by VarIntEncoding.TryRead + // less the number of bytes previously found in the scratch buffer + bytesConsumed = scratchBytesConsumed - _scratchLength; - if (shift >= 32) - { - ThrowHelper.ThrowInvalidOperationException("Invalid stream length"); + // Reset scratch buffer + _scratchLength = 0; + break; + + case OperationStatus.NeedMoreData: + // We consumed all the input, but still need more data to finish reading the length + bytesConsumed = toCopy; + _scratchLength += toCopy; + + Debug.Assert(_scratchLength < scratch.Length); + break; + + default: + bytesConsumed = 0; + break; } } + else + { + // No 
data in the scratch buffer, try to read directly from the input + status = VarIntEncoding.TryRead(input, out uint length, out bytesConsumed); - input = input.Slice(i); - _uncompressedLength = result; - _uncompressedLengthShift = shift; + switch (status) + { + case OperationStatus.Done: + ExpectedLength = (int)length; + break; + + case OperationStatus.NeedMoreData: + // Copy all of the input to the scratch buffer + input.CopyTo(Scratch); + _scratchLength = input.Length; + bytesConsumed = input.Length; + break; + } + } - return foundEnd ? result : null; + return status; } /// @@ -152,47 +165,8 @@ public void Reset() /// Input data, which should begin with the varint encoded uncompressed length. /// The length of the uncompressed data. /// Invalid stream length - public static int ReadUncompressedLength(ReadOnlySpan input) - { - int result = 0; - int shift = 0; - bool foundEnd = false; - - int i = 0; - while (input.Length > 0) - { - byte c = input[i]; - i += 1; - - int val = c & 0x7f; - if (Helpers.LeftShiftOverflows((byte) val, shift)) - { - ThrowHelper.ThrowInvalidDataException("Invalid stream length"); - } - - result |= val << shift; - - if (c < 128) - { - foundEnd = true; - break; - } - - shift += 7; - - if (shift >= 32) - { - ThrowHelper.ThrowInvalidDataException("Invalid stream length"); - } - } - - if (!foundEnd) - { - ThrowHelper.ThrowInvalidDataException("Invalid stream length"); - } - - return result; - } + public static int ReadUncompressedLength(ReadOnlySpan input) => + (int) VarIntEncoding.Read(input, out _); internal void DecompressAllTags(ReadOnlySpan inputSpan) { @@ -451,10 +425,10 @@ private uint RefillTagFromScratch(ref byte input, ref byte inputEnd) uint entry = Constants.CharTable[_scratch[0]]; uint needed = (entry >> 11) + 1; // +1 byte for 'c' - uint toCopy = Math.Min((uint)Unsafe.ByteOffset(ref input, ref inputEnd), needed - _scratchLength); + uint toCopy = Math.Min((uint)Unsafe.ByteOffset(ref input, ref inputEnd), needed - (uint) 
_scratchLength); Unsafe.CopyBlockUnaligned(ref _scratch[(int)_scratchLength], ref input, toCopy); - _scratchLength += toCopy; + _scratchLength += (int) toCopy; if (_scratchLength < needed) { @@ -491,7 +465,7 @@ private uint RefillTag(ref byte input, ref byte inputEnd) // Data is insufficient, copy to scratch Unsafe.CopyBlockUnaligned(ref _scratch[0], ref input, inputLength); - _scratchLength = inputLength; + _scratchLength = (int) inputLength; return uint.MaxValue; } @@ -679,7 +653,7 @@ internal void WriteToBufferForTest(ReadOnlySpan toWrite) /// /// Load a byte array into _scratch, only used for testing. /// - internal void LoadScratchForTest(byte[] newScratch, uint newScratchLength) + internal void LoadScratchForTest(byte[] newScratch, int newScratchLength) { ThrowHelper.ThrowIfNull(newScratch); if (newScratchLength > ((ReadOnlySpan)_scratch).Length)