Skip to content

Commit

Permalink
Use VarIntEncoding.TryRead in SnappyDecompressor (#104)
Browse files Browse the repository at this point in the history
Simplifies the code and shows improved performance on streaming
decompression for modern frameworks.

BenchmarkDotNet v0.14.0, Windows 11 (10.0.26100.2314) Unknown processor
.NET SDK 9.0.100
  [Host]     : .NET 9.0.0 (9.0.24.52809), X64 RyuJIT AVX2
  Job-SXYOZE : .NET Framework 4.8.1 (4.8.9282.0), X64 RyuJIT VectorSize=256
  Job-YZRTRU : .NET Framework 4.8.1 (4.8.9282.0), X64 RyuJIT VectorSize=256
  Job-LUMTGV : .NET 6.0.36 (6.0.3624.51421), X64 RyuJIT AVX2
  Job-WIRWZO : .NET 6.0.36 (6.0.3624.51421), X64 RyuJIT AVX2
  Job-CCWZUZ : .NET 8.0.11 (8.0.1124.51707), X64 RyuJIT AVX2
  Job-JRBCAZ : .NET 8.0.11 (8.0.1124.51707), X64 RyuJIT AVX2
  Job-AXYWAW : .NET 9.0.0 (9.0.24.52809), X64 RyuJIT AVX2
  Job-RYXATQ : .NET 9.0.0 (9.0.24.52809), X64 RyuJIT AVX2

| Method | Runtime | BuildConfiguration | ReadSize | Mean | Error | StdDev | Ratio | Rank |
|----------- |------------------- |------------------- |--------- |---------:|--------:|--------:|------:|-----:|
| Decompress | .NET Framework 4.8 | Previous | 16384 | 296.1 us | 1.37 us | 1.28 us | 1.00 | 1 |
| Decompress | .NET Framework 4.8 | Default | 16384 | 288.4 us | 1.22 us | 1.02 us | 0.97 | 1 |
| | | | | | | | | |
| Decompress | .NET 6.0 | Previous | 16384 | 141.9 us | 0.53 us | 0.47 us | 1.00 | 1 |
| Decompress | .NET 6.0 | Default | 16384 | 138.7 us | 0.53 us | 0.49 us | 0.98 | 1 |
| | | | | | | | | |
| Decompress | .NET 8.0 | Previous | 16384 | 159.5 us | 0.40 us | 0.38 us | 1.00 | 2 |
| Decompress | .NET 8.0 | Default | 16384 | 124.5 us | 0.50 us | 0.47 us | 0.78 | 1 |
| | | | | | | | | |
| Decompress | .NET 9.0 | Previous | 16384 | 142.4 us | 0.58 us | 0.54 us | 1.00 | 2 |
| Decompress | .NET 9.0 | Default | 16384 | 112.6 us | 0.37 us | 0.35 us | 0.79 | 1 |
  • Loading branch information
brantburnett authored Dec 1, 2024
1 parent b7b8105 commit 712659b
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 93 deletions.
2 changes: 2 additions & 0 deletions Snappier.Benchmarks/Snappier.Benchmarks.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
<Compile Remove="FindMatchLength.cs" />
<Compile Remove="IncrementalCopy.cs" />
<Compile Remove="Unaligned*.cs" />
<Compile Remove="Internal\VarIntEncodingReadTests.cs" />
<Compile Remove="Internal\VarIntEncodingWriteTests.cs" />
</ItemGroup>

</Project>
32 changes: 32 additions & 0 deletions Snappier.Tests/Internal/SnappyDecompressorTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,38 @@ namespace Snappier.Tests.Internal
{
public class SnappyDecompressorTests
{
#region Decompress

[Fact]
public void Decompress_SplitLength_Succeeds()
{
    // Arrange

    // An uncompressed length of 65536 requires 3 bytes to varint encode, which lets the
    // length prefix be delivered across separate Decompress calls.
    const int UncompressedLength = 65536;

    var expected = new byte[UncompressedLength];
    using var compressedOwner = Snappy.CompressToMemory(expected);
    var compressedBytes = compressedOwner.Memory.Span;

    using var sut = new SnappyDecompressor();

    // Act

    // Feed the first byte on its own
    sut.Decompress(compressedBytes.Slice(0, 1));
    Assert.True(sut.NeedMoreData);

    // Feed the second byte on its own
    sut.Decompress(compressedBytes.Slice(1, 1));
    Assert.True(sut.NeedMoreData);

    // Feed everything that remains in a single call
    sut.Decompress(compressedBytes.Slice(2));
    Assert.False(sut.NeedMoreData);

    using var output = sut.ExtractData();

    // Assert

    Assert.Equal(UncompressedLength, output.Memory.Length);
    Assert.True(output.Memory.Span.SequenceEqual(expected));
}

#endregion

#region DecompressAllTags

[Fact]
Expand Down
160 changes: 67 additions & 93 deletions Snappier/Internal/SnappyDecompressor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,27 @@ private struct ScratchBuffer
}

private ScratchBuffer _scratch;

private Span<byte> Scratch => _scratch;
#pragma warning restore CS0649 // Field is never assigned to, and will always have its default value
#pragma warning restore IDE0044
#pragma warning restore IDE0051
#else
private readonly byte[] _scratch = new byte[Constants.MaximumTagLength];

private Span<byte> Scratch => _scratch.AsSpan();
#endif

private uint _scratchLength = 0;
private int _scratchLength = 0;

private int _remainingLiteral;

private int _uncompressedLengthShift;
private int _uncompressedLength;

public bool NeedMoreData => !AllDataDecompressed && UnreadBytes == 0;

/// <summary>
/// Decompress a portion of the input.
/// </summary>
/// <param name="input">Input to process.</param>
/// <returns>Number of bytes processed from the input.</returns>
/// <remarks>
/// The first call to this method after construction or after a call to <see cref="Reset"/> starts at the
/// beginning of a new Snappy block, leading with the encoded block size. It may be called multiple times
Expand All @@ -51,16 +51,17 @@ public void Decompress(ReadOnlySpan<byte> input)
{
if (!ExpectedLength.HasValue)
{
int? readLength = ReadUncompressedLength(ref input);
if (readLength.HasValue)
OperationStatus status = TryReadUncompressedLength(input, out int bytesConsumed);
if (status == OperationStatus.InvalidData)
{
ExpectedLength = readLength.GetValueOrDefault();
ThrowHelper.ThrowInvalidOperationException("Invalid stream length");
}
else
else if (status != OperationStatus.Done)
{
// Not enough data yet to process the length
return;
}

input = input.Slice(bytesConsumed);
}

// Process any input into the write buffer
Expand Down Expand Up @@ -88,62 +89,74 @@ public void Reset()
_scratchLength = 0;
_remainingLiteral = 0;

_uncompressedLength = 0;
_uncompressedLengthShift = 0;

_lookbackPosition = 0;
_readPosition = 0;
ExpectedLength = null;
}

/// <summary>
/// Read the uncompressed length stored at the start of the compressed data.
/// </summary>
/// <param name="input">Input data, which should begin with the varint encoded uncompressed length.</param>
/// <returns>The length of the compressed data, or null if the length is not yet complete.</returns>
/// <remarks>
/// This variant is used when reading a stream, and will pause if there aren't enough bytes available
/// in the input. Subsequent calls with more data will resume processing.
/// </remarks>
private int? ReadUncompressedLength(ref ReadOnlySpan<byte> input)
private OperationStatus TryReadUncompressedLength(ReadOnlySpan<byte> input, out int bytesConsumed)
{
int result = _uncompressedLength;
int shift = _uncompressedLengthShift;
bool foundEnd = false;
OperationStatus status;

int i = 0;
while (input.Length > i)
if (_scratchLength > 0)
{
byte c = input[i];
i += 1;
// We have a partial length in the scratch buffer, so we need to finish reading that first
// The maximum tag length of 5 bytes is also the maximum varint length, so we can reuse _scratch

int val = c & 0x7f;
if (Helpers.LeftShiftOverflows((byte) val, shift))
{
ThrowHelper.ThrowInvalidOperationException("Invalid stream length");
}
// Copy the remaining bytes from the input to the scratch buffer
Span<byte> scratch = Scratch;
int toCopy = Math.Min(input.Length, scratch.Length - _scratchLength);
input.Slice(0, toCopy).CopyTo(scratch.Slice(_scratchLength));

result |= val << shift;
status = VarIntEncoding.TryRead(scratch.Slice(0, _scratchLength + toCopy), out uint length, out int scratchBytesConsumed);

if (c < 128)
switch (status)
{
foundEnd = true;
break;
}
case OperationStatus.Done:
ExpectedLength = (int)length;

shift += 7;
// The number of bytes consumed from the input is the number of bytes used by VarIntEncoding.TryRead
// less the number of bytes previously found in the scratch buffer
bytesConsumed = scratchBytesConsumed - _scratchLength;

if (shift >= 32)
{
ThrowHelper.ThrowInvalidOperationException("Invalid stream length");
// Reset scratch buffer
_scratchLength = 0;
break;

case OperationStatus.NeedMoreData:
// We consumed all the input, but still need more data to finish reading the length
bytesConsumed = toCopy;
_scratchLength += toCopy;

Debug.Assert(_scratchLength < scratch.Length);
break;

default:
bytesConsumed = 0;
break;
}
}
else
{
// No data in the scratch buffer, try to read directly from the input
status = VarIntEncoding.TryRead(input, out uint length, out bytesConsumed);

input = input.Slice(i);
_uncompressedLength = result;
_uncompressedLengthShift = shift;
switch (status)
{
case OperationStatus.Done:
ExpectedLength = (int)length;
break;

case OperationStatus.NeedMoreData:
// Copy all of the input to the scratch buffer
input.CopyTo(Scratch);
_scratchLength = input.Length;
bytesConsumed = input.Length;
break;
}
}

return foundEnd ? result : null;
return status;
}

/// <summary>
Expand All @@ -152,47 +165,8 @@ public void Reset()
/// <param name="input">Input data, which should begin with the varint encoded uncompressed length.</param>
/// <returns>The length of the uncompressed data.</returns>
/// <exception cref="InvalidDataException">Invalid stream length</exception>
public static int ReadUncompressedLength(ReadOnlySpan<byte> input)
{
int result = 0;
int shift = 0;
bool foundEnd = false;

int i = 0;
while (input.Length > 0)
{
byte c = input[i];
i += 1;

int val = c & 0x7f;
if (Helpers.LeftShiftOverflows((byte) val, shift))
{
ThrowHelper.ThrowInvalidDataException("Invalid stream length");
}

result |= val << shift;

if (c < 128)
{
foundEnd = true;
break;
}

shift += 7;

if (shift >= 32)
{
ThrowHelper.ThrowInvalidDataException("Invalid stream length");
}
}

if (!foundEnd)
{
ThrowHelper.ThrowInvalidDataException("Invalid stream length");
}

return result;
}
public static int ReadUncompressedLength(ReadOnlySpan<byte> input) =>
(int) VarIntEncoding.Read(input, out _);

internal void DecompressAllTags(ReadOnlySpan<byte> inputSpan)
{
Expand Down Expand Up @@ -451,10 +425,10 @@ private uint RefillTagFromScratch(ref byte input, ref byte inputEnd)
uint entry = Constants.CharTable[_scratch[0]];
uint needed = (entry >> 11) + 1; // +1 byte for 'c'

uint toCopy = Math.Min((uint)Unsafe.ByteOffset(ref input, ref inputEnd), needed - _scratchLength);
uint toCopy = Math.Min((uint)Unsafe.ByteOffset(ref input, ref inputEnd), needed - (uint) _scratchLength);
Unsafe.CopyBlockUnaligned(ref _scratch[(int)_scratchLength], ref input, toCopy);

_scratchLength += toCopy;
_scratchLength += (int) toCopy;

if (_scratchLength < needed)
{
Expand Down Expand Up @@ -491,7 +465,7 @@ private uint RefillTag(ref byte input, ref byte inputEnd)
// Data is insufficient, copy to scratch
Unsafe.CopyBlockUnaligned(ref _scratch[0], ref input, inputLength);

_scratchLength = inputLength;
_scratchLength = (int) inputLength;
return uint.MaxValue;
}

Expand Down Expand Up @@ -679,7 +653,7 @@ internal void WriteToBufferForTest(ReadOnlySpan<byte> toWrite)
/// <summary>
/// Load a byte array into _scratch, only used for testing.
/// </summary>
internal void LoadScratchForTest(byte[] newScratch, uint newScratchLength)
internal void LoadScratchForTest(byte[] newScratch, int newScratchLength)
{
ThrowHelper.ThrowIfNull(newScratch);
if (newScratchLength > ((ReadOnlySpan<byte>)_scratch).Length)
Expand Down

0 comments on commit 712659b

Please # to comment.