From 6fd5ab8dab21e1f658cda6768bfe61ce0325fcb8 Mon Sep 17 00:00:00 2001 From: Brant Burnett Date: Mon, 6 Feb 2023 23:21:05 -0500 Subject: [PATCH] Switch decompression buffer and op to use ref byte (#51) Motivation ---------- Eliminate decompression pinning which can help with GC when compression/decompression is run a lot. GC will be able to move memory even when in the middle of a compression or decompression run and update the ref pointers. Modifications ------------- Switch all usages of buffer, bufferEnd, and op to be `ref byte`. Results ------- Neutral or a performance improvement across all platforms. This also finishes eliminating pinning for decompression. BenchmarkDotNet=v0.13.4, OS=Windows 11 (10.0.22000.1455/21H2) Intel Core i7-10850H CPU 2.70GHz, 1 CPU, 12 logical and 6 physical cores .NET SDK=7.0.102 [Host] : .NET 7.0.2 (7.0.222.60605), X64 RyuJIT AVX2 MediumRun-.NET 6.0 : .NET 6.0.13 (6.0.1322.58009), X64 RyuJIT AVX2 MediumRun-.NET 7.0 : .NET 7.0.2 (7.0.222.60605), X64 RyuJIT AVX2 MediumRun-.NET Framework 4.8 : .NET Framework 4.8 (4.8.4515.0), X64 RyuJIT VectorSize=256 IterationCount=15 LaunchCount=2 WarmupCount=10 | Method | Job | Runtime | Mean | Error | StdDev | Median | Ratio | RatioSD | Code Size | |-------- |----------------------------- |------------------- |----------:|---------:|---------:|----------:|------:|--------:|----------:| | Pointer | MediumRun-.NET 6.0 | .NET 6.0 | 101.02 us | 0.460 us | 0.660 us | 100.74 us | 1.00 | 0.00 | 5,784 B | | Ref | MediumRun-.NET 6.0 | .NET 6.0 | 95.49 us | 0.446 us | 0.626 us | 95.63 us | 0.95 | 0.00 | 5,836 B | | | | | | | | | | | | | Pointer | MediumRun-.NET 7.0 | .NET 7.0 | 86.22 us | 0.749 us | 1.097 us | 85.74 us | 1.00 | 0.00 | 4,609 B | | Ref | MediumRun-.NET 7.0 | .NET 7.0 | 87.46 us | 1.091 us | 1.634 us | 86.54 us | 1.01 | 0.02 | 4,532 B | | | | | | | | | | | | | Pointer | MediumRun-.NET Framework 4.8 | .NET Framework 4.8 | 105.81 us | 1.126 us | 1.685 us | 104.87 us | 1.00 | 0.00 | 6,015 B | | 
Ref | MediumRun-.NET Framework 4.8 | .NET Framework 4.8 | 98.72 us | 1.483 us | 2.174 us | 99.05 us | 0.93 | 0.03 | 5,918 B | --- Snappier/Internal/SnappyDecompressor.cs | 366 ++++++++++++------------ 1 file changed, 183 insertions(+), 183 deletions(-) diff --git a/Snappier/Internal/SnappyDecompressor.cs b/Snappier/Internal/SnappyDecompressor.cs index 73e0806..9a8d1bf 100644 --- a/Snappier/Internal/SnappyDecompressor.cs +++ b/Snappier/Internal/SnappyDecompressor.cs @@ -195,221 +195,161 @@ internal unsafe void DecompressAllTags(ReadOnlySpan inputSpan) // Track the point in the input before which input is guaranteed to have at least Constants.MaxTagLength bytes left ref byte inputLimitMinMaxTagLength = ref Unsafe.Subtract(ref inputEnd, Math.Min(inputSpan.Length, Constants.MaximumTagLength - 1) - 1); - fixed (byte* buffer = _lookbackBuffer.Span) - { - byte* bufferEnd = buffer + _lookbackBuffer.Length; - byte* op = buffer + _lookbackPosition; + // We always allocate buffer with at least one extra byte on the end, so bufferEnd doesn't have the same + // restrictions as inputEnd. 
+ ref byte buffer = ref _lookbackBuffer.Span[0]; + ref byte bufferEnd = ref Unsafe.Add(ref buffer, _lookbackBuffer.Length); + ref byte op = ref Unsafe.Add(ref buffer, _lookbackPosition); - // Get a reference to the first byte in the scratch buffer, we'll reuse this so that we don't repeat range checks every time - ref byte scratch = ref _scratch[0]; + // Get a reference to the first byte in the scratch buffer, we'll reuse this so that we don't repeat range checks every time + ref byte scratch = ref _scratch[0]; - if (_scratchLength > 0) + if (_scratchLength > 0) + { + // Have partial tag remaining from a previous decompress run + // Get the combined tag in the scratch buffer, then run through + // special case processing that gets the tag from the scratch buffer + // and any literal data from the _input buffer + + // scratch will be the scratch buffer with only the tag if true is returned + (bool sufficientData, uint inputUsed) = RefillTagFromScratch(ref input, ref inputEnd, ref scratch); + input = ref Unsafe.Add(ref input, inputUsed); + if (!sufficientData) { - // Have partial tag remaining from a previous decompress run - // Get the combined tag in the scratch buffer, then run through - // special case processing that gets the tag from the scratch buffer - // and any literal data from the _input buffer - - // scratch will be the scratch buffer with only the tag if true is returned - (bool sufficientData, uint inputUsed) = RefillTagFromScratch(ref input, ref inputEnd, ref scratch); - input = ref Unsafe.Add(ref input, inputUsed); - if (!sufficientData) - { - return; - } + return; + } - // No more scratch for next cycle, we have a full buffer we're about to use - _scratchLength = 0; + // No more scratch for next cycle, we have a full buffer we're about to use + _scratchLength = 0; - byte c = scratch; - scratch = ref Unsafe.Add(ref scratch, 1); + byte c = scratch; + scratch = ref Unsafe.Add(ref scratch, 1); - if ((c & 0x03) == Constants.Literal) + if ((c & 0x03) == 
Constants.Literal) + { + nint literalLength = (c >> 2) + 1; + if (literalLength >= 61) { - nint literalLength = (c >> 2) + 1; - if (literalLength >= 61) - { - // Long literal. - nint literalLengthLength = literalLength - 60; - uint literalLengthTemp = Helpers.UnsafeReadUInt32(ref scratch); - - literalLength = (nint) Helpers.ExtractLowBytes(literalLengthTemp, - (int) literalLengthLength) + 1; - } + // Long literal. + nint literalLengthLength = literalLength - 60; + uint literalLengthTemp = Helpers.UnsafeReadUInt32(ref scratch); - nint inputRemaining = Unsafe.ByteOffset(ref input, ref inputEnd) + 1; - if (inputRemaining < literalLength) - { - Append(ref Unsafe.AsRef(op), ref Unsafe.AsRef(bufferEnd), in input, inputRemaining); - op += inputRemaining; - _remainingLiteral = (int) (literalLength - inputRemaining); - _lookbackPosition = (int)(op - buffer); - return; - } - else - { - Append(ref Unsafe.AsRef(op), ref Unsafe.AsRef(bufferEnd), in input, literalLength); - op += literalLength; - input = ref Unsafe.Add(ref input, literalLength); - } + literalLength = (nint) Helpers.ExtractLowBytes(literalLengthTemp, + (int) literalLengthLength) + 1; } - else if ((c & 3) == Constants.Copy4ByteOffset) - { - uint copyOffset = Helpers.UnsafeReadUInt32(ref scratch); - - nint length = (c >> 2) + 1; - AppendFromSelf(ref Unsafe.AsRef(op), ref Unsafe.AsRef(buffer), ref Unsafe.AsRef(bufferEnd), copyOffset, length); - op += length; + nint inputRemaining = Unsafe.ByteOffset(ref input, ref inputEnd) + 1; + if (inputRemaining < literalLength) + { + Append(ref op, ref bufferEnd, in input, inputRemaining); + op = ref Unsafe.Add(ref op, inputRemaining); + _remainingLiteral = (int) (literalLength - inputRemaining); + _lookbackPosition = (int)Unsafe.ByteOffset(ref buffer, ref op); // assign, not accumulate: op's offset from buffer already includes the prior position + return; } else { - ushort entry = charTable[c]; - uint data = Helpers.UnsafeReadUInt32(ref scratch); + Append(ref op, ref bufferEnd, in input, literalLength); + op = ref Unsafe.Add(ref op, literalLength); + input = 
ref Unsafe.Add(ref input, literalLength); + } + } + else if ((c & 3) == Constants.Copy4ByteOffset) + { + uint copyOffset = Helpers.UnsafeReadUInt32(ref scratch); - uint trailer = Helpers.ExtractLowBytes(data, c & 3); - nint length = entry & 0xff; + nint length = (c >> 2) + 1; - // copy_offset/256 is encoded in bits 8..10. By just fetching - // those bits, we get copy_offset (since the bit-field starts at - // bit 8). - uint copyOffset = (entry & 0x700u) + trailer; + AppendFromSelf(ref op, ref buffer, ref bufferEnd, copyOffset, length); + op = ref Unsafe.Add(ref op, length); + } + else + { + ushort entry = charTable[c]; + uint data = Helpers.UnsafeReadUInt32(ref scratch); - AppendFromSelf(ref Unsafe.AsRef(op), ref Unsafe.AsRef(buffer), ref Unsafe.AsRef(bufferEnd), copyOffset, length); - op += length; - } + uint trailer = Helpers.ExtractLowBytes(data, c & 3); + nint length = entry & 0xff; + + // copy_offset/256 is encoded in bits 8..10. By just fetching + // those bits, we get copy_offset (since the bit-field starts at + // bit 8). 
+ uint copyOffset = (entry & 0x700u) + trailer; - // Make sure scratch is reset - scratch = ref _scratch[0]; + AppendFromSelf(ref op, ref buffer, ref bufferEnd, copyOffset, length); + op = ref Unsafe.Add(ref op, length); } - if (!Unsafe.IsAddressLessThan(ref input, ref inputLimitMinMaxTagLength)) + // Make sure scratch is reset + scratch = ref _scratch[0]; + } + + if (!Unsafe.IsAddressLessThan(ref input, ref inputLimitMinMaxTagLength)) + { + uint newScratchLength = RefillTag(ref input, ref inputEnd, ref scratch); + if (newScratchLength == uint.MaxValue) { - uint newScratchLength = RefillTag(ref input, ref inputEnd, ref scratch); - if (newScratchLength == uint.MaxValue) - { - goto exit; - } + goto exit; + } - if (newScratchLength > 0) - { - // Data has been moved to the scratch buffer - input = ref scratch; - inputEnd = ref Unsafe.Add(ref input, newScratchLength - 1); - inputLimitMinMaxTagLength = ref Unsafe.Subtract(ref inputEnd, - Math.Min(newScratchLength, Constants.MaximumTagLength - 1) - 1); - } + if (newScratchLength > 0) + { + // Data has been moved to the scratch buffer + input = ref scratch; + inputEnd = ref Unsafe.Add(ref input, newScratchLength - 1); + inputLimitMinMaxTagLength = ref Unsafe.Subtract(ref inputEnd, + Math.Min(newScratchLength, Constants.MaximumTagLength - 1) - 1); } + } + + uint preload = Helpers.UnsafeReadUInt32(ref input); - uint preload = Helpers.UnsafeReadUInt32(ref input); + while (true) + { + byte c = (byte) preload; + input = ref Unsafe.Add(ref input, 1); - while (true) + if ((c & 0x03) == Constants.Literal) { - byte c = (byte) preload; - input = ref Unsafe.Add(ref input, 1); + nint literalLength = unchecked((c >> 2) + 1); - if ((c & 0x03) == Constants.Literal) + if (TryFastAppend(ref op, ref bufferEnd, in input, Unsafe.ByteOffset(ref input, ref inputEnd) + 1, literalLength)) { - nint literalLength = unchecked((c >> 2) + 1); - - if (TryFastAppend(ref Unsafe.AsRef(op), ref Unsafe.AsRef(bufferEnd), in input, Unsafe.ByteOffset(ref 
input, ref inputEnd) + 1, literalLength)) - { - Debug.Assert(literalLength < 61); - op += literalLength; - input = ref Unsafe.Add(ref input, literalLength); - // NOTE: There is no RefillTag here, as TryFastAppend() - // will not return true unless there's already at least five spare - // bytes in addition to the literal. - preload = Helpers.UnsafeReadUInt32(ref input); - continue; - } - - if (literalLength >= 61) - { - // Long literal. - nint literalLengthLength = literalLength - 60; - uint literalLengthTemp = Helpers.UnsafeReadUInt32(ref input); + Debug.Assert(literalLength < 61); + op = ref Unsafe.Add(ref op, literalLength); + input = ref Unsafe.Add(ref input, literalLength); + // NOTE: There is no RefillTag here, as TryFastAppend() + // will not return true unless there's already at least five spare + // bytes in addition to the literal. + preload = Helpers.UnsafeReadUInt32(ref input); + continue; + } - literalLength = (nint) Helpers.ExtractLowBytes(literalLengthTemp, - (int) literalLengthLength) + 1; + if (literalLength >= 61) + { + // Long literal. 
+ nint literalLengthLength = literalLength - 60; + uint literalLengthTemp = Helpers.UnsafeReadUInt32(ref input); - input = ref Unsafe.Add(ref input, literalLengthLength); - } + literalLength = (nint) Helpers.ExtractLowBytes(literalLengthTemp, + (int) literalLengthLength) + 1; - nint inputRemaining = Unsafe.ByteOffset(ref input, ref inputEnd) + 1; - if (inputRemaining < literalLength) - { - Append(ref Unsafe.AsRef(op), ref Unsafe.AsRef(bufferEnd), in input, inputRemaining); - op += inputRemaining; - _remainingLiteral = (int) (literalLength - inputRemaining); - goto exit; - } - else - { - Append(ref Unsafe.AsRef(op), ref Unsafe.AsRef(bufferEnd), in input, literalLength); - op += literalLength; - input = ref Unsafe.Add(ref input, literalLength); - - if (!Unsafe.IsAddressLessThan(ref input, ref inputLimitMinMaxTagLength)) - { - uint newScratchLength = RefillTag(ref input, ref inputEnd, ref scratch); - if (newScratchLength == uint.MaxValue) - { - goto exit; - } - - if (newScratchLength > 0) - { - // Data has been moved to the scratch buffer - input = ref scratch; - inputEnd = ref Unsafe.Add(ref input, newScratchLength - 1); - inputLimitMinMaxTagLength = ref Unsafe.Subtract(ref inputEnd, - Math.Min(newScratchLength, Constants.MaximumTagLength - 1) - 1); - - } - } + input = ref Unsafe.Add(ref input, literalLengthLength); + } - preload = Helpers.UnsafeReadUInt32(ref input); - } + nint inputRemaining = Unsafe.ByteOffset(ref input, ref inputEnd) + 1; + if (inputRemaining < literalLength) + { + Append(ref op, ref bufferEnd, in input, inputRemaining); + op = ref Unsafe.Add(ref op, inputRemaining); + _remainingLiteral = (int) (literalLength - inputRemaining); + goto exit; } else { - if ((c & 3) == Constants.Copy4ByteOffset) - { - uint copyOffset = Helpers.UnsafeReadUInt32(ref input); - input = ref Unsafe.Add(ref input, 4); - - nint length = (c >> 2) + 1; - AppendFromSelf(ref Unsafe.AsRef(op), ref Unsafe.AsRef(buffer), ref Unsafe.AsRef(bufferEnd), copyOffset, length); - op += 
length; - } - else - { - ushort entry = charTable[c]; - - // We don't use BitConverter to read because we might be reading past the end of the span - // But we know that's safe because we'll be doing it in _scratch with extra data on the end. - // This reduces this step by several operations - preload = Helpers.UnsafeReadUInt32(ref input); - - uint trailer = Helpers.ExtractLowBytes(preload, c & 3); - nint length = entry & 0xff; - - // copy_offset/256 is encoded in bits 8..10. By just fetching - // those bits, we get copy_offset (since the bit-field starts at - // bit 8). - uint copyOffset = (entry & 0x700u) + trailer; - - AppendFromSelf(ref Unsafe.AsRef(op), ref Unsafe.AsRef(buffer), ref Unsafe.AsRef(bufferEnd), copyOffset, length); - op += length; - - input = ref Unsafe.Add(ref input, c & 3); - - // By using the result of the previous load we reduce the critical - // dependency chain of ip to 4 cycles. - preload >>= (c & 3) * 8; - if (Unsafe.IsAddressLessThan(ref input, ref inputLimitMinMaxTagLength)) continue; - } + Append(ref op, ref bufferEnd, in input, literalLength); + op = ref Unsafe.Add(ref op, literalLength); + input = ref Unsafe.Add(ref input, literalLength); if (!Unsafe.IsAddressLessThan(ref input, ref inputLimitMinMaxTagLength)) { @@ -426,16 +366,76 @@ internal unsafe void DecompressAllTags(ReadOnlySpan inputSpan) inputEnd = ref Unsafe.Add(ref input, newScratchLength - 1); inputLimitMinMaxTagLength = ref Unsafe.Subtract(ref inputEnd, Math.Min(newScratchLength, Constants.MaximumTagLength - 1) - 1); + } } preload = Helpers.UnsafeReadUInt32(ref input); } } + else + { + if ((c & 3) == Constants.Copy4ByteOffset) + { + uint copyOffset = Helpers.UnsafeReadUInt32(ref input); + input = ref Unsafe.Add(ref input, 4); + + nint length = (c >> 2) + 1; + AppendFromSelf(ref op, ref buffer, ref bufferEnd, copyOffset, length); + op = ref Unsafe.Add(ref op, length); + } + else + { + ushort entry = charTable[c]; + + // We don't use BitConverter to read because we might be 
reading past the end of the span + // But we know that's safe because we'll be doing it in _scratch with extra data on the end. + // This reduces this step by several operations + preload = Helpers.UnsafeReadUInt32(ref input); + + uint trailer = Helpers.ExtractLowBytes(preload, c & 3); + nint length = entry & 0xff; + + // copy_offset/256 is encoded in bits 8..10. By just fetching + // those bits, we get copy_offset (since the bit-field starts at + // bit 8). + uint copyOffset = (entry & 0x700u) + trailer; + + AppendFromSelf(ref op, ref buffer, ref bufferEnd, copyOffset, length); + op = ref Unsafe.Add(ref op, length); + + input = ref Unsafe.Add(ref input, c & 3); - exit: ; // All input data is processed - _lookbackPosition = (int)(op - buffer); + // By using the result of the previous load we reduce the critical + // dependency chain of ip to 4 cycles. + preload >>= (c & 3) * 8; + if (Unsafe.IsAddressLessThan(ref input, ref inputLimitMinMaxTagLength)) continue; + } + + if (!Unsafe.IsAddressLessThan(ref input, ref inputLimitMinMaxTagLength)) + { + uint newScratchLength = RefillTag(ref input, ref inputEnd, ref scratch); + if (newScratchLength == uint.MaxValue) + { + goto exit; + } + + if (newScratchLength > 0) + { + // Data has been moved to the scratch buffer + input = ref scratch; + inputEnd = ref Unsafe.Add(ref input, newScratchLength - 1); + inputLimitMinMaxTagLength = ref Unsafe.Subtract(ref inputEnd, + Math.Min(newScratchLength, Constants.MaximumTagLength - 1) - 1); + } + } + + preload = Helpers.UnsafeReadUInt32(ref input); + } } + + exit: ; // All input data is processed + _lookbackPosition = (int)Unsafe.ByteOffset(ref buffer, ref op); } }