-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement a faster encoding algorithm for varint
In .NET 6 and later we can use intrinsics, especially on Intel, to encode the length as a varint more quickly. This isn't a huge difference given the overall cost of compression but may still be worthwhile.

BenchmarkDotNet v0.13.10, Windows 11 (10.0.22631.3880/23H2/2023Update/SunValley3)
12th Gen Intel Core i7-1270P, 1 CPU, 16 logical and 12 physical cores
.NET SDK 8.0.303
  [Host]     : .NET 8.0.7 (8.0.724.31311), X64 RyuJIT AVX2
  DefaultJob : .NET 8.0.7 (8.0.724.31311), X64 RyuJIT AVX2

| Method  | Value | Mean     | Error     | StdDev    | Ratio | Rank |
|-------- |------ |---------:|----------:|----------:|------:|-----:|
| Current | 0     | 1.385 ns | 0.0176 ns | 0.0164 ns | 1.00  | 2    |
| New     | 0     | 1.091 ns | 0.0200 ns | 0.0188 ns | 0.79  | 1    |
|         |       |          |           |           |       |      |
| Current | 256   | 1.599 ns | 0.0282 ns | 0.0220 ns | 1.00  | 2    |
| New     | 256   | 1.340 ns | 0.0104 ns | 0.0081 ns | 0.84  | 1    |
|         |       |          |           |           |       |      |
| Current | 65536 | 1.593 ns | 0.0132 ns | 0.0117 ns | 1.00  | 2    |
| New     | 65536 | 1.367 ns | 0.0181 ns | 0.0169 ns | 0.86  | 1    |
- Loading branch information
1 parent
e7e3934
commit 440a2c5
Showing
8 changed files
with
288 additions
and
50 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
opensource@google.com | ||
bburnett@centeredgesoftware.com | ||
info@couchbase.com |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#if !PREVIOUS | ||
|
||
using BenchmarkDotNet.Attributes; | ||
using Snappier.Internal; | ||
|
||
namespace Snappier.Benchmarks | ||
{ | ||
/// <summary>
/// BenchmarkDotNet benchmark that measures <see cref="VarIntEncoding.Write"/> for a
/// handful of input values.
/// </summary>
public class VarIntEncodingWrite
{
    // Destination buffer shared by every invocation. It is eight bytes long, which
    // satisfies the "buffer.Length >= sizeof(ulong)" check that guards the fast path
    // inside VarIntEncoding.Write.
    private readonly byte[] _dest = new byte[sizeof(ulong)];

    /// <summary>
    /// Value to encode. The benchmark is run once for each value listed in the attribute.
    /// </summary>
    [Params(0u, 256u, 65536u)]
    public uint Value { get; set; }

    /// <summary>
    /// Encodes <see cref="Value"/> into the shared destination buffer.
    /// </summary>
    /// <returns>The byte count reported by the encoder.</returns>
    [Benchmark(Baseline = true)]
    public int Baseline() => VarIntEncoding.Write(_dest, Value);
}
} | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using Snappier.Internal; | ||
using Xunit; | ||
|
||
namespace Snappier.Tests.Internal | ||
{ | ||
/// <summary>
/// Checks the bytes produced by <see cref="VarIntEncoding.Write"/> against known
/// varint encodings, using both a 5-byte and an 8-byte destination buffer.
/// </summary>
public class VarIntEncodingWriteTests
{
    /// <summary>
    /// Supplies (value, expected encoded bytes) pairs to the theories below.
    /// </summary>
    public static TheoryData<uint, byte[]> TestData()
    {
        var cases = new TheoryData<uint, byte[]>();

        // One-byte encodings
        cases.Add(0x00, [0x00]);
        cases.Add(0x01, [0x01]);
        cases.Add(0x7F, [0x7F]);

        // Two-byte encodings
        cases.Add(0x80, [0x80, 0x01]);
        cases.Add(0x555, [0xD5, 0x0A]);

        // Three-byte encodings
        cases.Add(0x7FFF, [0xFF, 0xFF, 0x01]);
        cases.Add(0xBFFF, [0xFF, 0xFF, 0x02]);
        cases.Add(0xFFFF, [0xFF, 0xFF, 0x03]);
        cases.Add(0x8000, [0x80, 0x80, 0x02]);
        cases.Add(0x5555, [0xD5, 0xAA, 0x01]);

        // Four- and five-byte encodings
        cases.Add(0xCAFEF00, [0x80, 0xDE, 0xBF, 0x65]);
        cases.Add(0xCAFEF00D, [0x8D, 0xE0, 0xFB, 0xD7, 0x0C]);
        cases.Add(0xFFFFFFFF, [0xFF, 0xFF, 0xFF, 0xFF, 0x0F]);

        return cases;
    }

    /// <summary>
    /// Encodes into a 5-byte buffer, which is too short for the 8-byte fast path.
    /// </summary>
    [Theory]
    [MemberData(nameof(TestData))]
    public void Test_Write(uint value, byte[] expected)
    {
        AssertEncodesTo(5, value, expected);
    }

    /// <summary>
    /// Encodes into an 8-byte buffer so the fast path can be taken where it is available.
    /// </summary>
    [Theory]
    [MemberData(nameof(TestData))]
    public void Test_WriteWithPadding(uint value, byte[] expected)
    {
        AssertEncodesTo(sizeof(ulong), value, expected);
    }

    // Encodes value into a fresh buffer of the requested size and compares the
    // bytes reported as written with the expected sequence.
    private static void AssertEncodesTo(int bufferSize, uint value, byte[] expected)
    {
        var buffer = new byte[bufferSize];

        int written = VarIntEncoding.Write(buffer, value);

        Assert.Equal(expected, buffer.Take(written));
    }
}
} | ||
|
||
/* ************************************************************ | ||
* | ||
* @author Couchbase <info@couchbase.com> | ||
* @copyright 2021 Couchbase, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
* ************************************************************/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
using System; | ||
|
||
namespace Snappier.Internal | ||
{ | ||
internal static partial class VarIntEncoding
{
    /// <summary>
    /// Encodes <paramref name="length"/> as a varint, one byte at a time, without
    /// relying on any CPU-specific instructions.
    /// </summary>
    /// <param name="output">Destination; bytes are written starting at index 0.</param>
    /// <param name="length">Value to encode.</param>
    /// <returns>Number of bytes written, from 1 to 5.</returns>
    private static int WriteSlow(Span<byte> output, uint length)
    {
        // High bit of a byte; when set, another byte follows.
        const uint continuationBit = 0x80;

        unchecked
        {
            // Each byte carries the next 7 bits of the value, lowest bits first.
            // Every byte except the final one also carries the continuation bit.
            // The casts to byte intentionally drop everything above the low 8 bits.

            if (length < (1u << 7))
            {
                output[0] = (byte) length;
                return 1;
            }

            output[0] = (byte) (length | continuationBit);

            if (length < (1u << 14))
            {
                output[1] = (byte) (length >> 7);
                return 2;
            }

            output[1] = (byte) ((length >> 7) | continuationBit);

            if (length < (1u << 21))
            {
                output[2] = (byte) (length >> 14);
                return 3;
            }

            output[2] = (byte) ((length >> 14) | continuationBit);

            if (length < (1u << 28))
            {
                output[3] = (byte) (length >> 21);
                return 4;
            }

            output[3] = (byte) ((length >> 21) | continuationBit);
            output[4] = (byte) (length >> 28);
            return 5;
        }
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
using System; | ||
|
||
#if NET6_0_OR_GREATER | ||
using System.Numerics; | ||
using System.Runtime.CompilerServices; | ||
using System.Runtime.InteropServices; | ||
using System.Runtime.Intrinsics.X86; | ||
#endif | ||
|
||
/* | ||
* This file is ported from https://github.com/couchbase/couchbase-net-client/blob/c10fe9ef09beadb8512f696d764b7a770429e641/src/Couchbase/Core/Utils/Leb128.cs | ||
* and therefore retains a Couchbase copyright. | ||
**/ | ||
|
||
namespace Snappier.Internal | ||
{ | ||
internal static partial class VarIntEncoding
{
    /// <summary>
    /// Largest number of bytes a 32-bit integer can occupy once encoded.
    /// </summary>
    public const int MaxLength = 5;

    /// <summary>
    /// Writes a value to the start of a buffer using little-endian varint encoding.
    /// </summary>
    /// <remarks>
    /// On .NET 6 and later, on a little-endian CPU and with at least eight bytes of buffer,
    /// values of 128 or more are stored with a single 8-byte write. In that case the bytes
    /// after the encoded value, up to index 7, are overwritten with zero.
    /// </remarks>
    /// <param name="buffer">Buffer to receive the value.</param>
    /// <param name="value">Value to encode.</param>
    /// <returns>Number of bytes encoded.</returns>
    public static int Write(Span<byte> buffer, uint value)
    {
        // Note: this method is likely to be inlined into the caller, which may let the JIT
        // drop the length check when it knows the buffer size. The BitConverter.IsLittleEndian
        // check is always resolved per CPU architecture.

#if NET6_0_OR_GREATER
        // The fast path stores a whole ulong even though at most 5 bytes are meaningful,
        // so it is only allowed when the buffer has room for all 8 bytes. It also relies
        // on little-endian byte order. Everything else takes the byte-by-byte path.
        if (!BitConverter.IsLittleEndian || buffer.Length < sizeof(ulong))
        {
            return WriteSlow(buffer, value);
        }

        return WriteFast(ref MemoryMarshal.GetReference(buffer), value);
#else
        return WriteSlow(buffer, value);
#endif
    }

#if NET6_0_OR_GREATER

    // Encodes value at the location referenced by buffer and returns the encoded length.
    // The unchecked 8-byte store below is only safe because Write never calls this
    // method without at least 8 bytes available behind the reference.
    private static int WriteFast(ref byte buffer, uint value)
    {
        if (value <= 0x7F)
        {
            // Zero must still produce one byte, so it needs special handling. Treat the
            // whole 0-127 range the same way: a single byte with no continuation bit.
            buffer = (byte)value;
            return 1;
        }

        // Lay the value out as 7-bit groups, one group per byte of an ulong.
        ulong groups = Spread7BitGroupsIntoBytes(value);

        // Whole zero bytes at the top of the ulong are not part of the encoding.
        int emptyBytes = BitOperations.LeadingZeroCount(groups) / 8;
        int encodedBytes = sizeof(ulong) - emptyBytes;

        // Every encoded byte except the last needs its continuation bit set. Shifting
        // the all-bytes pattern right by one byte more than the empty bytes clears the
        // bit in the last encoded byte and in everything above it.
        const ulong continuationInEveryByte = 0x8080_8080_8080_8080UL;
        ulong continuation = continuationInEveryByte >> ((emptyBytes + 1) * 8);

        Unsafe.WriteUnaligned(ref buffer, groups | continuation);

        return encodedBytes;
    }

    // Distributes the 32 bits of value across the low 5 bytes of an ulong in 7-bit
    // groups, leaving the high bit of each byte clear. This is LEB128 encoding without
    // the continuation bits.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static ulong Spread7BitGroupsIntoBytes(uint value)
    {
        // Only one of the three strategies below ends up in the JIT output, selected
        // by what the CPU supports at runtime.

        if (Bmi2.X64.IsSupported)
        {
            // 64-bit BMI2: a single bit deposit handles all 32 bits (4 + 4 * 7).
            return Bmi2.X64.ParallelBitDeposit(value, 0x0F_7F7F_7F7FUL);
        }

        if (Bmi2.IsSupported)
        {
            // Intel x86 branch: the 32-bit instruction deposits the low 28 bits, and
            // the top 4 bits are moved into the fifth byte by hand.
            ulong low28 = Bmi2.ParallelBitDeposit(value, 0x7F7F_7F7FU);
            ulong top4 = (ulong)(value >> 28) << 32;

            return low28 | top4;
        }

        // Fallback for unsupported CPUs (i.e. ARM): move each 7-bit group up by its
        // group index, then keep only the bits belonging to that group's byte.
        ulong wide = value;

        return (wide & 0x0000_0000_0000_007FUL)
            | ((wide << 1) & 0x0000_0000_0000_7F00UL)
            | ((wide << 2) & 0x0000_0000_007F_0000UL)
            | ((wide << 3) & 0x0000_0000_7F00_0000UL)
            | ((wide << 4) & 0x0000_000F_0000_0000UL);
    }

#endif
}
} | ||
|
||
/* ************************************************************ | ||
* | ||
* @author Couchbase <info@couchbase.com> | ||
* @copyright 2021 Couchbase, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
* ************************************************************/ |