Skip to content

Commit

Permalink
Implement a faster encoding algorithm for varint
Browse files Browse the repository at this point in the history
In .NET 6 and later we can use intrinsics, especially on Intel, to
encode the length as a varint more quickly. This isn't a huge difference
given the overall cost of compression but may still be worthwhile.

BenchmarkDotNet v0.13.10, Windows 11 (10.0.22631.3880/23H2/2023Update/SunValley3)
12th Gen Intel Core i7-1270P, 1 CPU, 16 logical and 12 physical cores
.NET SDK 8.0.303
  [Host]     : .NET 8.0.7 (8.0.724.31311), X64 RyuJIT AVX2
  DefaultJob : .NET 8.0.7 (8.0.724.31311), X64 RyuJIT AVX2

| Method  | Value | Mean     | Error     | StdDev    | Ratio | Rank |
|-------- |------ |---------:|----------:|----------:|------:|-----:|
| Current | 0     | 1.385 ns | 0.0176 ns | 0.0164 ns |  1.00 |    2 |
| New     | 0     | 1.091 ns | 0.0200 ns | 0.0188 ns |  0.79 |    1 |
|         |       |          |           |           |       |      |
| Current | 256   | 1.599 ns | 0.0282 ns | 0.0220 ns |  1.00 |    2 |
| New     | 256   | 1.340 ns | 0.0104 ns | 0.0081 ns |  0.84 |    1 |
|         |       |          |           |           |       |      |
| Current | 65536 | 1.593 ns | 0.0132 ns | 0.0117 ns |  1.00 |    2 |
| New     | 65536 | 1.367 ns | 0.0181 ns | 0.0169 ns |  0.86 |    1 |
  • Loading branch information
brantburnett committed Jul 12, 2024
1 parent e7e3934 commit 440a2c5
Show file tree
Hide file tree
Showing 8 changed files with 288 additions and 50 deletions.
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
opensource@google.com
bburnett@centeredgesoftware.com
info@couchbase.com
2 changes: 1 addition & 1 deletion Snappier.Benchmarks/Snappier.Benchmarks.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
<ProjectReference Include="..\Snappier\Snappier.csproj" />
</ItemGroup>
<ItemGroup Condition=" '$(Configuration)' == 'Previous' ">
<PackageReference Include="Snappier" Version="1.1.3" />
<PackageReference Include="Snappier" Version="1.1.6" />

<Compile Remove="FindMatchLength.cs" />
<Compile Remove="IncrementalCopy.cs" />
Expand Down
23 changes: 23 additions & 0 deletions Snappier.Benchmarks/VarIntEncodingWrite.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#if !PREVIOUS

using BenchmarkDotNet.Attributes;
using Snappier.Internal;

namespace Snappier.Benchmarks
{
/// <summary>
/// Benchmarks <c>VarIntEncoding.Write</c> with values whose encodings are
/// one byte (0), two bytes (256), and three bytes (65536) long.
/// </summary>
public class VarIntEncodingWrite
{
    // Destination buffer shared by all invocations; eight bytes long.
    private readonly byte[] _dest = new byte[sizeof(ulong)];

    /// <summary>
    /// Value to encode on each benchmark invocation.
    /// </summary>
    [Params(0u, 256u, 65536u)]
    public uint Value { get; set; }

    /// <summary>
    /// Encodes <see cref="Value"/> into the shared buffer and returns the encoded length.
    /// </summary>
    [Benchmark(Baseline = true)]
    public int Baseline() => VarIntEncoding.Write(_dest, Value);
}
}

#endif
69 changes: 69 additions & 0 deletions Snappier.Tests/Internal/VarIntEncodingWriteTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Snappier.Internal;
using Xunit;

namespace Snappier.Tests.Internal
{
/// <summary>
/// Tests for <c>VarIntEncoding.Write</c>, covering both a buffer that is only as
/// large as the longest encoding and a buffer with at least 8 bytes available.
/// </summary>
public class VarIntEncodingWriteTests
{
    /// <summary>
    /// Values paired with their expected varint encoding (7 payload bits per byte,
    /// least significant group first, high bit set on every byte except the last).
    /// </summary>
    public static TheoryData<uint, byte[]> TestData() =>
        new() {
            // One-byte encodings, including the largest one
            { 0x00, [ 0x00 ] },
            { 0x01, [ 0x01 ] },
            { 0x7F, [ 0x7F ] },

            // Two-byte encodings, smallest through largest
            { 0x80, [ 0x80, 0x01 ] },
            { 0x555, [ 0xD5, 0x0A ] },
            { 0x3FFF, [ 0xFF, 0x7F ] },

            // Three-byte encodings, smallest through largest
            { 0x4000, [ 0x80, 0x80, 0x01 ] },
            { 0x5555, [ 0xD5, 0xAA, 0x01 ] },
            { 0x7FFF, [ 0xFF, 0xFF, 0x01 ] },
            { 0x8000, [ 0x80, 0x80, 0x02 ] },
            { 0xBFFF, [ 0xFF, 0xFF, 0x02 ] },
            { 0xFFFF, [ 0xFF, 0xFF, 0x03 ] },
            { 0x1FFFFF, [ 0xFF, 0xFF, 0x7F ] },

            // Four-byte encodings, smallest through largest
            { 0x200000, [ 0x80, 0x80, 0x80, 0x01 ] },
            { 0xCAFEF00, [ 0x80, 0xDE, 0xBF, 0x65 ] },
            { 0xFFFFFFF, [ 0xFF, 0xFF, 0xFF, 0x7F ] },

            // Five-byte encodings, smallest through largest
            { 0x10000000, [ 0x80, 0x80, 0x80, 0x80, 0x01 ] },
            { 0xCAFEF00D, [ 0x8D, 0xE0, 0xFB, 0xD7, 0x0C ] },
            { 0xFFFFFFFF, [ 0xFF, 0xFF, 0xFF, 0xFF, 0x0F ] },
        };

    [Theory]
    [MemberData(nameof(TestData))]
    public void Test_Write(uint value, byte[] expected)
    {
        // A buffer shorter than 8 bytes, so the 8-byte fast path cannot be selected

        var bytes = new byte[VarIntEncoding.MaxLength];

        var length = VarIntEncoding.Write(bytes, value);

        // Check the length separately so a wrong byte count is reported directly
        Assert.Equal(expected.Length, length);
        Assert.Equal(expected, bytes.Take(length));
    }

    [Theory]
    [MemberData(nameof(TestData))]
    public void Test_WriteWithPadding(uint value, byte[] expected)
    {
        // Test of the fast path where there are at least 8 bytes in the buffer

        var bytes = new byte[sizeof(ulong)];

        var length = VarIntEncoding.Write(bytes, value);

        // Check the length separately so a wrong byte count is reported directly
        Assert.Equal(expected.Length, length);
        Assert.Equal(expected, bytes.Take(length));
    }
}
}

/* ************************************************************
*
* @author Couchbase <info@couchbase.com>
* @copyright 2021 Couchbase, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* ************************************************************/
8 changes: 4 additions & 4 deletions Snappier.Tests/Snappier.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@

<ItemGroup>
<PackageReference Include="JUnitTestLogger" Version="1.1.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.8.0" />
<PackageReference Include="xunit" Version="2.6.2" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.5.4">
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.10.0" />
<PackageReference Include="xunit" Version="2.9.0" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.8.2">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="coverlet.collector" Version="6.0.0">
<PackageReference Include="coverlet.collector" Version="6.0.2">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
Expand Down
46 changes: 1 addition & 45 deletions Snappier/Internal/SnappyCompressor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public int Compress(ReadOnlySpan<byte> input, Span<byte> output)

_workingMemory.EnsureCapacity(input.Length);

int bytesWritten = WriteUncompressedLength(output, input.Length);
int bytesWritten = VarIntEncoding.Write(output, (uint)input.Length);
output = output.Slice(bytesWritten);

while (input.Length > 0)
Expand Down Expand Up @@ -69,50 +69,6 @@ public void Dispose()
_workingMemory = null;
}

/// <summary>
/// Writes <paramref name="length"/> to the start of <paramref name="output"/> as a
/// varint: 7 bits per byte, lowest group first, with the high bit set on every
/// byte except the last.
/// </summary>
/// <param name="output">Destination; indexes 0 through (return value - 1) are written.</param>
/// <param name="length">Value to encode.</param>
/// <returns>Number of bytes written, from 1 to 5.</returns>
private static int WriteUncompressedLength(Span<byte> output, int length)
{
    const int continuation = 0b1000_0000;

    unchecked
    {
        // Single byte: the value fits in 7 bits, so no continuation bit is needed.
        if (length < (1 << 7))
        {
            output[0] = (byte) length;
            return 1;
        }

        // Every longer encoding starts with the low 7 bits plus a continuation bit.
        output[0] = (byte) (length | continuation);
        if (length < (1 << 14))
        {
            output[1] = (byte) (length >> 7);
            return 2;
        }

        output[1] = (byte) ((length >> 7) | continuation);
        if (length < (1 << 21))
        {
            output[2] = (byte) (length >> 14);
            return 3;
        }

        output[2] = (byte) ((length >> 14) | continuation);
        if (length < (1 << 28))
        {
            output[3] = (byte) (length >> 21);
            return 4;
        }

        output[3] = (byte) ((length >> 21) | continuation);
        output[4] = (byte) (length >> 28);
        return 5;
    }
}

#region CompressFragment

private static int CompressFragment(ReadOnlySpan<byte> input, Span<byte> output, Span<ushort> tableSpan)
Expand Down
51 changes: 51 additions & 0 deletions Snappier/Internal/VarIntEncoding.Write.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
using System;

namespace Snappier.Internal
{
internal static partial class VarIntEncoding
{
    /// <summary>
    /// Encodes <paramref name="length"/> as a varint one byte at a time, touching only
    /// the bytes that belong to the encoding.
    /// </summary>
    /// <param name="output">Destination; indexes 0 through (return value - 1) are written.</param>
    /// <param name="length">Value to encode.</param>
    /// <returns>Number of bytes written, from 1 to 5.</returns>
    private static int WriteSlow(Span<byte> output, uint length)
    {
        const uint continuationBit = 0b1000_0000;

        int written = 0;

        unchecked
        {
            // Emit 7 bits at a time, lowest group first. While more than 7 bits remain,
            // the byte carries the continuation bit and the value is shifted down.
            while (length >= continuationBit)
            {
                output[written] = (byte) (length | continuationBit);
                written++;
                length >>= 7;
            }

            // Final group: fewer than 8 significant bits remain, continuation bit clear.
            output[written] = (byte) length;
        }

        return written + 1;
    }
}
}
138 changes: 138 additions & 0 deletions Snappier/Internal/VarIntEncoding.WriteFast.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
using System;

#if NET6_0_OR_GREATER
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics.X86;
#endif

/*
* This file is ported from https://github.com/couchbase/couchbase-net-client/blob/c10fe9ef09beadb8512f696d764b7a770429e641/src/Couchbase/Core/Utils/Leb128.cs
* and therefore retains a Couchbase copyright.
**/

namespace Snappier.Internal
{
internal static partial class VarIntEncoding
{
    /// <summary>
    /// Largest number of bytes that encoding a 32-bit integer can produce.
    /// </summary>
    public const int MaxLength = 5;

    /// <summary>
    /// Encodes a value onto a buffer using little-endian varint encoding.
    /// </summary>
    /// <param name="buffer">Buffer to receive the value.</param>
    /// <param name="value">Value to encode.</param>
    /// <returns>Number of bytes encoded.</returns>
    /// <remarks>
    /// On .NET 6 and later, when the CPU is little-endian and <paramref name="buffer"/> holds
    /// at least 8 bytes, the value is stored with a single 8-byte write. In that case the
    /// bytes after the encoded value, up to the eighth byte of the buffer, are set to zero.
    /// </remarks>
    public static int Write(Span<byte> buffer, uint value)
    {
        // Note: the expectation is that this method gets inlined into the caller, which may
        // let the JIT drop the length check when it knows the buffer size. The
        // BitConverter.IsLittleEndian check is resolved per CPU architecture.

#if NET6_0_OR_GREATER
        // The fast path copies a whole ulong from a register to the buffer even though at
        // most 5 bytes are meaningful, so it needs 8 bytes of room to avoid a buffer
        // overrun. It also relies on little-endian byte order.
        if (!BitConverter.IsLittleEndian || buffer.Length < sizeof(ulong))
        {
            return WriteSlow(buffer, value);
        }

        return WriteFast(ref MemoryMarshal.GetReference(buffer), value);
#else
        return WriteSlow(buffer, value);
#endif
    }

#if NET6_0_OR_GREATER

    /// <summary>
    /// Encodes <paramref name="value"/> starting at <paramref name="buffer"/>. Values of 128
    /// or more are stored with one unaligned 8-byte write.
    /// </summary>
    /// <param name="buffer">Reference to the first byte of a destination with at least 8 bytes available.</param>
    /// <param name="value">Value to encode.</param>
    /// <returns>Number of bytes encoded.</returns>
    private static int WriteFast(ref byte buffer, uint value)
    {
        // The unsafe write below is only safe because the sole caller guarantees
        // at least 8 bytes of space behind the reference.

        if (value >= 128)
        {
            // Lay the value out as 7-bit groups, one group per byte, high bits clear.
            ulong payload = Spread7BitGroupsIntoBytes(value);

            // Whole zero bytes at the top of the ulong are not part of the encoding.
            int spareBytes = BitOperations.LeadingZeroCount(payload) >> 3; // divide by 8

            // Continuation bits go on every encoded byte except the last one, so the
            // all-bytes pattern is shifted down by the spare bytes plus one.
            int continuationShift = (spareBytes + 1) * 8;
            ulong continuationBits = 0x8080808080808080UL >> continuationShift;

            Unsafe.WriteUnaligned(ref buffer, payload | continuationBits);

            return sizeof(ulong) - spareBytes;
        }

        // 0-127 encode as a single byte with no continuation bit. Zero must be handled
        // here regardless, because it still has to produce one byte.
        buffer = (byte)value;
        return 1;
    }

    /// <summary>
    /// Spreads the 32 bits of <paramref name="value"/> across the low 5 bytes of a ulong in
    /// 7-bit groups, leaving the high bit of each byte clear. This is the varint layout
    /// without the continuation bits.
    /// </summary>
    /// <param name="value">Value to spread.</param>
    /// <returns>The 7-bit groups of <paramref name="value"/>, one per byte, lowest group in the lowest byte.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static ulong Spread7BitGroupsIntoBytes(uint value)
    {
        // The IsSupported checks are intended to be resolved by the JIT at runtime, so only
        // one of the three paths ends up in the generated code.

        if (Bmi2.X64.IsSupported)
        {
            // 64-bit BMI2: a single bit deposit covers all five groups (4 + 7 + 7 + 7 + 7 bits).
            const ulong depositMask = 0xf_7f_7f_7f_7fUL;
            return Bmi2.X64.ParallelBitDeposit(value, depositMask);
        }

        if (Bmi2.IsSupported)
        {
            // 32-bit BMI2: deposit the low 28 bits, then place the top 4 bits manually.
            ulong lowGroups = Bmi2.ParallelBitDeposit(value, 0x7f7f7f7fU);
            ulong topGroup = (value & 0xf0000000UL) << 4;
            return lowGroups | topGroup;
        }

        // Fallback for CPUs without BMI2 (i.e. ARM): shift each 7-bit group into its byte.
        ulong wide = value;
        ulong spread = wide & 0x0000007fUL;
        spread |= (wide & 0x00003f80UL) << 1;
        spread |= (wide & 0x001fc000UL) << 2;
        spread |= (wide & 0x0fe00000UL) << 3;
        spread |= (wide & 0xf0000000UL) << 4;
        return spread;
    }

#endif
}
}

/* ************************************************************
*
* @author Couchbase <info@couchbase.com>
* @copyright 2021 Couchbase, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* ************************************************************/

0 comments on commit 440a2c5

Please # to comment.