Skip to content

Commit

Permalink
Merge pull request #47 from axw/binaryappender
Browse files Browse the repository at this point in the history
Implement BinaryAppender
  • Loading branch information
seiflotfy authored Jan 6, 2025
2 parents 30a916b + 44cf830 commit ccca8cb
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 52 deletions.
51 changes: 24 additions & 27 deletions compressed.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package hyperloglog

import "encoding/binary"
import (
"encoding/binary"
"slices"
)

// Original author of this file is github.com/clarkduvall/hyperloglog
type iterable interface {
Expand Down Expand Up @@ -52,32 +55,26 @@ func (v *compressedList) Clone() *compressedList {
return newV
}

func (v *compressedList) MarshalBinary() (data []byte, err error) {
// Marshal the variableLengthList
bdata, err := v.b.MarshalBinary()
if err != nil {
return nil, err
}

// At least 4 bytes for the two fixed sized values plus the size of bdata.
data = make([]byte, 0, 4+4+len(bdata))
func (v *compressedList) AppendBinary(data []byte) ([]byte, error) {
// At least 4 bytes for the two fixed sized values
data = slices.Grow(data, 4+4)

// Marshal the count and last values.
data = append(data, []byte{
data = append(data,
// Number of items in the list.
byte(v.count >> 24),
byte(v.count >> 16),
byte(v.count >> 8),
byte(v.count>>24),
byte(v.count>>16),
byte(v.count>>8),
byte(v.count),
// The last item in the list.
byte(v.last >> 24),
byte(v.last >> 16),
byte(v.last >> 8),
byte(v.last>>24),
byte(v.last>>16),
byte(v.last>>8),
byte(v.last),
}...)
)

// Append the list
return append(data, bdata...), nil
// Append the variableLengthList
return v.b.AppendBinary(data)
}

func (v *compressedList) UnmarshalBinary(data []byte) error {
Expand Down Expand Up @@ -130,20 +127,20 @@ func (v *compressedList) Iter() *iterator {

type variableLengthList []uint8

func (v variableLengthList) MarshalBinary() (data []byte, err error) {
func (v variableLengthList) AppendBinary(data []byte) ([]byte, error) {
// 4 bytes for the size of the list, and a byte for each element in the
// list.
data = make([]byte, 0, 4+v.Len())
data = slices.Grow(data, 4+v.Len())

// Length of the list. We only need 32 bits because the size of the set
// couldn't exceed that on 32 bit architectures.
sz := v.Len()
data = append(data, []byte{
byte(sz >> 24),
byte(sz >> 16),
byte(sz >> 8),
data = append(data,
byte(sz>>24),
byte(sz>>16),
byte(sz>>8),
byte(sz),
}...)
)

// Marshal each element in the list.
for i := 0; i < sz; i++ {
Expand Down
30 changes: 17 additions & 13 deletions hyperloglog.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"math"
"slices"
"sort"
)

Expand Down Expand Up @@ -203,8 +204,16 @@ func (sk *Sketch) mergeSparse() {
}

// MarshalBinary implements the encoding.BinaryMarshaler interface.
//
// When the result will be appended to another buffer, consider using
// AppendBinary to avoid additional allocations and copying.
func (sk *Sketch) MarshalBinary() (data []byte, err error) {
data = make([]byte, 0, 8+len(sk.regs))
return sk.AppendBinary(nil)
}

// AppendBinary implements the encoding.BinaryAppender interface.
func (sk *Sketch) AppendBinary(data []byte) ([]byte, error) {
data = slices.Grow(data, 8+len(sk.regs))
// Marshal a version marker.
data = append(data, version)
// Marshal p.
Expand All @@ -217,31 +226,26 @@ func (sk *Sketch) MarshalBinary() (data []byte, err error) {
data = append(data, byte(1))

// Add the tmp_set
tsdata, err := sk.tmpSet.MarshalBinary()
data, err := sk.tmpSet.AppendBinary(data)
if err != nil {
return nil, err
}
data = append(data, tsdata...)

// Add the sparse Sketch
sdata, err := sk.sparseList.MarshalBinary()
if err != nil {
return nil, err
}
return append(data, sdata...), nil
return sk.sparseList.AppendBinary(data)
}

// It's using the dense Sketch.
data = append(data, byte(0))

// Add the dense sketch Sketch.
sz := len(sk.regs)
data = append(data, []byte{
byte(sz >> 24),
byte(sz >> 16),
byte(sz >> 8),
data = append(data,
byte(sz>>24),
byte(sz>>16),
byte(sz>>8),
byte(sz),
}...)
)

// Marshal each element in the list.
for _, v := range sk.regs {
Expand Down
53 changes: 53 additions & 0 deletions hyperloglog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"math"
"math/rand"
"reflect"
"slices"
"testing"

"github.com/davecgh/go-spew/spew"
Expand Down Expand Up @@ -469,6 +470,58 @@ func TestHLL_Unmarshal_ErrorTooShort(t *testing.T) {
}
}

// TestHLL_AppendBinary verifies that AppendBinary leaves the bytes already in
// the caller's buffer untouched and appends exactly what MarshalBinary returns
// for the same sketch.
func TestHLL_AppendBinary(t *testing.T) {
	sk := NewTestSketch(16)
	for n := 0; n < 10; n++ {
		sk.InsertHash(uint64(rand.Int()))
	}

	// Reference encoding to compare the appended bytes against.
	want, err := sk.MarshalBinary()
	require.NoError(t, err)

	// Build a prefix of random length (0..99) holding random bytes, and keep
	// a copy so any modification of the prefix by AppendBinary is detected.
	prefix := make([]byte, rand.Intn(100))
	for i := range prefix {
		prefix[i] = byte(rand.Intn(256))
	}
	snapshot := slices.Clone(prefix)

	got, err := sk.AppendBinary(prefix)
	require.NoError(t, err)

	// The result must be the original prefix followed by the reference encoding.
	require.Len(t, got, len(want)+len(snapshot))
	require.Equal(t, snapshot, got[:len(snapshot)])
	require.Equal(t, want, got[len(snapshot):])
}

// Benchmark_HLL_Marshal compares MarshalBinary with AppendBinary for sketches
// created at precision 16 with sparse=true and sparse=false, after 1000
// InsertHash calls with random values.
//
// The AppendBinary case reuses one buffer across iterations (truncated to
// length zero each time). Allocation statistics are reported for both cases so
// the difference is visible without passing -benchmem.
func Benchmark_HLL_Marshal(b *testing.B) {
	run := func(precision uint8, sparse bool) {
		name := fmt.Sprintf("precision%d_", precision)
		if sparse {
			name += "sparse"
		} else {
			name += "dense"
		}
		b.Run(name, func(b *testing.B) {
			// Fail loudly if the sketch cannot be built instead of
			// dereferencing a nil sketch below.
			sk, err := NewSketch(precision, sparse)
			if err != nil {
				b.Fatal(err)
			}
			for i := 0; i < 1000; i++ {
				sk.InsertHash(uint64(rand.Int()))
			}
			b.Run("MarshalBinary", func(b *testing.B) {
				b.ReportAllocs()
				for i := 0; i < b.N; i++ {
					if _, err := sk.MarshalBinary(); err != nil {
						b.Fatal(err)
					}
				}
			})
			b.Run("AppendBinary", func(b *testing.B) {
				b.ReportAllocs()
				var (
					buf []byte
					err error
				)
				for i := 0; i < b.N; i++ {
					// buf[:0] keeps the capacity from the previous
					// iteration so it can be reused.
					buf, err = sk.AppendBinary(buf[:0])
					if err != nil {
						b.Fatal(err)
					}
				}
			})
		})
	}
	run(16, true)
	run(16, false)
}

func TestHLL_Clone(t *testing.T) {
sk1 := NewTestSketch(16)

Expand Down
25 changes: 13 additions & 12 deletions sparse.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package hyperloglog

import (
"math/bits"
"slices"

"github.com/kamstrup/intmap"
)
Expand Down Expand Up @@ -83,29 +84,29 @@ func (s *set) Clone() *set {
return &set{m: newS}
}

func (s *set) MarshalBinary() (data []byte, err error) {
func (s *set) AppendBinary(data []byte) ([]byte, error) {
// 4 bytes for the size of the set, and 4 bytes for each key.
// list.
data = make([]byte, 0, 4+(4*s.m.Len()))
data = slices.Grow(data, 4+(4*s.m.Len()))

// Length of the set. We only need 32 bits because the size of the set
// couldn't exceed that on 32 bit architectures.
sl := s.m.Len()
data = append(data, []byte{
byte(sl >> 24),
byte(sl >> 16),
byte(sl >> 8),
data = append(data,
byte(sl>>24),
byte(sl>>16),
byte(sl>>8),
byte(sl),
}...)
)

// Marshal each element in the set.
s.m.ForEach(func(k uint32) bool {
data = append(data, []byte{
byte(k >> 24),
byte(k >> 16),
byte(k >> 8),
data = append(data,
byte(k>>24),
byte(k>>16),
byte(k>>8),
byte(k),
}...)
)
return true
})

Expand Down

0 comments on commit ccca8cb

Please sign in to comment.