-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathprobabalistic_test.go
133 lines (108 loc) · 4.29 KB
/
probabalistic_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
package hll
import (
"math"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func Test_smallRangeSmokeTest(t *testing.T) {
m := 1 << uint(sparseTestSettings.Log2m)
// only one register set
{
hll, err := NewHll(sparseTestSettings)
require.NoError(t, err)
hll.AddRaw(constructHllValue(sparseTestSettings.Log2m, 0, 1))
assertSparse(t, hll)
// Trivially true that small correction conditions hold: one register
// set implies zeroes exist, and estimator trivially smaller than 5m/2.
// Small range correction: m * log(m/V)
expected := uint64(math.Ceil(float64(m) * math.Log(float64(m)/float64(m-1) /*# of zeroes*/)))
assert.Equal(t, expected, hll.Cardinality())
}
// at sparse capacity
{
hll, err := NewHll(sparseTestSettings)
require.NoError(t, err)
for i := 0; i < int(hll.settings.sparseThreshold); i++ {
hll.AddRaw(constructHllValue(sparseTestSettings.Log2m, i, 1))
}
assertSparse(t, hll)
// Small range correction: m * log(m/V)
expected := uint64(math.Ceil(float64(m) * math.Log(float64(m)/float64(m-int(hll.settings.sparseThreshold)) /*# of zeroes*/)))
assert.Equal(t, expected, hll.Cardinality())
}
// all but one register set
{
hll, err := NewHll(sparseTestSettings)
require.NoError(t, err)
for i := 0; i < m-1; i++ {
hll.AddRaw(constructHllValue(sparseTestSettings.Log2m, i, 1))
}
assertDense(t, hll)
// Small range correction: m * log(m/V)
expected := uint64(math.Ceil(float64(m) * math.Log(float64(m)/float64(1) /*# of zeroes*/)))
assert.Equal(t, expected, hll.Cardinality())
}
}
func Test_normalRangeSmokeTest(t *testing.T) {
m := 1 << uint(sparseTestSettings.Log2m)
// regwidth = 5, so hash space is
// log2m + (2^5 - 1 - 1), so L = log2m + 30
l := sparseTestSettings.Log2m + 30
// all registers at 'medium' value
{
hll, err := NewHll(sparseTestSettings)
require.NoError(t, err)
registerValue := 7 /*chosen to ensure neither correction kicks in*/
for i := 0; i < m; i++ {
hll.AddRaw(constructHllValue(sparseTestSettings.Log2m, i, registerValue))
}
assertDense(t, hll)
// Simplified estimator when all registers take same value: alpha / (m/2^val)
twoToRegValue := 1 << uint(registerValue)
estimator := alphaMSquared(sparseTestSettings.Log2m) / (float64(m) / float64(twoToRegValue))
// Assert conditions for uncorrected range
twoToL := 1 << uint(l)
assert.True(t, estimator <= float64(twoToL)/30)
assert.True(t, estimator > (float64(5)*float64(m)/float64(2)))
expected := uint64(math.Ceil(estimator))
assert.Equal(t, expected, hll.Cardinality())
}
}
func Test_largeRangeSmokeTest(t *testing.T) {
m := 1 << uint(sparseTestSettings.Log2m)
// regwidth = 5, so hash space is
// log2m + (2^5 - 1 - 1), so L = log2m + 30
l := sparseTestSettings.Log2m + 30
// all registers at large value
{
hll, err := NewHll(sparseTestSettings)
require.NoError(t, err)
// NOTE : java test uses 31 here, but that is too large and results in
// NaN for cardinality calculation (PG agrees)
registerValue := 28 /*chosen to ensure large correction kicks in*/
for i := 0; i < m; i++ {
hll.AddRaw(constructHllValue(sparseTestSettings.Log2m, i, registerValue))
}
assertDense(t, hll)
// Simplified estimator when all registers take same value: alpha / (m/2^val)
twoToRegValue := 1 << uint(registerValue)
estimator := alphaMSquared(sparseTestSettings.Log2m) / (float64(m) / float64(twoToRegValue))
// Assert conditions for uncorrected range
assert.True(t, estimator > math.Pow(2, float64(l))/30)
// Large range correction: -2^32 * log(1 - E/2^32)
expected := uint64(math.Ceil(-1.0 * math.Pow(2, float64(l)) * math.Log(1.0-estimator/math.Pow(2, float64(l)))))
assert.Equal(t, expected, hll.Cardinality())
}
}
func Test_LargeEstimatorCutoff(t *testing.T) {
for log2m := minimumLog2mParam; log2m <= maximumLog2mParam; log2m++ {
for regwidth := minimumRegwidthParam; regwidth <= maximumRegwidthParam; regwidth++ {
cutoff := largeEstimatorCutoff(twoToL(log2m, regwidth))
// See blog post (http://research.neustar.biz/2013/01/24/hyperloglog-googles-take-on-engineering-hll/)
// and original paper (Fig. 3) for information on 2^L and "large range correction" cutoff.
expected := math.Pow(2, math.Pow(2, float64(regwidth))-2+float64(log2m)) / 30.0
assert.Equal(t, expected, cutoff)
}
}
}