forked from robskie/chd
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathchd.go
177 lines (156 loc) · 3.28 KB
/
chd.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
// Package chd implements the compress, hash, and displace (CHD) minimal perfect
// hash algorithm. It provides a map builder that manages adding of items and
// map creation.
//
// See http://cmph.sourceforge.net/papers/esa09.pdf for more details.
package chd
import (
"bytes"
"encoding/binary"
"io"
)
// Map represents a map that uses
// CHD minimal perfect hash algorithm.
type Map struct {
seed [2]uint64
length uint64
tableSize uint64
indexLen uint64
index []uint64
keys [][]byte
values [][]byte
}
// NewMap returns an empty map.
// Call Map.Read to populate it.
func NewMap() *Map {
return &Map{}
}
func (m *Map) Get(key []byte) []byte {
if len(key) == 0 {
return nil
}
if m.length == 0 {
return nil
}
idx := m.getIndex(key)
if bytes.Compare(key, m.keys[idx]) != 0 {
return nil
}
return m.values[idx]
}
// Get a random entry from the hash table
func (m *Map) GetRandomValue() []byte {
if m.length == 0 || len(m.values) == 0 {
return nil
}
return pickRandom(m.values)
}
// Get a random entry from the hash table
func (m *Map) GetRandomKey() []byte {
if m.length == 0 || len(m.keys) == 0 {
return nil
}
return pickRandom(m.keys)
}
func (m *Map) Values(yield func([]byte) bool) bool {
for _, v := range m.values {
if !yield(v) {
return false
}
}
return true
}
func (m *Map) getIndex(key []byte) (idx uint64) {
if m.length == 0 {
return 0
}
h1, h2, h3, _ := spookyHash(key, m.seed[0], m.seed[1])
hlen := m.indexLen
hidx := m.index[int(h1%hlen)]
// tableSize :=
h2 %= m.tableSize
h3 %= m.tableSize
d0 := hidx / m.tableSize
d1 := hidx % m.tableSize
idx = (h2 + (d0 * h3) + d1) % m.tableSize
return
}
// Len returns the total number of keys.
func (m *Map) Len() int {
return int(m.length)
}
// Size returns the size in bytes.
func (m *Map) Size() int {
return int(m.length)
}
func (m *Map) Read(p []byte) (n int, err error) {
bi := &sliceReader{b: p}
m.seed[0] = bi.ReadInt()
m.seed[1] = bi.ReadInt()
m.tableSize = bi.ReadInt()
m.indexLen = bi.ReadInt()
m.length = bi.ReadInt()
// index
m.index = make([]uint64, m.length)
var i uint64
for i = 0; i < m.indexLen; i++ {
m.index[i] = bi.ReadInt()
}
var kl, vl uint64
m.keys = make([][]byte, m.tableSize)
m.values = make([][]byte, m.tableSize)
for i = 0; i < m.tableSize; i++ {
kl = bi.ReadInt()
vl = bi.ReadInt()
m.keys[i] = bi.Read(kl)
m.values[i] = bi.Read(vl)
}
return
}
// Serialize to given Writer
func (m *Map) WriteTo(w io.Writer) (n int64, err error) {
write := func(nd ...interface{}) (n int64, err error) {
for _, d := range nd {
if err = binary.Write(w, binary.LittleEndian, d); err != nil {
return
}
n += int64(binary.Size(d))
}
return
}
n, err = write(
m.seed[0],
m.seed[1],
m.tableSize,
m.indexLen,
uint64(m.length),
)
var n1 int64
for i := range m.index {
if n1, err = write(m.index[i]); err != nil {
return
}
n += n1
}
var ni int
var i uint64
for i = 0; i < m.tableSize; i++ {
if n1, err = write(uint64(len(m.keys[i]))); err != nil {
return
}
n += n1
if n1, err = write(uint64(len(m.values[i]))); err != nil {
return
}
n += n1
if ni, err = w.Write(m.keys[i]); err != nil {
return
}
n += int64(ni)
if ni, err = w.Write(m.values[i]); err != nil {
return
}
n += int64(ni)
}
return
}