Skip to content

Commit

Permalink
Add MurmurHash3's 128-bit x64 version
Browse files Browse the repository at this point in the history
  • Loading branch information
goncalossilva committed Dec 11, 2021
1 parent c1d0b63 commit 807807b
Show file tree
Hide file tree
Showing 4 changed files with 70,098 additions and 27 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ Notable changes are documented in this file, whose format follows [Keep a Change

## [Unreleased]

## [0.2.0] - 2021-12-11

### Added

- Support for MurmurHash3's 128-bit x64 version.

## [0.1.0] - 2021-12-10

### Added
Expand Down
180 changes: 160 additions & 20 deletions src/commonMain/kotlin/MurmurHash3.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,25 @@ package com.goncalossilva.murmurhash

public class MurmurHash3(private val seed: UInt = 0u) {
public fun hash32x86(key: ByteArray): UInt {
// Scrambles one 32-bit block: multiply by C1_32, rotate left by R1_32, multiply by C2_32.
fun UInt.mix(): UInt = (this * C1_32).rotateLeft(R1_32) * C2_32

var h = seed
val len = key.size
val nblocks = len shr 2

for (i in 0 until nblocks) {
val index = i shl 2
val k = key.getUInt(index) or
(key.getUInt(index + 1) shl 8) or
(key.getUInt(index + 2) shl 16) or
(key.getUInt(index + 3) shl 24)
val k = key.getLittleEndianUInt(index)

h = h xor scramble(k)
h = h.rotateLeft(R2)
h = h * M + N
h = h xor k.mix()
h = h.rotateLeft(R2_32)
h = h * M_32 + N_32
}

val index = nblocks shl 2
Expand All @@ -29,11 +34,131 @@ public class MurmurHash3(private val seed: UInt = 0u) {
}
if (rem >= 1) {
k = k xor key.getUInt(index)
h = h xor scramble(k)
h = h xor k.mix()
}

h = h xor len.toUInt()
h = h.fmix()
return h
}

/**
 * Computes the 128-bit x64 variant of MurmurHash3 over [key].
 *
 * Both 64-bit halves of the state start from the instance seed. The input is consumed in
 * 16-byte little-endian blocks, followed by a tail of up to 15 bytes and a finalization step.
 *
 * @param key the bytes to hash; an empty array is valid.
 * @return a two-element array holding the first and second 64-bit halves of the hash, in that order.
 */
public fun hash128x64(key: ByteArray): Array<ULong> {
    // Scrambling step shared by both lanes: multiply, rotate, multiply.
    fun ULong.mix(r: Int, c1: ULong, c2: ULong): ULong = (this * c1).rotateLeft(r) * c2

    val length = key.size
    val blockCount = length shr 4
    val tailStart = blockCount shl 4

    var h1 = seed.toULong()
    var h2 = seed.toULong()

    // Body: every full 16-byte block feeds 8 bytes into each lane.
    for (block in 0 until blockCount) {
        val offset = block shl 4
        val k1 = key.getLittleEndianLong(offset)
        val k2 = key.getLittleEndianLong(offset + 8)

        h1 = (h1 xor k1.mix(R1_128, C1_128, C2_128)).rotateLeft(R2_128) + h2
        h1 = h1 * M_128 + N1_128

        h2 = (h2 xor k2.mix(R3_128, C2_128, C1_128)).rotateLeft(R1_128) + h1
        h2 = h2 * M_128 + N2_128
    }

    // Tail: the remaining 0..15 bytes. Bytes 0..7 go to the first lane and bytes 8..14 to the
    // second, each placed at its little-endian position within the lane.
    val tailLength = length - tailStart
    var k1 = 0uL
    var k2 = 0uL
    for (i in 0 until tailLength) {
        val byte = key.getULong(tailStart + i)
        if (i < 8) {
            k1 = k1 xor (byte shl (8 * i))
        } else {
            k2 = k2 xor (byte shl (8 * (i - 8)))
        }
    }
    // A lane is mixed in only when the tail contributed at least one byte to it.
    if (tailLength > 8) {
        h2 = h2 xor k2.mix(R3_128, C2_128, C1_128)
    }
    if (tailLength > 0) {
        h1 = h1 xor k1.mix(R1_128, C1_128, C2_128)
    }

    // Finalization: fold in the length, cross-add the lanes, avalanche, cross-add again.
    val size = length.toULong()
    h1 = h1 xor size
    h2 = h2 xor size

    h1 += h2
    h2 += h1

    h1 = h1.fmix()
    h2 = h2.fmix()

    h1 += h2
    h2 += h1

    return arrayOf(h1, h2)
}

/**
 * Assembles the four bytes starting at [index] into a [UInt], with the byte at [index]
 * occupying the lowest 8 bits and each following byte shifted 8 bits further left.
 */
private fun ByteArray.getLittleEndianUInt(index: Int): UInt {
    var word = 0u
    for (offset in 0 until 4) {
        word = word or (getUInt(index + offset) shl (8 * offset))
    }
    return word
}

/**
 * Assembles the eight bytes starting at [index] into a [ULong], with the byte at [index]
 * occupying the lowest 8 bits and each following byte shifted 8 bits further left.
 */
private fun ByteArray.getLittleEndianLong(index: Int): ULong {
    var word = 0uL
    for (offset in 0 until 8) {
        word = word or (getULong(index + offset) shl (8 * offset))
    }
    return word
}

private fun UInt.fmix(): UInt {
var h = this
h = h xor (h shr 16)
h *= 0x85ebca6bu
h = h xor (h shr 13)
Expand All @@ -42,24 +167,39 @@ public class MurmurHash3(private val seed: UInt = 0u) {
return h
}

private fun scramble(karg: UInt): UInt {
var k = karg
k *= C1
k = k.rotateLeft(R1)
k *= C2
return k
/**
 * 64-bit finalization mix: three xor-shift-right-by-33 steps interleaved with two
 * multiplications by fixed constants.
 */
private fun ULong.fmix(): ULong {
    // Folds the upper bits of the value into its lower bits.
    fun ULong.xorShift33(): ULong = this xor (this shr 33)

    val first = xorShift33() * 0xff51afd7ed558ccduL
    val second = first.xorShift33() * 0xc4ceb9fe1a85ec53uL
    return second.xorShift33()
}

/**
 * Reads the byte at [index] as an unsigned value in `0..255`.
 *
 * `Byte.toUInt()` sign-extends, so a byte >= 0x80 would come back as `0xFFFFFFxx` and corrupt
 * any word built by shifting and OR-ing/XOR-ing bytes together. Converting through `toUByte()`
 * zero-extends instead.
 */
private fun ByteArray.getUInt(index: Int) = get(index).toUByte().toUInt()

/**
 * Reads the byte at [index] as an unsigned value in `0..255`.
 *
 * `Byte.toULong()` sign-extends, so a byte >= 0x80 would come back with its upper 56 bits set
 * and corrupt any word built by shifting and OR-ing/XOR-ing bytes together. Converting through
 * `toUByte()` zero-extends instead.
 */
private fun ByteArray.getULong(index: Int) = get(index).toUByte().toULong()

// Constants used by the hash functions of this class.
private companion object {
// NOTE(review): C1/C2 (and R1, R2, M, N below) carry the same values as their *_32
// counterparts and are only referenced by the unsuffixed `scramble` code path; they look
// like the pre-rename names — confirm whether they are still needed.
private const val C1: UInt = 0xcc9e2d51u
private const val C2: UInt = 0x1b873593u
// Multipliers applied to each block by the 32-bit mix step.
private const val C1_32: UInt = 0xcc9e2d51u
private const val C2_32: UInt = 0x1b873593u

// Left-rotation amounts: R1_32 inside the 32-bit mix step, R2_32 on the running hash.
private const val R1_32: Int = 15
private const val R2_32: Int = 13

// The running 32-bit hash is updated as `h * M_32 + N_32` after each block.
private const val M_32: UInt = 5u
private const val N_32: UInt = 0xe6546b64u

// Multipliers passed to the 64-bit mix step; the two lanes use them in opposite order.
private const val C1_128 = 0x87c37b91114253d5uL
private const val C2_128 = 0x4cf5ad432745937fuL

private const val R1: Int = 15
private const val R2: Int = 13
// Left-rotation amounts for the 128-bit hash: R1_128 mixes the first lane's block and rotates
// the second lane's state, R2_128 rotates the first lane's state, R3_128 mixes the second
// lane's block.
private const val R1_128 = 31
private const val R2_128 = 27
private const val R3_128 = 33

private const val M: UInt = 5u
private const val N: UInt = 0xe6546b64u
// Each 64-bit lane is updated as `h * M_128 + N` after a block, with N1_128 for the first
// lane and N2_128 for the second.
private const val M_128 = 5u
private const val N1_128 = 0x52dce729u
private const val N2_128 = 0x38495ab5u
}
}
36 changes: 29 additions & 7 deletions src/commonTest/kotlin/MurmurHash3Test.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ package com.goncalossilva.murmurhash

import com.goncalossilva.resources.Resource
import kotlin.test.Test
import kotlin.test.assertContentEquals
import kotlin.test.assertEquals
import kotlin.test.assertTrue

class MurmurHash3Test {
private val words: List<String> by lazy {
Expand All @@ -11,19 +13,39 @@ class MurmurHash3Test {
Resource("src/commonTest/resources/wordlist.txt").readText().trim().lines()
}

private val results32Bit: List<UInt> by lazy {
// Hashes computed by the canonical C++ implementation.
Resource("src/commonTest/resources/murmurhash3-32x86.txt").readText().trim().lines().map {
it.toUInt()
}
private val results32x86: List<UInt> by lazy {
// Hashes computed by the canonical C++ implementation:
// https://github.com/aappleby/smhasher/blob/61a0530f28277f2e850bfc39600ce61d02b518de/src/MurmurHash3.cpp#L94-L146
Resource("src/commonTest/resources/murmurhash3-32x86.txt")
.readText()
.trim()
.lines()
.map { line -> line.toUInt() }
}

private val results128x64: List<Array<ULong>> by lazy {
    // Expected hashes, stored as comma-separated unsigned 64-bit values per line and
    // computed by the canonical C++ implementation:
    // https://github.com/aappleby/smhasher/blob/61a0530f28277f2e850bfc39600ce61d02b518de/src/MurmurHash3.cpp#L255-L332
    val contents = Resource("src/commonTest/resources/murmurhash3-128x64.txt").readText()
    contents.trim().lines().map { row ->
        row.split(",").map { part -> part.toULong() }.toTypedArray()
    }
}

private val murmurHash3 = MurmurHash3(seed = 0x7f3a21eau)

@Test
fun hash32Bit() {
words.zip(results32Bit).forEach { (word, hash) ->
fun hash32x86() {
words.zip(results32x86).forEach { (word, hash) ->
assertEquals(hash, murmurHash3.hash32x86(word.encodeToByteArray()), word)
}
}

@Test
fun hash128x64() {
    // Each word is compared against the precomputed hash at the same position in the results
    // list; the word doubles as the failure message.
    for ((word, expected) in words.zip(results128x64)) {
        val actual = murmurHash3.hash128x64(word.encodeToByteArray())
        assertContentEquals(expected, actual, word)
    }
}
}
Loading

0 comments on commit 807807b

Please sign in to comment.