-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
- Loading branch information
1 parent
34bec68
commit e86c538
Showing
12 changed files
with
94 additions
and
61 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
73 changes: 73 additions & 0 deletions
73
phileas-model/src/main/java/ai/philterd/phileas/model/utils/BloomFilter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
package ai.philterd.phileas.model.utils; | ||
|
||
import org.apache.commons.codec.digest.MurmurHash3; | ||
|
||
import java.nio.charset.StandardCharsets; | ||
import java.security.MessageDigest; | ||
import java.security.NoSuchAlgorithmException; | ||
import java.util.BitSet; | ||
import java.util.function.Function; | ||
|
||
import static org.apache.commons.codec.digest.MurmurHash3.DEFAULT_SEED; | ||
|
||
public class BloomFilter<T> { | ||
|
||
private final BitSet bitSet; | ||
private final Function<T, Integer>[] hashFunctions; | ||
|
||
public BloomFilter(int size) { | ||
this.bitSet = new BitSet(size); | ||
this.hashFunctions = createHashFunctions(); | ||
} | ||
|
||
public void put(T element) { | ||
for (final Function<T, Integer> hashFunction : hashFunctions) { | ||
int hash = hashFunction.apply(element); | ||
bitSet.set(Math.abs(hash) % bitSet.size(), true); | ||
} | ||
} | ||
|
||
public boolean mightContain(T element) { | ||
for (final Function<T, Integer> hashFunction : hashFunctions) { | ||
int hash = hashFunction.apply(element); | ||
if (!bitSet.get(Math.abs(hash) % bitSet.size())) { | ||
return false; | ||
} | ||
} | ||
return true; | ||
} | ||
|
||
private Function<T, Integer>[] createHashFunctions() { | ||
|
||
Function<T, Integer>[] functions = new Function[2]; | ||
|
||
functions[0] = (T element) -> { | ||
final byte[] data = element.toString().getBytes(StandardCharsets.UTF_8); | ||
return MurmurHash3.hash32x86(data, 0, data.length, DEFAULT_SEED); | ||
}; | ||
|
||
functions[1] = (T element) -> { | ||
|
||
try { | ||
|
||
final MessageDigest digest = MessageDigest.getInstance("MD5"); | ||
byte[] hash = digest.digest(element.toString().getBytes(StandardCharsets.UTF_8)); | ||
|
||
int hashCode = 0; | ||
for (int i = 0; i < 4; i++) { | ||
hashCode = (hashCode << 8) | (hash[i] & 0xFF); | ||
} | ||
|
||
return hashCode; | ||
|
||
} catch (NoSuchAlgorithmException e) { | ||
throw new RuntimeException("MD5 algorithm not found", e); | ||
} | ||
|
||
}; | ||
|
||
return functions; | ||
|
||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters