diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 3254a4a..a817ad2 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -1,10 +1,7 @@ name: CI on: - push: - branches: - - main - tags: '*' - pull_request: + - push + - pull_request jobs: test: name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} @@ -13,8 +10,8 @@ jobs: fail-fast: false matrix: version: - - '1.0' - '1.6' + - '1.7' - 'nightly' os: - ubuntu-latest @@ -22,10 +19,6 @@ jobs: - windows-latest arch: - x64 - - x86 - exclude: - - os: macOS-latest - arch: x86 steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 diff --git a/.gitignore b/.gitignore index 20fe29d..f181182 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ *.jl.mem /Manifest.toml /docs/build/ +.vscode \ No newline at end of file diff --git a/Project.toml b/Project.toml index 690e774..63a7c5e 100644 --- a/Project.toml +++ b/Project.toml @@ -3,11 +3,18 @@ uuid = "2e3c4037-312d-4650-b9c0-fcd0fc09aae4" authors = ["Bernard Brenyah"] version = "0.1.0" +[deps] +CircularArrays = "7a955b69-7140-5f4e-a0ed-f168c5e2e749" +DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" +ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" + [compat] julia = "1" [extras] +Faker = "0efc519c-db33-5916-ab87-703215c3906f" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test"] +test = ["Test", "Faker"] diff --git a/README.md b/README.md index 893e546..9f07e30 100644 --- a/README.md +++ b/README.md @@ -6,3 +6,45 @@ [![Coverage](https://codecov.io/gh/PyDataBlog/SimString.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/PyDataBlog/SimString.jl) [![Code Style: Blue](https://img.shields.io/badge/code%20style-blue-4495d1.svg)](https://github.com/invenia/BlueStyle) [![ColPrac: Contributor's Guide on Collaborative Practices for Community 
Packages](https://img.shields.io/badge/ColPrac-Contributor's%20Guide-blueviolet)](https://github.com/SciML/ColPrac)
+
+A native Julia implementation of the CPMerge algorithm, which is designed for approximate string matching.
+This package is particularly useful for natural language processing tasks which demand the retrieval of strings/texts from a very large corpora (big amounts of texts). Currently, this package supports both Character and Word based N-grams feature generations and there are plans to open the package up for custom user defined feature generation methods.
+
+## Features
+
+- [X] Fast algorithm for string matching
+- [X] 100% exact retrieval
+- [X] Support for unicodes
+- [ ] Custom user defined feature generation methods
+- [ ] Mecab-based tokenizer support
+
+## Supported String Similarity Measures
+
+- [X] Dice coefficient
+- [X] Jaccard coefficient
+- [X] Cosine coefficient
+- [X] Overlap coefficient
+
+## Installation
+
+You can grab the latest stable version of this package from Julia registries by simply running;
+
+*NB:* Don't forget to invoke Julia's package manager with `]`
+
+```julia
+pkg> add SimString
+```
+
+The few (and selected) brave ones can simply grab the current experimental features by simply adding the master branch to your development environment after invoking the package manager with `]`:
+
+```julia
+pkg> add SimString#master
+```
+
+You are good to go with bleeding edge features and breakages!
+
+To revert to a stable version, you can simply run:
+
+```julia
+pkg> free SimString
+```
diff --git a/docs/src/index.md b/docs/src/index.md
index 26be98c..807e880 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -6,6 +6,76 @@ CurrentModule = SimString
 
 Documentation for [SimString](https://github.com/PyDataBlog/SimString.jl).
 
+A native Julia implementation of the CPMerge algorithm, which is designed for approximate string matching. 
+This package is particularly useful for natural language processing tasks which demand the retrieval of strings/texts from a very large corpora (big amounts of texts). Currently, this package supports both Character and Word based N-grams feature generations and there are plans to open the package up for custom user defined feature generation methods.
+
+## Features
+
+- [X] Fast algorithm for string matching
+- [X] 100% exact retrieval
+- [X] Support for unicodes
+- [ ] Custom user defined feature generation methods
+- [ ] Mecab-based tokenizer support
+
+## Supported String Similarity Measures
+
+- [X] Dice coefficient
+- [X] Jaccard coefficient
+- [X] Cosine coefficient
+- [X] Overlap coefficient
+
+## Installation
+
+You can grab the latest stable version of this package from Julia registries by simply running;
+
+*NB:* Don't forget to invoke Julia's package manager with `]`
+
+```julia
+pkg> add SimString
+```
+
+The few (and selected) brave ones can simply grab the current experimental features by simply adding the master branch to your development environment after invoking the package manager with `]`:
+
+```julia
+pkg> add SimString#master
+```
+
+You are good to go with bleeding edge features and breakages!
+
+To revert to a stable version, you can simply run:
+
+```julia
+pkg> free SimString
+```
+
+## Usage
+
+```julia
+using SimString
+
+# Initialise database and some strings
+db = DictDB(CharacterNGrams(2, " "));
+push!(db, "foo");
+push!(db, "bar");
+push!(db, "fooo");
+
+# Convenient approach is to use an array of strings for multiple entries: `append!(db, ["foo", "bar", "fooo"]);`
+
+# Retrieve the closest match(es)
+res = search(Dice(), db, "foo"; α=0.8, ranked=true)
+# 2-element Vector{Tuple{String, Float64}}:
+# ("foo", 1.0)
+# ("fooo", 0.8888888888888888)
+
+
+```
+
+## TODO: Benchmarks
+
+## Release History
+
+- 0.1.0 Initial release. 
+ ```@index ``` diff --git a/extras/examples.jl b/extras/examples.jl new file mode 100644 index 0000000..b4d9595 --- /dev/null +++ b/extras/examples.jl @@ -0,0 +1,46 @@ +using SimString +using Faker +using BenchmarkTools +using DataStructures + +################################# Benchmark Bulk addition ##################### +db = DictDB(CharacterNGrams(3, " ")); +Faker.seed(2020) +@time fake_names = [string(Faker.first_name(), " ", Faker.last_name()) for i in 1:100_000]; + + +f(d, x) = append!(d, x) +@time f(db, fake_names) + + + +################################ Simple Addition ############################### + +db = DictDB(CharacterNGrams(2, " ")); +push!(db, "foo"); +push!(db, "bar"); +push!(db, "fooo"); + +f(x, c, s) = search(x, c, s) +test = "foo"; +col = db; +sim = Cosine(); + +f(Cosine(), db, "foo") + +@btime f($sim, $col, $test) +@btime search(Cosine(), db, "foo"; α=0.8, ranked=true) + + + +db2 = DictDB(CharacterNGrams(3, " ")); +append!(db2, ["foo", "bar", "fooo", "foor"]) # also works via multiple dispatch on a vector + +results = search(Cosine(), db, "foo"; α=0.8, ranked=true) # yet to be implemented + +bs = ["foo", "bar", "foo", "foo", "bar"] +SimString.extract_features(CharacterNGrams(3, " "), "prepress") +SimString.extract_features(WordNGrams(2, " ", " "), "You are a really really really cool dude.") + +db = DictDB(WordNGrams(2, " ", " ")) +push!(db, "You are a really really really cool dude.") diff --git a/extras/py_benchmarks.py b/extras/py_benchmarks.py new file mode 100644 index 0000000..be23cb8 --- /dev/null +++ b/extras/py_benchmarks.py @@ -0,0 +1,16 @@ +from simstring.feature_extractor.character_ngram import CharacterNgramFeatureExtractor +from simstring.measure.cosine import CosineMeasure +from simstring.database.dict import DictDatabase +from simstring.searcher import Searcher +from faker import Faker + +db = DictDatabase(CharacterNgramFeatureExtractor(3)) + +fake = Faker() +fake_names = [fake.name() for i in range(100_000)] + +def f(x): + 
for i in x: + db.add(i) + +# %time f(fake_names) \ No newline at end of file diff --git a/src/SimString.jl b/src/SimString.jl index 6af507d..7bec0a7 100644 --- a/src/SimString.jl +++ b/src/SimString.jl @@ -1,5 +1,29 @@ module SimString -# Write your package code here. +import Base: push!, append! +using DataStructures: DefaultOrderedDict, DefaultDict +# using ProgressMeter +# using CircularArrays +# using OffsetArrays + +######### Import modules & utils ################ +include("db_collection.jl") +include("dictdb.jl") +include("features.jl") +include("measures.jl") +include("search.jl") + + + +####### Global export of user API ####### +export Dice, Jaccard, Cosine, Overlap, + AbstractSimStringDB, DictDB, + CharacterNGrams, WordNGrams, + search + + + + + end diff --git a/src/db_collection.jl b/src/db_collection.jl new file mode 100644 index 0000000..cffce8b --- /dev/null +++ b/src/db_collection.jl @@ -0,0 +1,35 @@ +# Custom Collections + +""" +Base type for all custom db collections. +""" +abstract type AbstractSimStringDB end + + +""" +Abstract type for feature extraction structs +""" +abstract type FeatureExtractor end + + +# Feature Extraction Definitions + +""" +Feature extraction on character-level ngrams +""" +struct CharacterNGrams{T1<:Int, T2<:AbstractString} <: FeatureExtractor + n::T1 # number of n-grams to extract + padder::T2 # string to use to pad n-grams +end + + +""" +Feature extraction based on word-level ngrams +""" +struct WordNGrams{T1<:Int, T2<:AbstractString} <: FeatureExtractor + n::T1 # number of n-grams to extract + padder::T2 # string to use to pad n-grams + splitter::T2 # string to use to split words +end + + diff --git a/src/dictdb.jl b/src/dictdb.jl new file mode 100644 index 0000000..77136b1 --- /dev/null +++ b/src/dictdb.jl @@ -0,0 +1,102 @@ +""" +Custom DB collection for storing SimString data using base Dictionary `Dict` +""" +struct DictDB{ + T1<:FeatureExtractor, + T2<:AbstractString, + T3<:AbstractDict, + T4<:AbstractDict, + 
T5<:AbstractDict, + } <: AbstractSimStringDB + + feature_extractor::T1 # NGram feature extractor + string_collection::Vector{T2} # Collection of strings in the DB + string_size_map::T3 # Index map of feature sizes + string_feature_map::T4 # Index map of all features with associated strings and sizes + lookup_cache::T5 # Cache for lookup results +end + + +""" + DictDB(x::CharacterNGrams) + +Initialize a dict DB with additional containers and Metadata for CharacterNGrams + +# Arguments +* `x`: CharacterNGrams object + +# Example +```julia +db = DictDB(CharacterNGrams(2, " ")) +``` + +# Returns +* `DictDB`: A DictDB object with additional containers and Metadata for CharacterNGrams +""" +function DictDB(x::CharacterNGrams) + DictDB( + x, + String[], + DefaultDict{Int, Set{String}}( () -> Set{String}() ), + DefaultDict{ Int, DefaultOrderedDict{Tuple{String, Int64}, Set{String}} }( () -> DefaultOrderedDict{Tuple{String, Int64}, Set{String} }(Set{String})), + DefaultDict{ Int, DefaultDict{Tuple{String, Int64}, Set{String}} }( () -> DefaultDict{Tuple{String, Int64}, Set{String}}(Set{String})) + ) +end + + +""" + DictDB(x::WordNGrams) + +Initialize a dict DB with additional containers and Metadata for WordNGrams + +# Arguments +* `x`: WordNGrams object + +# Example +```julia +db = DictDB(WordNGrams(2, " ", " ")) +``` + +# Returns +* `DictDB`: A DictDB object with additional containers and Metadata for WordNGrams +""" +function DictDB(x::WordNGrams) + DictDB( + x, + String[], + DefaultDict{Int, Set{String}}( () -> Set{String}() ), + DefaultDict{ Int, DefaultOrderedDict{Tuple{NTuple{x.n, String}, Int}, Set{String}} }( () -> DefaultOrderedDict{Tuple{NTuple{x.n, String}, Int}, Set{String} }(Set{String})), + DefaultDict{ Int, DefaultDict{Tuple{NTuple{x.n, String}, Int}, Set{String}} }( () -> DefaultDict{Tuple{NTuple{x.n, String}, Int}, Set{String}}(Set{String})) + ) +end + + + + +################################## DictDB UTIL Functions ############################ +""" 
+Internal function for retrieving existing features by size +""" +function retrieve_existing_feature_by_size(db::DictDB, size, feature) + return db.string_feature_map[size][feature] +end + + +# """ +# Basic summary stats for the DB +# """ +# function describe_db(db::DictDB) + +# end + + +""" +Internal function to lookup feature sets by size and feature +""" +function lookup_feature_set_by_size_feature(db::DictDB, size, feature) + # TODO: Clean this up and make it more efficient. Shouldn't updated db.string_feature_map + if feature ∉ keys(db.lookup_cache[size]) + db.lookup_cache[size][feature] = retrieve_existing_feature_by_size(db, size, feature) + end + return db.lookup_cache[size][feature] +end \ No newline at end of file diff --git a/src/features.jl b/src/features.jl new file mode 100644 index 0000000..f53acb5 --- /dev/null +++ b/src/features.jl @@ -0,0 +1,130 @@ +""" +Internal function to pad AbstractString types with specified padder +""" +function pad_string(x::AbstractString, padder::AbstractString) + return string(padder, x, padder) +end + + +""" +Internal function to pad AbstractVector types with specified padder +""" +function pad_string(x::AbstractVector, padder::AbstractString) + # Insert a padder as the first and last element of x + insert!(x, 1, padder) + push!(x, padder) + return x +end + + +""" +Internal function to generate intial uncounted ngrams on a character level +""" +function init_ngrams(extractor::CharacterNGrams, x, n) + map(0:length(x)-n) do i + x[i+1: i+n] + end +end + + +""" +Internal function to generate intial uncounted ngrams on a word level +""" +function init_ngrams(extractor::WordNGrams, x, n) + map(0:length(x)-n) do i + tuple(String.(x[i+1: i+n])...) 
+ end +end + + +""" +Internal function to create character-level ngrams features from an AbstractString +""" +function n_grams(extractor::CharacterNGrams, x, n) + # Return counted n-grams (including duplicates) + return cummulative_ngram_count(init_ngrams(extractor, x, n)) +end + + +""" +Internal function to create word-level ngrams from an AbstractVector +""" +function n_grams(extractor::WordNGrams, x, n) + return cummulative_ngram_count(init_ngrams(extractor, x, n)) +end + + +""" +Internal function to generate character-level ngrams features from an AbstractString +""" +function extract_features(extractor::CharacterNGrams, str) + n = extractor.n - 1 == 0 ? 1 : extractor.n - 1 + str = pad_string(str, repeat(extractor.padder, n)) + return n_grams(extractor, str, extractor.n) +end + + +""" +Internal function to generate word-level ngrams features from an AbstractString +""" +function extract_features(extractor::WordNGrams, str) + words_split = split(str, extractor.splitter) + padded_words = pad_string(words_split, extractor.padder) + return n_grams(extractor, padded_words, extractor.n) +end + + +""" +Internal function to count and pad generated character-level ngrams (including duplicates) +""" +function cummulative_ngram_count(x) + counter = Dict{eltype(x), Int}() + + unique_list = map(x) do val + if val in keys(counter) + counter[val] += 1 + else + counter[val] = 1 + end + (val, counter[val]) + end + + return unique_list +end + + +""" +Add a new item to a new or existing collection of strings using +the custom AbstractSimStringDB type. 
+""" +function push!(db::AbstractSimStringDB, str::AbstractString) + # Extract features based on the specified feature extractor + features = extract_features(db.feature_extractor, str) + + # Size of the new feature + size = length(features) + + # Add the string to the database + push!(db.string_collection, str) + + # Add the size of the incoming string to size map + push!(db.string_size_map[size], str) + + # Map each feature to a size map along with the originating string + @inbounds for n in features + push!(db.string_feature_map[size][n], str) + end + + return db +end + + +""" +Add bulk items to a new or existing collection of strings using +the custom AbstractSimStringDB type. +""" +function append!(db::AbstractSimStringDB, str::Vector) + @inbounds @simd for i in str + push!(db, i) + end +end \ No newline at end of file diff --git a/src/measures.jl b/src/measures.jl new file mode 100644 index 0000000..286a656 --- /dev/null +++ b/src/measures.jl @@ -0,0 +1,169 @@ +############## String Similarity Measure Definitions ############## + +""" +Abstract base type for all string similarity measures. +""" +abstract type AbstractSimilarityMeasure end + + +""" +Dice Similarity Measure. +""" +struct Dice <: AbstractSimilarityMeasure end + + +""" +Jaccard Similarity Measure. +""" +struct Jaccard <: AbstractSimilarityMeasure end + + +""" +Cosine Similarity Measure. +""" +struct Cosine <: AbstractSimilarityMeasure end + + +""" +Overlap Similarity Measure. +""" +struct Overlap <: AbstractSimilarityMeasure end + + + +############## Minimum Feature Sizes Per Measure ############## +""" +Calculate minimum feature size for Dice similarity measure. +""" +function minimum_feature_size(measure::Dice, query_size, α) + return ceil(Int, ( (α / (2 - α)) * query_size) ) +end + + +""" +Calculate minimum feature size for Jaccard similarity measure. 
+""" +function minimum_feature_size(measure::Jaccard, query_size, α) + return ceil(Int, (α * query_size)) +end + + +""" +Calculate minimum feature size for Cosine similarity measure. +""" +function minimum_feature_size(measure::Cosine, query_size, α) + return ceil(Int, (α * α * query_size) ) +end + + +""" +Calculate minimum feature size for Overlap similarity measure. +""" +function minimum_feature_size(measure::Overlap, query_size, α) + return 1 +end + + +############## Maximum Feature Size Per Measure ############## + +""" +Calculate maximum feature size for Dice similarity measure. +""" +function maximum_feature_size(measure::Dice, db::AbstractSimStringDB, query_size, α) + return floor(Int, ( ((2 - α) / α) * query_size) ) +end + + +""" +Calculate maximum feature size for Jaccard similarity measure. +""" +function maximum_feature_size(measure::Jaccard, db::AbstractSimStringDB, query_size, α) + return floor(Int, (query_size / α)) +end + + +""" +Calculate maximum feature size for Cosine similarity measure. +""" +function maximum_feature_size(measure::Cosine, db::AbstractSimStringDB, query_size, α) + return floor(Int, ( query_size / (α * α) )) +end + + +""" +Calculate maximum feature size for Overlap similarity measure. +""" +function maximum_feature_size(measure::Overlap, db::AbstractSimStringDB, query_size, α) + return min(typemax(Int), maximum(keys(db.string_feature_map))) +end + + +############## Similarity Score Per Measure ############## +""" +Calculate similarity score between X and Y using Dice similarity measure. +""" +function similarity_score(measure::Dice, X, Y) + return 2 * ( length( Set(X) ∩ Set(Y) ) ) / ( length( Set(X) ) + length( Set(Y) ) ) +end + + +""" +Calculate similarity score between X and Y using Jaccard similarity measure. +""" +function similarity_score(measure::Jaccard, X, Y) + return length( Set(X) ∩ Set(Y) ) / ( length( Set(X) ∪ Set(Y) ) ) +end + + +""" +Calculate similarity score between X and Y using Cosine similarity measure. 
+""" +function similarity_score(measure::Cosine, X, Y) + return length( Set(X) ∩ Set(Y) ) / ( √(length( Set(X) ) * length( Set(Y) )) ) +end + + +""" +Calculate similarity score between X and Y using Overlap similarity measure. +""" +function similarity_score(measure::Overlap, X, Y) + return length( Set(X) ∩ Set(Y) ) / min(length( Set(X) ), length( Set(Y) )) +end + + + +############## Number of Minimum Overlaps Per Measure ############## +""" +Calculate the minimum overlap (τ) for a query size, candidate size, and α +using Dice similarity measure. +""" +function minimum_overlap(measure::Dice, query_size, candidate_size, α) + return ceil(Int, (0.5 * α * query_size * candidate_size)) +end + + +""" +Calculate the minimum overlap (τ) for a query size, candidate size, and α +using Jaccard similarity measure. +""" +function minimum_overlap(measure::Jaccard, query_size, candidate_size, α) + return ceil(Int, ((α * (query_size + candidate_size)) / (1 + α)) ) +end + + +""" +Calculate the minimum overlap (τ) for a query size, candidate size, and α +using Cosine similarity measure. +""" +function minimum_overlap(measure::Cosine, query_size, candidate_size, α) + return ceil(Int, ( α * √(query_size * candidate_size) )) +end + + +""" +Calculate the minimum overlap (τ) for a query size, candidate size, and α +using Overlap similarity measure. +""" +function minimum_overlap(measure::Overlap, query_size, candidate_size, α) + return ceil(Int, (α * min(query_size, candidate_size)) ) +end \ No newline at end of file diff --git a/src/search.jl b/src/search.jl new file mode 100644 index 0000000..70d3b84 --- /dev/null +++ b/src/search.jl @@ -0,0 +1,146 @@ +# Main SimString search algorithm + +""" + search(measure::AbstractSimilarityMeasure, db_collection::AbstractSimStringDB, query::AbstractString; + α=0.7, ranked=true) + +Search for strings in a string collection using the SimString algorithm and a +similarity measure. 
+ +# Arguments: +* `measure`::AbstractSimilarityMeasure - The similarity measure to use. +* `db_collection`::AbstractSimStringDB - The database collection to search. +* `query`::AbstractString - The query string to search for. +* `α`::float - The α parameter for the SimString algorithm. +* `ranked`::Boolean - Whether to return the results in ranked order. + +# Example +```julia +db = DictDB(CharacterNGrams(2, " ")); +append!(db, ["foo", "bar", "fooo"]); + +search(Dice(), db, "foo"; α=0.8, ranked=true) +# 2-element Vector{Tuple{String, Float64}}: +# ("foo", 1.0) +# ("fooo", 0.8888888888888888) +``` + +# Returns +* A Vector of results, where each element is a Tuple of the form (`string`, `similarity measure score`). +""" +function search(measure::AbstractSimilarityMeasure, db_collection::AbstractSimStringDB, query::AbstractString; α=0.7, ranked=true) + return search!(measure, db_collection, query; α=α, ranked=ranked) +end + + +""" +Internal function which ranks the results of a search using the specified similarity measure. +""" +function rank_search_results(measure::AbstractSimilarityMeasure, db_collection::DictDB, query, results; ranked=true) + features = extract_features(db_collection.feature_extractor, query) + + # Compute similarity scores for each result + ranked_results = map(results) do x + x, similarity_score(measure, features, extract_features(db_collection.feature_extractor, x) ) + end + + # Sort by similarity score and return + return ranked ? 
sort(ranked_results, by = i -> i[2], rev=true) : ranked_results +end + + +""" +Internal function which performs the overlap join +""" +function overlap_join(db_collection::AbstractSimStringDB, features, τ, candidate_size) + # length of features + query_feature_length = length(features) + + # Sort features from the most uncommon and the most common + features = sort(features, by = i -> length(lookup_feature_set_by_size_feature(db_collection, candidate_size, i) ) ) + + # Count the occurrences of each feature + candidate_match_counts = DefaultDict(0) + + feature_slice_index = query_feature_length - τ + 1 + + if feature_slice_index < 0 + focus_features = features[1:end + feature_slice_index] + else + focus_features = features[1:feature_slice_index] + end + + for i in focus_features + for s in lookup_feature_set_by_size_feature(db_collection, candidate_size, i) + candidate_match_counts[s] += 1 + end + end + + results = String[] + + # TODO: Return results in case of a perfect match?? + # if τ == 1 + # results = collect(keys(candidate_match_counts)) + # end + + for (candidate, match_count) in candidate_match_counts + + for i in (query_feature_length - τ + 1) : query_feature_length - 1 # TODO: Verify + + if i < 0 + feature = features[end + i] + elseif i == 0 + feature = features[i+1] + else + feature = features[i] + + end + + if candidate in lookup_feature_set_by_size_feature(db_collection, candidate_size, feature) + match_count += 1 + end + + if match_count >= τ + append!(results, [candidate]) + break + end + + remaining_count = query_feature_length - i - 1 + + if (match_count + remaining_count) < τ + break + end + + end + end + return results +end + + +""" +Search for strings in custom DictDB string collection using the SimString algorithm +and a similarity measure. 
+""" +function search!(measure::AbstractSimilarityMeasure, db_collection::DictDB, query::AbstractString; α=0.7, ranked=true) + # Generate features from query string + features = extract_features(db_collection.feature_extractor, query) + + # Metadata from the generated features (length, min & max sizes) + length_of_features = length(features) + min_feature_size = minimum_feature_size(measure, length_of_features, α) + max_feature_size = maximum_feature_size(measure, db_collection, length_of_features, α) + + results = String[] + + # Generate and return results from the potential candidate size pool + for candidate_size in min_feature_size:max_feature_size + # Minimum overlap + τ = minimum_overlap(measure, length_of_features, candidate_size, α) + + # Generate approximate candidates from the overlap join + append!(results, overlap_join(db_collection, features, τ, candidate_size)) + end + + # Rank search results + return rank_search_results(measure, db_collection, query, results; ranked=ranked) +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 63b8101..333e0e6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,23 @@ -using SimString +module TestSimString using Test -@testset "SimString.jl" begin - # Write your tests here. 
+for file in sort([file for file in readdir(@__DIR__) if occursin(r"^test[_0-9]+.*\.jl$", file)]) + m = match(r"test([0-9]+)_(.*).jl", file) + filename = String(m[2]) + testnum = string(parse(Int, m[1])) + + # with this test one can run only specific tests, for example + # Pkg.test("SimString", test_args = ["features"]) + # or + # Pkg.test("SimString", test_args = ["6"]) + if isempty(ARGS) || (filename in ARGS) || (testnum in ARGS) || (m[1] in ARGS) + @testset "$filename" begin + # Here you can optionally exclude some test files + # VERSION < v"1.1" && file == "test_xxx.jl" && continue + + include(file) + end + end end + +end # module \ No newline at end of file diff --git a/test/test01_dictdb.jl b/test/test01_dictdb.jl new file mode 100644 index 0000000..31f4e41 --- /dev/null +++ b/test/test01_dictdb.jl @@ -0,0 +1,71 @@ +module TestDBCollection +using SimString +using Test + + +@testset "Check single updates of DictDB using CharacterNGrams" begin + db = DictDB(CharacterNGrams(3, " ")) + push!(db, "foo") + push!(db, "bar") + push!(db, "fooo") + + @test db.string_collection == ["foo", "bar", "fooo"] + @test db.string_size_map[5] == Set(["bar", "foo"]) + @test db.string_size_map[6] == Set(["fooo"]) + + @test collect(keys(db.string_feature_map)) == [5, 6] + + @test collect(values(db.string_feature_map[5])) == vcat( (repeat([Set(["foo"])], 5)), (repeat([Set(["bar"])], 5)) ) + @test collect(values(db.string_feature_map[6])) == repeat([Set(["fooo"])], 6) +end + + +@testset "Check single update of DictDB using WordNGrams" begin + db = DictDB(WordNGrams(2, " ", " ")) + push!(db, "You are a really really really cool dude.") + + @test db.string_collection == ["You are a really really really cool dude."] + @test db.string_size_map[9] == Set(["You are a really really really cool dude."]) + @test collect(keys(db.string_feature_map)) == [9] + @test collect(values(db.string_feature_map[9])) == repeat([Set(["You are a really really really cool dude."])], 9) +end + + +@testset 
"Check bulk updates of DictDB using CharacterNGrams" begin + db = DictDB(CharacterNGrams(3, " ")) + append!(db, ["foo", "bar", "fooo"]) + + @test db.string_collection == ["foo", "bar", "fooo"] + @test db.string_size_map[5] == Set(["bar", "foo"]) + @test db.string_size_map[6] == Set(["fooo"]) + + @test collect(keys(db.string_feature_map)) == [5, 6] + + @test collect(values(db.string_feature_map[5])) == vcat( (repeat([Set(["foo"])], 5)), (repeat([Set(["bar"])], 5)) ) + @test collect(values(db.string_feature_map[6])) == repeat([Set(["fooo"])], 6) + + @test eltype(collect(keys(db.string_feature_map[5]))) == Tuple{String, Int64} +end + + +@testset "Check bulk updates of DictDB using WordNGrams" begin + db = DictDB(WordNGrams(2, " ", " ")) + append!(db, ["You are a really really really cool dude.", "Sometimes you are not really really cool tho"]) + + @test db.string_collection == ["You are a really really really cool dude.", "Sometimes you are not really really cool tho"] + @test db.string_size_map[9] == Set(["You are a really really really cool dude.", "Sometimes you are not really really cool tho"]) + + @test collect(keys(db.string_feature_map)) == [9] + @test collect(values(db.string_feature_map[9]))[5] == Set(["You are a really really really cool dude.", "Sometimes you are not really really cool tho"]) + @test collect(values(db.string_feature_map[9]))[7] == Set(["You are a really really really cool dude.", "Sometimes you are not really really cool tho"]) + + @test eltype(collect(keys(db.string_feature_map[9]))) == Tuple{Tuple{String, String}, Int64} +end + + + + + + + +end # module \ No newline at end of file diff --git a/test/test02_features.jl b/test/test02_features.jl new file mode 100644 index 0000000..dcf6146 --- /dev/null +++ b/test/test02_features.jl @@ -0,0 +1,35 @@ +module TestFeatures +using SimString +using Test + + +@testset "Test feature extraction" begin + char_ngram_res = SimString.extract_features(CharacterNGrams(3, " "), "prepress") + @test 
char_ngram_res[6] == ("pre", 2) + + word_ngram_res = SimString.extract_features(WordNGrams(2, " ", " "), "You are a really really really cool dude.") + @test word_ngram_res[6] == (("really", "really"), 2) +end + + +@testset "Test padding" begin + @test SimString.pad_string(["one", "word"], " ") == [" ", "one", "word", " "] + @test SimString.pad_string("one word", " ") == " one word " +end + + + +@testset "Test cummulative_ngram_count" begin + +end + + + + + + + + + + +end # module \ No newline at end of file diff --git a/test/test03_measures.jl b/test/test03_measures.jl new file mode 100644 index 0000000..3083b80 --- /dev/null +++ b/test/test03_measures.jl @@ -0,0 +1,70 @@ +module TestMeasureUtils +using SimString +using Test + + +@testset "Test Similarity Scores" begin + X = [1, 2, 3] + Y = [1, 2, 4, 5] + @test SimString.similarity_score(Dice(), X, Y) ≈ 0.5714285714285714 + @test SimString.similarity_score(Jaccard(), X, Y) ≈ 0.4 + @test SimString.similarity_score(Cosine(), X, Y) ≈ 0.5773502691896258 + @test SimString.similarity_score(Overlap(), X, Y) ≈ 0.6666666666666666 +end + + +@testset "Test Minimum Candidate Feature Size" begin + @test SimString.minimum_feature_size(Dice(), 5, 1.) == 5 + @test SimString.minimum_feature_size(Dice(), 5, 0.5) == 2 + + @test SimString.minimum_feature_size(Jaccard(), 5, 1.) == 5 + @test SimString.minimum_feature_size(Jaccard(), 5, 0.5) == 3 + + @test SimString.minimum_feature_size(Cosine(), 5, 1.) == 5 + @test SimString.minimum_feature_size(Cosine(), 5, 0.5) == 2 + + @test SimString.minimum_feature_size(Overlap(), 5, 1.) == 1 + @test SimString.minimum_feature_size(Overlap(), 5, 0.5) == 1 +end + + +@testset "Test Maximum Candidate Feature Size" begin + db = DictDB(CharacterNGrams(3, " ")) + append!(db, ["foo", "bar", "fooo"]) + + @test SimString.maximum_feature_size(Dice(), db, 5, 1.) == 5 + @test SimString.maximum_feature_size(Dice(), db, 5, 0.5) == 15 + + @test SimString.maximum_feature_size(Jaccard(), db, 5, 1.) 
== 5 + @test SimString.maximum_feature_size(Jaccard(), db, 5, 0.5) == 10 + + @test SimString.maximum_feature_size(Cosine(), db, 5, 1.) == 5 + @test SimString.maximum_feature_size(Cosine(), db, 5, 0.5) == 20 + + @test SimString.maximum_feature_size(Overlap(), db, 5, 1.) == 6 + @test SimString.maximum_feature_size(Overlap(), db, 5, 0.5) == 6 +end + + +@testset "Test Minimum Feature Overlap" begin + @test SimString.minimum_overlap(Dice(), 5, 5, 1.0) == 13 + @test SimString.minimum_overlap(Dice(), 5, 20, 1.0) == 50 + @test SimString.minimum_overlap(Dice(), 5, 5, 0.5) == 7 + + @test SimString.minimum_overlap(Jaccard(), 5, 5, 1.0) == 5 + @test SimString.minimum_overlap(Jaccard(), 5, 20, 1.0) == 13 + @test SimString.minimum_overlap(Jaccard(), 5, 5, 0.5) == 4 + + @test SimString.minimum_overlap(Cosine(), 5, 5, 1.0) == 5 + @test SimString.minimum_overlap(Cosine(), 5, 20, 1.0) == 10 + @test SimString.minimum_overlap(Cosine(), 5, 5, 0.5) == 3 + + @test SimString.minimum_overlap(Overlap(), 5, 5, 1.0) == 5 + @test SimString.minimum_overlap(Overlap(), 5, 20, 1.0) == 5 + @test SimString.minimum_overlap(Overlap(), 5, 5, 0.5) == 3 + +end + + + +end # module \ No newline at end of file diff --git a/test/test04_search.jl b/test/test04_search.jl new file mode 100644 index 0000000..b062bcb --- /dev/null +++ b/test/test04_search.jl @@ -0,0 +1,52 @@ +module TestMeasures +using SimString +using Test +using Faker + + +@testset "Test Dice Search" begin + db = DictDB(CharacterNGrams(2, " ")); + append!(db, ["foo", "bar", "fooo"]); + + res = search(Dice(), db, "foo"; α=0.8, ranked=true) + @test res == [("foo", 1.0), ("fooo", 0.8888888888888888)] +end + + +@testset "Test Jaccard Search" begin + db = DictDB(CharacterNGrams(2, " ")); + append!(db, ["foo", "bar", "fooo"]); + + res = search(Jaccard(), db, "foo"; α=0.8, ranked=true) + @test res == [("foo", 1.0), ("fooo", 0.8)] + +end + + +@testset "Test Cosine Search" begin + db = DictDB(CharacterNGrams(2, " ")); + append!(db, ["foo", "bar", 
"fooo"]); + + res = search(Cosine(), db, "foo"; α=0.8, ranked=true) + @test res == [("foo", 1.0), ("fooo", 0.8944271909999159)] + +end + + +@testset "Test Overlap Search" begin + db = DictDB(CharacterNGrams(2, " ")); + append!(db, ["foo", "bar", "fooo"]); + + res = search(Overlap(), db, "foo"; α=0.8, ranked=true) + @test res == [("foo", 1.0), ("fooo", 1.0)] + +end + + + + + + + + +end # module \ No newline at end of file