diff --git a/README.md b/README.md
index df918fb..932ddf4 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,22 @@ I intend to port some commands over from [VCF-kit](https://github.com/AndersenLa
  
 ## Tools
 
+### fq-dedup
+
+The `fq-dedup` command de-duplicates a FASTQ by read ID (e.g. `@@D00446:1:140101_HWI-D00446_0001_C8HN4ANXX:8:2210:1238:2018`). Ideally, this should never happen, and I honestly do not know how it happens. The command has to read through the entire FASTQ twice. 
+
+The command uses a [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter) to identify duplicates. If no duplicates are found during the first pass, the command will return 0 and print "No Duplicates Found" to `stderr`. If you are
+checking a set of FASTQs to remove duplicates, you can use the following bash to handle cases where duplicates are
+not found.
+
+```bash
+sc fq-dedup myfastq.fq.gz > myfastq.dedup.fq 2> result.err
+if [[ `head -n 1 result.err` == "No Duplicates Found" ]]; then
+    # Handle case where duplicates are not found (probably by moving file)
+    mv myfastq.fq.gz myfastq.dedup.fq
+fi
+```
+
 ### fq-meta
 
 __Scenario:__ You are given an old dusty hard drive packed with sequence data. Your collaborator says "We have some great sequencing data here, if only someone could analyze it." You peek at the filesystem and discover that FASTQs have been renamed, removing crucial information about how they were generated. Your collaborator, however, recalls certain details about which data was sequenced on which sequencer and he has a list of sequencing barcodes and associated samples that you can match on." If only there was a way to determine the barcodes, sequencer, or other essential metadata for each FASTQ...
diff --git a/sc.nim b/sc.nim
index 5f4b34a..10a9e6b 100644
--- a/sc.nim
+++ b/sc.nim
@@ -15,6 +15,7 @@ import terminal
 import asyncfile
 import zip/gzipfiles
 import src/fq_meta
+import src/fq_dedup
 #import src/index_swap
 import src/utils/helpers
 from constants import ANN_header
@@ -310,6 +311,11 @@ var p = newParser("sc"):
             if opts.fastq.len > 0:
                 for fastq in opts.fastq:
                     fq_meta.fq_meta(fastq, parseInt(opts.lines), opts.symlinks)
+    command("fq-dedup", group="FASTQ"):
+        help("Removes exact duplicates from FASTQ Files")
+        arg("fastq", nargs = 1, help = "Input FASTQ")
+        run:
+            fq_dedup.fq_dedup(opts.fastq)
     command("json", group="VCF"):
         help("Convert a VCF to JSON")
         arg("vcf", nargs = 1, help="VCF to convert to JSON")
diff --git a/sc.nimble b/sc.nimble
index fc2b652..c4b729f 100644
--- a/sc.nimble
+++ b/sc.nimble
@@ -7,7 +7,7 @@ license       = "MIT"
 
 # Dependencies
 
-requires "argparse >= 0.7.1", "hts >= 0.2.8", "colorize", "zip >= 0.2.1"
+requires "argparse >= 0.7.1", "hts >= 0.2.8", "colorize", "zip >= 0.2.1",  "bitvector >= 0.4.10"
 
 bin = @["sc"]
 skipDirs = @["test"]
diff --git a/scripts/functional-tests.sh b/scripts/functional-tests.sh
index 961f8c0..5a9aa4f 100644
--- a/scripts/functional-tests.sh
+++ b/scripts/functional-tests.sh
@@ -3,9 +3,13 @@ test -e ssshtest || wget -q https://raw.githubusercontent.com/ryanlayer/ssshtest
 
 . ssshtest
 
-PARENT_DIR="`git rev-parse --show-toplevel`"
+PARENT_DIR=`git rev-parse --show-toplevel`
 export PATH="${PATH}:${PARENT_DIR}"
 
+#======#
+# json #
+#======#
+
 set -o nounset
 run test_json sc json "${PARENT_DIR}/tests/data/test.vcf.gz" X:17276844-17276844
 assert_equal \"X\" "$(cat "$STDOUT_FILE" | jq '.CHROM')"
@@ -31,11 +35,30 @@ assert_equal 0.5 "$(cat $STDOUT_FILE | jq '.INFO.HOB')"
 run format_dp sc json --format="DP" "${PARENT_DIR}/tests/data/test.vcf.gz" I:41947-41947 | jq '.FORMAT.DP|add'
 assert_equal 2094 "$(cat $STDOUT_FILE | jq '.FORMAT.DP|add')"
 
+#==========#
+# fq-dedup #
+#==========#
+
+run fq_dedup sc fq-dedup tests/fastq/dup.fq
+assert_equal 4 "$(cat $STDOUT_FILE | grep '@' | wc -l)"
+assert_equal 4 "$(cat $STDOUT_FILE | grep '@' | wc -l)"
 
-# fq-meta
+run fq_dedup_gz sc fq-dedup tests/fastq/dup.fq.gz
+assert_equal 4 "$(cat $STDOUT_FILE | grep '@' | wc -l)"
+assert_equal 4 "$(cat $STDOUT_FILE | grep '@' | wc -l)"
+
+#=========#
+# fq-meta #
+#=========#
 curl https://raw.githubusercontent.com/10XGenomics/supernova/master/tenkit/lib/python/tenkit/illumina_instrument.py | \
         grep -v 'import martian' | \
-        sed 's/martian.exit/exit/g' > illumina_instrument.py
+        sed 's/martian.exit/exit/g' | \
+        sed 's/f.readline()/f.readline().decode("utf-8")/g' | \
+        sed 's/exit/print/g' > "${PARENT_DIR}/illumina_instrument.py"
+
+# Fix python script
+echo "Fixing ${PARENT_DIR}/illumina_instrument.py"
+2to3 --write --nobackups "${PARENT_DIR}/illumina_instrument.py"
 
 function test_fq() {
     gzip -c $1 > test.fq.gz
diff --git a/src/fq_dedup.nim b/src/fq_dedup.nim
new file mode 100644
index 0000000..9fb546c
--- /dev/null
+++ b/src/fq_dedup.nim
@@ -0,0 +1,89 @@
+import tables
+import sequtils
+import strutils
+import utils/gz
+import os
+import re
+import sets
+import streams
+import zip/gzipfiles
+import utils/helpers
+import bitvector
+import bloom
+
+type
+    H = uint64
+
+proc fq_dedup*(fastq: string) =
+    #[
+        Deduplicates reads by ID in FASTQs
+
+        Based on Brent Pedersens implementation:
+        https://gist.github.com/brentp/640806
+    ]#
+
+    var 
+        i = 0
+        n_reads: int
+        n_dups: int
+        check = initCountTable[string]()
+        putative_false_positives = initCountTable[string]()
+
+    var bloom = newBloomFilter[uint32](1e8.int, 0.0001, 0, true)
+
+    let stream: Stream =
+        if fastq[^3 .. ^1] == ".gz":
+            newGZFileStream(fastq)
+        else:
+            newFileStream(fastq, fmRead)
+    if stream == nil:
+        quit_error("Unable to open file: " & fastq, 2)
+    
+    # Iterate once to place dups in.
+    for record in stream.lines:
+        if i mod 4 == 0:
+            if bloom.lookup($record):
+                check.inc(record, 1)
+            bloom.insert($record)
+        i.inc()
+        if i mod 100000 == 0:
+            stderr.writeLine $i
+
+    n_reads = i div 4.int
+
+    if check.len == 0:
+        stderr.writeLine("No Duplicates Found")
+        quit(0)
+
+    stream.setPosition(0)
+    var write_ln = true
+    i = 0
+    for record in stream.lines:
+        i.inc()
+        if (i-1) mod 4 == 0:
+            if record in check == false:
+                echo record
+                write_ln = true
+                continue
+            else:
+                putative_false_positives.inc(record, 1)
+                if putative_false_positives[record] > 1:
+                    write_ln = false
+                    n_dups.inc(1)
+                    continue
+                echo record
+                write_ln = true
+        elif write_ln:
+            echo record
+    
+    stderr.writeLine("total_reads: ", n_reads)
+    stderr.writeLine("duplicates ", n_dups)
+    var fp = 0
+    for v in putative_false_positives.values():
+        if v == 1:
+            fp.inc(1)
+    stderr.writeLine("false-positive: ", fp)
+    stderr.writeLine("false-positive-rate: ", fp.float / n_dups.float)
+
+
+    stream.close()
\ No newline at end of file
diff --git a/src/private/murmer3.c b/src/private/murmer3.c
new file mode 100644
index 0000000..508940c
--- /dev/null
+++ b/src/private/murmer3.c
@@ -0,0 +1 @@
+<html><body>You are being <a href="https://raw.githubusercontent.com/MarcAzar/BitVector/master/src/private/murmur3.c">redirected</a>.</body></html>
\ No newline at end of file
diff --git a/src/private/murmer3.h b/src/private/murmer3.h
new file mode 100644
index 0000000..73ed0d3
--- /dev/null
+++ b/src/private/murmer3.h
@@ -0,0 +1 @@
+<html><body>You are being <a href="https://raw.githubusercontent.com/MarcAzar/BitVector/master/src/private/murmur3.h">redirected</a>.</body></html>
\ No newline at end of file
diff --git a/src/utils/bloom.nim b/src/utils/bloom.nim
new file mode 100644
index 0000000..367a7ee
--- /dev/null
+++ b/src/utils/bloom.nim
@@ -0,0 +1,36 @@
+import stint, nimcrypto
+
+type UInt2048 = StUint[2048]
+
+iterator chunksForBloom(h: MDigest[256]): array[2, uint8] =
+  yield [h.data[0], h.data[1]]
+  yield [h.data[2], h.data[3]]
+  yield [h.data[4], h.data[5]]
+
+proc chunkToBloomBits(chunk: array[2, uint8]): UInt2048 =
+  let h = chunk[0].int
+  let l = chunk[1].int
+  one(UInt2048) shl ((l + (h shl 8)) and 2047)
+
+iterator bloomBits(h: MDigest[256]): UInt2048 =
+  for chunk in chunksForBloom(h):
+    yield chunkToBloomBits(chunk)
+
+type BloomFilter* = object
+  value*: UInt2048
+
+proc incl(f: var BloomFilter, h: MDigest[256]) = # Should this be public?
+  for bits in bloomBits(h):
+    f.value = f.value or bits
+
+# TODO: The following 2 procs should be one genric, but it doesn't compile. Nim bug?
+proc incl*(f: var BloomFilter, v: string) = f.incl(keccak256.digest(v))
+proc incl*(f: var BloomFilter, v: openarray[byte]) = f.incl(keccak256.digest(v))
+
+proc contains(f: BloomFilter, h: MDigest[256]): bool = # Should this be public?
+  for bits in bloomBits(h):
+    if (f.value and bits).isZero: return false
+  return true
+
+template contains*[T](f: BloomFilter, v: openarray[T]): bool =
+  f.contains(keccak256.digest(v))
\ No newline at end of file
diff --git a/tests/fastq/dup.fq b/tests/fastq/dup.fq
new file mode 100644
index 0000000..e3eb946
--- /dev/null
+++ b/tests/fastq/dup.fq
@@ -0,0 +1,32 @@
+@t1
+AGGA
++
+AAAA
+@t2
+AGGA
++
+AAAA
+@t1
+AGGA
++
+AAAA
+@t3
+AGGA
++
+AAAA
+@t3
+AGGA
++
+AAJA
+@t4
+AGGA
++
+AAJA
+@t4
+AGGG
++
+AAAA
+@t1
+AGGA
++
+AAAA
diff --git a/tests/fastq/dup.fq.gz b/tests/fastq/dup.fq.gz
new file mode 100644
index 0000000..9a30cd4
Binary files /dev/null and b/tests/fastq/dup.fq.gz differ
diff --git a/tests/fastq/illumina_2000_2500.fq b/tests/fastq/illumina_2000_2500.fq
new file mode 100644
index 0000000..bf469e6
--- /dev/null
+++ b/tests/fastq/illumina_2000_2500.fq
@@ -0,0 +1,4 @@
+@D00446:1:140101_HWI-D00446_0001_C8HN4ANXX:8:2210:1238:2018 1:Y:0:GCTCGGTA
+CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
++
+/////////////////////////////////////////////////////////////////////////////////////////////////////
\ No newline at end of file
diff --git a/tests/fastq/illumina_3000_4000.fq b/tests/fastq/illumina_3000_4000.fq
new file mode 100644
index 0000000..945e429
--- /dev/null
+++ b/tests/fastq/illumina_3000_4000.fq
@@ -0,0 +1,4 @@
+@K00100:33:H300JBBXX:6:1101:1538:1527 1:N:0:GCCAAT
+CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
++
+/////////////////////////////////////////////////////////////////////////////////////////////////////
\ No newline at end of file
diff --git a/tests/fastq/illumina_hiseq_x.fq b/tests/fastq/illumina_hiseq_x.fq
new file mode 100644
index 0000000..2cfc669
--- /dev/null
+++ b/tests/fastq/illumina_hiseq_x.fq
@@ -0,0 +1,4 @@
+@ST-E00314:132:HLCJTCCXX:6:2206:31213:47966 1:N:0
+GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT
++
+!''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65
diff --git a/tests/fastq/nodup.fq b/tests/fastq/nodup.fq
new file mode 100644
index 0000000..b3ab646
--- /dev/null
+++ b/tests/fastq/nodup.fq
@@ -0,0 +1,16 @@
+@t1
+AGGA
++
+AAAA
+@t2
+AGGA
++
+AAAA
+@t3
+AGGA
++
+AAAA
+@t4
+AGGA
++
+AAJA
\ No newline at end of file