From e9917c98074e104d546e697c69df78f09bc72620 Mon Sep 17 00:00:00 2001
From: BlaiseMuhirwa <blaisemuhirwa3@gmail.com>
Date: Sat, 2 Nov 2024 16:38:11 -0700
Subject: [PATCH 1/3] standardize code formatting

---
 .clang-format                             |  81 +++++
 .gitignore                                |   3 +
 Makefile                                  |   7 +
 bin/format.sh                             |   7 +-
 flatnav/distances/DistanceInterface.h     |  30 +-
 flatnav/distances/IPDistanceDispatcher.h  |  31 +-
 flatnav/distances/InnerProductDistance.h  |  37 +--
 flatnav/distances/L2DistanceDispatcher.h  |  30 +-
 flatnav/distances/SquaredL2Distance.h     |  38 +--
 flatnav/index/Index.h                     | 264 +++++++--------
 flatnav/tests/test_distances.cpp          |  28 +-
 flatnav/tests/test_serialization.cpp      |  34 +-
 flatnav/util/Datatype.h                   | 126 ++++----
 flatnav/util/GorderPriorityQueue.h        |  25 +-
 flatnav/util/InnerProductSimdExtensions.h |  78 ++---
 flatnav/util/Macros.h                     |  16 +-
 flatnav/util/Multithreading.h             |   9 +-
 flatnav/util/Reordering.h                 |  40 ++-
 flatnav/util/SquaredL2SimdExtensions.h    | 111 +++----
 flatnav/util/VisitedSetPool.h             |  59 ++--
 flatnav_python/python_bindings.cpp        | 377 +++++++++-------------
 21 files changed, 689 insertions(+), 742 deletions(-)
 create mode 100644 .clang-format
 create mode 100644 Makefile

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..1bfb3be
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,81 @@
+# clang-format configuration: Google C/C++ style as the base, with project-specific overrides below
+# https://clang.llvm.org/docs/ClangFormatStyleOptions.html
+
+Language: Cpp
+BasedOnStyle: Google
+AccessModifierOffset: -1
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: None
+AlignOperands: Align
+AllowAllArgumentsOnNextLine: true
+AllowAllConstructorInitializersOnNextLine: true
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: Empty
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Inline
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLambdasOnASingleLine: Inline
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterReturnType: None
+AlwaysBreakTemplateDeclarations: Yes
+BinPackArguments: true
+BreakBeforeBraces: Custom
+BraceWrapping:
+  AfterCaseLabel: false
+  AfterClass: false
+  AfterStruct: false
+  AfterControlStatement: Never
+  AfterEnum: false
+  AfterFunction: false
+  AfterNamespace: false
+  AfterUnion: false
+  AfterExternBlock: false
+  BeforeCatch: false
+  BeforeElse: false
+  BeforeLambdaBody: false
+  IndentBraces: false
+  SplitEmptyFunction: false
+  SplitEmptyRecord: false
+  SplitEmptyNamespace: false
+BreakBeforeBinaryOperators: None
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializers: BeforeColon
+BreakInheritanceList: BeforeColon
+ColumnLimit: 110
+CompactNamespaces: false
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false  
+EmptyLineBeforeAccessModifier: LogicalBlock
+FixNamespaceComments: true
+IncludeBlocks: Preserve
+IndentCaseLabels: true
+IndentPPDirectives: None
+IndentWidth: 2
+KeepEmptyLinesAtTheStartOfBlocks: true
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PointerAlignment: Left
+ReflowComments: false
+SpaceAfterCStyleCast: false
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeParens: ControlStatements
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceBeforeSquareBrackets: false
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 2
+SpacesInAngles: false
+SpacesInCStyleCastParentheses: false
+SpacesInContainerLiterals: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: c++17
+TabWidth: 4
+UseTab: Never
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index eca58a8..fc5082f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,9 @@ build
 metrics
 node-access-distributions
 
+# Python bytecode caches
+**/__pycache__
+
 # Python wheel related folders/files
 flatnav_python/flatnav.egg-info/
 flatnav_python/poetry.lock
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..17f7ffb
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,7 @@
+CPP_FILES := $(wildcard flatnav/**/*.h flatnav/**/*.cc flatnav/**/*.cpp flatnav_python/*.cpp)
+
+format-cpp:
+	clang-format -i $(CPP_FILES)
+
+build-cpp:
+	./bin/build.sh -e -t
\ No newline at end of file
diff --git a/bin/format.sh b/bin/format.sh
index 5bfa07f..8de3fe2 100755
--- a/bin/format.sh
+++ b/bin/format.sh
@@ -1,11 +1,8 @@
 #!/bin/bash
 
+# Prerequisite: install clang-format and cmake-format before running this script.
 
-# Install cmake-format if it is not installed via pip 
-if ! command -v cmake-format &> /dev/null
-then
-    pip install cmake-format
-fi
+clang-format -i 
 
 # Format all header files with clang-format 
 # TODO: Use a recursive find solution to format headers/src files
diff --git a/flatnav/distances/DistanceInterface.h b/flatnav/distances/DistanceInterface.h
index 717df4c..d5b7617 100644
--- a/flatnav/distances/DistanceInterface.h
+++ b/flatnav/distances/DistanceInterface.h
@@ -1,8 +1,8 @@
 #pragma once
 
 #include <cereal/access.hpp>
-#include <cstddef> // for size_t
-#include <fstream> // for ifstream, ofstream
+#include <cstddef>  // for size_t
+#include <fstream>  // for ifstream, ofstream
 #include <iostream>
 
 namespace flatnav::distances {
@@ -15,39 +15,41 @@ enum class MetricType { L2, IP };
 // distance function through a pointer or virtual function call.
 // CRTP: https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern
 
-template <typename T> class DistanceInterface {
-public:
+template <typename T>
+class DistanceInterface {
+ public:
   // The asymmetric flag is used to indicate whether the distance function
   // is between two database vectors (symmetric) or between a database vector
   // and a query vector. For regular distances (l2, inner product), there is
   // no difference between the two. However, for quantization techniques, such
   // as product quantization, the two distance modes are different.
-  float distance(const void *x, const void *y, bool asymmetric = false) {
-    return static_cast<T *>(this)->distanceImpl(x, y, asymmetric);
+  float distance(const void* x, const void* y, bool asymmetric = false) {
+    return static_cast<T*>(this)->distanceImpl(x, y, asymmetric);
   }
 
   // Returns the dimension of the input data.
-  size_t dimension() { return static_cast<T *>(this)->getDimension(); }
+  size_t dimension() { return static_cast<T*>(this)->getDimension(); }
 
   // Returns the size, in bytes, of the transformed data representation.
-  size_t dataSize() { return static_cast<T *>(this)->dataSizeImpl(); }
+  size_t dataSize() { return static_cast<T*>(this)->dataSizeImpl(); }
 
   // Prints the parameters of the distance function.
-  void getSummary() { static_cast<T *>(this)->getSummaryImpl(); }
+  void getSummary() { static_cast<T*>(this)->getSummaryImpl(); }
 
   // This transforms the data located at src into a form that is writeable
   // to disk / storable in RAM. For distance functions that don't
   // compress the input, this just passses through a copy from src to
   // destination. However, there are functions (e.g. with quantization) where
   // the in-memory representation is not the same as the raw input.
-  void transformData(void *destination, const void *src) {
-    static_cast<T *>(this)->transformDataImpl(destination, src);
+  void transformData(void* destination, const void* src) {
+    static_cast<T*>(this)->transformDataImpl(destination, src);
   }
 
   // Serializes the distance function to disk.
-  template <typename Archive> void serialize(Archive &archive) {
-    static_cast<T *>(this)->template serialize<Archive>(archive);
+  template <typename Archive>
+  void serialize(Archive& archive) {
+    static_cast<T*>(this)->template serialize<Archive>(archive);
   }
 };
 
-} // namespace flatnav::distances
\ No newline at end of file
+}  // namespace flatnav::distances
\ No newline at end of file
diff --git a/flatnav/distances/IPDistanceDispatcher.h b/flatnav/distances/IPDistanceDispatcher.h
index 1e5149c..0124dae 100644
--- a/flatnav/distances/IPDistanceDispatcher.h
+++ b/flatnav/distances/IPDistanceDispatcher.h
@@ -7,8 +7,7 @@
 namespace flatnav::distances {
 
 template <typename T>
-static float defaultInnerProduct(const T *x, const T *y,
-                                 const size_t &dimension) {
+static float defaultInnerProduct(const T* x, const T* y, const size_t& dimension) {
   float inner_product = 0;
   for (size_t i = 0; i < dimension; i++) {
     inner_product += x[i] * y[i];
@@ -16,16 +15,16 @@ static float defaultInnerProduct(const T *x, const T *y,
   return 1.0f - inner_product;
 }
 
-template <typename T> struct InnerProductImpl {
-  static float computeDistance(const T *x, const T *y,
-                               const size_t &dimension) {
+template <typename T>
+struct InnerProductImpl {
+  static float computeDistance(const T* x, const T* y, const size_t& dimension) {
     return defaultInnerProduct<T>(x, y, dimension);
   }
 };
 
-template <> struct InnerProductImpl<float> {
-  static float computeDistance(const float *x, const float *y,
-                               const size_t &dimension) {
+template <>
+struct InnerProductImpl<float> {
+  static float computeDistance(const float* x, const float* y, const size_t& dimension) {
 #if defined(USE_AVX512)
     if (platformSupportsAvx512()) {
       if (dimension % 16 == 0) {
@@ -78,26 +77,26 @@ template <> struct InnerProductImpl<float> {
 };
 
 // TODO: Include SIMD optimized implementations for int8_t.
-template <> struct InnerProductImpl<int8_t> {
-  static float computeDistance(const int8_t *x, const int8_t *y,
-                               const size_t &dimension) {
+template <>
+struct InnerProductImpl<int8_t> {
+  static float computeDistance(const int8_t* x, const int8_t* y, const size_t& dimension) {
     return defaultInnerProduct<int8_t>(x, y, dimension);
   }
 };
 
 // TODO: Include SIMD optimized implementations for uint8_t.
-template <> struct InnerProductImpl<uint8_t> {
-  static float computeDistance(const uint8_t *x, const uint8_t *y,
-                               const size_t &dimension) {
+template <>
+struct InnerProductImpl<uint8_t> {
+  static float computeDistance(const uint8_t* x, const uint8_t* y, const size_t& dimension) {
     return defaultInnerProduct<uint8_t>(x, y, dimension);
   }
 };
 
 struct IPDistanceDispatcher {
   template <typename T>
-  static float dispatch(const T *x, const T *y, const size_t &dimension) {
+  static float dispatch(const T* x, const T* y, const size_t& dimension) {
     return InnerProductImpl<T>::computeDistance(x, y, dimension);
   }
 };
 
-} // namespace flatnav::distances
\ No newline at end of file
+}  // namespace flatnav::distances
\ No newline at end of file
diff --git a/flatnav/distances/InnerProductDistance.h b/flatnav/distances/InnerProductDistance.h
index 29efd7f..559b262 100644
--- a/flatnav/distances/InnerProductDistance.h
+++ b/flatnav/distances/InnerProductDistance.h
@@ -1,13 +1,13 @@
 #pragma once
 
-#include <cereal/access.hpp>
-#include <cereal/cereal.hpp>
-#include <cstddef> // for size_t
-#include <cstring> // for memcpy
 #include <flatnav/distances/DistanceInterface.h>
 #include <flatnav/distances/IPDistanceDispatcher.h>
 #include <flatnav/util/Datatype.h>
 #include <flatnav/util/InnerProductSimdExtensions.h>
+#include <cereal/access.hpp>
+#include <cereal/cereal.hpp>
+#include <cstddef>  // for size_t
+#include <cstring>  // for memcpy
 #include <functional>
 #include <iostream>
 #include <limits>
@@ -21,38 +21,35 @@ using util::DataType;
 using util::type_for_data_type;
 
 template <DataType data_type = DataType::float32>
-class InnerProductDistance
-    : public DistanceInterface<InnerProductDistance<data_type>> {
+class InnerProductDistance : public DistanceInterface<InnerProductDistance<data_type>> {
 
   friend class DistanceInterface<InnerProductDistance>;
   // Enum for compile-time constant
   enum { DISTANCE_ID = 1 };
 
-public:
+ public:
   InnerProductDistance() = default;
   InnerProductDistance(size_t dim)
-      : _dimension(dim),
-        _data_size_bytes(dim * flatnav::util::size(data_type)) {}
+      : _dimension(dim), _data_size_bytes(dim * flatnav::util::size(data_type)) {}
 
   static std::unique_ptr<InnerProductDistance<data_type>> create(size_t dim) {
     return std::make_unique<InnerProductDistance<data_type>>(dim);
   }
 
-  constexpr float distanceImpl(const void *x, const void *y,
-                               [[maybe_unused]] bool asymmetric = false) const {
-    return IPDistanceDispatcher::dispatch(
-        static_cast<const typename type_for_data_type<data_type>::type *>(x),
-        static_cast<const typename type_for_data_type<data_type>::type *>(y),
-        _dimension);
+  constexpr float distanceImpl(const void* x, const void* y, [[maybe_unused]] bool asymmetric = false) const {
+    return IPDistanceDispatcher::dispatch(static_cast<const typename type_for_data_type<data_type>::type*>(x),
+                                          static_cast<const typename type_for_data_type<data_type>::type*>(y),
+                                          _dimension);
   }
 
-private:
+ private:
   size_t _dimension;
   size_t _data_size_bytes;
 
   friend class cereal::access;
 
-  template <typename Archive> void serialize(Archive &ar) {
+  template <typename Archive>
+  void serialize(Archive& ar) {
     ar(_dimension, _data_size_bytes);
   }
 
@@ -60,9 +57,7 @@ class InnerProductDistance
 
   size_t dataSizeImpl() { return _data_size_bytes; }
 
-  void transformDataImpl(void *dst, const void *src) {
-    std::memcpy(dst, src, _data_size_bytes);
-  }
+  void transformDataImpl(void* dst, const void* src) { std::memcpy(dst, src, _data_size_bytes); }
 
   void getSummaryImpl() {
     std::cout << "\nInnerProductDistance Parameters" << std::flush;
@@ -73,4 +68,4 @@ class InnerProductDistance
   }
 };
 
-} // namespace flatnav::distances
\ No newline at end of file
+}  // namespace flatnav::distances
\ No newline at end of file
diff --git a/flatnav/distances/L2DistanceDispatcher.h b/flatnav/distances/L2DistanceDispatcher.h
index f84dc6d..671e951 100644
--- a/flatnav/distances/L2DistanceDispatcher.h
+++ b/flatnav/distances/L2DistanceDispatcher.h
@@ -7,7 +7,7 @@
 namespace flatnav::distances {
 
 template <typename T>
-static float defaultSquaredL2(const T *x, const T *y, const size_t &dimension) {
+static float defaultSquaredL2(const T* x, const T* y, const size_t& dimension) {
   float squared_distance = 0;
   for (size_t i = 0; i < dimension; i++) {
     float difference = x[i] - y[i];
@@ -20,7 +20,8 @@ static float defaultSquaredL2(const T *x, const T *y, const size_t &dimension) {
 // distance
 //  between two arrays of type T.
 // @TODO: We should add constraints to the T type.
-template <typename T> struct SquaredL2Impl {
+template <typename T>
+struct SquaredL2Impl {
   /**
    * Computes the squared L2 distance between two arrays of type T.
    *
@@ -29,16 +30,15 @@ template <typename T> struct SquaredL2Impl {
    * @param dimension The dimension of the arrays.
    * @return The squared L2 distance between the two arrays.
    */
-  static float computeDistance(const T *x, const T *y,
-                               const size_t &dimension) {
+  static float computeDistance(const T* x, const T* y, const size_t& dimension) {
     return defaultSquaredL2<T>(x, y, dimension);
   }
 };
 
 // Specialization of SquaredL2Impl for the float type.
-template <> struct SquaredL2Impl<float> {
-  static float computeDistance(const float *x, const float *y,
-                               const size_t &dimension) {
+template <>
+struct SquaredL2Impl<float> {
+  static float computeDistance(const float* x, const float* y, const size_t& dimension) {
 #if defined(USE_AVX512)
     if (platformSupportsAvx512()) {
       if (dimension % 16 == 0) {
@@ -86,9 +86,9 @@ template <> struct SquaredL2Impl<float> {
   }
 };
 
-template <> struct SquaredL2Impl<int8_t> {
-  static float computeDistance(const int8_t *x, const int8_t *y,
-                               const size_t &dimension) {
+template <>
+struct SquaredL2Impl<int8_t> {
+  static float computeDistance(const int8_t* x, const int8_t* y, const size_t& dimension) {
 // #if defined(USE_AVX512BW) && defined(USE_AVX512VNNI)
 //     if (platformSupportsAvx512()) {
 //       return flatnav::util::computeL2_Avx512_int8(x, y, dimension);
@@ -103,9 +103,9 @@ template <> struct SquaredL2Impl<int8_t> {
   }
 };
 
-template <> struct SquaredL2Impl<uint8_t> {
-  static float computeDistance(const uint8_t *x, const uint8_t *y,
-                               const size_t &dimension) {
+template <>
+struct SquaredL2Impl<uint8_t> {
+  static float computeDistance(const uint8_t* x, const uint8_t* y, const size_t& dimension) {
 #if defined(USE_AVX512)
     if (platformSupportsAvx512()) {
       if (dimension % 64 == 0) {
@@ -120,9 +120,9 @@ template <> struct SquaredL2Impl<uint8_t> {
 
 struct L2DistanceDispatcher {
   template <typename T>
-  static float dispatch(const T *x, const T *y, const size_t &dimension) {
+  static float dispatch(const T* x, const T* y, const size_t& dimension) {
     return SquaredL2Impl<T>::computeDistance(x, y, dimension);
   }
 };
 
-} // namespace flatnav::distances
\ No newline at end of file
+}  // namespace flatnav::distances
\ No newline at end of file
diff --git a/flatnav/distances/SquaredL2Distance.h b/flatnav/distances/SquaredL2Distance.h
index a5a2996..76bf957 100644
--- a/flatnav/distances/SquaredL2Distance.h
+++ b/flatnav/distances/SquaredL2Distance.h
@@ -1,13 +1,13 @@
 #pragma once
-#include <cereal/access.hpp>
-#include <cereal/archives/binary.hpp>
-#include <cereal/cereal.hpp>
-#include <cstddef> // for size_t
-#include <cstring> // for memcpy
 #include <flatnav/distances/DistanceInterface.h>
 #include <flatnav/distances/L2DistanceDispatcher.h>
 #include <flatnav/util/Datatype.h>
 #include <flatnav/util/SquaredL2SimdExtensions.h>
+#include <cereal/access.hpp>
+#include <cereal/archives/binary.hpp>
+#include <cereal/cereal.hpp>
+#include <cstddef>  // for size_t
+#include <cstring>  // for memcpy
 #include <functional>
 #include <iostream>
 #include <type_traits>
@@ -21,18 +21,15 @@ namespace flatnav::distances {
 using util::DataType;
 using util::type_for_data_type;
 
-
 template <DataType data_type = DataType::float32>
-class SquaredL2Distance
-    : public DistanceInterface<SquaredL2Distance<data_type>> {
+class SquaredL2Distance : public DistanceInterface<SquaredL2Distance<data_type>> {
 
   friend class DistanceInterface<SquaredL2Distance>;
   enum { DISTANCE_ID = 0 };
 
-public:
+ public:
   SquaredL2Distance() = default;
-  SquaredL2Distance(size_t dim)
-      : _dimension(dim), _data_size_bytes(dim * util::size(data_type)) {}
+  SquaredL2Distance(size_t dim) : _dimension(dim), _data_size_bytes(dim * util::size(data_type)) {}
 
   static std::unique_ptr<SquaredL2Distance<data_type>> create(size_t dim) {
     return std::make_unique<SquaredL2Distance<data_type>>(dim);
@@ -40,27 +37,26 @@ class SquaredL2Distance
 
   inline constexpr size_t getDimension() const { return _dimension; }
 
-  constexpr float distanceImpl(const void *x, const void *y,
-                               [[maybe_unused]] bool asymmetric = false) const {
-    return L2DistanceDispatcher::dispatch(
-        static_cast<const typename type_for_data_type<data_type>::type *>(x),
-        static_cast<const typename type_for_data_type<data_type>::type *>(y),
-        _dimension);
+  constexpr float distanceImpl(const void* x, const void* y, [[maybe_unused]] bool asymmetric = false) const {
+    return L2DistanceDispatcher::dispatch(static_cast<const typename type_for_data_type<data_type>::type*>(x),
+                                          static_cast<const typename type_for_data_type<data_type>::type*>(y),
+                                          _dimension);
   }
 
-private:
+ private:
   size_t _dimension;
   size_t _data_size_bytes;
 
   friend class ::cereal::access;
 
-  template <typename Archive> void serialize(Archive &ar) {
+  template <typename Archive>
+  void serialize(Archive& ar) {
     ar(_dimension, _data_size_bytes);
   }
 
   inline size_t dataSizeImpl() { return _data_size_bytes; }
 
-  inline void transformDataImpl(void *destination, const void *src) {
+  inline void transformDataImpl(void* destination, const void* src) {
     std::memcpy(destination, src, _data_size_bytes);
   }
 
@@ -73,4 +69,4 @@ class SquaredL2Distance
   }
 };
 
-} // namespace flatnav::distances
+}  // namespace flatnav::distances
diff --git a/flatnav/index/Index.h b/flatnav/index/Index.h
index 97d4593..31a2f5b 100644
--- a/flatnav/index/Index.h
+++ b/flatnav/index/Index.h
@@ -1,5 +1,10 @@
 #pragma once
 
+#include <flatnav/distances/DistanceInterface.h>
+#include <flatnav/util/Macros.h>
+#include <flatnav/util/Multithreading.h>
+#include <flatnav/util/Reordering.h>
+#include <flatnav/util/VisitedSetPool.h>
 #include <algorithm>
 #include <atomic>
 #include <cassert>
@@ -8,11 +13,6 @@
 #include <cereal/cereal.hpp>
 #include <cereal/types/memory.hpp>
 #include <cstring>
-#include <flatnav/distances/DistanceInterface.h>
-#include <flatnav/util/Macros.h>
-#include <flatnav/util/Multithreading.h>
-#include <flatnav/util/Reordering.h>
-#include <flatnav/util/VisitedSetPool.h>
 #include <fstream>
 #include <limits>
 #include <memory>
@@ -30,7 +30,8 @@ namespace flatnav {
 
 // dist_t: A distance function implementing DistanceInterface.
 // label_t: A fixed-width data type for the label (meta-data) of each point.
-template <typename dist_t, typename label_t> class Index {
+template <typename dist_t, typename label_t>
+class Index {
   typedef std::pair<float, label_t> dist_label_t;
   // internal node numbering scheme. We might need to change this to uint64_t
   typedef uint32_t node_id_t;
@@ -39,11 +40,10 @@ template <typename dist_t, typename label_t> class Index {
   // NOTE: by default this is a max-heap. We could make this a min-heap
   // by using std::greater, but we want to use the queue as both a max-heap and
   // min-heap depending on the context.
-  typedef std::priority_queue<dist_node_t, std::vector<dist_node_t>>
-      PriorityQueue;
+  typedef std::priority_queue<dist_node_t, std::vector<dist_node_t>> PriorityQueue;
 
   // Large (several GB), pre-allocated block of memory.
-  char *_index_memory;
+  char* _index_memory;
 
   size_t _M;
   // size of one data point (does not support variable-size data, strings)
@@ -51,7 +51,7 @@ template <typename dist_t, typename label_t> class Index {
   // Node consists of: ([data] [M links] [data label]). This layout was chosen
   // after benchmarking - it's slightly more cache-efficient than others.
   size_t _node_size_bytes;
-  size_t _max_node_count; // Determines size of internal pre-allocated memory
+  size_t _max_node_count;  // Determines size of internal pre-allocated memory
   size_t _cur_num_nodes;
   std::unique_ptr<DistanceInterface<dist_t>> _distance;
   std::mutex _index_data_guard;
@@ -59,7 +59,7 @@ template <typename dist_t, typename label_t> class Index {
   uint32_t _num_threads;
 
   // Remembers which nodes we've visited, to avoid re-computing distances.
-  VisitedSetPool *_visited_set_pool;
+  VisitedSetPool* _visited_set_pool;
   std::vector<std::mutex> _node_links_mutexes;
 
   bool _collect_stats = false;
@@ -69,8 +69,8 @@ template <typename dist_t, typename label_t> class Index {
   mutable std::atomic<uint64_t> _distance_computations = 0;
   mutable std::atomic<uint64_t> _metric_hops = 0;
 
-  Index(const Index &) = delete;
-  Index &operator=(const Index &) = delete;
+  Index(const Index&) = delete;
+  Index& operator=(const Index&) = delete;
 
   // A custom move constructor is needed because the class manages dynamic
   // resources (_index_memory, _visited_set_pool),
@@ -78,8 +78,9 @@ template <typename dist_t, typename label_t> class Index {
   // leaks or double frees. The default move constructor cannot ensure these
   // resources are safely transferred and the source object is left in a valid
   // state.
-  Index(Index &&other) noexcept
-      : _index_memory(other._index_memory), _M(other._M),
+  Index(Index&& other) noexcept
+      : _index_memory(other._index_memory),
+        _M(other._M),
         _data_size_bytes(other._data_size_bytes),
         _node_size_bytes(other._node_size_bytes),
         _max_node_count(other._max_node_count),
@@ -93,7 +94,7 @@ template <typename dist_t, typename label_t> class Index {
     other._visited_set_pool = nullptr;
   }
 
-  Index &operator=(Index &&other) noexcept {
+  Index& operator=(Index&& other) noexcept {
     if (this != &other) {
       delete[] _index_memory;
       delete _visited_set_pool;
@@ -116,16 +117,15 @@ template <typename dist_t, typename label_t> class Index {
     return *this;
   }
 
-  template <typename Archive> void serialize(Archive &archive) {
-    archive(_M, _data_size_bytes, _node_size_bytes, _max_node_count,
-            _cur_num_nodes, *_distance);
+  template <typename Archive>
+  void serialize(Archive& archive) {
+    archive(_M, _data_size_bytes, _node_size_bytes, _max_node_count, _cur_num_nodes, *_distance);
 
     // Serialize the allocated memory for the index & query.
-    archive(
-        cereal::binary_data(_index_memory, _node_size_bytes * _max_node_count));
+    archive(cereal::binary_data(_index_memory, _node_size_bytes * _max_node_count));
   }
 
-public:
+ public:
   /**
    * @brief Construct a new Index object for approximate near neighbor search.
    *
@@ -141,21 +141,24 @@ template <typename dist_t, typename label_t> class Index {
    * @param collect_stats Flag indicating whether to collect statistics during
    * the search process.
    */
-  Index(std::unique_ptr<DistanceInterface<dist_t>> dist, int dataset_size,
-        int max_edges_per_node, bool collect_stats = false)
-      : _M(max_edges_per_node), _max_node_count(dataset_size),
-        _cur_num_nodes(0), _distance(std::move(dist)), _num_threads(1),
+  Index(std::unique_ptr<DistanceInterface<dist_t>> dist, int dataset_size, int max_edges_per_node,
+        bool collect_stats = false)
+      : _M(max_edges_per_node),
+        _max_node_count(dataset_size),
+        _cur_num_nodes(0),
+        _distance(std::move(dist)),
+        _num_threads(1),
         _visited_set_pool(new VisitedSetPool(
             /* initial_pool_size = */ 1,
             /* num_elements = */ dataset_size)),
-        _node_links_mutexes(dataset_size), _collect_stats(collect_stats) {
+        _node_links_mutexes(dataset_size),
+        _collect_stats(collect_stats) {
 
     // Get the size in bytes of the _node_links_mutexes vector.
     size_t mutexes_size_bytes = _node_links_mutexes.size() * sizeof(std::mutex);
 
     _data_size_bytes = _distance->dataSize();
-    _node_size_bytes =
-        _data_size_bytes + (sizeof(node_id_t) * _M) + sizeof(label_t);
+    _node_size_bytes = _data_size_bytes + (sizeof(node_id_t) * _M) + sizeof(label_t);
     size_t index_memory_size = _node_size_bytes * _max_node_count;
 
     _index_memory = new char[index_memory_size];
@@ -166,11 +169,10 @@ template <typename dist_t, typename label_t> class Index {
     delete _visited_set_pool;
   }
 
-  void buildGraphLinks(const std::string &mtx_filename) {
+  void buildGraphLinks(const std::string& mtx_filename) {
     std::ifstream input_file(mtx_filename);
     if (!input_file.is_open()) {
-      throw std::runtime_error("Unable to open file for reading: " +
-                               mtx_filename);
+      throw std::runtime_error("Unable to open file for reading: " + mtx_filename);
     }
 
     std::string line;
@@ -188,13 +190,15 @@ template <typename dist_t, typename label_t> class Index {
     // nodes in the index and that the number of edges is equal to the number of
     // links per node.
     if (num_vertices != _max_node_count) {
-      throw std::runtime_error("Number of vertices in the mtx file does not "
-                               "match the size allocated for the index.");
+      throw std::runtime_error(
+          "Number of vertices in the mtx file does not "
+          "match the size allocated for the index.");
     }
 
     if (num_edges != _M) {
-      throw std::runtime_error("Number of edges in the mtx file does not match "
-                               "the number of links per node.");
+      throw std::runtime_error(
+          "Number of edges in the mtx file does not match "
+          "the number of links per node.");
     }
 
     int u, v;
@@ -202,7 +206,7 @@ template <typename dist_t, typename label_t> class Index {
       // Adjust for 1-based indexing in Matrix Market format
       u--;
       v--;
-      node_id_t *links = getNodeLinks(u);
+      node_id_t* links = getNodeLinks(u);
       // Now add a directed edge from u to v. We need to check for the first
       // available slot in the links array since there might be other edges
       // added before this one. By definition, a slot is available if and only
@@ -221,7 +225,7 @@ template <typename dist_t, typename label_t> class Index {
   std::vector<std::vector<uint32_t>> getGraphOutdegreeTable() {
     std::vector<std::vector<uint32_t>> outdegree_table(_cur_num_nodes);
     for (node_id_t node = 0; node < _cur_num_nodes; node++) {
-      node_id_t *links = getNodeLinks(node);
+      node_id_t* links = getNodeLinks(node);
       for (int i = 0; i < _M; i++) {
         if (links[i] != node) {
           outdegree_table[node].push_back(links[i]);
@@ -240,7 +244,7 @@ template <typename dist_t, typename label_t> class Index {
    * @param label The label (meta-data) of the vector.
    * @param new_node_id The id of the new node.
    */
-  void allocateNode(void *data, label_t &label, node_id_t &new_node_id) {
+  void allocateNode(void* data, label_t& label, node_id_t& new_node_id) {
 
     new_node_id = _cur_num_nodes;
     _distance->transformData(
@@ -248,7 +252,7 @@ template <typename dist_t, typename label_t> class Index {
         /* src = */ data);
     *(getNodeLabel(new_node_id)) = label;
 
-    node_id_t *links = getNodeLinks(new_node_id);
+    node_id_t* links = getNodeLinks(new_node_id);
     // Initialize all edges to self
     std::fill_n(links, _M, new_node_id);
     _cur_num_nodes++;
@@ -281,11 +285,10 @@ template <typename dist_t, typename label_t> class Index {
    * index is reached.
    */
   template <typename data_type>
-  void addBatch(void *data, std::vector<label_t> &labels, int ef_construction,
+  void addBatch(void* data, std::vector<label_t>& labels, int ef_construction,
                 int num_initializations = 100) {
     if (num_initializations <= 0) {
-      throw std::invalid_argument(
-          "num_initializations must be greater than 0.");
+      throw std::invalid_argument("num_initializations must be greater than 0.");
     }
     uint32_t total_num_nodes = labels.size();
     uint32_t data_dimension = _distance->dimension();
@@ -293,7 +296,7 @@ template <typename dist_t, typename label_t> class Index {
     // Don't spawn any threads if we are only using one.
     if (_num_threads == 1) {
       for (uint32_t row_id = 0; row_id < total_num_nodes; row_id++) {
-        void *vector = (data_type *)data + (row_id * data_dimension);
+        void* vector = (data_type*)data + (row_id * data_dimension);
         label_t label = labels[row_id];
         this->add(vector, label, ef_construction, num_initializations);
       }
@@ -304,7 +307,7 @@ template <typename dist_t, typename label_t> class Index {
         /* start_index = */ 0, /* end_index = */ total_num_nodes,
         /* num_threads = */ _num_threads, /* function = */
         [&](uint32_t row_index) {
-          void *vector = (data_type *)data + (row_index * data_dimension);
+          void* vector = (data_type*)data + (row_index * data_dimension);
           label_t label = labels[row_index];
           this->add(vector, label, ef_construction, num_initializations);
         });
@@ -332,13 +335,13 @@ template <typename dist_t, typename label_t> class Index {
    * @exception std::runtime_error Thrown if the maximum number of nodes is
    * reached.
    */
-  void add(void *data, label_t &label, int ef_construction,
-           int num_initializations) {
+  void add(void* data, label_t& label, int ef_construction, int num_initializations) {
 
     if (_cur_num_nodes >= _max_node_count) {
-      throw std::runtime_error("Maximum number of nodes reached. Consider "
-                               "increasing the `max_node_count` parameter to "
-                               "create a larger index.");
+      throw std::runtime_error(
+          "Maximum number of nodes reached. Consider "
+          "increasing the `max_node_count` parameter to "
+          "create a larger index.");
     }
     _index_data_guard.lock();
     auto entry_node = initializeSearch(data, num_initializations);
@@ -365,26 +368,21 @@ template <typename dist_t, typename label_t> class Index {
    * @param ef_search The search beam width.
    * @param num_initializations The number of random initializations to use.
    */
-  std::vector<dist_label_t> search(const void *query, const int K,
-                                   int ef_search,
+  std::vector<dist_label_t> search(const void* query, const int K, int ef_search,
                                    int num_initializations = 100) {
     node_id_t entry_node = initializeSearch(query, num_initializations);
-    PriorityQueue neighbors =
-        beamSearch(/* query = */ query,
-                   /* entry_node = */ entry_node,
-                   /* buffer_size = */ std::max(ef_search, K));
+    PriorityQueue neighbors = beamSearch(/* query = */ query,
+                                         /* entry_node = */ entry_node,
+                                         /* buffer_size = */ std::max(ef_search, K));
     auto size = neighbors.size();
     std::vector<dist_label_t> results;
     results.reserve(size);
     while (!neighbors.empty()) {
-      results.emplace_back(neighbors.top().first,
-                           *getNodeLabel(neighbors.top().second));
+      results.emplace_back(neighbors.top().first, *getNodeLabel(neighbors.top().second));
       neighbors.pop();
     }
     std::sort(results.begin(), results.end(),
-              [](const dist_label_t &left, const dist_label_t &right) {
-                return left.first < right.first;
-              });
+              [](const dist_label_t& left, const dist_label_t& right) { return left.first < right.first; });
     if (results.size() > static_cast<size_t>(K)) {
       results.resize(K);
     }
@@ -392,9 +390,9 @@ template <typename dist_t, typename label_t> class Index {
     return results;
   }
 
-  void doGraphReordering(const std::vector<std::string> &reordering_methods) {
+  void doGraphReordering(const std::vector<std::string>& reordering_methods) {
 
-    for (const auto &method : reordering_methods) {
+    for (const auto& method : reordering_methods) {
       auto outdegree_table = getGraphOutdegreeTable();
       std::vector<node_id_t> P;
       if (method == "gorder") {
@@ -411,8 +409,7 @@ template <typename dist_t, typename label_t> class Index {
 
   void reorderGOrder(const int window_size = 5) {
     auto outdegree_table = getGraphOutdegreeTable();
-    std::vector<node_id_t> P =
-        util::gOrder<node_id_t>(outdegree_table, window_size);
+    std::vector<node_id_t> P = util::gOrder<node_id_t>(outdegree_table, window_size);
 
     relabel(P);
   }
@@ -423,8 +420,7 @@ template <typename dist_t, typename label_t> class Index {
     relabel(P);
   }
 
-  static std::unique_ptr<Index<dist_t, label_t>>
-  loadIndex(const std::string &filename) {
+  static std::unique_ptr<Index<dist_t, label_t>> loadIndex(const std::string& filename) {
     std::ifstream stream(filename, std::ios::binary);
 
     if (!stream.is_open()) {
@@ -434,34 +430,28 @@ template <typename dist_t, typename label_t> class Index {
     cereal::BinaryInputArchive archive(stream);
     std::unique_ptr<Index<dist_t, label_t>> index(new Index<dist_t, label_t>());
 
-    std::unique_ptr<DistanceInterface<dist_t>> dist =
-        std::make_unique<dist_t>();
+    std::unique_ptr<DistanceInterface<dist_t>> dist = std::make_unique<dist_t>();
 
     // 1. Deserialize metadata
-    archive(index->_M, index->_data_size_bytes, index->_node_size_bytes,
-            index->_max_node_count, index->_cur_num_nodes, *dist);
+    archive(index->_M, index->_data_size_bytes, index->_node_size_bytes, index->_max_node_count,
+            index->_cur_num_nodes, *dist);
     index->_visited_set_pool = new VisitedSetPool(
         /* initial_pool_size = */ 1,
         /* num_elements = */ index->_max_node_count);
     index->_distance = std::move(dist);
-    index->_num_threads = std::max(
-        (uint32_t)1, (uint32_t)std::thread::hardware_concurrency() / 2);
-    index->_node_links_mutexes =
-        std::vector<std::mutex>(index->_max_node_count);
+    index->_num_threads = std::max((uint32_t)1, (uint32_t)std::thread::hardware_concurrency() / 2);
+    index->_node_links_mutexes = std::vector<std::mutex>(index->_max_node_count);
 
     // 2. Allocate memory using deserialized metadata
-    index->_index_memory =
-        new char[index->_node_size_bytes * index->_max_node_count];
+    index->_index_memory = new char[index->_node_size_bytes * index->_max_node_count];
 
     // 3. Deserialize content into allocated memory
-    archive(
-        cereal::binary_data(index->_index_memory,
-                            index->_node_size_bytes * index->_max_node_count));
+    archive(cereal::binary_data(index->_index_memory, index->_node_size_bytes * index->_max_node_count));
 
     return index;
   }
 
-  void saveIndex(const std::string &filename) {
+  void saveIndex(const std::string& filename) {
     std::ofstream stream(filename, std::ios::binary);
 
     if (!stream.is_open()) {
@@ -488,8 +478,7 @@ template <typename dist_t, typename label_t> class Index {
     return static_cast<uint64_t>(_node_size_bytes * _max_node_count);
   }
   inline uint64_t mutexesAllocatedMemory() const {
-    return static_cast<uint64_t>(_node_links_mutexes.size() *
-                                 sizeof(std::mutex));
+    return static_cast<uint64_t>(_node_links_mutexes.size() * sizeof(std::mutex));
   }
 
   inline uint64_t visitedSetPoolAllocatedMemory() const {
@@ -509,9 +498,7 @@ template <typename dist_t, typename label_t> class Index {
   inline size_t currentNumNodes() const { return _cur_num_nodes; }
   inline size_t dataDimension() const { return _distance->dimension(); }
 
-  inline uint64_t distanceComputations() const {
-    return _distance_computations.load();
-  }
+  inline uint64_t distanceComputations() const { return _distance_computations.load(); }
 
   void resetStats() {
     _distance_computations = 0;
@@ -530,28 +517,25 @@ template <typename dist_t, typename label_t> class Index {
     _distance->getSummary();
   }
 
-private:
+ private:
   friend class cereal::access;
   // Default constructor for cereal
   Index() = default;
 
-  char *getNodeData(const node_id_t &n) const {
-    return _index_memory + (n * _node_size_bytes);
-  }
+  char* getNodeData(const node_id_t& n) const { return _index_memory + (n * _node_size_bytes); }
 
-  node_id_t *getNodeLinks(const node_id_t &n) const {
-    char *location = _index_memory + (n * _node_size_bytes) + _data_size_bytes;
-    return reinterpret_cast<node_id_t *>(location);
+  node_id_t* getNodeLinks(const node_id_t& n) const {
+    char* location = _index_memory + (n * _node_size_bytes) + _data_size_bytes;
+    return reinterpret_cast<node_id_t*>(location);
   }
 
-  label_t *getNodeLabel(const node_id_t &n) const {
-    char *location = _index_memory + (n * _node_size_bytes) + _data_size_bytes +
-                     (_M * sizeof(node_id_t));
-    return reinterpret_cast<label_t *>(location);
+  label_t* getNodeLabel(const node_id_t& n) const {
+    char* location = _index_memory + (n * _node_size_bytes) + _data_size_bytes + (_M * sizeof(node_id_t));
+    return reinterpret_cast<label_t*>(location);
   }
 
-  inline void swapNodes(node_id_t a, node_id_t b, void *temp_data,
-                        node_id_t *temp_links, label_t *temp_label) {
+  inline void swapNodes(node_id_t a, node_id_t b, void* temp_data, node_id_t* temp_links,
+                        label_t* temp_label) {
 
     // stash b in temp
     std::memcpy(temp_data, getNodeData(b), _data_size_bytes);
@@ -580,12 +564,11 @@ template <typename dist_t, typename label_t> class Index {
    *
    * @return PriorityQueue
    */
-  PriorityQueue beamSearch(const void *query, const node_id_t entry_node,
-                           const int buffer_size) {
+  PriorityQueue beamSearch(const void* query, const node_id_t entry_node, const int buffer_size) {
     PriorityQueue neighbors;
     PriorityQueue candidates;
 
-    auto *visited_set = _visited_set_pool->pollAvailableSet();
+    auto* visited_set = _visited_set_pool->pollAvailableSet();
     visited_set->clear();
 
     // Prefetch the data for entry node before computing its distance.
@@ -593,9 +576,8 @@ template <typename dist_t, typename label_t> class Index {
     _mm_prefetch(getNodeData(entry_node), _MM_HINT_T0);
 #endif
 
-    float dist =
-        _distance->distance(/* x = */ query, /* y = */ getNodeData(entry_node),
-                            /* asymmetric = */ true);
+    float dist = _distance->distance(/* x = */ query, /* y = */ getNodeData(entry_node),
+                                     /* asymmetric = */ true);
 
     float max_dist = dist;
     candidates.emplace(-dist, entry_node);
@@ -636,15 +618,13 @@ template <typename dist_t, typename label_t> class Index {
     return neighbors;
   }
 
-  void processCandidateNode(const void *query, node_id_t &node, float &max_dist,
-                            const int buffer_size, VisitedSet *visited_set,
-                            PriorityQueue &neighbors,
-                            PriorityQueue &candidates) {
+  void processCandidateNode(const void* query, node_id_t& node, float& max_dist, const int buffer_size,
+                            VisitedSet* visited_set, PriorityQueue& neighbors, PriorityQueue& candidates) {
     // Lock all operations on this specific node
     std::unique_lock<std::mutex> lock(_node_links_mutexes[node]);
     float dist = 0.f;
 
-    node_id_t *neighbor_node_links = getNodeLinks(node);
+    node_id_t* neighbor_node_links = getNodeLinks(node);
     for (uint32_t i = 0; i < _M; i++) {
       node_id_t neighbor_node_id = neighbor_node_links[i];
 
@@ -657,8 +637,7 @@ template <typename dist_t, typename label_t> class Index {
       }
 #endif
 
-      bool neighbor_is_visited =
-          visited_set->isVisited(/* num = */ neighbor_node_id);
+      bool neighbor_is_visited = visited_set->isVisited(/* num = */ neighbor_node_id);
 
       if (neighbor_is_visited) {
         continue;
@@ -693,7 +672,7 @@ template <typename dist_t, typename label_t> class Index {
    * heuristic. The neighbors priority queue contains elements sorted by
    * distance where the top element is the furthest neighbor from the query.
    */
-  void selectNeighbors(PriorityQueue &neighbors) {
+  void selectNeighbors(PriorityQueue& neighbors) {
     if (neighbors.size() < _M) {
       return;
     }
@@ -717,11 +696,10 @@ template <typename dist_t, typename label_t> class Index {
       candidates.pop();
 
       bool should_keep_candidate = true;
-      for (const dist_node_t &second_pair : saved_candidates) {
+      for (const dist_node_t& second_pair : saved_candidates) {
 
-        cur_dist =
-            _distance->distance(/* x = */ getNodeData(second_pair.second),
-                                /* y = */ getNodeData(current_pair.second));
+        cur_dist = _distance->distance(/* x = */ getNodeData(second_pair.second),
+                                       /* y = */ getNodeData(current_pair.second));
 
         if (cur_dist < (-current_pair.first)) {
           should_keep_candidate = false;
@@ -737,19 +715,19 @@ template <typename dist_t, typename label_t> class Index {
     }
     // TODO: implement my own priority queue, get rid of vector
     // saved_candidates, add directly to neighborqueue earlier.
-    for (const dist_node_t &current_pair : saved_candidates) {
+    for (const dist_node_t& current_pair : saved_candidates) {
       neighbors.emplace(-current_pair.first, current_pair.second);
     }
   }
 
-  void connectNeighbors(PriorityQueue &neighbors, node_id_t new_node_id) {
+  void connectNeighbors(PriorityQueue& neighbors, node_id_t new_node_id) {
     // connects neighbors according to the HSNW heuristic
 
     // Lock all operations on this node
     std::unique_lock<std::mutex> lock(_node_links_mutexes[new_node_id]);
 
-    node_id_t *new_node_links = getNodeLinks(new_node_id);
-    int i = 0; // iterates through links for "new_node_id"
+    node_id_t* new_node_links = getNodeLinks(new_node_id);
+    int i = 0;  // iterates through links for "new_node_id"
 
     while (neighbors.size() > 0) {
       node_id_t neighbor_node_id = neighbors.top().second;
@@ -757,9 +735,8 @@ template <typename dist_t, typename label_t> class Index {
       new_node_links[i] = neighbor_node_id;
       // now do the back-connections (a little tricky)
 
-      std::unique_lock<std::mutex> neighbor_lock(
-          _node_links_mutexes[neighbor_node_id]);
-      node_id_t *neighbor_node_links = getNodeLinks(neighbor_node_id);
+      std::unique_lock<std::mutex> neighbor_lock(_node_links_mutexes[neighbor_node_id]);
+      node_id_t* neighbor_node_links = getNodeLinks(neighbor_node_id);
       bool is_inserted = false;
       for (size_t j = 0; j < _M; j++) {
         if (neighbor_node_links[j] == neighbor_node_id) {
@@ -777,30 +754,28 @@ template <typename dist_t, typename label_t> class Index {
         // construct a candidate set including the old links AND our new
         // one, then prune this candidate set to get the new neighbors.
 
-        float max_dist =
-            _distance->distance(/* x = */ getNodeData(neighbor_node_id),
-                                /* y = */ getNodeData(new_node_id));
+        float max_dist = _distance->distance(/* x = */ getNodeData(neighbor_node_id),
+                                             /* y = */ getNodeData(new_node_id));
 
         PriorityQueue candidates;
         candidates.emplace(max_dist, new_node_id);
         for (size_t j = 0; j < _M; j++) {
           if (neighbor_node_links[j] != neighbor_node_id) {
             auto label = neighbor_node_links[j];
-            auto distance =
-                _distance->distance(/* x = */ getNodeData(neighbor_node_id),
-                                    /* y = */ getNodeData(label));
+            auto distance = _distance->distance(/* x = */ getNodeData(neighbor_node_id),
+                                                /* y = */ getNodeData(label));
             candidates.emplace(distance, label);
           }
         }
         selectNeighbors(candidates);
         // connect the pruned set of candidates, including self-loops:
         size_t j = 0;
-        while (candidates.size() > 0) { // candidates
+        while (candidates.size() > 0) {  // candidates
           neighbor_node_links[j] = candidates.top().second;
           candidates.pop();
           j++;
         }
-        while (j < _M) { // self-loops (unused links)
+        while (j < _M) {  // self-loops (unused links)
           neighbor_node_links[j] = neighbor_node_id;
           j++;
         }
@@ -827,12 +802,10 @@ template <typename dist_t, typename label_t> class Index {
    * @param num_initializations
    * @return node_id_t
    */
-  inline node_id_t initializeSearch(const void *query,
-                                    int num_initializations) {
+  inline node_id_t initializeSearch(const void* query, int num_initializations) {
     // select entry_node from a set of random entry point options
     if (num_initializations <= 0) {
-      throw std::invalid_argument(
-          "num_initializations must be greater than 0.");
+      throw std::invalid_argument("num_initializations must be greater than 0.");
     }
 
     int step_size = _cur_num_nodes / num_initializations;
@@ -846,9 +819,8 @@ template <typename dist_t, typename label_t> class Index {
     }
 
     for (node_id_t node = 0; node < _cur_num_nodes; node += step_size) {
-      float dist =
-          _distance->distance(/* x = */ query, /* y = */ getNodeData(node),
-                              /* asymmetric = */ true);
+      float dist = _distance->distance(/* x = */ query, /* y = */ getNodeData(node),
+                                       /* asymmetric = */ true);
       if (dist < min_dist) {
         min_dist = dist;
         entry_node = node;
@@ -857,21 +829,21 @@ template <typename dist_t, typename label_t> class Index {
     return entry_node;
   }
 
-  void relabel(const std::vector<node_id_t> &P) {
+  void relabel(const std::vector<node_id_t>& P) {
     // 1. Rewire all of the node connections
     for (node_id_t n = 0; n < _cur_num_nodes; n++) {
-      node_id_t *links = getNodeLinks(n);
+      node_id_t* links = getNodeLinks(n);
       for (int m = 0; m < _M; m++) {
         links[m] = P[links[m]];
       }
     }
 
     // 2. Physically re-layout the nodes (in place)
-    char *temp_data = new char[_data_size_bytes];
-    node_id_t *temp_links = new node_id_t[_M];
-    label_t *temp_label = new label_t;
+    char* temp_data = new char[_data_size_bytes];
+    node_id_t* temp_links = new node_id_t[_M];
+    label_t* temp_label = new label_t;
 
-    auto *visited_set = _visited_set_pool->pollAvailableSet();
+    auto* visited_set = _visited_set_pool->pollAvailableSet();
 
     // In this context, is_visited stores which nodes have been relocated
     // (it would be equivalent to name this variable "is_relocated").
@@ -914,4 +886,4 @@ template <typename dist_t, typename label_t> class Index {
   }
 };
 
-} // namespace flatnav
+}  // namespace flatnav
diff --git a/flatnav/tests/test_distances.cpp b/flatnav/tests/test_distances.cpp
index d7a4f00..adb1fd6 100644
--- a/flatnav/tests/test_distances.cpp
+++ b/flatnav/tests/test_distances.cpp
@@ -1,9 +1,9 @@
 
-#include "gtest/gtest.h"
-#include <chrono>
 #include <flatnav/util/Macros.h>
 #include <flatnav/util/SimdUtils.h>
+#include <chrono>
 #include <random>
+#include "gtest/gtest.h"
 
 #include <flatnav/distances/InnerProductDistance.h>
 #include <flatnav/distances/SquaredL2Distance.h>
@@ -11,7 +11,7 @@
 namespace flatnav::testing {
 
 class DistanceTest : public ::testing::Test {
-protected:
+ protected:
   void SetUp() override {
     // Initialize x and y with values drawn from a normal distribution
     std::default_random_engine generator;
@@ -37,8 +37,7 @@ class DistanceTest : public ::testing::Test {
 TEST_F(DistanceTest, TestAvx512L2Distance) {
 #if defined(USE_AVX512)
   float result = flatnav::util::computeL2_Avx512(x, y, dimensions);
-  float expected =
-      flatnav::distances::defaultSquaredL2<float>(x, y, dimensions);
+  float expected = flatnav::distances::defaultSquaredL2<float>(x, y, dimensions);
   ASSERT_NEAR(result, expected, epsilon);
 
 #endif
@@ -49,19 +48,18 @@ TEST_F(DistanceTest, TestAvx512L2DistanceUint8) {
 #if defined(USE_AVX512)
   auto total_num_vectors = 1000;
   auto total_size = dimensions * total_num_vectors;
-  uint8_t *x_matrix = (uint8_t *)malloc(total_size);
-  uint8_t *y_matrix = (uint8_t *)malloc(total_size);
+  uint8_t* x_matrix = (uint8_t*)malloc(total_size);
+  uint8_t* y_matrix = (uint8_t*)malloc(total_size);
   for (size_t i = 0; i < total_size; i++) {
     x_matrix[i] = (uint8_t)rand() % 256;
     y_matrix[i] = (uint8_t)rand() % 256;
   }
 
   for (size_t i = 0; i < total_num_vectors; i++) {
-    uint8_t *x = x_matrix + i * dimensions;
-    uint8_t *y = y_matrix + i * dimensions;
+    uint8_t* x = x_matrix + i * dimensions;
+    uint8_t* y = y_matrix + i * dimensions;
     float result = flatnav::util::computeL2_Avx512_Uint8(x, y, dimensions);
-    float expected =
-        flatnav::distances::defaultSquaredL2<uint8_t>(x, y, dimensions);
+    float expected = flatnav::distances::defaultSquaredL2<uint8_t>(x, y, dimensions);
     ASSERT_NEAR(result, expected, epsilon);
   }
 
@@ -76,8 +74,7 @@ TEST_F(DistanceTest, TestAvxL2Distance) {
 #if defined(USE_AVX)
 
   float result = flatnav::util::computeL2_Avx2(x, y, dimensions);
-  float expected =
-      flatnav::distances::defaultSquaredL2<float>(x, y, dimensions);
+  float expected = flatnav::distances::defaultSquaredL2<float>(x, y, dimensions);
 
   ASSERT_NEAR(result, expected, epsilon);
 
@@ -106,8 +103,7 @@ TEST(TestSingleIntrinsic, TestReduceAddSse) {
 TEST_F(DistanceTest, TestSseL2Distance) {
 #if defined(USE_SSE)
   float result = flatnav::util::computeL2_Sse(x, y, dimensions);
-  float expected =
-      flatnav::distances::defaultSquaredL2<float>(x, y, dimensions);
+  float expected = flatnav::distances::defaultSquaredL2<float>(x, y, dimensions);
   ASSERT_NEAR(result, expected, epsilon);
 
   // try with dimensions not divisible by 16
@@ -182,4 +178,4 @@ TEST_F(DistanceTest, TestSseInnerProductDistance) {
 #endif
 }
 
-} // namespace flatnav::testing
\ No newline at end of file
+}  // namespace flatnav::testing
\ No newline at end of file
diff --git a/flatnav/tests/test_serialization.cpp b/flatnav/tests/test_serialization.cpp
index 5ffc4a2..f87513f 100644
--- a/flatnav/tests/test_serialization.cpp
+++ b/flatnav/tests/test_serialization.cpp
@@ -1,11 +1,11 @@
-#include "gtest/gtest.h"
-#include <cassert>
-#include <cstdio> // for remove
 #include <flatnav/distances/DistanceInterface.h>
 #include <flatnav/distances/InnerProductDistance.h>
 #include <flatnav/distances/SquaredL2Distance.h>
 #include <flatnav/index/Index.h>
+#include <cassert>
+#include <cstdio>  // for remove
 #include <random>
+#include "gtest/gtest.h"
 
 using flatnav::Index;
 using flatnav::distances::DistanceInterface;
@@ -31,23 +31,20 @@ std::vector<float> generateRandomVectors(uint32_t num_vectors, uint32_t dim) {
 }
 
 template <typename dist_t, typename label_t>
-void runTest(float *data, std::unique_ptr<DistanceInterface<dist_t>> &&distance,
-             int N, int M, int dim, int ef_construction,
-             const std::string &save_file) {
+void runTest(float* data, std::unique_ptr<DistanceInterface<dist_t>>&& distance, int N, int M, int dim,
+             int ef_construction, const std::string& save_file) {
   auto data_size = distance->dataSize();
 
-  std::unique_ptr<Index<dist_t, label_t>> index =
-      std::make_unique<Index<dist_t, label_t>>(
-          /* dist = */ std::move(distance), /* dataset_size = */ N,
-          /* max_edges = */ M);
+  std::unique_ptr<Index<dist_t, label_t>> index = std::make_unique<Index<dist_t, label_t>>(
+      /* dist = */ std::move(distance), /* dataset_size = */ N,
+      /* max_edges = */ M);
 
   std::vector<int> labels(N);
   std::iota(labels.begin(), labels.end(), 0);
   index->template addBatch<float>(data, labels, ef_construction);
   index->saveIndex(/* filename = */ save_file);
 
-  auto new_index =
-      Index<dist_t, label_t>::loadIndex(/* filename = */ save_file);
+  auto new_index = Index<dist_t, label_t>::loadIndex(/* filename = */ save_file);
 
   ASSERT_EQ(new_index->maxEdgesPerNode(), M);
   ASSERT_EQ(new_index->dataSizeBytes(), index->dataSizeBytes());
@@ -55,19 +52,16 @@ void runTest(float *data, std::unique_ptr<DistanceInterface<dist_t>> &&distance,
   ASSERT_EQ(new_index->nodeSizeBytes(), data_size + (4 * M) + 4);
   ASSERT_EQ(new_index->maxNodeCount(), N);
 
-  uint64_t total_index_size =
-      new_index->nodeSizeBytes() * new_index->maxNodeCount();
+  uint64_t total_index_size = new_index->nodeSizeBytes() * new_index->maxNodeCount();
 
   std::vector<float> queries = generateRandomVectors(QUERY_VECTORS, dim);
 
   for (uint32_t i = 0; i < QUERY_VECTORS; i++) {
-    float *q = queries.data() + (dim * i);
+    float* q = queries.data() + (dim * i);
 
-    std::vector<std::pair<float, int>> query_result =
-        index->search(q, K, EF_SEARCH);
+    std::vector<std::pair<float, int>> query_result = index->search(q, K, EF_SEARCH);
 
-    std::vector<std::pair<float, int>> new_query_result =
-        new_index->search(q, K, EF_SEARCH);
+    std::vector<std::pair<float, int>> new_query_result = new_index->search(q, K, EF_SEARCH);
 
     for (uint32_t j = 0; j < K; j++) {
       ASSERT_EQ(query_result[j].first, new_query_result[j].first);
@@ -109,4 +103,4 @@ TEST(FlatnavSerializationTest, TestInnerProductIndexSerialization) {
   EXPECT_EQ(std::remove(save_file.c_str()), 0);
 }
 
-} // namespace flatnav::testing
\ No newline at end of file
+}  // namespace flatnav::testing
\ No newline at end of file
diff --git a/flatnav/util/Datatype.h b/flatnav/util/Datatype.h
index c7dff7d..1a07b00 100644
--- a/flatnav/util/Datatype.h
+++ b/flatnav/util/Datatype.h
@@ -26,39 +26,39 @@ enum class DataType {
 /**
  * @brief Get a string representation of the data type
  */
-inline constexpr const char *name(DataType data_type) {
+inline constexpr const char* name(DataType data_type) {
   switch (data_type) {
-  case DataType::uint8:
-    return "uint8";
-  case DataType::uint16:
-    return "uint16";
-  case DataType::uint32:
-    return "uint32";
-  case DataType::uint64:
-    return "uint64";
-  case DataType::int8:
-    return "int8";
-  case DataType::int16:
-    return "int16";
-  case DataType::int32:
-    return "int32";
-  case DataType::int64:
-    return "int64";
-  case DataType::float16:
-    return "float16";
-  case DataType::float32:
-    return "float32";
-  case DataType::float64:
-    return "float64";
-  default:
-    return "undefined";
+    case DataType::uint8:
+      return "uint8";
+    case DataType::uint16:
+      return "uint16";
+    case DataType::uint32:
+      return "uint32";
+    case DataType::uint64:
+      return "uint64";
+    case DataType::int8:
+      return "int8";
+    case DataType::int16:
+      return "int16";
+    case DataType::int32:
+      return "int32";
+    case DataType::int64:
+      return "int64";
+    case DataType::float16:
+      return "float16";
+    case DataType::float32:
+      return "float32";
+    case DataType::float64:
+      return "float64";
+    default:
+      return "undefined";
   }
 }
 
 /**
  * @brief Get the data type from a string representation
  */
-inline constexpr DataType type(const std::string_view &data_type) {
+inline constexpr DataType type(const std::string_view& data_type) {
   if (data_type == "uint8") {
     return DataType::uint8;
   } else if (data_type == "uint16") {
@@ -91,42 +91,48 @@ inline constexpr DataType type(const std::string_view &data_type) {
  */
 inline constexpr size_t size(DataType data_type) {
   switch (data_type) {
-  case DataType::uint8:
-    return sizeof(uint8_t);
-  case DataType::uint16:
-    return sizeof(uint16_t);
-  case DataType::uint32:
-    return sizeof(uint32_t);
-  case DataType::uint64:
-    return sizeof(uint64_t);
-  case DataType::int8:
-    return sizeof(int8_t);
-  case DataType::int16:
-    return sizeof(int16_t);
-  case DataType::int32:
-    return sizeof(int32_t);
-  case DataType::int64:
-    return sizeof(int64_t);
-  case DataType::float16:
-    return sizeof(float) / 2;
-  case DataType::float32:
-    return sizeof(float);
-  case DataType::float64:
-    return sizeof(double);
-  default:
-    return 0;
+    case DataType::uint8:
+      return sizeof(uint8_t);
+    case DataType::uint16:
+      return sizeof(uint16_t);
+    case DataType::uint32:
+      return sizeof(uint32_t);
+    case DataType::uint64:
+      return sizeof(uint64_t);
+    case DataType::int8:
+      return sizeof(int8_t);
+    case DataType::int16:
+      return sizeof(int16_t);
+    case DataType::int32:
+      return sizeof(int32_t);
+    case DataType::int64:
+      return sizeof(int64_t);
+    case DataType::float16:
+      return sizeof(float) / 2;
+    case DataType::float32:
+      return sizeof(float);
+    case DataType::float64:
+      return sizeof(double);
+    default:
+      return 0;
   }
 }
 
 // Some nice template metaprogramming (TMP) to allow us to get compile-time
 // distance dispatching.
-template <DataType data_type> struct type_for_data_type;
+template <DataType data_type>
+struct type_for_data_type;
 
-template <> struct type_for_data_type<DataType::float32> {
+template <>
+struct type_for_data_type<DataType::float32> {
   using type = float;
 };
-template <> struct type_for_data_type<DataType::int8> { using type = int8_t; };
-template <> struct type_for_data_type<DataType::uint8> {
+template <>
+struct type_for_data_type<DataType::int8> {
+  using type = int8_t;
+};
+template <>
+struct type_for_data_type<DataType::uint8> {
   using type = uint8_t;
 };
 
@@ -151,7 +157,8 @@ template <> struct type_for_data_type<DataType::uint8> {
  * @tparam F A callable object
  * @tparam data_types The data types to iterate over
  */
-template <typename F, DataType... data_types> struct for_each_data_type;
+template <typename F, DataType... data_types>
+struct for_each_data_type;
 
 /**
  * @brief Template specialization for for_each_data_type when there are data
@@ -162,7 +169,7 @@ template <typename F, DataType... data_types> struct for_each_data_type;
  */
 template <typename F, DataType data_type, DataType... rest>
 struct for_each_data_type<F, data_type, rest...> {
-  static void apply(F &&f) {
+  static void apply(F&& f) {
     f.template operator()<data_type>();
     for_each_data_type<F, rest...>::apply(std::forward<F>(f));
   }
@@ -173,8 +180,9 @@ struct for_each_data_type<F, data_type, rest...> {
  * types to iterate over
  * @tparam F A callable object
  */
-template <typename F> struct for_each_data_type<F> {
-  static void apply(F &&) {}
+template <typename F>
+struct for_each_data_type<F> {
+  static void apply(F&&) {}
 };
 
-} // namespace flatnav::util
\ No newline at end of file
+}  // namespace flatnav::util
\ No newline at end of file
diff --git a/flatnav/util/GorderPriorityQueue.h b/flatnav/util/GorderPriorityQueue.h
index 9fcf715..f9dcb16 100644
--- a/flatnav/util/GorderPriorityQueue.h
+++ b/flatnav/util/GorderPriorityQueue.h
@@ -10,7 +10,8 @@
 
 namespace flatnav::util {
 
-template <typename node_id_t> class GorderPriorityQueue {
+template <typename node_id_t>
+class GorderPriorityQueue {
 
   typedef std::unordered_map<node_id_t, int> map_t;
 
@@ -20,7 +21,7 @@ template <typename node_id_t> class GorderPriorityQueue {
   };
 
   std::vector<Node> _list;
-  map_t _index_table; // map: key -> index in _list
+  map_t _index_table;  // map: key -> index in _list
 
   inline void swap(int i, int j) {
     Node tmp = _list[i];
@@ -30,8 +31,8 @@ template <typename node_id_t> class GorderPriorityQueue {
     _index_table[_list[j].key] = j;
   }
 
-public:
-  GorderPriorityQueue(const std::vector<node_id_t> &nodes) {
+ public:
+  GorderPriorityQueue(const std::vector<node_id_t>& nodes) {
     for (int i = 0; i < nodes.size(); i++) {
       _list.push_back({nodes[i], 0});
       _index_table[nodes[i]] = i;
@@ -53,9 +54,7 @@ template <typename node_id_t> class GorderPriorityQueue {
     std::cout << std::endl;
   }
 
-  static bool compare(const Node &a, const Node &b) {
-    return (a.priority < b.priority);
-  }
+  static bool compare(const Node& a, const Node& b) { return (a.priority < b.priority); }
 
   void increment(node_id_t key) {
     typename map_t::const_iterator i = _index_table.find(key);
@@ -67,9 +66,8 @@ template <typename node_id_t> class GorderPriorityQueue {
     // _list[i->second].priority)){ 	new_index--;
     // }
 
-    auto it =
-        std::upper_bound(_list.begin(), _list.end(), _list[i->second], compare);
-    size_t new_index = it - _list.begin() - 1; // possible bug
+    auto it = std::upper_bound(_list.begin(), _list.end(), _list[i->second], compare);
+    size_t new_index = it - _list.begin() - 1;  // possible bug
     // new_index points to the right-most element with same priority as key
     // i.e. priority equal to "_list[i->second].priority" (i.e. the current
     // priority)
@@ -88,9 +86,8 @@ template <typename node_id_t> class GorderPriorityQueue {
     // }
     // new_index++;
     // i shoudl do this better but am pressed for time now
-    auto it =
-        std::lower_bound(_list.begin(), _list.end(), _list[i->second], compare);
-    size_t new_index = it - _list.begin(); // POSSIBLE BUG
+    auto it = std::lower_bound(_list.begin(), _list.end(), _list[i->second], compare);
+    size_t new_index = it - _list.begin();  // POSSIBLE BUG
     // while((new_index > _list.size()) && (_list[new_index].priority ==
     // _list[i->second].priority)){ 	new_index++;
     // }
@@ -111,4 +108,4 @@ template <typename node_id_t> class GorderPriorityQueue {
   size_t size() { return _list.size(); }
 };
 
-} // namespace flatnav::util
\ No newline at end of file
+}  // namespace flatnav::util
\ No newline at end of file
diff --git a/flatnav/util/InnerProductSimdExtensions.h b/flatnav/util/InnerProductSimdExtensions.h
index 65cc1c5..903776a 100644
--- a/flatnav/util/InnerProductSimdExtensions.h
+++ b/flatnav/util/InnerProductSimdExtensions.h
@@ -6,13 +6,12 @@ namespace flatnav::util {
 
 #if defined(USE_AVX512)
 
-static float computeIP_Avx512(const void *x, const void *y,
-                              const size_t &dimension) {
-  float *pointer_x = static_cast<float *>(const_cast<void *>(x));
-  float *pointer_y = static_cast<float *>(const_cast<void *>(y));
+static float computeIP_Avx512(const void* x, const void* y, const size_t& dimension) {
+  float* pointer_x = static_cast<float*>(const_cast<void*>(x));
+  float* pointer_y = static_cast<float*>(const_cast<void*>(y));
 
   // Align to 16-floats boundary
-  const float *end_x = pointer_x + (dimension >> 4 << 4);
+  const float* end_x = pointer_x + (dimension >> 4 << 4);
   simd16float32 product, v1, v2;
 
   simd16float32 sum(0.0f);
@@ -29,15 +28,14 @@ static float computeIP_Avx512(const void *x, const void *y,
   return 1.0f - total;
 }
 
-#endif // USE_AVX512
+#endif  // USE_AVX512
 
 #if defined(USE_AVX)
-static float computeIP_Avx(const void *x, const void *y,
-                           const size_t &dimension) {
-  float *pointer_x = static_cast<float *>(const_cast<void *>(x));
-  float *pointer_y = static_cast<float *>(const_cast<void *>(y));
+static float computeIP_Avx(const void* x, const void* y, const size_t& dimension) {
+  float* pointer_x = static_cast<float*>(const_cast<void*>(x));
+  float* pointer_y = static_cast<float*>(const_cast<void*>(y));
 
-  const float *end_x = pointer_x + (dimension >> 4 << 4);
+  const float* end_x = pointer_x + (dimension >> 4 << 4);
   simd8float32 product, v1, v2;
   simd8float32 sum(0.0f);
 
@@ -61,14 +59,13 @@ static float computeIP_Avx(const void *x, const void *y,
   return 1.0f - total;
 }
 
-static float computeIP_Avx_4aligned(const void *x, const void *y,
-                                    const size_t &dimension) {
+static float computeIP_Avx_4aligned(const void* x, const void* y, const size_t& dimension) {
 
-  float *pointer_x = static_cast<float *>(const_cast<void *>(x));
-  float *pointer_y = static_cast<float *>(const_cast<void *>(y));
+  float* pointer_x = static_cast<float*>(const_cast<void*>(x));
+  float* pointer_y = static_cast<float*>(const_cast<void*>(y));
 
-  const float *first_chunk_end = pointer_x + (dimension >> 4 << 4);
-  const float *second_chunk_end = pointer_x + (dimension >> 2 << 2);
+  const float* first_chunk_end = pointer_x + (dimension >> 4 << 4);
+  const float* second_chunk_end = pointer_x + (dimension >> 2 << 2);
 
   simd8float32 v1, v2;
   simd8float32 sum(0.0f);
@@ -82,8 +79,7 @@ static float computeIP_Avx_4aligned(const void *x, const void *y,
   }
 
   // TODO: See if we can reduce this to fewer instructions
-  simd4float32 aggregate =
-      simd4float32(sum.get_low()) + simd4float32(sum.get_high());
+  simd4float32 aggregate = simd4float32(sum.get_low()) + simd4float32(sum.get_high());
   simd4float32 v1_residual, v2_residual;
 
   while (pointer_x != second_chunk_end) {
@@ -98,16 +94,15 @@ static float computeIP_Avx_4aligned(const void *x, const void *y,
   return 1.0f - total;
 }
 
-#endif // USE_AVX
+#endif  // USE_AVX
 
 #if defined(USE_SSE)
 
-const float computeIP_Sse(const void *x, const void *y,
-                          const size_t &dimension) {
-  float *pointer_x = static_cast<float *>(const_cast<void *>(x));
-  float *pointer_y = static_cast<float *>(const_cast<void *>(y));
+const float computeIP_Sse(const void* x, const void* y, const size_t& dimension) {
+  float* pointer_x = static_cast<float*>(const_cast<void*>(x));
+  float* pointer_y = static_cast<float*>(const_cast<void*>(y));
 
-  const float *end_x = pointer_x + (dimension >> 4 << 4);
+  const float* end_x = pointer_x + (dimension >> 4 << 4);
   simd4float32 v1, v2;
   simd4float32 sum(0.0f);
 
@@ -141,12 +136,11 @@ const float computeIP_Sse(const void *x, const void *y,
   return 1.0f - total;
 }
 
-const float computeIP_Sse_4aligned(const void *x, const void *y,
-                                   const size_t &dimension) {
-  float *pointer_x = static_cast<float *>(const_cast<void *>(x));
-  float *pointer_y = static_cast<float *>(const_cast<void *>(y));
-  const float *first_chunk_end = pointer_x + (dimension >> 4 << 4);
-  const float *second_chunk_end = pointer_x + (dimension >> 2 << 2);
+const float computeIP_Sse_4aligned(const void* x, const void* y, const size_t& dimension) {
+  float* pointer_x = static_cast<float*>(const_cast<void*>(x));
+  float* pointer_y = static_cast<float*>(const_cast<void*>(y));
+  const float* first_chunk_end = pointer_x + (dimension >> 4 << 4);
+  const float* second_chunk_end = pointer_x + (dimension >> 2 << 2);
 
   simd4float32 v1, v2;
   simd4float32 sum(0.0f);
@@ -188,8 +182,7 @@ const float computeIP_Sse_4aligned(const void *x, const void *y,
   return 1.0f - total;
 }
 
-const float computeIP_SseWithResidual_16(const void *x, const void *y,
-                                         const size_t &dimension) {
+const float computeIP_SseWithResidual_16(const void* x, const void* y, const size_t& dimension) {
   size_t aligned_dimension = dimension >> 4 << 4;
   size_t residual_dimension = dimension - aligned_dimension;
 
@@ -200,18 +193,15 @@ const float computeIP_SseWithResidual_16(const void *x, const void *y,
   first_chunk_sum *= -1.0f;
 
   float residual_sum = 0.0f;
-  float *pointer_x =
-      static_cast<float *>(const_cast<void *>(x)) + aligned_dimension;
-  float *pointer_y =
-      static_cast<float *>(const_cast<void *>(y)) + aligned_dimension;
+  float* pointer_x = static_cast<float*>(const_cast<void*>(x)) + aligned_dimension;
+  float* pointer_y = static_cast<float*>(const_cast<void*>(y)) + aligned_dimension;
   for (size_t i = 0; i < residual_dimension; i++) {
     residual_sum += pointer_x[i] * pointer_y[i];
   }
   return 1.0f - (first_chunk_sum + residual_sum);
 }
 
-const float computeIP_SseWithResidual_4(const void *x, const void *y,
-                                        const size_t &dimension) {
+const float computeIP_SseWithResidual_4(const void* x, const void* y, const size_t& dimension) {
   size_t aligned_dimension = dimension >> 2 << 2;
   size_t residual_dimension = dimension - aligned_dimension;
 
@@ -222,16 +212,14 @@ const float computeIP_SseWithResidual_4(const void *x, const void *y,
   first_chunk_sum *= -1.0f;
 
   float residual_sum = 0.0f;
-  float *pointer_x =
-      static_cast<float *>(const_cast<void *>(x)) + aligned_dimension;
-  float *pointer_y =
-      static_cast<float *>(const_cast<void *>(y)) + aligned_dimension;
+  float* pointer_x = static_cast<float*>(const_cast<void*>(x)) + aligned_dimension;
+  float* pointer_y = static_cast<float*>(const_cast<void*>(y)) + aligned_dimension;
   for (size_t i = 0; i < residual_dimension; i++) {
     residual_sum += pointer_x[i] * pointer_y[i];
   }
   return 1.0f - (first_chunk_sum + residual_sum);
 }
 
-#endif // USE_SSE
+#endif  // USE_SSE
 
-} // namespace flatnav::util
+}  // namespace flatnav::util
diff --git a/flatnav/util/Macros.h b/flatnav/util/Macros.h
index 0418a55..a8ef494 100644
--- a/flatnav/util/Macros.h
+++ b/flatnav/util/Macros.h
@@ -11,11 +11,11 @@
 
 #ifdef __SSE3__
 #define USE_SSE3
-#endif // __SSE3__
+#endif  // __SSE3__
 
 #ifdef __SSE4_1__
 #define USE_SSE4_1
-#endif // __SSE4_1__
+#endif  // __SSE4_1__
 
 #ifdef __AVX__
 #define USE_AVX
@@ -24,18 +24,18 @@
 
 #ifdef __AVX512BW__
 #define USE_AVX512BW
-#endif // __AVX512BW__
+#endif  // __AVX512BW__
 
 #ifdef __AVX512VNNI__
 #define USE_AVX512VNNI
-#endif // __AVX512VNNI__
+#endif  // __AVX512VNNI__
 
 #define USE_AVX512
-#endif // __AVX512F__
+#endif  // __AVX512F__
 
-#endif // __AVX__
+#endif  // __AVX__
 #endif
-#endif // NO_SIMD_VECTORIZATION
+#endif  // NO_SIMD_VECTORIZATION
 
 #if defined(USE_AVX) || defined(USE_SSE)
 
@@ -89,7 +89,7 @@ uint64_t xgetbv(unsigned int index) {
 #else
 #define PORTABLE_ALIGN32 __declspec(align(32))
 #define PORTABLE_ALIGN64 __declspec(align(64))
-#endif // __GNUC__
+#endif  // __GNUC__
 
 #define _XCR_XFEATURE_ENABLED_MASK 0
 
diff --git a/flatnav/util/Multithreading.h b/flatnav/util/Multithreading.h
index 9f81cca..cb1f6fb 100644
--- a/flatnav/util/Multithreading.h
+++ b/flatnav/util/Multithreading.h
@@ -16,8 +16,7 @@ namespace flatnav {
  * installing the Python library.
  */
 template <typename Function, typename... Args>
-void executeInParallel(uint32_t start_index, uint32_t end_index,
-                       uint32_t num_threads, Function function,
+void executeInParallel(uint32_t start_index, uint32_t end_index, uint32_t num_threads, Function function,
                        Args... additional_args) {
   if (num_threads == 0) {
     throw std::invalid_argument("Invalid number of threads");
@@ -35,8 +34,8 @@ void executeInParallel(uint32_t start_index, uint32_t end_index,
         break;
       }
       // Use std::apply to pass arguments to the function
-      std::apply(function, std::tuple_cat(std::make_tuple(current_vector_idx),
-                                          std::make_tuple(additional_args...)));
+      std::apply(function,
+                 std::tuple_cat(std::make_tuple(current_vector_idx), std::make_tuple(additional_args...)));
     }
   };
 
@@ -48,4 +47,4 @@ void executeInParallel(uint32_t start_index, uint32_t end_index,
   }
 }
 
-} // namespace flatnav
\ No newline at end of file
+}  // namespace flatnav
\ No newline at end of file
diff --git a/flatnav/util/Reordering.h b/flatnav/util/Reordering.h
index d5ed029..a9b066d 100644
--- a/flatnav/util/Reordering.h
+++ b/flatnav/util/Reordering.h
@@ -24,8 +24,7 @@
 namespace flatnav::util {
 
 template <typename node_id_t>
-std::vector<node_id_t>
-gOrder(std::vector<std::vector<node_id_t>> &outdegree_table, const int w) {
+std::vector<node_id_t> gOrder(std::vector<std::vector<node_id_t>>& outdegree_table, const int w) {
   /* Simple explanation of the Gorder Algorithm:
   insert all v into Q each with priority 0
   select a start node into P
@@ -55,7 +54,7 @@ gOrder(std::vector<std::vector<node_id_t>> &outdegree_table, const int w) {
   // create table of in-degrees
   std::vector<std::vector<node_id_t>> indegree_table(cur_num_nodes);
   for (node_id_t node = 0; node < cur_num_nodes; node++) {
-    for (node_id_t &edge : outdegree_table[node]) {
+    for (node_id_t& edge : outdegree_table[node]) {
       indegree_table[edge].push_back(node);
     }
   }
@@ -72,15 +71,15 @@ gOrder(std::vector<std::vector<node_id_t>> &outdegree_table, const int w) {
     node_id_t v_e = P[i - 1];
     // ve = newest node in window
     // for each node u in out-edges of ve:
-    for (node_id_t &u : outdegree_table[v_e]) {
+    for (node_id_t& u : outdegree_table[v_e]) {
       Q.increment(u);
     }
     // for each node u in in-edges of v_e:
-    for (node_id_t &u : indegree_table[v_e]) {
+    for (node_id_t& u : indegree_table[v_e]) {
       // if u in Q, increment priority of u
       Q.increment(u);
       // for each node v in out-edges of u:
-      for (node_id_t &v : outdegree_table[u]) {
+      for (node_id_t& v : outdegree_table[u]) {
         Q.increment(v);
       }
     }
@@ -88,12 +87,12 @@ gOrder(std::vector<std::vector<node_id_t>> &outdegree_table, const int w) {
     if (i > w + 1) {
       node_id_t v_b = P[i - w - 1];
       // for each node u in out-edges of vb:
-      for (node_id_t &u : outdegree_table[v_b]) {
+      for (node_id_t& u : outdegree_table[v_b]) {
         Q.decrement(u);
       }
 
       // for each node u in in-edges of v_b
-      for (node_id_t &u : indegree_table[v_b]) {
+      for (node_id_t& u : indegree_table[v_b]) {
         // if u in Q, increment priority of u
         // Note: it doesn't seem to matter whether this particular
         // operation is an increment or a decrement. In a previous
@@ -101,7 +100,7 @@ gOrder(std::vector<std::vector<node_id_t>> &outdegree_table, const int w) {
         // technically wrong) but the performance was nearly the same.
         Q.decrement(u);
         // for each node v in out-edges of u:
-        for (node_id_t &v : outdegree_table[u]) {
+        for (node_id_t& v : outdegree_table[u]) {
           Q.decrement(v);
         }
       }
@@ -118,8 +117,7 @@ gOrder(std::vector<std::vector<node_id_t>> &outdegree_table, const int w) {
 }
 
 template <typename node_id_t>
-std::vector<node_id_t>
-rcmOrder(std::vector<std::vector<node_id_t>> &outdegree_table) {
+std::vector<node_id_t> rcmOrder(std::vector<std::vector<node_id_t>>& outdegree_table) {
 
   int cur_num_nodes = outdegree_table.size();
   std::vector<std::pair<node_id_t, int>> sorted_nodes;
@@ -131,10 +129,10 @@ rcmOrder(std::vector<std::vector<node_id_t>> &outdegree_table) {
     degrees.push_back(deg);
   }
 
-  std::sort(
-      sorted_nodes.begin(), sorted_nodes.end(),
-      [](const std::pair<node_id_t, int> &a,
-         const std::pair<node_id_t, int> &b) { return a.second < b.second; });
+  std::sort(sorted_nodes.begin(), sorted_nodes.end(),
+            [](const std::pair<node_id_t, int>& a, const std::pair<node_id_t, int>& b) {
+              return a.second < b.second;
+            });
 
   std::vector<node_id_t> P;
   auto visited_set = VisitedSet(cur_num_nodes);
@@ -151,14 +149,13 @@ rcmOrder(std::vector<std::vector<node_id_t>> &outdegree_table) {
 
       // get list of neighbors
       std::vector<std::pair<node_id_t, int>> neighbors;
-      for (auto &edge : outdegree_table[node]) {
+      for (auto& edge : outdegree_table[node]) {
         neighbors.push_back({edge, degrees[edge]});
       }
 
       // sort neighbors by degree (min degree first)
       std::sort(neighbors.begin(), neighbors.end(),
-                [](const std::pair<node_id_t, int> &a,
-                   const std::pair<node_id_t, int> &b) {
+                [](const std::pair<node_id_t, int>& a, const std::pair<node_id_t, int>& b) {
                   return a.second < b.second;
                 });
 
@@ -177,13 +174,12 @@ rcmOrder(std::vector<std::vector<node_id_t>> &outdegree_table) {
 
           // get list of neighbors of candidate
           std::vector<std::pair<node_id_t, int>> candidate_neighbors;
-          for (auto &edge : outdegree_table[candidate]) {
+          for (auto& edge : outdegree_table[candidate]) {
             candidate_neighbors.push_back({edge, degrees[edge]});
           }
           // sort neighbors by degree (min degree first)
           std::sort(candidate_neighbors.begin(), candidate_neighbors.end(),
-                    [](const std::pair<node_id_t, int> &a,
-                       const std::pair<node_id_t, int> &b) {
+                    [](const std::pair<node_id_t, int>& a, const std::pair<node_id_t, int>& b) {
                       return a.second < b.second;
                     });
           // add neighbors to queue
@@ -203,4 +199,4 @@ rcmOrder(std::vector<std::vector<node_id_t>> &outdegree_table) {
   return Pinv;
 }
 
-} // namespace flatnav::util
\ No newline at end of file
+}  // namespace flatnav::util
\ No newline at end of file
diff --git a/flatnav/util/SquaredL2SimdExtensions.h b/flatnav/util/SquaredL2SimdExtensions.h
index bd067a8..04df0e2 100644
--- a/flatnav/util/SquaredL2SimdExtensions.h
+++ b/flatnav/util/SquaredL2SimdExtensions.h
@@ -5,13 +5,12 @@
 namespace flatnav::util {
 
 #if defined(USE_AVX512)
-static float computeL2_Avx512(const void *x, const void *y,
-                              const size_t &dimension) {
-  float *pointer_x = static_cast<float *>(const_cast<void *>(x));
-  float *pointer_y = static_cast<float *>(const_cast<void *>(y));
+static float computeL2_Avx512(const void* x, const void* y, const size_t& dimension) {
+  float* pointer_x = static_cast<float*>(const_cast<void*>(x));
+  float* pointer_y = static_cast<float*>(const_cast<void*>(y));
 
   // Align to 16-floats boundary
-  const float *end_x = pointer_x + (dimension >> 4 << 4);
+  const float* end_x = pointer_x + (dimension >> 4 << 4);
   simd16float32 difference, v1, v2;
 
   simd16float32 sum(0.0f);
@@ -30,10 +29,9 @@ static float computeL2_Avx512(const void *x, const void *y,
 /**
  * @todo Make this support dimensions that are not multiples of 64
  */
-static float computeL2_Avx512_Uint8(const void *x, const void *y,
-                                    const size_t &dimension) {
-  const uint8_t *pointer_x = static_cast<const uint8_t *>(x);
-  const uint8_t *pointer_y = static_cast<const uint8_t *>(y);
+static float computeL2_Avx512_Uint8(const void* x, const void* y, const size_t& dimension) {
+  const uint8_t* pointer_x = static_cast<const uint8_t*>(x);
+  const uint8_t* pointer_y = static_cast<const uint8_t*>(y);
 
   // Initialize sum to zero
   __m512i sum = _mm512_setzero_si512();
@@ -41,10 +39,8 @@ static float computeL2_Avx512_Uint8(const void *x, const void *y,
   // Loop over the input arrays
   for (size_t i = 0; i < dimension; i += 64) {
     // Load 64 bytes from each array
-    __m512i v1 =
-        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(pointer_x + i));
-    __m512i v2 =
-        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(pointer_y + i));
+    __m512i v1 = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(pointer_x + i));
+    __m512i v2 = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(pointer_y + i));
 
     // Unpack to 16-bit integers to avoid overflow
     __m512i v1_lo = _mm512_unpacklo_epi8(v1, _mm512_setzero_si512());
@@ -72,23 +68,22 @@ static float computeL2_Avx512_Uint8(const void *x, const void *y,
   sum256 = _mm256_hadd_epi32(sum256, sum256);
 
   int32_t buffer[8];
-  _mm256_storeu_si256(reinterpret_cast<__m256i *>(buffer), sum256);
+  _mm256_storeu_si256(reinterpret_cast<__m256i*>(buffer), sum256);
 
   int32_t total_sum = buffer[0] + buffer[4];
 
   return static_cast<float>(total_sum);
 }
 
-#endif // USE_AVX512
+#endif  // USE_AVX512
 
 #if defined(USE_AVX)
 
-static float computeL2_Avx2(const void *x, const void *y,
-                            const size_t &dimension) {
-  float *pointer_x = static_cast<float *>(const_cast<void *>(x));
-  float *pointer_y = static_cast<float *>(const_cast<void *>(y));
+static float computeL2_Avx2(const void* x, const void* y, const size_t& dimension) {
+  float* pointer_x = static_cast<float*>(const_cast<void*>(x));
+  float* pointer_y = static_cast<float*>(const_cast<void*>(y));
 
-  const float *end_x = pointer_x + (dimension & ~7);
+  const float* end_x = pointer_x + (dimension & ~7);
   simd8float32 difference, v1, v2;
   simd8float32 sum(0.0f);
 
@@ -112,20 +107,18 @@ static float computeL2_Avx2(const void *x, const void *y,
 
   float result[8];
   sum.storeu(result);
-  return result[0] + result[1] + result[2] + result[3] + result[4] + result[5] +
-         result[6] + result[7];
+  return result[0] + result[1] + result[2] + result[3] + result[4] + result[5] + result[6] + result[7];
 }
 
-#endif // USE_AVX
+#endif  // USE_AVX
 
 #if defined(USE_SSE)
 
-static float computeL2_Sse(const void *x, const void *y,
-                           const size_t &dimension) {
-  float *pointer_x = static_cast<float *>(const_cast<void *>(x));
-  float *pointer_y = static_cast<float *>(const_cast<void *>(y));
+static float computeL2_Sse(const void* x, const void* y, const size_t& dimension) {
+  float* pointer_x = static_cast<float*>(const_cast<void*>(x));
+  float* pointer_y = static_cast<float*>(const_cast<void*>(y));
 
-  const float *end_x = pointer_x + (dimension >> 4 << 4);
+  const float* end_x = pointer_x + (dimension >> 4 << 4);
   simd4float32 difference, v1, v2;
   simd4float32 sum(0.0f);
 
@@ -162,28 +155,25 @@ static float computeL2_Sse(const void *x, const void *y,
   return sum.reduce_add();
 }
 
-
 #if defined(USE_SSE4_1)
 
 // This function computes the L2 distance between two int8 vectors using SSE2
 // instructions.
-static float computeL2_Sse_int8(const void *x, const void *y,
-                                const size_t &dimension) {
-  int8_t *pointer_x = static_cast<int8_t *>(const_cast<void *>(x));
-  int8_t *pointer_y = static_cast<int8_t *>(const_cast<void *>(y));
+static float computeL2_Sse_int8(const void* x, const void* y, const size_t& dimension) {
+  int8_t* pointer_x = static_cast<int8_t*>(const_cast<void*>(x));
+  int8_t* pointer_y = static_cast<int8_t*>(const_cast<void*>(y));
 
   __m128i sum = _mm_setzero_si128();
   size_t aligned_dimension = dimension & ~0xF;
   size_t i = 0;
 
   for (; i < aligned_dimension; i += 16) {
-    __m128i vx = _mm_loadu_si128(reinterpret_cast<__m128i *>(pointer_x + i));
-    __m128i vy = _mm_loadu_si128(reinterpret_cast<__m128i *>(pointer_y + i));
+    __m128i vx = _mm_loadu_si128(reinterpret_cast<__m128i*>(pointer_x + i));
+    __m128i vy = _mm_loadu_si128(reinterpret_cast<__m128i*>(pointer_y + i));
     __m128i diff = _mm_sub_epi8(vx, vy);
 
     // Convert to 16-bit and square
-    __m128i diff_squared =
-        _mm_madd_epi16(_mm_cvtepi8_epi16(diff), _mm_cvtepi8_epi16(diff));
+    __m128i diff_squared = _mm_madd_epi16(_mm_cvtepi8_epi16(diff), _mm_cvtepi8_epi16(diff));
 
     // Accumulate in 32-bit integer
     sum = _mm_add_epi32(sum, diff_squared);
@@ -198,19 +188,17 @@ static float computeL2_Sse_int8(const void *x, const void *y,
 
   // Reduce sum
   int32_t buffer[4];
-  _mm_storeu_si128(reinterpret_cast<__m128i *>(buffer), sum);
-  return static_cast<float>(buffer[0] + buffer[1] + buffer[2] + buffer[3] +
-                            partial_sum);
+  _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), sum);
+  return static_cast<float>(buffer[0] + buffer[1] + buffer[2] + buffer[3] + partial_sum);
 }
 
-#endif // USE_SSE4_1
+#endif  // USE_SSE4_1
 
-static float computeL2_Sse4Aligned(const void *x, const void *y,
-                                   const size_t &dimension) {
-  float *pointer_x = static_cast<float *>(const_cast<void *>(x));
-  float *pointer_y = static_cast<float *>(const_cast<void *>(y));
+static float computeL2_Sse4Aligned(const void* x, const void* y, const size_t& dimension) {
+  float* pointer_x = static_cast<float*>(const_cast<void*>(x));
+  float* pointer_y = static_cast<float*>(const_cast<void*>(y));
 
-  const float *end_x = pointer_x + (dimension >> 2 << 2);
+  const float* end_x = pointer_x + (dimension >> 2 << 2);
   simd4float32 difference, v1, v2;
   simd4float32 sum(0.0f);
 
@@ -226,17 +214,14 @@ static float computeL2_Sse4Aligned(const void *x, const void *y,
   return sum.reduce_add();
 }
 
-static float computeL2_SseWithResidual_16(const void *x, const void *y,
-                                          const size_t &dimension) {
+static float computeL2_SseWithResidual_16(const void* x, const void* y, const size_t& dimension) {
 
   size_t dimension_aligned = dimension >> 4 << 4;
   float aligned_distance = computeL2_Sse(x, y, dimension_aligned);
   size_t residual_dimension = dimension - dimension_aligned;
   float residual_distance = 0.0f;
-  float *pointer_x =
-      static_cast<float *>(const_cast<void *>(x)) + dimension_aligned;
-  float *pointer_y =
-      static_cast<float *>(const_cast<void *>(y)) + dimension_aligned;
+  float* pointer_x = static_cast<float*>(const_cast<void*>(x)) + dimension_aligned;
+  float* pointer_y = static_cast<float*>(const_cast<void*>(y)) + dimension_aligned;
   for (size_t i = 0; i < residual_dimension; i++) {
     float difference = *pointer_x - *pointer_y;
     residual_distance += difference * difference;
@@ -246,12 +231,11 @@ static float computeL2_SseWithResidual_16(const void *x, const void *y,
   return aligned_distance + residual_distance;
 }
 
-static float computeL2_Sse4aligned(const void *x, const void *y,
-                                   const size_t &dimension) {
-  float *pointer_x = static_cast<float *>(const_cast<void *>(x));
-  float *pointer_y = static_cast<float *>(const_cast<void *>(y));
+static float computeL2_Sse4aligned(const void* x, const void* y, const size_t& dimension) {
+  float* pointer_x = static_cast<float*>(const_cast<void*>(x));
+  float* pointer_y = static_cast<float*>(const_cast<void*>(y));
 
-  const float *end_x = pointer_x + (dimension >> 2 << 2);
+  const float* end_x = pointer_x + (dimension >> 2 << 2);
   simd4float32 difference, v1, v2;
   simd4float32 sum(0.0f);
 
@@ -267,16 +251,13 @@ static float computeL2_Sse4aligned(const void *x, const void *y,
   return sum.reduce_add();
 }
 
-static float computeL2_SseWithResidual_4(const void *x, const void *y,
-                                         const size_t &dimension) {
+static float computeL2_SseWithResidual_4(const void* x, const void* y, const size_t& dimension) {
   size_t dimension_aligned = dimension >> 2 << 2;
   float aligned_distance = computeL2_Sse4aligned(x, y, dimension_aligned);
   size_t residual_dimension = dimension - dimension_aligned;
   float residual_distance = 0.0f;
-  float *pointer_x =
-      static_cast<float *>(const_cast<void *>(x)) + dimension_aligned;
-  float *pointer_y =
-      static_cast<float *>(const_cast<void *>(y)) + dimension_aligned;
+  float* pointer_x = static_cast<float*>(const_cast<void*>(x)) + dimension_aligned;
+  float* pointer_y = static_cast<float*>(const_cast<void*>(y)) + dimension_aligned;
   for (size_t i = 0; i < residual_dimension; i++) {
     float difference = *pointer_x - *pointer_y;
     residual_distance += difference * difference;
@@ -286,6 +267,6 @@ static float computeL2_SseWithResidual_4(const void *x, const void *y,
   return aligned_distance + residual_distance;
 }
 
-#endif // USE_SSE
+#endif  // USE_SSE
 
-} // namespace flatnav::util
\ No newline at end of file
+}  // namespace flatnav::util
\ No newline at end of file
diff --git a/flatnav/util/VisitedSetPool.h b/flatnav/util/VisitedSetPool.h
index 0367fe7..286019a 100644
--- a/flatnav/util/VisitedSetPool.h
+++ b/flatnav/util/VisitedSetPool.h
@@ -2,24 +2,24 @@
 
 // #include <flatnav/util/SIMDDistanceSpecializations.h>
 
-#include <cstring>
 #include <flatnav/util/Macros.h>
+#include <stdint.h>
+#include <cstring>
 #include <iostream>
 #include <memory>
 #include <mutex>
-#include <stdint.h>
 #include <thread>
 #include <vector>
 
 namespace flatnav::util {
 
 class VisitedSet {
-private:
+ private:
   uint8_t _mark;
-  uint8_t *_table;
+  uint8_t* _table;
   uint32_t _table_size;
 
-public:
+ public:
   VisitedSet(const uint32_t size) : _mark(1), _table_size(size) {
     // initialize values to 0
     _table = new uint8_t[_table_size]();
@@ -27,7 +27,7 @@ class VisitedSet {
 
   inline void prefetch(const uint32_t num) const {
 #ifdef USE_SSE
-    _mm_prefetch(reinterpret_cast<const char *>(&_table[num]), _MM_HINT_T0);
+    _mm_prefetch(reinterpret_cast<const char*>(&_table[num]), _MM_HINT_T0);
 #endif
   }
 
@@ -45,31 +45,27 @@ class VisitedSet {
     }
   }
 
-  inline bool isVisited(const uint32_t num) const {
-    return _table[num] == _mark;
-  }
+  inline bool isVisited(const uint32_t num) const { return _table[num] == _mark; }
 
   ~VisitedSet() { delete[] _table; }
 
   // copy constructor
-  VisitedSet(const VisitedSet &other)
-      : _table_size(other._table_size), _mark(other._mark) {
+  VisitedSet(const VisitedSet& other) : _table_size(other._table_size), _mark(other._mark) {
 
     _table = new uint8_t[_table_size];
     std::memcpy(_table, other._table, _table_size);
   }
 
   // move constructor
-  VisitedSet(VisitedSet &&other) noexcept
-      : _table_size(other._table_size), _mark(other._mark),
-        _table(other._table) {
+  VisitedSet(VisitedSet&& other) noexcept
+      : _table_size(other._table_size), _mark(other._mark), _table(other._table) {
     other._table = nullptr;
     other._table_size = 0;
     other._mark = 0;
   }
 
   // copy assignment
-  VisitedSet &operator=(const VisitedSet &other) {
+  VisitedSet& operator=(const VisitedSet& other) {
     if (this != &other) {
       delete[] _table;
       _table_size = other._table_size;
@@ -81,7 +77,7 @@ class VisitedSet {
   }
 
   // move assignment
-  VisitedSet &operator=(VisitedSet &&other) noexcept {
+  VisitedSet& operator=(VisitedSet&& other) noexcept {
     _table_size = other._table_size;
     _mark = other._mark;
     _table = other._table;
@@ -135,35 +131,31 @@ class VisitedSet {
  * expected to manage.
  */
 class VisitedSetPool {
-  std::vector<VisitedSet *> _visisted_set_pool;
+  std::vector<VisitedSet*> _visisted_set_pool;
   std::mutex _pool_guard;
   uint32_t _num_elements;
   uint32_t _max_pool_size;
 
-public:
+ public:
   VisitedSetPool(uint32_t initial_pool_size, uint32_t num_elements,
                  uint32_t max_pool_size = std::thread::hardware_concurrency())
-      : _visisted_set_pool(initial_pool_size), _num_elements(num_elements),
-        _max_pool_size(max_pool_size) {
+      : _visisted_set_pool(initial_pool_size), _num_elements(num_elements), _max_pool_size(max_pool_size) {
     if (initial_pool_size > max_pool_size) {
-      throw std::invalid_argument(
-          "initial_pool_size must be less than or equal to max_pool_size");
+      throw std::invalid_argument("initial_pool_size must be less than or equal to max_pool_size");
     }
-    for (uint32_t visited_set_id = 0;
-         visited_set_id < _visisted_set_pool.size(); visited_set_id++) {
-      _visisted_set_pool[visited_set_id] =
-          new VisitedSet(/* size = */ _num_elements);
+    for (uint32_t visited_set_id = 0; visited_set_id < _visisted_set_pool.size(); visited_set_id++) {
+      _visisted_set_pool[visited_set_id] = new VisitedSet(/* size = */ _num_elements);
     }
   }
 
   // TODO: Enforce the condition that we never allocate more than _max_pool_size
   // visited_sets. For now there is nothing stopping a user from allocating more
   // than _max_pool_size.
-  VisitedSet *pollAvailableSet() {
+  VisitedSet* pollAvailableSet() {
     std::unique_lock<std::mutex> lock(_pool_guard);
 
     if (!_visisted_set_pool.empty()) {
-      auto *visited_set = _visisted_set_pool.back();
+      auto* visited_set = _visisted_set_pool.back();
       _visisted_set_pool.pop_back();
       return visited_set;
     } else {
@@ -173,7 +165,7 @@ class VisitedSetPool {
 
   size_t poolSize() const { return _visisted_set_pool.size(); }
 
-  void pushVisitedSet(VisitedSet *visited_set) {
+  void pushVisitedSet(VisitedSet* visited_set) {
     std::unique_lock<std::mutex> lock(_pool_guard);
 
     _visisted_set_pool.push_back(visited_set);
@@ -183,12 +175,11 @@ class VisitedSetPool {
     std::unique_lock<std::mutex> lock(_pool_guard);
 
     if (new_pool_size > _visisted_set_pool.size()) {
-      throw std::invalid_argument(
-          "new_pool_size must be less than or equal to the current pool size");
+      throw std::invalid_argument("new_pool_size must be less than or equal to the current pool size");
     }
 
     while (_visisted_set_pool.size() > new_pool_size) {
-      auto *visited_set = _visisted_set_pool.back();
+      auto* visited_set = _visisted_set_pool.back();
       _visisted_set_pool.pop_back();
       delete visited_set;
     }
@@ -198,11 +189,11 @@ class VisitedSetPool {
 
   ~VisitedSetPool() {
     while (!_visisted_set_pool.empty()) {
-      auto *visited_set = _visisted_set_pool.back();
+      auto* visited_set = _visisted_set_pool.back();
       _visisted_set_pool.pop_back();
       delete visited_set;
     }
   }
 };
 
-} // namespace flatnav::util
\ No newline at end of file
+}  // namespace flatnav::util
\ No newline at end of file
diff --git a/flatnav_python/python_bindings.cpp b/flatnav_python/python_bindings.cpp
index 99c39c7..008c2ef 100644
--- a/flatnav_python/python_bindings.cpp
+++ b/flatnav_python/python_bindings.cpp
@@ -1,22 +1,22 @@
-#include "docs.h"
-#include <algorithm>
-#include <cstdint>
 #include <flatnav/distances/DistanceInterface.h>
 #include <flatnav/distances/InnerProductDistance.h>
 #include <flatnav/distances/SquaredL2Distance.h>
 #include <flatnav/index/Index.h>
 #include <flatnav/util/Datatype.h>
 #include <flatnav/util/Multithreading.h>
-#include <iostream>
-#include <memory>
-#include <ostream>
 #include <pybind11/numpy.h>
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
+#include <algorithm>
+#include <cstdint>
+#include <iostream>
+#include <memory>
+#include <ostream>
 #include <string>
 #include <thread>
 #include <utility>
 #include <vector>
+#include "docs.h"
 
 using flatnav::Index;
 using flatnav::distances::DistanceInterface;
@@ -28,26 +28,19 @@ using flatnav::util::for_each_data_type;
 namespace py = pybind11;
 
 template <typename Func, typename... Args>
-auto cast_and_call(DataType data_type, const py::array &array, Func &&function,
-                   Args &&... args) {
+auto cast_and_call(DataType data_type, const py::array& array, Func&& function, Args&&... args) {
   switch (data_type) {
-  case DataType::float32:
-    return function(
-        array.cast<
-            py::array_t<float, py::array::c_style | py::array::forcecast>>(),
-        std::forward<Args>(args)...);
-  case DataType::int8:
-    return function(
-        array.cast<
-            py::array_t<int8_t, py::array::c_style | py::array::forcecast>>(),
-        std::forward<Args>(args)...);
-  case DataType::uint8:
-    return function(
-        array.cast<
-            py::array_t<uint8_t, py::array::c_style | py::array::forcecast>>(),
-        std::forward<Args>(args)...);
-  default:
-    throw std::invalid_argument("Unsupported data type.");
+    case DataType::float32:
+      return function(array.cast<py::array_t<float, py::array::c_style | py::array::forcecast>>(),
+                      std::forward<Args>(args)...);
+    case DataType::int8:
+      return function(array.cast<py::array_t<int8_t, py::array::c_style | py::array::forcecast>>(),
+                      std::forward<Args>(args)...);
+    case DataType::uint8:
+      return function(array.cast<py::array_t<uint8_t, py::array::c_style | py::array::forcecast>>(),
+                      std::forward<Args>(args)...);
+    default:
+      throw std::invalid_argument("Unsupported data type.");
   }
 }
 
@@ -57,18 +50,15 @@ class PyIndex : public std::enable_shared_from_this<PyIndex<dist_t, label_t>> {
   int _dim;
   label_t _label_id;
   bool _verbose;
-  Index<dist_t, label_t> *_index;
+  Index<dist_t, label_t>* _index;
   DataType _data_type;
 
-  typedef std::pair<py::array_t<float>, py::array_t<label_t>>
-      DistancesLabelsPair;
+  typedef std::pair<py::array_t<float>, py::array_t<label_t>> DistancesLabelsPair;
 
   // Internal add method that handles templated dispatch
   template <typename data_type>
-  void addImpl(const py::array_t<data_type, py::array::c_style |
-                                                py::array::forcecast> &data,
-               int ef_construction, int num_initializations = 100,
-               py::object labels = py::none()) {
+  void addImpl(const py::array_t<data_type, py::array::c_style | py::array::forcecast>& data,
+               int ef_construction, int num_initializations = 100, py::object labels = py::none()) {
     // py::array_t<float, py::array::c_style | py::array::forcecast> means that
     // the functions expects either a Numpy array of floats or a castable type
     // to that type. If the given type can't be casted, pybind11 will throw an
@@ -80,8 +70,7 @@ class PyIndex : public std::enable_shared_from_this<PyIndex<dist_t, label_t>> {
       throw std::invalid_argument(
           "Data has incorrect dimensions. data.ndim() = "
           "`" +
-          std::to_string(data.ndim()) + "` and data_dim = `" +
-          std::to_string(data_dim) +
+          std::to_string(data.ndim()) + "` and data_dim = `" + std::to_string(data_dim) +
           "`. Expected 2D "
           "array with "
           "dimensions "
@@ -97,7 +86,7 @@ class PyIndex : public std::enable_shared_from_this<PyIndex<dist_t, label_t>> {
         // Release python GIL while threads are running
         py::gil_scoped_release gil;
         this->_index->template addBatch<data_type>(
-            /* data = */ (void *)data.data(0),
+            /* data = */ (void*)data.data(0),
             /* labels = */ vec_labels,
             /* ef_construction = */ ef_construction,
             /* num_initializations = */ num_initializations);
@@ -115,37 +104,35 @@ class PyIndex : public std::enable_shared_from_this<PyIndex<dist_t, label_t>> {
         // Relase python GIL while threads are running
         py::gil_scoped_release gil;
         this->_index->template addBatch<data_type>(
-            /* data = */ (void *)data.data(0), /* labels = */ vec_labels,
+            /* data = */ (void*)data.data(0), /* labels = */ vec_labels,
             /* ef_construction = */ ef_construction,
             /* num_initializations = */ num_initializations);
       }
-    } catch (const py::cast_error &error) {
+    } catch (const py::cast_error& error) {
       throw std::invalid_argument("Invalid labels provided.");
     }
   }
 
   template <typename data_type>
   DistancesLabelsPair searchSingleImpl(
-      const py::array_t<data_type, py::array::c_style | py::array::forcecast>
-          &query,
-      int K, int ef_search, int num_initializations = 100) {
+      const py::array_t<data_type, py::array::c_style | py::array::forcecast>& query, int K, int ef_search,
+      int num_initializations = 100) {
     if (query.ndim() != 1 || query.shape(0) != _dim) {
       throw std::invalid_argument("Query has incorrect dimensions.");
     }
 
     std::vector<std::pair<float, label_t>> top_k = this->_index->search(
-        /* query = */ (const void *)query.data(0), /* K = */ K,
+        /* query = */ (const void*)query.data(0), /* K = */ K,
         /* ef_search = */ ef_search,
         /* num_initializations = */ num_initializations);
 
     if (top_k.size() != K) {
-      throw std::runtime_error(
-          "Search did not return the expected number of results. Expected " +
-          std::to_string(K) + " but got " + std::to_string(top_k.size()) + ".");
+      throw std::runtime_error("Search did not return the expected number of results. Expected " +
+                               std::to_string(K) + " but got " + std::to_string(top_k.size()) + ".");
     }
 
-    label_t *labels = new label_t[K];
-    float *distances = new float[K];
+    label_t* labels = new label_t[K];
+    float* distances = new float[K];
 
     for (size_t i = 0; i < K; i++) {
       distances[i] = top_k[i].first;
@@ -153,26 +140,23 @@ class PyIndex : public std::enable_shared_from_this<PyIndex<dist_t, label_t>> {
     }
 
     // Allows to transfer ownership to Python
-    py::capsule free_labels_when_done(labels,
-                                      [](void *ptr) { delete (label_t *)ptr; });
+    py::capsule free_labels_when_done(labels, [](void* ptr) { delete (label_t*)ptr; });
 
-    py::capsule free_distances_when_done(
-        distances, [](void *ptr) { delete (float *)ptr; });
+    py::capsule free_distances_when_done(distances, [](void* ptr) { delete (float*)ptr; });
 
-    py::array_t<label_t> labels_array = py::array_t<label_t>(
-        {K}, {sizeof(label_t)}, labels, free_labels_when_done);
+    py::array_t<label_t> labels_array =
+        py::array_t<label_t>({K}, {sizeof(label_t)}, labels, free_labels_when_done);
 
-    py::array_t<float> distances_array = py::array_t<float>(
-        {K}, {sizeof(float)}, distances, free_distances_when_done);
+    py::array_t<float> distances_array =
+        py::array_t<float>({K}, {sizeof(float)}, distances, free_distances_when_done);
 
     return {distances_array, labels_array};
   }
 
   template <typename data_type>
-  DistancesLabelsPair
-  searchImpl(const py::array_t<data_type, py::array::c_style |
-                                              py::array::forcecast> &queries,
-             int K, int ef_search, int num_initializations = 100) {
+  DistancesLabelsPair searchImpl(
+      const py::array_t<data_type, py::array::c_style | py::array::forcecast>& queries, int K, int ef_search,
+      int num_initializations = 100) {
     size_t num_queries = queries.shape(0);
     size_t queries_dim = queries.shape(1);
 
@@ -181,22 +165,22 @@ class PyIndex : public std::enable_shared_from_this<PyIndex<dist_t, label_t>> {
     }
 
     auto num_threads = _index->getNumThreads();
-    label_t *results = new label_t[num_queries * K];
-    float *distances = new float[num_queries * K];
+    label_t* results = new label_t[num_queries * K];
+    float* distances = new float[num_queries * K];
 
     // No need to spawn any threads if we are in a single-threaded environment
     if (num_threads == 1) {
       for (size_t query_index = 0; query_index < num_queries; query_index++) {
         std::vector<std::pair<float, label_t>> top_k = this->_index->search(
-            /* query = */ (const void *)queries.data(query_index), /* K = */ K,
+            /* query = */ (const void*)queries.data(query_index), /* K = */ K,
             /* ef_search = */ ef_search,
             /* num_initializations = */ num_initializations);
 
         if (top_k.size() != K) {
-          throw std::runtime_error("Search did not return the expected number "
-                                   "of results. Expected " +
-                                   std::to_string(K) + " but got " +
-                                   std::to_string(top_k.size()) + ".");
+          throw std::runtime_error(
+              "Search did not return the expected number "
+              "of results. Expected " +
+              std::to_string(K) + " but got " + std::to_string(top_k.size()) + ".");
         }
 
         for (size_t i = 0; i < top_k.size(); i++) {
@@ -210,7 +194,7 @@ class PyIndex : public std::enable_shared_from_this<PyIndex<dist_t, label_t>> {
           /* start_index = */ 0, /* end_index = */ num_queries,
           /* num_threads = */ num_threads,
           /* function = */ [&](uint32_t row_index) {
-            auto *query = (const void *)queries.data(row_index);
+            auto* query = (const void*)queries.data(row_index);
             std::vector<std::pair<float, label_t>> top_k = this->_index->search(
                 /* query = */ query, /* K = */ K, /* ef_search = */ ef_search,
                 /* num_initializations = */ num_initializations);
@@ -223,39 +207,35 @@ class PyIndex : public std::enable_shared_from_this<PyIndex<dist_t, label_t>> {
     }
 
     // Allows to transfer ownership to Python
-    py::capsule free_results_when_done(
-        results, [](void *ptr) { delete (label_t *)ptr; });
-    py::capsule free_distances_when_done(
-        distances, [](void *ptr) { delete (float *)ptr; });
-
-    py::array_t<label_t> labels =
-        py::array_t<label_t>({num_queries, (size_t)K}, // shape of the array
-                             {K * sizeof(label_t), sizeof(label_t)}, // strides
-                             results,               // data pointer
-                             free_results_when_done // capsule
-        );
+    py::capsule free_results_when_done(results, [](void* ptr) { delete (label_t*)ptr; });
+    py::capsule free_distances_when_done(distances, [](void* ptr) { delete (float*)ptr; });
+
+    py::array_t<label_t> labels = py::array_t<label_t>({num_queries, (size_t)K},  // shape of the array
+                                                       {K * sizeof(label_t), sizeof(label_t)},  // strides
+                                                       results,                // data pointer
+                                                       free_results_when_done  // capsule
+    );
 
     py::array_t<float> dists = py::array_t<float>(
-        {num_queries, (size_t)K}, {K * sizeof(float), sizeof(float)}, distances,
-        free_distances_when_done);
+        {num_queries, (size_t)K}, {K * sizeof(float), sizeof(float)}, distances, free_distances_when_done);
 
     return {dists, labels};
   }
 
-public:
+ public:
   explicit PyIndex(std::unique_ptr<Index<dist_t, label_t>> index)
-      : _dim(index->dataDimension()), _label_id(0), _verbose(false),
-        _index(index.release()) {
+      : _dim(index->dataDimension()), _label_id(0), _verbose(false), _index(index.release()) {
 
     if (_verbose) {
       _index->getIndexSummary();
     }
   }
 
-  PyIndex(std::unique_ptr<DistanceInterface<dist_t>> &&distance,
-          DataType data_type, int dataset_size, int max_edges_per_node,
-          bool verbose = false, bool collect_stats = false)
-      : _dim(distance->dimension()), _label_id(0), _verbose(verbose),
+  PyIndex(std::unique_ptr<DistanceInterface<dist_t>>&& distance, DataType data_type, int dataset_size,
+          int max_edges_per_node, bool verbose = false, bool collect_stats = false)
+      : _dim(distance->dimension()),
+        _label_id(0),
+        _verbose(verbose),
         _index(new Index<dist_t, label_t>(
             /* dist = */ std::move(distance),
             /* dataset_size = */ dataset_size,
@@ -266,16 +246,12 @@ class PyIndex : public std::enable_shared_from_this<PyIndex<dist_t, label_t>> {
 
     if (_verbose) {
       uint64_t total_index_memory = _index->getTotalIndexMemory();
-      uint64_t visited_set_allocated_memory =
-          _index->visitedSetPoolAllocatedMemory();
+      uint64_t visited_set_allocated_memory = _index->visitedSetPoolAllocatedMemory();
       uint64_t mutexes_allocated_memory = _index->mutexesAllocatedMemory();
 
-      auto total_memory = total_index_memory + visited_set_allocated_memory +
-                          mutexes_allocated_memory;
+      auto total_memory = total_index_memory + visited_set_allocated_memory + mutexes_allocated_memory;
 
-      std::cout << "Total allocated index memory: "
-                << (float)(total_memory / 1e9) << " GB \n"
-                << std::flush;
+      std::cout << "Total allocated index memory: " << (float)(total_memory / 1e9) << " GB \n" << std::flush;
       std::cout << "[WARN]: More memory might be allocated due to visited sets "
                    "in multi-threaded environments.\n"
                 << std::flush;
@@ -283,7 +259,7 @@ class PyIndex : public std::enable_shared_from_this<PyIndex<dist_t, label_t>> {
     }
   }
 
-  Index<dist_t, label_t> *getIndex() { return _index; }
+  Index<dist_t, label_t>* getIndex() { return _index; }
 
   ~PyIndex() { delete _index; }
 
@@ -293,50 +269,39 @@ class PyIndex : public std::enable_shared_from_this<PyIndex<dist_t, label_t>> {
     return distance_computations;
   }
 
-  void buildGraphLinks(const std::string &mtx_filename) {
+  void buildGraphLinks(const std::string& mtx_filename) {
     _index->buildGraphLinks(/* mtx_filename = */ mtx_filename);
   }
 
-  std::vector<std::vector<uint32_t>> getGraphOutdegreeTable() {
-    return _index->getGraphOutdegreeTable();
-  }
+  std::vector<std::vector<uint32_t>> getGraphOutdegreeTable() { return _index->getGraphOutdegreeTable(); }
 
   uint32_t getMaxEdgesPerNode() { return _index->maxEdgesPerNode(); }
 
-  void reorder(const std::vector<std::string> &strategies) {
+  void reorder(const std::vector<std::string>& strategies) {
     // validate the given strategies
-    for (auto &strategy : strategies) {
+    for (auto& strategy : strategies) {
       auto alg = strategy;
-      std::transform(alg.begin(), alg.end(), alg.begin(),
-                     [](unsigned char c) { return std::tolower(c); });
+      std::transform(alg.begin(), alg.end(), alg.begin(), [](unsigned char c) { return std::tolower(c); });
       if (alg != "gorder" && alg != "rcm") {
-        throw std::invalid_argument(
-            "`" + strategy +
-            "` is not a supported graph re-ordering strategy.");
+        throw std::invalid_argument("`" + strategy + "` is not a supported graph re-ordering strategy.");
       }
     }
     _index->doGraphReordering(strategies);
   }
 
-  void setNumThreads(uint32_t num_threads) {
-    _index->setNumThreads(num_threads);
-  }
+  void setNumThreads(uint32_t num_threads) { _index->setNumThreads(num_threads); }
 
   uint32_t getNumThreads() { return _index->getNumThreads(); }
 
-  void save(const std::string &filename) {
-    _index->saveIndex(/* filename = */ filename);
-  }
+  void save(const std::string& filename) { _index->saveIndex(/* filename = */ filename); }
 
-  static std::shared_ptr<PyIndex<dist_t, label_t>>
-  loadIndex(const std::string &filename) {
+  static std::shared_ptr<PyIndex<dist_t, label_t>> loadIndex(const std::string& filename) {
     auto index = Index<dist_t, label_t>::loadIndex(/* filename = */ filename);
     return std::make_shared<PyIndex<dist_t, label_t>>(std::move(index));
   }
 
   std::shared_ptr<PyIndex<dist_t, label_t>> allocateNodes(
-      const py::array_t<float, py::array::c_style | py::array::forcecast>
-          &data) {
+      const py::array_t<float, py::array::c_style | py::array::forcecast>& data) {
     auto num_vectors = data.shape(0);
     auto data_dim = data.shape(1);
     if (data.ndim() != 2 || data_dim != _dim) {
@@ -345,7 +310,7 @@ class PyIndex : public std::enable_shared_from_this<PyIndex<dist_t, label_t>> {
     for (size_t vec_index = 0; vec_index < num_vectors; vec_index++) {
       uint32_t new_node_id;
 
-      this->_index->allocateNode(/* data = */ (void *)data.data(vec_index),
+      this->_index->allocateNode(/* data = */ (void*)data.data(vec_index),
                                  /* label = */ _label_id,
                                  /* new_node_id = */ new_node_id);
       _label_id++;
@@ -353,76 +318,75 @@ class PyIndex : public std::enable_shared_from_this<PyIndex<dist_t, label_t>> {
     return this->shared_from_this();
   }
 
-  void add(const py::array &data, int ef_construction, int num_initializations,
+  void add(const py::array& data, int ef_construction, int num_initializations,
            py::object labels = py::none()) {
     cast_and_call(
         _data_type, data,
-        [this](auto &&casted_data, int ef, int num_init, py::object lbls) {
-          this->addImpl(std::forward<decltype(casted_data)>(casted_data), ef,
-                        num_init, lbls);
+        [this](auto&& casted_data, int ef, int num_init, py::object lbls) {
+          this->addImpl(std::forward<decltype(casted_data)>(casted_data), ef, num_init, lbls);
         },
         ef_construction, num_initializations, labels);
   }
 
-  DistancesLabelsPair search(const py::array &queries, int K, int ef_search,
-                             int num_initializations) {
+  DistancesLabelsPair search(const py::array& queries, int K, int ef_search, int num_initializations) {
     return cast_and_call(
         _data_type, queries,
-        [this](auto &&casted_queries, int k, int ef, int num_init) {
-          return this->searchImpl(
-              std::forward<decltype(casted_queries)>(casted_queries), k, ef,
-              num_init);
+        [this](auto&& casted_queries, int k, int ef, int num_init) {
+          return this->searchImpl(std::forward<decltype(casted_queries)>(casted_queries), k, ef, num_init);
         },
         K, ef_search, num_initializations);
   }
 
-  DistancesLabelsPair searchSingle(const py::array &query, int K, int ef_search,
-                                   int num_initializations) {
+  DistancesLabelsPair searchSingle(const py::array& query, int K, int ef_search, int num_initializations) {
     return cast_and_call(
         _data_type, query,
-        [this](auto &&casted_query, int k, int ef, int num_init) {
-          return this->searchSingleImpl(
-              std::forward<decltype(casted_query)>(casted_query), k, ef,
-              num_init);
+        [this](auto&& casted_query, int k, int ef, int num_init) {
+          return this->searchSingleImpl(std::forward<decltype(casted_query)>(casted_query), k, ef, num_init);
         },
         K, ef_search, num_initializations);
   }
 };
 
-template <typename dist_t> struct IndexSpecialization;
+template <typename dist_t>
+struct IndexSpecialization;
 
-template <> struct IndexSpecialization<SquaredL2Distance<DataType::float32>> {
+template <>
+struct IndexSpecialization<SquaredL2Distance<DataType::float32>> {
   using type = PyIndex<SquaredL2Distance<DataType::float32>, int>;
-  static constexpr char *name = "IndexL2Float";
+  static constexpr char* name = "IndexL2Float";
 };
 
-template <> struct IndexSpecialization<SquaredL2Distance<DataType::uint8>> {
+template <>
+struct IndexSpecialization<SquaredL2Distance<DataType::uint8>> {
   using type = PyIndex<SquaredL2Distance<DataType::uint8>, int>;
-  static constexpr char *name = "IndexL2Uint8";
+  static constexpr char* name = "IndexL2Uint8";
 };
 
-template <> struct IndexSpecialization<SquaredL2Distance<DataType::int8>> {
+template <>
+struct IndexSpecialization<SquaredL2Distance<DataType::int8>> {
   using type = PyIndex<SquaredL2Distance<DataType::int8>, int>;
-  static constexpr char *name = "IndexL2Int8";
+  static constexpr char* name = "IndexL2Int8";
 };
 
 template <>
 struct IndexSpecialization<InnerProductDistance<DataType::float32>> {
   using type = PyIndex<InnerProductDistance<DataType::float32>, int>;
-  static constexpr char *name = "IndexIPFloat";
+  static constexpr char* name = "IndexIPFloat";
 };
 
-template <> struct IndexSpecialization<InnerProductDistance<DataType::uint8>> {
+template <>
+struct IndexSpecialization<InnerProductDistance<DataType::uint8>> {
   using type = PyIndex<InnerProductDistance<DataType::uint8>, int>;
-  static constexpr char *name = "IndexIPUint8";
+  static constexpr char* name = "IndexIPUint8";
 };
 
-template <> struct IndexSpecialization<InnerProductDistance<DataType::int8>> {
+template <>
+struct IndexSpecialization<InnerProductDistance<DataType::int8>> {
   using type = PyIndex<InnerProductDistance<DataType::int8>, int>;
-  static constexpr char *name = "IndexIPInt8";
+  static constexpr char* name = "IndexIPInt8";
 };
 
-void validateDistanceType(const std::string &distance_type) {
+void validateDistanceType(const std::string& distance_type) {
   auto dist_type = distance_type;
   std::transform(dist_type.begin(), dist_type.end(), dist_type.begin(),
                  [](unsigned char c) { return std::tolower(c); });
@@ -435,123 +399,104 @@ void validateDistanceType(const std::string &distance_type) {
 }
 
 template <DataType data_type, typename... Args>
-py::object createIndex(const std::string &distance_type, int dim,
-                       Args &&... args) {
+py::object createIndex(const std::string& distance_type, int dim, Args&&... args) {
   validateDistanceType(distance_type);
 
   if (distance_type == "l2") {
     auto distance = SquaredL2Distance<data_type>::create(dim);
-    auto index = std::make_shared<PyIndex<SquaredL2Distance<data_type>, int>>(
-        std::move(distance), data_type, std::forward<Args>(args)...);
+    auto index = std::make_shared<PyIndex<SquaredL2Distance<data_type>, int>>(std::move(distance), data_type,
+                                                                              std::forward<Args>(args)...);
     return py::cast(index);
   }
 
   auto distance = InnerProductDistance<data_type>::create(dim);
-  auto index = std::make_shared<PyIndex<InnerProductDistance<data_type>, int>>(
-      std::move(distance), data_type, std::forward<Args>(args)...);
+  auto index = std::make_shared<PyIndex<InnerProductDistance<data_type>, int>>(std::move(distance), data_type,
+                                                                               std::forward<Args>(args)...);
   return py::cast(index);
 }
 
 template <typename dist_t, typename label_t>
-void bindSpecialization(py::module_ &index_submodule) {
+void bindSpecialization(py::module_& index_submodule) {
   using IndexType = typename IndexSpecialization<dist_t>::type;
-  auto index_class = py::class_<IndexType, std::shared_ptr<IndexType>>(
-      index_submodule, IndexSpecialization<dist_t>::name);
+  auto index_class =
+      py::class_<IndexType, std::shared_ptr<IndexType>>(index_submodule, IndexSpecialization<dist_t>::name);
 
   index_class
       .def(
           "add",
-          [](IndexType &index, const py::array &data, int ef_construction,
-             int num_initializations = 100, py::object labels = py::none()) {
+          [](IndexType& index, const py::array& data, int ef_construction, int num_initializations = 100,
+             py::object labels = py::none()) {
             index.add(data, ef_construction, num_initializations, labels);
           },
-          py::arg("data"), py::arg("ef_construction"),
-          py::arg("num_initializations") = 100, py::arg("labels") = py::none(),
-          ADD_DOCSTRING)
+          py::arg("data"), py::arg("ef_construction"), py::arg("num_initializations") = 100,
+          py::arg("labels") = py::none(), ADD_DOCSTRING)
       .def(
           "allocate_nodes",
-          [](IndexType &index,
-             const py::array_t<float, py::array::c_style | py::array::forcecast>
-                 &data) { return index.allocateNodes(data); },
+          [](IndexType& index, const py::array_t<float, py::array::c_style | py::array::forcecast>& data) {
+            return index.allocateNodes(data);
+          },
           py::arg("data"), ALLOCATE_NODES_DOCSTRING)
       .def(
           "search_single",
-          [](IndexType &index, const py::array &query, int K, int ef_search,
-             int num_initializations = 100) {
+          [](IndexType& index, const py::array& query, int K, int ef_search, int num_initializations = 100) {
             return index.searchSingle(query, K, ef_search, num_initializations);
           },
-          py::arg("query"), py::arg("K"), py::arg("ef_search"),
-          py::arg("num_initializations") = 100, SEARCH_SINGLE_DOCSTRING)
+          py::arg("query"), py::arg("K"), py::arg("ef_search"), py::arg("num_initializations") = 100,
+          SEARCH_SINGLE_DOCSTRING)
       .def(
           "search",
-          [](IndexType &index, const py::array &queries, int K, int ef_search,
+          [](IndexType& index, const py::array& queries, int K, int ef_search,
              int num_initializations = 100) {
             return index.search(queries, K, ef_search, num_initializations);
           },
-          py::arg("queries"), py::arg("K"), py::arg("ef_search"),
-          py::arg("num_initializations") = 100, SEARCH_DOCSTRING)
-      .def("get_query_distance_computations",
-           &IndexType::getQueryDistanceComputations,
+          py::arg("queries"), py::arg("K"), py::arg("ef_search"), py::arg("num_initializations") = 100,
+          SEARCH_DOCSTRING)
+      .def("get_query_distance_computations", &IndexType::getQueryDistanceComputations,
            GET_QUERY_DISTANCE_COMPUTATIONS_DOCSTRING)
       .def("save", &IndexType::save, py::arg("filename"), SAVE_DOCSTRING)
-      .def("build_graph_links", &IndexType::buildGraphLinks,
-           py::arg("mtx_filename"), BUILD_GRAPH_LINKS_DOCSTRING)
+      .def("build_graph_links", &IndexType::buildGraphLinks, py::arg("mtx_filename"),
+           BUILD_GRAPH_LINKS_DOCSTRING)
       .def("get_graph_outdegree_table", &IndexType::getGraphOutdegreeTable,
            GET_GRAPH_OUTDEGREE_TABLE_DOCSTRING)
-      .def("reorder", &IndexType::reorder, py::arg("strategies"),
-           REORDER_DOCSTRING)
-      .def("set_num_threads", &IndexType::setNumThreads, py::arg("num_threads"),
-           SET_NUM_THREADS_DOCSTRING)
-      .def_static("load_index", &IndexType::loadIndex, py::arg("filename"),
-                  LOAD_INDEX_DOCSTRING)
-      .def_property_readonly("max_edges_per_node",
-                             &IndexType::getMaxEdgesPerNode)
-      .def_property_readonly("num_threads", &IndexType::getNumThreads,
-                             NUM_THREADS_DOCSTRING);
+      .def("reorder", &IndexType::reorder, py::arg("strategies"), REORDER_DOCSTRING)
+      .def("set_num_threads", &IndexType::setNumThreads, py::arg("num_threads"), SET_NUM_THREADS_DOCSTRING)
+      .def_static("load_index", &IndexType::loadIndex, py::arg("filename"), LOAD_INDEX_DOCSTRING)
+      .def_property_readonly("max_edges_per_node", &IndexType::getMaxEdgesPerNode)
+      .def_property_readonly("num_threads", &IndexType::getNumThreads, NUM_THREADS_DOCSTRING);
 }
 
-void defineIndexSubmodule(py::module_ &index_submodule) {
-  bindSpecialization<SquaredL2Distance<DataType::float32>, int>(
-      index_submodule);
+void defineIndexSubmodule(py::module_& index_submodule) {
+  bindSpecialization<SquaredL2Distance<DataType::float32>, int>(index_submodule);
   bindSpecialization<SquaredL2Distance<DataType::int8>, int>(index_submodule);
   bindSpecialization<SquaredL2Distance<DataType::uint8>, int>(index_submodule);
-  bindSpecialization<InnerProductDistance<DataType::float32>, int>(
-      index_submodule);
-  bindSpecialization<InnerProductDistance<DataType::int8>, int>(
-      index_submodule);
-  bindSpecialization<InnerProductDistance<DataType::uint8>, int>(
-      index_submodule);
+  bindSpecialization<InnerProductDistance<DataType::float32>, int>(index_submodule);
+  bindSpecialization<InnerProductDistance<DataType::int8>, int>(index_submodule);
+  bindSpecialization<InnerProductDistance<DataType::uint8>, int>(index_submodule);
 
   index_submodule.def(
       "create",
-      [](const std::string &distance_type, int dim, int dataset_size,
-         int max_edges_per_node, DataType index_data_type, bool verbose = false,
-         bool collect_stats = false) {
+      [](const std::string& distance_type, int dim, int dataset_size, int max_edges_per_node,
+         DataType index_data_type, bool verbose = false, bool collect_stats = false) {
         switch (index_data_type) {
-        case DataType::float32:
-          return createIndex<DataType::float32>(
-              distance_type, dim, dataset_size, max_edges_per_node, verbose,
-              collect_stats);
-        case DataType::int8:
-          return createIndex<DataType::int8>(distance_type, dim, dataset_size,
-                                             max_edges_per_node, verbose,
-                                             collect_stats);
-        case DataType::uint8:
-          return createIndex<DataType::uint8>(distance_type, dim, dataset_size,
-                                              max_edges_per_node, verbose,
-                                              collect_stats);
-        default:
-          throw std::runtime_error("Unsupported data type");
+          case DataType::float32:
+            return createIndex<DataType::float32>(distance_type, dim, dataset_size, max_edges_per_node,
+                                                  verbose, collect_stats);
+          case DataType::int8:
+            return createIndex<DataType::int8>(distance_type, dim, dataset_size, max_edges_per_node, verbose,
+                                               collect_stats);
+          case DataType::uint8:
+            return createIndex<DataType::uint8>(distance_type, dim, dataset_size, max_edges_per_node, verbose,
+                                                collect_stats);
+          default:
+            throw std::runtime_error("Unsupported data type");
         }
       },
-      py::arg("distance_type"), py::arg("dim"), py::arg("dataset_size"),
-      py::arg("max_edges_per_node"),
-      py::arg("index_data_type") = DataType::float32,
-      py::arg("verbose") = false, py::arg("collect_stats") = false,
-      CONSTRUCTOR_DOCSTRING);
+      py::arg("distance_type"), py::arg("dim"), py::arg("dataset_size"), py::arg("max_edges_per_node"),
+      py::arg("index_data_type") = DataType::float32, py::arg("verbose") = false,
+      py::arg("collect_stats") = false, CONSTRUCTOR_DOCSTRING);
 }
 
-void defineDatatypeEnums(py::module_ &module) {
+void defineDatatypeEnums(py::module_& module) {
   // More enums are available, but these are the only ones that we support
   // for index construction.
   py::enum_<DataType>(module, "DataType")
@@ -561,7 +506,7 @@ void defineDatatypeEnums(py::module_ &module) {
       .export_values();
 }
 
-void defineDistanceEnums(py::module_ &module) {
+void defineDistanceEnums(py::module_& module) {
   py::enum_<flatnav::distances::MetricType>(module, "MetricType")
       .value("L2", flatnav::distances::MetricType::L2)
       .value("IP", flatnav::distances::MetricType::IP)

From c80758dd196f6e24cf524d159e3f0e08e08e4795 Mon Sep 17 00:00:00 2001
From: BlaiseMuhirwa <blaisemuhirwa3@gmail.com>
Date: Sat, 2 Nov 2024 16:41:17 -0700
Subject: [PATCH 2/3] further formatting

---
 Makefile                                      |   7 +-
 bin/format.sh                                 |  16 --
 .../quantization/CentroidsGenerator.h         | 100 ++++-----
 .../quantization/ProductQuantization.h        | 203 ++++++++----------
 developmental-features/quantization/Utils.h   |  21 +-
 tools/cereal_tests.cpp                        |  31 ++-
 tools/construct_npy.cpp                       |  65 +++---
 tools/flatnav_pq.cpp                          |  56 ++---
 tools/query_npy.cpp                           |  56 ++---
 9 files changed, 234 insertions(+), 321 deletions(-)
 delete mode 100755 bin/format.sh

diff --git a/Makefile b/Makefile
index 17f7ffb..4fd8afa 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,10 @@
-CPP_FILES := $(wildcard flatnav/**/*.h flatnav/**/*.cc flatnav/**/*.cpp flatnav_python/*.cpp)
+CPP_FILES := $(wildcard flatnav/**/*.h flatnav/**/*.cpp flatnav_python/*.cpp tools/*.cpp developmental-features/**/*.h)
 
 format-cpp:
 	clang-format -i $(CPP_FILES)
 
 build-cpp:
-	./bin/build.sh -e -t
\ No newline at end of file
+	./bin/build.sh -e -t
+
+cmake-format:
+	cmake-format -i CMakeLists.txt
\ No newline at end of file
diff --git a/bin/format.sh b/bin/format.sh
deleted file mode 100755
index 8de3fe2..0000000
--- a/bin/format.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/bash
-
-# First install clang-format and cmake-format
-
-clang-format -i 
-
-# Format all header files with clang-format 
-# TODO: Use a recursive find solution to format headers/src files
-find flatnav -iname *.h -o -iname *.cpp | xargs clang-format -i 
-find tools -iname *.cpp -o -iname *.h | xargs clang-format -i 
-find flatnav_python -iname *.cpp | xargs clang-format -i
-find quantization -iname *.h -o -iname *.cpp | xargs clang-format -i 
-find quantization/tests -iname *.h -o -iname *.cpp | xargs clang-format -i 
-
-# Format CMakeLists.txt file 
-cmake-format -i CMakeLists.txt 
\ No newline at end of file
diff --git a/developmental-features/quantization/CentroidsGenerator.h b/developmental-features/quantization/CentroidsGenerator.h
index 569cb71..69647ac 100644
--- a/developmental-features/quantization/CentroidsGenerator.h
+++ b/developmental-features/quantization/CentroidsGenerator.h
@@ -21,7 +21,7 @@
 namespace flatnav::quantization {
 
 class CentroidsGenerator {
-public:
+ public:
   /**
    * @brief Construct a new Centroids Generator object
    *
@@ -37,20 +37,21 @@ class CentroidsGenerator {
    * @param verbose                   Whether to print verbose output
    * @param seed                      The seed for the random number generator
    */
-  CentroidsGenerator(uint32_t dim, uint32_t num_centroids,
-                     uint32_t num_iterations = 62, bool normalized = true,
-                     bool verbose = false, int seed = 3333)
-      : _dim(dim), _num_centroids(num_centroids),
-        _clustering_iterations(num_iterations), _normalized(normalized),
-        _verbose(verbose), _centroids_initialized(false), _seed(seed),
+  CentroidsGenerator(uint32_t dim, uint32_t num_centroids, uint32_t num_iterations = 62,
+                     bool normalized = true, bool verbose = false, int seed = 3333)
+      : _dim(dim),
+        _num_centroids(num_centroids),
+        _clustering_iterations(num_iterations),
+        _normalized(normalized),
+        _verbose(verbose),
+        _centroids_initialized(false),
+        _seed(seed),
         _initialization_type("default") {}
 
-  void initializeCentroids(
-      const float *data, uint64_t n,
-      const std::function<float(const float *, const float *)> &distance_func) {
+  void initializeCentroids(const float* data, uint64_t n,
+                           const std::function<float(const float*, const float*)>& distance_func) {
     auto initialization_type = _initialization_type;
-    std::transform(initialization_type.begin(), initialization_type.end(),
-                   initialization_type.begin(),
+    std::transform(initialization_type.begin(), initialization_type.end(), initialization_type.begin(),
                    [](unsigned char c) { return std::tolower(c); });
 
     if (_centroids.size() != _num_centroids * _dim) {
@@ -64,9 +65,8 @@ class CentroidsGenerator {
     } else if (initialization_type == "hypercube") {
       hypercubeInitialize(data, n);
     } else {
-      throw std::invalid_argument(
-          "Invalid centroids initialization initialization type: " +
-          initialization_type);
+      throw std::invalid_argument("Invalid centroids initialization initialization type: " +
+                                  initialization_type);
     }
     _centroids_initialized = true;
   }
@@ -94,13 +94,11 @@ class CentroidsGenerator {
    * @param distance_func The distance function to use (e.g. l2 distance or
    cosinde/inner product)
    */
-  void generateCentroids(
-      const float *vectors, const float *vec_weights, uint64_t n,
-      const std::function<float(const float *, const float *)> &distance_func) {
+  void generateCentroids(const float* vectors, const float* vec_weights, uint64_t n,
+                         const std::function<float(const float*, const float*)>& distance_func) {
     if (n < _num_centroids) {
       throw std::runtime_error(
-          "Invalid configuration. The number of centroids: " +
-          std::to_string(_num_centroids) +
+          "Invalid configuration. The number of centroids: " + std::to_string(_num_centroids) +
           " is bigger than the number of data points: " + std::to_string(n));
     }
 
@@ -110,8 +108,7 @@ class CentroidsGenerator {
     std::vector<uint32_t> assignment(n);
 
     // K-means loop
-    for (uint32_t iteration = 0; iteration < _clustering_iterations;
-         iteration++) {
+    for (uint32_t iteration = 0; iteration < _clustering_iterations; iteration++) {
 // Step 1. Find the minimizing centroid based on l2 distance
 #pragma omp parallel for
       for (uint64_t vec_index = 0; vec_index < n; vec_index++) {
@@ -119,9 +116,8 @@ class CentroidsGenerator {
 
         for (uint32_t c_index = 0; c_index < _num_centroids; c_index++) {
           // Get distance using the distance function
-          float *vector = const_cast<float *>(vectors + (vec_index * _dim));
-          float *centroid =
-              const_cast<float *>(_centroids.data() + (c_index * _dim));
+          float* vector = const_cast<float*>(vectors + (vec_index * _dim));
+          float* centroid = const_cast<float*>(_centroids.data() + (c_index * _dim));
           auto distance = distance_func(vector, centroid);
 
           if (distance < min_distance) {
@@ -139,8 +135,7 @@ class CentroidsGenerator {
       for (uint64_t vec_index = 0; vec_index < n; vec_index++) {
         for (uint32_t dim_index = 0; dim_index < _dim; dim_index++) {
 #pragma omp atomic
-          sums[assignment[vec_index] * _dim + dim_index] +=
-              vectors[vec_index * _dim + dim_index];
+          sums[assignment[vec_index] * _dim + dim_index] += vectors[vec_index * _dim + dim_index];
         }
 #pragma omp atomic
         counts[assignment[vec_index]]++;
@@ -148,43 +143,40 @@ class CentroidsGenerator {
 #pragma omp parallel for
       for (uint32_t c_index = 0; c_index < _num_centroids; c_index++) {
         for (uint32_t dim_index = 0; dim_index < _dim; dim_index++) {
-          _centroids[c_index * _dim + dim_index] =
-              counts[c_index]
-                  ? sums[c_index * _dim + dim_index] / counts[c_index]
-                  : _centroids[c_index * _dim + dim_index];
+          _centroids[c_index * _dim + dim_index] = counts[c_index]
+                                                       ? sums[c_index * _dim + dim_index] / counts[c_index]
+                                                       : _centroids[c_index * _dim + dim_index];
         }
       }
     }
   }
 
-  inline const float *centroids() const { return _centroids.data(); }
+  inline const float* centroids() const { return _centroids.data(); }
 
-  inline void setInitializationType(const std::string &initialization_type) {
+  inline void setInitializationType(const std::string& initialization_type) {
     _initialization_type = initialization_type;
   }
 
-private:
+ private:
   /**
    * @brief Initialize the centroids by randomly sampling k centroids among the
    * n data points
    * @param data  The input data points
    * @param n     The number of data points
    */
-  void randomInitialize(const float *data, uint64_t n) {
+  void randomInitialize(const float* data, uint64_t n) {
     std::vector<uint64_t> indices(n);
 
     std::iota(indices.begin(), indices.end(), 0);
     std::mt19937 generator(_seed + 1);
     std::vector<uint64_t> sample_indices(_num_centroids);
-    std::sample(indices.begin(), indices.end(), sample_indices.begin(),
-                _num_centroids, generator);
+    std::sample(indices.begin(), indices.end(), sample_indices.begin(), _num_centroids, generator);
 
     for (uint32_t i = 0; i < _num_centroids; i++) {
       auto sample_index = sample_indices[i];
 
       for (uint32_t dim_index = 0; dim_index < _dim; dim_index++) {
-        _centroids[(i * _dim) + dim_index] =
-            data[(sample_index * _dim) + dim_index];
+        _centroids[(i * _dim) + dim_index] = data[(sample_index * _dim) + dim_index];
       }
     }
   }
@@ -204,9 +196,8 @@ class CentroidsGenerator {
    * @param data  The input data points
    * @param n     The number of data points
    */
-  void kmeansPlusPlusInitialize(
-      const float *data, uint64_t n,
-      const std::function<float(const float *, const float *)> &distance_func) {
+  void kmeansPlusPlusInitialize(const float* data, uint64_t n,
+                                const std::function<float(const float*, const float*)>& distance_func) {
     std::mt19937 generator(_seed);
     std::uniform_int_distribution<uint64_t> distribution(0, n - 1);
 
@@ -216,8 +207,7 @@ class CentroidsGenerator {
       _centroids[dim_index] = data[first_centroid_index * _dim + dim_index];
     }
 
-    std::vector<double> min_squared_distances(
-        n, std::numeric_limits<double>::max());
+    std::vector<double> min_squared_distances(n, std::numeric_limits<double>::max());
 
     // Step 2. For k-1 remaining centroids
     for (uint32_t cent_idx = 1; cent_idx < _num_centroids; cent_idx++) {
@@ -230,8 +220,8 @@ class CentroidsGenerator {
 
         for (uint64_t c = 0; c < cent_idx; c++) {
 
-          float *centroid = const_cast<float *>(_centroids.data() + (c * _dim));
-          float *vector = const_cast<float *>(data + (i * _dim));
+          float* centroid = const_cast<float*>(_centroids.data() + (c * _dim));
+          float* vector = const_cast<float*>(data + (i * _dim));
           auto distance = distance_func(centroid, vector);
 
           if (distance < min_distance) {
@@ -256,8 +246,7 @@ class CentroidsGenerator {
 
       // Add selected centroid the the centroids array
       for (uint32_t dim_index = 0; dim_index < _dim; dim_index++) {
-        _centroids[cent_idx * _dim + dim_index] =
-            data[next_centroid_index * _dim + dim_index];
+        _centroids[cent_idx * _dim + dim_index] = data[next_centroid_index * _dim + dim_index];
       }
     }
   }
@@ -288,7 +277,7 @@ class CentroidsGenerator {
 
  */
 
-  void hypercubeInitialize(const float *data, uint64_t n) {
+  void hypercubeInitialize(const float* data, uint64_t n) {
 
     std::vector<float> means(_dim);
     for (uint64_t vec_index = 0; vec_index < n; vec_index++) {
@@ -304,11 +293,11 @@ class CentroidsGenerator {
       maxm = fabs(means[dim_index]) > maxm ? fabs(means[dim_index]) : maxm;
     }
 
-    float *centroids = _centroids.data();
+    float* centroids = _centroids.data();
     auto num_bits = log2(_num_centroids);
 
     for (uint32_t i = 0; i < _num_centroids; i++) {
-      float *centroid = const_cast<float *>(centroids + (i * _dim));
+      float* centroid = const_cast<float*>(centroids + (i * _dim));
       for (uint32_t j = 0; j < num_bits; j++) {
         centroid[j] = means[j] + (((i >> j) & 1) ? 1 : -1) * maxm;
       }
@@ -341,10 +330,11 @@ class CentroidsGenerator {
   std::string _initialization_type;
 
   friend class cereal::access;
-  template <typename Archive> void serialize(Archive &ar) {
-    ar(_dim, _num_centroids, _centroids, _clustering_iterations, _normalized,
-       _verbose, _centroids_initialized, _seed, _initialization_type);
+  template <typename Archive>
+  void serialize(Archive& ar) {
+    ar(_dim, _num_centroids, _centroids, _clustering_iterations, _normalized, _verbose,
+       _centroids_initialized, _seed, _initialization_type);
   }
 };
 
-} // namespace flatnav::quantization
\ No newline at end of file
+}  // namespace flatnav::quantization
\ No newline at end of file
diff --git a/developmental-features/quantization/ProductQuantization.h b/developmental-features/quantization/ProductQuantization.h
index a1eb3bb..172b3f3 100644
--- a/developmental-features/quantization/ProductQuantization.h
+++ b/developmental-features/quantization/ProductQuantization.h
@@ -1,5 +1,9 @@
 #pragma once
 
+#include <flatnav/distances/DistanceInterface.h>
+#include <flatnav/distances/InnerProductDistance.h>
+#include <flatnav/distances/SquaredL2Distance.h>
+#include <flatnav/util/Datatype.h>
 #include <algorithm>
 #include <cassert>
 #include <cereal/access.hpp>
@@ -10,10 +14,6 @@
 #include <cmath>
 #include <cstdint>
 #include <cstring>
-#include <flatnav/distances/DistanceInterface.h>
-#include <flatnav/distances/InnerProductDistance.h>
-#include <flatnav/distances/SquaredL2Distance.h>
-#include <flatnav/util/Datatype.h>
 #include <memory>
 
 #ifdef _OPENMP
@@ -36,7 +36,8 @@ using flatnav::distances::SquaredL2Distance;
 using flatnav::quantization::CentroidsGenerator;
 using flatnav::util::DataType;
 
-template <typename n_bits_t> struct PQCodeManager {
+template <typename n_bits_t>
+struct PQCodeManager {
   // This is an array that represents a quantization code for
   // a given vector. For instance, if x = [x_0, ..., x_{m-1}]
   // is subdivided into 8 subvectors, each of size m/8, we will
@@ -45,17 +46,17 @@ template <typename n_bits_t> struct PQCodeManager {
   //
   // NOTE: code here means the index of the local centroid that
   // minimizes the (squared) distance between a given subvector and itself.
-  n_bits_t *code;
-  n_bits_t *start;
+  n_bits_t* code;
+  n_bits_t* start;
 
   // Indicates if the code manager has already been redirected to the
   // start of the encoding so that we don't do this more than once (to
   // avoid segfaults while decoding).
   bool code_manager_already_set_to_start;
 
-  PQCodeManager(uint8_t *code, uint32_t nbits)
-      : code(reinterpret_cast<n_bits_t *>(code)),
-        start(reinterpret_cast<n_bits_t *>(code)),
+  PQCodeManager(uint8_t* code, uint32_t nbits)
+      : code(reinterpret_cast<n_bits_t*>(code)),
+        start(reinterpret_cast<n_bits_t*>(code)),
         code_manager_already_set_to_start(false) {
     assert(nbits == 8 * sizeof(n_bits_t));
   }
@@ -87,14 +88,13 @@ template <typename n_bits_t> struct PQCodeManager {
  *
  */
 
-class ProductQuantizer
-    : public flatnav::distances::DistanceInterface<ProductQuantizer> {
+class ProductQuantizer : public flatnav::distances::DistanceInterface<ProductQuantizer> {
   friend class flatnav::distances::DistanceInterface<ProductQuantizer>;
 
   // Represents the block size used in ProductQuantizer::computePQCodes
   static const uint64_t BLOCK_SIZE = 256 * 1024;
 
-public:
+ public:
   // Constructor for serializaiton
   ProductQuantizer() = default;
 
@@ -108,14 +108,17 @@ class ProductQuantizer
    * This will be possible once the PQ integration with the flatnav
    * index is complete.
    */
-  ProductQuantizer(uint32_t dim, uint32_t M, uint32_t nbits,
-                   MetricType metric_type)
-      : _num_subquantizers(M), _num_bits(nbits), _is_trained(false),
-        _metric_type(metric_type), _train_type(TrainType::DEFAULT) {
+  ProductQuantizer(uint32_t dim, uint32_t M, uint32_t nbits, MetricType metric_type)
+      : _num_subquantizers(M),
+        _num_bits(nbits),
+        _is_trained(false),
+        _metric_type(metric_type),
+        _train_type(TrainType::DEFAULT) {
 
     if (dim % _num_subquantizers) {
-      throw std::invalid_argument("The dataset dimension must be a multiple of "
-                                  "the desired number of sub-quantizers.");
+      throw std::invalid_argument(
+          "The dataset dimension must be a multiple of "
+          "the desired number of sub-quantizers.");
     }
     _code_size = (_num_bits * 8 + 7) / 8;
     _subvector_dim = dim / _num_subquantizers;
@@ -135,13 +138,13 @@ class ProductQuantizer
   }
 
   // Return a pointer to the centroids associated with a given subvector
-  const float *getCentroids(uint32_t subvector_index, uint32_t i) const {
+  const float* getCentroids(uint32_t subvector_index, uint32_t i) const {
     auto index = (subvector_index * _subq_centroids_count + i) * _subvector_dim;
     return &_centroids[index];
   }
 
-  void setParameters(const float *centroids_, int m) {
-    float *centroids = const_cast<float *>(getCentroids(m, 0));
+  void setParameters(const float* centroids_, int m) {
+    float* centroids = const_cast<float*>(getCentroids(m, 0));
     auto bytes_to_copy = _subq_centroids_count * _subvector_dim * sizeof(float);
 
     std::memcpy(centroids, centroids_, bytes_to_copy);
@@ -153,14 +156,14 @@ class ProductQuantizer
    * @param vector
    * @param code
    */
-  void computePQCode(const float *vector, uint8_t *code) const {
+  void computePQCode(const float* vector, uint8_t* code) const {
     std::vector<float> distances(_subq_centroids_count);
 
     PQCodeManager<uint8_t> code_manager(/* code = */ code,
                                         /* nbits = */ 8);
 
     for (uint32_t m = 0; m < _num_subquantizers; m++) {
-      const float *subvector = vector + (m * _subvector_dim);
+      const float* subvector = vector + (m * _subvector_dim);
       uint64_t minimizer_index = flatnav::distanceWithKNeighbors(
           /* distances_tmp_buffer = */ distances.data(), /* x = */ subvector,
           /* y = */ getCentroids(m, 0), /* dim = */ _subvector_dim,
@@ -180,15 +183,14 @@ class ProductQuantizer
    * @param codes          quantization codes
    * @param n              total number of vectors
    */
-  void computePQCodes(const float *vectors, uint8_t *codes, uint64_t n) const {
+  void computePQCodes(const float* vectors, uint8_t* codes, uint64_t n) const {
     // process by blocks to avoid using too much RAM
 
     auto dim = _subvector_dim * _num_subquantizers;
     if (n > BLOCK_SIZE) {
       for (uint64_t i0 = 0; i0 < n; i0 += BLOCK_SIZE) {
         auto i1 = std::min(i0 + BLOCK_SIZE, n);
-        computePQCodes(vectors + (dim * i0), codes + (_code_size * i0),
-                       i1 - i0);
+        computePQCodes(vectors + (dim * i0), codes + (_code_size * i0), i1 - i0);
       }
       return;
     }
@@ -205,7 +207,7 @@ class ProductQuantizer
    * @param vectors          Vectors to use for quantization
    * @param n                Number of vectors
    */
-  void train(const float *vectors, uint64_t n) {
+  void train(const float* vectors, uint64_t n) {
 
     CentroidsGenerator centroids_generator(
         /* dim = */ _subvector_dim,
@@ -226,8 +228,7 @@ class ProductQuantizer
 
     TrainType final_train_type = _train_type;
 
-    if (_train_type == TrainType::HYPERCUBE ||
-        _train_type == TrainType::HYPERCUBE_PCA) {
+    if (_train_type == TrainType::HYPERCUBE || _train_type == TrainType::HYPERCUBE_PCA) {
       if (_subvector_dim < _num_bits) {
         final_train_type = TrainType::DEFAULT;
         std::cout << "[pq-train-warning] cannot train hypercube with num "
@@ -236,29 +237,28 @@ class ProductQuantizer
       }
     }
 
-    float *slice = new float[n * _subvector_dim];
+    float* slice = new float[n * _subvector_dim];
     auto dim = _subvector_dim * _num_subquantizers;
 
     // Arrange the vectors such that the first subvector of each vector is
     // contiguous, then the second subvector, and so on.
     for (uint32_t m = 0; m < _num_subquantizers; m++) {
       for (uint64_t vec_index = 0; vec_index < n; vec_index++) {
-        std::memcpy(slice + (vec_index * _subvector_dim),
-                    vectors + (vec_index * dim) + (m * _subvector_dim),
+        std::memcpy(slice + (vec_index * _subvector_dim), vectors + (vec_index * dim) + (m * _subvector_dim),
                     _subvector_dim * sizeof(float));
       }
 
       switch (final_train_type) {
-      case TrainType::HYPERCUBE:
-        centroids_generator.setInitializationType("hypercube");
-        break;
+        case TrainType::HYPERCUBE:
+          centroids_generator.setInitializationType("hypercube");
+          break;
 
-      case TrainType::HOT_START:
-        std::memcpy((void *)centroids_generator.centroids(), getCentroids(m, 0),
-                    _subvector_dim * _subq_centroids_count * sizeof(float));
-        break;
+        case TrainType::HOT_START:
+          std::memcpy((void*)centroids_generator.centroids(), getCentroids(m, 0),
+                      _subvector_dim * _subq_centroids_count * sizeof(float));
+          break;
 
-      default:;
+        default:;
       }
 
       // generate the actual centroids
@@ -283,23 +283,22 @@ class ProductQuantizer
    * @param code      Code corresponding to the given vector
    * @param vector    Vector to decode
    */
-  void decode(const uint8_t *code, float *vector) const {
+  void decode(const uint8_t* code, float* vector) const {
     // TODO check whether this const_cast does not cause any issues
     PQCodeManager<uint8_t> code_manager(
-        /* code = */ const_cast<uint8_t *>(code),
+        /* code = */ const_cast<uint8_t*>(code),
         /* nbits = */ 8);
 
     for (uint32_t m = 0; m < _num_subquantizers; m++) {
       uint64_t code_ = code_manager.decode();
-      std::memcpy(vector + (m * _subvector_dim), getCentroids(m, 0),
-                  sizeof(float) * _subvector_dim);
+      std::memcpy(vector + (m * _subvector_dim), getCentroids(m, 0), sizeof(float) * _subvector_dim);
     }
   }
 
   /**
    * @brief Decode multiple vectors given their respective codes.
    */
-  void decode(const uint8_t *code, float *vectors, uint64_t n) const {
+  void decode(const uint8_t* code, float* vectors, uint64_t n) const {
     auto dim = _subvector_dim * _num_subquantizers;
     for (uint64_t vec_index = 0; vec_index < n; vec_index++) {
       decode(code + (vec_index * _code_size), vectors + (vec_index * dim));
@@ -315,10 +314,8 @@ class ProductQuantizer
    * @param dist_table output table, size (_num_subquantizers x
    * _subq_centroids_count)
    */
-  void
-  computeDistanceTable(const float *vector, float *dist_table,
-                       const std::function<float(const float *, const float *)>
-                           &dist_func) const {
+  void computeDistanceTable(const float* vector, float* dist_table,
+                            const std::function<float(const float*, const float*)>& dist_func) const {
 
     for (uint32_t m = 0; m < _num_subquantizers; m++) {
       flatnav::copyDistancesIntoBuffer(
@@ -330,17 +327,14 @@ class ProductQuantizer
     }
   }
 
-  void computeDistanceTables(const float *vectors, float *dist_tables,
-                             uint64_t n) const {
+  void computeDistanceTables(const float* vectors, float* dist_tables, uint64_t n) const {
 
     // TODO: Use SIMD
     auto dim = _subvector_dim * _num_subquantizers;
 #pragma omp parallel for if (n > 1)
     for (uint64_t i = 0; i < n; i++) {
       computeDistanceTable(vectors + (i * dim),
-                           dist_tables +
-                               (i * _subq_centroids_count * _num_subquantizers),
-                           _dist_func);
+                           dist_tables + (i * _subq_centroids_count * _num_subquantizers), _dist_func);
     }
   }
 
@@ -352,9 +346,9 @@ class ProductQuantizer
 
   inline size_t dataSizeImpl() { return getCodeSize(); }
 
-  void transformDataImpl(void *destination, const void *src) {
-    uint8_t *code = new uint8_t[_code_size]();
-    computePQCode(static_cast<const float *>(src), code);
+  void transformDataImpl(void* destination, const void* src) {
+    uint8_t* code = new uint8_t[_code_size]();
+    computePQCode(static_cast<const float*>(src), code);
 
     std::memcpy(destination, code, _code_size);
 
@@ -370,13 +364,13 @@ class ProductQuantizer
    * @param y         database vector
    * @return
    */
-  float asymmetricDistanceImpl(const void *x, const void *y) const {
+  float asymmetricDistanceImpl(const void* x, const void* y) const {
     assert(_is_trained);
 
-    float *x_ptr = (float *)(x);
-    uint8_t *y_ptr = (uint8_t *)(y);
+    float* x_ptr = (float*)(x);
+    uint8_t* y_ptr = (uint8_t*)(y);
 
-    float *dist_table = new float[_subq_centroids_count * _num_subquantizers];
+    float* dist_table = new float[_subq_centroids_count * _num_subquantizers];
 
     computeDistanceTable(/* vector = */ x_ptr,
                          /* dist_table = */ dist_table,
@@ -398,16 +392,16 @@ class ProductQuantizer
    * @param y
    * @return
    */
-  float symmetricDistanceImpl(const void *x, const void *y) const {
+  float symmetricDistanceImpl(const void* x, const void* y) const {
     assert(_is_trained);
 
-    uint8_t *code1 = (uint8_t *)(x);
-    uint8_t *code2 = (uint8_t *)(y);
+    uint8_t* code1 = (uint8_t*)(x);
+    uint8_t* code2 = (uint8_t*)(y);
 
     float distance = 0.0;
 
     // Get a pointer to the distance table for the first subquantizer
-    const float *dist_table = _symmetric_distance_tables.data();
+    const float* dist_table = _symmetric_distance_tables.data();
 
     for (uint32_t m = 0; m < _num_subquantizers; m++) {
       distance += dist_table[(code1[m] * _subq_centroids_count) + code2[m]];
@@ -416,7 +410,7 @@ class ProductQuantizer
     return distance;
   }
 
-  float distanceImpl(const void *x, const void *y, bool asymmetric) const {
+  float distanceImpl(const void* x, const void* y, bool asymmetric) const {
     if (asymmetric) {
       return asymmetricDistanceImpl(x, y);
     }
@@ -426,15 +420,10 @@ class ProductQuantizer
   void getSummaryImpl() const {
     std::cout << "\nProduct Quantizer Parameters" << std::flush;
     std::cout << "-----------------------------" << std::flush;
-    std::cout << "Number of subquantizers (M): " << _num_subquantizers << "\n"
-              << std::flush;
-    std::cout << "Number of bits per index: " << _num_bits << "\n"
-              << std::flush;
-    std::cout << "Subvector dimension: " << _subvector_dim << "\n"
-              << std::flush;
-    std::cout << "Subquantizer centroids count: " << _subq_centroids_count
-              << "\n"
-              << std::flush;
+    std::cout << "Number of subquantizers (M): " << _num_subquantizers << "\n" << std::flush;
+    std::cout << "Number of bits per index: " << _num_bits << "\n" << std::flush;
+    std::cout << "Subvector dimension: " << _subvector_dim << "\n" << std::flush;
+    std::cout << "Subquantizer centroids count: " << _subq_centroids_count << "\n" << std::flush;
     std::cout << "Code size: " << _code_size << "\n" << std::flush;
     std::cout << "Is trained: " << _is_trained << "\n" << std::flush;
     std::cout << "Train type: " << _train_type << "\n" << std::flush;
@@ -450,26 +439,21 @@ class ProductQuantizer
 
   inline bool isTrained() const { return _is_trained; }
 
-private:
+ private:
   // NOTE: This is a hack to get around the fact that the PQ class needs to know
   // which distance function to use. So, this function allows us to just extract
   // the distance function pointer since that's the only thing we care about.
   // There's gotta be a cleaner way to not have to do this, but this will do for
   // now.
 
-  std::function<float(const float *, const float *)>
-  getDistFuncFromVariant() const {
+  std::function<float(const float*, const float*)> getDistFuncFromVariant() const {
     if (_distance.index() == 0) {
-      return [local_distance = _distance](const float *a,
-                                          const float *b) -> float {
-        return std::get<SquaredL2Distance<DataType::float32>>(local_distance)
-            .distanceImpl(a, b);
+      return [local_distance = _distance](const float* a, const float* b) -> float {
+        return std::get<SquaredL2Distance<DataType::float32>>(local_distance).distanceImpl(a, b);
       };
     }
-    return [local_distance = _distance](const float *a,
-                                        const float *b) -> float {
-      return std::get<InnerProductDistance<DataType::float32>>(local_distance)
-          .distanceImpl(a, b);
+    return [local_distance = _distance](const float* a, const float* b) -> float {
+      return std::get<InnerProductDistance<DataType::float32>>(local_distance).distanceImpl(a, b);
     };
   }
 
@@ -489,19 +473,16 @@ class ProductQuantizer
    *
    */
   void computeSymmetricDistanceTables() {
-    _symmetric_distance_tables.resize(
-        _num_subquantizers * _subq_centroids_count * _subq_centroids_count);
+    _symmetric_distance_tables.resize(_num_subquantizers * _subq_centroids_count * _subq_centroids_count);
 
 #pragma omp parallel for
-    for (uint64_t mk = 0; mk < _num_subquantizers * _subq_centroids_count;
-         mk++) {
+    for (uint64_t mk = 0; mk < _num_subquantizers * _subq_centroids_count; mk++) {
       auto m = mk / _subq_centroids_count;
       auto k = mk % _subq_centroids_count;
-      const float *centroids =
-          _centroids.data() + (m * _subq_centroids_count * _subvector_dim);
-      const float *centroid_k = centroids + (k * _subvector_dim);
-      float *dist_table = _symmetric_distance_tables.data() +
-                          (m * _subq_centroids_count * _subq_centroids_count);
+      const float* centroids = _centroids.data() + (m * _subq_centroids_count * _subvector_dim);
+      const float* centroid_k = centroids + (k * _subvector_dim);
+      float* dist_table =
+          _symmetric_distance_tables.data() + (m * _subq_centroids_count * _subq_centroids_count);
 
       flatnav::copyDistancesIntoBuffer(
           /* distances_buffer = */ dist_table + (k * _subq_centroids_count),
@@ -552,30 +533,28 @@ class ProductQuantizer
   // Initialization
   enum TrainType {
     DEFAULT,
-    HOT_START,     // The centroids are already initialized
-    SHARED,        // Share dictionary across PQ segments
-    HYPERCUBE,     // Initialize centroids with nbits-D hypercube
-    HYPERCUBE_PCA, // Initialize centroids with nbits-D hypercube post PCA
-                   // pre-processing. For now, this is not implemented. FAISS
-                   // seems to believe that this is a good initialization, so we
-                   // might test it out to see if it actually works well.
+    HOT_START,      // The centroids are already initialized
+    SHARED,         // Share dictionary across PQ segments
+    HYPERCUBE,      // Initialize centroids with nbits-D hypercube
+    HYPERCUBE_PCA,  // Initialize centroids with nbits-D hypercube post PCA
+                    // pre-processing. For now, this is not implemented. FAISS
+                    // seems to believe that this is a good initialization, so we
+                    // might test it out to see if it actually works well.
   };
 
   TrainType _train_type;
 
-  std::variant<SquaredL2Distance<DataType::float32>,
-               InnerProductDistance<DataType::float32>>
-      _distance;
+  std::variant<SquaredL2Distance<DataType::float32>, InnerProductDistance<DataType::float32>> _distance;
 
-  std::function<float(const float *, const float *)> _dist_func;
+  std::function<float(const float*, const float*)> _dist_func;
 
   friend class ::cereal::access;
 
-  template <typename Archive> void serialize(Archive &archive) {
+  template <typename Archive>
+  void serialize(Archive& archive) {
 
-    archive(_code_size, _num_subquantizers, _num_bits, _subvector_dim,
-            _subq_centroids_count, _centroids, _symmetric_distance_tables,
-            _is_trained, _metric_type, _train_type);
+    archive(_code_size, _num_subquantizers, _num_bits, _subvector_dim, _subq_centroids_count, _centroids,
+            _symmetric_distance_tables, _is_trained, _metric_type, _train_type);
 
     if constexpr (Archive::is_loading::value) {
       // loading PQ
@@ -591,4 +570,4 @@ class ProductQuantizer
   }
 };
 
-} // namespace flatnav::quantization
\ No newline at end of file
+}  // namespace flatnav::quantization
\ No newline at end of file
diff --git a/developmental-features/quantization/Utils.h b/developmental-features/quantization/Utils.h
index b77b587..9275d16 100644
--- a/developmental-features/quantization/Utils.h
+++ b/developmental-features/quantization/Utils.h
@@ -1,9 +1,9 @@
 
 #pragma once
 
-#include <cstdint>
 #include <flatnav/distances/InnerProductDistance.h>
 #include <flatnav/distances/SquaredL2Distance.h>
+#include <cstdint>
 #include <limits>
 #include <variant>
 
@@ -31,10 +31,9 @@ namespace flatnav {
  * @param dim
  * @param target_set_size
  */
-static void copyDistancesIntoBuffer(
-    float *distances_buffer, const float *x, const float *y, uint32_t dim,
-    uint64_t target_set_size,
-    const std::function<float(const float *, const float *)> &dist_func) {
+static void copyDistancesIntoBuffer(float* distances_buffer, const float* x, const float* y, uint32_t dim,
+                                    uint64_t target_set_size,
+                                    const std::function<float(const float*, const float*)>& dist_func) {
 
   for (uint64_t i = 0; i < target_set_size; i++) {
     distances_buffer[i] = dist_func(x, y);
@@ -55,16 +54,14 @@ static void copyDistancesIntoBuffer(
  * @return 0 if target_set_size equals 0. Otherwise, the index of the
  * nearest vector.
  */
-static uint64_t distanceWithKNeighbors(
-    float *distances_buffer, const float *x, const float *y, uint32_t dim,
-    uint64_t target_set_size,
-    const std::function<float(const float *, const float *)> &dist_func) {
+static uint64_t distanceWithKNeighbors(float* distances_buffer, const float* x, const float* y, uint32_t dim,
+                                       uint64_t target_set_size,
+                                       const std::function<float(const float*, const float*)>& dist_func) {
 
   if (target_set_size == 0) {
     return 0;
   }
-  copyDistancesIntoBuffer(distances_buffer, x, y, dim, target_set_size,
-                          dist_func);
+  copyDistancesIntoBuffer(distances_buffer, x, y, dim, target_set_size, dist_func);
   uint64_t minimizer = 0;
   float minimum_distance = std::numeric_limits<float>::max();
 
@@ -77,4 +74,4 @@ static uint64_t distanceWithKNeighbors(
   return minimizer;
 }
 
-} // namespace flatnav
\ No newline at end of file
+}  // namespace flatnav
\ No newline at end of file
diff --git a/tools/cereal_tests.cpp b/tools/cereal_tests.cpp
index 4f6cceb..cd409d2 100644
--- a/tools/cereal_tests.cpp
+++ b/tools/cereal_tests.cpp
@@ -1,10 +1,10 @@
-#include "cnpy.h"
-#include <cassert>
 #include <flatnav/distances/DistanceInterface.h>
 #include <flatnav/distances/InnerProductDistance.h>
 #include <flatnav/distances/SquaredL2Distance.h>
 #include <flatnav/index/Index.h>
+#include <cassert>
 #include <memory>
+#include "cnpy.h"
 
 using flatnav::Index;
 using flatnav::distances::DistanceInterface;
@@ -13,16 +13,13 @@ using flatnav::distances::SquaredL2Distance;
 using flatnav::util::DataType;
 
 template <typename dist_t>
-void serializeIndex(float *data,
-                    std::unique_ptr<DistanceInterface<dist_t>> &&distance,
-                    int N, int M, int dim, int ef_construction,
-                    const std::string &save_file) {
-  std::unique_ptr<Index<dist_t, int>> index =
-      std::make_unique<Index<dist_t, int>>(
-          /* dist = */ std::move(distance), /* dataset_size = */ N,
-          /* max_edges = */ M);
+void serializeIndex(float* data, std::unique_ptr<DistanceInterface<dist_t>>&& distance, int N, int M, int dim,
+                    int ef_construction, const std::string& save_file) {
+  std::unique_ptr<Index<dist_t, int>> index = std::make_unique<Index<dist_t, int>>(
+      /* dist = */ std::move(distance), /* dataset_size = */ N,
+      /* max_edges = */ M);
 
-  float *element = new float[dim];
+  float* element = new float[dim];
   std::vector<int> labels(N);
   std::iota(labels.begin(), labels.end(), 0);
 
@@ -39,15 +36,14 @@ void serializeIndex(float *data,
   assert(new_index->dataSizeBytes() == distance->dataSize() + (32 * M) + 32);
   assert(new_index->maxNodeCount() == N);
 
-  uint64_t total_index_size =
-      new_index->nodeSizeBytes() * new_index->maxNodeCount();
+  uint64_t total_index_size = new_index->nodeSizeBytes() * new_index->maxNodeCount();
 
   for (uint64_t i = 0; i < total_index_size; i++) {
     assert(index->indexMemory()[i] == new_index->indexMemory()[i] * 2);
   }
 }
 
-int main(int argc, char **argv) {
+int main(int argc, char** argv) {
   if (argc < 2) {
     std::cout << "Usage: " << argv[0] << " <data.npy>\n" << std::flush;
     std::cout << "data.npy: Path to a NPY file for MNIST\n" << std::flush;
@@ -59,11 +55,10 @@ int main(int argc, char **argv) {
   int ef_construction = 100;
   int dim = 784;
   int N = 60000;
-  float *data = datafile.data<float>();
+  float* data = datafile.data<float>();
   auto l2_distance = SquaredL2Distance<DataType::float32>::create(dim);
-  serializeIndex<SquaredL2Distance<DataType::float32>>(
-      data, std::move(l2_distance), N, M, dim, ef_construction,
-      std::string("l2_flatnav.bin"));
+  serializeIndex<SquaredL2Distance<DataType::float32>>(data, std::move(l2_distance), N, M, dim,
+                                                       ef_construction, std::string("l2_flatnav.bin"));
 
   // auto inner_product_distance =
   //     std::make_unique<InnerProductDistance<float>>(dim);
diff --git a/tools/construct_npy.cpp b/tools/construct_npy.cpp
index 4be1df3..e9ba9eb 100644
--- a/tools/construct_npy.cpp
+++ b/tools/construct_npy.cpp
@@ -1,24 +1,24 @@
-#include "cnpy.h"
-#include <algorithm>
-#include <chrono>
-#include <cmath>
+#include <developmental-features/quantization/ProductQuantization.h>
 #include <flatnav/distances/DistanceInterface.h>
 #include <flatnav/distances/InnerProductDistance.h>
 #include <flatnav/distances/SquaredL2Distance.h>
 #include <flatnav/index/Index.h>
 #include <flatnav/util/Datatype.h>
+#include <algorithm>
+#include <chrono>
+#include <cmath>
 #include <fstream>
 #include <iostream>
 #include <memory>
 #include <numeric>
 #include <optional>
-#include <developmental-features/quantization/ProductQuantization.h>
 #include <random>
 #include <stdexcept>
 #include <string>
 #include <thread>
 #include <utility>
 #include <vector>
+#include "cnpy.h"
 
 using flatnav::Index;
 using flatnav::distances::DistanceInterface;
@@ -28,10 +28,8 @@ using flatnav::quantization::ProductQuantizer;
 using flatnav::util::DataType;
 
 template <typename dist_t>
-void buildIndex(float *data,
-                std::unique_ptr<DistanceInterface<dist_t>> distance, int N,
-                int M, int dim, int ef_construction, int build_num_threads,
-                const std::string &save_file) {
+void buildIndex(float* data, std::unique_ptr<DistanceInterface<dist_t>> distance, int N, int M, int dim,
+                int ef_construction, int build_num_threads, const std::string& save_file) {
 
   auto index = new Index<dist_t, int>(
       /* dist = */ std::move(distance), /* dataset_size = */ N,
@@ -43,15 +41,13 @@ void buildIndex(float *data,
 
   std::vector<int> labels(N);
   std::iota(labels.begin(), labels.end(), 0);
-  index->template addBatch<float>(/* data = */ (void *)data,
+  index->template addBatch<float>(/* data = */ (void*)data,
                                   /* labels = */ labels,
                                   /* ef_construction */ ef_construction);
 
   auto stop = std::chrono::high_resolution_clock ::now();
-  auto duration =
-      std::chrono::duration_cast<std::chrono::milliseconds>(stop - start);
-  std::clog << "Build time: " << (float)duration.count() << " milliseconds"
-            << std::endl;
+  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(stop - start);
+  std::clog << "Build time: " << (float)duration.count() << " milliseconds" << std::endl;
 
   std::clog << "Saving index to: " << save_file << std::endl;
   index->saveIndex(/* filename = */ save_file);
@@ -59,9 +55,8 @@ void buildIndex(float *data,
   delete index;
 }
 
-void run(float *data, flatnav::distances::MetricType metric_type, int N, int M,
-         int dim, int ef_construction, int build_num_threads,
-         const std::string &save_file, bool quantize = false) {
+void run(float* data, flatnav::distances::MetricType metric_type, int N, int M, int dim, int ef_construction,
+         int build_num_threads, const std::string& save_file, bool quantize = false) {
 
   if (quantize) {
     // Parameters M and nbits should be adjusted accordingly.
@@ -72,41 +67,35 @@ void run(float *data, flatnav::distances::MetricType metric_type, int N, int M,
     auto start = std::chrono::high_resolution_clock::now();
     quantizer->train(/* vectors = */ data, /* num_vectors = */ N);
     auto stop = std::chrono::high_resolution_clock::now();
-    auto duration =
-        std::chrono::duration_cast<std::chrono::milliseconds>(stop - start);
-    std::clog << "Quantization time: " << (float)duration.count()
-              << " milliseconds" << std::endl;
+    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(stop - start);
+    std::clog << "Quantization time: " << (float)duration.count() << " milliseconds" << std::endl;
 
-    buildIndex<ProductQuantizer>(data, std::move(quantizer), N, M, dim,
-                                 ef_construction, build_num_threads, save_file);
+    buildIndex<ProductQuantizer>(data, std::move(quantizer), N, M, dim, ef_construction, build_num_threads,
+                                 save_file);
 
   } else {
     if (metric_type == flatnav::distances::MetricType::L2) {
       auto distance = SquaredL2Distance<>::create(dim);
-      buildIndex<SquaredL2Distance<DataType::float32>>(
-          data, std::move(distance), N, M, dim, ef_construction,
-          build_num_threads, save_file);
+      buildIndex<SquaredL2Distance<DataType::float32>>(data, std::move(distance), N, M, dim, ef_construction,
+                                                       build_num_threads, save_file);
 
     } else if (metric_type == flatnav::distances::MetricType::IP) {
       auto distance = InnerProductDistance<>::create(dim);
-      buildIndex<InnerProductDistance<DataType::float32>>(
-          data, std::move(distance), N, M, dim, ef_construction,
-          build_num_threads, save_file);
+      buildIndex<InnerProductDistance<DataType::float32>>(data, std::move(distance), N, M, dim,
+                                                          ef_construction, build_num_threads, save_file);
     }
   }
 }
 
-int main(int argc, char **argv) {
+int main(int argc, char** argv) {
 
   if (argc < 8) {
     std::clog << "Usage: " << std::endl;
     std::clog << "construct <quantize> <metric> <data> <M> <ef_construction> "
                  "<build_num_threads> <outfile>"
               << std::endl;
-    std::clog << "\t <quantize> int, 0 for no quantization, 1 for quantization"
-              << std::endl;
-    std::clog << "\t <metric> int, 0 for L2, 1 for inner product (angular)"
-              << std::endl;
+    std::clog << "\t <quantize> int, 0 for no quantization, 1 for quantization" << std::endl;
+    std::clog << "\t <metric> int, 0 for L2, 1 for inner product (angular)" << std::endl;
     std::clog << "\t <data> npy file from ann-benchmarks" << std::endl;
     std::clog << "\t <M>: int " << std::endl;
     std::clog << "\t <ef_construction>: int " << std::endl;
@@ -129,12 +118,10 @@ int main(int argc, char **argv) {
   int dim = datafile.shape[1];
   int N = datafile.shape[0];
 
-  std::clog << "Loading " << dim << "-dimensional dataset with N = " << N
-            << std::endl;
-  float *data = datafile.data<float>();
+  std::clog << "Loading " << dim << "-dimensional dataset with N = " << N << std::endl;
+  float* data = datafile.data<float>();
   flatnav::distances::MetricType metric_type =
-      metric_id == 0 ? flatnav::distances::MetricType::L2
-                     : flatnav::distances::MetricType::IP;
+      metric_id == 0 ? flatnav::distances::MetricType::L2 : flatnav::distances::MetricType::IP;
 
   run(/* data = */ data,
       /* metric_type = */ metric_type,
diff --git a/tools/flatnav_pq.cpp b/tools/flatnav_pq.cpp
index a9da320..d46e4c9 100644
--- a/tools/flatnav_pq.cpp
+++ b/tools/flatnav_pq.cpp
@@ -1,10 +1,9 @@
-#include "cnpy.h"
-#include <algorithm>
-#include <chrono>
-#include <cmath>
 #include <flatnav/distances/InnerProductDistance.h>
 #include <flatnav/distances/SquaredL2Distance.h>
 #include <flatnav/index/Index.h>
+#include <algorithm>
+#include <chrono>
+#include <cmath>
 #include <fstream>
 #include <iostream>
 #include <random>
@@ -12,16 +11,15 @@
 #include <string>
 #include <utility>
 #include <vector>
+#include "cnpy.h"
 
 using flatnav::Index;
 using flatnav::distances::InnerProductDistance;
 using flatnav::distances::SquaredL2Distance;
 
 template <typename dist_t>
-void run(
-    float *data,
-    std::unique_ptr<flatnav::distances::DistanceInterface<dist_t>> &&distance,
-    int N, int M, int dim, int ef_construction, const std::string &save_file) {
+void run(float* data, std::unique_ptr<flatnav::distances::DistanceInterface<dist_t>>&& distance, int N, int M,
+         int dim, int ef_construction, const std::string& save_file) {
   auto index = new Index<dist_t, int>(
       /* dist = */ std::move(distance), /* dataset_size = */ N,
       /* max_edges = */ M);
@@ -29,8 +27,8 @@ void run(
   auto start = std::chrono::high_resolution_clock::now();
 
   for (int label = 0; label < N; label++) {
-    float *element = data + (dim * label);
-    index->add(/* data = */ (void *)element, /* label = */ label,
+    float* element = data + (dim * label);
+    index->add(/* data = */ (void*)element, /* label = */ label,
                /* ef_construction */ ef_construction);
     if (label % 100000 == 0)
       std::clog << "." << std::flush;
@@ -38,10 +36,8 @@ void run(
   std::clog << std::endl;
 
   auto stop = std::chrono::high_resolution_clock::now();
-  auto duration =
-      std::chrono::duration_cast<std::chrono::milliseconds>(stop - start);
-  std::clog << "Build time: " << (float)duration.count() << " milliseconds"
-            << std::endl;
+  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(stop - start);
+  std::clog << "Build time: " << (float)duration.count() << " milliseconds" << std::endl;
 
   std::clog << "Saving index to: " << save_file << std::endl;
   index->saveIndex(/* filename = */ save_file);
@@ -49,8 +45,7 @@ void run(
   delete index;
 }
 
-std::vector<uint8_t> quantize(float *vectors, uint64_t vec_count, uint32_t dim,
-                              uint32_t M, uint32_t nbits) {
+std::vector<uint8_t> quantize(float* vectors, uint64_t vec_count, uint32_t dim, uint32_t M, uint32_t nbits) {
   auto distance = std::make_unique<SquaredL2Distance>(dim);
   ProductQuantizer<SquaredL2Distance> pq(/* dist = */ std::move(distance),
                                          /* dim = */ dim, /* M = */ M,
@@ -68,12 +63,12 @@ std::vector<uint8_t> quantize(float *vectors, uint64_t vec_count, uint32_t dim,
   std::cout << "[INFO] Saving codes to: "
             << "codes.bin" << std::endl;
   std::ofstream stream("codes.bin");
-  stream.write((char *)codes.data(), codes.size());
+  stream.write((char*)codes.data(), codes.size());
 
   return codes;
 }
 
-int main(int argc, char **argv) {
+int main(int argc, char** argv) {
   // Quantize
   const bool quantize = false;
 
@@ -89,22 +84,20 @@ int main(int argc, char **argv) {
   const int dataset_size = 60000;
 
   // datafile
-  const char *filename = "mnnist-784-euclidean.train.npy";
+  const char* filename = "mnnist-784-euclidean.train.npy";
   cnpy::NpyArray datafile = cnpy::npy_load(filename);
 
   assert(datafile.shape.size() == 2);
   assert(datafile.shape[0] == dataset_size);
   assert(datafile.shape[1] == dim);
 
-  std::clog << "Loading " << dim
-            << "-dimensional dataset with N = " << dataset_size << std::endl;
-  float *data = datafile.data<float>();
+  std::clog << "Loading " << dim << "-dimensional dataset with N = " << dataset_size << std::endl;
+  float* data = datafile.data<float>();
 
   if (quantize) {
     // NOTE: M here is different from max_edges.
-    std::vector<uint8_t> codes =
-        quantize(/* vectors = */ data, /* vec_count = */ dataset_size,
-                 /* dim = */ dim, /* M = */ 8, /* nbits = */ 8);
+    std::vector<uint8_t> codes = quantize(/* vectors = */ data, /* vec_count = */ dataset_size,
+                                          /* dim = */ dim, /* M = */ 8, /* nbits = */ 8);
   }
 
   auto distance = std::make_unique<SquaredL2Distance>(dim);
@@ -114,8 +107,8 @@ int main(int argc, char **argv) {
 
   auto start = std::chrono::high_resolution_clock::now();
   for (int label = 0; label < N; label++) {
-    float *element = data + (dim * label);
-    index->add(/* data = */ (void *)element, /* label = */ label,
+    float* element = data + (dim * label);
+    index->add(/* data = */ (void*)element, /* label = */ label,
                /* ef_construction */ ef_construction);
     if (label % 100000 == 0)
       std::clog << "." << std::flush;
@@ -123,10 +116,8 @@ int main(int argc, char **argv) {
   std::clog << std::endl;
 
   auto stop = std::chrono::high_resolution_clock::now();
-  auto duration =
-      std::chrono::duration_cast<std::chrono::milliseconds>(stop - start);
-  std::clog << "Build time: " << (float)duration.count() << " milliseconds"
-            << std::endl;
+  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(stop - start);
+  std::clog << "Build time: " << (float)duration.count() << " milliseconds" << std::endl;
 
   if (metric_id == 0) {
     auto distance = std::make_unique<SquaredL2Distance>(dim);
@@ -143,8 +134,7 @@ int main(int argc, char **argv) {
         /* N = */ N, /* M = */ M, dim,
         /* ef_construction = */ ef_construction, /* save_file = */ argv[5]);
   } else {
-    throw std::invalid_argument("Provided metric ID " +
-                                std::to_string(metric_id) + "is invalid.");
+    throw std::invalid_argument("Provided metric ID " + std::to_string(metric_id) + " is invalid.");
   }
 
   return 0;
diff --git a/tools/query_npy.cpp b/tools/query_npy.cpp
index 3089679..42a9ecb 100644
--- a/tools/query_npy.cpp
+++ b/tools/query_npy.cpp
@@ -1,20 +1,20 @@
-#include <chrono>
-#include <cmath>
+#include <developmental-features/quantization/ProductQuantization.h>
 #include <flatnav/distances/InnerProductDistance.h>
 #include <flatnav/distances/SquaredL2Distance.h>
 #include <flatnav/index/Index.h>
 #include <flatnav/util/Datatype.h>
+#include <chrono>
+#include <cmath>
 #include <fstream>
 #include <iostream>
-#include <developmental-features/quantization/ProductQuantization.h>
 #include <random>
 #include <utility>
 #include <vector>
 
-#include "cnpy.h"
 #include <algorithm>
 #include <sstream>
 #include <string>
+#include "cnpy.h"
 
 using flatnav::Index;
 using flatnav::distances::InnerProductDistance;
@@ -23,12 +23,10 @@ using flatnav::quantization::ProductQuantizer;
 using flatnav::util::DataType;
 
 template <typename dist_t>
-void run(float *queries, int *gtruth, const std::string &index_filename,
-         const std::vector<int> &ef_searches, int K, int num_queries,
-         int num_gtruth, int dim, bool reorder = true) {
+void run(float* queries, int* gtruth, const std::string& index_filename, const std::vector<int>& ef_searches,
+         int K, int num_queries, int num_gtruth, int dim, bool reorder = true) {
 
-  std::unique_ptr<Index<dist_t, int>> index =
-      Index<dist_t, int>::loadIndex(index_filename);
+  std::unique_ptr<Index<dist_t, int>> index = Index<dist_t, int>::loadIndex(index_filename);
 
   std::cout << "[INFO] Index loaded" << std::endl;
   index->getIndexSummary();
@@ -38,22 +36,19 @@ void run(float *queries, int *gtruth, const std::string &index_filename,
     auto start_r = std::chrono::high_resolution_clock::now();
     index->reorderGOrder();
     auto stop_r = std::chrono::high_resolution_clock::now();
-    auto duration_r =
-        std::chrono::duration_cast<std::chrono::milliseconds>(stop_r - start_r);
-    std::clog << "Reordering time: " << (float)(duration_r.count()) / (1000.0)
-              << " seconds" << std::endl;
+    auto duration_r = std::chrono::duration_cast<std::chrono::milliseconds>(stop_r - start_r);
+    std::clog << "Reordering time: " << (float)(duration_r.count()) / (1000.0) << " seconds" << std::endl;
   }
 
-  for (const auto &ef_search : ef_searches) {
+  for (const auto& ef_search : ef_searches) {
     double mean_recall = 0;
 
     auto start_q = std::chrono::high_resolution_clock::now();
     for (int i = 0; i < num_queries; i++) {
-      float *q = queries + dim * i;
-      int *g = gtruth + num_gtruth * i;
+      float* q = queries + dim * i;
+      int* g = gtruth + num_gtruth * i;
 
-      std::vector<std::pair<float, int>> result =
-          index->search(q, K, ef_search);
+      std::vector<std::pair<float, int>> result = index->search(q, K, ef_search);
 
       double recall = 0;
       for (int j = 0; j < K; j++) {
@@ -67,15 +62,13 @@ void run(float *queries, int *gtruth, const std::string &index_filename,
       mean_recall = mean_recall + recall;
     }
     auto stop_q = std::chrono::high_resolution_clock::now();
-    auto duration_q =
-        std::chrono::duration_cast<std::chrono::milliseconds>(stop_q - start_q);
+    auto duration_q = std::chrono::duration_cast<std::chrono::milliseconds>(stop_q - start_q);
     std::cout << "[INFO] Mean Recall: " << mean_recall / num_queries
-              << ", Duration:" << (float)(duration_q.count()) / num_queries
-              << " milliseconds" << std::endl;
+              << ", Duration:" << (float)(duration_q.count()) / num_queries << " milliseconds" << std::endl;
   }
 }
 
-int main(int argc, char **argv) {
+int main(int argc, char** argv) {
 
   if (argc < 9) {
     std::clog << "Usage: " << std::endl;
@@ -89,10 +82,8 @@ int main(int argc, char **argv) {
     std::clog << "\t <ef_construction>: int " << std::endl;
     std::clog << "\t <ef_search>: int,int,int,int...,int " << std::endl;
     std::clog << "\t <k>: number of neighbors " << std::endl;
-    std::clog << "\t <Reorder ID>: 0 for no reordering, 1 for reordering"
-              << std::endl;
-    std::clog << "\t <Quantized>: 0 for no quantization, 1 for quantization"
-              << std::endl;
+    std::clog << "\t <Reorder ID>: 0 for no reordering, 1 for reordering" << std::endl;
+    std::clog << "\t <Quantized>: 0 for no quantization, 1 for quantization" << std::endl;
     return -1;
   }
 
@@ -123,17 +114,14 @@ int main(int argc, char **argv) {
   int dim = queryfile.shape[1];
   int n_gt = truthfile.shape[1];
   if (k > n_gt) {
-    std::cerr
-        << "K is larger than the number of precomputed ground truth neighbors"
-        << std::endl;
+    std::cerr << "K is larger than the number of precomputed ground truth neighbors" << std::endl;
     return -1;
   }
 
   std::clog << "Loading " << num_queries << " queries" << std::endl;
-  float *queries = queryfile.data<float>();
-  std::clog << "Loading " << num_queries
-            << " ground truth results with k = " << k << std::endl;
-  int *gtruth = truthfile.data<int>();
+  float* queries = queryfile.data<float>();
+  std::clog << "Loading " << num_queries << " ground truth results with k = " << k << std::endl;
+  int* gtruth = truthfile.data<int>();
 
   if (quantized) {
     run<ProductQuantizer>(/* queries = */ queries, /* gtruth = */

From c5b245a0dd2e52feee3962fe65325e4d76409e05 Mon Sep 17 00:00:00 2001
From: BlaiseMuhirwa <blaisemuhirwa3@gmail.com>
Date: Sat, 2 Nov 2024 16:45:02 -0700
Subject: [PATCH 3/3] fix github actions workflow

---
 .github/workflows/cicd.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml
index 8b65de2..64dadee 100644
--- a/.github/workflows/cicd.yaml
+++ b/.github/workflows/cicd.yaml
@@ -63,7 +63,7 @@ jobs:
     - name: Install dependencies
       run: |
         # Install CMake and clang
-        ./bin/install_clang.sh
+        ./bin/install_clang_and_libomp.sh
         ./bin/install_cmake.sh
 
     - name: Build flatnav