From e9917c98074e104d546e697c69df78f09bc72620 Mon Sep 17 00:00:00 2001 From: BlaiseMuhirwa Date: Sat, 2 Nov 2024 16:38:11 -0700 Subject: [PATCH 1/3] standardize code formatting --- .clang-format | 81 +++++ .gitignore | 3 + Makefile | 7 + bin/format.sh | 7 +- flatnav/distances/DistanceInterface.h | 30 +- flatnav/distances/IPDistanceDispatcher.h | 31 +- flatnav/distances/InnerProductDistance.h | 37 +-- flatnav/distances/L2DistanceDispatcher.h | 30 +- flatnav/distances/SquaredL2Distance.h | 38 +-- flatnav/index/Index.h | 264 +++++++-------- flatnav/tests/test_distances.cpp | 28 +- flatnav/tests/test_serialization.cpp | 34 +- flatnav/util/Datatype.h | 126 ++++---- flatnav/util/GorderPriorityQueue.h | 25 +- flatnav/util/InnerProductSimdExtensions.h | 78 ++--- flatnav/util/Macros.h | 16 +- flatnav/util/Multithreading.h | 9 +- flatnav/util/Reordering.h | 40 ++- flatnav/util/SquaredL2SimdExtensions.h | 111 +++---- flatnav/util/VisitedSetPool.h | 59 ++-- flatnav_python/python_bindings.cpp | 377 +++++++++------------- 21 files changed, 689 insertions(+), 742 deletions(-) create mode 100644 .clang-format create mode 100644 Makefile diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..1bfb3be --- /dev/null +++ b/.clang-format @@ -0,0 +1,81 @@ +# Google C/C++ Code Style settings +# https://clang.llvm.org/docs/ClangFormatStyleOptions.html + +Language: Cpp +BasedOnStyle: Google +AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: None +AlignOperands: Align +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortBlocksOnASingleLine: Empty +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Inline +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: Inline +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterReturnType: None +AlwaysBreakTemplateDeclarations: Yes +BinPackArguments: true +BreakBeforeBraces: Custom +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterStruct: false + AfterControlStatement: Never + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + IndentBraces: false + SplitEmptyFunction: false + SplitEmptyRecord: false + SplitEmptyNamespace: false +BreakBeforeBinaryOperators: None +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeColon +BreakInheritanceList: BeforeColon +ColumnLimit: 110 +CompactNamespaces: false +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +EmptyLineBeforeAccessModifier: LogicalBlock +FixNamespaceComments: true +IncludeBlocks: Preserve +IndentCaseLabels: true +IndentPPDirectives: None +IndentWidth: 2 +KeepEmptyLinesAtTheStartOfBlocks: true +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PointerAlignment: Left +ReflowComments: false +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInCStyleCastParentheses: false +SpacesInContainerLiterals: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: c++17 +TabWidth: 4 +UseTab: Never \ No newline at end of file diff --git a/.gitignore b/.gitignore index eca58a8..fc5082f 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,9 @@ build metrics node-access-distributions +# PYcache +**/__pycache__ + # Python wheel related folders/files flatnav_python/flatnav.egg-info/ flatnav_python/poetry.lock diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..17f7ffb --- /dev/null +++ b/Makefile @@ -0,0 +1,7 @@ +CPP_FILES := $(wildcard flatnav/**/*.h flatnav/**/*.cc flatnav/**/*.cpp flatnav_python/*.cpp) + +format-cpp: + clang-format -i $(CPP_FILES) + +build-cpp: + ./bin/build.sh -e -t \ No newline at end of file diff --git a/bin/format.sh b/bin/format.sh index 5bfa07f..8de3fe2 100755 --- a/bin/format.sh +++ b/bin/format.sh @@ -1,11 +1,8 @@ #!/bin/bash +# First install clang-format and cmake-format -# Install cmake-format if it is not installed via pip -if ! command -v cmake-format &> /dev/null -then - pip install cmake-format -fi +clang-format -i # Format all header files with clang-format # TODO: Use a recursive find solution to format headers/src files diff --git a/flatnav/distances/DistanceInterface.h b/flatnav/distances/DistanceInterface.h index 717df4c..d5b7617 100644 --- a/flatnav/distances/DistanceInterface.h +++ b/flatnav/distances/DistanceInterface.h @@ -1,8 +1,8 @@ #pragma once #include -#include // for size_t -#include // for ifstream, ofstream +#include // for size_t +#include // for ifstream, ofstream #include namespace flatnav::distances { @@ -15,39 +15,41 @@ enum class MetricType { L2, IP }; // distance function through a pointer or virtual function call. // CRTP: https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern -template class DistanceInterface { -public: +template +class DistanceInterface { + public: // The asymmetric flag is used to indicate whether the distance function // is between two database vectors (symmetric) or between a database vector // and a query vector. For regular distances (l2, inner product), there is // no difference between the two. However, for quantization techniques, such // as product quantization, the two distance modes are different. - float distance(const void *x, const void *y, bool asymmetric = false) { - return static_cast(this)->distanceImpl(x, y, asymmetric); + float distance(const void* x, const void* y, bool asymmetric = false) { + return static_cast(this)->distanceImpl(x, y, asymmetric); } // Returns the dimension of the input data. - size_t dimension() { return static_cast(this)->getDimension(); } + size_t dimension() { return static_cast(this)->getDimension(); } // Returns the size, in bytes, of the transformed data representation. - size_t dataSize() { return static_cast(this)->dataSizeImpl(); } + size_t dataSize() { return static_cast(this)->dataSizeImpl(); } // Prints the parameters of the distance function. - void getSummary() { static_cast(this)->getSummaryImpl(); } + void getSummary() { static_cast(this)->getSummaryImpl(); } // This transforms the data located at src into a form that is writeable // to disk / storable in RAM. For distance functions that don't // compress the input, this just passses through a copy from src to // destination. However, there are functions (e.g. with quantization) where // the in-memory representation is not the same as the raw input. - void transformData(void *destination, const void *src) { - static_cast(this)->transformDataImpl(destination, src); + void transformData(void* destination, const void* src) { + static_cast(this)->transformDataImpl(destination, src); } // Serializes the distance function to disk. - template void serialize(Archive &archive) { - static_cast(this)->template serialize(archive); + template + void serialize(Archive& archive) { + static_cast(this)->template serialize(archive); } }; -} // namespace flatnav::distances \ No newline at end of file +} // namespace flatnav::distances \ No newline at end of file diff --git a/flatnav/distances/IPDistanceDispatcher.h b/flatnav/distances/IPDistanceDispatcher.h index 1e5149c..0124dae 100644 --- a/flatnav/distances/IPDistanceDispatcher.h +++ b/flatnav/distances/IPDistanceDispatcher.h @@ -7,8 +7,7 @@ namespace flatnav::distances { template -static float defaultInnerProduct(const T *x, const T *y, - const size_t &dimension) { +static float defaultInnerProduct(const T* x, const T* y, const size_t& dimension) { float inner_product = 0; for (size_t i = 0; i < dimension; i++) { inner_product += x[i] * y[i]; @@ -16,16 +15,16 @@ static float defaultInnerProduct(const T *x, const T *y, return 1.0f - inner_product; } -template struct InnerProductImpl { - static float computeDistance(const T *x, const T *y, - const size_t &dimension) { +template +struct InnerProductImpl { + static float computeDistance(const T* x, const T* y, const size_t& dimension) { return defaultInnerProduct(x, y, dimension); } }; -template <> struct InnerProductImpl { - static float computeDistance(const float *x, const float *y, - const size_t &dimension) { +template <> +struct InnerProductImpl { + static float computeDistance(const float* x, const float* y, const size_t& dimension) { #if defined(USE_AVX512) if (platformSupportsAvx512()) { if (dimension % 16 == 0) { @@ -78,26 +77,26 @@ template <> struct InnerProductImpl { }; // TODO: Include SIMD optimized implementations for int8_t. -template <> struct InnerProductImpl { - static float computeDistance(const int8_t *x, const int8_t *y, - const size_t &dimension) { +template <> +struct InnerProductImpl { + static float computeDistance(const int8_t* x, const int8_t* y, const size_t& dimension) { return defaultInnerProduct(x, y, dimension); } }; // TODO: Include SIMD optimized implementations for uint8_t. -template <> struct InnerProductImpl { - static float computeDistance(const uint8_t *x, const uint8_t *y, - const size_t &dimension) { +template <> +struct InnerProductImpl { + static float computeDistance(const uint8_t* x, const uint8_t* y, const size_t& dimension) { return defaultInnerProduct(x, y, dimension); } }; struct IPDistanceDispatcher { template - static float dispatch(const T *x, const T *y, const size_t &dimension) { + static float dispatch(const T* x, const T* y, const size_t& dimension) { return InnerProductImpl::computeDistance(x, y, dimension); } }; -} // namespace flatnav::distances \ No newline at end of file +} // namespace flatnav::distances \ No newline at end of file diff --git a/flatnav/distances/InnerProductDistance.h b/flatnav/distances/InnerProductDistance.h index 29efd7f..559b262 100644 --- a/flatnav/distances/InnerProductDistance.h +++ b/flatnav/distances/InnerProductDistance.h @@ -1,13 +1,13 @@ #pragma once -#include -#include -#include // for size_t -#include // for memcpy #include #include #include #include +#include +#include +#include // for size_t +#include // for memcpy #include #include #include @@ -21,38 +21,35 @@ using util::DataType; using util::type_for_data_type; template -class InnerProductDistance - : public DistanceInterface> { +class InnerProductDistance : public DistanceInterface> { friend class DistanceInterface; // Enum for compile-time constant enum { DISTANCE_ID = 1 }; -public: + public: InnerProductDistance() = default; InnerProductDistance(size_t dim) - : _dimension(dim), - _data_size_bytes(dim * flatnav::util::size(data_type)) {} + : _dimension(dim), _data_size_bytes(dim * flatnav::util::size(data_type)) {} static std::unique_ptr> create(size_t dim) { return std::make_unique>(dim); } - constexpr float distanceImpl(const void *x, const void *y, - [[maybe_unused]] bool asymmetric = false) const { - return IPDistanceDispatcher::dispatch( - static_cast::type *>(x), - static_cast::type *>(y), - _dimension); + constexpr float distanceImpl(const void* x, const void* y, [[maybe_unused]] bool asymmetric = false) const { + return IPDistanceDispatcher::dispatch(static_cast::type*>(x), + static_cast::type*>(y), + _dimension); } -private: + private: size_t _dimension; size_t _data_size_bytes; friend class cereal::access; - template void serialize(Archive &ar) { + template + void serialize(Archive& ar) { ar(_dimension, _data_size_bytes); } @@ -60,9 +57,7 @@ class InnerProductDistance size_t dataSizeImpl() { return _data_size_bytes; } - void transformDataImpl(void *dst, const void *src) { - std::memcpy(dst, src, _data_size_bytes); - } + void transformDataImpl(void* dst, const void* src) { std::memcpy(dst, src, _data_size_bytes); } void getSummaryImpl() { std::cout << "\nInnerProductDistance Parameters" << std::flush; @@ -73,4 +68,4 @@ class InnerProductDistance } }; -} // namespace flatnav::distances \ No newline at end of file +} // namespace flatnav::distances \ No newline at end of file diff --git a/flatnav/distances/L2DistanceDispatcher.h b/flatnav/distances/L2DistanceDispatcher.h index f84dc6d..671e951 100644 --- a/flatnav/distances/L2DistanceDispatcher.h +++ b/flatnav/distances/L2DistanceDispatcher.h @@ -7,7 +7,7 @@ namespace flatnav::distances { template -static float defaultSquaredL2(const T *x, const T *y, const size_t &dimension) { +static float defaultSquaredL2(const T* x, const T* y, const size_t& dimension) { float squared_distance = 0; for (size_t i = 0; i < dimension; i++) { float difference = x[i] - y[i]; @@ -20,7 +20,8 @@ static float defaultSquaredL2(const T *x, const T *y, const size_t &dimension) { // distance // between two arrays of type T. // @TODO: We should add constraints to the T type. -template struct SquaredL2Impl { +template +struct SquaredL2Impl { /** * Computes the squared L2 distance between two arrays of type T. * @@ -29,16 +30,15 @@ template struct SquaredL2Impl { * @param dimension The dimension of the arrays. * @return The squared L2 distance between the two arrays. */ - static float computeDistance(const T *x, const T *y, - const size_t &dimension) { + static float computeDistance(const T* x, const T* y, const size_t& dimension) { return defaultSquaredL2(x, y, dimension); } }; // Specialization of SquaredL2Impl for the float type. -template <> struct SquaredL2Impl { - static float computeDistance(const float *x, const float *y, - const size_t &dimension) { +template <> +struct SquaredL2Impl { + static float computeDistance(const float* x, const float* y, const size_t& dimension) { #if defined(USE_AVX512) if (platformSupportsAvx512()) { if (dimension % 16 == 0) { @@ -86,9 +86,9 @@ template <> struct SquaredL2Impl { } }; -template <> struct SquaredL2Impl { - static float computeDistance(const int8_t *x, const int8_t *y, - const size_t &dimension) { +template <> +struct SquaredL2Impl { + static float computeDistance(const int8_t* x, const int8_t* y, const size_t& dimension) { // #if defined(USE_AVX512BW) && defined(USE_AVX512VNNI) // if (platformSupportsAvx512()) { // return flatnav::util::computeL2_Avx512_int8(x, y, dimension); @@ -103,9 +103,9 @@ template <> struct SquaredL2Impl { } }; -template <> struct SquaredL2Impl { - static float computeDistance(const uint8_t *x, const uint8_t *y, - const size_t &dimension) { +template <> +struct SquaredL2Impl { + static float computeDistance(const uint8_t* x, const uint8_t* y, const size_t& dimension) { #if defined(USE_AVX512) if (platformSupportsAvx512()) { if (dimension % 64 == 0) { @@ -120,9 +120,9 @@ template <> struct SquaredL2Impl { struct L2DistanceDispatcher { template - static float dispatch(const T *x, const T *y, const size_t &dimension) { + static float dispatch(const T* x, const T* y, const size_t& dimension) { return SquaredL2Impl::computeDistance(x, y, dimension); } }; -} // namespace flatnav::distances \ No newline at end of file +} // namespace flatnav::distances \ No newline at end of file diff --git a/flatnav/distances/SquaredL2Distance.h b/flatnav/distances/SquaredL2Distance.h index a5a2996..76bf957 100644 --- a/flatnav/distances/SquaredL2Distance.h +++ b/flatnav/distances/SquaredL2Distance.h @@ -1,13 +1,13 @@ #pragma once -#include -#include -#include -#include // for size_t -#include // for memcpy #include #include #include #include +#include +#include +#include +#include // for size_t +#include // for memcpy #include #include #include @@ -21,18 +21,15 @@ namespace flatnav::distances { using util::DataType; using util::type_for_data_type; - template -class SquaredL2Distance - : public DistanceInterface> { +class SquaredL2Distance : public DistanceInterface> { friend class DistanceInterface; enum { DISTANCE_ID = 0 }; -public: + public: SquaredL2Distance() = default; - SquaredL2Distance(size_t dim) - : _dimension(dim), _data_size_bytes(dim * util::size(data_type)) {} + SquaredL2Distance(size_t dim) : _dimension(dim), _data_size_bytes(dim * util::size(data_type)) {} static std::unique_ptr> create(size_t dim) { return std::make_unique>(dim); @@ -40,27 +37,26 @@ class SquaredL2Distance inline constexpr size_t getDimension() const { return _dimension; } - constexpr float distanceImpl(const void *x, const void *y, - [[maybe_unused]] bool asymmetric = false) const { - return L2DistanceDispatcher::dispatch( - static_cast::type *>(x), - static_cast::type *>(y), - _dimension); + constexpr float distanceImpl(const void* x, const void* y, [[maybe_unused]] bool asymmetric = false) const { + return L2DistanceDispatcher::dispatch(static_cast::type*>(x), + static_cast::type*>(y), + _dimension); } -private: + private: size_t _dimension; size_t _data_size_bytes; friend class ::cereal::access; - template void serialize(Archive &ar) { + template + void serialize(Archive& ar) { ar(_dimension, _data_size_bytes); } inline size_t dataSizeImpl() { return _data_size_bytes; } - inline void transformDataImpl(void *destination, const void *src) { + inline void transformDataImpl(void* destination, const void* src) { std::memcpy(destination, src, _data_size_bytes); } @@ -73,4 +69,4 @@ class SquaredL2Distance } }; -} // namespace flatnav::distances +} // namespace flatnav::distances diff --git a/flatnav/index/Index.h b/flatnav/index/Index.h index 97d4593..31a2f5b 100644 --- a/flatnav/index/Index.h +++ b/flatnav/index/Index.h @@ -1,5 +1,10 @@ #pragma once +#include +#include +#include +#include +#include #include #include #include @@ -8,11 +13,6 @@ #include #include #include -#include -#include -#include -#include -#include #include #include #include @@ -30,7 +30,8 @@ namespace flatnav { // dist_t: A distance function implementing DistanceInterface. // label_t: A fixed-width data type for the label (meta-data) of each point. -template class Index { +template +class Index { typedef std::pair dist_label_t; // internal node numbering scheme. We might need to change this to uint64_t typedef uint32_t node_id_t; @@ -39,11 +40,10 @@ template class Index { // NOTE: by default this is a max-heap. We could make this a min-heap // by using std::greater, but we want to use the queue as both a max-heap and // min-heap depending on the context. - typedef std::priority_queue> - PriorityQueue; + typedef std::priority_queue> PriorityQueue; // Large (several GB), pre-allocated block of memory. - char *_index_memory; + char* _index_memory; size_t _M; // size of one data point (does not support variable-size data, strings) @@ -51,7 +51,7 @@ template class Index { // Node consists of: ([data] [M links] [data label]). This layout was chosen // after benchmarking - it's slightly more cache-efficient than others. size_t _node_size_bytes; - size_t _max_node_count; // Determines size of internal pre-allocated memory + size_t _max_node_count; // Determines size of internal pre-allocated memory size_t _cur_num_nodes; std::unique_ptr> _distance; std::mutex _index_data_guard; @@ -59,7 +59,7 @@ template class Index { uint32_t _num_threads; // Remembers which nodes we've visited, to avoid re-computing distances. - VisitedSetPool *_visited_set_pool; + VisitedSetPool* _visited_set_pool; std::vector _node_links_mutexes; bool _collect_stats = false; @@ -69,8 +69,8 @@ template class Index { mutable std::atomic _distance_computations = 0; mutable std::atomic _metric_hops = 0; - Index(const Index &) = delete; - Index &operator=(const Index &) = delete; + Index(const Index&) = delete; + Index& operator=(const Index&) = delete; // A custom move constructor is needed because the class manages dynamic // resources (_index_memory, _visited_set_pool), @@ -78,8 +78,9 @@ template class Index { // leaks or double frees. The default move constructor cannot ensure these // resources are safely transferred and the source object is left in a valid // state. - Index(Index &&other) noexcept - : _index_memory(other._index_memory), _M(other._M), + Index(Index&& other) noexcept + : _index_memory(other._index_memory), + _M(other._M), _data_size_bytes(other._data_size_bytes), _node_size_bytes(other._node_size_bytes), _max_node_count(other._max_node_count), @@ -93,7 +94,7 @@ template class Index { other._visited_set_pool = nullptr; } - Index &operator=(Index &&other) noexcept { + Index& operator=(Index&& other) noexcept { if (this != &other) { delete[] _index_memory; delete _visited_set_pool; @@ -116,16 +117,15 @@ template class Index { return *this; } - template void serialize(Archive &archive) { - archive(_M, _data_size_bytes, _node_size_bytes, _max_node_count, - _cur_num_nodes, *_distance); + template + void serialize(Archive& archive) { + archive(_M, _data_size_bytes, _node_size_bytes, _max_node_count, _cur_num_nodes, *_distance); // Serialize the allocated memory for the index & query. - archive( - cereal::binary_data(_index_memory, _node_size_bytes * _max_node_count)); + archive(cereal::binary_data(_index_memory, _node_size_bytes * _max_node_count)); } -public: + public: /** * @brief Construct a new Index object for approximate near neighbor search. * @@ -141,21 +141,24 @@ template class Index { * @param collect_stats Flag indicating whether to collect statistics during * the search process. */ - Index(std::unique_ptr> dist, int dataset_size, - int max_edges_per_node, bool collect_stats = false) - : _M(max_edges_per_node), _max_node_count(dataset_size), - _cur_num_nodes(0), _distance(std::move(dist)), _num_threads(1), + Index(std::unique_ptr> dist, int dataset_size, int max_edges_per_node, + bool collect_stats = false) + : _M(max_edges_per_node), + _max_node_count(dataset_size), + _cur_num_nodes(0), + _distance(std::move(dist)), + _num_threads(1), _visited_set_pool(new VisitedSetPool( /* initial_pool_size = */ 1, /* num_elements = */ dataset_size)), - _node_links_mutexes(dataset_size), _collect_stats(collect_stats) { + _node_links_mutexes(dataset_size), + _collect_stats(collect_stats) { // Get the size in bytes of the _node_links_mutexes vector. size_t mutexes_size_bytes = _node_links_mutexes.size() * sizeof(std::mutex); _data_size_bytes = _distance->dataSize(); - _node_size_bytes = - _data_size_bytes + (sizeof(node_id_t) * _M) + sizeof(label_t); + _node_size_bytes = _data_size_bytes + (sizeof(node_id_t) * _M) + sizeof(label_t); size_t index_memory_size = _node_size_bytes * _max_node_count; _index_memory = new char[index_memory_size]; @@ -166,11 +169,10 @@ template class Index { delete _visited_set_pool; } - void buildGraphLinks(const std::string &mtx_filename) { + void buildGraphLinks(const std::string& mtx_filename) { std::ifstream input_file(mtx_filename); if (!input_file.is_open()) { - throw std::runtime_error("Unable to open file for reading: " + - mtx_filename); + throw std::runtime_error("Unable to open file for reading: " + mtx_filename); } std::string line; @@ -188,13 +190,15 @@ template class Index { // nodes in the index and that the number of edges is equal to the number of // links per node. if (num_vertices != _max_node_count) { - throw std::runtime_error("Number of vertices in the mtx file does not " - "match the size allocated for the index."); + throw std::runtime_error( + "Number of vertices in the mtx file does not " + "match the size allocated for the index."); } if (num_edges != _M) { - throw std::runtime_error("Number of edges in the mtx file does not match " - "the number of links per node."); + throw std::runtime_error( + "Number of edges in the mtx file does not match " + "the number of links per node."); } int u, v; @@ -202,7 +206,7 @@ template class Index { // Adjust for 1-based indexing in Matrix Market format u--; v--; - node_id_t *links = getNodeLinks(u); + node_id_t* links = getNodeLinks(u); // Now add a directed edge from u to v. We need to check for the first // available slot in the links array since there might be other edges // added before this one. By definition, a slot is available if and only @@ -221,7 +225,7 @@ template class Index { std::vector> getGraphOutdegreeTable() { std::vector> outdegree_table(_cur_num_nodes); for (node_id_t node = 0; node < _cur_num_nodes; node++) { - node_id_t *links = getNodeLinks(node); + node_id_t* links = getNodeLinks(node); for (int i = 0; i < _M; i++) { if (links[i] != node) { outdegree_table[node].push_back(links[i]); @@ -240,7 +244,7 @@ template class Index { * @param label The label (meta-data) of the vector. * @param new_node_id The id of the new node. */ - void allocateNode(void *data, label_t &label, node_id_t &new_node_id) { + void allocateNode(void* data, label_t& label, node_id_t& new_node_id) { new_node_id = _cur_num_nodes; _distance->transformData( @@ -248,7 +252,7 @@ template class Index { /* src = */ data); *(getNodeLabel(new_node_id)) = label; - node_id_t *links = getNodeLinks(new_node_id); + node_id_t* links = getNodeLinks(new_node_id); // Initialize all edges to self std::fill_n(links, _M, new_node_id); _cur_num_nodes++; @@ -281,11 +285,10 @@ template class Index { * index is reached. */ template - void addBatch(void *data, std::vector &labels, int ef_construction, + void addBatch(void* data, std::vector& labels, int ef_construction, int num_initializations = 100) { if (num_initializations <= 0) { - throw std::invalid_argument( - "num_initializations must be greater than 0."); + throw std::invalid_argument("num_initializations must be greater than 0."); } uint32_t total_num_nodes = labels.size(); uint32_t data_dimension = _distance->dimension(); @@ -293,7 +296,7 @@ template class Index { // Don't spawn any threads if we are only using one. if (_num_threads == 1) { for (uint32_t row_id = 0; row_id < total_num_nodes; row_id++) { - void *vector = (data_type *)data + (row_id * data_dimension); + void* vector = (data_type*)data + (row_id * data_dimension); label_t label = labels[row_id]; this->add(vector, label, ef_construction, num_initializations); } @@ -304,7 +307,7 @@ template class Index { /* start_index = */ 0, /* end_index = */ total_num_nodes, /* num_threads = */ _num_threads, /* function = */ [&](uint32_t row_index) { - void *vector = (data_type *)data + (row_index * data_dimension); + void* vector = (data_type*)data + (row_index * data_dimension); label_t label = labels[row_index]; this->add(vector, label, ef_construction, num_initializations); }); @@ -332,13 +335,13 @@ template class Index { * @exception std::runtime_error Thrown if the maximum number of nodes is * reached. */ - void add(void *data, label_t &label, int ef_construction, - int num_initializations) { + void add(void* data, label_t& label, int ef_construction, int num_initializations) { if (_cur_num_nodes >= _max_node_count) { - throw std::runtime_error("Maximum number of nodes reached. Consider " - "increasing the `max_node_count` parameter to " - "create a larger index."); + throw std::runtime_error( + "Maximum number of nodes reached. Consider " + "increasing the `max_node_count` parameter to " + "create a larger index."); } _index_data_guard.lock(); auto entry_node = initializeSearch(data, num_initializations); @@ -365,26 +368,21 @@ template class Index { * @param ef_search The search beam width. * @param num_initializations The number of random initializations to use. */ - std::vector search(const void *query, const int K, - int ef_search, + std::vector search(const void* query, const int K, int ef_search, int num_initializations = 100) { node_id_t entry_node = initializeSearch(query, num_initializations); - PriorityQueue neighbors = - beamSearch(/* query = */ query, - /* entry_node = */ entry_node, - /* buffer_size = */ std::max(ef_search, K)); + PriorityQueue neighbors = beamSearch(/* query = */ query, + /* entry_node = */ entry_node, + /* buffer_size = */ std::max(ef_search, K)); auto size = neighbors.size(); std::vector results; results.reserve(size); while (!neighbors.empty()) { - results.emplace_back(neighbors.top().first, - *getNodeLabel(neighbors.top().second)); + results.emplace_back(neighbors.top().first, *getNodeLabel(neighbors.top().second)); neighbors.pop(); } std::sort(results.begin(), results.end(), - [](const dist_label_t &left, const dist_label_t &right) { - return left.first < right.first; - }); + [](const dist_label_t& left, const dist_label_t& right) { return left.first < right.first; }); if (results.size() > static_cast(K)) { results.resize(K); } @@ -392,9 +390,9 @@ template class Index { return results; } - void doGraphReordering(const std::vector &reordering_methods) { + void doGraphReordering(const std::vector& reordering_methods) { - for (const auto &method : reordering_methods) { + for (const auto& method : reordering_methods) { auto outdegree_table = getGraphOutdegreeTable(); std::vector P; if (method == "gorder") { @@ -411,8 +409,7 @@ template class Index { void reorderGOrder(const int window_size = 5) { auto outdegree_table = getGraphOutdegreeTable(); - std::vector P = - util::gOrder(outdegree_table, window_size); + std::vector P = util::gOrder(outdegree_table, window_size); relabel(P); } @@ -423,8 +420,7 @@ template class Index { relabel(P); } - static std::unique_ptr> - loadIndex(const std::string &filename) { + static std::unique_ptr> loadIndex(const std::string& filename) { std::ifstream stream(filename, std::ios::binary); if (!stream.is_open()) { @@ -434,34 +430,28 @@ template class Index { cereal::BinaryInputArchive archive(stream); std::unique_ptr> index(new Index()); - std::unique_ptr> dist = - std::make_unique(); + std::unique_ptr> dist = std::make_unique(); // 1. Deserialize metadata - archive(index->_M, index->_data_size_bytes, index->_node_size_bytes, - index->_max_node_count, index->_cur_num_nodes, *dist); + archive(index->_M, index->_data_size_bytes, index->_node_size_bytes, index->_max_node_count, + index->_cur_num_nodes, *dist); index->_visited_set_pool = new VisitedSetPool( /* initial_pool_size = */ 1, /* num_elements = */ index->_max_node_count); index->_distance = std::move(dist); - index->_num_threads = std::max( - (uint32_t)1, (uint32_t)std::thread::hardware_concurrency() / 2); - index->_node_links_mutexes = - std::vector(index->_max_node_count); + index->_num_threads = std::max((uint32_t)1, (uint32_t)std::thread::hardware_concurrency() / 2); + index->_node_links_mutexes = std::vector(index->_max_node_count); // 2. Allocate memory using deserialized metadata - index->_index_memory = - new char[index->_node_size_bytes * index->_max_node_count]; + index->_index_memory = new char[index->_node_size_bytes * index->_max_node_count]; // 3. Deserialize content into allocated memory - archive( - cereal::binary_data(index->_index_memory, - index->_node_size_bytes * index->_max_node_count)); + archive(cereal::binary_data(index->_index_memory, index->_node_size_bytes * index->_max_node_count)); return index; } - void saveIndex(const std::string &filename) { + void saveIndex(const std::string& filename) { std::ofstream stream(filename, std::ios::binary); if (!stream.is_open()) { @@ -488,8 +478,7 @@ template class Index { return static_cast(_node_size_bytes * _max_node_count); } inline uint64_t mutexesAllocatedMemory() const { - return static_cast(_node_links_mutexes.size() * - sizeof(std::mutex)); + return static_cast(_node_links_mutexes.size() * sizeof(std::mutex)); } inline uint64_t visitedSetPoolAllocatedMemory() const { @@ -509,9 +498,7 @@ template class Index { inline size_t currentNumNodes() const { return _cur_num_nodes; } inline size_t dataDimension() const { return _distance->dimension(); } - inline uint64_t distanceComputations() const { - return _distance_computations.load(); - } + inline uint64_t distanceComputations() const { return _distance_computations.load(); } void resetStats() { _distance_computations = 0; @@ -530,28 +517,25 @@ template class Index { _distance->getSummary(); } -private: + private: friend class cereal::access; // Default constructor for cereal Index() = default; - char *getNodeData(const node_id_t &n) const { - return _index_memory + (n * _node_size_bytes); - } + char* getNodeData(const node_id_t& n) const { return _index_memory + (n * _node_size_bytes); } - node_id_t *getNodeLinks(const node_id_t &n) const { - char *location = _index_memory + (n * _node_size_bytes) + _data_size_bytes; - return reinterpret_cast(location); + node_id_t* getNodeLinks(const node_id_t& n) const { + char* location = _index_memory + (n * _node_size_bytes) + _data_size_bytes; + return reinterpret_cast(location); } - label_t *getNodeLabel(const node_id_t &n) const { - char *location = _index_memory + (n * _node_size_bytes) + _data_size_bytes + - (_M * sizeof(node_id_t)); - return reinterpret_cast(location); + label_t* getNodeLabel(const node_id_t& n) const { + char* location = _index_memory + (n * _node_size_bytes) + _data_size_bytes + (_M * sizeof(node_id_t)); + return reinterpret_cast(location); } - inline void swapNodes(node_id_t a, node_id_t b, void *temp_data, - node_id_t *temp_links, label_t *temp_label) { + inline void swapNodes(node_id_t a, node_id_t b, void* temp_data, node_id_t* temp_links, + label_t* temp_label) { // stash b in temp std::memcpy(temp_data, getNodeData(b), _data_size_bytes); @@ -580,12 +564,11 @@ template class Index { * * @return PriorityQueue */ - PriorityQueue beamSearch(const void *query, const node_id_t entry_node, - const int buffer_size) { + PriorityQueue beamSearch(const void* query, const node_id_t entry_node, const int buffer_size) { PriorityQueue neighbors; PriorityQueue candidates; - auto *visited_set = _visited_set_pool->pollAvailableSet(); + auto* visited_set = _visited_set_pool->pollAvailableSet(); visited_set->clear(); // Prefetch the data for entry node before computing its distance. @@ -593,9 +576,8 @@ template class Index { _mm_prefetch(getNodeData(entry_node), _MM_HINT_T0); #endif - float dist = - _distance->distance(/* x = */ query, /* y = */ getNodeData(entry_node), - /* asymmetric = */ true); + float dist = _distance->distance(/* x = */ query, /* y = */ getNodeData(entry_node), + /* asymmetric = */ true); float max_dist = dist; candidates.emplace(-dist, entry_node); @@ -636,15 +618,13 @@ template class Index { return neighbors; } - void processCandidateNode(const void *query, node_id_t &node, float &max_dist, - const int buffer_size, VisitedSet *visited_set, - PriorityQueue &neighbors, - PriorityQueue &candidates) { + void processCandidateNode(const void* query, node_id_t& node, float& max_dist, const int buffer_size, + VisitedSet* visited_set, PriorityQueue& neighbors, PriorityQueue& candidates) { // Lock all operations on this specific node std::unique_lock lock(_node_links_mutexes[node]); float dist = 0.f; - node_id_t *neighbor_node_links = getNodeLinks(node); + node_id_t* neighbor_node_links = getNodeLinks(node); for (uint32_t i = 0; i < _M; i++) { node_id_t neighbor_node_id = neighbor_node_links[i]; @@ -657,8 +637,7 @@ template class Index { } #endif - bool neighbor_is_visited = - visited_set->isVisited(/* num = */ neighbor_node_id); + bool neighbor_is_visited = visited_set->isVisited(/* num = */ neighbor_node_id); if (neighbor_is_visited) { continue; @@ -693,7 +672,7 @@ template class Index { * heuristic. The neighbors priority queue contains elements sorted by * distance where the top element is the furthest neighbor from the query. */ - void selectNeighbors(PriorityQueue &neighbors) { + void selectNeighbors(PriorityQueue& neighbors) { if (neighbors.size() < _M) { return; } @@ -717,11 +696,10 @@ template class Index { candidates.pop(); bool should_keep_candidate = true; - for (const dist_node_t &second_pair : saved_candidates) { + for (const dist_node_t& second_pair : saved_candidates) { - cur_dist = - _distance->distance(/* x = */ getNodeData(second_pair.second), - /* y = */ getNodeData(current_pair.second)); + cur_dist = _distance->distance(/* x = */ getNodeData(second_pair.second), + /* y = */ getNodeData(current_pair.second)); if (cur_dist < (-current_pair.first)) { should_keep_candidate = false; @@ -737,19 +715,19 @@ template class Index { } // TODO: implement my own priority queue, get rid of vector // saved_candidates, add directly to neighborqueue earlier. - for (const dist_node_t ¤t_pair : saved_candidates) { + for (const dist_node_t& current_pair : saved_candidates) { neighbors.emplace(-current_pair.first, current_pair.second); } } - void connectNeighbors(PriorityQueue &neighbors, node_id_t new_node_id) { + void connectNeighbors(PriorityQueue& neighbors, node_id_t new_node_id) { // connects neighbors according to the HSNW heuristic // Lock all operations on this node std::unique_lock lock(_node_links_mutexes[new_node_id]); - node_id_t *new_node_links = getNodeLinks(new_node_id); - int i = 0; // iterates through links for "new_node_id" + node_id_t* new_node_links = getNodeLinks(new_node_id); + int i = 0; // iterates through links for "new_node_id" while (neighbors.size() > 0) { node_id_t neighbor_node_id = neighbors.top().second; @@ -757,9 +735,8 @@ template class Index { new_node_links[i] = neighbor_node_id; // now do the back-connections (a little tricky) - std::unique_lock neighbor_lock( - _node_links_mutexes[neighbor_node_id]); - node_id_t *neighbor_node_links = getNodeLinks(neighbor_node_id); + std::unique_lock neighbor_lock(_node_links_mutexes[neighbor_node_id]); + node_id_t* neighbor_node_links = getNodeLinks(neighbor_node_id); bool is_inserted = false; for (size_t j = 0; j < _M; j++) { if (neighbor_node_links[j] == neighbor_node_id) { @@ -777,30 +754,28 @@ template class Index { // construct a candidate set including the old links AND our new // one, then prune this candidate set to get the new neighbors. - float max_dist = - _distance->distance(/* x = */ getNodeData(neighbor_node_id), - /* y = */ getNodeData(new_node_id)); + float max_dist = _distance->distance(/* x = */ getNodeData(neighbor_node_id), + /* y = */ getNodeData(new_node_id)); PriorityQueue candidates; candidates.emplace(max_dist, new_node_id); for (size_t j = 0; j < _M; j++) { if (neighbor_node_links[j] != neighbor_node_id) { auto label = neighbor_node_links[j]; - auto distance = - _distance->distance(/* x = */ getNodeData(neighbor_node_id), - /* y = */ getNodeData(label)); + auto distance = _distance->distance(/* x = */ getNodeData(neighbor_node_id), + /* y = */ getNodeData(label)); candidates.emplace(distance, label); } } selectNeighbors(candidates); // connect the pruned set of candidates, including self-loops: size_t j = 0; - while (candidates.size() > 0) { // candidates + while (candidates.size() > 0) { // candidates neighbor_node_links[j] = candidates.top().second; candidates.pop(); j++; } - while (j < _M) { // self-loops (unused links) + while (j < _M) { // self-loops (unused links) neighbor_node_links[j] = neighbor_node_id; j++; } @@ -827,12 +802,10 @@ template class Index { * @param num_initializations * @return node_id_t */ - inline node_id_t initializeSearch(const void *query, - int num_initializations) { + inline node_id_t initializeSearch(const void* query, int num_initializations) { // select entry_node from a set of random entry point options if (num_initializations <= 0) { - throw std::invalid_argument( - "num_initializations must be greater than 0."); + throw std::invalid_argument("num_initializations must be greater than 0."); } int step_size = _cur_num_nodes / num_initializations; @@ -846,9 +819,8 @@ template class Index { } for (node_id_t node = 0; node < _cur_num_nodes; node += step_size) { - float dist = - _distance->distance(/* x = */ query, /* y = */ getNodeData(node), - /* asymmetric = */ true); + float dist = _distance->distance(/* x = */ query, /* y = */ getNodeData(node), + /* asymmetric = */ true); if (dist < min_dist) { min_dist = dist; entry_node = node; @@ -857,21 +829,21 @@ template class Index { return entry_node; } - void relabel(const std::vector &P) { + void relabel(const std::vector& P) { // 1. Rewire all of the node connections for (node_id_t n = 0; n < _cur_num_nodes; n++) { - node_id_t *links = getNodeLinks(n); + node_id_t* links = getNodeLinks(n); for (int m = 0; m < _M; m++) { links[m] = P[links[m]]; } } // 2. Physically re-layout the nodes (in place) - char *temp_data = new char[_data_size_bytes]; - node_id_t *temp_links = new node_id_t[_M]; - label_t *temp_label = new label_t; + char* temp_data = new char[_data_size_bytes]; + node_id_t* temp_links = new node_id_t[_M]; + label_t* temp_label = new label_t; - auto *visited_set = _visited_set_pool->pollAvailableSet(); + auto* visited_set = _visited_set_pool->pollAvailableSet(); // In this context, is_visited stores which nodes have been relocated // (it would be equivalent to name this variable "is_relocated"). @@ -914,4 +886,4 @@ template class Index { } }; -} // namespace flatnav +} // namespace flatnav diff --git a/flatnav/tests/test_distances.cpp b/flatnav/tests/test_distances.cpp index d7a4f00..adb1fd6 100644 --- a/flatnav/tests/test_distances.cpp +++ b/flatnav/tests/test_distances.cpp @@ -1,9 +1,9 @@ -#include "gtest/gtest.h" -#include #include #include +#include #include +#include "gtest/gtest.h" #include #include @@ -11,7 +11,7 @@ namespace flatnav::testing { class DistanceTest : public ::testing::Test { -protected: + protected: void SetUp() override { // Initialize x and y with values drawn from a normal distribution std::default_random_engine generator; @@ -37,8 +37,7 @@ class DistanceTest : public ::testing::Test { TEST_F(DistanceTest, TestAvx512L2Distance) { #if defined(USE_AVX512) float result = flatnav::util::computeL2_Avx512(x, y, dimensions); - float expected = - flatnav::distances::defaultSquaredL2(x, y, dimensions); + float expected = flatnav::distances::defaultSquaredL2(x, y, dimensions); ASSERT_NEAR(result, expected, epsilon); #endif @@ -49,19 +48,18 @@ TEST_F(DistanceTest, TestAvx512L2DistanceUint8) { #if defined(USE_AVX512) auto total_num_vectors = 1000; auto total_size = dimensions * total_num_vectors; - uint8_t *x_matrix = (uint8_t *)malloc(total_size); - uint8_t *y_matrix = (uint8_t *)malloc(total_size); + uint8_t* x_matrix = (uint8_t*)malloc(total_size); + uint8_t* y_matrix = (uint8_t*)malloc(total_size); for (size_t i = 0; i < total_size; i++) { x_matrix[i] = (uint8_t)rand() % 256; y_matrix[i] = (uint8_t)rand() % 256; } for (size_t i = 0; i < total_num_vectors; i++) { - uint8_t *x = x_matrix + i * dimensions; - uint8_t *y = y_matrix + i * dimensions; + uint8_t* x = x_matrix + i * dimensions; + uint8_t* y = y_matrix + i * dimensions; float result = flatnav::util::computeL2_Avx512_Uint8(x, y, dimensions); - float expected = - flatnav::distances::defaultSquaredL2(x, y, dimensions); + float expected = flatnav::distances::defaultSquaredL2(x, y, dimensions); ASSERT_NEAR(result, expected, epsilon); } @@ -76,8 +74,7 @@ TEST_F(DistanceTest, TestAvxL2Distance) { #if defined(USE_AVX) float result = flatnav::util::computeL2_Avx2(x, y, dimensions); - float expected = - flatnav::distances::defaultSquaredL2(x, y, dimensions); + float expected = flatnav::distances::defaultSquaredL2(x, y, dimensions); ASSERT_NEAR(result, expected, epsilon); @@ -106,8 +103,7 @@ TEST(TestSingleIntrinsic, TestReduceAddSse) { TEST_F(DistanceTest, TestSseL2Distance) { #if defined(USE_SSE) float result = flatnav::util::computeL2_Sse(x, y, dimensions); - float expected = - flatnav::distances::defaultSquaredL2(x, y, dimensions); + float expected = flatnav::distances::defaultSquaredL2(x, y, dimensions); ASSERT_NEAR(result, expected, epsilon); // try with dimensions not divisible by 16 @@ -182,4 +178,4 @@ TEST_F(DistanceTest, TestSseInnerProductDistance) { #endif } -} // namespace flatnav::testing \ No newline at end of file +} // namespace flatnav::testing \ No newline at end of file diff --git a/flatnav/tests/test_serialization.cpp b/flatnav/tests/test_serialization.cpp index 5ffc4a2..f87513f 100644 --- a/flatnav/tests/test_serialization.cpp +++ b/flatnav/tests/test_serialization.cpp @@ -1,11 +1,11 @@ -#include "gtest/gtest.h" -#include -#include // for remove #include #include #include #include +#include +#include // for remove #include +#include "gtest/gtest.h" using flatnav::Index; using flatnav::distances::DistanceInterface; @@ -31,23 +31,20 @@ std::vector generateRandomVectors(uint32_t num_vectors, uint32_t dim) { } template -void runTest(float *data, std::unique_ptr> &&distance, - int N, int M, int dim, int ef_construction, - const std::string &save_file) { +void runTest(float* data, std::unique_ptr>&& distance, int N, int M, int dim, + int ef_construction, const std::string& save_file) { auto data_size = distance->dataSize(); - std::unique_ptr> index = - std::make_unique>( - /* dist = */ std::move(distance), /* dataset_size = */ N, - /* max_edges = */ M); + std::unique_ptr> index = std::make_unique>( + /* dist = */ std::move(distance), /* dataset_size = */ N, + /* max_edges = */ M); std::vector labels(N); std::iota(labels.begin(), labels.end(), 0); index->template addBatch(data, labels, ef_construction); index->saveIndex(/* filename = */ save_file); - auto new_index = - Index::loadIndex(/* filename = */ save_file); + auto new_index = Index::loadIndex(/* filename = */ save_file); ASSERT_EQ(new_index->maxEdgesPerNode(), M); ASSERT_EQ(new_index->dataSizeBytes(), index->dataSizeBytes()); @@ -55,19 +52,16 @@ void runTest(float *data, std::unique_ptr> &&distance, ASSERT_EQ(new_index->nodeSizeBytes(), data_size + (4 * M) + 4); ASSERT_EQ(new_index->maxNodeCount(), N); - uint64_t total_index_size = - new_index->nodeSizeBytes() * new_index->maxNodeCount(); + uint64_t total_index_size = new_index->nodeSizeBytes() * new_index->maxNodeCount(); std::vector queries = generateRandomVectors(QUERY_VECTORS, dim); for (uint32_t i = 0; i < QUERY_VECTORS; i++) { - float *q = queries.data() + (dim * i); + float* q = queries.data() + (dim * i); - std::vector> query_result = - index->search(q, K, EF_SEARCH); + std::vector> query_result = index->search(q, K, EF_SEARCH); - std::vector> new_query_result = - new_index->search(q, K, EF_SEARCH); + std::vector> new_query_result = new_index->search(q, K, EF_SEARCH); for (uint32_t j = 0; j < K; j++) { ASSERT_EQ(query_result[j].first, new_query_result[j].first); @@ -109,4 +103,4 @@ TEST(FlatnavSerializationTest, TestInnerProductIndexSerialization) { EXPECT_EQ(std::remove(save_file.c_str()), 0); } -} // namespace flatnav::testing \ No newline at end of file +} // namespace flatnav::testing \ No newline at end of file diff --git a/flatnav/util/Datatype.h b/flatnav/util/Datatype.h index c7dff7d..1a07b00 100644 --- a/flatnav/util/Datatype.h +++ b/flatnav/util/Datatype.h @@ -26,39 +26,39 @@ enum class DataType { /** * @brief Get a string representation of the data type */ -inline constexpr const char *name(DataType data_type) { +inline constexpr const char* name(DataType data_type) { switch (data_type) { - case DataType::uint8: - return "uint8"; - case DataType::uint16: - return "uint16"; - case DataType::uint32: - return "uint32"; - case DataType::uint64: - return "uint64"; - case DataType::int8: - return "int8"; - case DataType::int16: - return "int16"; - case DataType::int32: - return "int32"; - case DataType::int64: - return "int64"; - case DataType::float16: - return "float16"; - case DataType::float32: - return "float32"; - case DataType::float64: - return "float64"; - default: - return "undefined"; + case DataType::uint8: + return "uint8"; + case DataType::uint16: + return "uint16"; + case DataType::uint32: + return "uint32"; + case DataType::uint64: + return "uint64"; + case DataType::int8: + return "int8"; + case DataType::int16: + return "int16"; + case DataType::int32: + return "int32"; + case DataType::int64: + return "int64"; + case DataType::float16: + return "float16"; + case DataType::float32: + return "float32"; + case DataType::float64: + return "float64"; + default: + return "undefined"; } } /** * @brief Get the data type from a string representation */ -inline constexpr DataType type(const std::string_view &data_type) { +inline constexpr DataType type(const std::string_view& data_type) { if (data_type == "uint8") { return DataType::uint8; } else if (data_type == "uint16") { @@ -91,42 +91,48 @@ inline constexpr DataType type(const std::string_view &data_type) { */ inline constexpr size_t size(DataType data_type) { switch (data_type) { - case DataType::uint8: - return sizeof(uint8_t); - case DataType::uint16: - return sizeof(uint16_t); - case DataType::uint32: - return sizeof(uint32_t); - case DataType::uint64: - return sizeof(uint64_t); - case DataType::int8: - return sizeof(int8_t); - case DataType::int16: - return sizeof(int16_t); - case DataType::int32: - return sizeof(int32_t); - case DataType::int64: - return sizeof(int64_t); - case DataType::float16: - return sizeof(float) / 2; - case DataType::float32: - return sizeof(float); - case DataType::float64: - return sizeof(double); - default: - return 0; + case DataType::uint8: + return sizeof(uint8_t); + case DataType::uint16: + return sizeof(uint16_t); + case DataType::uint32: + return sizeof(uint32_t); + case DataType::uint64: + return sizeof(uint64_t); + case DataType::int8: + return sizeof(int8_t); + case DataType::int16: + return sizeof(int16_t); + case DataType::int32: + return sizeof(int32_t); + case DataType::int64: + return sizeof(int64_t); + case DataType::float16: + return sizeof(float) / 2; + case DataType::float32: + return sizeof(float); + case DataType::float64: + return sizeof(double); + default: + return 0; } } // Some nice template metaprogramming (TMP) to allow us to get compile-time // distance dispatching. -template struct type_for_data_type; +template +struct type_for_data_type; -template <> struct type_for_data_type { +template <> +struct type_for_data_type { using type = float; }; -template <> struct type_for_data_type { using type = int8_t; }; -template <> struct type_for_data_type { +template <> +struct type_for_data_type { + using type = int8_t; +}; +template <> +struct type_for_data_type { using type = uint8_t; }; @@ -151,7 +157,8 @@ template <> struct type_for_data_type { * @tparam F A callable object * @tparam data_types The data types to iterate over */ -template struct for_each_data_type; +template +struct for_each_data_type; /** * @brief Template specialization for for_each_data_type when there are data @@ -162,7 +169,7 @@ template struct for_each_data_type; */ template struct for_each_data_type { - static void apply(F &&f) { + static void apply(F&& f) { f.template operator()(); for_each_data_type::apply(std::forward(f)); } @@ -173,8 +180,9 @@ struct for_each_data_type { * types to iterate over * @tparam F A callable object */ -template struct for_each_data_type { - static void apply(F &&) {} +template +struct for_each_data_type { + static void apply(F&&) {} }; -} // namespace flatnav::util \ No newline at end of file +} // namespace flatnav::util \ No newline at end of file diff --git a/flatnav/util/GorderPriorityQueue.h b/flatnav/util/GorderPriorityQueue.h index 9fcf715..f9dcb16 100644 --- a/flatnav/util/GorderPriorityQueue.h +++ b/flatnav/util/GorderPriorityQueue.h @@ -10,7 +10,8 @@ namespace flatnav::util { -template class GorderPriorityQueue { +template +class GorderPriorityQueue { typedef std::unordered_map map_t; @@ -20,7 +21,7 @@ template class GorderPriorityQueue { }; std::vector _list; - map_t _index_table; // map: key -> index in _list + map_t _index_table; // map: key -> index in _list inline void swap(int i, int j) { Node tmp = _list[i]; @@ -30,8 +31,8 @@ template class GorderPriorityQueue { _index_table[_list[j].key] = j; } -public: - GorderPriorityQueue(const std::vector &nodes) { + public: + GorderPriorityQueue(const std::vector& nodes) { for (int i = 0; i < nodes.size(); i++) { _list.push_back({nodes[i], 0}); _index_table[nodes[i]] = i; @@ -53,9 +54,7 @@ template class GorderPriorityQueue { std::cout << std::endl; } - static bool compare(const Node &a, const Node &b) { - return (a.priority < b.priority); - } + static bool compare(const Node& a, const Node& b) { return (a.priority < b.priority); } void increment(node_id_t key) { typename map_t::const_iterator i = _index_table.find(key); @@ -67,9 +66,8 @@ template class GorderPriorityQueue { // _list[i->second].priority)){ new_index--; // } - auto it = - std::upper_bound(_list.begin(), _list.end(), _list[i->second], compare); - size_t new_index = it - _list.begin() - 1; // possible bug + auto it = std::upper_bound(_list.begin(), _list.end(), _list[i->second], compare); + size_t new_index = it - _list.begin() - 1; // possible bug // new_index points to the right-most element with same priority as key // i.e. priority equal to "_list[i->second].priority" (i.e. the current // priority) @@ -88,9 +86,8 @@ template class GorderPriorityQueue { // } // new_index++; // i shoudl do this better but am pressed for time now - auto it = - std::lower_bound(_list.begin(), _list.end(), _list[i->second], compare); - size_t new_index = it - _list.begin(); // POSSIBLE BUG + auto it = std::lower_bound(_list.begin(), _list.end(), _list[i->second], compare); + size_t new_index = it - _list.begin(); // POSSIBLE BUG // while((new_index > _list.size()) && (_list[new_index].priority == // _list[i->second].priority)){ new_index++; // } @@ -111,4 +108,4 @@ template class GorderPriorityQueue { size_t size() { return _list.size(); } }; -} // namespace flatnav::util \ No newline at end of file +} // namespace flatnav::util \ No newline at end of file diff --git a/flatnav/util/InnerProductSimdExtensions.h b/flatnav/util/InnerProductSimdExtensions.h index 65cc1c5..903776a 100644 --- a/flatnav/util/InnerProductSimdExtensions.h +++ b/flatnav/util/InnerProductSimdExtensions.h @@ -6,13 +6,12 @@ namespace flatnav::util { #if defined(USE_AVX512) -static float computeIP_Avx512(const void *x, const void *y, - const size_t &dimension) { - float *pointer_x = static_cast(const_cast(x)); - float *pointer_y = static_cast(const_cast(y)); +static float computeIP_Avx512(const void* x, const void* y, const size_t& dimension) { + float* pointer_x = static_cast(const_cast(x)); + float* pointer_y = static_cast(const_cast(y)); // Align to 16-floats boundary - const float *end_x = pointer_x + (dimension >> 4 << 4); + const float* end_x = pointer_x + (dimension >> 4 << 4); simd16float32 product, v1, v2; simd16float32 sum(0.0f); @@ -29,15 +28,14 @@ static float computeIP_Avx512(const void *x, const void *y, return 1.0f - total; } -#endif // USE_AVX512 +#endif // USE_AVX512 #if defined(USE_AVX) -static float computeIP_Avx(const void *x, const void *y, - const size_t &dimension) { - float *pointer_x = static_cast(const_cast(x)); - float *pointer_y = static_cast(const_cast(y)); +static float computeIP_Avx(const void* x, const void* y, const size_t& dimension) { + float* pointer_x = static_cast(const_cast(x)); + float* pointer_y = static_cast(const_cast(y)); - const float *end_x = pointer_x + (dimension >> 4 << 4); + const float* end_x = pointer_x + (dimension >> 4 << 4); simd8float32 product, v1, v2; simd8float32 sum(0.0f); @@ -61,14 +59,13 @@ static float computeIP_Avx(const void *x, const void *y, return 1.0f - total; } -static float computeIP_Avx_4aligned(const void *x, const void *y, - const size_t &dimension) { +static float computeIP_Avx_4aligned(const void* x, const void* y, const size_t& dimension) { - float *pointer_x = static_cast(const_cast(x)); - float *pointer_y = static_cast(const_cast(y)); + float* pointer_x = static_cast(const_cast(x)); + float* pointer_y = static_cast(const_cast(y)); - const float *first_chunk_end = pointer_x + (dimension >> 4 << 4); - const float *second_chunk_end = pointer_x + (dimension >> 2 << 2); + const float* first_chunk_end = pointer_x + (dimension >> 4 << 4); + const float* second_chunk_end = pointer_x + (dimension >> 2 << 2); simd8float32 v1, v2; simd8float32 sum(0.0f); @@ -82,8 +79,7 @@ static float computeIP_Avx_4aligned(const void *x, const void *y, } // TODO: See if we can reduce this to fewer instructions - simd4float32 aggregate = - simd4float32(sum.get_low()) + simd4float32(sum.get_high()); + simd4float32 aggregate = simd4float32(sum.get_low()) + simd4float32(sum.get_high()); simd4float32 v1_residual, v2_residual; while (pointer_x != second_chunk_end) { @@ -98,16 +94,15 @@ static float computeIP_Avx_4aligned(const void *x, const void *y, return 1.0f - total; } -#endif // USE_AVX +#endif // USE_AVX #if defined(USE_SSE) -const float computeIP_Sse(const void *x, const void *y, - const size_t &dimension) { - float *pointer_x = static_cast(const_cast(x)); - float *pointer_y = static_cast(const_cast(y)); +const float computeIP_Sse(const void* x, const void* y, const size_t& dimension) { + float* pointer_x = static_cast(const_cast(x)); + float* pointer_y = static_cast(const_cast(y)); - const float *end_x = pointer_x + (dimension >> 4 << 4); + const float* end_x = pointer_x + (dimension >> 4 << 4); simd4float32 v1, v2; simd4float32 sum(0.0f); @@ -141,12 +136,11 @@ const float computeIP_Sse(const void *x, const void *y, return 1.0f - total; } -const float computeIP_Sse_4aligned(const void *x, const void *y, - const size_t &dimension) { - float *pointer_x = static_cast(const_cast(x)); - float *pointer_y = static_cast(const_cast(y)); - const float *first_chunk_end = pointer_x + (dimension >> 4 << 4); - const float *second_chunk_end = pointer_x + (dimension >> 2 << 2); +const float computeIP_Sse_4aligned(const void* x, const void* y, const size_t& dimension) { + float* pointer_x = static_cast(const_cast(x)); + float* pointer_y = static_cast(const_cast(y)); + const float* first_chunk_end = pointer_x + (dimension >> 4 << 4); + const float* second_chunk_end = pointer_x + (dimension >> 2 << 2); simd4float32 v1, v2; simd4float32 sum(0.0f); @@ -188,8 +182,7 @@ const float computeIP_Sse_4aligned(const void *x, const void *y, return 1.0f - total; } -const float computeIP_SseWithResidual_16(const void *x, const void *y, - const size_t &dimension) { +const float computeIP_SseWithResidual_16(const void* x, const void* y, const size_t& dimension) { size_t aligned_dimension = dimension >> 4 << 4; size_t residual_dimension = dimension - aligned_dimension; @@ -200,18 +193,15 @@ const float computeIP_SseWithResidual_16(const void *x, const void *y, first_chunk_sum *= -1.0f; float residual_sum = 0.0f; - float *pointer_x = - static_cast(const_cast(x)) + aligned_dimension; - float *pointer_y = - static_cast(const_cast(y)) + aligned_dimension; + float* pointer_x = static_cast(const_cast(x)) + aligned_dimension; + float* pointer_y = static_cast(const_cast(y)) + aligned_dimension; for (size_t i = 0; i < residual_dimension; i++) { residual_sum += pointer_x[i] * pointer_y[i]; } return 1.0f - (first_chunk_sum + residual_sum); } -const float computeIP_SseWithResidual_4(const void *x, const void *y, - const size_t &dimension) { +const float computeIP_SseWithResidual_4(const void* x, const void* y, const size_t& dimension) { size_t aligned_dimension = dimension >> 2 << 2; size_t residual_dimension = dimension - aligned_dimension; @@ -222,16 +212,14 @@ const float computeIP_SseWithResidual_4(const void *x, const void *y, first_chunk_sum *= -1.0f; float residual_sum = 0.0f; - float *pointer_x = - static_cast(const_cast(x)) + aligned_dimension; - float *pointer_y = - static_cast(const_cast(y)) + aligned_dimension; + float* pointer_x = static_cast(const_cast(x)) + aligned_dimension; + float* pointer_y = static_cast(const_cast(y)) + aligned_dimension; for (size_t i = 0; i < residual_dimension; i++) { residual_sum += pointer_x[i] * pointer_y[i]; } return 1.0f - (first_chunk_sum + residual_sum); } -#endif // USE_SSE +#endif // USE_SSE -} // namespace flatnav::util +} // namespace flatnav::util diff --git a/flatnav/util/Macros.h b/flatnav/util/Macros.h index 0418a55..a8ef494 100644 --- a/flatnav/util/Macros.h +++ b/flatnav/util/Macros.h @@ -11,11 +11,11 @@ #ifdef __SSE3__ #define USE_SSE3 -#endif // __SSE3__ +#endif // __SSE3__ #ifdef __SSE4_1__ #define USE_SSE4_1 -#endif // __SSE4_1__ +#endif // __SSE4_1__ #ifdef __AVX__ #define USE_AVX @@ -24,18 +24,18 @@ #ifdef __AVX512BW__ #define USE_AVX512BW -#endif // __AVX512BW__ +#endif // __AVX512BW__ #ifdef __AVX512VNNI__ #define USE_AVX512VNNI -#endif // __AVX512VNNI__ +#endif // __AVX512VNNI__ #define USE_AVX512 -#endif // __AVX512F__ +#endif // __AVX512F__ -#endif // __AVX__ +#endif // __AVX__ #endif -#endif // NO_SIMD_VECTORIZATION +#endif // NO_SIMD_VECTORIZATION #if defined(USE_AVX) || defined(USE_SSE) @@ -89,7 +89,7 @@ uint64_t xgetbv(unsigned int index) { #else #define PORTABLE_ALIGN32 __declspec(align(32)) #define PORTABLE_ALIGN64 __declspec(align(64)) -#endif // __GNUC__ +#endif // __GNUC__ #define _XCR_XFEATURE_ENABLED_MASK 0 diff --git a/flatnav/util/Multithreading.h b/flatnav/util/Multithreading.h index 9f81cca..cb1f6fb 100644 --- a/flatnav/util/Multithreading.h +++ b/flatnav/util/Multithreading.h @@ -16,8 +16,7 @@ namespace flatnav { * installing the Python library. */ template -void executeInParallel(uint32_t start_index, uint32_t end_index, - uint32_t num_threads, Function function, +void executeInParallel(uint32_t start_index, uint32_t end_index, uint32_t num_threads, Function function, Args... additional_args) { if (num_threads == 0) { throw std::invalid_argument("Invalid number of threads"); @@ -35,8 +34,8 @@ void executeInParallel(uint32_t start_index, uint32_t end_index, break; } // Use std::apply to pass arguments to the function - std::apply(function, std::tuple_cat(std::make_tuple(current_vector_idx), - std::make_tuple(additional_args...))); + std::apply(function, + std::tuple_cat(std::make_tuple(current_vector_idx), std::make_tuple(additional_args...))); } }; @@ -48,4 +47,4 @@ void executeInParallel(uint32_t start_index, uint32_t end_index, } } -} // namespace flatnav \ No newline at end of file +} // namespace flatnav \ No newline at end of file diff --git a/flatnav/util/Reordering.h b/flatnav/util/Reordering.h index d5ed029..a9b066d 100644 --- a/flatnav/util/Reordering.h +++ b/flatnav/util/Reordering.h @@ -24,8 +24,7 @@ namespace flatnav::util { template -std::vector -gOrder(std::vector> &outdegree_table, const int w) { +std::vector gOrder(std::vector>& outdegree_table, const int w) { /* Simple explanation of the Gorder Algorithm: insert all v into Q each with priority 0 select a start node into P @@ -55,7 +54,7 @@ gOrder(std::vector> &outdegree_table, const int w) { // create table of in-degrees std::vector> indegree_table(cur_num_nodes); for (node_id_t node = 0; node < cur_num_nodes; node++) { - for (node_id_t &edge : outdegree_table[node]) { + for (node_id_t& edge : outdegree_table[node]) { indegree_table[edge].push_back(node); } } @@ -72,15 +71,15 @@ gOrder(std::vector> &outdegree_table, const int w) { node_id_t v_e = P[i - 1]; // ve = newest node in window // for each node u in out-edges of ve: - for (node_id_t &u : outdegree_table[v_e]) { + for (node_id_t& u : outdegree_table[v_e]) { Q.increment(u); } // for each node u in in-edges of v_e: - for (node_id_t &u : indegree_table[v_e]) { + for (node_id_t& u : indegree_table[v_e]) { // if u in Q, increment priority of u Q.increment(u); // for each node v in out-edges of u: - for (node_id_t &v : outdegree_table[u]) { + for (node_id_t& v : outdegree_table[u]) { Q.increment(v); } } @@ -88,12 +87,12 @@ gOrder(std::vector> &outdegree_table, const int w) { if (i > w + 1) { node_id_t v_b = P[i - w - 1]; // for each node u in out-edges of vb: - for (node_id_t &u : outdegree_table[v_b]) { + for (node_id_t& u : outdegree_table[v_b]) { Q.decrement(u); } // for each node u in in-edges of v_b - for (node_id_t &u : indegree_table[v_b]) { + for (node_id_t& u : indegree_table[v_b]) { // if u in Q, increment priority of u // Note: it doesn't seem to matter whether this particular // operation is an increment or a decrement. In a previous @@ -101,7 +100,7 @@ gOrder(std::vector> &outdegree_table, const int w) { // technically wrong) but the performance was nearly the same. Q.decrement(u); // for each node v in out-edges of u: - for (node_id_t &v : outdegree_table[u]) { + for (node_id_t& v : outdegree_table[u]) { Q.decrement(v); } } @@ -118,8 +117,7 @@ gOrder(std::vector> &outdegree_table, const int w) { } template -std::vector -rcmOrder(std::vector> &outdegree_table) { +std::vector rcmOrder(std::vector>& outdegree_table) { int cur_num_nodes = outdegree_table.size(); std::vector> sorted_nodes; @@ -131,10 +129,10 @@ rcmOrder(std::vector> &outdegree_table) { degrees.push_back(deg); } - std::sort( - sorted_nodes.begin(), sorted_nodes.end(), - [](const std::pair &a, - const std::pair &b) { return a.second < b.second; }); + std::sort(sorted_nodes.begin(), sorted_nodes.end(), + [](const std::pair& a, const std::pair& b) { + return a.second < b.second; + }); std::vector P; auto visited_set = VisitedSet(cur_num_nodes); @@ -151,14 +149,13 @@ rcmOrder(std::vector> &outdegree_table) { // get list of neighbors std::vector> neighbors; - for (auto &edge : outdegree_table[node]) { + for (auto& edge : outdegree_table[node]) { neighbors.push_back({edge, degrees[edge]}); } // sort neighbors by degree (min degree first) std::sort(neighbors.begin(), neighbors.end(), - [](const std::pair &a, - const std::pair &b) { + [](const std::pair& a, const std::pair& b) { return a.second < b.second; }); @@ -177,13 +174,12 @@ rcmOrder(std::vector> &outdegree_table) { // get list of neighbors of candidate std::vector> candidate_neighbors; - for (auto &edge : outdegree_table[candidate]) { + for (auto& edge : outdegree_table[candidate]) { candidate_neighbors.push_back({edge, degrees[edge]}); } // sort neighbors by degree (min degree first) std::sort(candidate_neighbors.begin(), candidate_neighbors.end(), - [](const std::pair &a, - const std::pair &b) { + [](const std::pair& a, const std::pair& b) { return a.second < b.second; }); // add neighbors to queue @@ -203,4 +199,4 @@ rcmOrder(std::vector> &outdegree_table) { return Pinv; } -} // namespace flatnav::util \ No newline at end of file +} // namespace flatnav::util \ No newline at end of file diff --git a/flatnav/util/SquaredL2SimdExtensions.h b/flatnav/util/SquaredL2SimdExtensions.h index bd067a8..04df0e2 100644 --- a/flatnav/util/SquaredL2SimdExtensions.h +++ b/flatnav/util/SquaredL2SimdExtensions.h @@ -5,13 +5,12 @@ namespace flatnav::util { #if defined(USE_AVX512) -static float computeL2_Avx512(const void *x, const void *y, - const size_t &dimension) { - float *pointer_x = static_cast(const_cast(x)); - float *pointer_y = static_cast(const_cast(y)); +static float computeL2_Avx512(const void* x, const void* y, const size_t& dimension) { + float* pointer_x = static_cast(const_cast(x)); + float* pointer_y = static_cast(const_cast(y)); // Align to 16-floats boundary - const float *end_x = pointer_x + (dimension >> 4 << 4); + const float* end_x = pointer_x + (dimension >> 4 << 4); simd16float32 difference, v1, v2; simd16float32 sum(0.0f); @@ -30,10 +29,9 @@ static float computeL2_Avx512(const void *x, const void *y, /** * @todo Make this support dimensions that are not multiples of 64 */ -static float computeL2_Avx512_Uint8(const void *x, const void *y, - const size_t &dimension) { - const uint8_t *pointer_x = static_cast(x); - const uint8_t *pointer_y = static_cast(y); +static float computeL2_Avx512_Uint8(const void* x, const void* y, const size_t& dimension) { + const uint8_t* pointer_x = static_cast(x); + const uint8_t* pointer_y = static_cast(y); // Initialize sum to zero __m512i sum = _mm512_setzero_si512(); @@ -41,10 +39,8 @@ static float computeL2_Avx512_Uint8(const void *x, const void *y, // Loop over the input arrays for (size_t i = 0; i < dimension; i += 64) { // Load 64 bytes from each array - __m512i v1 = - _mm512_loadu_si512(reinterpret_cast(pointer_x + i)); - __m512i v2 = - _mm512_loadu_si512(reinterpret_cast(pointer_y + i)); + __m512i v1 = _mm512_loadu_si512(reinterpret_cast(pointer_x + i)); + __m512i v2 = _mm512_loadu_si512(reinterpret_cast(pointer_y + i)); // Unpack to 16-bit integers to avoid overflow __m512i v1_lo = _mm512_unpacklo_epi8(v1, _mm512_setzero_si512()); @@ -72,23 +68,22 @@ static float computeL2_Avx512_Uint8(const void *x, const void *y, sum256 = _mm256_hadd_epi32(sum256, sum256); int32_t buffer[8]; - _mm256_storeu_si256(reinterpret_cast<__m256i *>(buffer), sum256); + _mm256_storeu_si256(reinterpret_cast<__m256i*>(buffer), sum256); int32_t total_sum = buffer[0] + buffer[4]; return static_cast(total_sum); } -#endif // USE_AVX512 +#endif // USE_AVX512 #if defined(USE_AVX) -static float computeL2_Avx2(const void *x, const void *y, - const size_t &dimension) { - float *pointer_x = static_cast(const_cast(x)); - float *pointer_y = static_cast(const_cast(y)); +static float computeL2_Avx2(const void* x, const void* y, const size_t& dimension) { + float* pointer_x = static_cast(const_cast(x)); + float* pointer_y = static_cast(const_cast(y)); - const float *end_x = pointer_x + (dimension & ~7); + const float* end_x = pointer_x + (dimension & ~7); simd8float32 difference, v1, v2; simd8float32 sum(0.0f); @@ -112,20 +107,18 @@ static float computeL2_Avx2(const void *x, const void *y, float result[8]; sum.storeu(result); - return result[0] + result[1] + result[2] + result[3] + result[4] + result[5] + - result[6] + result[7]; + return result[0] + result[1] + result[2] + result[3] + result[4] + result[5] + result[6] + result[7]; } -#endif // USE_AVX +#endif // USE_AVX #if defined(USE_SSE) -static float computeL2_Sse(const void *x, const void *y, - const size_t &dimension) { - float *pointer_x = static_cast(const_cast(x)); - float *pointer_y = static_cast(const_cast(y)); +static float computeL2_Sse(const void* x, const void* y, const size_t& dimension) { + float* pointer_x = static_cast(const_cast(x)); + float* pointer_y = static_cast(const_cast(y)); - const float *end_x = pointer_x + (dimension >> 4 << 4); + const float* end_x = pointer_x + (dimension >> 4 << 4); simd4float32 difference, v1, v2; simd4float32 sum(0.0f); @@ -162,28 +155,25 @@ static float computeL2_Sse(const void *x, const void *y, return sum.reduce_add(); } - #if defined(USE_SSE4_1) // This function computes the L2 distance between two int8 vectors using SSE2 // instructions. -static float computeL2_Sse_int8(const void *x, const void *y, - const size_t &dimension) { - int8_t *pointer_x = static_cast(const_cast(x)); - int8_t *pointer_y = static_cast(const_cast(y)); +static float computeL2_Sse_int8(const void* x, const void* y, const size_t& dimension) { + int8_t* pointer_x = static_cast(const_cast(x)); + int8_t* pointer_y = static_cast(const_cast(y)); __m128i sum = _mm_setzero_si128(); size_t aligned_dimension = dimension & ~0xF; size_t i = 0; for (; i < aligned_dimension; i += 16) { - __m128i vx = _mm_loadu_si128(reinterpret_cast<__m128i *>(pointer_x + i)); - __m128i vy = _mm_loadu_si128(reinterpret_cast<__m128i *>(pointer_y + i)); + __m128i vx = _mm_loadu_si128(reinterpret_cast<__m128i*>(pointer_x + i)); + __m128i vy = _mm_loadu_si128(reinterpret_cast<__m128i*>(pointer_y + i)); __m128i diff = _mm_sub_epi8(vx, vy); // Convert to 16-bit and square - __m128i diff_squared = - _mm_madd_epi16(_mm_cvtepi8_epi16(diff), _mm_cvtepi8_epi16(diff)); + __m128i diff_squared = _mm_madd_epi16(_mm_cvtepi8_epi16(diff), _mm_cvtepi8_epi16(diff)); // Accumulate in 32-bit integer sum = _mm_add_epi32(sum, diff_squared); @@ -198,19 +188,17 @@ static float computeL2_Sse_int8(const void *x, const void *y, // Reduce sum int32_t buffer[4]; - _mm_storeu_si128(reinterpret_cast<__m128i *>(buffer), sum); - return static_cast(buffer[0] + buffer[1] + buffer[2] + buffer[3] + - partial_sum); + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), sum); + return static_cast(buffer[0] + buffer[1] + buffer[2] + buffer[3] + partial_sum); } -#endif // USE_SSE4_1 +#endif // USE_SSE4_1 -static float computeL2_Sse4Aligned(const void *x, const void *y, - const size_t &dimension) { - float *pointer_x = static_cast(const_cast(x)); - float *pointer_y = static_cast(const_cast(y)); +static float computeL2_Sse4Aligned(const void* x, const void* y, const size_t& dimension) { + float* pointer_x = static_cast(const_cast(x)); + float* pointer_y = static_cast(const_cast(y)); - const float *end_x = pointer_x + (dimension >> 2 << 2); + const float* end_x = pointer_x + (dimension >> 2 << 2); simd4float32 difference, v1, v2; simd4float32 sum(0.0f); @@ -226,17 +214,14 @@ static float computeL2_Sse4Aligned(const void *x, const void *y, return sum.reduce_add(); } -static float computeL2_SseWithResidual_16(const void *x, const void *y, - const size_t &dimension) { +static float computeL2_SseWithResidual_16(const void* x, const void* y, const size_t& dimension) { size_t dimension_aligned = dimension >> 4 << 4; float aligned_distance = computeL2_Sse(x, y, dimension_aligned); size_t residual_dimension = dimension - dimension_aligned; float residual_distance = 0.0f; - float *pointer_x = - static_cast(const_cast(x)) + dimension_aligned; - float *pointer_y = - static_cast(const_cast(y)) + dimension_aligned; + float* pointer_x = static_cast(const_cast(x)) + dimension_aligned; + float* pointer_y = static_cast(const_cast(y)) + dimension_aligned; for (size_t i = 0; i < residual_dimension; i++) { float difference = *pointer_x - *pointer_y; residual_distance += difference * difference; @@ -246,12 +231,11 @@ static float computeL2_SseWithResidual_16(const void *x, const void *y, return aligned_distance + residual_distance; } -static float computeL2_Sse4aligned(const void *x, const void *y, - const size_t &dimension) { - float *pointer_x = static_cast(const_cast(x)); - float *pointer_y = static_cast(const_cast(y)); +static float computeL2_Sse4aligned(const void* x, const void* y, const size_t& dimension) { + float* pointer_x = static_cast(const_cast(x)); + float* pointer_y = static_cast(const_cast(y)); - const float *end_x = pointer_x + (dimension >> 2 << 2); + const float* end_x = pointer_x + (dimension >> 2 << 2); simd4float32 difference, v1, v2; simd4float32 sum(0.0f); @@ -267,16 +251,13 @@ static float computeL2_Sse4aligned(const void *x, const void *y, return sum.reduce_add(); } -static float computeL2_SseWithResidual_4(const void *x, const void *y, - const size_t &dimension) { +static float computeL2_SseWithResidual_4(const void* x, const void* y, const size_t& dimension) { size_t dimension_aligned = dimension >> 2 << 2; float aligned_distance = computeL2_Sse4aligned(x, y, dimension_aligned); size_t residual_dimension = dimension - dimension_aligned; float residual_distance = 0.0f; - float *pointer_x = - static_cast(const_cast(x)) + dimension_aligned; - float *pointer_y = - static_cast(const_cast(y)) + dimension_aligned; + float* pointer_x = static_cast(const_cast(x)) + dimension_aligned; + float* pointer_y = static_cast(const_cast(y)) + dimension_aligned; for (size_t i = 0; i < residual_dimension; i++) { float difference = *pointer_x - *pointer_y; residual_distance += difference * difference; @@ -286,6 +267,6 @@ static float computeL2_SseWithResidual_4(const void *x, const void *y, return aligned_distance + residual_distance; } -#endif // USE_SSE +#endif // USE_SSE -} // namespace flatnav::util \ No newline at end of file +} // namespace flatnav::util \ No newline at end of file diff --git a/flatnav/util/VisitedSetPool.h b/flatnav/util/VisitedSetPool.h index 0367fe7..286019a 100644 --- a/flatnav/util/VisitedSetPool.h +++ b/flatnav/util/VisitedSetPool.h @@ -2,24 +2,24 @@ // #include -#include #include +#include +#include #include #include #include -#include #include #include namespace flatnav::util { class VisitedSet { -private: + private: uint8_t _mark; - uint8_t *_table; + uint8_t* _table; uint32_t _table_size; -public: + public: VisitedSet(const uint32_t size) : _mark(1), _table_size(size) { // initialize values to 0 _table = new uint8_t[_table_size](); @@ -27,7 +27,7 @@ class VisitedSet { inline void prefetch(const uint32_t num) const { #ifdef USE_SSE - _mm_prefetch(reinterpret_cast(&_table[num]), _MM_HINT_T0); + _mm_prefetch(reinterpret_cast(&_table[num]), _MM_HINT_T0); #endif } @@ -45,31 +45,27 @@ class VisitedSet { } } - inline bool isVisited(const uint32_t num) const { - return _table[num] == _mark; - } + inline bool isVisited(const uint32_t num) const { return _table[num] == _mark; } ~VisitedSet() { delete[] _table; } // copy constructor - VisitedSet(const VisitedSet &other) - : _table_size(other._table_size), _mark(other._mark) { + VisitedSet(const VisitedSet& other) : _table_size(other._table_size), _mark(other._mark) { _table = new uint8_t[_table_size]; std::memcpy(_table, other._table, _table_size); } // move constructor - VisitedSet(VisitedSet &&other) noexcept - : _table_size(other._table_size), _mark(other._mark), - _table(other._table) { + VisitedSet(VisitedSet&& other) noexcept + : _table_size(other._table_size), _mark(other._mark), _table(other._table) { other._table = nullptr; other._table_size = 0; other._mark = 0; } // copy assignment - VisitedSet &operator=(const VisitedSet &other) { + VisitedSet& operator=(const VisitedSet& other) { if (this != &other) { delete[] _table; _table_size = other._table_size; @@ -81,7 +77,7 @@ class VisitedSet { } // move assignment - VisitedSet &operator=(VisitedSet &&other) noexcept { + VisitedSet& operator=(VisitedSet&& other) noexcept { _table_size = other._table_size; _mark = other._mark; _table = other._table; @@ -135,35 +131,31 @@ class VisitedSet { * expected to manage. */ class VisitedSetPool { - std::vector _visisted_set_pool; + std::vector _visisted_set_pool; std::mutex _pool_guard; uint32_t _num_elements; uint32_t _max_pool_size; -public: + public: VisitedSetPool(uint32_t initial_pool_size, uint32_t num_elements, uint32_t max_pool_size = std::thread::hardware_concurrency()) - : _visisted_set_pool(initial_pool_size), _num_elements(num_elements), - _max_pool_size(max_pool_size) { + : _visisted_set_pool(initial_pool_size), _num_elements(num_elements), _max_pool_size(max_pool_size) { if (initial_pool_size > max_pool_size) { - throw std::invalid_argument( - "initial_pool_size must be less than or equal to max_pool_size"); + throw std::invalid_argument("initial_pool_size must be less than or equal to max_pool_size"); } - for (uint32_t visited_set_id = 0; - visited_set_id < _visisted_set_pool.size(); visited_set_id++) { - _visisted_set_pool[visited_set_id] = - new VisitedSet(/* size = */ _num_elements); + for (uint32_t visited_set_id = 0; visited_set_id < _visisted_set_pool.size(); visited_set_id++) { + _visisted_set_pool[visited_set_id] = new VisitedSet(/* size = */ _num_elements); } } // TODO: Enforce the condition that we never allocate more than _max_pool_size // visited_sets. For now there is nothing stopping a user from allocating more // than _max_pool_size. - VisitedSet *pollAvailableSet() { + VisitedSet* pollAvailableSet() { std::unique_lock lock(_pool_guard); if (!_visisted_set_pool.empty()) { - auto *visited_set = _visisted_set_pool.back(); + auto* visited_set = _visisted_set_pool.back(); _visisted_set_pool.pop_back(); return visited_set; } else { @@ -173,7 +165,7 @@ class VisitedSetPool { size_t poolSize() const { return _visisted_set_pool.size(); } - void pushVisitedSet(VisitedSet *visited_set) { + void pushVisitedSet(VisitedSet* visited_set) { std::unique_lock lock(_pool_guard); _visisted_set_pool.push_back(visited_set); @@ -183,12 +175,11 @@ class VisitedSetPool { std::unique_lock lock(_pool_guard); if (new_pool_size > _visisted_set_pool.size()) { - throw std::invalid_argument( - "new_pool_size must be less than or equal to the current pool size"); + throw std::invalid_argument("new_pool_size must be less than or equal to the current pool size"); } while (_visisted_set_pool.size() > new_pool_size) { - auto *visited_set = _visisted_set_pool.back(); + auto* visited_set = _visisted_set_pool.back(); _visisted_set_pool.pop_back(); delete visited_set; } @@ -198,11 +189,11 @@ class VisitedSetPool { ~VisitedSetPool() { while (!_visisted_set_pool.empty()) { - auto *visited_set = _visisted_set_pool.back(); + auto* visited_set = _visisted_set_pool.back(); _visisted_set_pool.pop_back(); delete visited_set; } } }; -} // namespace flatnav::util \ No newline at end of file +} // namespace flatnav::util \ No newline at end of file diff --git a/flatnav_python/python_bindings.cpp b/flatnav_python/python_bindings.cpp index 99c39c7..008c2ef 100644 --- a/flatnav_python/python_bindings.cpp +++ b/flatnav_python/python_bindings.cpp @@ -1,22 +1,22 @@ -#include "docs.h" -#include -#include #include #include #include #include #include #include -#include -#include -#include #include #include #include +#include +#include +#include +#include +#include #include #include #include #include +#include "docs.h" using flatnav::Index; using flatnav::distances::DistanceInterface; @@ -28,26 +28,19 @@ using flatnav::util::for_each_data_type; namespace py = pybind11; template -auto cast_and_call(DataType data_type, const py::array &array, Func &&function, - Args &&... args) { +auto cast_and_call(DataType data_type, const py::array& array, Func&& function, Args&&... args) { switch (data_type) { - case DataType::float32: - return function( - array.cast< - py::array_t>(), - std::forward(args)...); - case DataType::int8: - return function( - array.cast< - py::array_t>(), - std::forward(args)...); - case DataType::uint8: - return function( - array.cast< - py::array_t>(), - std::forward(args)...); - default: - throw std::invalid_argument("Unsupported data type."); + case DataType::float32: + return function(array.cast>(), + std::forward(args)...); + case DataType::int8: + return function(array.cast>(), + std::forward(args)...); + case DataType::uint8: + return function(array.cast>(), + std::forward(args)...); + default: + throw std::invalid_argument("Unsupported data type."); } } @@ -57,18 +50,15 @@ class PyIndex : public std::enable_shared_from_this> { int _dim; label_t _label_id; bool _verbose; - Index *_index; + Index* _index; DataType _data_type; - typedef std::pair, py::array_t> - DistancesLabelsPair; + typedef std::pair, py::array_t> DistancesLabelsPair; // Internal add method that handles templated dispatch template - void addImpl(const py::array_t &data, - int ef_construction, int num_initializations = 100, - py::object labels = py::none()) { + void addImpl(const py::array_t& data, + int ef_construction, int num_initializations = 100, py::object labels = py::none()) { // py::array_t means that // the functions expects either a Numpy array of floats or a castable type // to that type. If the given type can't be casted, pybind11 will throw an @@ -80,8 +70,7 @@ class PyIndex : public std::enable_shared_from_this> { throw std::invalid_argument( "Data has incorrect dimensions. data.ndim() = " "`" + - std::to_string(data.ndim()) + "` and data_dim = `" + - std::to_string(data_dim) + + std::to_string(data.ndim()) + "` and data_dim = `" + std::to_string(data_dim) + "`. Expected 2D " "array with " "dimensions " @@ -97,7 +86,7 @@ class PyIndex : public std::enable_shared_from_this> { // Release python GIL while threads are running py::gil_scoped_release gil; this->_index->template addBatch( - /* data = */ (void *)data.data(0), + /* data = */ (void*)data.data(0), /* labels = */ vec_labels, /* ef_construction = */ ef_construction, /* num_initializations = */ num_initializations); @@ -115,37 +104,35 @@ class PyIndex : public std::enable_shared_from_this> { // Relase python GIL while threads are running py::gil_scoped_release gil; this->_index->template addBatch( - /* data = */ (void *)data.data(0), /* labels = */ vec_labels, + /* data = */ (void*)data.data(0), /* labels = */ vec_labels, /* ef_construction = */ ef_construction, /* num_initializations = */ num_initializations); } - } catch (const py::cast_error &error) { + } catch (const py::cast_error& error) { throw std::invalid_argument("Invalid labels provided."); } } template DistancesLabelsPair searchSingleImpl( - const py::array_t - &query, - int K, int ef_search, int num_initializations = 100) { + const py::array_t& query, int K, int ef_search, + int num_initializations = 100) { if (query.ndim() != 1 || query.shape(0) != _dim) { throw std::invalid_argument("Query has incorrect dimensions."); } std::vector> top_k = this->_index->search( - /* query = */ (const void *)query.data(0), /* K = */ K, + /* query = */ (const void*)query.data(0), /* K = */ K, /* ef_search = */ ef_search, /* num_initializations = */ num_initializations); if (top_k.size() != K) { - throw std::runtime_error( - "Search did not return the expected number of results. Expected " + - std::to_string(K) + " but got " + std::to_string(top_k.size()) + "."); + throw std::runtime_error("Search did not return the expected number of results. Expected " + + std::to_string(K) + " but got " + std::to_string(top_k.size()) + "."); } - label_t *labels = new label_t[K]; - float *distances = new float[K]; + label_t* labels = new label_t[K]; + float* distances = new float[K]; for (size_t i = 0; i < K; i++) { distances[i] = top_k[i].first; @@ -153,26 +140,23 @@ class PyIndex : public std::enable_shared_from_this> { } // Allows to transfer ownership to Python - py::capsule free_labels_when_done(labels, - [](void *ptr) { delete (label_t *)ptr; }); + py::capsule free_labels_when_done(labels, [](void* ptr) { delete (label_t*)ptr; }); - py::capsule free_distances_when_done( - distances, [](void *ptr) { delete (float *)ptr; }); + py::capsule free_distances_when_done(distances, [](void* ptr) { delete (float*)ptr; }); - py::array_t labels_array = py::array_t( - {K}, {sizeof(label_t)}, labels, free_labels_when_done); + py::array_t labels_array = + py::array_t({K}, {sizeof(label_t)}, labels, free_labels_when_done); - py::array_t distances_array = py::array_t( - {K}, {sizeof(float)}, distances, free_distances_when_done); + py::array_t distances_array = + py::array_t({K}, {sizeof(float)}, distances, free_distances_when_done); return {distances_array, labels_array}; } template - DistancesLabelsPair - searchImpl(const py::array_t &queries, - int K, int ef_search, int num_initializations = 100) { + DistancesLabelsPair searchImpl( + const py::array_t& queries, int K, int ef_search, + int num_initializations = 100) { size_t num_queries = queries.shape(0); size_t queries_dim = queries.shape(1); @@ -181,22 +165,22 @@ class PyIndex : public std::enable_shared_from_this> { } auto num_threads = _index->getNumThreads(); - label_t *results = new label_t[num_queries * K]; - float *distances = new float[num_queries * K]; + label_t* results = new label_t[num_queries * K]; + float* distances = new float[num_queries * K]; // No need to spawn any threads if we are in a single-threaded environment if (num_threads == 1) { for (size_t query_index = 0; query_index < num_queries; query_index++) { std::vector> top_k = this->_index->search( - /* query = */ (const void *)queries.data(query_index), /* K = */ K, + /* query = */ (const void*)queries.data(query_index), /* K = */ K, /* ef_search = */ ef_search, /* num_initializations = */ num_initializations); if (top_k.size() != K) { - throw std::runtime_error("Search did not return the expected number " - "of results. Expected " + - std::to_string(K) + " but got " + - std::to_string(top_k.size()) + "."); + throw std::runtime_error( + "Search did not return the expected number " + "of results. Expected " + + std::to_string(K) + " but got " + std::to_string(top_k.size()) + "."); } for (size_t i = 0; i < top_k.size(); i++) { @@ -210,7 +194,7 @@ class PyIndex : public std::enable_shared_from_this> { /* start_index = */ 0, /* end_index = */ num_queries, /* num_threads = */ num_threads, /* function = */ [&](uint32_t row_index) { - auto *query = (const void *)queries.data(row_index); + auto* query = (const void*)queries.data(row_index); std::vector> top_k = this->_index->search( /* query = */ query, /* K = */ K, /* ef_search = */ ef_search, /* num_initializations = */ num_initializations); @@ -223,39 +207,35 @@ class PyIndex : public std::enable_shared_from_this> { } // Allows to transfer ownership to Python - py::capsule free_results_when_done( - results, [](void *ptr) { delete (label_t *)ptr; }); - py::capsule free_distances_when_done( - distances, [](void *ptr) { delete (float *)ptr; }); - - py::array_t labels = - py::array_t({num_queries, (size_t)K}, // shape of the array - {K * sizeof(label_t), sizeof(label_t)}, // strides - results, // data pointer - free_results_when_done // capsule - ); + py::capsule free_results_when_done(results, [](void* ptr) { delete (label_t*)ptr; }); + py::capsule free_distances_when_done(distances, [](void* ptr) { delete (float*)ptr; }); + + py::array_t labels = py::array_t({num_queries, (size_t)K}, // shape of the array + {K * sizeof(label_t), sizeof(label_t)}, // strides + results, // data pointer + free_results_when_done // capsule + ); py::array_t dists = py::array_t( - {num_queries, (size_t)K}, {K * sizeof(float), sizeof(float)}, distances, - free_distances_when_done); + {num_queries, (size_t)K}, {K * sizeof(float), sizeof(float)}, distances, free_distances_when_done); return {dists, labels}; } -public: + public: explicit PyIndex(std::unique_ptr> index) - : _dim(index->dataDimension()), _label_id(0), _verbose(false), - _index(index.release()) { + : _dim(index->dataDimension()), _label_id(0), _verbose(false), _index(index.release()) { if (_verbose) { _index->getIndexSummary(); } } - PyIndex(std::unique_ptr> &&distance, - DataType data_type, int dataset_size, int max_edges_per_node, - bool verbose = false, bool collect_stats = false) - : _dim(distance->dimension()), _label_id(0), _verbose(verbose), + PyIndex(std::unique_ptr>&& distance, DataType data_type, int dataset_size, + int max_edges_per_node, bool verbose = false, bool collect_stats = false) + : _dim(distance->dimension()), + _label_id(0), + _verbose(verbose), _index(new Index( /* dist = */ std::move(distance), /* dataset_size = */ dataset_size, @@ -266,16 +246,12 @@ class PyIndex : public std::enable_shared_from_this> { if (_verbose) { uint64_t total_index_memory = _index->getTotalIndexMemory(); - uint64_t visited_set_allocated_memory = - _index->visitedSetPoolAllocatedMemory(); + uint64_t visited_set_allocated_memory = _index->visitedSetPoolAllocatedMemory(); uint64_t mutexes_allocated_memory = _index->mutexesAllocatedMemory(); - auto total_memory = total_index_memory + visited_set_allocated_memory + - mutexes_allocated_memory; + auto total_memory = total_index_memory + visited_set_allocated_memory + mutexes_allocated_memory; - std::cout << "Total allocated index memory: " - << (float)(total_memory / 1e9) << " GB \n" - << std::flush; + std::cout << "Total allocated index memory: " << (float)(total_memory / 1e9) << " GB \n" << std::flush; std::cout << "[WARN]: More memory might be allocated due to visited sets " "in multi-threaded environments.\n" << std::flush; @@ -283,7 +259,7 @@ class PyIndex : public std::enable_shared_from_this> { } } - Index *getIndex() { return _index; } + Index* getIndex() { return _index; } ~PyIndex() { delete _index; } @@ -293,50 +269,39 @@ class PyIndex : public std::enable_shared_from_this> { return distance_computations; } - void buildGraphLinks(const std::string &mtx_filename) { + void buildGraphLinks(const std::string& mtx_filename) { _index->buildGraphLinks(/* mtx_filename = */ mtx_filename); } - std::vector> getGraphOutdegreeTable() { - return _index->getGraphOutdegreeTable(); - } + std::vector> getGraphOutdegreeTable() { return _index->getGraphOutdegreeTable(); } uint32_t getMaxEdgesPerNode() { return _index->maxEdgesPerNode(); } - void reorder(const std::vector &strategies) { + void reorder(const std::vector& strategies) { // validate the given strategies - for (auto &strategy : strategies) { + for (auto& strategy : strategies) { auto alg = strategy; - std::transform(alg.begin(), alg.end(), alg.begin(), - [](unsigned char c) { return std::tolower(c); }); + std::transform(alg.begin(), alg.end(), alg.begin(), [](unsigned char c) { return std::tolower(c); }); if (alg != "gorder" && alg != "rcm") { - throw std::invalid_argument( - "`" + strategy + - "` is not a supported graph re-ordering strategy."); + throw std::invalid_argument("`" + strategy + "` is not a supported graph re-ordering strategy."); } } _index->doGraphReordering(strategies); } - void setNumThreads(uint32_t num_threads) { - _index->setNumThreads(num_threads); - } + void setNumThreads(uint32_t num_threads) { _index->setNumThreads(num_threads); } uint32_t getNumThreads() { return _index->getNumThreads(); } - void save(const std::string &filename) { - _index->saveIndex(/* filename = */ filename); - } + void save(const std::string& filename) { _index->saveIndex(/* filename = */ filename); } - static std::shared_ptr> - loadIndex(const std::string &filename) { + static std::shared_ptr> loadIndex(const std::string& filename) { auto index = Index::loadIndex(/* filename = */ filename); return std::make_shared>(std::move(index)); } std::shared_ptr> allocateNodes( - const py::array_t - &data) { + const py::array_t& data) { auto num_vectors = data.shape(0); auto data_dim = data.shape(1); if (data.ndim() != 2 || data_dim != _dim) { @@ -345,7 +310,7 @@ class PyIndex : public std::enable_shared_from_this> { for (size_t vec_index = 0; vec_index < num_vectors; vec_index++) { uint32_t new_node_id; - this->_index->allocateNode(/* data = */ (void *)data.data(vec_index), + this->_index->allocateNode(/* data = */ (void*)data.data(vec_index), /* label = */ _label_id, /* new_node_id = */ new_node_id); _label_id++; @@ -353,76 +318,75 @@ class PyIndex : public std::enable_shared_from_this> { return this->shared_from_this(); } - void add(const py::array &data, int ef_construction, int num_initializations, + void add(const py::array& data, int ef_construction, int num_initializations, py::object labels = py::none()) { cast_and_call( _data_type, data, - [this](auto &&casted_data, int ef, int num_init, py::object lbls) { - this->addImpl(std::forward(casted_data), ef, - num_init, lbls); + [this](auto&& casted_data, int ef, int num_init, py::object lbls) { + this->addImpl(std::forward(casted_data), ef, num_init, lbls); }, ef_construction, num_initializations, labels); } - DistancesLabelsPair search(const py::array &queries, int K, int ef_search, - int num_initializations) { + DistancesLabelsPair search(const py::array& queries, int K, int ef_search, int num_initializations) { return cast_and_call( _data_type, queries, - [this](auto &&casted_queries, int k, int ef, int num_init) { - return this->searchImpl( - std::forward(casted_queries), k, ef, - num_init); + [this](auto&& casted_queries, int k, int ef, int num_init) { + return this->searchImpl(std::forward(casted_queries), k, ef, num_init); }, K, ef_search, num_initializations); } - DistancesLabelsPair searchSingle(const py::array &query, int K, int ef_search, - int num_initializations) { + DistancesLabelsPair searchSingle(const py::array& query, int K, int ef_search, int num_initializations) { return cast_and_call( _data_type, query, - [this](auto &&casted_query, int k, int ef, int num_init) { - return this->searchSingleImpl( - std::forward(casted_query), k, ef, - num_init); + [this](auto&& casted_query, int k, int ef, int num_init) { + return this->searchSingleImpl(std::forward(casted_query), k, ef, num_init); }, K, ef_search, num_initializations); } }; -template struct IndexSpecialization; +template +struct IndexSpecialization; -template <> struct IndexSpecialization> { +template <> +struct IndexSpecialization> { using type = PyIndex, int>; - static constexpr char *name = "IndexL2Float"; + static constexpr char* name = "IndexL2Float"; }; -template <> struct IndexSpecialization> { +template <> +struct IndexSpecialization> { using type = PyIndex, int>; - static constexpr char *name = "IndexL2Uint8"; + static constexpr char* name = "IndexL2Uint8"; }; -template <> struct IndexSpecialization> { +template <> +struct IndexSpecialization> { using type = PyIndex, int>; - static constexpr char *name = "IndexL2Int8"; + static constexpr char* name = "IndexL2Int8"; }; template <> struct IndexSpecialization> { using type = PyIndex, int>; - static constexpr char *name = "IndexIPFloat"; + static constexpr char* name = "IndexIPFloat"; }; -template <> struct IndexSpecialization> { +template <> +struct IndexSpecialization> { using type = PyIndex, int>; - static constexpr char *name = "IndexIPUint8"; + static constexpr char* name = "IndexIPUint8"; }; -template <> struct IndexSpecialization> { +template <> +struct IndexSpecialization> { using type = PyIndex, int>; - static constexpr char *name = "IndexIPInt8"; + static constexpr char* name = "IndexIPInt8"; }; -void validateDistanceType(const std::string &distance_type) { +void validateDistanceType(const std::string& distance_type) { auto dist_type = distance_type; std::transform(dist_type.begin(), dist_type.end(), dist_type.begin(), [](unsigned char c) { return std::tolower(c); }); @@ -435,123 +399,104 @@ void validateDistanceType(const std::string &distance_type) { } template -py::object createIndex(const std::string &distance_type, int dim, - Args &&... args) { +py::object createIndex(const std::string& distance_type, int dim, Args&&... args) { validateDistanceType(distance_type); if (distance_type == "l2") { auto distance = SquaredL2Distance::create(dim); - auto index = std::make_shared, int>>( - std::move(distance), data_type, std::forward(args)...); + auto index = std::make_shared, int>>(std::move(distance), data_type, + std::forward(args)...); return py::cast(index); } auto distance = InnerProductDistance::create(dim); - auto index = std::make_shared, int>>( - std::move(distance), data_type, std::forward(args)...); + auto index = std::make_shared, int>>(std::move(distance), data_type, + std::forward(args)...); return py::cast(index); } template -void bindSpecialization(py::module_ &index_submodule) { +void bindSpecialization(py::module_& index_submodule) { using IndexType = typename IndexSpecialization::type; - auto index_class = py::class_>( - index_submodule, IndexSpecialization::name); + auto index_class = + py::class_>(index_submodule, IndexSpecialization::name); index_class .def( "add", - [](IndexType &index, const py::array &data, int ef_construction, - int num_initializations = 100, py::object labels = py::none()) { + [](IndexType& index, const py::array& data, int ef_construction, int num_initializations = 100, + py::object labels = py::none()) { index.add(data, ef_construction, num_initializations, labels); }, - py::arg("data"), py::arg("ef_construction"), - py::arg("num_initializations") = 100, py::arg("labels") = py::none(), - ADD_DOCSTRING) + py::arg("data"), py::arg("ef_construction"), py::arg("num_initializations") = 100, + py::arg("labels") = py::none(), ADD_DOCSTRING) .def( "allocate_nodes", - [](IndexType &index, - const py::array_t - &data) { return index.allocateNodes(data); }, + [](IndexType& index, const py::array_t& data) { + return index.allocateNodes(data); + }, py::arg("data"), ALLOCATE_NODES_DOCSTRING) .def( "search_single", - [](IndexType &index, const py::array &query, int K, int ef_search, - int num_initializations = 100) { + [](IndexType& index, const py::array& query, int K, int ef_search, int num_initializations = 100) { return index.searchSingle(query, K, ef_search, num_initializations); }, - py::arg("query"), py::arg("K"), py::arg("ef_search"), - py::arg("num_initializations") = 100, SEARCH_SINGLE_DOCSTRING) + py::arg("query"), py::arg("K"), py::arg("ef_search"), py::arg("num_initializations") = 100, + SEARCH_SINGLE_DOCSTRING) .def( "search", - [](IndexType &index, const py::array &queries, int K, int ef_search, + [](IndexType& index, const py::array& queries, int K, int ef_search, int num_initializations = 100) { return index.search(queries, K, ef_search, num_initializations); }, - py::arg("queries"), py::arg("K"), py::arg("ef_search"), - py::arg("num_initializations") = 100, SEARCH_DOCSTRING) - .def("get_query_distance_computations", - &IndexType::getQueryDistanceComputations, + py::arg("queries"), py::arg("K"), py::arg("ef_search"), py::arg("num_initializations") = 100, + SEARCH_DOCSTRING) + .def("get_query_distance_computations", &IndexType::getQueryDistanceComputations, GET_QUERY_DISTANCE_COMPUTATIONS_DOCSTRING) .def("save", &IndexType::save, py::arg("filename"), SAVE_DOCSTRING) - .def("build_graph_links", &IndexType::buildGraphLinks, - py::arg("mtx_filename"), BUILD_GRAPH_LINKS_DOCSTRING) + .def("build_graph_links", &IndexType::buildGraphLinks, py::arg("mtx_filename"), + BUILD_GRAPH_LINKS_DOCSTRING) .def("get_graph_outdegree_table", &IndexType::getGraphOutdegreeTable, GET_GRAPH_OUTDEGREE_TABLE_DOCSTRING) - .def("reorder", &IndexType::reorder, py::arg("strategies"), - REORDER_DOCSTRING) - .def("set_num_threads", &IndexType::setNumThreads, py::arg("num_threads"), - SET_NUM_THREADS_DOCSTRING) - .def_static("load_index", &IndexType::loadIndex, py::arg("filename"), - LOAD_INDEX_DOCSTRING) - .def_property_readonly("max_edges_per_node", - &IndexType::getMaxEdgesPerNode) - .def_property_readonly("num_threads", &IndexType::getNumThreads, - NUM_THREADS_DOCSTRING); + .def("reorder", &IndexType::reorder, py::arg("strategies"), REORDER_DOCSTRING) + .def("set_num_threads", &IndexType::setNumThreads, py::arg("num_threads"), SET_NUM_THREADS_DOCSTRING) + .def_static("load_index", &IndexType::loadIndex, py::arg("filename"), LOAD_INDEX_DOCSTRING) + .def_property_readonly("max_edges_per_node", &IndexType::getMaxEdgesPerNode) + .def_property_readonly("num_threads", &IndexType::getNumThreads, NUM_THREADS_DOCSTRING); } -void defineIndexSubmodule(py::module_ &index_submodule) { - bindSpecialization, int>( - index_submodule); +void defineIndexSubmodule(py::module_& index_submodule) { + bindSpecialization, int>(index_submodule); bindSpecialization, int>(index_submodule); bindSpecialization, int>(index_submodule); - bindSpecialization, int>( - index_submodule); - bindSpecialization, int>( - index_submodule); - bindSpecialization, int>( - index_submodule); + bindSpecialization, int>(index_submodule); + bindSpecialization, int>(index_submodule); + bindSpecialization, int>(index_submodule); index_submodule.def( "create", - [](const std::string &distance_type, int dim, int dataset_size, - int max_edges_per_node, DataType index_data_type, bool verbose = false, - bool collect_stats = false) { + [](const std::string& distance_type, int dim, int dataset_size, int max_edges_per_node, + DataType index_data_type, bool verbose = false, bool collect_stats = false) { switch (index_data_type) { - case DataType::float32: - return createIndex( - distance_type, dim, dataset_size, max_edges_per_node, verbose, - collect_stats); - case DataType::int8: - return createIndex(distance_type, dim, dataset_size, - max_edges_per_node, verbose, - collect_stats); - case DataType::uint8: - return createIndex(distance_type, dim, dataset_size, - max_edges_per_node, verbose, - collect_stats); - default: - throw std::runtime_error("Unsupported data type"); + case DataType::float32: + return createIndex(distance_type, dim, dataset_size, max_edges_per_node, + verbose, collect_stats); + case DataType::int8: + return createIndex(distance_type, dim, dataset_size, max_edges_per_node, verbose, + collect_stats); + case DataType::uint8: + return createIndex(distance_type, dim, dataset_size, max_edges_per_node, verbose, + collect_stats); + default: + throw std::runtime_error("Unsupported data type"); } }, - py::arg("distance_type"), py::arg("dim"), py::arg("dataset_size"), - py::arg("max_edges_per_node"), - py::arg("index_data_type") = DataType::float32, - py::arg("verbose") = false, py::arg("collect_stats") = false, - CONSTRUCTOR_DOCSTRING); + py::arg("distance_type"), py::arg("dim"), py::arg("dataset_size"), py::arg("max_edges_per_node"), + py::arg("index_data_type") = DataType::float32, py::arg("verbose") = false, + py::arg("collect_stats") = false, CONSTRUCTOR_DOCSTRING); } -void defineDatatypeEnums(py::module_ &module) { +void defineDatatypeEnums(py::module_& module) { // More enums are available, but these are the only ones that we support // for index construction. py::enum_(module, "DataType") @@ -561,7 +506,7 @@ void defineDatatypeEnums(py::module_ &module) { .export_values(); } -void defineDistanceEnums(py::module_ &module) { +void defineDistanceEnums(py::module_& module) { py::enum_(module, "MetricType") .value("L2", flatnav::distances::MetricType::L2) .value("IP", flatnav::distances::MetricType::IP) From c80758dd196f6e24cf524d159e3f0e08e08e4795 Mon Sep 17 00:00:00 2001 From: BlaiseMuhirwa Date: Sat, 2 Nov 2024 16:41:17 -0700 Subject: [PATCH 2/3] further formatting --- Makefile | 7 +- bin/format.sh | 16 -- .../quantization/CentroidsGenerator.h | 100 ++++----- .../quantization/ProductQuantization.h | 203 ++++++++---------- developmental-features/quantization/Utils.h | 21 +- tools/cereal_tests.cpp | 31 ++- tools/construct_npy.cpp | 65 +++--- tools/flatnav_pq.cpp | 56 ++--- tools/query_npy.cpp | 56 ++--- 9 files changed, 234 insertions(+), 321 deletions(-) delete mode 100755 bin/format.sh diff --git a/Makefile b/Makefile index 17f7ffb..4fd8afa 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,10 @@ -CPP_FILES := $(wildcard flatnav/**/*.h flatnav/**/*.cc flatnav/**/*.cpp flatnav_python/*.cpp) +CPP_FILES := $(wildcard flatnav/**/*.h flatnav/**/*.cpp flatnav_python/*.cpp tools/*.cpp developmental-features/**/*.h) format-cpp: clang-format -i $(CPP_FILES) build-cpp: - ./bin/build.sh -e -t \ No newline at end of file + ./bin/build.sh -e -t + +cmake-format: + cmake-format -i CMakeLists.txt \ No newline at end of file diff --git a/bin/format.sh b/bin/format.sh deleted file mode 100755 index 8de3fe2..0000000 --- a/bin/format.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -# First install clang-format and cmake-format - -clang-format -i - -# Format all header files with clang-format -# TODO: Use a recursive find solution to format headers/src files -find flatnav -iname *.h -o -iname *.cpp | xargs clang-format -i -find tools -iname *.cpp -o -iname *.h | xargs clang-format -i -find flatnav_python -iname *.cpp | xargs clang-format -i -find quantization -iname *.h -o -iname *.cpp | xargs clang-format -i -find quantization/tests -iname *.h -o -iname *.cpp | xargs clang-format -i - -# Format CMakeLists.txt file -cmake-format -i CMakeLists.txt \ No newline at end of file diff --git a/developmental-features/quantization/CentroidsGenerator.h b/developmental-features/quantization/CentroidsGenerator.h index 569cb71..69647ac 100644 --- a/developmental-features/quantization/CentroidsGenerator.h +++ b/developmental-features/quantization/CentroidsGenerator.h @@ -21,7 +21,7 @@ namespace flatnav::quantization { class CentroidsGenerator { -public: + public: /** * @brief Construct a new Centroids Generator object * @@ -37,20 +37,21 @@ class CentroidsGenerator { * @param verbose Whether to print verbose output * @param seed The seed for the random number generator */ - CentroidsGenerator(uint32_t dim, uint32_t num_centroids, - uint32_t num_iterations = 62, bool normalized = true, - bool verbose = false, int seed = 3333) - : _dim(dim), _num_centroids(num_centroids), - _clustering_iterations(num_iterations), _normalized(normalized), - _verbose(verbose), _centroids_initialized(false), _seed(seed), + CentroidsGenerator(uint32_t dim, uint32_t num_centroids, uint32_t num_iterations = 62, + bool normalized = true, bool verbose = false, int seed = 3333) + : _dim(dim), + _num_centroids(num_centroids), + _clustering_iterations(num_iterations), + _normalized(normalized), + _verbose(verbose), + _centroids_initialized(false), + _seed(seed), _initialization_type("default") {} - void initializeCentroids( - const float *data, uint64_t n, - const std::function &distance_func) { + void initializeCentroids(const float* data, uint64_t n, + const std::function& distance_func) { auto initialization_type = _initialization_type; - std::transform(initialization_type.begin(), initialization_type.end(), - initialization_type.begin(), + std::transform(initialization_type.begin(), initialization_type.end(), initialization_type.begin(), [](unsigned char c) { return std::tolower(c); }); if (_centroids.size() != _num_centroids * _dim) { @@ -64,9 +65,8 @@ class CentroidsGenerator { } else if (initialization_type == "hypercube") { hypercubeInitialize(data, n); } else { - throw std::invalid_argument( - "Invalid centroids initialization initialization type: " + - initialization_type); + throw std::invalid_argument("Invalid centroids initialization initialization type: " + + initialization_type); } _centroids_initialized = true; } @@ -94,13 +94,11 @@ class CentroidsGenerator { * @param distance_func The distance function to use (e.g. l2 distance or cosinde/inner product) */ - void generateCentroids( - const float *vectors, const float *vec_weights, uint64_t n, - const std::function &distance_func) { + void generateCentroids(const float* vectors, const float* vec_weights, uint64_t n, + const std::function& distance_func) { if (n < _num_centroids) { throw std::runtime_error( - "Invalid configuration. The number of centroids: " + - std::to_string(_num_centroids) + + "Invalid configuration. The number of centroids: " + std::to_string(_num_centroids) + " is bigger than the number of data points: " + std::to_string(n)); } @@ -110,8 +108,7 @@ class CentroidsGenerator { std::vector assignment(n); // K-means loop - for (uint32_t iteration = 0; iteration < _clustering_iterations; - iteration++) { + for (uint32_t iteration = 0; iteration < _clustering_iterations; iteration++) { // Step 1. Find the minimizing centroid based on l2 distance #pragma omp parallel for for (uint64_t vec_index = 0; vec_index < n; vec_index++) { @@ -119,9 +116,8 @@ class CentroidsGenerator { for (uint32_t c_index = 0; c_index < _num_centroids; c_index++) { // Get distance using the distance function - float *vector = const_cast(vectors + (vec_index * _dim)); - float *centroid = - const_cast(_centroids.data() + (c_index * _dim)); + float* vector = const_cast(vectors + (vec_index * _dim)); + float* centroid = const_cast(_centroids.data() + (c_index * _dim)); auto distance = distance_func(vector, centroid); if (distance < min_distance) { @@ -139,8 +135,7 @@ class CentroidsGenerator { for (uint64_t vec_index = 0; vec_index < n; vec_index++) { for (uint32_t dim_index = 0; dim_index < _dim; dim_index++) { #pragma omp atomic - sums[assignment[vec_index] * _dim + dim_index] += - vectors[vec_index * _dim + dim_index]; + sums[assignment[vec_index] * _dim + dim_index] += vectors[vec_index * _dim + dim_index]; } #pragma omp atomic counts[assignment[vec_index]]++; @@ -148,43 +143,40 @@ class CentroidsGenerator { #pragma omp parallel for for (uint32_t c_index = 0; c_index < _num_centroids; c_index++) { for (uint32_t dim_index = 0; dim_index < _dim; dim_index++) { - _centroids[c_index * _dim + dim_index] = - counts[c_index] - ? sums[c_index * _dim + dim_index] / counts[c_index] - : _centroids[c_index * _dim + dim_index]; + _centroids[c_index * _dim + dim_index] = counts[c_index] + ? sums[c_index * _dim + dim_index] / counts[c_index] + : _centroids[c_index * _dim + dim_index]; } } } } - inline const float *centroids() const { return _centroids.data(); } + inline const float* centroids() const { return _centroids.data(); } - inline void setInitializationType(const std::string &initialization_type) { + inline void setInitializationType(const std::string& initialization_type) { _initialization_type = initialization_type; } -private: + private: /** * @brief Initialize the centroids by randomly sampling k centroids among the * n data points * @param data The input data points * @param n The number of data points */ - void randomInitialize(const float *data, uint64_t n) { + void randomInitialize(const float* data, uint64_t n) { std::vector indices(n); std::iota(indices.begin(), indices.end(), 0); std::mt19937 generator(_seed + 1); std::vector sample_indices(_num_centroids); - std::sample(indices.begin(), indices.end(), sample_indices.begin(), - _num_centroids, generator); + std::sample(indices.begin(), indices.end(), sample_indices.begin(), _num_centroids, generator); for (uint32_t i = 0; i < _num_centroids; i++) { auto sample_index = sample_indices[i]; for (uint32_t dim_index = 0; dim_index < _dim; dim_index++) { - _centroids[(i * _dim) + dim_index] = - data[(sample_index * _dim) + dim_index]; + _centroids[(i * _dim) + dim_index] = data[(sample_index * _dim) + dim_index]; } } } @@ -204,9 +196,8 @@ class CentroidsGenerator { * @param data The input data points * @param n The number of data points */ - void kmeansPlusPlusInitialize( - const float *data, uint64_t n, - const std::function &distance_func) { + void kmeansPlusPlusInitialize(const float* data, uint64_t n, + const std::function& distance_func) { std::mt19937 generator(_seed); std::uniform_int_distribution distribution(0, n - 1); @@ -216,8 +207,7 @@ class CentroidsGenerator { _centroids[dim_index] = data[first_centroid_index * _dim + dim_index]; } - std::vector min_squared_distances( - n, std::numeric_limits::max()); + std::vector min_squared_distances(n, std::numeric_limits::max()); // Step 2. For k-1 remaining centroids for (uint32_t cent_idx = 1; cent_idx < _num_centroids; cent_idx++) { @@ -230,8 +220,8 @@ class CentroidsGenerator { for (uint64_t c = 0; c < cent_idx; c++) { - float *centroid = const_cast(_centroids.data() + (c * _dim)); - float *vector = const_cast(data + (i * _dim)); + float* centroid = const_cast(_centroids.data() + (c * _dim)); + float* vector = const_cast(data + (i * _dim)); auto distance = distance_func(centroid, vector); if (distance < min_distance) { @@ -256,8 +246,7 @@ class CentroidsGenerator { // Add selected centroid the the centroids array for (uint32_t dim_index = 0; dim_index < _dim; dim_index++) { - _centroids[cent_idx * _dim + dim_index] = - data[next_centroid_index * _dim + dim_index]; + _centroids[cent_idx * _dim + dim_index] = data[next_centroid_index * _dim + dim_index]; } } } @@ -288,7 +277,7 @@ class CentroidsGenerator { */ - void hypercubeInitialize(const float *data, uint64_t n) { + void hypercubeInitialize(const float* data, uint64_t n) { std::vector means(_dim); for (uint64_t vec_index = 0; vec_index < n; vec_index++) { @@ -304,11 +293,11 @@ class CentroidsGenerator { maxm = fabs(means[dim_index]) > maxm ? fabs(means[dim_index]) : maxm; } - float *centroids = _centroids.data(); + float* centroids = _centroids.data(); auto num_bits = log2(_num_centroids); for (uint32_t i = 0; i < _num_centroids; i++) { - float *centroid = const_cast(centroids + (i * _dim)); + float* centroid = const_cast(centroids + (i * _dim)); for (uint32_t j = 0; j < num_bits; j++) { centroid[j] = means[j] + (((i >> j) & 1) ? 1 : -1) * maxm; } @@ -341,10 +330,11 @@ class CentroidsGenerator { std::string _initialization_type; friend class cereal::access; - template void serialize(Archive &ar) { - ar(_dim, _num_centroids, _centroids, _clustering_iterations, _normalized, - _verbose, _centroids_initialized, _seed, _initialization_type); + template + void serialize(Archive& ar) { + ar(_dim, _num_centroids, _centroids, _clustering_iterations, _normalized, _verbose, + _centroids_initialized, _seed, _initialization_type); } }; -} // namespace flatnav::quantization \ No newline at end of file +} // namespace flatnav::quantization \ No newline at end of file diff --git a/developmental-features/quantization/ProductQuantization.h b/developmental-features/quantization/ProductQuantization.h index a1eb3bb..172b3f3 100644 --- a/developmental-features/quantization/ProductQuantization.h +++ b/developmental-features/quantization/ProductQuantization.h @@ -1,5 +1,9 @@ #pragma once +#include +#include +#include +#include #include #include #include @@ -10,10 +14,6 @@ #include #include #include -#include -#include -#include -#include #include #ifdef _OPENMP @@ -36,7 +36,8 @@ using flatnav::distances::SquaredL2Distance; using flatnav::quantization::CentroidsGenerator; using flatnav::util::DataType; -template struct PQCodeManager { +template +struct PQCodeManager { // This is an array that represents a quantization code for // a given vector. For instance, if x = [x_0, ..., x_{m-1}] // is subdivided into 8 subvectors, each of size m/8, we will @@ -45,17 +46,17 @@ template struct PQCodeManager { // // NOTE: code here means the index of the local centroid that // minimizes the (squared) distance between a given subvector and itself. - n_bits_t *code; - n_bits_t *start; + n_bits_t* code; + n_bits_t* start; // Indicates if the code manager has already been redirected to the // start of the encoding so that we don't do this more than once (to // avoid segfaults while decoding). bool code_manager_already_set_to_start; - PQCodeManager(uint8_t *code, uint32_t nbits) - : code(reinterpret_cast(code)), - start(reinterpret_cast(code)), + PQCodeManager(uint8_t* code, uint32_t nbits) + : code(reinterpret_cast(code)), + start(reinterpret_cast(code)), code_manager_already_set_to_start(false) { assert(nbits == 8 * sizeof(n_bits_t)); } @@ -87,14 +88,13 @@ template struct PQCodeManager { * */ -class ProductQuantizer - : public flatnav::distances::DistanceInterface { +class ProductQuantizer : public flatnav::distances::DistanceInterface { friend class flatnav::distances::DistanceInterface; // Represents the block size used in ProductQuantizer::computePQCodes static const uint64_t BLOCK_SIZE = 256 * 1024; -public: + public: // Constructor for serializaiton ProductQuantizer() = default; @@ -108,14 +108,17 @@ class ProductQuantizer * This will be possible once the PQ integration with the flatnav * index is complete. */ - ProductQuantizer(uint32_t dim, uint32_t M, uint32_t nbits, - MetricType metric_type) - : _num_subquantizers(M), _num_bits(nbits), _is_trained(false), - _metric_type(metric_type), _train_type(TrainType::DEFAULT) { + ProductQuantizer(uint32_t dim, uint32_t M, uint32_t nbits, MetricType metric_type) + : _num_subquantizers(M), + _num_bits(nbits), + _is_trained(false), + _metric_type(metric_type), + _train_type(TrainType::DEFAULT) { if (dim % _num_subquantizers) { - throw std::invalid_argument("The dataset dimension must be a multiple of " - "the desired number of sub-quantizers."); + throw std::invalid_argument( + "The dataset dimension must be a multiple of " + "the desired number of sub-quantizers."); } _code_size = (_num_bits * 8 + 7) / 8; _subvector_dim = dim / _num_subquantizers; @@ -135,13 +138,13 @@ class ProductQuantizer } // Return a pointer to the centroids associated with a given subvector - const float *getCentroids(uint32_t subvector_index, uint32_t i) const { + const float* getCentroids(uint32_t subvector_index, uint32_t i) const { auto index = (subvector_index * _subq_centroids_count + i) * _subvector_dim; return &_centroids[index]; } - void setParameters(const float *centroids_, int m) { - float *centroids = const_cast(getCentroids(m, 0)); + void setParameters(const float* centroids_, int m) { + float* centroids = const_cast(getCentroids(m, 0)); auto bytes_to_copy = _subq_centroids_count * _subvector_dim * sizeof(float); std::memcpy(centroids, centroids_, bytes_to_copy); @@ -153,14 +156,14 @@ class ProductQuantizer * @param vector * @param code */ - void computePQCode(const float *vector, uint8_t *code) const { + void computePQCode(const float* vector, uint8_t* code) const { std::vector distances(_subq_centroids_count); PQCodeManager code_manager(/* code = */ code, /* nbits = */ 8); for (uint32_t m = 0; m < _num_subquantizers; m++) { - const float *subvector = vector + (m * _subvector_dim); + const float* subvector = vector + (m * _subvector_dim); uint64_t minimizer_index = flatnav::distanceWithKNeighbors( /* distances_tmp_buffer = */ distances.data(), /* x = */ subvector, /* y = */ getCentroids(m, 0), /* dim = */ _subvector_dim, @@ -180,15 +183,14 @@ class ProductQuantizer * @param codes quantization codes * @param n total number of vectors */ - void computePQCodes(const float *vectors, uint8_t *codes, uint64_t n) const { + void computePQCodes(const float* vectors, uint8_t* codes, uint64_t n) const { // process by blocks to avoid using too much RAM auto dim = _subvector_dim * _num_subquantizers; if (n > BLOCK_SIZE) { for (uint64_t i0 = 0; i0 < n; i0 += BLOCK_SIZE) { auto i1 = std::min(i0 + BLOCK_SIZE, n); - computePQCodes(vectors + (dim * i0), codes + (_code_size * i0), - i1 - i0); + computePQCodes(vectors + (dim * i0), codes + (_code_size * i0), i1 - i0); } return; } @@ -205,7 +207,7 @@ class ProductQuantizer * @param vectors Vectors to use for quantization * @param n Number of vectors */ - void train(const float *vectors, uint64_t n) { + void train(const float* vectors, uint64_t n) { CentroidsGenerator centroids_generator( /* dim = */ _subvector_dim, @@ -226,8 +228,7 @@ class ProductQuantizer TrainType final_train_type = _train_type; - if (_train_type == TrainType::HYPERCUBE || - _train_type == TrainType::HYPERCUBE_PCA) { + if (_train_type == TrainType::HYPERCUBE || _train_type == TrainType::HYPERCUBE_PCA) { if (_subvector_dim < _num_bits) { final_train_type = TrainType::DEFAULT; std::cout << "[pq-train-warning] cannot train hypercube with num " @@ -236,29 +237,28 @@ class ProductQuantizer } } - float *slice = new float[n * _subvector_dim]; + float* slice = new float[n * _subvector_dim]; auto dim = _subvector_dim * _num_subquantizers; // Arrange the vectors such that the first subvector of each vector is // contiguous, then the second subvector, and so on. for (uint32_t m = 0; m < _num_subquantizers; m++) { for (uint64_t vec_index = 0; vec_index < n; vec_index++) { - std::memcpy(slice + (vec_index * _subvector_dim), - vectors + (vec_index * dim) + (m * _subvector_dim), + std::memcpy(slice + (vec_index * _subvector_dim), vectors + (vec_index * dim) + (m * _subvector_dim), _subvector_dim * sizeof(float)); } switch (final_train_type) { - case TrainType::HYPERCUBE: - centroids_generator.setInitializationType("hypercube"); - break; + case TrainType::HYPERCUBE: + centroids_generator.setInitializationType("hypercube"); + break; - case TrainType::HOT_START: - std::memcpy((void *)centroids_generator.centroids(), getCentroids(m, 0), - _subvector_dim * _subq_centroids_count * sizeof(float)); - break; + case TrainType::HOT_START: + std::memcpy((void*)centroids_generator.centroids(), getCentroids(m, 0), + _subvector_dim * _subq_centroids_count * sizeof(float)); + break; - default:; + default:; } // generate the actual centroids @@ -283,23 +283,22 @@ class ProductQuantizer * @param code Code corresponding to the given vector * @param vector Vector to decode */ - void decode(const uint8_t *code, float *vector) const { + void decode(const uint8_t* code, float* vector) const { // TODO check whether this const_cast does not cause any issues PQCodeManager code_manager( - /* code = */ const_cast(code), + /* code = */ const_cast(code), /* nbits = */ 8); for (uint32_t m = 0; m < _num_subquantizers; m++) { uint64_t code_ = code_manager.decode(); - std::memcpy(vector + (m * _subvector_dim), getCentroids(m, 0), - sizeof(float) * _subvector_dim); + std::memcpy(vector + (m * _subvector_dim), getCentroids(m, 0), sizeof(float) * _subvector_dim); } } /** * @brief Decode multiple vectors given their respective codes. */ - void decode(const uint8_t *code, float *vectors, uint64_t n) const { + void decode(const uint8_t* code, float* vectors, uint64_t n) const { auto dim = _subvector_dim * _num_subquantizers; for (uint64_t vec_index = 0; vec_index < n; vec_index++) { decode(code + (vec_index * _code_size), vectors + (vec_index * dim)); @@ -315,10 +314,8 @@ class ProductQuantizer * @param dist_table output table, size (_num_subquantizers x * _subq_centroids_count) */ - void - computeDistanceTable(const float *vector, float *dist_table, - const std::function - &dist_func) const { + void computeDistanceTable(const float* vector, float* dist_table, + const std::function& dist_func) const { for (uint32_t m = 0; m < _num_subquantizers; m++) { flatnav::copyDistancesIntoBuffer( @@ -330,17 +327,14 @@ class ProductQuantizer } } - void computeDistanceTables(const float *vectors, float *dist_tables, - uint64_t n) const { + void computeDistanceTables(const float* vectors, float* dist_tables, uint64_t n) const { // TODO: Use SIMD auto dim = _subvector_dim * _num_subquantizers; #pragma omp parallel for if (n > 1) for (uint64_t i = 0; i < n; i++) { computeDistanceTable(vectors + (i * dim), - dist_tables + - (i * _subq_centroids_count * _num_subquantizers), - _dist_func); + dist_tables + (i * _subq_centroids_count * _num_subquantizers), _dist_func); } } @@ -352,9 +346,9 @@ class ProductQuantizer inline size_t dataSizeImpl() { return getCodeSize(); } - void transformDataImpl(void *destination, const void *src) { - uint8_t *code = new uint8_t[_code_size](); - computePQCode(static_cast(src), code); + void transformDataImpl(void* destination, const void* src) { + uint8_t* code = new uint8_t[_code_size](); + computePQCode(static_cast(src), code); std::memcpy(destination, code, _code_size); @@ -370,13 +364,13 @@ class ProductQuantizer * @param y database vector * @return */ - float asymmetricDistanceImpl(const void *x, const void *y) const { + float asymmetricDistanceImpl(const void* x, const void* y) const { assert(_is_trained); - float *x_ptr = (float *)(x); - uint8_t *y_ptr = (uint8_t *)(y); + float* x_ptr = (float*)(x); + uint8_t* y_ptr = (uint8_t*)(y); - float *dist_table = new float[_subq_centroids_count * _num_subquantizers]; + float* dist_table = new float[_subq_centroids_count * _num_subquantizers]; computeDistanceTable(/* vector = */ x_ptr, /* dist_table = */ dist_table, @@ -398,16 +392,16 @@ class ProductQuantizer * @param y * @return */ - float symmetricDistanceImpl(const void *x, const void *y) const { + float symmetricDistanceImpl(const void* x, const void* y) const { assert(_is_trained); - uint8_t *code1 = (uint8_t *)(x); - uint8_t *code2 = (uint8_t *)(y); + uint8_t* code1 = (uint8_t*)(x); + uint8_t* code2 = (uint8_t*)(y); float distance = 0.0; // Get a pointer to the distance table for the first subquantizer - const float *dist_table = _symmetric_distance_tables.data(); + const float* dist_table = _symmetric_distance_tables.data(); for (uint32_t m = 0; m < _num_subquantizers; m++) { distance += dist_table[(code1[m] * _subq_centroids_count) + code2[m]]; @@ -416,7 +410,7 @@ class ProductQuantizer return distance; } - float distanceImpl(const void *x, const void *y, bool asymmetric) const { + float distanceImpl(const void* x, const void* y, bool asymmetric) const { if (asymmetric) { return asymmetricDistanceImpl(x, y); } @@ -426,15 +420,10 @@ class ProductQuantizer void getSummaryImpl() const { std::cout << "\nProduct Quantizer Parameters" << std::flush; std::cout << "-----------------------------" << std::flush; - std::cout << "Number of subquantizers (M): " << _num_subquantizers << "\n" - << std::flush; - std::cout << "Number of bits per index: " << _num_bits << "\n" - << std::flush; - std::cout << "Subvector dimension: " << _subvector_dim << "\n" - << std::flush; - std::cout << "Subquantizer centroids count: " << _subq_centroids_count - << "\n" - << std::flush; + std::cout << "Number of subquantizers (M): " << _num_subquantizers << "\n" << std::flush; + std::cout << "Number of bits per index: " << _num_bits << "\n" << std::flush; + std::cout << "Subvector dimension: " << _subvector_dim << "\n" << std::flush; + std::cout << "Subquantizer centroids count: " << _subq_centroids_count << "\n" << std::flush; std::cout << "Code size: " << _code_size << "\n" << std::flush; std::cout << "Is trained: " << _is_trained << "\n" << std::flush; std::cout << "Train type: " << _train_type << "\n" << std::flush; @@ -450,26 +439,21 @@ class ProductQuantizer inline bool isTrained() const { return _is_trained; } -private: + private: // NOTE: This is a hack to get around the fact that the PQ class needs to know // which distance function to use. So, this function allows us to just extract // the distance function pointer since that's the only thing we care about. // There's gotta be a cleaner way to not have to do this, but this will do for // now. - std::function - getDistFuncFromVariant() const { + std::function getDistFuncFromVariant() const { if (_distance.index() == 0) { - return [local_distance = _distance](const float *a, - const float *b) -> float { - return std::get>(local_distance) - .distanceImpl(a, b); + return [local_distance = _distance](const float* a, const float* b) -> float { + return std::get>(local_distance).distanceImpl(a, b); }; } - return [local_distance = _distance](const float *a, - const float *b) -> float { - return std::get>(local_distance) - .distanceImpl(a, b); + return [local_distance = _distance](const float* a, const float* b) -> float { + return std::get>(local_distance).distanceImpl(a, b); }; } @@ -489,19 +473,16 @@ class ProductQuantizer * */ void computeSymmetricDistanceTables() { - _symmetric_distance_tables.resize( - _num_subquantizers * _subq_centroids_count * _subq_centroids_count); + _symmetric_distance_tables.resize(_num_subquantizers * _subq_centroids_count * _subq_centroids_count); #pragma omp parallel for - for (uint64_t mk = 0; mk < _num_subquantizers * _subq_centroids_count; - mk++) { + for (uint64_t mk = 0; mk < _num_subquantizers * _subq_centroids_count; mk++) { auto m = mk / _subq_centroids_count; auto k = mk % _subq_centroids_count; - const float *centroids = - _centroids.data() + (m * _subq_centroids_count * _subvector_dim); - const float *centroid_k = centroids + (k * _subvector_dim); - float *dist_table = _symmetric_distance_tables.data() + - (m * _subq_centroids_count * _subq_centroids_count); + const float* centroids = _centroids.data() + (m * _subq_centroids_count * _subvector_dim); + const float* centroid_k = centroids + (k * _subvector_dim); + float* dist_table = + _symmetric_distance_tables.data() + (m * _subq_centroids_count * _subq_centroids_count); flatnav::copyDistancesIntoBuffer( /* distances_buffer = */ dist_table + (k * _subq_centroids_count), @@ -552,30 +533,28 @@ class ProductQuantizer // Initialization enum TrainType { DEFAULT, - HOT_START, // The centroids are already initialized - SHARED, // Share dictionary across PQ segments - HYPERCUBE, // Initialize centroids with nbits-D hypercube - HYPERCUBE_PCA, // Initialize centroids with nbits-D hypercube post PCA - // pre-processing. For now, this is not implemented. FAISS - // seems to believe that this is a good initialization, so we - // might test it out to see if it actually works well. + HOT_START, // The centroids are already initialized + SHARED, // Share dictionary across PQ segments + HYPERCUBE, // Initialize centroids with nbits-D hypercube + HYPERCUBE_PCA, // Initialize centroids with nbits-D hypercube post PCA + // pre-processing. For now, this is not implemented. FAISS + // seems to believe that this is a good initialization, so we + // might test it out to see if it actually works well. }; TrainType _train_type; - std::variant, - InnerProductDistance> - _distance; + std::variant, InnerProductDistance> _distance; - std::function _dist_func; + std::function _dist_func; friend class ::cereal::access; - template void serialize(Archive &archive) { + template + void serialize(Archive& archive) { - archive(_code_size, _num_subquantizers, _num_bits, _subvector_dim, - _subq_centroids_count, _centroids, _symmetric_distance_tables, - _is_trained, _metric_type, _train_type); + archive(_code_size, _num_subquantizers, _num_bits, _subvector_dim, _subq_centroids_count, _centroids, + _symmetric_distance_tables, _is_trained, _metric_type, _train_type); if constexpr (Archive::is_loading::value) { // loading PQ @@ -591,4 +570,4 @@ class ProductQuantizer } }; -} // namespace flatnav::quantization \ No newline at end of file +} // namespace flatnav::quantization \ No newline at end of file diff --git a/developmental-features/quantization/Utils.h b/developmental-features/quantization/Utils.h index b77b587..9275d16 100644 --- a/developmental-features/quantization/Utils.h +++ b/developmental-features/quantization/Utils.h @@ -1,9 +1,9 @@ #pragma once -#include #include #include +#include #include #include @@ -31,10 +31,9 @@ namespace flatnav { * @param dim * @param target_set_size */ -static void copyDistancesIntoBuffer( - float *distances_buffer, const float *x, const float *y, uint32_t dim, - uint64_t target_set_size, - const std::function &dist_func) { +static void copyDistancesIntoBuffer(float* distances_buffer, const float* x, const float* y, uint32_t dim, + uint64_t target_set_size, + const std::function& dist_func) { for (uint64_t i = 0; i < target_set_size; i++) { distances_buffer[i] = dist_func(x, y); @@ -55,16 +54,14 @@ static void copyDistancesIntoBuffer( * @return 0 if target_set_size equals 0. Otherwise, the index of the * nearest vector. */ -static uint64_t distanceWithKNeighbors( - float *distances_buffer, const float *x, const float *y, uint32_t dim, - uint64_t target_set_size, - const std::function &dist_func) { +static uint64_t distanceWithKNeighbors(float* distances_buffer, const float* x, const float* y, uint32_t dim, + uint64_t target_set_size, + const std::function& dist_func) { if (target_set_size == 0) { return 0; } - copyDistancesIntoBuffer(distances_buffer, x, y, dim, target_set_size, - dist_func); + copyDistancesIntoBuffer(distances_buffer, x, y, dim, target_set_size, dist_func); uint64_t minimizer = 0; float minimum_distance = std::numeric_limits::max(); @@ -77,4 +74,4 @@ static uint64_t distanceWithKNeighbors( return minimizer; } -} // namespace flatnav \ No newline at end of file +} // namespace flatnav \ No newline at end of file diff --git a/tools/cereal_tests.cpp b/tools/cereal_tests.cpp index 4f6cceb..cd409d2 100644 --- a/tools/cereal_tests.cpp +++ b/tools/cereal_tests.cpp @@ -1,10 +1,10 @@ -#include "cnpy.h" -#include #include #include #include #include +#include #include +#include "cnpy.h" using flatnav::Index; using flatnav::distances::DistanceInterface; @@ -13,16 +13,13 @@ using flatnav::distances::SquaredL2Distance; using flatnav::util::DataType; template -void serializeIndex(float *data, - std::unique_ptr> &&distance, - int N, int M, int dim, int ef_construction, - const std::string &save_file) { - std::unique_ptr> index = - std::make_unique>( - /* dist = */ std::move(distance), /* dataset_size = */ N, - /* max_edges = */ M); +void serializeIndex(float* data, std::unique_ptr>&& distance, int N, int M, int dim, + int ef_construction, const std::string& save_file) { + std::unique_ptr> index = std::make_unique>( + /* dist = */ std::move(distance), /* dataset_size = */ N, + /* max_edges = */ M); - float *element = new float[dim]; + float* element = new float[dim]; std::vector labels(N); std::iota(labels.begin(), labels.end(), 0); @@ -39,15 +36,14 @@ void serializeIndex(float *data, assert(new_index->dataSizeBytes() == distance->dataSize() + (32 * M) + 32); assert(new_index->maxNodeCount() == N); - uint64_t total_index_size = - new_index->nodeSizeBytes() * new_index->maxNodeCount(); + uint64_t total_index_size = new_index->nodeSizeBytes() * new_index->maxNodeCount(); for (uint64_t i = 0; i < total_index_size; i++) { assert(index->indexMemory()[i] == new_index->indexMemory()[i] * 2); } } -int main(int argc, char **argv) { +int main(int argc, char** argv) { if (argc < 2) { std::cout << "Usage: " << argv[0] << " \n" << std::flush; std::cout << "data.npy: Path to a NPY file for MNIST\n" << std::flush; @@ -59,11 +55,10 @@ int main(int argc, char **argv) { int ef_construction = 100; int dim = 784; int N = 60000; - float *data = datafile.data(); + float* data = datafile.data(); auto l2_distance = SquaredL2Distance::create(dim); - serializeIndex>( - data, std::move(l2_distance), N, M, dim, ef_construction, - std::string("l2_flatnav.bin")); + serializeIndex>(data, std::move(l2_distance), N, M, dim, + ef_construction, std::string("l2_flatnav.bin")); // auto inner_product_distance = // std::make_unique>(dim); diff --git a/tools/construct_npy.cpp b/tools/construct_npy.cpp index 4be1df3..e9ba9eb 100644 --- a/tools/construct_npy.cpp +++ b/tools/construct_npy.cpp @@ -1,24 +1,24 @@ -#include "cnpy.h" -#include -#include -#include +#include #include #include #include #include #include +#include +#include +#include #include #include #include #include #include -#include #include #include #include #include #include #include +#include "cnpy.h" using flatnav::Index; using flatnav::distances::DistanceInterface; @@ -28,10 +28,8 @@ using flatnav::quantization::ProductQuantizer; using flatnav::util::DataType; template -void buildIndex(float *data, - std::unique_ptr> distance, int N, - int M, int dim, int ef_construction, int build_num_threads, - const std::string &save_file) { +void buildIndex(float* data, std::unique_ptr> distance, int N, int M, int dim, + int ef_construction, int build_num_threads, const std::string& save_file) { auto index = new Index( /* dist = */ std::move(distance), /* dataset_size = */ N, @@ -43,15 +41,13 @@ void buildIndex(float *data, std::vector labels(N); std::iota(labels.begin(), labels.end(), 0); - index->template addBatch(/* data = */ (void *)data, + index->template addBatch(/* data = */ (void*)data, /* labels = */ labels, /* ef_construction */ ef_construction); auto stop = std::chrono::high_resolution_clock ::now(); - auto duration = - std::chrono::duration_cast(stop - start); - std::clog << "Build time: " << (float)duration.count() << " milliseconds" - << std::endl; + auto duration = std::chrono::duration_cast(stop - start); + std::clog << "Build time: " << (float)duration.count() << " milliseconds" << std::endl; std::clog << "Saving index to: " << save_file << std::endl; index->saveIndex(/* filename = */ save_file); @@ -59,9 +55,8 @@ void buildIndex(float *data, delete index; } -void run(float *data, flatnav::distances::MetricType metric_type, int N, int M, - int dim, int ef_construction, int build_num_threads, - const std::string &save_file, bool quantize = false) { +void run(float* data, flatnav::distances::MetricType metric_type, int N, int M, int dim, int ef_construction, + int build_num_threads, const std::string& save_file, bool quantize = false) { if (quantize) { // Parameters M and nbits should be adjusted accordingly. @@ -72,41 +67,35 @@ void run(float *data, flatnav::distances::MetricType metric_type, int N, int M, auto start = std::chrono::high_resolution_clock::now(); quantizer->train(/* vectors = */ data, /* num_vectors = */ N); auto stop = std::chrono::high_resolution_clock::now(); - auto duration = - std::chrono::duration_cast(stop - start); - std::clog << "Quantization time: " << (float)duration.count() - << " milliseconds" << std::endl; + auto duration = std::chrono::duration_cast(stop - start); + std::clog << "Quantization time: " << (float)duration.count() << " milliseconds" << std::endl; - buildIndex(data, std::move(quantizer), N, M, dim, - ef_construction, build_num_threads, save_file); + buildIndex(data, std::move(quantizer), N, M, dim, ef_construction, build_num_threads, + save_file); } else { if (metric_type == flatnav::distances::MetricType::L2) { auto distance = SquaredL2Distance<>::create(dim); - buildIndex>( - data, std::move(distance), N, M, dim, ef_construction, - build_num_threads, save_file); + buildIndex>(data, std::move(distance), N, M, dim, ef_construction, + build_num_threads, save_file); } else if (metric_type == flatnav::distances::MetricType::IP) { auto distance = InnerProductDistance<>::create(dim); - buildIndex>( - data, std::move(distance), N, M, dim, ef_construction, - build_num_threads, save_file); + buildIndex>(data, std::move(distance), N, M, dim, + ef_construction, build_num_threads, save_file); } } } -int main(int argc, char **argv) { +int main(int argc, char** argv) { if (argc < 8) { std::clog << "Usage: " << std::endl; std::clog << "construct " " " << std::endl; - std::clog << "\t int, 0 for no quantization, 1 for quantization" - << std::endl; - std::clog << "\t int, 0 for L2, 1 for inner product (angular)" - << std::endl; + std::clog << "\t int, 0 for no quantization, 1 for quantization" << std::endl; + std::clog << "\t int, 0 for L2, 1 for inner product (angular)" << std::endl; std::clog << "\t npy file from ann-benchmarks" << std::endl; std::clog << "\t : int " << std::endl; std::clog << "\t : int " << std::endl; @@ -129,12 +118,10 @@ int main(int argc, char **argv) { int dim = datafile.shape[1]; int N = datafile.shape[0]; - std::clog << "Loading " << dim << "-dimensional dataset with N = " << N - << std::endl; - float *data = datafile.data(); + std::clog << "Loading " << dim << "-dimensional dataset with N = " << N << std::endl; + float* data = datafile.data(); flatnav::distances::MetricType metric_type = - metric_id == 0 ? flatnav::distances::MetricType::L2 - : flatnav::distances::MetricType::IP; + metric_id == 0 ? flatnav::distances::MetricType::L2 : flatnav::distances::MetricType::IP; run(/* data = */ data, /* metric_type = */ metric_type, diff --git a/tools/flatnav_pq.cpp b/tools/flatnav_pq.cpp index a9da320..d46e4c9 100644 --- a/tools/flatnav_pq.cpp +++ b/tools/flatnav_pq.cpp @@ -1,10 +1,9 @@ -#include "cnpy.h" -#include -#include -#include #include #include #include +#include +#include +#include #include #include #include @@ -12,16 +11,15 @@ #include #include #include +#include "cnpy.h" using flatnav::Index; using flatnav::distances::InnerProductDistance; using flatnav::distances::SquaredL2Distance; template -void run( - float *data, - std::unique_ptr> &&distance, - int N, int M, int dim, int ef_construction, const std::string &save_file) { +void run(float* data, std::unique_ptr>&& distance, int N, int M, + int dim, int ef_construction, const std::string& save_file) { auto index = new Index( /* dist = */ std::move(distance), /* dataset_size = */ N, /* max_edges = */ M); @@ -29,8 +27,8 @@ void run( auto start = std::chrono::high_resolution_clock::now(); for (int label = 0; label < N; label++) { - float *element = data + (dim * label); - index->add(/* data = */ (void *)element, /* label = */ label, + float* element = data + (dim * label); + index->add(/* data = */ (void*)element, /* label = */ label, /* ef_construction */ ef_construction); if (label % 100000 == 0) std::clog << "." << std::flush; @@ -38,10 +36,8 @@ void run( std::clog << std::endl; auto stop = std::chrono::high_resolution_clock::now(); - auto duration = - std::chrono::duration_cast(stop - start); - std::clog << "Build time: " << (float)duration.count() << " milliseconds" - << std::endl; + auto duration = std::chrono::duration_cast(stop - start); + std::clog << "Build time: " << (float)duration.count() << " milliseconds" << std::endl; std::clog << "Saving index to: " << save_file << std::endl; index->saveIndex(/* filename = */ save_file); @@ -49,8 +45,7 @@ void run( delete index; } -std::vector quantize(float *vectors, uint64_t vec_count, uint32_t dim, - uint32_t M, uint32_t nbits) { +std::vector quantize(float* vectors, uint64_t vec_count, uint32_t dim, uint32_t M, uint32_t nbits) { auto distance = std::make_unique(dim); ProductQuantizer pq(/* dist = */ std::move(distance), /* dim = */ dim, /* M = */ M, @@ -68,12 +63,12 @@ std::vector quantize(float *vectors, uint64_t vec_count, uint32_t dim, std::cout << "[INFO] Saving codes to: " << "codes.bin" << std::endl; std::ofstream stream("codes.bin"); - stream.write((char *)codes.data(), codes.size()); + stream.write((char*)codes.data(), codes.size()); return codes; } -int main(int argc, char **argv) { +int main(int argc, char** argv) { // Quantize const bool quantize = false; @@ -89,22 +84,20 @@ int main(int argc, char **argv) { const int dataset_size = 60000; // datafile - const char *filename = "mnnist-784-euclidean.train.npy"; + const char* filename = "mnnist-784-euclidean.train.npy"; cnpy::NpyArray datafile = cnpy::npy_load(filename); assert(datafile.shape.size() == 2); assert(datafile.shape[0] == dataset_size); assert(datafile.shape[1] == dim); - std::clog << "Loading " << dim - << "-dimensional dataset with N = " << dataset_size << std::endl; - float *data = datafile.data(); + std::clog << "Loading " << dim << "-dimensional dataset with N = " << dataset_size << std::endl; + float* data = datafile.data(); if (quantize) { // NOTE: M here is different from max_edges. - std::vector codes = - quantize(/* vectors = */ data, /* vec_count = */ dataset_size, - /* dim = */ dim, /* M = */ 8, /* nbits = */ 8); + std::vector codes = quantize(/* vectors = */ data, /* vec_count = */ dataset_size, + /* dim = */ dim, /* M = */ 8, /* nbits = */ 8); } auto distance = std::make_unique(dim); @@ -114,8 +107,8 @@ int main(int argc, char **argv) { auto start = std::chrono::high_resolution_clock::now(); for (int label = 0; label < N; label++) { - float *element = data + (dim * label); - index->add(/* data = */ (void *)element, /* label = */ label, + float* element = data + (dim * label); + index->add(/* data = */ (void*)element, /* label = */ label, /* ef_construction */ ef_construction); if (label % 100000 == 0) std::clog << "." << std::flush; @@ -123,10 +116,8 @@ int main(int argc, char **argv) { std::clog << std::endl; auto stop = std::chrono::high_resolution_clock::now(); - auto duration = - std::chrono::duration_cast(stop - start); - std::clog << "Build time: " << (float)duration.count() << " milliseconds" - << std::endl; + auto duration = std::chrono::duration_cast(stop - start); + std::clog << "Build time: " << (float)duration.count() << " milliseconds" << std::endl; if (metric_id == 0) { auto distance = std::make_unique(dim); @@ -143,8 +134,7 @@ int main(int argc, char **argv) { /* N = */ N, /* M = */ M, dim, /* ef_construction = */ ef_construction, /* save_file = */ argv[5]); } else { - throw std::invalid_argument("Provided metric ID " + - std::to_string(metric_id) + "is invalid."); + throw std::invalid_argument("Provided metric ID " + std::to_string(metric_id) + "is invalid."); } return 0; diff --git a/tools/query_npy.cpp b/tools/query_npy.cpp index 3089679..42a9ecb 100644 --- a/tools/query_npy.cpp +++ b/tools/query_npy.cpp @@ -1,20 +1,20 @@ -#include -#include +#include #include #include #include #include +#include +#include #include #include -#include #include #include #include -#include "cnpy.h" #include #include #include +#include "cnpy.h" using flatnav::Index; using flatnav::distances::InnerProductDistance; @@ -23,12 +23,10 @@ using flatnav::quantization::ProductQuantizer; using flatnav::util::DataType; template -void run(float *queries, int *gtruth, const std::string &index_filename, - const std::vector &ef_searches, int K, int num_queries, - int num_gtruth, int dim, bool reorder = true) { +void run(float* queries, int* gtruth, const std::string& index_filename, const std::vector& ef_searches, + int K, int num_queries, int num_gtruth, int dim, bool reorder = true) { - std::unique_ptr> index = - Index::loadIndex(index_filename); + std::unique_ptr> index = Index::loadIndex(index_filename); std::cout << "[INFO] Index loaded" << std::endl; index->getIndexSummary(); @@ -38,22 +36,19 @@ void run(float *queries, int *gtruth, const std::string &index_filename, auto start_r = std::chrono::high_resolution_clock::now(); index->reorderGOrder(); auto stop_r = std::chrono::high_resolution_clock::now(); - auto duration_r = - std::chrono::duration_cast(stop_r - start_r); - std::clog << "Reordering time: " << (float)(duration_r.count()) / (1000.0) - << " seconds" << std::endl; + auto duration_r = std::chrono::duration_cast(stop_r - start_r); + std::clog << "Reordering time: " << (float)(duration_r.count()) / (1000.0) << " seconds" << std::endl; } - for (const auto &ef_search : ef_searches) { + for (const auto& ef_search : ef_searches) { double mean_recall = 0; auto start_q = std::chrono::high_resolution_clock::now(); for (int i = 0; i < num_queries; i++) { - float *q = queries + dim * i; - int *g = gtruth + num_gtruth * i; + float* q = queries + dim * i; + int* g = gtruth + num_gtruth * i; - std::vector> result = - index->search(q, K, ef_search); + std::vector> result = index->search(q, K, ef_search); double recall = 0; for (int j = 0; j < K; j++) { @@ -67,15 +62,13 @@ void run(float *queries, int *gtruth, const std::string &index_filename, mean_recall = mean_recall + recall; } auto stop_q = std::chrono::high_resolution_clock::now(); - auto duration_q = - std::chrono::duration_cast(stop_q - start_q); + auto duration_q = std::chrono::duration_cast(stop_q - start_q); std::cout << "[INFO] Mean Recall: " << mean_recall / num_queries - << ", Duration:" << (float)(duration_q.count()) / num_queries - << " milliseconds" << std::endl; + << ", Duration:" << (float)(duration_q.count()) / num_queries << " milliseconds" << std::endl; } } -int main(int argc, char **argv) { +int main(int argc, char** argv) { if (argc < 9) { std::clog << "Usage: " << std::endl; @@ -89,10 +82,8 @@ int main(int argc, char **argv) { std::clog << "\t : int " << std::endl; std::clog << "\t : int,int,int,int...,int " << std::endl; std::clog << "\t : number of neighbors " << std::endl; - std::clog << "\t : 0 for no reordering, 1 for reordering" - << std::endl; - std::clog << "\t : 0 for no quantization, 1 for quantization" - << std::endl; + std::clog << "\t : 0 for no reordering, 1 for reordering" << std::endl; + std::clog << "\t : 0 for no quantization, 1 for quantization" << std::endl; return -1; } @@ -123,17 +114,14 @@ int main(int argc, char **argv) { int dim = queryfile.shape[1]; int n_gt = truthfile.shape[1]; if (k > n_gt) { - std::cerr - << "K is larger than the number of precomputed ground truth neighbors" - << std::endl; + std::cerr << "K is larger than the number of precomputed ground truth neighbors" << std::endl; return -1; } std::clog << "Loading " << num_queries << " queries" << std::endl; - float *queries = queryfile.data(); - std::clog << "Loading " << num_queries - << " ground truth results with k = " << k << std::endl; - int *gtruth = truthfile.data(); + float* queries = queryfile.data(); + std::clog << "Loading " << num_queries << " ground truth results with k = " << k << std::endl; + int* gtruth = truthfile.data(); if (quantized) { run(/* queries = */ queries, /* gtruth = */ From c5b245a0dd2e52feee3962fe65325e4d76409e05 Mon Sep 17 00:00:00 2001 From: BlaiseMuhirwa Date: Sat, 2 Nov 2024 16:45:02 -0700 Subject: [PATCH 3/3] fix github actions workflow --- .github/workflows/cicd.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml index 8b65de2..64dadee 100644 --- a/.github/workflows/cicd.yaml +++ b/.github/workflows/cicd.yaml @@ -63,7 +63,7 @@ jobs: - name: Install dependencies run: | # Install CMake and clang - ./bin/install_clang.sh + ./bin/install_clang_and_libomp.sh ./bin/install_cmake.sh - name: Build flatnav