Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Standardize code formatting with Google style #71

Merged
merged 3 commits into from
Nov 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions .clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Google C/C++ Code Style settings
# https://clang.llvm.org/docs/ClangFormatStyleOptions.html

Language: Cpp
BasedOnStyle: Google
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: None
AlignOperands: Align
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: Empty
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Inline
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterReturnType: None
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: true
BreakBeforeBraces: Custom
BraceWrapping:
AfterCaseLabel: false
AfterClass: false
AfterStruct: false
AfterControlStatement: Never
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
BeforeLambdaBody: false
IndentBraces: false
SplitEmptyFunction: false
SplitEmptyRecord: false
SplitEmptyNamespace: false
BreakBeforeBinaryOperators: None
BreakBeforeTernaryOperators: true
BreakConstructorInitializers: BeforeColon
BreakInheritanceList: BeforeColon
ColumnLimit: 110
CompactNamespaces: false
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
EmptyLineBeforeAccessModifier: LogicalBlock
FixNamespaceComments: true
IncludeBlocks: Preserve
IndentCaseLabels: true
IndentPPDirectives: None
IndentWidth: 2
KeepEmptyLinesAtTheStartOfBlocks: true
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PointerAlignment: Left
ReflowComments: false
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceBeforeSquareBrackets: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInCStyleCastParentheses: false
SpacesInContainerLiterals: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: c++17
TabWidth: 4
UseTab: Never
2 changes: 1 addition & 1 deletion .github/workflows/cicd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ jobs:
- name: Install dependencies
run: |
# Install CMake and clang
./bin/install_clang.sh
./bin/install_clang_and_libomp.sh
./bin/install_cmake.sh

- name: Build flatnav
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ build
metrics
node-access-distributions

# PYcache
**/__pycache__

# Python wheel related folders/files
flatnav_python/flatnav.egg-info/
flatnav_python/poetry.lock
Expand Down
10 changes: 10 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# C++ headers and sources passed to clang-format by the format-cpp target.
# NOTE(review): GNU make's $(wildcard ...) is not documented to expand `**`
# recursively — confirm that files nested more than one directory below
# flatnav/ and developmental-features/ are actually matched.
CPP_FILES := $(wildcard flatnav/**/*.h flatnav/**/*.cpp flatnav_python/*.cpp tools/*.cpp developmental-features/**/*.h)

# Run clang-format with -i (edit in place) on every file in CPP_FILES.
format-cpp:
clang-format -i $(CPP_FILES)

# Invoke the project build script with the -e and -t flags.
build-cpp:
./bin/build.sh -e -t

# Run cmake-format with -i (edit in place) on CMakeLists.txt.
cmake-format:
cmake-format -i CMakeLists.txt
19 changes: 0 additions & 19 deletions bin/format.sh

This file was deleted.

100 changes: 45 additions & 55 deletions developmental-features/quantization/CentroidsGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
namespace flatnav::quantization {

class CentroidsGenerator {
public:
public:
/**
* @brief Construct a new Centroids Generator object
*
Expand All @@ -37,20 +37,21 @@ class CentroidsGenerator {
* @param verbose Whether to print verbose output
* @param seed The seed for the random number generator
*/
CentroidsGenerator(uint32_t dim, uint32_t num_centroids,
uint32_t num_iterations = 62, bool normalized = true,
bool verbose = false, int seed = 3333)
: _dim(dim), _num_centroids(num_centroids),
_clustering_iterations(num_iterations), _normalized(normalized),
_verbose(verbose), _centroids_initialized(false), _seed(seed),
CentroidsGenerator(uint32_t dim, uint32_t num_centroids, uint32_t num_iterations = 62,
bool normalized = true, bool verbose = false, int seed = 3333)
: _dim(dim),
_num_centroids(num_centroids),
_clustering_iterations(num_iterations),
_normalized(normalized),
_verbose(verbose),
_centroids_initialized(false),
_seed(seed),
_initialization_type("default") {}

void initializeCentroids(
const float *data, uint64_t n,
const std::function<float(const float *, const float *)> &distance_func) {
void initializeCentroids(const float* data, uint64_t n,
const std::function<float(const float*, const float*)>& distance_func) {
auto initialization_type = _initialization_type;
std::transform(initialization_type.begin(), initialization_type.end(),
initialization_type.begin(),
std::transform(initialization_type.begin(), initialization_type.end(), initialization_type.begin(),
[](unsigned char c) { return std::tolower(c); });

if (_centroids.size() != _num_centroids * _dim) {
Expand All @@ -64,9 +65,8 @@ class CentroidsGenerator {
} else if (initialization_type == "hypercube") {
hypercubeInitialize(data, n);
} else {
throw std::invalid_argument(
"Invalid centroids initialization initialization type: " +
initialization_type);
throw std::invalid_argument("Invalid centroids initialization initialization type: " +
initialization_type);
}
_centroids_initialized = true;
}
Expand Down Expand Up @@ -94,13 +94,11 @@ class CentroidsGenerator {
* @param distance_func The distance function to use (e.g. l2 distance or
cosine/inner product)
*/
void generateCentroids(
const float *vectors, const float *vec_weights, uint64_t n,
const std::function<float(const float *, const float *)> &distance_func) {
void generateCentroids(const float* vectors, const float* vec_weights, uint64_t n,
const std::function<float(const float*, const float*)>& distance_func) {
if (n < _num_centroids) {
throw std::runtime_error(
"Invalid configuration. The number of centroids: " +
std::to_string(_num_centroids) +
"Invalid configuration. The number of centroids: " + std::to_string(_num_centroids) +
" is bigger than the number of data points: " + std::to_string(n));
}

Expand All @@ -110,18 +108,16 @@ class CentroidsGenerator {
std::vector<uint32_t> assignment(n);

// K-means loop
for (uint32_t iteration = 0; iteration < _clustering_iterations;
iteration++) {
for (uint32_t iteration = 0; iteration < _clustering_iterations; iteration++) {
// Step 1. Find the minimizing centroid based on l2 distance
#pragma omp parallel for
for (uint64_t vec_index = 0; vec_index < n; vec_index++) {
float min_distance = std::numeric_limits<float>::max();

for (uint32_t c_index = 0; c_index < _num_centroids; c_index++) {
// Get distance using the distance function
float *vector = const_cast<float *>(vectors + (vec_index * _dim));
float *centroid =
const_cast<float *>(_centroids.data() + (c_index * _dim));
float* vector = const_cast<float*>(vectors + (vec_index * _dim));
float* centroid = const_cast<float*>(_centroids.data() + (c_index * _dim));
auto distance = distance_func(vector, centroid);

if (distance < min_distance) {
Expand All @@ -139,52 +135,48 @@ class CentroidsGenerator {
for (uint64_t vec_index = 0; vec_index < n; vec_index++) {
for (uint32_t dim_index = 0; dim_index < _dim; dim_index++) {
#pragma omp atomic
sums[assignment[vec_index] * _dim + dim_index] +=
vectors[vec_index * _dim + dim_index];
sums[assignment[vec_index] * _dim + dim_index] += vectors[vec_index * _dim + dim_index];
}
#pragma omp atomic
counts[assignment[vec_index]]++;
}
#pragma omp parallel for
for (uint32_t c_index = 0; c_index < _num_centroids; c_index++) {
for (uint32_t dim_index = 0; dim_index < _dim; dim_index++) {
_centroids[c_index * _dim + dim_index] =
counts[c_index]
? sums[c_index * _dim + dim_index] / counts[c_index]
: _centroids[c_index * _dim + dim_index];
_centroids[c_index * _dim + dim_index] = counts[c_index]
? sums[c_index * _dim + dim_index] / counts[c_index]
: _centroids[c_index * _dim + dim_index];
}
}
}
}

inline const float *centroids() const { return _centroids.data(); }
inline const float* centroids() const { return _centroids.data(); }

inline void setInitializationType(const std::string &initialization_type) {
inline void setInitializationType(const std::string& initialization_type) {
_initialization_type = initialization_type;
}

private:
private:
/**
* @brief Initialize the centroids by randomly sampling k centroids among the
* n data points
* @param data The input data points
* @param n The number of data points
*/
void randomInitialize(const float *data, uint64_t n) {
void randomInitialize(const float* data, uint64_t n) {
std::vector<uint64_t> indices(n);

std::iota(indices.begin(), indices.end(), 0);
std::mt19937 generator(_seed + 1);
std::vector<uint64_t> sample_indices(_num_centroids);
std::sample(indices.begin(), indices.end(), sample_indices.begin(),
_num_centroids, generator);
std::sample(indices.begin(), indices.end(), sample_indices.begin(), _num_centroids, generator);

for (uint32_t i = 0; i < _num_centroids; i++) {
auto sample_index = sample_indices[i];

for (uint32_t dim_index = 0; dim_index < _dim; dim_index++) {
_centroids[(i * _dim) + dim_index] =
data[(sample_index * _dim) + dim_index];
_centroids[(i * _dim) + dim_index] = data[(sample_index * _dim) + dim_index];
}
}
}
Expand All @@ -204,9 +196,8 @@ class CentroidsGenerator {
* @param data The input data points
* @param n The number of data points
*/
void kmeansPlusPlusInitialize(
const float *data, uint64_t n,
const std::function<float(const float *, const float *)> &distance_func) {
void kmeansPlusPlusInitialize(const float* data, uint64_t n,
const std::function<float(const float*, const float*)>& distance_func) {
std::mt19937 generator(_seed);
std::uniform_int_distribution<uint64_t> distribution(0, n - 1);

Expand All @@ -216,8 +207,7 @@ class CentroidsGenerator {
_centroids[dim_index] = data[first_centroid_index * _dim + dim_index];
}

std::vector<double> min_squared_distances(
n, std::numeric_limits<double>::max());
std::vector<double> min_squared_distances(n, std::numeric_limits<double>::max());

// Step 2. For k-1 remaining centroids
for (uint32_t cent_idx = 1; cent_idx < _num_centroids; cent_idx++) {
Expand All @@ -230,8 +220,8 @@ class CentroidsGenerator {

for (uint64_t c = 0; c < cent_idx; c++) {

float *centroid = const_cast<float *>(_centroids.data() + (c * _dim));
float *vector = const_cast<float *>(data + (i * _dim));
float* centroid = const_cast<float*>(_centroids.data() + (c * _dim));
float* vector = const_cast<float*>(data + (i * _dim));
auto distance = distance_func(centroid, vector);

if (distance < min_distance) {
Expand All @@ -256,8 +246,7 @@ class CentroidsGenerator {

// Add selected centroid to the centroids array
for (uint32_t dim_index = 0; dim_index < _dim; dim_index++) {
_centroids[cent_idx * _dim + dim_index] =
data[next_centroid_index * _dim + dim_index];
_centroids[cent_idx * _dim + dim_index] = data[next_centroid_index * _dim + dim_index];
}
}
}
Expand Down Expand Up @@ -288,7 +277,7 @@ class CentroidsGenerator {

*/

void hypercubeInitialize(const float *data, uint64_t n) {
void hypercubeInitialize(const float* data, uint64_t n) {

std::vector<float> means(_dim);
for (uint64_t vec_index = 0; vec_index < n; vec_index++) {
Expand All @@ -304,11 +293,11 @@ class CentroidsGenerator {
maxm = fabs(means[dim_index]) > maxm ? fabs(means[dim_index]) : maxm;
}

float *centroids = _centroids.data();
float* centroids = _centroids.data();
auto num_bits = log2(_num_centroids);

for (uint32_t i = 0; i < _num_centroids; i++) {
float *centroid = const_cast<float *>(centroids + (i * _dim));
float* centroid = const_cast<float*>(centroids + (i * _dim));
for (uint32_t j = 0; j < num_bits; j++) {
centroid[j] = means[j] + (((i >> j) & 1) ? 1 : -1) * maxm;
}
Expand Down Expand Up @@ -341,10 +330,11 @@ class CentroidsGenerator {
std::string _initialization_type;

friend class cereal::access;
template <typename Archive> void serialize(Archive &ar) {
ar(_dim, _num_centroids, _centroids, _clustering_iterations, _normalized,
_verbose, _centroids_initialized, _seed, _initialization_type);
template <typename Archive>
void serialize(Archive& ar) {
ar(_dim, _num_centroids, _centroids, _clustering_iterations, _normalized, _verbose,
_centroids_initialized, _seed, _initialization_type);
}
};

} // namespace flatnav::quantization
} // namespace flatnav::quantization
Loading
Loading