-
Notifications
You must be signed in to change notification settings - Fork 3.7k
/
utils.h
181 lines (133 loc) · 5.45 KB
/
utils.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
/*
* A few utilitary functions for similarity search:
* - optimized exhaustive distance and knn search functions
* - some functions reimplemented from torch for speed
*/
#ifndef FAISS_utils_h
#define FAISS_utils_h
#include <stdint.h>
#include <faiss/utils/Heap.h>
namespace faiss {
/**************************************************
* Get some stats about the system
**************************************************/
/// ms elapsed since some arbitrary epoch
double getmillisecs ();
/// get current RSS usage in kB
size_t get_mem_usage_kb ();
uint64_t get_cycles ();
/***************************************************************************
* Misc matrix and vector manipulation functions
***************************************************************************/
/** compute c := a + bf * b for a, b and c tables
*
* @param n size of the tables
* @param a size n
* @param b size n
* @param c restult table, size n
*/
void fvec_madd (size_t n, const float *a,
float bf, const float *b, float *c);
/** same as fvec_madd, also return index of the min of the result table
* @return index of the min of table c
*/
int fvec_madd_and_argmin (size_t n, const float *a,
float bf, const float *b, float *c);
/* perform a reflection (not an efficient implementation, just for test ) */
void reflection (const float * u, float * x, size_t n, size_t d, size_t nu);
/** For k-means: update stage.
*
* @param x training vectors, size n * d
* @param centroids centroid vectors, size k * d
* @param assign nearest centroid for each training vector, size n
* @param k_frozen do not update the k_frozen first centroids
* @return nb of spliting operations to fight empty clusters
*/
int km_update_centroids (
const float * x,
float * centroids,
int64_t * assign,
size_t d, size_t k, size_t n,
size_t k_frozen);
/** compute the Q of the QR decomposition for m > n
* @param a size n * m: input matrix and output Q
*/
void matrix_qr (int m, int n, float *a);
/** distances are supposed to be sorted. Sorts indices with same distance*/
void ranklist_handle_ties (int k, int64_t *idx, const float *dis);
/** count the number of comon elements between v1 and v2
* algorithm = sorting + bissection to avoid double-counting duplicates
*/
size_t ranklist_intersection_size (size_t k1, const int64_t *v1,
size_t k2, const int64_t *v2);
/** merge a result table into another one
*
* @param I0, D0 first result table, size (n, k)
* @param I1, D1 second result table, size (n, k)
* @param keep_min if true, keep min values, otherwise keep max
* @param translation add this value to all I1's indexes
* @return nb of values that were taken from the second table
*/
size_t merge_result_table_with (size_t n, size_t k,
int64_t *I0, float *D0,
const int64_t *I1, const float *D1,
bool keep_min = true,
int64_t translation = 0);
/// a balanced assignment has a IF of 1
double imbalance_factor (int n, int k, const int64_t *assign);
/// same, takes a histogram as input
double imbalance_factor (int k, const int *hist);
void fvec_argsort (size_t n, const float *vals,
size_t *perm);
void fvec_argsort_parallel (size_t n, const float *vals,
size_t *perm);
/// compute histogram on v
int ivec_hist (size_t n, const int * v, int vmax, int *hist);
/** Compute histogram of bits on a code array
*
* @param codes size(n, nbits / 8)
* @param hist size(nbits): nb of 1s in the array of codes
*/
void bincode_hist(size_t n, size_t nbits, const uint8_t *codes, int *hist);
/// compute a checksum on a table.
size_t ivec_checksum (size_t n, const int *a);
/** random subsamples a set of vectors if there are too many of them
*
* @param d dimension of the vectors
* @param n on input: nb of input vectors, output: nb of output vectors
* @param nmax max nb of vectors to keep
* @param x input array, size *n-by-d
* @param seed random seed to use for sampling
* @return x or an array allocated with new [] with *n vectors
*/
const float *fvecs_maybe_subsample (
size_t d, size_t *n, size_t nmax, const float *x,
bool verbose = false, int64_t seed = 1234);
/** Convert binary vector to +1/-1 valued float vector.
*
* @param d dimension of the vector (multiple of 8)
* @param x_in input binary vector (uint8_t table of size d / 8)
* @param x_out output float vector (float table of size d)
*/
void binary_to_real(size_t d, const uint8_t *x_in, float *x_out);
/** Convert float vector to binary vector. Components > 0 are converted to 1,
* others to 0.
*
* @param d dimension of the vector (multiple of 8)
* @param x_in input float vector (float table of size d)
* @param x_out output binary vector (uint8_t table of size d / 8)
*/
void real_to_binary(size_t d, const float *x_in, uint8_t *x_out);
/** A reasonable hashing function */
uint64_t hash_bytes (const uint8_t *bytes, int64_t n);
/** Whether OpenMP annotations were respected. */
bool check_openmp();
} // namspace faiss
#endif /* FAISS_utils_h */