Skip to content

Commit

Permalink
add C APIs for NGTQG (#98)
Browse files Browse the repository at this point in the history
add C APIs for NGTQG
  • Loading branch information
masajiro authored Apr 13, 2021
1 parent 16eadf8 commit a51241d
Show file tree
Hide file tree
Showing 14 changed files with 415 additions and 100 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.13.5
1.13.6
5 changes: 4 additions & 1 deletion bin/ngtqg/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,17 @@ before the command as follows.

Quantize the objects of the specified index and build a quantized graph into the index.

$ ngtqg quantize [-E max_no_of_edges] index
$ ngtqg quantize [-E max_no_of_edges] [-Q dimension_of_subvector] index

*index*
Specify the name of the directory for the existing index such as ANNG or ONNG to be quantized. Only an index that uses L2 distance or normalized cosine similarity can be quantized. To use cosine similarity with the quantized graph, build the ANNG or ONNG with normalized cosine similarity.

**-E** *max_no_of_edges*
Specify the maximum number of edges to build a quantized graph. Because the objects associated with the edges of each node are processed 16 at a time, the number should be a multiple of 16.

**-Q** *dimension_of_subvector*
Specify the dimension of a subvector for quantized objects. The dimension should be a divisor of the dimension of the inserted objects.

### SEARCH

Search the index using the specified query data.
Expand Down
2 changes: 1 addition & 1 deletion bin/ngtqg/ngtqg.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// Copyright (C) 2016-2020 Yahoo Japan Corporation
// Copyright (C) 2020 Yahoo Japan Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down
10 changes: 5 additions & 5 deletions lib/NGT/Capi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ NGTObjectDistances ngt_create_empty_results(NGTError error) {
}

static bool ngt_search_index_(NGT::Index* pindex, NGT::Object *ngtquery, size_t size, float epsilon, float radius, NGTObjectDistances results, int edge_size = INT_MIN) {
// set search prameters.
// set search parameters.
NGT::SearchContainer sc(*ngtquery); // search parametera container.

sc.setResults(static_cast<NGT::ObjectDistances*>(results)); // set the result set.
Expand Down Expand Up @@ -683,18 +683,18 @@ uint8_t* ngt_get_object_as_integer(NGTObjectSpace object_space, ObjectID id, NGT

void ngt_destroy_results(NGTObjectDistances results) {
if(results == NULL) return;
delete(static_cast<NGT::ObjectDistances*>(results));
delete static_cast<NGT::ObjectDistances*>(results);
}

void ngt_destroy_property(NGTProperty prop) {
if(prop == NULL) return;
delete(static_cast<NGT::Property*>(prop));
delete static_cast<NGT::Property*>(prop);
}

void ngt_close_index(NGTIndex index) {
if(index == NULL) return;
(static_cast<NGT::Index*>(index))->close();
delete(static_cast<NGT::Index*>(index));
delete static_cast<NGT::Index*>(index);
}

int16_t ngt_get_property_edge_size_for_creation(NGTProperty prop, NGTError error) {
Expand Down Expand Up @@ -886,7 +886,7 @@ bool ngt_optimizer_set_processing_modes(NGTOptimizer optimizer, bool searchParam
void ngt_destroy_optimizer(NGTOptimizer optimizer)
{
if(optimizer == NULL) return;
delete(static_cast<NGT::GraphOptimizer*>(optimizer));
delete static_cast<NGT::GraphOptimizer*>(optimizer);
}

bool ngt_refine_anng(NGTIndex index, float epsilon, float accuracy, int noOfEdges, int exploreEdgeSize, size_t batchSize, NGTError error)
Expand Down
2 changes: 1 addition & 1 deletion lib/NGT/Capi.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ bool ngt_optimizer_set_processing_modes(NGTOptimizer optimizer, bool searchParam
void ngt_destroy_optimizer(NGTOptimizer);

// refine: the specified index by searching each node.
// epsilon, exepectedAccuracy and edgeSize: the same as the prameters for search. but if edgeSize is INT_MIN, default is used.
// epsilon, expectedAccuracy and edgeSize: the same as the parameters for search. but if edgeSize is INT_MIN, default is used.
// noOfEdges: if this is not 0, kNNG with k = noOfEdges is built
// batchSize: batch size for parallelism.
bool ngt_refine_anng(NGTIndex index, float epsilon, float expectedAccuracy,
Expand Down
2 changes: 1 addition & 1 deletion lib/NGT/Command.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -764,7 +764,7 @@ using namespace std;
const string usage = "Usage: ngt optimize-search-parameters [-m optimization-target(s|p|a)] [-q #-of-queries] [-n #-of-results] index\n"
"\t-m mode\n"
"\t\ts: optimize search parameters (the number of explored edges).\n"
"\t\tp: optimize prefetch prameters.\n"
"\t\tp: optimize prefetch parameters.\n"
"\t\ta: generate an accuracy table to specify an expected accuracy instead of an epsilon for search.\n";

string indexPath;
Expand Down
2 changes: 1 addition & 1 deletion lib/NGT/Index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1417,7 +1417,7 @@ findPathAmongIdenticalObjects(GraphAndTreeIndex &graph, size_t srcid, size_t dst
done.insert(tid);
GraphNode &node = *graph.GraphIndex::getNode(tid);
#ifdef NGT_SHARED_MEMORY_ALLOCATOR
for (auto i = node.begin(graph.repository.allocator); i != node.end(graph.GraphIndex::repository.allocator); ++i) {
for (auto i = node.begin(graph.GraphIndex::repository.allocator); i != node.end(graph.GraphIndex::repository.allocator); ++i) {
#else
for (auto i = node.begin(); i != node.end(); ++i) {
#endif
Expand Down
132 changes: 132 additions & 0 deletions lib/NGT/NGTQ/Capi.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
//
// Copyright (C) 2021 Yahoo Japan Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

#include <string>
#include <iostream>
#include <sstream>

#include "NGT/Capi.h"
#include "NGT/NGTQ/Capi.h"
#include "NGT/NGTQ/QuantizedGraph.h"

static bool operate_error_string_(const std::stringstream &ss, NGTError error){
if(error != NULL){
try{
std::string *error_str = static_cast<std::string*>(error);
*error_str = ss.str();
}catch(std::exception &err){
std::cerr << ss.str() << " > " << err.what() << std::endl;
return false;
}
}else{
std::cerr << ss.str() << std::endl;
}
return true;
}

// Fills *query with the default search settings:
// no query vector, 20 results, epsilon 0.03, result expansion 3.0 and an
// unbounded radius (FLT_MAX). The caller must set `query->query` afterwards.
void ngtqg_initialize_query(NGTQGQuery *query) {
  NGTQGQuery &defaults = *query;
  defaults.radius = FLT_MAX;
  defaults.result_expansion = 3.0;
  defaults.epsilon = 0.03;
  defaults.size = 20;
  defaults.query = NULL;
}

// Opens the quantized-graph index stored under `index_path` and disables its logging.
//   index_path: NUL-terminated path of the index directory; must not be NULL.
//   error:      error object that receives the message on failure; when NULL
//               the message is written to standard error instead.
// Returns an opaque handle to be released with ngtqg_close_index(), or NULL
// when `index_path` is NULL or opening the index throws.
NGTQGIndex ngtqg_open_index(const char *index_path, NGTError error) {
  // Constructing a std::string from a null pointer is undefined behavior,
  // so reject it explicitly instead of relying on the catch block below.
  if(index_path == NULL){
    std::stringstream ss;
    ss << "Capi : " << __FUNCTION__ << "() : Error: index_path is NULL";
    operate_error_string_(ss, error);
    return NULL;
  }
  try{
    std::string index_path_str(index_path);
    auto *index = new NGTQG::Index(index_path_str);
    index->disableLog();
    return static_cast<NGTQGIndex>(index);
  }catch(const std::exception &err){
    std::stringstream ss;
    ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what();
    operate_error_string_(ss, error);
    return NULL;
  }
}

// Closes the index behind the handle and releases it.
// A NULL handle is ignored. The handle must not be used after this call.
void ngtqg_close_index(NGTQGIndex index) {
  NGTQG::Index *qgIndex = static_cast<NGTQG::Index*>(index);
  if(qgIndex == NULL) return;
  qgIndex->close();
  delete qgIndex;
}

// Runs one search on the quantized graph.
//   pindex:  index to search.
//   query:   query vector.
//   param:   search settings (size, radius, epsilon, result expansion).
//   results: result container, treated as an NGT::ObjectDistances*, that the
//            search fills.
// Always returns true; failures surface as exceptions thrown by the search,
// which the caller is expected to catch.
static bool ngtqg_search_index_(NGTQG::Index* pindex, std::vector<float> &query, NGTQGQuery &param, NGTObjectDistances results) {
  // set search parameters.
  NGTQG::SearchQuery sq(query);  // Query.

  sq.setResults(static_cast<NGT::ObjectDistances*>(results));  // set the result set.
  sq.setSize(param.size);                          // the number of resultant objects.
  sq.setRadius(param.radius);                      // search radius.
  sq.setEpsilon(param.epsilon);                    // exploration coefficient.
  sq.setResultExpansion(param.result_expansion);   // result expansion.

  pindex->search(sq);

  return true;
}

// Searches the quantized graph with the given query.
//   index:   handle returned by ngtqg_open_index(); must not be NULL.
//   query:   search settings; `query.query` must not be NULL and must point to
//            at least as many floats as the dimension of the index. A negative
//            `query.radius` is replaced by FLT_MAX.
//   results: result container that receives the found objects; must not be NULL.
//   error:   error object that receives the message on failure; when NULL the
//            message is written to standard error instead.
// Returns true on success, false on an invalid argument or when the search throws.
bool ngtqg_search_index(NGTQGIndex index, NGTQGQuery query, NGTObjectDistances results, NGTError error) {
  if(index == NULL || query.query == NULL || results == NULL){
    std::stringstream ss;
    ss << "Capi : " << __FUNCTION__ << "() : parametor error: index = " << index << " query = " << query.query << " results = " << results;
    operate_error_string_(ss, error);
    return false;
  }

  NGTQG::Index* pindex = static_cast<NGTQG::Index*>(index);

  if(query.radius < 0.0){
    query.radius = FLT_MAX;
  }

  try{
    // The dimension lookup is kept inside the try block so that an exception
    // raised by the index cannot propagate across the C boundary.
    int32_t dim = pindex->getObjectSpace().getDimension();
    std::vector<float> vquery(query.query, query.query + dim);
    return ngtqg_search_index_(pindex, vquery, query, results);
  }catch(const std::exception &err) {
    std::stringstream ss;
    ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what();
    operate_error_string_(ss, error);
    return false;
  }
}

// Fills *parameters with the default quantization settings:
// a subvector dimension of 0 and at most 128 edges.
void ngtqg_initialize_quantization_parameters(NGTQGQuantizationParameters *parameters) {
  NGTQGQuantizationParameters &defaults = *parameters;
  defaults.max_number_of_edges = 128;
  defaults.dimension_of_subvector = 0;
}

void ngtqg_quantize(const char *indexPath, NGTQGQuantizationParameters parameters, NGTError error) {
try{
NGTQG::Index::quantize(indexPath, parameters.dimension_of_subvector, parameters.max_number_of_edges);
}catch(std::exception &err){
std::stringstream ss;
ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what();
operate_error_string_(ss, error);
return;
}
}

137 changes: 137 additions & 0 deletions lib/NGT/NGTQ/Capi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
//
// Copyright (C) 2021 Yahoo Japan Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

/***
{
// simple quantization and search example
std::string indexPath = "onng_index"; // ONNG
std::string queryPath = "query.tsv"; // Query file.
NGTError err = ngt_create_error_object();
// quantize the specified existing index
// build quantized objects and a quantized graph
NGTQGQuantizationParameters quantizationParameters;
ngtqg_initialize_quantization_parameters(&quantizationParameters);
ngtqg_quantize(indexPath.c_str(), quantizationParameters, err);
// open the index (ANNG or ONNG).
NGTQGIndex index = ngtqg_open_index(indexPath.c_str(), err);
if (index == NULL) {
std::cerr << ngt_get_error_string(err) << std::endl;
return false;
}
std::ifstream is(queryPath); // open a query file.
if (!is) {
std::cerr << "Cannot open the specified file. " << queryPath << std::endl;
return false;
}
// get the dimension of the index to check the dimension of the query
NGTProperty property = ngt_create_property(err);
ngt_get_property(index, property, err);
size_t dimension = ngt_get_property_dimension(property, err);
ngt_destroy_property(property);
std::string line;
float queryVector[dimension];
if (!getline(is, line)) { // read a query object from the query file.
std::cerr << "no data" << std::endl;
}
std::vector<std::string> tokens;
NGT::Common::tokenize(line, tokens, " \t"); // split a string into words by the separators.
// create a query vector from the tokens.
if (tokens.size() != dimension) {
std::cerr << "dimension of the query is invalid. dimension=" << tokens.size() << ":" << dimension << std::endl;
return false;
}
for (std::vector<std::string>::iterator ti = tokens.begin(); ti != tokens.end(); ++ti) {
queryVector[distance(tokens.begin(), ti)] = NGT::Common::strtod(*ti);
}
// set search parameters.
NGTObjectDistances result = ngt_create_empty_results(err);
NGTQGQuery query;
ngtqg_initialize_query(&query);
query.query = queryVector;
query.size = 20;
query.epsilon = 0.03;
query.result_expansion = 2;
// search with the quantized graph
bool status = ngtqg_search_index(index, query, result, err);
NGTObjectSpace objectSpace = ngt_get_object_space(index, err);
auto rsize = ngt_get_result_size(result, err);
// show resultant objects.
std::cout << "Rank\tID\tDistance\tObject" << std::endl;
for (size_t i = 0; i < rsize; i++) {
NGTObjectDistance object = ngt_get_result(result, i, err);
std::cout << i + 1 << "\t" << object.id << "\t" << object.distance << "\t";
float *objectVector = ngt_get_object_as_float(objectSpace, object.id, err);
for (size_t i = 0; i < dimension; i++) {
std::cout << objectVector[i] << " ";
}
std::cout << std::endl;
}
ngt_destroy_results(result);
ngtqg_close_index(index);
}
***/

#pragma once

#ifdef __cplusplus
extern "C" {
#endif

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#include "NGT/Capi.h"

// Opaque handle to a quantized-graph index; obtained from ngtqg_open_index()
// and released with ngtqg_close_index().
typedef void* NGTQGIndex;
// NGTQG-prefixed alias for the result element type declared in NGT/Capi.h.
// (Re-declaring NGTObjectDistance onto itself added no name and is rejected
// as a typedef redefinition by pre-C11 compilers.)
typedef NGTObjectDistance NGTQGObjectDistance;
// NGTQG-prefixed alias for the error object type declared in NGT/Capi.h.
typedef NGTError NGTQGError;

// Search request passed by value to ngtqg_search_index().
// Call ngtqg_initialize_query() first to set the defaults noted below,
// then assign `query` (it is initialized to a null pointer).
typedef struct {
float *query; // query vector; must not be NULL. ngtqg_search_index() reads as many floats as the dimension of the index.
size_t size; // # of returned objects (default: 20)
float epsilon; // exploration coefficient (default: 0.03)
float result_expansion; // result expansion (default: 3.0)
float radius; // search radius (default: FLT_MAX); a negative value is replaced by FLT_MAX in ngtqg_search_index()
} NGTQGQuery;

// Quantization settings passed by value to ngtqg_quantize().
// Call ngtqg_initialize_quantization_parameters() first to set the defaults noted below.
typedef struct {
float dimension_of_subvector; // dimension of a subvector (default: 0); presumably the same setting as the -Q option of "ngtqg quantize" -- TODO confirm
size_t max_number_of_edges; // maximum number of edges (default: 128); presumably the same setting as the -E option of "ngtqg quantize" -- TODO confirm
} NGTQGQuantizationParameters;

// Opens the quantized-graph index at the given path.
// Returns a handle, or NULL on failure. On failure the message is stored in the
// error object, or written to standard error when the error object is NULL.
NGTQGIndex ngtqg_open_index(const char *, NGTError);

// Closes the index and releases the handle. A NULL handle is ignored.
void ngtqg_close_index(NGTQGIndex);

// Sets the default quantization parameters:
// dimension_of_subvector = 0, max_number_of_edges = 128.
void ngtqg_initialize_quantization_parameters(NGTQGQuantizationParameters *);

// Quantizes the existing index at the given path with the given parameters.
// There is no return value; a failure is reported only through the error object
// (or standard error when the error object is NULL).
void ngtqg_quantize(const char *, NGTQGQuantizationParameters, NGTError);

// Sets the default search parameters:
// query = null, size = 20, epsilon = 0.03, result_expansion = 3.0, radius = FLT_MAX.
void ngtqg_initialize_query(NGTQGQuery *);

// Searches the index with the given query and stores the found objects in the results.
// Returns false when the index, the query vector or the results is NULL, or when
// the search fails; the message is then reported through the error object.
bool ngtqg_search_index(NGTQGIndex, NGTQGQuery, NGTObjectDistances, NGTError);

#ifdef __cplusplus
}
#endif
Loading

0 comments on commit a51241d

Please # to comment.