Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

[Backport 2.18] Update approximate_threshold to 15K documents (#2229) #2234

Merged
merged 1 commit into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,28 +22,19 @@ public void testClearCache() throws Exception {
if (isRunningAgainstOldCluster()) {
createKnnIndex(testIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS));
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, docId, NUM_DOCS);
} else {
queryCnt = NUM_DOCS;
validateClearCacheOnUpgrade(queryCnt);

docId = NUM_DOCS;
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, docId, NUM_DOCS);

queryCnt = queryCnt + NUM_DOCS;
int graphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > graphCount);
} else {
validateClearCacheOnUpgrade(queryCnt);
deleteKNNIndex(testIndex);
}
}

// validation steps for Clear Cache API after upgrading node to new version
private void validateClearCacheOnUpgrade(int queryCount) throws Exception {
int graphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > graphCount);
validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, queryCount, K);

clearCache(Collections.singletonList(testIndex));
assertEquals(0, getTotalGraphsInCache());
validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, queryCount, K);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@
package org.opensearch.knn.bwc;

import org.junit.Assert;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.VectorDataType;
import org.opensearch.knn.index.engine.KNNEngine;

import java.util.List;
import java.util.Map;

import static org.opensearch.knn.TestUtils.KNN_ALGO_PARAM_EF_CONSTRUCTION_MIN_VALUE;
Expand Down Expand Up @@ -52,6 +55,8 @@ public void testKNNIndexDefaultLegacyFieldMapping() throws Exception {
createKnnIndex(testIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS));
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS);
} else {
// update index setting to allow build graph always since we test graph count that are loaded into memory
updateIndexSettings(testIndex, Settings.builder().put(KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD, 0));
validateKNNIndexingOnUpgrade(NUM_DOCS);
}
}
Expand Down Expand Up @@ -275,13 +280,14 @@ public void testNoParametersOnUpgrade() throws Exception {

// KNN indexing tests when the cluster is upgraded to latest version
public void validateKNNIndexingOnUpgrade(int numOfDocs) throws Exception {
updateIndexSettings(testIndex, Settings.builder().put(KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD, 0));
forceMergeKnnIndex(testIndex);
QUERY_COUNT = numOfDocs;
validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K);
cleanUpCache();
clearCache(List.of(testIndex));
DOC_ID = numOfDocs;
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS);
QUERY_COUNT = QUERY_COUNT + NUM_DOCS;
validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K);
forceMergeKnnIndex(testIndex);
validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K);
deleteKNNIndex(testIndex);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package org.opensearch.knn.bwc;

import org.opensearch.common.settings.Settings;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.SpaceType;

import java.util.Collections;
Expand Down Expand Up @@ -34,6 +35,8 @@ public void testKNNWarmupDefaultLegacyFieldMapping() throws Exception {
createKnnIndex(testIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS));
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS);
} else {
// update index setting to allow build graph always since we test graph count that are loaded into memory
updateIndexSettings(testIndex, Settings.builder().put(KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD, 0));
validateKNNWarmupOnUpgrade();
}
}
Expand Down Expand Up @@ -65,6 +68,8 @@ public void testKNNWarmupDefaultMethodFieldMapping() throws Exception {
createKnnIndex(testIndex, getKNNDefaultIndexSettings(), createKNNIndexMethodFieldMapping(TEST_FIELD, DIMENSIONS));
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS);
} else {
// update index setting to allow build graph always since we test graph count that are loaded into memory
updateIndexSettings(testIndex, Settings.builder().put(KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD, 0));
validateKNNWarmupOnUpgrade();
}
}
Expand All @@ -85,21 +90,26 @@ public void testKNNWarmupCustomMethodFieldMapping() throws Exception {
}

public void validateKNNWarmupOnUpgrade() throws Exception {
// update index setting to allow build graph always since we test graph count that are loaded into memory
updateIndexSettings(testIndex, Settings.builder().put(KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD, 0));
int graphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > graphCount);
int totalGraph = getTotalGraphsInCache();
assertTrue(totalGraph > graphCount);

QUERY_COUNT = NUM_DOCS;
validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K);

DOC_ID = NUM_DOCS;
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS);
forceMergeKnnIndex(testIndex);

int updatedGraphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > updatedGraphCount);

QUERY_COUNT = QUERY_COUNT + NUM_DOCS;

validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K);
deleteKNNIndex(testIndex);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ public void testClearCache() throws Exception {
createKnnIndex(testIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS));
int docIdOld = 0;
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS);
int graphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > graphCount);
break;
case UPGRADED:
queryCnt = NUM_DOCS;
Expand All @@ -42,14 +45,8 @@ public void testClearCache() throws Exception {

// validation steps for Clear Cache API after upgrading all nodes from old version to new version
public void validateClearCacheOnUpgrade(int queryCount) throws Exception {
int graphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > graphCount);
validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, queryCount, K);

clearCache(Collections.singletonList(testIndex));
assertEquals(0, getTotalGraphsInCache());
validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, queryCount, K);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@

package org.opensearch.knn.bwc;

import org.opensearch.common.settings.Settings;
import org.opensearch.knn.index.KNNSettings;

import java.util.Collections;
import java.util.List;

import static org.opensearch.knn.TestUtils.NODES_BWC_CLUSTER;

Expand All @@ -20,44 +24,22 @@ public void testKNNWarmup() throws Exception {
switch (getClusterType()) {
case OLD:
createKnnIndex(testIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS));
int docIdOld = 0;
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS);
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, 0, NUM_DOCS);
break;
case MIXED:
int totalDocsCountMixed;
int docIdMixed;
if (isFirstMixedRound()) {
docIdMixed = NUM_DOCS;
totalDocsCountMixed = 2 * NUM_DOCS;
} else {
docIdMixed = 2 * NUM_DOCS;
totalDocsCountMixed = 3 * NUM_DOCS;
}
validateKNNWarmupOnUpgrade(totalDocsCountMixed, docIdMixed);
int graphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > graphCount);
clearCache(List.of(testIndex));
break;
case UPGRADED:
int docIdUpgraded = 3 * NUM_DOCS;
int totalDocsCountUpgraded = 4 * NUM_DOCS;
validateKNNWarmupOnUpgrade(totalDocsCountUpgraded, docIdUpgraded);

updateIndexSettings(testIndex, Settings.builder().put(KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD, 0));
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, NUM_DOCS, NUM_DOCS);
int updatedGraphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > updatedGraphCount);
deleteKNNIndex(testIndex);
}

}

// validation steps for KNN Warmup after upgrading each node from old version to new version
public void validateKNNWarmupOnUpgrade(int totalDocsCount, int docId) throws Exception {
int graphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > graphCount);

addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, docId, NUM_DOCS);

int updatedGraphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > updatedGraphCount);

validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, totalDocsCount, K);
}

}
1 change: 1 addition & 0 deletions release-notes/opensearch-knn.release-notes-2.18.0.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Compatible with OpenSearch 2.18.0
* Add support to build vector data structures greedily and perform exact search when there are no engine files [#1942](https://github.com/opensearch-project/k-NN/issues/1942)
* Add CompressionLevel Calculation for PQ [#2200](https://github.com/opensearch-project/k-NN/pull/2200)
* Remove FSDirectory dependency from native engine constructing side and deprecated FileWatcher [#2182](https://github.com/opensearch-project/k-NN/pull/2182)
* Update approximate_threshold to 15K documents [#2229](https://github.com/opensearch-project/k-NN/pull/2229)
### Bug Fixes
* Add DocValuesProducers for releasing memory when close index [#1946](https://github.com/opensearch-project/k-NN/pull/1946)
* KNN80DocValues should only be considered for BinaryDocValues fields [#2147](https://github.com/opensearch-project/k-NN/pull/2147)
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/opensearch/knn/index/KNNSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ public class KNNSettings {
public static final boolean KNN_DEFAULT_FAISS_AVX2_DISABLED_VALUE = false;
public static final boolean KNN_DEFAULT_FAISS_AVX512_DISABLED_VALUE = false;
public static final String INDEX_KNN_DEFAULT_SPACE_TYPE = "l2";
public static final Integer INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD_DEFAULT_VALUE = 0;
public static final Integer INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD_DEFAULT_VALUE = 15_000;
public static final Integer INDEX_KNN_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD_MIN = -1;
public static final Integer INDEX_KNN_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD_MAX = Integer.MAX_VALUE - 2;
public static final String INDEX_KNN_DEFAULT_SPACE_TYPE_FOR_BINARY = "hamming";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ public NativeEngines990KnnVectorsFormat() {
this(new Lucene99FlatVectorsFormat(new DefaultFlatVectorScorer()));
}

public NativeEngines990KnnVectorsFormat(int approximateThreshold) {
this(new Lucene99FlatVectorsFormat(new DefaultFlatVectorScorer()), approximateThreshold);
}

public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat flatVectorsFormat) {
this(flatVectorsFormat, KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD_DEFAULT_VALUE);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
* able to pick this class if its in test folder. Don't use this codec outside of testing
*/
public class UnitTestCodec extends FilterCodec {
private static final Integer BUILD_GRAPH_ALWAYS = 0;

public UnitTestCodec() {
super("UnitTestCodec", KNNCodecVersion.current().getDefaultKnnCodecSupplier().get());
}
Expand All @@ -30,7 +32,7 @@ public KnnVectorsFormat knnVectorsFormat() {
return new PerFieldKnnVectorsFormat() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return new NativeEngines990KnnVectorsFormat();
return new NativeEngines990KnnVectorsFormat(BUILD_GRAPH_ALWAYS);
}
};
}
Expand Down
22 changes: 22 additions & 0 deletions src/test/java/org/opensearch/knn/KNNSingleNodeTestCase.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

package org.opensearch.knn;

import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest;
import org.opensearch.core.action.ActionListener;
import org.opensearch.core.common.bytes.BytesReference;
import org.opensearch.cluster.ClusterName;
Expand All @@ -14,6 +15,7 @@
import org.opensearch.cluster.block.ClusterBlocks;
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.common.xcontent.XContentHelper;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.query.KNNQueryBuilder;
import org.opensearch.knn.index.memory.NativeMemoryCacheManager;
import org.opensearch.knn.index.memory.NativeMemoryLoadStrategy;
Expand Down Expand Up @@ -104,6 +106,14 @@ protected void createKnnIndexMapping(String indexName, String fieldName, Integer
OpenSearchAssertions.assertAcked(client().admin().indices().putMapping(request).actionGet());
}

/**
* Create simple k-NN mapping with engine
*/
protected void updateIndexSetting(String indexName, Settings setting) {
UpdateSettingsRequest request = new UpdateSettingsRequest(setting, indexName);
OpenSearchAssertions.assertAcked(client().admin().indices().updateSettings(request).actionGet());
}

/**
* Create simple k-NN mapping which can be nested.
* e.g. fieldPath = "a.b.c" will create mapping for "c" as knn_vector
Expand Down Expand Up @@ -140,6 +150,18 @@ protected Settings getKNNDefaultIndexSettings() {
return Settings.builder().put("number_of_shards", 1).put("number_of_replicas", 0).put("index.knn", true).build();
}

/**
* Get default k-NN settings for test cases with build graph always
*/
protected Settings getKNNDefaultIndexSettingsBuildsGraphAlways() {
return Settings.builder()
.put("number_of_shards", 1)
.put("number_of_replicas", 0)
.put("index.knn", true)
.put(KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD, 0)
.build();
}

/**
* Add a k-NN doc to an index
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
* Integration tests to test Circuit Breaker functionality
*/
public class KNNCircuitBreakerIT extends KNNRestTestCase {
private static final Integer ALWAYS_BUILD_GRAPH = 0;

/**
* To trip the circuit breaker, we will create two indices and index documents. Each index will be small enough so
* that individually they fit into the cache, but together they do not. To prevent Lucene conditions where
Expand All @@ -39,6 +41,7 @@ private void tripCb() throws Exception {
.put("number_of_shards", 1)
.put("number_of_replicas", numNodes - 1)
.put("index.knn", true)
.put(KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD, ALWAYS_BUILD_GRAPH)
.build();

String indexName1 = INDEX_NAME + "1";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
import static org.hamcrest.Matchers.containsString;

public class KNNESSettingsTestIT extends KNNRestTestCase {

public static final int ALWAYS_BUILD_GRAPH = 0;

/**
* KNN Index writes should be blocked when the plugin disabled
* @throws Exception Exception from test
Expand Down Expand Up @@ -72,7 +75,7 @@ public void testQueriesPluginDisabled() throws Exception {
}

public void testItemRemovedFromCache_expiration() throws Exception {
createKnnIndex(INDEX_NAME, createKnnIndexMapping(FIELD_NAME, 2));
createKnnIndex(INDEX_NAME, buildKNNIndexSettings(ALWAYS_BUILD_GRAPH), createKnnIndexMapping(FIELD_NAME, 2));
updateClusterSettings(KNNSettings.KNN_CACHE_ITEM_EXPIRY_ENABLED, true);
updateClusterSettings(KNNSettings.KNN_CACHE_ITEM_EXPIRY_TIME_MINUTES, "1m");

Expand Down Expand Up @@ -118,8 +121,8 @@ public void testUpdateIndexSetting() throws IOException {
}

@SuppressWarnings("unchecked")
public void testCacheRebuiltAfterUpdateIndexSettings() throws IOException {
createKnnIndex(INDEX_NAME, createKnnIndexMapping(FIELD_NAME, 2));
public void testCacheRebuiltAfterUpdateIndexSettings() throws Exception {
createKnnIndex(INDEX_NAME, buildKNNIndexSettings(ALWAYS_BUILD_GRAPH), createKnnIndexMapping(FIELD_NAME, 2));

Float[] vector = { 6.0f, 6.0f };
addKnnDoc(INDEX_NAME, "1", FIELD_NAME, vector);
Expand Down
Loading
Loading