Skip to content

Commit

Permalink
[Backport 2.x] Update approximate_threshold to 15K documents (#2229) (#…
Browse files Browse the repository at this point in the history
…2231)

* Update approximate_threshold to 15K documents (#2229)

* Update threshold to 15K documents

After comparing indexing and search performance, we are updating
default value to be 15000.

Signed-off-by: Vijayan Balasubramanian <balasvij@amazon.com>

* Fix bwc test

Signed-off-by: Vijayan Balasubramanian <balasvij@amazon.com>

* Update test method

Signed-off-by: Vijayan Balasubramanian <balasvij@amazon.com>

* Flush data after index

Signed-off-by: Vijayan Balasubramanian <balasvij@amazon.com>

---------

Signed-off-by: Vijayan Balasubramanian <balasvij@amazon.com>

* Remove udpate cluster setting

Signed-off-by: Vijayan Balasubramanian <balasvij@amazon.com>

* update warmup rolling upgrade scenario

Signed-off-by: Vijayan Balasubramanian <balasvij@amazon.com>

---------

Signed-off-by: Vijayan Balasubramanian <balasvij@amazon.com>
  • Loading branch information
VijayanB authored Oct 24, 2024
1 parent be0837d commit bdbddcf
Show file tree
Hide file tree
Showing 23 changed files with 116 additions and 79 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,28 +22,19 @@ public void testClearCache() throws Exception {
if (isRunningAgainstOldCluster()) {
createKnnIndex(testIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS));
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, docId, NUM_DOCS);
} else {
queryCnt = NUM_DOCS;
validateClearCacheOnUpgrade(queryCnt);

docId = NUM_DOCS;
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, docId, NUM_DOCS);

queryCnt = queryCnt + NUM_DOCS;
int graphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > graphCount);
} else {
validateClearCacheOnUpgrade(queryCnt);
deleteKNNIndex(testIndex);
}
}

// validation steps for Clear Cache API after upgrading node to new version
private void validateClearCacheOnUpgrade(int queryCount) throws Exception {
int graphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > graphCount);
validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, queryCount, K);

clearCache(Collections.singletonList(testIndex));
assertEquals(0, getTotalGraphsInCache());
validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, queryCount, K);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@
package org.opensearch.knn.bwc;

import org.junit.Assert;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.VectorDataType;
import org.opensearch.knn.index.engine.KNNEngine;

import java.util.List;
import java.util.Map;

import static org.opensearch.knn.TestUtils.KNN_ALGO_PARAM_EF_CONSTRUCTION_MIN_VALUE;
Expand Down Expand Up @@ -52,6 +55,8 @@ public void testKNNIndexDefaultLegacyFieldMapping() throws Exception {
createKnnIndex(testIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS));
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS);
} else {
// update index setting to allow build graph always since we test graph count that are loaded into memory
updateIndexSettings(testIndex, Settings.builder().put(KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD, 0));
validateKNNIndexingOnUpgrade(NUM_DOCS);
}
}
Expand Down Expand Up @@ -275,13 +280,14 @@ public void testNoParametersOnUpgrade() throws Exception {

// KNN indexing tests when the cluster is upgraded to latest version
public void validateKNNIndexingOnUpgrade(int numOfDocs) throws Exception {
updateIndexSettings(testIndex, Settings.builder().put(KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD, 0));
forceMergeKnnIndex(testIndex);
QUERY_COUNT = numOfDocs;
validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K);
cleanUpCache();
clearCache(List.of(testIndex));
DOC_ID = numOfDocs;
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS);
QUERY_COUNT = QUERY_COUNT + NUM_DOCS;
validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K);
forceMergeKnnIndex(testIndex);
validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K);
deleteKNNIndex(testIndex);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package org.opensearch.knn.bwc;

import org.opensearch.common.settings.Settings;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.SpaceType;

import java.util.Collections;
Expand Down Expand Up @@ -34,6 +35,8 @@ public void testKNNWarmupDefaultLegacyFieldMapping() throws Exception {
createKnnIndex(testIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS));
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS);
} else {
// update index setting to allow build graph always since we test graph count that are loaded into memory
updateIndexSettings(testIndex, Settings.builder().put(KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD, 0));
validateKNNWarmupOnUpgrade();
}
}
Expand Down Expand Up @@ -65,6 +68,8 @@ public void testKNNWarmupDefaultMethodFieldMapping() throws Exception {
createKnnIndex(testIndex, getKNNDefaultIndexSettings(), createKNNIndexMethodFieldMapping(TEST_FIELD, DIMENSIONS));
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS);
} else {
// update index setting to allow build graph always since we test graph count that are loaded into memory
updateIndexSettings(testIndex, Settings.builder().put(KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD, 0));
validateKNNWarmupOnUpgrade();
}
}
Expand All @@ -85,21 +90,26 @@ public void testKNNWarmupCustomMethodFieldMapping() throws Exception {
}

public void validateKNNWarmupOnUpgrade() throws Exception {
// update index setting to allow build graph always since we test graph count that are loaded into memory
updateIndexSettings(testIndex, Settings.builder().put(KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD, 0));
int graphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > graphCount);
int totalGraph = getTotalGraphsInCache();
assertTrue(totalGraph > graphCount);

QUERY_COUNT = NUM_DOCS;
validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K);

DOC_ID = NUM_DOCS;
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS);
forceMergeKnnIndex(testIndex);

int updatedGraphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > updatedGraphCount);

QUERY_COUNT = QUERY_COUNT + NUM_DOCS;

validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K);
deleteKNNIndex(testIndex);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ public void testClearCache() throws Exception {
createKnnIndex(testIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS));
int docIdOld = 0;
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS);
int graphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > graphCount);
break;
case UPGRADED:
queryCnt = NUM_DOCS;
Expand All @@ -42,14 +45,8 @@ public void testClearCache() throws Exception {

// validation steps for Clear Cache API after upgrading all nodes from old version to new version
public void validateClearCacheOnUpgrade(int queryCount) throws Exception {
int graphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > graphCount);
validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, queryCount, K);

clearCache(Collections.singletonList(testIndex));
assertEquals(0, getTotalGraphsInCache());
validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, queryCount, K);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@

package org.opensearch.knn.bwc;

import org.opensearch.common.settings.Settings;
import org.opensearch.knn.index.KNNSettings;

import java.util.Collections;
import java.util.List;

import static org.opensearch.knn.TestUtils.NODES_BWC_CLUSTER;

Expand All @@ -20,44 +24,22 @@ public void testKNNWarmup() throws Exception {
switch (getClusterType()) {
case OLD:
createKnnIndex(testIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS));
int docIdOld = 0;
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS);
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, 0, NUM_DOCS);
break;
case MIXED:
int totalDocsCountMixed;
int docIdMixed;
if (isFirstMixedRound()) {
docIdMixed = NUM_DOCS;
totalDocsCountMixed = 2 * NUM_DOCS;
} else {
docIdMixed = 2 * NUM_DOCS;
totalDocsCountMixed = 3 * NUM_DOCS;
}
validateKNNWarmupOnUpgrade(totalDocsCountMixed, docIdMixed);
int graphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > graphCount);
clearCache(List.of(testIndex));
break;
case UPGRADED:
int docIdUpgraded = 3 * NUM_DOCS;
int totalDocsCountUpgraded = 4 * NUM_DOCS;
validateKNNWarmupOnUpgrade(totalDocsCountUpgraded, docIdUpgraded);

updateIndexSettings(testIndex, Settings.builder().put(KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD, 0));
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, NUM_DOCS, NUM_DOCS);
int updatedGraphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > updatedGraphCount);
deleteKNNIndex(testIndex);
}

}

// validation steps for KNN Warmup after upgrading each node from old version to new version
public void validateKNNWarmupOnUpgrade(int totalDocsCount, int docId) throws Exception {
int graphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > graphCount);

addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, docId, NUM_DOCS);

int updatedGraphCount = getTotalGraphsInCache();
knnWarmup(Collections.singletonList(testIndex));
assertTrue(getTotalGraphsInCache() > updatedGraphCount);

validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, totalDocsCount, K);
}

}
1 change: 1 addition & 0 deletions release-notes/opensearch-knn.release-notes-2.18.0.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Compatible with OpenSearch 2.18.0
* Add support to build vector data structures greedily and perform exact search when there are no engine files [#1942](https://github.com/opensearch-project/k-NN/issues/1942)
* Add CompressionLevel Calculation for PQ [#2200](https://github.com/opensearch-project/k-NN/pull/2200)
* Remove FSDirectory dependency from native engine constructing side and deprecated FileWatcher [#2182](https://github.com/opensearch-project/k-NN/pull/2182)
* Update approximate_threshold to 15K documents [#2229](https://github.com/opensearch-project/k-NN/pull/2229)
### Bug Fixes
* Add DocValuesProducers for releasing memory when close index [#1946](https://github.com/opensearch-project/k-NN/pull/1946)
* KNN80DocValues should only be considered for BinaryDocValues fields [#2147](https://github.com/opensearch-project/k-NN/pull/2147)
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/opensearch/knn/index/KNNSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ public class KNNSettings {
public static final boolean KNN_DEFAULT_FAISS_AVX2_DISABLED_VALUE = false;
public static final boolean KNN_DEFAULT_FAISS_AVX512_DISABLED_VALUE = false;
public static final String INDEX_KNN_DEFAULT_SPACE_TYPE = "l2";
public static final Integer INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD_DEFAULT_VALUE = 0;
public static final Integer INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD_DEFAULT_VALUE = 15_000;
public static final Integer INDEX_KNN_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD_MIN = -1;
public static final Integer INDEX_KNN_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD_MAX = Integer.MAX_VALUE - 2;
public static final String INDEX_KNN_DEFAULT_SPACE_TYPE_FOR_BINARY = "hamming";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ public NativeEngines990KnnVectorsFormat() {
this(new Lucene99FlatVectorsFormat(new DefaultFlatVectorScorer()));
}

public NativeEngines990KnnVectorsFormat(int approximateThreshold) {
this(new Lucene99FlatVectorsFormat(new DefaultFlatVectorScorer()), approximateThreshold);
}

public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat flatVectorsFormat) {
this(flatVectorsFormat, KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD_DEFAULT_VALUE);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
* able to pick this class if its in test folder. Don't use this codec outside of testing
*/
public class UnitTestCodec extends FilterCodec {
private static final Integer BUILD_GRAPH_ALWAYS = 0;

public UnitTestCodec() {
super("UnitTestCodec", KNNCodecVersion.current().getDefaultKnnCodecSupplier().get());
}
Expand All @@ -30,7 +32,7 @@ public KnnVectorsFormat knnVectorsFormat() {
return new PerFieldKnnVectorsFormat() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return new NativeEngines990KnnVectorsFormat();
return new NativeEngines990KnnVectorsFormat(BUILD_GRAPH_ALWAYS);
}
};
}
Expand Down
22 changes: 22 additions & 0 deletions src/test/java/org/opensearch/knn/KNNSingleNodeTestCase.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

package org.opensearch.knn;

import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest;
import org.opensearch.core.action.ActionListener;
import org.opensearch.core.common.bytes.BytesReference;
import org.opensearch.cluster.ClusterName;
Expand All @@ -14,6 +15,7 @@
import org.opensearch.cluster.block.ClusterBlocks;
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.common.xcontent.XContentHelper;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.query.KNNQueryBuilder;
import org.opensearch.knn.index.memory.NativeMemoryCacheManager;
import org.opensearch.knn.index.memory.NativeMemoryLoadStrategy;
Expand Down Expand Up @@ -104,6 +106,14 @@ protected void createKnnIndexMapping(String indexName, String fieldName, Integer
OpenSearchAssertions.assertAcked(client().admin().indices().putMapping(request).actionGet());
}

/**
* Create simple k-NN mapping with engine
*/
protected void updateIndexSetting(String indexName, Settings setting) {
UpdateSettingsRequest request = new UpdateSettingsRequest(setting, indexName);
OpenSearchAssertions.assertAcked(client().admin().indices().updateSettings(request).actionGet());
}

/**
* Create simple k-NN mapping which can be nested.
* e.g. fieldPath = "a.b.c" will create mapping for "c" as knn_vector
Expand Down Expand Up @@ -140,6 +150,18 @@ protected Settings getKNNDefaultIndexSettings() {
return Settings.builder().put("number_of_shards", 1).put("number_of_replicas", 0).put("index.knn", true).build();
}

/**
* Get default k-NN settings for test cases with build graph always
*/
protected Settings getKNNDefaultIndexSettingsBuildsGraphAlways() {
return Settings.builder()
.put("number_of_shards", 1)
.put("number_of_replicas", 0)
.put("index.knn", true)
.put(KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD, 0)
.build();
}

/**
* Add a k-NN doc to an index
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
* Integration tests to test Circuit Breaker functionality
*/
public class KNNCircuitBreakerIT extends KNNRestTestCase {
private static final Integer ALWAYS_BUILD_GRAPH = 0;

/**
* To trip the circuit breaker, we will create two indices and index documents. Each index will be small enough so
* that individually they fit into the cache, but together they do not. To prevent Lucene conditions where
Expand All @@ -39,6 +41,7 @@ private void tripCb() throws Exception {
.put("number_of_shards", 1)
.put("number_of_replicas", numNodes - 1)
.put("index.knn", true)
.put(KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD, ALWAYS_BUILD_GRAPH)
.build();

String indexName1 = INDEX_NAME + "1";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
import static org.hamcrest.Matchers.containsString;

public class KNNESSettingsTestIT extends KNNRestTestCase {

public static final int ALWAYS_BUILD_GRAPH = 0;

/**
* KNN Index writes should be blocked when the plugin disabled
* @throws Exception Exception from test
Expand Down Expand Up @@ -72,7 +75,7 @@ public void testQueriesPluginDisabled() throws Exception {
}

public void testItemRemovedFromCache_expiration() throws Exception {
createKnnIndex(INDEX_NAME, createKnnIndexMapping(FIELD_NAME, 2));
createKnnIndex(INDEX_NAME, buildKNNIndexSettings(ALWAYS_BUILD_GRAPH), createKnnIndexMapping(FIELD_NAME, 2));
updateClusterSettings(KNNSettings.KNN_CACHE_ITEM_EXPIRY_ENABLED, true);
updateClusterSettings(KNNSettings.KNN_CACHE_ITEM_EXPIRY_TIME_MINUTES, "1m");

Expand Down Expand Up @@ -118,8 +121,8 @@ public void testUpdateIndexSetting() throws IOException {
}

@SuppressWarnings("unchecked")
public void testCacheRebuiltAfterUpdateIndexSettings() throws IOException {
createKnnIndex(INDEX_NAME, createKnnIndexMapping(FIELD_NAME, 2));
public void testCacheRebuiltAfterUpdateIndexSettings() throws Exception {
createKnnIndex(INDEX_NAME, buildKNNIndexSettings(ALWAYS_BUILD_GRAPH), createKnnIndexMapping(FIELD_NAME, 2));

Float[] vector = { 6.0f, 6.0f };
addKnnDoc(INDEX_NAME, "1", FIELD_NAME, vector);
Expand Down
Loading

0 comments on commit bdbddcf

Please # to comment.