From 23e2861096404fcf12059bb17f27058360ec6570 Mon Sep 17 00:00:00 2001 From: Mark Czotter Date: Fri, 11 Feb 2022 14:46:52 +0100 Subject: [PATCH 1/8] feat(index): optimize bool index queries as much as possible 1. merge term filters specified on the same field to reduce the number of bool query clauses 2. flatten deep bool query hierarchies to reduce the chance of getting HTTP 400 error due to `indices.query.bool.max_nested_depth` setting, which defaults to depth of `20`. --- .../b2international/index/BoolQueryTest.java | 95 +++++++++++++++++++ .../query/AbstractExpressionBuilder.java | 87 ++++++++++++++++- .../index/query/BoolExpression.java | 10 +- .../index/query/Expressions.java | 39 +++++++- 4 files changed, 223 insertions(+), 8 deletions(-) create mode 100644 commons/com.b2international.index.tests/src/com/b2international/index/BoolQueryTest.java diff --git a/commons/com.b2international.index.tests/src/com/b2international/index/BoolQueryTest.java b/commons/com.b2international.index.tests/src/com/b2international/index/BoolQueryTest.java new file mode 100644 index 00000000000..94aa5937d3f --- /dev/null +++ b/commons/com.b2international.index.tests/src/com/b2international/index/BoolQueryTest.java @@ -0,0 +1,95 @@ +/* + * Copyright 2022 B2i Healthcare Pte Ltd, http://b2i.sg + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.b2international.index; + +import static org.junit.Assert.assertEquals; + +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.UUID; +import java.util.function.BiFunction; + +import org.junit.Test; + +import com.b2international.index.query.Expression; +import com.b2international.index.query.Expressions; +import com.b2international.index.query.Expressions.ExpressionBuilder; +import com.b2international.index.query.Query; + +/** + * @since 8.1 + */ +public class BoolQueryTest extends BaseIndexTest { + + @Override + protected Collection> getTypes() { + return List.of(Fixtures.Data.class); + } + + @Test + public void deepShouldOnlyBooleanQueryShouldBeFlattenedToPreventError() throws Exception { + // this kind of deep boolean query throws an error in both HTTP and TCP ES clients (flattening solves it for certain searches) + search(Query.select(Fixtures.Data.class).where(generateDeepBooleanClause(1000, ExpressionBuilder::should)).build()); + } + + @Test + public void deepFilterOnlyBooleanQueryShouldBeFlattenedToPreventError() throws Exception { + // this kind of deep boolean query throws an error in both HTTP and TCP ES clients (flattening solves it for certain searches) + search(Query.select(Fixtures.Data.class).where(generateDeepBooleanClause(1000, ExpressionBuilder::filter)).build()); + } + + @Test + public void mergeMultipleSingleTermShouldClauses() throws Exception { + String id1 = UUID.randomUUID().toString(); + String id2 = UUID.randomUUID().toString(); + String id3 = UUID.randomUUID().toString(); + Expression actual = Expressions.bool() + .should(Expressions.exactMatch("id", id1)) + .should(Expressions.exactMatch("id", id2)) + .should(Expressions.exactMatch("id", id3)) + .build(); + assertEquals(Expressions.matchAny("id", Set.of(id1, id2, id3)), actual); + } + + @Test + public void mergeMultipleSingleAndMultiTermShouldClauses() throws Exception { + String id1 = UUID.randomUUID().toString(); + String id2 = UUID.randomUUID().toString(); + String id3 = UUID.randomUUID().toString(); + String id4 = UUID.randomUUID().toString(); + Expression actual = Expressions.bool() + .should(Expressions.exactMatch("id", id1)) + .should(Expressions.exactMatch("id", id2)) + .should(Expressions.matchAny("id", Set.of(id3, id4))) + .build(); + assertEquals(Expressions.matchAny("id", Set.of(id1, id2, id3, id4)), actual); + } + + private Expression generateDeepBooleanClause(int depth, BiFunction boolClause) { + ExpressionBuilder root = Expressions.bool(); + ExpressionBuilder current = root; + boolClause.apply(current, Expressions.exactMatch("id", UUID.randomUUID().toString())); + for (int i = 0; i < depth; i++) { + ExpressionBuilder nested = Expressions.bool(); + boolClause.apply(nested, Expressions.exactMatch("id", UUID.randomUUID().toString())); + current.should(nested.build()); + current = nested; + } + return root.build(); + } + +} diff --git a/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java b/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java index ac4e5bf5c3e..a9a90dbef69 100644 --- a/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java +++ b/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java @@ -1,5 +1,5 @@ /* - * Copyright 2011-2017 B2i Healthcare Pte Ltd, http://b2i.sg + * Copyright 2011-2022 B2i Healthcare Pte Ltd, http://b2i.sg * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,13 @@ import static com.google.common.collect.Lists.newArrayList; +import java.util.Collection; import java.util.List; +import java.util.Set; + +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; /** * Abstract superclass for building query {@link Expression}s. @@ -72,13 +78,90 @@ public B setMinimumNumberShouldMatch(int minShouldMatch) { public Expression build() { if (mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.isEmpty() && filterClauses.isEmpty()) { return Expressions.matchAll(); - } else if (mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.isEmpty() && filterClauses.size() == 1) { + } else if (isSingleFilter()) { // shortcut to reduce number of nested Boolean clauses return filterClauses.get(0); } else { + // before creating the boolean query make sure we flatten the query as much as possible + + // or(A=B, or(A=C, or(A=D))) - should only clauses deeply nested inside bool queries (and the minShouldMatch is 1 on all levels) + flattenShoulds(); + + // then optimize term filters that match the same field, field1=A or field1=B or field1=C should be converted to field1=any(A, B, C) + // during EsQueryBuilder the system will optimize it again to fit into the max term count ES setting, see EsQueryBuilder + mergeTermFilters(); + + if (isSingleFilter()) { + return filterClauses.get(0); + } + + if (isSingleShould()) { + return shouldClauses.get(0); + } + final BoolExpression be = new BoolExpression(mustClauses, mustNotClauses, shouldClauses, filterClauses); be.setMinShouldMatch(minShouldMatch); return be; } } + + private boolean isSingleFilter() { + return mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.isEmpty() && filterClauses.size() == 1; + } + + private boolean isSingleShould() { + return mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.size() == 1 && filterClauses.isEmpty(); + } + + protected final void flattenShoulds() { + if (!mustClauses.isEmpty() || !mustNotClauses.isEmpty() || !filterClauses.isEmpty() || minShouldMatch != 1) { + return; + } + for (Expression expression : List.copyOf(shouldClauses)) { + if (expression instanceof BoolExpression) { + BoolExpression bool = (BoolExpression) expression; + if (bool.isShouldOnly() && bool.minShouldMatch() == 1) { + shouldClauses.addAll(bool.filterClauses()); + shouldClauses.remove(bool); + } + } + } + } + + protected final void mergeTermFilters() { + // check each must, mustNot, should and filter clause and merge term/terms queries into a single terms query, targeting the same field + mergeTermFilters(mustClauses); + mergeTermFilters(mustNotClauses); + mergeTermFilters(shouldClauses); + mergeTermFilters(filterClauses); + } + + private void mergeTermFilters(List clauses) { + Multimap termExpressionsByField = HashMultimap.create(); + for (Expression expression : List.copyOf(clauses)) { + if (expression instanceof SingleArgumentPredicate) { + termExpressionsByField.put(((SingleArgumentPredicate) expression).getField(), expression); + } else if (expression instanceof SetPredicate) { + termExpressionsByField.put(((SetPredicate) expression).getField(), expression); + } + } + + for (String field : Set.copyOf(termExpressionsByField.keySet())) { + Collection termExpressions = termExpressionsByField.removeAll(field); + if (termExpressions.size() > 1) { + Set values = Sets.newHashSet(); + for (Expression expression : termExpressions) { + if (expression instanceof SingleArgumentPredicate) { + values.add(((SingleArgumentPredicate) expression).getArgument()); + } else if (expression instanceof SetPredicate) { + values.addAll(((SetPredicate) expression).values()); + } + } + // replace all clauses with a single expression + clauses.add(Expressions.matchAnyObject(field, values)); + clauses.removeAll(termExpressions); + } + } + } + } diff --git a/commons/com.b2international.index/src/com/b2international/index/query/BoolExpression.java b/commons/com.b2international.index/src/com/b2international/index/query/BoolExpression.java index 744e0bce445..fcd83409d83 100644 --- a/commons/com.b2international.index/src/com/b2international/index/query/BoolExpression.java +++ b/commons/com.b2international.index/src/com/b2international/index/query/BoolExpression.java @@ -1,5 +1,5 @@ /* - * Copyright 2011-2016 B2i Healthcare Pte Ltd, http://b2i.sg + * Copyright 2011-2022 B2i Healthcare Pte Ltd, http://b2i.sg * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -114,5 +114,13 @@ private void append(final String name, final List expressions, final builder.append(")"); } } + + public boolean isFilterOnly() { + return mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.isEmpty() && !filterClauses.isEmpty(); + } + + public boolean isShouldOnly() { + return mustClauses.isEmpty() && mustNotClauses.isEmpty() && !shouldClauses.isEmpty() && filterClauses.isEmpty(); + } } diff --git a/commons/com.b2international.index/src/com/b2international/index/query/Expressions.java b/commons/com.b2international.index/src/com/b2international/index/query/Expressions.java index 4d10391ba30..974ebdbae3c 100644 --- a/commons/com.b2international.index/src/com/b2international/index/query/Expressions.java +++ b/commons/com.b2international.index/src/com/b2international/index/query/Expressions.java @@ -1,5 +1,5 @@ /* - * Copyright 2011-2021 B2i Healthcare Pte Ltd, http://b2i.sg + * Copyright 2011-2022 B2i Healthcare Pte Ltd, http://b2i.sg * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,10 +16,7 @@ package com.b2international.index.query; import java.math.BigDecimal; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -42,7 +39,18 @@ protected ExpressionBuilder getSelf() { } + /** + * @return a new boolean query expression builder + * @deprecated - use {@link Expressions#bool()} instead + */ public static ExpressionBuilder builder() { + return bool(); + } + + /** + * @return a new boolean query expression builder + */ + public static ExpressionBuilder bool() { return new ExpressionBuilder(); } @@ -272,4 +280,25 @@ public static Expression scriptQuery(String script, Map params) return new ScriptQueryExpression(script, params); } + public static Expression matchAnyObject(String field, Iterable values) { + Object firstValue = Iterables.getFirst(values, null); + if (firstValue == null) { + return matchNone(); + } else if (firstValue instanceof String) { + return matchAny(field, (Iterable) values); + } else if (firstValue instanceof Long) { + return matchAnyLong(field, (Iterable) values); + } else if (firstValue instanceof BigDecimal) { + return matchAnyDecimal(field, (Iterable) values); + } else if (firstValue instanceof Double) { + return matchAnyDouble(field, (Iterable) values); + } else if (firstValue instanceof Enum) { + return matchAnyEnum(field, (Iterable>) values); + } else if (firstValue instanceof Integer) { + return matchAnyInt(field, (Iterable) values); + } else { + throw new UnsupportedOperationException("Unsupported term expression value type: " + firstValue); + } + } + } \ No newline at end of file From fda222c5a30cb059cde1f59086130c8dfd66e64c Mon Sep 17 00:00:00 2001 From: Mark Czotter Date: Fri, 11 Feb 2022 14:47:22 +0100 Subject: [PATCH 2/8] fix(index): throw IllegalArgumentException when Elasticsearch reports... ...an HTTP 400 error --- .../com/b2international/index/es/EsDocumentSearcher.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/commons/com.b2international.index/src/com/b2international/index/es/EsDocumentSearcher.java b/commons/com.b2international.index/src/com/b2international/index/es/EsDocumentSearcher.java index 54826f3af3f..cea4881bc1e 100644 --- a/commons/com.b2international.index/src/com/b2international/index/es/EsDocumentSearcher.java +++ b/commons/com.b2international.index/src/com/b2international/index/es/EsDocumentSearcher.java @@ -1,5 +1,5 @@ /* - * Copyright 2017-2021 B2i Healthcare Pte Ltd, http://b2i.sg + * Copyright 2017-2022 B2i Healthcare Pte Ltd, http://b2i.sg * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,11 +28,13 @@ import org.apache.lucene.search.TotalHits; import org.apache.lucene.search.TotalHits.Relation; import org.apache.solr.common.util.JavaBinCodec; +import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.action.get.GetRequest; import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.aggregations.AggregationBuilders; @@ -183,6 +185,9 @@ public Hits search(Query query) throws IOException { try { response = client.search(req); } catch (Exception e) { + if (e instanceof ElasticsearchStatusException && ((ElasticsearchStatusException) e).status() == RestStatus.BAD_REQUEST) { + throw new IllegalArgumentException(e.getMessage(), e); + } admin.log().error("Couldn't execute query", e); throw new IndexException("Couldn't execute query: " + e.getMessage(), null); } From 0cd5b2cb4ce79b5eb01c908e303e8e2199d51d74 Mon Sep 17 00:00:00 2001 From: Mark Czotter Date: Fri, 11 Feb 2022 15:31:22 +0100 Subject: [PATCH 3/8] fix(index): wrap revision queries properly in must/filter clauses... ...instead of always using must (which is mainly for scoring) --- .../b2international/index/query/Query.java | 10 +++++++++ .../revision/DefaultRevisionSearcher.java | 22 +++++-------------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/commons/com.b2international.index/src/com/b2international/index/query/Query.java b/commons/com.b2international.index/src/com/b2international/index/query/Query.java index b0883fa0d5a..6fa4995c65d 100644 --- a/commons/com.b2international.index/src/com/b2international/index/query/Query.java +++ b/commons/com.b2international.index/src/com/b2international/index/query/Query.java @@ -16,11 +16,13 @@ package com.b2international.index.query; import java.util.List; +import java.util.function.BiFunction; import java.util.stream.Stream; import com.b2international.commons.CompareUtils; import com.b2international.index.Hits; import com.b2international.index.Searcher; +import com.b2international.index.query.Expressions.ExpressionBuilder; import com.b2international.index.revision.Revision; import com.google.common.base.Joiner; import com.google.common.collect.ImmutableList; @@ -267,4 +269,12 @@ public final Stream> stream(Searcher searcher) { return searcher.stream(this); } + public AfterWhereBuilder withFilter(Expression additionalFilter) { + final BiFunction boolClause = isWithScores() ? ExpressionBuilder::must : ExpressionBuilder::filter; + ExpressionBuilder altered = Expressions.bool(); + boolClause.apply(altered, getWhere()); + altered.filter(additionalFilter); + return withWhere(altered.build()); + } + } diff --git a/commons/com.b2international.index/src/com/b2international/index/revision/DefaultRevisionSearcher.java b/commons/com.b2international.index/src/com/b2international/index/revision/DefaultRevisionSearcher.java index 5f5074d8d19..4ac4aa71a24 100644 --- a/commons/com.b2international.index/src/com/b2international/index/revision/DefaultRevisionSearcher.java +++ b/commons/com.b2international.index/src/com/b2international/index/revision/DefaultRevisionSearcher.java @@ -1,5 +1,5 @@ /* - * Copyright 2011-2021 B2i Healthcare Pte Ltd, http://b2i.sg + * Copyright 2011-2022 B2i Healthcare Pte Ltd, http://b2i.sg * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -87,23 +87,11 @@ public Hits search(Query query) throws IOException { if (query.isRevisionQuery()) { if (query.getSelection().getParentScope() == null) { // rewrite query if we are looking for revision, otherwise if we are looking for unversioned nested use it as is - query = query.withWhere( - Expressions.builder() - .must(query.getWhere()) - .filter(branch.toRevisionFilter()) - .build() - ) - .build(); + query = query.withFilter(branch.toRevisionFilter()).build(); } else { checkArgument(Revision.class.isAssignableFrom(query.getSelection().getParentScope()), "Searching non-revision documents require a revision parent type: %s", query); // run a query on the parent documents with nested match on the children - query = query.withWhere( - Expressions.builder() - .must(query.getWhere()) - .filter(Expressions.hasParent(query.getSelection().getParentScope(), branch.toRevisionFilter())) - .build() - ) - .build(); + query = query.withFilter(Expressions.hasParent(query.getSelection().getParentScope(), branch.toRevisionFilter())).build(); } } return searcher.search(query); @@ -111,8 +99,8 @@ public Hits search(Query query) throws IOException { @Override public Aggregation aggregate(AggregationBuilder aggregation) throws IOException { - aggregation.query(Expressions.builder() - .must(aggregation.getQuery()) + aggregation.query(Expressions.bool() + .filter(aggregation.getQuery()) .filter(branch.toRevisionFilter()) .build()); return searcher.aggregate(aggregation); From 546cb2e23359297bb1d01875febc13bb90c0e7a1 Mon Sep 17 00:00:00 2001 From: Mark Czotter Date: Fri, 11 Feb 2022 15:32:15 +0100 Subject: [PATCH 4/8] fix(index): remove must/filter clauses from term filter merging They cause the resulting boolean logic to change from and to or, which leads to incorrect results. --- .../index/query/AbstractExpressionBuilder.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java b/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java index a9a90dbef69..c374f77fc65 100644 --- a/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java +++ b/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java @@ -121,7 +121,7 @@ protected final void flattenShoulds() { if (expression instanceof BoolExpression) { BoolExpression bool = (BoolExpression) expression; if (bool.isShouldOnly() && bool.minShouldMatch() == 1) { - shouldClauses.addAll(bool.filterClauses()); + shouldClauses.addAll(bool.shouldClauses()); shouldClauses.remove(bool); } } @@ -129,11 +129,10 @@ protected final void flattenShoulds() { } protected final void mergeTermFilters() { - // check each must, mustNot, should and filter clause and merge term/terms queries into a single terms query, targeting the same field - mergeTermFilters(mustClauses); + // check each mustNot and should clause list and merge term/terms queries into a single terms query, targeting the same field + // XXX merging must/filter queries will change the boolean logic from AND to OR which leads to incorrect results mergeTermFilters(mustNotClauses); mergeTermFilters(shouldClauses); - mergeTermFilters(filterClauses); } private void mergeTermFilters(List clauses) { From 9196e970f5cbfdced7884e7c82f2b5a1e75db7d2 Mon Sep 17 00:00:00 2001 From: Mark Czotter Date: Fri, 11 Feb 2022 15:50:14 +0100 Subject: [PATCH 5/8] fix(index): remove unnecessary query wrapping when building ES query --- .../b2international/index/es/query/EsQueryBuilder.java | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/commons/com.b2international.index/src/com/b2international/index/es/query/EsQueryBuilder.java b/commons/com.b2international.index/src/com/b2international/index/es/query/EsQueryBuilder.java index 6a4c0a8f331..86c1deb7e60 100644 --- a/commons/com.b2international.index/src/com/b2international/index/es/query/EsQueryBuilder.java +++ b/commons/com.b2international.index/src/com/b2international/index/es/query/EsQueryBuilder.java @@ -77,17 +77,9 @@ public boolean needsScoring() { public QueryBuilder build(Expression expression) { checkNotNull(expression, "expression"); - // always filter by type visit(expression); if (deque.size() == 1) { - QueryBuilder queryBuilder = deque.pop(); - if (needsScoring) { - return queryBuilder; - } else { - return QueryBuilders.boolQuery() - .must(QueryBuilders.matchAllQuery()) - .filter(queryBuilder); - } + return deque.pop(); } else { throw newIllegalStateException(); } From 8b9c489f5228898c92e9f627a7cbae8bff564de6 Mon Sep 17 00:00:00 2001 From: Mark Czotter Date: Fri, 11 Feb 2022 15:50:43 +0100 Subject: [PATCH 6/8] fix(index): do not wrap single should clause into bool query --- .../b2international/index/query/AbstractExpressionBuilder.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java b/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java index c374f77fc65..82b1ea3251b 100644 --- a/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java +++ b/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java @@ -81,6 +81,9 @@ public Expression build() { } else if (isSingleFilter()) { // shortcut to reduce number of nested Boolean clauses return filterClauses.get(0); + } else if (isSingleShould()) { + // shortcut to reduce number of nested Boolean clauses + return shouldClauses.get(0); } else { // before creating the boolean query make sure we flatten the query as much as possible From 8d679a1c89f866b30be3edb7818cf891e835bfa5 Mon Sep 17 00:00:00 2001 From: Mark Czotter Date: Fri, 11 Feb 2022 16:24:24 +0100 Subject: [PATCH 7/8] feat(index): reduce must/filter clauses to single expression... ...by using the intersection of the specified values --- .../b2international/index/BoolQueryTest.java | 32 ++++++++- .../query/AbstractExpressionBuilder.java | 69 +++++++++++++++---- 2 files changed, 87 insertions(+), 14 deletions(-) diff --git a/commons/com.b2international.index.tests/src/com/b2international/index/BoolQueryTest.java b/commons/com.b2international.index.tests/src/com/b2international/index/BoolQueryTest.java index 94aa5937d3f..217b716c387 100644 --- a/commons/com.b2international.index.tests/src/com/b2international/index/BoolQueryTest.java +++ b/commons/com.b2international.index.tests/src/com/b2international/index/BoolQueryTest.java @@ -53,7 +53,7 @@ public void deepFilterOnlyBooleanQueryShouldBeFlattenedToPreventError() throws E } @Test - public void mergeMultipleSingleTermShouldClauses() throws Exception { + public void mergeSingleTermShouldClauses() throws Exception { String id1 = UUID.randomUUID().toString(); String id2 = UUID.randomUUID().toString(); String id3 = UUID.randomUUID().toString(); @@ -66,7 +66,7 @@ public void mergeMultipleSingleTermShouldClauses() throws Exception { } @Test - public void mergeMultipleSingleAndMultiTermShouldClauses() throws Exception { + public void mergeSingleAndMultiTermShouldClauses() throws Exception { String id1 = UUID.randomUUID().toString(); String id2 = UUID.randomUUID().toString(); String id3 = UUID.randomUUID().toString(); @@ -78,6 +78,34 @@ public void mergeMultipleSingleAndMultiTermShouldClauses() throws Exception { .build(); assertEquals(Expressions.matchAny("id", Set.of(id1, id2, id3, id4)), actual); } + + @Test + public void mergeSingleAndMultiTermFilterClauses() throws Exception { + String id1 = UUID.randomUUID().toString(); + String id2 = UUID.randomUUID().toString(); + String id3 = UUID.randomUUID().toString(); + String id4 = UUID.randomUUID().toString(); + Expression actual = Expressions.bool() + .filter(Expressions.exactMatch("id", id1)) + .filter(Expressions.matchAny("id", Set.of(id1, id2))) + .filter(Expressions.matchAny("id", Set.of(id1, id3, id4))) + .build(); + assertEquals(Expressions.exactMatch("id", id1), actual); + } + + @Test + public void mergeDisjunctTermFilterClauses() throws Exception { + String id1 = UUID.randomUUID().toString(); + String id2 = UUID.randomUUID().toString(); + String id3 = UUID.randomUUID().toString(); + String id4 = UUID.randomUUID().toString(); + Expression actual = Expressions.bool() + .filter(Expressions.exactMatch("id", id1)) + .filter(Expressions.matchAny("id", Set.of(id2, id3))) + .filter(Expressions.matchAny("id", Set.of(id3, id4))) + .build(); + assertEquals(Expressions.matchNone(), actual); + } private Expression generateDeepBooleanClause(int depth, BiFunction boolClause) { ExpressionBuilder root = Expressions.bool(); diff --git a/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java b/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java index 82b1ea3251b..ffdf6df6e88 100644 --- a/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java +++ b/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java @@ -15,8 +15,7 @@ */ package com.b2international.index.query; -import static com.google.common.collect.Lists.newArrayList; - +import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Set; @@ -32,10 +31,10 @@ */ public abstract class AbstractExpressionBuilder>{ - protected final List mustClauses = newArrayList(); - protected final List mustNotClauses = newArrayList(); - protected final List shouldClauses = newArrayList(); - protected final List filterClauses = newArrayList(); + protected final List mustClauses = new ArrayList<>(1); + protected final List mustNotClauses = new ArrayList<>(1); + protected final List shouldClauses = new ArrayList<>(3); + protected final List filterClauses = new ArrayList<>(3); protected int minShouldMatch = 1; protected AbstractExpressionBuilder() {} @@ -102,9 +101,15 @@ public Expression build() { return shouldClauses.get(0); } - final BoolExpression be = new BoolExpression(mustClauses, mustNotClauses, shouldClauses, filterClauses); - be.setMinShouldMatch(minShouldMatch); - return be; + // if after the optimization the resulting bool clauses are empty, then return a MatchNone expression + if (mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.isEmpty() && filterClauses.isEmpty()) { + return Expressions.matchNone(); + } else { + // otherwise create the bool expression as usual + final BoolExpression be = new BoolExpression(mustClauses, mustNotClauses, shouldClauses, filterClauses); + be.setMinShouldMatch(minShouldMatch); + return be; + } } } @@ -133,11 +138,50 @@ protected final void flattenShoulds() { protected final void mergeTermFilters() { // check each mustNot and should clause list and merge term/terms queries into a single terms query, targeting the same field - // XXX merging must/filter queries will change the boolean logic from AND to OR which leads to incorrect results mergeTermFilters(mustNotClauses); mergeTermFilters(shouldClauses); + // XXX merging must/filter queries will change the boolean logic from AND to OR which leads to incorrect results + // instead calculate the intersection of the values and use that for the expressions + reduceTermFilters(mustClauses); + reduceTermFilters(filterClauses); } + private void reduceTermFilters(List clauses) { + Multimap termExpressionsByField = HashMultimap.create(); + for (Expression expression : List.copyOf(clauses)) { + if (expression instanceof SingleArgumentPredicate) { + termExpressionsByField.put(((SingleArgumentPredicate) expression).getField(), expression); + } else if (expression instanceof SetPredicate) { + termExpressionsByField.put(((SetPredicate) expression).getField(), expression); + } + } + + for (String field : Set.copyOf(termExpressionsByField.keySet())) { + Collection termExpressions = termExpressionsByField.removeAll(field); + if (termExpressions.size() > 1) { + Set values = null; + for (Expression expression : termExpressions) { + if (values != null && values.isEmpty()) { + break; + } + Set expressionValues; + if (expression instanceof SingleArgumentPredicate) { + expressionValues = Set.of(((SingleArgumentPredicate) expression).getArgument()); + } else if (expression instanceof SetPredicate) { + expressionValues = Set.copyOf(((SetPredicate) expression).values()); + } else { + throw new IllegalStateException("Invalid clause detected when processing term/terms clauses: " + expression); + } + values = values == null ? expressionValues : Sets.intersection(values, expressionValues); + } + // remove all matching clauses first + clauses.removeAll(termExpressions); + // add the new merged expression + clauses.add(Expressions.matchAnyObject(field, values)); + } + } + } + private void mergeTermFilters(List clauses) { Multimap termExpressionsByField = HashMultimap.create(); for (Expression expression : List.copyOf(clauses)) { @@ -159,9 +203,10 @@ private void mergeTermFilters(List clauses) { values.addAll(((SetPredicate) expression).values()); } } - // replace all clauses with a single expression - clauses.add(Expressions.matchAnyObject(field, values)); + // remove all matching clauses first clauses.removeAll(termExpressions); + // add the new merged expression + clauses.add(Expressions.matchAnyObject(field, values)); } } } From 1642f5a90092f1ab64ba04fcf8ea9210f1d1d4d2 Mon Sep 17 00:00:00 2001 From: Mark Czotter Date: Fri, 11 Feb 2022 16:55:53 +0100 Subject: [PATCH 8/8] fix(index): generate new Set instead of reusing SetView from... ...Sets.intersection when collapsing term filters --- .../b2international/index/query/AbstractExpressionBuilder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java b/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java index ffdf6df6e88..d4ecfaae554 100644 --- a/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java +++ b/commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java @@ -172,7 +172,7 @@ private void reduceTermFilters(List clauses) { } else { throw new IllegalStateException("Invalid clause detected when processing term/terms clauses: " + expression); } - values = values == null ? expressionValues : Sets.intersection(values, expressionValues); + values = values == null ? expressionValues : Set.copyOf(Sets.intersection(values, expressionValues)); } // remove all matching clauses first clauses.removeAll(termExpressions);