Skip to content

Commit da5e13f

Browse files
authored
Merge pull request #976 from b2ihealthcare/issue/SO-4080-flatten-bool-queries
SO-4080: optimize bool index queries
2 parents 761fbd9 + 1642f5a commit da5e13f

File tree

8 files changed

+329
-44
lines changed

8 files changed

+329
-44
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
/*
2+
* Copyright 2022 B2i Healthcare Pte Ltd, http://b2i.sg
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.b2international.index;
17+
18+
import static org.junit.Assert.assertEquals;
19+
20+
import java.util.Collection;
21+
import java.util.List;
22+
import java.util.Set;
23+
import java.util.UUID;
24+
import java.util.function.BiFunction;
25+
26+
import org.junit.Test;
27+
28+
import com.b2international.index.query.Expression;
29+
import com.b2international.index.query.Expressions;
30+
import com.b2international.index.query.Expressions.ExpressionBuilder;
31+
import com.b2international.index.query.Query;
32+
33+
/**
34+
* @since 8.1
35+
*/
36+
public class BoolQueryTest extends BaseIndexTest {
37+
38+
@Override
39+
protected Collection<Class<?>> getTypes() {
40+
return List.of(Fixtures.Data.class);
41+
}
42+
43+
@Test
44+
public void deepShouldOnlyBooleanQueryShouldBeFlattenedToPreventError() throws Exception {
45+
// this kind of deep boolean query throws an error in both HTTP and TCP ES clients (flattening solves it for certain searches)
46+
search(Query.select(Fixtures.Data.class).where(generateDeepBooleanClause(1000, ExpressionBuilder::should)).build());
47+
}
48+
49+
@Test
50+
public void deepFilterOnlyBooleanQueryShouldBeFlattenedToPreventError() throws Exception {
51+
// this kind of deep boolean query throws an error in both HTTP and TCP ES clients (flattening solves it for certain searches)
52+
search(Query.select(Fixtures.Data.class).where(generateDeepBooleanClause(1000, ExpressionBuilder::filter)).build());
53+
}
54+
55+
@Test
56+
public void mergeSingleTermShouldClauses() throws Exception {
57+
String id1 = UUID.randomUUID().toString();
58+
String id2 = UUID.randomUUID().toString();
59+
String id3 = UUID.randomUUID().toString();
60+
Expression actual = Expressions.bool()
61+
.should(Expressions.exactMatch("id", id1))
62+
.should(Expressions.exactMatch("id", id2))
63+
.should(Expressions.exactMatch("id", id3))
64+
.build();
65+
assertEquals(Expressions.matchAny("id", Set.of(id1, id2, id3)), actual);
66+
}
67+
68+
@Test
69+
public void mergeSingleAndMultiTermShouldClauses() throws Exception {
70+
String id1 = UUID.randomUUID().toString();
71+
String id2 = UUID.randomUUID().toString();
72+
String id3 = UUID.randomUUID().toString();
73+
String id4 = UUID.randomUUID().toString();
74+
Expression actual = Expressions.bool()
75+
.should(Expressions.exactMatch("id", id1))
76+
.should(Expressions.exactMatch("id", id2))
77+
.should(Expressions.matchAny("id", Set.of(id3, id4)))
78+
.build();
79+
assertEquals(Expressions.matchAny("id", Set.of(id1, id2, id3, id4)), actual);
80+
}
81+
82+
@Test
83+
public void mergeSingleAndMultiTermFilterClauses() throws Exception {
84+
String id1 = UUID.randomUUID().toString();
85+
String id2 = UUID.randomUUID().toString();
86+
String id3 = UUID.randomUUID().toString();
87+
String id4 = UUID.randomUUID().toString();
88+
Expression actual = Expressions.bool()
89+
.filter(Expressions.exactMatch("id", id1))
90+
.filter(Expressions.matchAny("id", Set.of(id1, id2)))
91+
.filter(Expressions.matchAny("id", Set.of(id1, id3, id4)))
92+
.build();
93+
assertEquals(Expressions.exactMatch("id", id1), actual);
94+
}
95+
96+
@Test
97+
public void mergeDisjunctTermFilterClauses() throws Exception {
98+
String id1 = UUID.randomUUID().toString();
99+
String id2 = UUID.randomUUID().toString();
100+
String id3 = UUID.randomUUID().toString();
101+
String id4 = UUID.randomUUID().toString();
102+
Expression actual = Expressions.bool()
103+
.filter(Expressions.exactMatch("id", id1))
104+
.filter(Expressions.matchAny("id", Set.of(id2, id3)))
105+
.filter(Expressions.matchAny("id", Set.of(id3, id4)))
106+
.build();
107+
assertEquals(Expressions.matchNone(), actual);
108+
}
109+
110+
private Expression generateDeepBooleanClause(int depth, BiFunction<ExpressionBuilder, Expression, ExpressionBuilder> boolClause) {
111+
ExpressionBuilder root = Expressions.bool();
112+
ExpressionBuilder current = root;
113+
boolClause.apply(current, Expressions.exactMatch("id", UUID.randomUUID().toString()));
114+
for (int i = 0; i < depth; i++) {
115+
ExpressionBuilder nested = Expressions.bool();
116+
boolClause.apply(nested, Expressions.exactMatch("id", UUID.randomUUID().toString()));
117+
current.should(nested.build());
118+
current = nested;
119+
}
120+
return root.build();
121+
}
122+
123+
}

commons/com.b2international.index/src/com/b2international/index/es/EsDocumentSearcher.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2017-2021 B2i Healthcare Pte Ltd, http://b2i.sg
2+
* Copyright 2017-2022 B2i Healthcare Pte Ltd, http://b2i.sg
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -28,11 +28,13 @@
2828
import org.apache.lucene.search.TotalHits;
2929
import org.apache.lucene.search.TotalHits.Relation;
3030
import org.apache.solr.common.util.JavaBinCodec;
31+
import org.elasticsearch.ElasticsearchStatusException;
3132
import org.elasticsearch.action.get.GetRequest;
3233
import org.elasticsearch.action.get.GetResponse;
3334
import org.elasticsearch.action.search.SearchRequest;
3435
import org.elasticsearch.action.search.SearchResponse;
3536
import org.elasticsearch.index.query.QueryBuilder;
37+
import org.elasticsearch.rest.RestStatus;
3638
import org.elasticsearch.search.SearchHit;
3739
import org.elasticsearch.search.SearchHits;
3840
import org.elasticsearch.search.aggregations.AggregationBuilders;
@@ -183,6 +185,9 @@ public <T> Hits<T> search(Query<T> query) throws IOException {
183185
try {
184186
response = client.search(req);
185187
} catch (Exception e) {
188+
if (e instanceof ElasticsearchStatusException && ((ElasticsearchStatusException) e).status() == RestStatus.BAD_REQUEST) {
189+
throw new IllegalArgumentException(e.getMessage(), e);
190+
}
186191
admin.log().error("Couldn't execute query", e);
187192
throw new IndexException("Couldn't execute query: " + e.getMessage(), null);
188193
}

commons/com.b2international.index/src/com/b2international/index/es/query/EsQueryBuilder.java

+1-9
Original file line numberDiff line numberDiff line change
@@ -77,17 +77,9 @@ public boolean needsScoring() {
7777

7878
public QueryBuilder build(Expression expression) {
7979
checkNotNull(expression, "expression");
80-
// always filter by type
8180
visit(expression);
8281
if (deque.size() == 1) {
83-
QueryBuilder queryBuilder = deque.pop();
84-
if (needsScoring) {
85-
return queryBuilder;
86-
} else {
87-
return QueryBuilders.boolQuery()
88-
.must(QueryBuilders.matchAllQuery())
89-
.filter(queryBuilder);
90-
}
82+
return deque.pop();
9183
} else {
9284
throw newIllegalStateException();
9385
}

commons/com.b2international.index/src/com/b2international/index/query/AbstractExpressionBuilder.java

+141-11
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2011-2017 B2i Healthcare Pte Ltd, http://b2i.sg
2+
* Copyright 2011-2022 B2i Healthcare Pte Ltd, http://b2i.sg
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -15,9 +15,14 @@
1515
*/
1616
package com.b2international.index.query;
1717

18-
import static com.google.common.collect.Lists.newArrayList;
19-
18+
import java.util.ArrayList;
19+
import java.util.Collection;
2020
import java.util.List;
21+
import java.util.Set;
22+
23+
import com.google.common.collect.HashMultimap;
24+
import com.google.common.collect.Multimap;
25+
import com.google.common.collect.Sets;
2126

2227
/**
2328
* Abstract superclass for building query {@link Expression}s.
@@ -26,10 +31,10 @@
2631
*/
2732
public abstract class AbstractExpressionBuilder<B extends AbstractExpressionBuilder<B>>{
2833

29-
protected final List<Expression> mustClauses = newArrayList();
30-
protected final List<Expression> mustNotClauses = newArrayList();
31-
protected final List<Expression> shouldClauses = newArrayList();
32-
protected final List<Expression> filterClauses = newArrayList();
34+
protected final List<Expression> mustClauses = new ArrayList<>(1);
35+
protected final List<Expression> mustNotClauses = new ArrayList<>(1);
36+
protected final List<Expression> shouldClauses = new ArrayList<>(3);
37+
protected final List<Expression> filterClauses = new ArrayList<>(3);
3338
protected int minShouldMatch = 1;
3439

3540
protected AbstractExpressionBuilder() {}
@@ -72,13 +77,138 @@ public B setMinimumNumberShouldMatch(int minShouldMatch) {
7277
public Expression build() {
7378
if (mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.isEmpty() && filterClauses.isEmpty()) {
7479
return Expressions.matchAll();
75-
} else if (mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.isEmpty() && filterClauses.size() == 1) {
80+
} else if (isSingleFilter()) {
7681
// shortcut to reduce number of nested Boolean clauses
7782
return filterClauses.get(0);
83+
} else if (isSingleShould()) {
84+
// shortcut to reduce number of nested Boolean clauses
85+
return shouldClauses.get(0);
7886
} else {
79-
final BoolExpression be = new BoolExpression(mustClauses, mustNotClauses, shouldClauses, filterClauses);
80-
be.setMinShouldMatch(minShouldMatch);
81-
return be;
87+
// before creating the boolean query make sure we flatten the query as much as possible
88+
89+
// or(A=B, or(A=C, or(A=D))) - should only clauses deeply nested inside bool queries (and the minShouldMatch is 1 on all levels)
90+
flattenShoulds();
91+
92+
// then optimize term filters that match the same field, field1=A or field1=B or field1=C should be converted to field1=any(A, B, C)
93+
// during EsQueryBuilder the system will optimize it again to fit into the max term count ES setting, see EsQueryBuilder
94+
mergeTermFilters();
95+
96+
if (isSingleFilter()) {
97+
return filterClauses.get(0);
98+
}
99+
100+
if (isSingleShould()) {
101+
return shouldClauses.get(0);
102+
}
103+
104+
// if after the optimization the resulting bool clauses are empty, then return a MatchNone expression
105+
if (mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.isEmpty() && filterClauses.isEmpty()) {
106+
return Expressions.matchNone();
107+
} else {
108+
// otherwise create the bool expression as usual
109+
final BoolExpression be = new BoolExpression(mustClauses, mustNotClauses, shouldClauses, filterClauses);
110+
be.setMinShouldMatch(minShouldMatch);
111+
return be;
112+
}
113+
}
114+
}
115+
116+
private boolean isSingleFilter() {
117+
return mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.isEmpty() && filterClauses.size() == 1;
118+
}
119+
120+
private boolean isSingleShould() {
121+
return mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.size() == 1 && filterClauses.isEmpty();
122+
}
123+
124+
protected final void flattenShoulds() {
125+
if (!mustClauses.isEmpty() || !mustNotClauses.isEmpty() || !filterClauses.isEmpty() || minShouldMatch != 1) {
126+
return;
127+
}
128+
for (Expression expression : List.copyOf(shouldClauses)) {
129+
if (expression instanceof BoolExpression) {
130+
BoolExpression bool = (BoolExpression) expression;
131+
if (bool.isShouldOnly() && bool.minShouldMatch() == 1) {
132+
shouldClauses.addAll(bool.shouldClauses());
133+
shouldClauses.remove(bool);
134+
}
135+
}
82136
}
83137
}
138+
139+
protected final void mergeTermFilters() {
140+
// check each mustNot and should clause list and merge term/terms queries into a single terms query, targeting the same field
141+
mergeTermFilters(mustNotClauses);
142+
mergeTermFilters(shouldClauses);
143+
// XXX merging must/filter queries will change the boolean logic from AND to OR which leads to incorrect results
144+
// instead calculate the intersection of the values and use that for the expressions
145+
reduceTermFilters(mustClauses);
146+
reduceTermFilters(filterClauses);
147+
}
148+
149+
private void reduceTermFilters(List<Expression> clauses) {
150+
Multimap<String, Expression> termExpressionsByField = HashMultimap.create();
151+
for (Expression expression : List.copyOf(clauses)) {
152+
if (expression instanceof SingleArgumentPredicate<?>) {
153+
termExpressionsByField.put(((SingleArgumentPredicate<?>) expression).getField(), expression);
154+
} else if (expression instanceof SetPredicate<?>) {
155+
termExpressionsByField.put(((SetPredicate<?>) expression).getField(), expression);
156+
}
157+
}
158+
159+
for (String field : Set.copyOf(termExpressionsByField.keySet())) {
160+
Collection<Expression> termExpressions = termExpressionsByField.removeAll(field);
161+
if (termExpressions.size() > 1) {
162+
Set<Object> values = null;
163+
for (Expression expression : termExpressions) {
164+
if (values != null && values.isEmpty()) {
165+
break;
166+
}
167+
Set<Object> expressionValues;
168+
if (expression instanceof SingleArgumentPredicate<?>) {
169+
expressionValues = Set.of(((SingleArgumentPredicate<?>) expression).getArgument());
170+
} else if (expression instanceof SetPredicate<?>) {
171+
expressionValues = Set.copyOf(((SetPredicate<?>) expression).values());
172+
} else {
173+
throw new IllegalStateException("Invalid clause detected when processing term/terms clauses: " + expression);
174+
}
175+
values = values == null ? expressionValues : Set.copyOf(Sets.intersection(values, expressionValues));
176+
}
177+
// remove all matching clauses first
178+
clauses.removeAll(termExpressions);
179+
// add the new merged expression
180+
clauses.add(Expressions.matchAnyObject(field, values));
181+
}
182+
}
183+
}
184+
185+
private void mergeTermFilters(List<Expression> clauses) {
186+
Multimap<String, Expression> termExpressionsByField = HashMultimap.create();
187+
for (Expression expression : List.copyOf(clauses)) {
188+
if (expression instanceof SingleArgumentPredicate<?>) {
189+
termExpressionsByField.put(((SingleArgumentPredicate<?>) expression).getField(), expression);
190+
} else if (expression instanceof SetPredicate<?>) {
191+
termExpressionsByField.put(((SetPredicate<?>) expression).getField(), expression);
192+
}
193+
}
194+
195+
for (String field : Set.copyOf(termExpressionsByField.keySet())) {
196+
Collection<Expression> termExpressions = termExpressionsByField.removeAll(field);
197+
if (termExpressions.size() > 1) {
198+
Set<Object> values = Sets.newHashSet();
199+
for (Expression expression : termExpressions) {
200+
if (expression instanceof SingleArgumentPredicate<?>) {
201+
values.add(((SingleArgumentPredicate<?>) expression).getArgument());
202+
} else if (expression instanceof SetPredicate<?>) {
203+
values.addAll(((SetPredicate<?>) expression).values());
204+
}
205+
}
206+
// remove all matching clauses first
207+
clauses.removeAll(termExpressions);
208+
// add the new merged expression
209+
clauses.add(Expressions.matchAnyObject(field, values));
210+
}
211+
}
212+
}
213+
84214
}

commons/com.b2international.index/src/com/b2international/index/query/BoolExpression.java

+9-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2011-2016 B2i Healthcare Pte Ltd, http://b2i.sg
2+
* Copyright 2011-2022 B2i Healthcare Pte Ltd, http://b2i.sg
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -114,5 +114,13 @@ private void append(final String name, final List<Expression> expressions, final
114114
builder.append(")");
115115
}
116116
}
117+
118+
public boolean isFilterOnly() {
119+
return mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.isEmpty() && !filterClauses.isEmpty();
120+
}
121+
122+
public boolean isShouldOnly() {
123+
return mustClauses.isEmpty() && mustNotClauses.isEmpty() && !shouldClauses.isEmpty() && filterClauses.isEmpty();
124+
}
117125

118126
}

0 commit comments

Comments
 (0)