Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

SO-4080: optimize bool index queries #976

Merged
merged 8 commits into from
Feb 11, 2022
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/*
* Copyright 2022 B2i Healthcare Pte Ltd, http://b2i.sg
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.b2international.index;

import static org.junit.Assert.assertEquals;

import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import java.util.function.BiFunction;

import org.junit.Test;

import com.b2international.index.query.Expression;
import com.b2international.index.query.Expressions;
import com.b2international.index.query.Expressions.ExpressionBuilder;
import com.b2international.index.query.Query;

/**
* @since 8.1
*/
public class BoolQueryTest extends BaseIndexTest {

@Override
protected Collection<Class<?>> getTypes() {
return List.of(Fixtures.Data.class);
}

@Test
public void deepShouldOnlyBooleanQueryShouldBeFlattenedToPreventError() throws Exception {
// this kind of deep boolean query throws an error in both HTTP and TCP ES clients (flattening solves it for certain searches)
search(Query.select(Fixtures.Data.class).where(generateDeepBooleanClause(1000, ExpressionBuilder::should)).build());
}

@Test
public void deepFilterOnlyBooleanQueryShouldBeFlattenedToPreventError() throws Exception {
// this kind of deep boolean query throws an error in both HTTP and TCP ES clients (flattening solves it for certain searches)
search(Query.select(Fixtures.Data.class).where(generateDeepBooleanClause(1000, ExpressionBuilder::filter)).build());
}

@Test
public void mergeSingleTermShouldClauses() throws Exception {
String id1 = UUID.randomUUID().toString();
String id2 = UUID.randomUUID().toString();
String id3 = UUID.randomUUID().toString();
Expression actual = Expressions.bool()
.should(Expressions.exactMatch("id", id1))
.should(Expressions.exactMatch("id", id2))
.should(Expressions.exactMatch("id", id3))
.build();
assertEquals(Expressions.matchAny("id", Set.of(id1, id2, id3)), actual);
}

@Test
public void mergeSingleAndMultiTermShouldClauses() throws Exception {
String id1 = UUID.randomUUID().toString();
String id2 = UUID.randomUUID().toString();
String id3 = UUID.randomUUID().toString();
String id4 = UUID.randomUUID().toString();
Expression actual = Expressions.bool()
.should(Expressions.exactMatch("id", id1))
.should(Expressions.exactMatch("id", id2))
.should(Expressions.matchAny("id", Set.of(id3, id4)))
.build();
assertEquals(Expressions.matchAny("id", Set.of(id1, id2, id3, id4)), actual);
}

@Test
public void mergeSingleAndMultiTermFilterClauses() throws Exception {
String id1 = UUID.randomUUID().toString();
String id2 = UUID.randomUUID().toString();
String id3 = UUID.randomUUID().toString();
String id4 = UUID.randomUUID().toString();
Expression actual = Expressions.bool()
.filter(Expressions.exactMatch("id", id1))
.filter(Expressions.matchAny("id", Set.of(id1, id2)))
.filter(Expressions.matchAny("id", Set.of(id1, id3, id4)))
.build();
assertEquals(Expressions.exactMatch("id", id1), actual);
}

@Test
public void mergeDisjunctTermFilterClauses() throws Exception {
String id1 = UUID.randomUUID().toString();
String id2 = UUID.randomUUID().toString();
String id3 = UUID.randomUUID().toString();
String id4 = UUID.randomUUID().toString();
Expression actual = Expressions.bool()
.filter(Expressions.exactMatch("id", id1))
.filter(Expressions.matchAny("id", Set.of(id2, id3)))
.filter(Expressions.matchAny("id", Set.of(id3, id4)))
.build();
assertEquals(Expressions.matchNone(), actual);
}

private Expression generateDeepBooleanClause(int depth, BiFunction<ExpressionBuilder, Expression, ExpressionBuilder> boolClause) {
ExpressionBuilder root = Expressions.bool();
ExpressionBuilder current = root;
boolClause.apply(current, Expressions.exactMatch("id", UUID.randomUUID().toString()));
for (int i = 0; i < depth; i++) {
ExpressionBuilder nested = Expressions.bool();
boolClause.apply(nested, Expressions.exactMatch("id", UUID.randomUUID().toString()));
current.should(nested.build());
current = nested;
}
return root.build();
}

}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2017-2021 B2i Healthcare Pte Ltd, http://b2i.sg
* Copyright 2017-2022 B2i Healthcare Pte Ltd, http://b2i.sg
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -28,11 +28,13 @@
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.TotalHits.Relation;
import org.apache.solr.common.util.JavaBinCodec;
import org.elasticsearch.ElasticsearchStatusException;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.AggregationBuilders;
Expand Down Expand Up @@ -183,6 +185,9 @@ public <T> Hits<T> search(Query<T> query) throws IOException {
try {
response = client.search(req);
} catch (Exception e) {
if (e instanceof ElasticsearchStatusException && ((ElasticsearchStatusException) e).status() == RestStatus.BAD_REQUEST) {
throw new IllegalArgumentException(e.getMessage(), e);
}
admin.log().error("Couldn't execute query", e);
throw new IndexException("Couldn't execute query: " + e.getMessage(), null);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,9 @@ public boolean needsScoring() {

public QueryBuilder build(Expression expression) {
checkNotNull(expression, "expression");
// always filter by type
visit(expression);
if (deque.size() == 1) {
QueryBuilder queryBuilder = deque.pop();
if (needsScoring) {
return queryBuilder;
} else {
return QueryBuilders.boolQuery()
.must(QueryBuilders.matchAllQuery())
.filter(queryBuilder);
}
return deque.pop();
} else {
throw newIllegalStateException();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2011-2017 B2i Healthcare Pte Ltd, http://b2i.sg
* Copyright 2011-2022 B2i Healthcare Pte Ltd, http://b2i.sg
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,9 +15,14 @@
*/
package com.b2international.index.query;

import static com.google.common.collect.Lists.newArrayList;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;

/**
* Abstract superclass for building query {@link Expression}s.
Expand All @@ -26,10 +31,10 @@
*/
public abstract class AbstractExpressionBuilder<B extends AbstractExpressionBuilder<B>>{

protected final List<Expression> mustClauses = newArrayList();
protected final List<Expression> mustNotClauses = newArrayList();
protected final List<Expression> shouldClauses = newArrayList();
protected final List<Expression> filterClauses = newArrayList();
protected final List<Expression> mustClauses = new ArrayList<>(1);
protected final List<Expression> mustNotClauses = new ArrayList<>(1);
protected final List<Expression> shouldClauses = new ArrayList<>(3);
protected final List<Expression> filterClauses = new ArrayList<>(3);
protected int minShouldMatch = 1;

protected AbstractExpressionBuilder() {}
Expand Down Expand Up @@ -72,13 +77,138 @@ public B setMinimumNumberShouldMatch(int minShouldMatch) {
public Expression build() {
if (mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.isEmpty() && filterClauses.isEmpty()) {
return Expressions.matchAll();
} else if (mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.isEmpty() && filterClauses.size() == 1) {
} else if (isSingleFilter()) {
// shortcut to reduce number of nested Boolean clauses
return filterClauses.get(0);
} else if (isSingleShould()) {
// shortcut to reduce number of nested Boolean clauses
return shouldClauses.get(0);
} else {
final BoolExpression be = new BoolExpression(mustClauses, mustNotClauses, shouldClauses, filterClauses);
be.setMinShouldMatch(minShouldMatch);
return be;
// before creating the boolean query make sure we flatten the query as much as possible

// or(A=B, or(A=C, or(A=D))) - should only clauses deeply nested inside bool queries (and the minShouldMatch is 1 on all levels)
flattenShoulds();

// then optimize term filters that match the same field, field1=A or field1=B or field1=C should be converted to field1=any(A, B, C)
// during EsQueryBuilder the system will optimize it again to fit into the max term count ES setting, see EsQueryBuilder
mergeTermFilters();

if (isSingleFilter()) {
return filterClauses.get(0);
}

if (isSingleShould()) {
return shouldClauses.get(0);
}

// if after the optimization the resulting bool clauses are empty, then return a MatchNone expression
if (mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.isEmpty() && filterClauses.isEmpty()) {
return Expressions.matchNone();
} else {
// otherwise create the bool expression as usual
final BoolExpression be = new BoolExpression(mustClauses, mustNotClauses, shouldClauses, filterClauses);
be.setMinShouldMatch(minShouldMatch);
return be;
}
}
}

private boolean isSingleFilter() {
return mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.isEmpty() && filterClauses.size() == 1;
}

private boolean isSingleShould() {
return mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.size() == 1 && filterClauses.isEmpty();
}

protected final void flattenShoulds() {
if (!mustClauses.isEmpty() || !mustNotClauses.isEmpty() || !filterClauses.isEmpty() || minShouldMatch != 1) {
return;
}
for (Expression expression : List.copyOf(shouldClauses)) {
if (expression instanceof BoolExpression) {
BoolExpression bool = (BoolExpression) expression;
if (bool.isShouldOnly() && bool.minShouldMatch() == 1) {
shouldClauses.addAll(bool.shouldClauses());
shouldClauses.remove(bool);
}
}
}
}

protected final void mergeTermFilters() {
// check each mustNot and should clause list and merge term/terms queries into a single terms query, targeting the same field
mergeTermFilters(mustNotClauses);
mergeTermFilters(shouldClauses);
// XXX merging must/filter queries will change the boolean logic from AND to OR which leads to incorrect results
// instead calculate the intersection of the values and use that for the expressions
reduceTermFilters(mustClauses);
reduceTermFilters(filterClauses);
}

private void reduceTermFilters(List<Expression> clauses) {
Multimap<String, Expression> termExpressionsByField = HashMultimap.create();
for (Expression expression : List.copyOf(clauses)) {
if (expression instanceof SingleArgumentPredicate<?>) {
termExpressionsByField.put(((SingleArgumentPredicate<?>) expression).getField(), expression);
} else if (expression instanceof SetPredicate<?>) {
termExpressionsByField.put(((SetPredicate<?>) expression).getField(), expression);
}
}

for (String field : Set.copyOf(termExpressionsByField.keySet())) {
Collection<Expression> termExpressions = termExpressionsByField.removeAll(field);
if (termExpressions.size() > 1) {
Set<Object> values = null;
for (Expression expression : termExpressions) {
if (values != null && values.isEmpty()) {
break;
}
Set<Object> expressionValues;
if (expression instanceof SingleArgumentPredicate<?>) {
expressionValues = Set.of(((SingleArgumentPredicate<?>) expression).getArgument());
} else if (expression instanceof SetPredicate<?>) {
expressionValues = Set.copyOf(((SetPredicate<?>) expression).values());
} else {
throw new IllegalStateException("Invalid clause detected when processing term/terms clauses: " + expression);
}
values = values == null ? expressionValues : Sets.intersection(values, expressionValues);
}
// remove all matching clauses first
clauses.removeAll(termExpressions);
// add the new merged expression
clauses.add(Expressions.matchAnyObject(field, values));
}
}
}

private void mergeTermFilters(List<Expression> clauses) {
Multimap<String, Expression> termExpressionsByField = HashMultimap.create();
for (Expression expression : List.copyOf(clauses)) {
if (expression instanceof SingleArgumentPredicate<?>) {
termExpressionsByField.put(((SingleArgumentPredicate<?>) expression).getField(), expression);
} else if (expression instanceof SetPredicate<?>) {
termExpressionsByField.put(((SetPredicate<?>) expression).getField(), expression);
}
}

for (String field : Set.copyOf(termExpressionsByField.keySet())) {
Collection<Expression> termExpressions = termExpressionsByField.removeAll(field);
if (termExpressions.size() > 1) {
Set<Object> values = Sets.newHashSet();
for (Expression expression : termExpressions) {
if (expression instanceof SingleArgumentPredicate<?>) {
values.add(((SingleArgumentPredicate<?>) expression).getArgument());
} else if (expression instanceof SetPredicate<?>) {
values.addAll(((SetPredicate<?>) expression).values());
}
}
// remove all matching clauses first
clauses.removeAll(termExpressions);
// add the new merged expression
clauses.add(Expressions.matchAnyObject(field, values));
}
}
}

}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2011-2016 B2i Healthcare Pte Ltd, http://b2i.sg
* Copyright 2011-2022 B2i Healthcare Pte Ltd, http://b2i.sg
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -114,5 +114,13 @@ private void append(final String name, final List<Expression> expressions, final
builder.append(")");
}
}

public boolean isFilterOnly() {
return mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.isEmpty() && !filterClauses.isEmpty();
}

public boolean isShouldOnly() {
return mustClauses.isEmpty() && mustNotClauses.isEmpty() && !shouldClauses.isEmpty() && filterClauses.isEmpty();
}

}
Loading