|
1 | 1 | /*
|
2 |
| - * Copyright 2011-2017 B2i Healthcare Pte Ltd, http://b2i.sg |
| 2 | + * Copyright 2011-2022 B2i Healthcare Pte Ltd, http://b2i.sg |
3 | 3 | *
|
4 | 4 | * Licensed under the Apache License, Version 2.0 (the "License");
|
5 | 5 | * you may not use this file except in compliance with the License.
|
|
15 | 15 | */
|
16 | 16 | package com.b2international.index.query;
|
17 | 17 |
|
18 |
| -import static com.google.common.collect.Lists.newArrayList; |
19 |
| - |
| 18 | +import java.util.ArrayList; |
| 19 | +import java.util.Collection; |
20 | 20 | import java.util.List;
|
| 21 | +import java.util.Set; |
| 22 | + |
| 23 | +import com.google.common.collect.HashMultimap; |
| 24 | +import com.google.common.collect.Multimap; |
| 25 | +import com.google.common.collect.Sets; |
21 | 26 |
|
22 | 27 | /**
|
23 | 28 | * Abstract superclass for building query {@link Expression}s.
|
|
26 | 31 | */
|
27 | 32 | public abstract class AbstractExpressionBuilder<B extends AbstractExpressionBuilder<B>>{
|
28 | 33 |
|
29 |
| - protected final List<Expression> mustClauses = newArrayList(); |
30 |
| - protected final List<Expression> mustNotClauses = newArrayList(); |
31 |
| - protected final List<Expression> shouldClauses = newArrayList(); |
32 |
| - protected final List<Expression> filterClauses = newArrayList(); |
| 34 | + protected final List<Expression> mustClauses = new ArrayList<>(1); |
| 35 | + protected final List<Expression> mustNotClauses = new ArrayList<>(1); |
| 36 | + protected final List<Expression> shouldClauses = new ArrayList<>(3); |
| 37 | + protected final List<Expression> filterClauses = new ArrayList<>(3); |
33 | 38 | protected int minShouldMatch = 1;
|
34 | 39 |
|
35 | 40 | protected AbstractExpressionBuilder() {}
|
@@ -72,13 +77,138 @@ public B setMinimumNumberShouldMatch(int minShouldMatch) {
|
72 | 77 | public Expression build() {
|
73 | 78 | if (mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.isEmpty() && filterClauses.isEmpty()) {
|
74 | 79 | return Expressions.matchAll();
|
75 |
| - } else if (mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.isEmpty() && filterClauses.size() == 1) { |
| 80 | + } else if (isSingleFilter()) { |
76 | 81 | // shortcut to reduce number of nested Boolean clauses
|
77 | 82 | return filterClauses.get(0);
|
| 83 | + } else if (isSingleShould()) { |
| 84 | + // shortcut to reduce number of nested Boolean clauses |
| 85 | + return shouldClauses.get(0); |
78 | 86 | } else {
|
79 |
| - final BoolExpression be = new BoolExpression(mustClauses, mustNotClauses, shouldClauses, filterClauses); |
80 |
| - be.setMinShouldMatch(minShouldMatch); |
81 |
| - return be; |
| 87 | + // before creating the boolean query make sure we flatten the query as much as possible |
| 88 | + |
| 89 | + // or(A=B, or(A=C, or(A=D))) - should only clauses deeply nested inside bool queries (and the minShouldMatch is 1 on all levels) |
| 90 | + flattenShoulds(); |
| 91 | + |
| 92 | + // then optimize term filters that match the same field, field1=A or field1=B or field1=C should be converted to field1=any(A, B, C) |
| 93 | + // during EsQueryBuilder the system will optimize it again to fit into the max term count ES setting, see EsQueryBuilder |
| 94 | + mergeTermFilters(); |
| 95 | + |
| 96 | + if (isSingleFilter()) { |
| 97 | + return filterClauses.get(0); |
| 98 | + } |
| 99 | + |
| 100 | + if (isSingleShould()) { |
| 101 | + return shouldClauses.get(0); |
| 102 | + } |
| 103 | + |
| 104 | + // if after the optimization the resulting bool clauses are empty, then return a MatchNone expression |
| 105 | + if (mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.isEmpty() && filterClauses.isEmpty()) { |
| 106 | + return Expressions.matchNone(); |
| 107 | + } else { |
| 108 | + // otherwise create the bool expression as usual |
| 109 | + final BoolExpression be = new BoolExpression(mustClauses, mustNotClauses, shouldClauses, filterClauses); |
| 110 | + be.setMinShouldMatch(minShouldMatch); |
| 111 | + return be; |
| 112 | + } |
| 113 | + } |
| 114 | + } |
| 115 | + |
| 116 | + private boolean isSingleFilter() { |
| 117 | + return mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.isEmpty() && filterClauses.size() == 1; |
| 118 | + } |
| 119 | + |
| 120 | + private boolean isSingleShould() { |
| 121 | + return mustClauses.isEmpty() && mustNotClauses.isEmpty() && shouldClauses.size() == 1 && filterClauses.isEmpty(); |
| 122 | + } |
| 123 | + |
| 124 | + protected final void flattenShoulds() { |
| 125 | + if (!mustClauses.isEmpty() || !mustNotClauses.isEmpty() || !filterClauses.isEmpty() || minShouldMatch != 1) { |
| 126 | + return; |
| 127 | + } |
| 128 | + for (Expression expression : List.copyOf(shouldClauses)) { |
| 129 | + if (expression instanceof BoolExpression) { |
| 130 | + BoolExpression bool = (BoolExpression) expression; |
| 131 | + if (bool.isShouldOnly() && bool.minShouldMatch() == 1) { |
| 132 | + shouldClauses.addAll(bool.shouldClauses()); |
| 133 | + shouldClauses.remove(bool); |
| 134 | + } |
| 135 | + } |
82 | 136 | }
|
83 | 137 | }
|
| 138 | + |
| 139 | + protected final void mergeTermFilters() { |
| 140 | + // check each mustNot and should clause list and merge term/terms queries into a single terms query, targeting the same field |
| 141 | + mergeTermFilters(mustNotClauses); |
| 142 | + mergeTermFilters(shouldClauses); |
| 143 | + // XXX merging must/filter queries will change the boolean logic from AND to OR which leads to incorrect results |
| 144 | + // instead calculate the intersection of the values and use that for the expressions |
| 145 | + reduceTermFilters(mustClauses); |
| 146 | + reduceTermFilters(filterClauses); |
| 147 | + } |
| 148 | + |
| 149 | + private void reduceTermFilters(List<Expression> clauses) { |
| 150 | + Multimap<String, Expression> termExpressionsByField = HashMultimap.create(); |
| 151 | + for (Expression expression : List.copyOf(clauses)) { |
| 152 | + if (expression instanceof SingleArgumentPredicate<?>) { |
| 153 | + termExpressionsByField.put(((SingleArgumentPredicate<?>) expression).getField(), expression); |
| 154 | + } else if (expression instanceof SetPredicate<?>) { |
| 155 | + termExpressionsByField.put(((SetPredicate<?>) expression).getField(), expression); |
| 156 | + } |
| 157 | + } |
| 158 | + |
| 159 | + for (String field : Set.copyOf(termExpressionsByField.keySet())) { |
| 160 | + Collection<Expression> termExpressions = termExpressionsByField.removeAll(field); |
| 161 | + if (termExpressions.size() > 1) { |
| 162 | + Set<Object> values = null; |
| 163 | + for (Expression expression : termExpressions) { |
| 164 | + if (values != null && values.isEmpty()) { |
| 165 | + break; |
| 166 | + } |
| 167 | + Set<Object> expressionValues; |
| 168 | + if (expression instanceof SingleArgumentPredicate<?>) { |
| 169 | + expressionValues = Set.of(((SingleArgumentPredicate<?>) expression).getArgument()); |
| 170 | + } else if (expression instanceof SetPredicate<?>) { |
| 171 | + expressionValues = Set.copyOf(((SetPredicate<?>) expression).values()); |
| 172 | + } else { |
| 173 | + throw new IllegalStateException("Invalid clause detected when processing term/terms clauses: " + expression); |
| 174 | + } |
| 175 | + values = values == null ? expressionValues : Set.copyOf(Sets.intersection(values, expressionValues)); |
| 176 | + } |
| 177 | + // remove all matching clauses first |
| 178 | + clauses.removeAll(termExpressions); |
| 179 | + // add the new merged expression |
| 180 | + clauses.add(Expressions.matchAnyObject(field, values)); |
| 181 | + } |
| 182 | + } |
| 183 | + } |
| 184 | + |
| 185 | + private void mergeTermFilters(List<Expression> clauses) { |
| 186 | + Multimap<String, Expression> termExpressionsByField = HashMultimap.create(); |
| 187 | + for (Expression expression : List.copyOf(clauses)) { |
| 188 | + if (expression instanceof SingleArgumentPredicate<?>) { |
| 189 | + termExpressionsByField.put(((SingleArgumentPredicate<?>) expression).getField(), expression); |
| 190 | + } else if (expression instanceof SetPredicate<?>) { |
| 191 | + termExpressionsByField.put(((SetPredicate<?>) expression).getField(), expression); |
| 192 | + } |
| 193 | + } |
| 194 | + |
| 195 | + for (String field : Set.copyOf(termExpressionsByField.keySet())) { |
| 196 | + Collection<Expression> termExpressions = termExpressionsByField.removeAll(field); |
| 197 | + if (termExpressions.size() > 1) { |
| 198 | + Set<Object> values = Sets.newHashSet(); |
| 199 | + for (Expression expression : termExpressions) { |
| 200 | + if (expression instanceof SingleArgumentPredicate<?>) { |
| 201 | + values.add(((SingleArgumentPredicate<?>) expression).getArgument()); |
| 202 | + } else if (expression instanceof SetPredicate<?>) { |
| 203 | + values.addAll(((SetPredicate<?>) expression).values()); |
| 204 | + } |
| 205 | + } |
| 206 | + // remove all matching clauses first |
| 207 | + clauses.removeAll(termExpressions); |
| 208 | + // add the new merged expression |
| 209 | + clauses.add(Expressions.matchAnyObject(field, values)); |
| 210 | + } |
| 211 | + } |
| 212 | + } |
| 213 | + |
84 | 214 | }
|
0 commit comments