From c138a71d8530f74abb07d0502a4cab43faf14645 Mon Sep 17 00:00:00 2001 From: Dmitry Yemanov Date: Wed, 7 May 2025 11:10:08 +0300 Subject: [PATCH 1/2] Skip NULL keys in strong equality hash and merge joins. Do not store missing flags in sort keys if NULL keys are explicitly asked to be ignored. --- src/jrd/RecordSourceNodes.h | 7 +- src/jrd/optimizer/InnerJoin.cpp | 10 +- src/jrd/optimizer/Optimizer.cpp | 181 ++++++++++++++++++-------------- src/jrd/optimizer/Optimizer.h | 3 +- src/jrd/recsrc/HashJoin.cpp | 37 ++++--- src/jrd/recsrc/MergeJoin.cpp | 17 ++- src/jrd/recsrc/RecordSource.h | 34 +++--- src/jrd/recsrc/SortedStream.cpp | 99 +++++++++++------ 8 files changed, 225 insertions(+), 163 deletions(-) diff --git a/src/jrd/RecordSourceNodes.h b/src/jrd/RecordSourceNodes.h index cf3fad39fbf..8076a016095 100644 --- a/src/jrd/RecordSourceNodes.h +++ b/src/jrd/RecordSourceNodes.h @@ -51,7 +51,6 @@ class SortNode : public Firebird::PermanentStorage, public Printable public: explicit SortNode(MemoryPool& pool) : PermanentStorage(pool), - unique(false), expressions(pool), direction(pool), nullOrder(pool) @@ -76,7 +75,8 @@ class SortNode : public Firebird::PermanentStorage, public Printable { if (direction[index] == ORDER_ASC) return (nullOrder[index] == NULLS_DEFAULT) ? NULLS_FIRST : nullOrder[index]; - else if (direction[index] == ORDER_DESC) + + if (direction[index] == ORDER_DESC) return (nullOrder[index] == NULLS_DEFAULT) ? NULLS_LAST : nullOrder[index]; fb_assert(false); @@ -84,7 +84,8 @@ class SortNode : public Firebird::PermanentStorage, public Printable } public: - bool unique; // sort uses unique key - for DISTINCT and GROUP BY + bool unique = false; // sort uses unique key - for DISTINCT and GROUP BY + bool ignoreNulls = false; // sort skips NULL keys NestValueArray expressions; // sort expressions Firebird::Array direction; // rse_order_* Firebird::Array nullOrder; // rse_nulls_* diff --git a/src/jrd/optimizer/InnerJoin.cpp b/src/jrd/optimizer/InnerJoin.cpp index a6b1a364f2e..d34880ef42b 100644 --- a/src/jrd/optimizer/InnerJoin.cpp +++ b/src/jrd/optimizer/InnerJoin.cpp @@ -545,12 +545,15 @@ River* InnerJoin::formRiver() keys.add(FB_NEW_POOL(getPool()) NestValueArray(getPool())); keys.add(FB_NEW_POOL(getPool()) NestValueArray(getPool())); + bool ignoreNulls = true; + for (const auto match : stream.equiMatches) { NestConst node1; NestConst node2; + UCHAR blrOp; - if (!optimizer->getEquiJoinKeys(match, &node1, &node2)) + if (!optimizer->getEquiJoinKeys(match, &node1, &node2, &blrOp)) fb_assert(false); if (!node2->containsStream(stream.number)) @@ -565,6 +568,9 @@ River* InnerJoin::formRiver() keys[1]->add(node2); equiMatches.add(match); + + if (blrOp == blr_equiv) + ignoreNulls = false; } // Ensure the smallest stream is the one to be hashed, @@ -580,7 +586,7 @@ River* InnerJoin::formRiver() // Create a hash join rsb = FB_NEW_POOL(getPool()) - HashJoin(tdbb, csb, INNER_JOIN, 2, hashJoinRsbs, keys.begin(), stream.selectivity); + HashJoin(tdbb, csb, INNER_JOIN, ignoreNulls, 2, hashJoinRsbs, keys.begin(), stream.selectivity); // Clear priorly processed rsb's, as they're already incorporated into a hash join rsbs.clear(); diff --git a/src/jrd/optimizer/Optimizer.cpp b/src/jrd/optimizer/Optimizer.cpp index e31767d72d3..2dfa3e36756 100644 --- a/src/jrd/optimizer/Optimizer.cpp +++ b/src/jrd/optimizer/Optimizer.cpp @@ -1468,7 +1468,6 @@ SortedStream* Optimizer::generateSort(const StreamList& streams, ULONG items = sort->expressions.getCount() + 3 * streams.getCount() + 2 * (dbkeyStreams ? dbkeyStreams->getCount() : 0); - const NestConst* const end_node = sort->expressions.end(); // Collect all fields involved into the sort @@ -1566,33 +1565,35 @@ SortedStream* Optimizer::generateSort(const StreamList& streams, if (sort->unique) map->flags |= SortedStream::FLAG_UNIQUE; - sort_key_def* prev_key = nullptr; + if (sort->ignoreNulls) + map->flags |= SortedStream::FLAG_NO_NULLS; + + sort_key_def* prevKey = nullptr; // Loop thru sort keys building sort keys. Actually, to handle null values // correctly, two sort keys are made for each field, one for the null flag - // and one for field itself. + // and one for field itself -- unless NULLs are requested to be ignored. - dsc descriptor; + const auto keyItemCount = sort->expressions.getCount() * (sort->ignoreNulls ? 1 : 2); - SortedStream::SortMap::Item* map_item = map->items.getBuffer(items); - sort_key_def* sort_key = map->keyItems.getBuffer(2 * sort->expressions.getCount()); + SortedStream::SortMap::Item* mapItem = map->items.getBuffer(items); + sort_key_def* sortKey = map->keyItems.getBuffer(keyItemCount); const SortDirection* direction = sort->direction.begin(); const NullsPlacement* nullOrder = sort->nullOrder.begin(); - for (NestConst* node_ptr = sort->expressions.begin(); - node_ptr != end_node; - ++node_ptr, ++nullOrder, ++direction, ++map_item) + dsc descriptor; + + for (auto node : sort->expressions) { - // Pick up sort key expression. + // Pick up sort key expression - NestConst node = *node_ptr; - dsc* desc = &descriptor; + dsc* const desc = &descriptor; node->getDesc(tdbb, csb, desc); // Allow for "key" forms of International text to grow if (IS_INTL_DATA(desc)) { - // Turn varying text and cstrings into text. + // Turn varying text and cstrings into text if (desc->dsc_dtype == dtype_varying) { @@ -1608,57 +1609,67 @@ SortedStream* Optimizer::generateSort(const StreamList& streams, desc->dsc_length = INTL_key_length(tdbb, INTL_INDEX_TYPE(desc), desc->dsc_length); } - // Make key for null flag - sort_key->setSkdLength(SKD_text, 1); - sort_key->setSkdOffset(prev_key); + if (!sort->ignoreNulls) + { + // Make key for null flag + sortKey->setSkdLength(SKD_text, 1); + sortKey->setSkdOffset(prevKey); - // Handle nulls placement - sort_key->skd_flags = SKD_ascending; + // Handle nulls placement + sortKey->skd_flags = SKD_ascending; - // Have SQL-compliant nulls ordering for ODS11+ - if ((*nullOrder == NULLS_DEFAULT && *direction != ORDER_DESC) || *nullOrder == NULLS_FIRST) - sort_key->skd_flags |= SKD_descending; + // Have SQL-compliant nulls ordering for ODS11+ + if ((*nullOrder == NULLS_DEFAULT && *direction != ORDER_DESC) || *nullOrder == NULLS_FIRST) + sortKey->skd_flags |= SKD_descending; - prev_key = sort_key++; + prevKey = sortKey++; + } // Make key for sort key proper fb_assert(desc->dsc_dtype < FB_NELEM(sort_dtypes)); - sort_key->setSkdLength(sort_dtypes[desc->dsc_dtype], desc->dsc_length); - sort_key->setSkdOffset(&sort_key[-1], desc); - sort_key->skd_flags = SKD_ascending; + sortKey->setSkdLength(sort_dtypes[desc->dsc_dtype], desc->dsc_length); + sortKey->setSkdOffset(prevKey, desc); + + sortKey->skd_flags = SKD_ascending; if (*direction == ORDER_DESC) - sort_key->skd_flags |= SKD_descending; + sortKey->skd_flags |= SKD_descending; - if (!sort_key->skd_dtype) + if (!sortKey->skd_dtype) ERR_post(Arg::Gds(isc_invalid_sort_datatype) << Arg::Str(DSC_dtype_tostring(desc->dsc_dtype))); - if (sort_key->skd_dtype == SKD_varying || sort_key->skd_dtype == SKD_cstring) + if (sortKey->skd_dtype == SKD_varying || sortKey->skd_dtype == SKD_cstring) { if (desc->dsc_ttype() == ttype_binary) - sort_key->skd_flags |= SKD_binary; + sortKey->skd_flags |= SKD_binary; } if (SortedStream::hasVolatileKey(desc) && !refetchFlag) - sort_key->skd_flags |= SKD_separate_data; + sortKey->skd_flags |= SKD_separate_data; + + const auto flagOffset = sort->ignoreNulls ? 0 : prevKey->getSkdOffset(); - map_item->reset(node, prev_key->getSkdOffset()); - map_item->desc = *desc; - map_item->desc.dsc_address = (UCHAR*)(IPTR) sort_key->getSkdOffset(); + mapItem->reset(node, flagOffset); + mapItem->desc = *desc; + mapItem->desc.dsc_address = (UCHAR*)(IPTR) sortKey->getSkdOffset(); - prev_key = sort_key++; + prevKey = sortKey++; if (const auto fieldNode = nodeAs(node)) { - map_item->stream = fieldNode->fieldStream; - map_item->fieldId = fieldNode->fieldId; + mapItem->stream = fieldNode->fieldStream; + mapItem->fieldId = fieldNode->fieldId; } + + mapItem++; + direction++; + nullOrder++; } - fb_assert(prev_key); - ULONG map_length = prev_key ? ROUNDUP(prev_key->getSkdOffset() + prev_key->getSkdLength(), sizeof(SLONG)) : 0; - map->keyLength = map_length; - ULONG flag_offset = map_length; - map_length += fieldCount; + fb_assert(prevKey); + ULONG mapLength = prevKey ? ROUNDUP(prevKey->getSkdOffset() + prevKey->getSkdLength(), sizeof(SLONG)) : 0; + map->keyLength = mapLength; + ULONG flagOffset = mapLength; + mapLength += fieldCount; // Now go back and process all to fields involved with the sort @@ -1668,64 +1679,64 @@ SortedStream* Optimizer::generateSort(const StreamList& streams, continue; if (item.desc->dsc_dtype >= dtype_aligned) - map_length = FB_ALIGN(map_length, type_alignments[item.desc->dsc_dtype]); + mapLength = FB_ALIGN(mapLength, type_alignments[item.desc->dsc_dtype]); - map_item->reset(item.stream, (SSHORT) item.id, flag_offset++); - map_item->desc = *item.desc; - map_item->desc.dsc_address = (UCHAR*)(IPTR) map_length; - map_length += item.desc->dsc_length; - map_item++; + mapItem->reset(item.stream, (SSHORT) item.id, flagOffset++); + mapItem->desc = *item.desc; + mapItem->desc.dsc_address = (UCHAR*)(IPTR) mapLength; + mapLength += item.desc->dsc_length; + mapItem++; } // Make fields for record numbers and transaction ids for all streams - map_length = ROUNDUP(map_length, sizeof(SINT64)); + mapLength = ROUNDUP(mapLength, sizeof(SINT64)); for (const auto stream : streams) { - map_item->reset(stream, SortedStream::ID_DBKEY); - map_item->desc.makeInt64(0, (SINT64*)(IPTR) map_length); - map_length += map_item->desc.dsc_length; - map_item++; + mapItem->reset(stream, SortedStream::ID_DBKEY); + mapItem->desc.makeInt64(0, (SINT64*)(IPTR) mapLength); + mapLength += mapItem->desc.dsc_length; + mapItem++; - map_item->reset(stream, SortedStream::ID_TRANS); - map_item->desc.makeInt64(0, (SINT64*)(IPTR) map_length); - map_length += map_item->desc.dsc_length; - map_item++; + mapItem->reset(stream, SortedStream::ID_TRANS); + mapItem->desc.makeInt64(0, (SINT64*)(IPTR) mapLength); + mapLength += mapItem->desc.dsc_length; + mapItem++; } if (dbkeyStreams && dbkeyStreams->hasData()) { - map_length = ROUNDUP(map_length, sizeof(SINT64)); + mapLength = ROUNDUP(mapLength, sizeof(SINT64)); for (const auto stream : *dbkeyStreams) { - map_item->reset(stream, SortedStream::ID_DBKEY); - map_item->desc.makeInt64(0, (SINT64*)(IPTR) map_length); - map_length += map_item->desc.dsc_length; - map_item++; + mapItem->reset(stream, SortedStream::ID_DBKEY); + mapItem->desc.makeInt64(0, (SINT64*)(IPTR) mapLength); + mapLength += mapItem->desc.dsc_length; + mapItem++; } for (const auto stream : *dbkeyStreams) { - map_item->reset(stream, SortedStream::ID_DBKEY_VALID); - map_item->desc.makeText(1, CS_BINARY, (UCHAR*)(IPTR) map_length); - map_length += map_item->desc.dsc_length; - map_item++; + mapItem->reset(stream, SortedStream::ID_DBKEY_VALID); + mapItem->desc.makeText(1, CS_BINARY, (UCHAR*)(IPTR) mapLength); + mapLength += mapItem->desc.dsc_length; + mapItem++; } } for (const auto stream : streams) { - map_item->reset(stream, SortedStream::ID_DBKEY_VALID); - map_item->desc.makeText(1, CS_BINARY, (UCHAR*)(IPTR) map_length); - map_length += map_item->desc.dsc_length; - map_item++; + mapItem->reset(stream, SortedStream::ID_DBKEY_VALID); + mapItem->desc.makeText(1, CS_BINARY, (UCHAR*)(IPTR) mapLength); + mapLength += mapItem->desc.dsc_length; + mapItem++; } - fb_assert(map_item == map->items.end()); - fb_assert(sort_key == map->keyItems.end()); + fb_assert(mapItem == map->items.end()); + fb_assert(sortKey == map->keyItems.end()); - map_length = ROUNDUP(map_length, sizeof(SLONG)); + mapLength = ROUNDUP(mapLength, sizeof(SLONG)); // Make fields to store varying and cstring length @@ -1735,19 +1746,19 @@ SortedStream* Optimizer::generateSort(const StreamList& streams, if (sortKey.skd_dtype == SKD_varying || sortKey.skd_dtype == SKD_cstring) { - sortKey.skd_vary_offset = map_length; - map_length += sizeof(USHORT); + sortKey.skd_vary_offset = mapLength; + mapLength += sizeof(USHORT); map->flags |= SortedStream::FLAG_KEY_VARY; } } - if (map_length > MAX_SORT_RECORD) + if (mapLength > MAX_SORT_RECORD) { - ERR_post(Arg::Gds(isc_sort_rec_size_err) << Arg::Num(map_length)); + ERR_post(Arg::Gds(isc_sort_rec_size_err) << Arg::Num(mapLength)); // Msg438: sort record size of %ld bytes is too big } - map->length = map_length; + map->length = mapLength; // That was most unpleasant. Never the less, it's done (except for the debugging). // All that remains is to build the record source block for the sort. @@ -2426,6 +2437,7 @@ bool Optimizer::generateEquiJoin(RiverList& rivers, JoinType joinType) HalfStaticArray scratch; scratch.grow(baseConjuncts * orgCount); ValueExprNode** classes = scratch.begin(); + bool ignoreNulls = true; // Compute equivalence classes among streams. This involves finding groups // of streams joined by field equalities. @@ -2443,8 +2455,9 @@ bool Optimizer::generateEquiJoin(RiverList& rivers, JoinType joinType) NestConst node1; NestConst node2; + UCHAR blrOp; - if (!getEquiJoinKeys(*iter, &node1, &node2)) + if (!getEquiJoinKeys(*iter, &node1, &node2, &blrOp)) continue; for (unsigned i = 0; i < orgRivers.getCount(); i++) @@ -2480,6 +2493,9 @@ bool Optimizer::generateEquiJoin(RiverList& rivers, JoinType joinType) if (eq_class == last_class) last_class += orgCount; + if (blrOp == blr_equiv) + ignoreNulls = false; + iter |= Optimizer::CONJUNCT_JOINED; } } @@ -2590,13 +2606,14 @@ bool Optimizer::generateEquiJoin(RiverList& rivers, JoinType joinType) for (const auto river : joinedRivers) { const auto sort = FB_NEW_POOL(getPool()) SortNode(getPool()); + sort->ignoreNulls = ignoreNulls; for (const auto key : *keys[position++]) { fb_assert(river->isReferenced(key)); sort->direction.add(ORDER_ASC); // ascending sort - sort->nullOrder.add(NULLS_DEFAULT); // default nulls placement + sort->nullOrder.add(NULLS_DEFAULT); // default nulls placement sort->expressions.add(key); } @@ -2629,7 +2646,7 @@ bool Optimizer::generateEquiJoin(RiverList& rivers, JoinType joinType) rsbs.add(river->getRecordSource()); finalRsb = FB_NEW_POOL(getPool()) - HashJoin(tdbb, csb, joinType, rsbs.getCount(), rsbs.begin(), keys.begin()); + HashJoin(tdbb, csb, joinType, ignoreNulls, rsbs.getCount(), rsbs.begin(), keys.begin()); } // Pick up any boolean that may apply @@ -2967,7 +2984,8 @@ bool Optimizer::checkEquiJoin(BoolExprNode* boolean) bool Optimizer::getEquiJoinKeys(BoolExprNode* boolean, NestConst* node1, - NestConst* node2) + NestConst* node2, + UCHAR* blrOp) { auto cmpNode = nodeAs(boolean); if (!cmpNode || (cmpNode->blrOp != blr_eql && cmpNode->blrOp != blr_equiv)) @@ -2981,6 +2999,7 @@ bool Optimizer::getEquiJoinKeys(BoolExprNode* boolean, *node1 = arg1; *node2 = arg2; + *blrOp = cmpNode->blrOp; return true; } diff --git a/src/jrd/optimizer/Optimizer.h b/src/jrd/optimizer/Optimizer.h index 9566b4b3882..3f3b97c8386 100644 --- a/src/jrd/optimizer/Optimizer.h +++ b/src/jrd/optimizer/Optimizer.h @@ -527,7 +527,8 @@ class Optimizer : public Firebird::PermanentStorage bool checkEquiJoin(BoolExprNode* boolean); bool getEquiJoinKeys(BoolExprNode* boolean, NestConst* node1, - NestConst* node2); + NestConst* node2, + UCHAR* blrOp); Firebird::string getStreamName(StreamType stream); Firebird::string makeAlias(StreamType stream); diff --git a/src/jrd/recsrc/HashJoin.cpp b/src/jrd/recsrc/HashJoin.cpp index 3ba9941c8ca..b9e31b2dc9c 100644 --- a/src/jrd/recsrc/HashJoin.cpp +++ b/src/jrd/recsrc/HashJoin.cpp @@ -249,12 +249,13 @@ class HashJoin::HashTable : public PermanentStorage }; -HashJoin::HashJoin(thread_db* tdbb, CompilerScratch* csb, JoinType joinType, +HashJoin::HashJoin(thread_db* tdbb, CompilerScratch* csb, JoinType joinType, bool ignoreNulls, FB_SIZE_T count, RecordSource* const* args, NestValueArray* const* keys, double selectivity) : RecordSource(csb), m_joinType(joinType), m_boolean(nullptr), + m_ignoreNulls(ignoreNulls), m_args(csb->csb_pool, count - 1) { fb_assert(count >= 2); @@ -263,12 +264,13 @@ HashJoin::HashJoin(thread_db* tdbb, CompilerScratch* csb, JoinType joinType, } HashJoin::HashJoin(thread_db* tdbb, CompilerScratch* csb, - BoolExprNode* boolean, + BoolExprNode* boolean, bool ignoreNulls, RecordSource* const* args, NestValueArray* const* keys, double selectivity) : RecordSource(csb), m_joinType(OUTER_JOIN), m_boolean(boolean), + m_ignoreNulls(ignoreNulls), m_args(csb->csb_pool, 1) { init(tdbb, csb, 2, args, keys, selectivity); @@ -456,8 +458,8 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const while (m_args[i].buffer->getRecord(tdbb)) { - const auto hash = computeHash(tdbb, request, m_args[i], keyBuffer); - impure->irsb_hash_table->put(i, hash, counter++); + if (const auto hash = computeHash(tdbb, request, m_args[i], keyBuffer)) + impure->irsb_hash_table->put(i, hash.value(), counter++); } } @@ -466,8 +468,20 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const // Compute and hash the comparison keys - impure->irsb_leader_hash = - computeHash(tdbb, request, m_leader, impure->irsb_leader_buffer); + const auto hash = computeHash(tdbb, request, m_leader, impure->irsb_leader_buffer); + + if (!hash) + { + if (m_joinType == INNER_JOIN || m_joinType == SEMI_JOIN) + continue; + + if (m_joinType == OUTER_JOIN) + inner->nullRecords(tdbb); + + return true; + } + + impure->irsb_leader_hash = hash.value(); // Ensure the every inner stream having matches for this hash slot. // Setup the hash table for the iteration through collisions. @@ -657,10 +671,8 @@ void HashJoin::nullRecords(thread_db* tdbb) const arg.source->nullRecords(tdbb); } -ULONG HashJoin::computeHash(thread_db* tdbb, - Request* request, - const SubStream& sub, - UCHAR* keyBuffer) const +std::optional HashJoin::computeHash(thread_db* tdbb, Request* request, + const SubStream& sub, UCHAR* keyBuffer) const { memset(keyBuffer, 0, sub.totalKeyLength); @@ -668,10 +680,9 @@ ULONG HashJoin::computeHash(thread_db* tdbb, for (FB_SIZE_T i = 0; i < sub.keys->getCount(); i++) { - dsc* const desc = EVL_expr(tdbb, request, (*sub.keys)[i]); const USHORT keyLength = sub.keyLengths[i]; - if (desc && !(request->req_flags & req_null)) + if (const auto desc = EVL_expr(tdbb, request, (*sub.keys)[i])) { if (desc->isText()) { @@ -727,6 +738,8 @@ ULONG HashJoin::computeHash(thread_db* tdbb, } } } + else if (m_ignoreNulls) + return std::nullopt; keyPtr += keyLength; } diff --git a/src/jrd/recsrc/MergeJoin.cpp b/src/jrd/recsrc/MergeJoin.cpp index de3bf491a6d..29f738bd0de 100644 --- a/src/jrd/recsrc/MergeJoin.cpp +++ b/src/jrd/recsrc/MergeJoin.cpp @@ -424,23 +424,18 @@ int MergeJoin::compare(thread_db* tdbb, const NestValueArray* node1, for (const NestConst* const end = node1->end(); ptr1 != end; ++ptr1, ++ptr2) { - const dsc* const desc1 = EVL_expr(tdbb, request, *ptr1); - const bool null1 = (request->req_flags & req_null); + const auto desc1 = EVL_expr(tdbb, request, *ptr1); + const auto desc2 = EVL_expr(tdbb, request, *ptr2); - const dsc* const desc2 = EVL_expr(tdbb, request, *ptr2); - const bool null2 = (request->req_flags & req_null); - - if (null1 && !null2) + if (!desc1 && desc2) return -1; - if (null2 && !null1) + if (!desc2 && desc1) return 1; - if (!null1 && !null2) + if (desc1 && desc2) { - const int result = MOV_compare(tdbb, desc1, desc2); - - if (result != 0) + if (const int result = MOV_compare(tdbb, desc1, desc2)) return result; } } diff --git a/src/jrd/recsrc/RecordSource.h b/src/jrd/recsrc/RecordSource.h index d3f215bf497..90e4bec4f17 100644 --- a/src/jrd/recsrc/RecordSource.h +++ b/src/jrd/recsrc/RecordSource.h @@ -653,6 +653,7 @@ namespace Jrd static const USHORT FLAG_UNIQUE = 0x2; // sorts using unique key - for distinct and group by static const USHORT FLAG_KEY_VARY = 0x4; // sort key contains varying length string(s) static const USHORT FLAG_REFETCH = 0x8; // refetch data after sorting + static const USHORT FLAG_NO_NULLS = 0x10; // skip records containing NULL keys // Special values for SortMap::Item::fieldId. static const SSHORT ID_DBKEY = -1; // dbkey value @@ -689,21 +690,14 @@ namespace Jrd NestConst node; // expression node }; - explicit SortMap(MemoryPool& p) - : PermanentStorage(p), - length(0), - keyLength(0), - flags(0), - keyItems(p), - items(p) - { - } + explicit SortMap(MemoryPool& p) : PermanentStorage(p) + {} - ULONG length; // sort record length - ULONG keyLength; // key length - USHORT flags; // misc sort flags - Firebird::Array keyItems; // address of key descriptors - Firebird::Array items; + ULONG length = 0; // sort record length + ULONG keyLength = 0; // key length + USHORT flags = 0; // misc sort flags + Firebird::Array keyItems {getPool()}; // address of key descriptors + Firebird::Array items {getPool()}; }; SortedStream(CompilerScratch* csb, RecordSource* next, SortMap* map); @@ -1247,11 +1241,11 @@ namespace Jrd }; public: - HashJoin(thread_db* tdbb, CompilerScratch* csb, JoinType joinType, + HashJoin(thread_db* tdbb, CompilerScratch* csb, JoinType joinType, bool ignoreNulls, FB_SIZE_T count, RecordSource* const* args, NestValueArray* const* keys, double selectivity = 0); HashJoin(thread_db* tdbb, CompilerScratch* csb, - BoolExprNode* boolean, + BoolExprNode* boolean, bool ignoreNulls, RecordSource* const* args, NestValueArray* const* keys, double selectivity = 0); @@ -1280,12 +1274,13 @@ namespace Jrd void init(thread_db* tdbb, CompilerScratch* csb, FB_SIZE_T count, RecordSource* const* args, NestValueArray* const* keys, double selectivity); - ULONG computeHash(thread_db* tdbb, Request* request, - const SubStream& sub, UCHAR* buffer) const; + std::optional computeHash(thread_db* tdbb, Request* request, + const SubStream& sub, UCHAR* buffer) const; bool fetchRecord(thread_db* tdbb, Impure* impure, FB_SIZE_T stream) const; const JoinType m_joinType; const NestConst m_boolean; + const bool m_ignoreNulls; SubStream m_leader; Firebird::Array m_args; @@ -1323,8 +1318,7 @@ namespace Jrd public: MergeJoin(CompilerScratch* csb, FB_SIZE_T count, - SortedStream* const* args, - const NestValueArray* const* keys); + SortedStream* const* args, const NestValueArray* const* keys); void close(thread_db* tdbb) const override; diff --git a/src/jrd/recsrc/SortedStream.cpp b/src/jrd/recsrc/SortedStream.cpp index 911602b4f37..9742eab98a5 100644 --- a/src/jrd/recsrc/SortedStream.cpp +++ b/src/jrd/recsrc/SortedStream.cpp @@ -205,9 +205,27 @@ Sort* SortedStream::init(thread_db* tdbb) const // mapping is done in get_sort(). dsc to, temp; + HalfStaticArray keys; while (m_next->getRecord(tdbb)) { + // Loop thru all field (keys and hangers on) involved in the sort. + // Be careful to null field all unused bytes in the sort key. + + for (const auto& item : m_map->items) + { + if (!isKey(&item.desc)) + break; + + fb_assert(item.node); + + const auto desc = EVL_expr(tdbb, request, item.node); + keys.add(desc); + } + + if ((m_map->flags & FLAG_NO_NULLS) && keys.exist(nullptr)) + continue; + // "Put" a record to sort. Actually, get the address of a place // to build a record. @@ -217,33 +235,33 @@ Sort* SortedStream::init(thread_db* tdbb) const // Zero out the sort key. This solves a multitude of problems. memset(data, 0, m_map->length); + FB_SIZE_T keyPos = 0; // Loop thru all field (keys and hangers on) involved in the sort. // Be careful to null field all unused bytes in the sort key. - const SortMap::Item* const end_item = m_map->items.begin() + m_map->items.getCount(); - for (const SortMap::Item* item = m_map->items.begin(); item < end_item; item++) + for (const auto& item : m_map->items) { - to = item->desc; - to.dsc_address = data + (IPTR) to.dsc_address; + to = item.desc; + to.dsc_address += (IPTR) data; + + dsc* from; bool flag = false; - dsc* from = nullptr; - if (item->node) + if (isKey(&item.desc)) { - from = EVL_expr(tdbb, request, item->node); - if (request->req_flags & req_null) + if (!(from = keys[keyPos++])) flag = true; } else { from = &temp; - record_param* const rpb = &request->req_rpb[item->stream]; + record_param* const rpb = &request->req_rpb[item.stream]; - if (item->fieldId < 0) + if (item.fieldId < 0) { - switch (item->fieldId) + switch (item.fieldId) { case ID_TRANS: *reinterpret_cast(to.dsc_address) = rpb->rpb_transaction_nr; @@ -260,20 +278,25 @@ Sort* SortedStream::init(thread_db* tdbb) const continue; } - if (!EVL_field(rpb->rpb_relation, rpb->rpb_record, item->fieldId, from)) + if (!EVL_field(rpb->rpb_relation, rpb->rpb_record, item.fieldId, from)) flag = true; } - *(data + item->flagOffset) = flag ? TRUE : FALSE; + const bool hasNullFlag = !isKey(&item.desc) || !(m_map->flags & FLAG_NO_NULLS); + + if (hasNullFlag) + *(data + item.flagOffset) = flag ? TRUE : FALSE; + else + fb_assert(!flag); if (!flag) { // If an INTL string is moved into the key portion of the sort record, // then we want to sort by language dependent order - if (IS_INTL_DATA(&item->desc) && isKey(&item->desc)) + if (IS_INTL_DATA(&item.desc) && isKey(&item.desc)) { - INTL_string_to_key(tdbb, INTL_INDEX_TYPE(&item->desc), from, &to, + INTL_string_to_key(tdbb, INTL_INDEX_TYPE(&item.desc), from, &to, (m_map->flags & FLAG_UNIQUE ? INTL_KEY_UNIQUE : INTL_KEY_SORT)); } else @@ -300,27 +323,35 @@ bool SortedStream::compareKeys(const UCHAR* p, const UCHAR* q) const // Binary-distinct varying length string keys may in fact be equal. // Re-check the keys at the higher level. See CORE-4909. - fb_assert(m_map->keyItems.getCount() % 2 == 0); - const USHORT count = m_map->keyItems.getCount() / 2; thread_db* tdbb = JRD_get_thread_data(); - for (USHORT i = 0; i < count; i++) + for (const auto& item : m_map->items) { - const SortMap::Item* const item = &m_map->items[i]; + if (!isKey(&item.desc)) + break; - const UCHAR flag1 = *(p + item->flagOffset); - const UCHAR flag2 = *(q + item->flagOffset); + fb_assert(item.node); - if (flag1 != flag2) - return false; + bool notNull = true; - if (!flag1) + if (!(m_map->flags & FLAG_NO_NULLS)) { - dsc desc1 = item->desc; - desc1.dsc_address = const_cast(p) + (IPTR) desc1.dsc_address; + const UCHAR flag1 = *(p + item.flagOffset); + const UCHAR flag2 = *(q + item.flagOffset); + + if (flag1 != flag2) + return false; - dsc desc2 = item->desc; - desc2.dsc_address = const_cast(q) + (IPTR) desc2.dsc_address; + notNull = (flag1 == TRUE); + } + + if (notNull) + { + dsc desc1 = item.desc; + desc1.dsc_address += (IPTR) const_cast(p); + + dsc desc2 = item.desc; + desc2.dsc_address += (IPTR) const_cast(q); if (MOV_compare(tdbb, &desc1, &desc2)) return false; @@ -349,10 +380,6 @@ void SortedStream::mapData(thread_db* tdbb, Request* request, UCHAR* data) const for (const auto& item : m_map->items) { - const auto flag = (*(data + item.flagOffset) == TRUE); - from = item.desc; - from.dsc_address = data + (IPTR) from.dsc_address; - if (item.node && !nodeIs(item.node)) continue; @@ -365,6 +392,12 @@ void SortedStream::mapData(thread_db* tdbb, Request* request, UCHAR* data) const if (hasVolatileKey(&item.desc) && isKey(&item.desc)) continue; + const bool hasNullFlag = !isKey(&item.desc) || !(m_map->flags & FLAG_NO_NULLS); + const bool flag = hasNullFlag && (*(data + item.flagOffset) == TRUE); + + from = item.desc; + from.dsc_address += (IPTR) data; + const auto rpb = &request->req_rpb[item.stream]; const auto relation = rpb->rpb_relation; const auto id = item.fieldId; @@ -540,7 +573,7 @@ void SortedStream::mapData(thread_db* tdbb, Request* request, UCHAR* data) const if (item.stream != stream || item.fieldId < 0) continue; - const auto null1 = (*(data + item.flagOffset) == TRUE); + const auto null1 = !(m_map->flags & FLAG_NO_NULLS) && (*(data + item.flagOffset) == TRUE); const auto null2 = !EVL_field(relation, temp.rpb_record, item.fieldId, &from); if (null1 != null2) From b909540a1f8370a02a960f5dc1ca3fa311999675 Mon Sep 17 00:00:00 2001 From: Dmitry Yemanov Date: Wed, 28 May 2025 16:23:40 +0300 Subject: [PATCH 2/2] Quick fix for the wrong results, thanks to Denis Simonov --- src/jrd/recsrc/HashJoin.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/jrd/recsrc/HashJoin.cpp b/src/jrd/recsrc/HashJoin.cpp index b9e31b2dc9c..f65fdb854da 100644 --- a/src/jrd/recsrc/HashJoin.cpp +++ b/src/jrd/recsrc/HashJoin.cpp @@ -458,8 +458,10 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const while (m_args[i].buffer->getRecord(tdbb)) { + const auto position = counter++; + if (const auto hash = computeHash(tdbb, request, m_args[i], keyBuffer)) - impure->irsb_hash_table->put(i, hash.value(), counter++); + impure->irsb_hash_table->put(i, hash.value(), position); } }