From 0fba5d81f211c9705a0a5426147c0c85b88baf2b Mon Sep 17 00:00:00 2001
From: Michael Kay
Date: Thu, 23 Jan 2025 17:56:01 +0000
Subject: [PATCH] Define more detailed rules for duplicates in maps
---
.../src/function-catalog.xml | 285 +++++++++---------
.../src/xpath-functions.xml | 7 +-
2 files changed, 143 insertions(+), 149 deletions(-)
diff --git a/specifications/xpath-functions-40/src/function-catalog.xml b/specifications/xpath-functions-40/src/function-catalog.xml
index 9e3e0f5e2..72c9ea433 100644
--- a/specifications/xpath-functions-40/src/function-catalog.xml
+++ b/specifications/xpath-functions-40/src/function-catalog.xml
@@ -23027,7 +23027,7 @@ xs:QName('xs:double')
same key, then the way this is handled is
- controlled by the second ($options
) argument.
+ controlled by the $options
argument.
@@ -23045,6 +23045,22 @@ xs:QName('xs:double')
def="option-parameter-conventions"
>option parameter conventions apply.
+
+ In the event that two or more entries in the input maps have the
+ same key:
+
+ A single entry is created by combining the values of the duplicates,
+ in a way determined by the supplied $options
.
+ The key of the combined entry is one of the duplicate keys:
+ which one is chosen is implementation-dependent.
+ (Two keys that are deemed duplicates may differ: for example they may have
+ different type annotations, or they may be xs:dateTime
+ values with different timezones.)
+ The position of the combined entry in the entry order
+ of the result map corresponds to the position of the first appearance of
+ the corresponding key value in the input.
+
+
-
The entries that may appear in the $options
map are as follows:
@@ -23055,74 +23071,57 @@ xs:QName('xs:double')
taken if two maps in the input sequence $maps
contain entries with key values
K1 and K2 where K1 and K2 are the
same key.
+ def="dt-same-key">same key. This option and the combine
+ option are mutually exclusive.
xs:string
use-first
-
- An error is raised if duplicate keys are encountered.
+
+ Equivalent to specifying "combine": fn(){error(xs:QName("err:FOJS0003"), ...)}
+ (the remaining arguments to fn:error being
+ ).
- If duplicate keys are present, all but the first of a set of duplicates are ignored,
- where the ordering is based on the order of maps in the $maps
argument.
+ >Equivalent to specifying "combine": fn($a, $b){ $a }
.
- If duplicate keys are present, all but the last of a set of duplicates are ignored,
- where the ordering is based on the order of maps in the $maps
argument.
+ >Equivalent to specifying "combine": fn($a, $b){ $b }
.
- If duplicate keys are present, all but one of a set of duplicates are ignored,
- and it is implementation-dependent
- which one is retained.
+ >Equivalent to specifying "combine": fn($a, $b){ one-of($a, $b) }
+ where one-of
chooses either $a
or $b
in
+ an implementation-dependent way.
- If duplicate keys are present, the result map includes an entry for the key whose
- associated value is the
- sequence concatenation
- of all the values associated with the key,
- retaining order based on the order of maps in the $maps
argument.
- The key value in the result map that corresponds to such a set of duplicates must
- be the same key as each of the duplicates, but it is
- otherwise unconstrained: for example if the duplicate keys are xs:byte(1)
- and xs:short(1)
, the key in the result could legitimately be xs:long(1)
.
+ >Equivalent to specifying "combine": fn($a, $b){ $a, $b }
.
-
+
+
+ Supplies a function for handling duplicate keys: specifically, the action to be
+ taken if two maps in the input sequence $maps
contain entries with key values
+ K1 and K2 where K1 and K2 are the
+ same key. This option and the duplicates
+ option are mutually exclusive.
+
+ (fn($existing-value as item()*, $new-value as item()*) as item()*)?
+ fn($a, $b){ $a }
+
+
+ A function with signature fn(item()*, item()*) as item()*
.
+ The function is called for any entry in an input map that has the
+ same key as a previous entry. The first argument
+ is the existing value associated with the key; the second argument
+ is the value associated with the key in the duplicate input entry,
+ and the result is the new value to be associated with the key.
+
+
+
+
+
@@ -23133,26 +23132,24 @@ xs:QName('xs:double')
let $FOJS0003 := QName("http://www.w3.org/2005/xqt-errors", "FOJS0003")
-let $duplicates-handler := {
- "use-first": fn($a, $b) { $a },
- "use-last": fn($a, $b) { $b },
- "combine": fn($a, $b) { $a, $b },
- "reject": fn($a, $b) { fn:error($FOJS0003) },
- "use-any": fn($a, $b) { fn:random-number-generator()?permute(($a, $b))[1] }
-}
-let $combine := fn($A as map(*), $B as map(*), $deduplicator as fn(*)) {
- fold-left(map:keys($B), $A, fn($z, $k) {
- if (map:contains($z, $k))
- then map:put($z, $k, $deduplicator($z($k), $B($k)))
- else map:put($z, $k, $B($k))
- })
-}
-return fold-left($maps, {},
- $combine(?, ?, $duplicates-handler($options?duplicates otherwise "use-first"))
-)
+let $combiner := $options?combine
+ otherwise {
+ "use-first": fn($a, $b) { $a },
+ "use-last": fn($a, $b) { $b },
+ "combine": fn($a, $b) { $a, $b },
+ "reject": fn($a, $b) { fn:error($FOJS0003) },
+ "use-any": fn($a, $b) { fn:random-number-generator()?permute(($a, $b))[1] }
+ } ($options?duplicates)
+ otherwise fn($a, $b) { $a }
+
+return map:of-pairs($maps =!> map:pairs(), { "combine": $combiner });
+ An error is raised if both the combine
and duplicates
+ options are present.
+
An error is raised if the value of
$options
indicates that duplicates are to be rejected, and a duplicate key is encountered.
@@ -23165,17 +23162,18 @@ return fold-left($maps, {},
- By way of explanation, $combine
is a function that combines
- two maps by iterating over the keys of the second map, adding each key and its corresponding
- value to the first map as it proceeds. The second call of fn:fold-left
- in the return
clause then iterates over the maps supplied in the call
- to map:merge, accumulating a single map that absorbs successive maps
- in the input sequence by calling $combine
.
-
-
- This algorithm processes the supplied maps in a defined order, but processes the keys within
- each map in implementation-dependent order.
-
+ By way of explanation, the function first reduces the sequence of input maps
+ to a sequence of key-value pairs, retaining order of both the maps and of the
+ entries within each map. It then combines key-value pairs having the
+ same key by applying the $combine
function
+ successively to pairs of duplicates. The position in the entry order
+ of the result map of an entry formed by combining duplicates corresponds to the
+ position of the first occurrence of the key in the input sequence. This is true
+ even when the option use-last
is used: the value of the resulting
+ entry corresponds to the last entry with a given key, but the position of the entry
+ in the result map corresponds to the position of the first entry with that key.
+
+
The use of fn:random-number-generator represents one possible conformant
implementation for "duplicates": "use-any"
, but it is not the only conformant
implementation and is not intended to be a realistic implementation. The purpose of this
@@ -23270,6 +23268,14 @@ return fold-left($maps, {},
+
+
+ For consistency with the new functions map:build
+ and map:of-pairs, the handling of duplicates
+ may now be controlled by the combine
option as an alternative
+ to the existing duplicates
option.
+
+
@@ -23299,9 +23305,11 @@ return fold-left($maps, {},
$input
argument.
- The $options
argument can be used to control the ordering of the result,
- and the way in which duplicate keys are handled.
+
The $options
argument can be used to control
+ the way in which duplicate keys are handled.
The option parameter conventions apply.
+ The handling of duplicates is defined to be the same as in an equivalent call of
+ the map:build function: see the formal equivalent below.
The entries that may appear in the $options
map are as follows:
@@ -23315,28 +23323,17 @@ return fold-left($maps, {},
(fn($existing-value as item()*, $new-value as item()*) as item()*)?
fn:op(',')
-
-
+
+ The function is called for any entry in an input map that has the
+ same key as a previous entry. The first argument
+ is the existing value associated with the key; the second argument
+ is the value associated with the key in the duplicate input entry,
+ and the result is the new value to be associated with the key.
+
+
+
+
@@ -24636,50 +24633,48 @@ else map:put($map, $key, $action(()))
new key-value pair to the map, with that key and that value.
If the key is already present, the processor calls the combine
function in the $options
argument to combine the existing value for the key with the new value,
- and replaces the entry with this combined value.
+ and replaces the entry with this combined value.
+ The key of the combined entry is taken from one of the duplicate entries:
+ it is implementation-dependent which one is used. (It is
+ possible for two keys to be considered duplicates even if they differ:
+ for example, they may have different type annotations, or they may
+ be xs:dateTime
values with different timezones.)
+
+ The position of the combined entry in the entry order
+ of the result map is based on the position of the first entry having that key
+ in the input sequence (that is, the order of keys in the result is the order
+ of first appearance in the input).
+
- The $options
argument can be used to control the way in which duplicate keys are handled.
- The option parameter conventions apply.
-
+ The $options
argument can be used to control the
+ and the way in which duplicate keys are handled.
+ The option parameter conventions apply.
+
- The entries that may appear in the $options
map are as follows:
+ The entries that may appear in the $options
map are as follows:
-
-
- A function that is used to combine two different values that are supplied
- for the same key. The default is to combine the two values using
- .
-
- (fn($existing-value as item()*, $new-value as item()*) as item()*)?
- fn:op(',')
-
-
-
+
+
+ A function that is used to combine two different values that are supplied
+ for the same key. The default is to combine the two values using
+ sequence concatenation, retaining their order
+ in the input sequence.
+
+ (fn($existing-value as item()*, $new-value as item()*) as item()*)?
+ fn:op(',')
+
+
+ The function is called for any entry in an input map that has the
+ same key as a previous entry. The first argument
+ is the existing value associated with the key; the second argument
+ is the value associated with the key in the duplicate input entry,
+ and the result is the new value to be associated with the key.
+
+
+
+
+
diff --git a/specifications/xpath-functions-40/src/xpath-functions.xml b/specifications/xpath-functions-40/src/xpath-functions.xml
index 8c9372a8c..37781aefa 100644
--- a/specifications/xpath-functions-40/src/xpath-functions.xml
+++ b/specifications/xpath-functions-40/src/xpath-functions.xml
@@ -13299,11 +13299,10 @@ ISBN 0 521 77752 6.
to the chosen radix.
- Raised if the option in an option map is not
- described in the specification, if it is not supported by the implementation and
- if its name is in no namespace.
+ Raised if an inconsistent set of options is supplied
+ in an option map.