From 0fba5d81f211c9705a0a5426147c0c85b88baf2b Mon Sep 17 00:00:00 2001 From: Michael Kay Date: Thu, 23 Jan 2025 17:56:01 +0000 Subject: [PATCH] Define more detailed rules for duplicates in maps --- .../src/function-catalog.xml | 285 +++++++++--------- .../src/xpath-functions.xml | 7 +- 2 files changed, 143 insertions(+), 149 deletions(-) diff --git a/specifications/xpath-functions-40/src/function-catalog.xml b/specifications/xpath-functions-40/src/function-catalog.xml index 9e3e0f5e2..72c9ea433 100644 --- a/specifications/xpath-functions-40/src/function-catalog.xml +++ b/specifications/xpath-functions-40/src/function-catalog.xml @@ -23027,7 +23027,7 @@ xs:QName('xs:double') same key, then the way this is handled is - controlled by the second ($options) argument.

+ controlled by the $options argument.

@@ -23045,6 +23045,22 @@ xs:QName('xs:double') def="option-parameter-conventions" >option parameter conventions apply.

+ +

In the event that two or more entries in the input maps have the + same key:

+ +

A single entry is created by combining the values of the duplicates, + in a way determined by the supplied $options.

+

The key of the combined entry is one of the duplicate keys: + which one is chosen is implementation-dependent. + (Two keys that are deemed duplicates may differ: for example they may have + different type annotations, or they may be xs:dateTime + values with different timezones.)

+

The position of the combined entry in the + entry order of the result map corresponds to the position of the first appearance of + the corresponding key value in the input.

+
+

The entries that may appear in the $options map are as follows:

@@ -23055,74 +23071,57 @@ xs:QName('xs:double') taken if two maps in the input sequence $maps contain entries with key values K1 and K2 where K1 and K2 are the same key. + def="dt-same-key">same key. This option and the combine + option are mutually exclusive. xs:string use-first - - An error is raised if duplicate keys are encountered. + + Equivalent to specifying "combine": fn(){error(xs:QName("err:FOJS0003"), ...) + (the remaining arguments to fn:error being + ). - If duplicate keys are present, all but the first of a set of duplicates are ignored, - where the ordering is based on the order of maps in the $maps argument. + >Equivalent to specifying "combine": fn($a, $b){ $a }. - If duplicate keys are present, all but the last of a set of duplicates are ignored, - where the ordering is based on the order of maps in the $maps argument. + >Equivalent to specifying "combine": fn($a, $b){ $b }. - If duplicate keys are present, all but one of a set of duplicates are ignored, - and it is implementation-dependent - which one is retained. + >Equivalent to specifying "combine": fn($a, $b){ one-of($a, $b) } + where one-of chooses either $a or $b in + an way. - If duplicate keys are present, the result map includes an entry for the key whose - associated value is the - sequence concatenation - of all the values associated with the key, - retaining order based on the order of maps in the $maps argument. - The key value in the result map that corresponds to such a set of duplicates must - be the same key as each of the duplicates, but it is - otherwise unconstrained: for example if the duplicate keys are xs:byte(1) - and xs:short(1), the key in the result could legitimately be xs:long(1). + >Equivalent to specifying "combine": fn($a, $b){ $a, $b }. - + + + Supplies a function for handling duplicate keys: specifically, the action to be + taken if two maps in the input sequence $maps contain entries with key values + K1 and K2 where K1 and K2 are the + same key. 
This option and the duplicates + option are mutually exclusive. + + (fn($existing-value as item()*, $new-value as item()*) as item()*)? + fn($a, $b){ $a } + + + A function with signature fn(item()*, item()*) as item()*. + The function is called for any entry in an input map that has the + as a previous entry. The first argument + is the existing value associated with the key; the second argument + is the value associated with the key in the duplicate input entry, + and the result is the new value to be associated with the key. + + + + + @@ -23133,26 +23132,24 @@ xs:QName('xs:double') let $FOJS0003 := QName("http://www.w3.org/2005/xqt-errors", "FOJS0003") -let $duplicates-handler := { - "use-first": fn($a, $b) { $a }, - "use-last": fn($a, $b) { $b }, - "combine": fn($a, $b) { $a, $b }, - "reject": fn($a, $b) { fn:error($FOJS0003) }, - "use-any": fn($a, $b) { fn:random-number-generator()?permute(($a, $b))[1] } -} -let $combine := fn($A as map(*), $B as map(*), $deduplicator as fn(*)) { - fold-left(map:keys($B), $A, fn($z, $k) { - if (map:contains($z, $k)) - then map:put($z, $k, $deduplicator($z($k), $B($k))) - else map:put($z, $k, $B($k)) - }) -} -return fold-left($maps, {}, - $combine(?, ?, $duplicates-handler($options?duplicates otherwise "use-first")) -) +let $combiner := $options?combine + otherwise { + "use-first": fn($a, $b) { $a }, + "use-last": fn($a, $b) { $b }, + "combine": fn($a, $b) { $a, $b }, + "reject": fn($a, $b) { fn:error($FOJS0003) }, + "use-any": fn($a, $b) { fn:random-number-generator()?permute(($a, $b))[1] } + } ($options?duplicates) + otherwise fn($a, $b) { $a } + +return map:of-pairs($maps =!> map:pairs(), { "combine": $combiner }); +

An error is raised if both the combine and duplicates + options are present.

+

An error is raised if the value of $options indicates that duplicates are to be rejected, and a duplicate key is encountered.

@@ -23165,17 +23162,18 @@ return fold-left($maps, {}, -

By way of explanation, $combine is a function that combines - two maps by iterating over the keys of the second map, adding each key and its corresponding - value to the first map as it proceeds. The second call of fn:fold-left - in the return clause then iterates over the maps supplied in the call - to map:merge, accumulating a single map that absorbs successive maps - in the input sequence by calling $combine.

- - -

This algorithm processes the supplied maps in a defined order, but processes the keys within - each map in implementation-dependent order.

- +

By way of explanation, the function first reduces the sequence of input maps + to a sequence of key-value pairs, retaining order of both the maps and of the + entries within each map. It then combines key-value pairs having the + same key by applying the $combine function + successively to pairs of duplicates. The position in the + entry order of the result map of an entry formed by combining duplicates corresponds to the + position of the first occurrence of the key in the input sequence. This is true + even when the option use-last is used: the value of the resulting + entry corresponds to the last entry with a given key, but the position of the entry + in the result map corresponds to the position of the first entry with that key. +

+

The use of fn:random-number-generator represents one possible conformant implementation for "duplicates": "use-any", but it is not the only conformant implementation and is not intended to be a realistic implementation. The purpose of this @@ -23270,6 +23268,14 @@ return fold-left($maps, {}, + + +

For consistency with the new functions map:build + and map:of-pairs, the handling of duplicates + may now be controlled by the combine option as an alternative + to the existing duplicates option.

+ + @@ -23299,9 +23305,11 @@ return fold-left($maps, {}, $input argument.

-

The $options argument can be used to control the ordering of the result, - and the way in which duplicate keys are handled. +

The $options argument can be used to control + the way in which duplicate keys are handled. The option parameter conventions apply. + The handling of duplicates is defined to be the same as in an equivalent call of + the map:build function: see the formal equivalent below.

The entries that may appear in the $options map are as follows:

@@ -23315,28 +23323,17 @@ return fold-left($maps, {}, (fn($existing-value as item()*, $new-value as item()*) as item()*)? fn:op(',') - - + + The function is called for any entry in an input map that has the + as a previous entry. The first argument + is the existing value associated with the key; the second argument + is the value associated with the key in the duplicate input entry, + and the result is the new value to be associated with the key. + + + + @@ -24636,50 +24633,48 @@ else map:put($map, $key, $action(())) new key-value pair to the map, with that key and that value.

If the key is already present, the processor calls the combine function in the $options argument to combine the existing value for the key with the new value, - and replaces the entry with this combined value.

+ and replaces the entry with this combined value.

+

The key of the combined entry is taken from one of the duplicate entries: + it is implementation-dependent which one is used. (It is + possible for two keys to be considered duplicates even if they differ: + for example, they may have different type annotations, or they may + be xs:dateTime values with different timezones.) +

+

The position of the combined entry in the + entry order of the result map is based on the position of the first entry having that key + in the input sequence (that is, the order of keys in the result is the order + of first appearance in the input).

+ -

The $options argument can be used to control the way in which duplicate keys are handled. - The option parameter conventions apply. -

+

The $options argument can be used to control the + and the way in which duplicate keys are handled. + The option parameter conventions apply. +

-

The entries that may appear in the $options map are as follows:

+

The entries that may appear in the $options map are as follows:

- - - A function that is used to combine two different values that are supplied - for the same key. The default is to combine the two values using - . - - (fn($existing-value as item()*, $new-value as item()*) as item()*)? - fn:op(',') - - - + + + A function that is used to combine two different values that are supplied + for the same key. The default is to combine the two values using + , retaining their order + in the input sequence. + + (fn($existing-value as item()*, $new-value as item()*) as item()*)? + fn:op(',') + + + The function is called for any entry in an input map that has the + as a previous entry. The first argument + is the existing value associated with the key; the second argument + is the value associated with the key in the duplicate input entry, + and the result is the new value to be associated with the key. + + + + + diff --git a/specifications/xpath-functions-40/src/xpath-functions.xml b/specifications/xpath-functions-40/src/xpath-functions.xml index 8c9372a8c..37781aefa 100644 --- a/specifications/xpath-functions-40/src/xpath-functions.xml +++ b/specifications/xpath-functions-40/src/xpath-functions.xml @@ -13299,11 +13299,10 @@ ISBN 0 521 77752 6. to the chosen radix.

-

Raised if the option in an option map is not - described in the specification, if it is not supported by the implementation and - if its name is in no namespace.

+

Raised if an inconsistent set of options is supplied + in an option map.