diff --git a/Guides/Grouped.md b/Guides/Grouped.md new file mode 100644 index 00000000..3df54438 --- /dev/null +++ b/Guides/Grouped.md @@ -0,0 +1,68 @@ +# Grouped + +[[Source](https://github.com/apple/swift-algorithms/blob/main/Sources/Algorithms/Grouped.swift) | + [Tests](https://github.com/apple/swift-algorithms/blob/main/Tests/SwiftAlgorithmsTests/GroupedTests.swift)] + +Groups up elements of a sequence into a new Dictionary, whose values are Arrays of grouped elements, each keyed by the result of the given closure. + +```swift +let fruits = ["Apricot", "Banana", "Apple", "Cherry", "Avocado", "Coconut"] +let fruitsByLetter = fruits.grouped(by: { $0.first! }) +// Results in: +// [ +// "B": ["Banana"], +// "A": ["Apricot", "Apple", "Avocado"], +// "C": ["Cherry", "Coconut"], +// ] +``` + +If you wish to achieve a similar effect but for single values (instead of Arrays of grouped values), see [`keyed(by:)`](Keyed.md). + +## Detailed Design + +The `grouped(by:)` method is declared in a `Sequence` extension returning +`[GroupKey: [Element]]`. + +```swift +extension Sequence { + public func grouped<GroupKey>( + by keyForValue: (Element) throws -> GroupKey + ) rethrows -> [GroupKey: [Element]] +} +``` + +### Complexity + +Calling `grouped(by:)` is an O(_n_) operation. 
+ +### Comparison with other languages + +| Language | Grouping API | +|---------------|--------------| +| Java | [`groupingBy`](https://docs.oracle.com/en/java/javase/20/docs/api/java.base/java/util/stream/Collectors.html#groupingBy(java.util.function.Function)) | +| Kotlin | [`groupBy`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/group-by.html) | +| C# | [`GroupBy`](https://learn.microsoft.com/en-us/dotnet/api/system.linq.enumerable.groupby?view=net-7.0#system-linq-enumerable-groupby) | +| Rust | [`group_by`](https://doc.rust-lang.org/std/primitive.slice.html#method.group_by) | +| Ruby | [`group_by`](https://ruby-doc.org/3.2.2/Enumerable.html#method-i-group_by) | +| Python | [`groupby`](https://docs.python.org/3/library/itertools.html#itertools.groupby) | +| PHP (Laravel) | [`groupBy`](https://laravel.com/docs/10.x/collections#method-groupby) | + +#### Naming + +All the surveyed languages name this operation with a variant of "grouped" or "grouping". The past tense `grouped(by:)` best fits [Swift's API Design Guidelines](https://www.swift.org/documentation/api-design-guidelines/). + +#### Customization points + +Java and C# are interesting in that they provide multiple overloads with several points of customization: + +1. Changing the type of the groups. + 1. E.g. the groups can be Sets instead of Arrays. + 2. Akin to calling `.mapValues { group in Set(group) }` on the resultant dictionary, but avoiding the intermediate allocation of Arrays of each group. +2. Picking which elements end up in the groupings. + 1. The default is the elements of the input sequence, but can be changed. + 2. Akin to calling `.mapValues { group in group.map(someTransform) }` on the resultant dictionary, but avoiding the intermediate allocation of Arrays of each group. +3. Changing the type of the outermost collection. + 1. E.g. using an `OrderedDictionary`, `SortedDictionary` or `TreeDictionary` instead of the default (hashed, unordered) `Dictionary`. + 2. 
There's no great way to achieve this with `grouped(by:)`. One could wrap the resultant dictionary in an initializer of one of the other dictionary types, but that isn't sufficient: once the `Dictionary` loses the ordering, there's no way to get it back when constructing one of the ordered dictionary variants. + +It is not clear which of these points of customization are worth supporting, or what the best way to express them might be. diff --git a/Guides/Keyed.md b/Guides/Keyed.md new file mode 100644 index 00000000..b2b72b41 --- /dev/null +++ b/Guides/Keyed.md @@ -0,0 +1,78 @@ +# Keyed + +[[Source](https://github.com/apple/swift-algorithms/blob/main/Sources/Algorithms/Keyed.swift) | + [Tests](https://github.com/apple/swift-algorithms/blob/main/Tests/SwiftAlgorithmsTests/KeyedTests.swift)] + +Stores the elements of a sequence as the values of a Dictionary, keyed by the result of the given closure. + +```swift +let fruits = ["Apricot", "Banana", "Apple", "Cherry", "Blackberry", "Avocado", "Coconut"] +let fruitByLetter = fruits.keyed(by: { $0.first! }) +// Results in: +// [ +// "A": "Avocado", +// "B": "Blackberry", +// "C": "Coconut", +// ] +``` + +On a key-collision, the latest element is kept by default. Alternatively, you can provide a closure which specifies which value to keep: + +```swift +let fruits = ["Apricot", "Banana", "Apple", "Cherry", "Blackberry", "Avocado", "Coconut"] +let fruitsByLetter = fruits.keyed( + by: { $0.first! }, + resolvingConflictsWith: { key, old, new in old } // Always pick the first fruit +) +// Results in: +// [ +// "A": "Apricot", +// "B": "Banana", +// "C": "Cherry", +// ] +``` + +## Detailed Design + +The `keyed(by:)` and `keyed(by:resolvingConflictsWith:)` methods are declared in a `Sequence` extension, both returning `[Key: Element]`. 
+ +```swift +extension Sequence { + public func keyed<Key>( + by keyForValue: (Element) throws -> Key + ) rethrows -> [Key: Element] + + public func keyed<Key>( + by keyForValue: (Element) throws -> Key, + resolvingConflictsWith resolve: (Key, Element, Element) throws -> Element + ) rethrows -> [Key: Element] +} +``` + +### Complexity + +Calling `keyed(by:)` is an O(_n_) operation. + +### Comparison with other languages + +| Language | "Keying" API | +|---------------|-------------| +| Java | [`toMap`](https://docs.oracle.com/en/java/javase/20/docs/api/java.base/java/util/stream/Collectors.html#toMap(java.util.function.Function,java.util.function.Function)) | +| Kotlin | [`associateBy`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/associate-by.html) | +| C# | [`ToDictionary`](https://learn.microsoft.com/en-us/dotnet/api/system.linq.enumerable.todictionary?view=net-7.0#system-linq-enumerable-todictionary) | +| Ruby (ActiveSupport) | [`index_by`](https://rubydoc.info/gems/activesupport/7.0.5/Enumerable#index_by-instance_method) | +| PHP (Laravel) | [`keyBy`](https://laravel.com/docs/10.x/collections#method-keyby) | + +#### Rejected alternative names + +1. Java's `toMap` is referring to `Map`/`HashMap`, their naming for Dictionaries and other associative collections. It's easy to confuse with the transformation function, `Sequence.map(_:)`. +2. C#'s `toXXX()` naming doesn't suit Swift well, which tends to prefer `Foo.init` over `toFoo()` methods. +3. Ruby's `index_by` naming doesn't fit Swift well, where "index" is a specific term (e.g. the `associatedtype Index` on `Collection`). There is also an [`indexed()`](Indexed.md) method in swift-algorithms, which is specifically to do with matching elements up with their indices, and not any arbitrary derived value. 
+ +#### Alternative names + +Kotlin's `associateBy` naming is a good alternative; in the past-tense form preferred by [Swift's API Design Guidelines](https://www.swift.org/documentation/api-design-guidelines/), we'd spell it `associated(by:)`. + +#### Customization points + +Java and C# are interesting in that they provide overloads that let you customize the type of the outermost collection. E.g. using an `OrderedDictionary` instead of the default (hashed, unordered) `Dictionary`. diff --git a/README.md b/README.md index 270055fb..4218cdf9 100644 --- a/README.md +++ b/README.md @@ -45,8 +45,10 @@ Read more about the package, and the intent behind it, in the [announcement on s - [`adjacentPairs()`](https://github.com/apple/swift-algorithms/blob/main/Guides/AdjacentPairs.md): Lazily iterates over tuples of adjacent elements. - [`chunked(by:)`, `chunked(on:)`, `chunks(ofCount:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Chunked.md): Eager and lazy operations that break a collection into chunks based on either a binary predicate or when the result of a projection changes or chunks of a given count. - [`firstNonNil(_:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/FirstNonNil.md): Returns the first non-`nil` result from transforming a sequence's elements. +- [`grouped(by:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Grouped.md): Group up elements using the given closure, returning a Dictionary of those groups, keyed by the results of the closure. - [`indexed()`](https://github.com/apple/swift-algorithms/blob/main/Guides/Indexed.md): Iterate over tuples of a collection's indices and elements. - [`interspersed(with:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Intersperse.md): Place a value between every two elements of a sequence. 
+- [`keyed(by:)`, `keyed(by:resolvingConflictsWith:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Keyed.md): Returns a Dictionary that associates elements of a sequence with the keys returned by the given closure. - [`partitioningIndex(where:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Partition.md): Returns the starting index of the partition of a collection that matches a predicate. - [`reductions(_:)`, `reductions(_:_:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Reductions.md): Returns all the intermediate states of reducing the elements of a sequence or collection. - [`split(maxSplits:omittingEmptySubsequences:whereSeparator)`, `split(separator:maxSplits:omittingEmptySubsequences)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Split.md): Lazy versions of the Standard Library's eager operations that split sequences and collections into subsequences separated by the specified separator element. diff --git a/Sources/Algorithms/Grouped.swift b/Sources/Algorithms/Grouped.swift new file mode 100644 index 00000000..34d284da --- /dev/null +++ b/Sources/Algorithms/Grouped.swift @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift Algorithms open source project +// +// Copyright (c) 2021 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +extension Sequence { + /// Groups up elements of `self` into a new Dictionary, + /// whose values are Arrays of grouped elements, + /// each keyed by the group key returned by the given closure. + /// - Parameters: + /// - keyForValue: A closure that returns a key for each element in + /// `self`. 
+ /// - Returns: A dictionary containing grouped elements of `self`, keyed by + /// the keys derived by the `keyForValue` closure. + @inlinable + public func grouped<GroupKey>(by keyForValue: (Element) throws -> GroupKey) rethrows -> [GroupKey: [Element]] { + try Dictionary(grouping: self, by: keyForValue) + } +} diff --git a/Sources/Algorithms/Keyed.swift b/Sources/Algorithms/Keyed.swift new file mode 100755 index 00000000..0446ef69 --- /dev/null +++ b/Sources/Algorithms/Keyed.swift @@ -0,0 +1,65 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift Algorithms open source project +// +// Copyright (c) 2020 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +extension Sequence { + /// Creates a new Dictionary from the elements of `self`, keyed by the + /// results returned by the given `keyForValue` closure. + /// + /// If the key derived for a new element collides with an existing key from a previous element, + /// the latest value will be kept. + /// + /// - Parameters: + /// - keyForValue: A closure that returns a key for each element in `self`. + @inlinable + public func keyed<Key>( + by keyForValue: (Element) throws -> Key + ) rethrows -> [Key: Element] { + return try self.keyed(by: keyForValue, resolvingConflictsWith: { _, old, new in new }) + } + + /// Creates a new Dictionary from the elements of `self`, keyed by the + /// results returned by the given `keyForValue` closure. As the dictionary is + /// built, this method calls the `resolve` closure with the current and + /// new values for any duplicate keys. 
Pass a closure as `resolve` that + /// returns the value to use in the resulting dictionary: The closure can + /// choose between the two values, combine them to produce a new value, or + /// even throw an error. + /// + /// - Parameters: + /// - keyForValue: A closure that returns a key for each element in `self`. + /// - resolve: A closure that is called with the key and the values for any duplicate + /// keys that are encountered. The closure returns the desired value for + /// the final dictionary. + @inlinable + public func keyed<Key>( + by keyForValue: (Element) throws -> Key, + resolvingConflictsWith resolve: (Key, Element, Element) throws -> Element + ) rethrows -> [Key: Element] { + var result = [Key: Element]() + + for element in self { + let key = try keyForValue(element) + + if let oldValue = result.updateValue(element, forKey: key) { + let valueToKeep = try resolve(key, oldValue, element) + + // This causes a second look-up for the same key. The standard library can avoid that + // by calling `mutatingFind` to get access to the bucket where the value will end up, + // and updating in place. + // Swift Algorithms doesn't have access to that API, so we make do. + // When this gets merged into the standard library, we should optimize this. + result[key] = valueToKeep + } + } + + return result + } +} diff --git a/Tests/SwiftAlgorithmsTests/GroupedTests.swift b/Tests/SwiftAlgorithmsTests/GroupedTests.swift new file mode 100644 index 00000000..579016c4 --- /dev/null +++ b/Tests/SwiftAlgorithmsTests/GroupedTests.swift @@ -0,0 +1,46 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift Algorithms open source project +// +// Copyright (c) 2020 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +import XCTest +import Algorithms + +final class GroupedTests: XCTestCase { + private class SampleError: Error {} + + // Based on https://github.com/apple/swift/blob/4d1d8a9de5ebc132a17aee9fc267461facf89bf8/validation-test/stdlib/Dictionary.swift#L1974-L1988 + + func testGroupedBy() { + let r = 0..<10 + + let d1 = r.grouped(by: { $0 % 3 }) + XCTAssertEqual(3, d1.count) + XCTAssertEqual(d1[0]!, [0, 3, 6, 9]) + XCTAssertEqual(d1[1]!, [1, 4, 7]) + XCTAssertEqual(d1[2]!, [2, 5, 8]) + + let d2 = r.grouped(by: { $0 }) + XCTAssertEqual(10, d2.count) + + let d3 = (0..<0).grouped(by: { $0 }) + XCTAssertEqual(0, d3.count) + } + + func testThrowingFromKeyFunction() { + let input = ["Apple", "Banana", "Cherry"] + let error = SampleError() + + XCTAssertThrowsError( + try input.grouped(by: { (_: String) -> Character in throw error }) + ) { thrownError in + XCTAssertIdentical(error, thrownError as? SampleError) + } + } +} diff --git a/Tests/SwiftAlgorithmsTests/KeyedTests.swift b/Tests/SwiftAlgorithmsTests/KeyedTests.swift new file mode 100644 index 00000000..53b1c989 --- /dev/null +++ b/Tests/SwiftAlgorithmsTests/KeyedTests.swift @@ -0,0 +1,88 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift Algorithms open source project +// +// Copyright (c) 2020 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +import XCTest +import Algorithms + +final class KeyedTests: XCTestCase { + private class SampleError: Error {} + + func testUniqueKeys() { + let d = ["Apple", "Banana", "Cherry"].keyed(by: { $0.first! }) + XCTAssertEqual(d.count, 3) + XCTAssertEqual(d["A"]!, "Apple") + XCTAssertEqual(d["B"]!, "Banana") + XCTAssertEqual(d["C"]!, "Cherry") + XCTAssertNil(d["D"]) + } + + func testEmpty() { + let d = EmptyCollection<String>().keyed(by: { $0.first! }) + XCTAssertEqual(d.count, 0) + } + + func testNonUniqueKeys() throws { + let d = ["Apple", "Avocado", "Banana", "Cherry"].keyed(by: { $0.first! }) + XCTAssertEqual(d.count, 3) + XCTAssertEqual(d["A"]!, "Avocado", "On a key-collision, keyed(by:) should take the latest value.") + XCTAssertEqual(d["B"]!, "Banana") + XCTAssertEqual(d["C"]!, "Cherry") + } + + func testNonUniqueKeysWithMergeFunction() { + var resolveCallHistory = [(key: Character, current: String, new: String)]() + let expectedCallHistory = [ + (key: "A", current: "Apple", new: "Avocado"), + (key: "C", current: "Cherry", new: "Coconut"), + ] + + let d = ["Apple", "Avocado", "Banana", "Cherry", "Coconut"].keyed( + by: { $0.first! 
}, + resolvingConflictsWith: { key, older, newer in + resolveCallHistory.append((key, older, newer)) + return "\(older)-\(newer)" + } + ) + + XCTAssertEqual(d.count, 3) + XCTAssertEqual(d["A"]!, "Apple-Avocado") + XCTAssertEqual(d["B"]!, "Banana") + XCTAssertEqual(d["C"]!, "Cherry-Coconut") + XCTAssertNil(d["D"]) + + XCTAssertEqual( + resolveCallHistory.map(String.init(describing:)), // quick/dirty workaround: tuples aren't Equatable + expectedCallHistory.map(String.init(describing:)) + ) + } + + func testThrowingFromKeyFunction() { + let input = ["Apple", "Banana", "Cherry"] + let error = SampleError() + + XCTAssertThrowsError( + try input.keyed(by: { (_: String) -> Character in throw error }) + ) { thrownError in + XCTAssertIdentical(error, thrownError as? SampleError) + } + } + + func testThrowingFromCombineFunction() { + let input = ["Apple", "Avocado", "Banana", "Cherry"] + let error = SampleError() + + XCTAssertThrowsError( + try input.keyed(by: { $0.first! }, resolvingConflictsWith: { _, _, _ in throw error }) + ) { thrownError in + XCTAssertIdentical(error, thrownError as? SampleError) + } + } +}