From d225803774ee0488ddd5c87dd79cb4fc6d78d80c Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Wed, 9 Oct 2024 14:00:36 -0600 Subject: [PATCH 1/7] Cleanup algorithms code Delete lots of dead code, get off of collections and onto iterators, simplify implementations. --- .../Algorithms/Algorithms/Ranges.swift | 134 ++------ .../Algorithms/Algorithms/Replace.swift | 2 +- .../Algorithms/Algorithms/Split.swift | 62 ++-- .../Algorithms/Algorithms/Trim.swift | 8 +- .../Consumers/CollectionConsumer.swift | 25 -- .../Consumers/FixedPatternConsumer.swift | 28 -- .../Algorithms/Consumers/ManyConsumer.swift | 47 --- .../Consumers/PredicateConsumer.swift | 74 ----- .../Algorithms/Matching/FirstMatch.swift | 24 -- .../Algorithms/Matching/MatchReplace.swift | 63 ---- .../Algorithms/Matching/MatchResult.swift | 9 - .../Algorithms/Matching/Matches.swift | 288 ++---------------- .../Matching/MatchingCollectionConsumer.swift | 48 --- .../Matching/MatchingCollectionSearcher.swift | 104 ------- .../Searchers/CollectionSearcher.swift | 54 ---- .../Searchers/ConsumerSearcher.swift | 109 ------- .../Searchers/NaivePatternSearcher.swift | 93 ------ .../Algorithms/Searchers/PatternOrEmpty.swift | 65 ---- .../Searchers/PredicateSearcher.swift | 44 --- Sources/_StringProcessing/CMakeLists.txt | 7 - .../RegexTests/AlgorithmsInternalsTests.swift | 15 +- 21 files changed, 80 insertions(+), 1223 deletions(-) delete mode 100644 Sources/_StringProcessing/Algorithms/Consumers/ManyConsumer.swift delete mode 100644 Sources/_StringProcessing/Algorithms/Consumers/PredicateConsumer.swift delete mode 100644 Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionConsumer.swift delete mode 100644 Sources/_StringProcessing/Algorithms/Searchers/ConsumerSearcher.swift delete mode 100644 Sources/_StringProcessing/Algorithms/Searchers/NaivePatternSearcher.swift delete mode 100644 Sources/_StringProcessing/Algorithms/Searchers/PatternOrEmpty.swift delete mode 100644 Sources/_StringProcessing/Algorithms/Searchers/PredicateSearcher.swift diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift index 3f9b8d49a..57834a324 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift @@ -11,107 +11,33 @@ // MARK: `RangesCollection` -struct RangesCollection { - public typealias Base = Searcher.Searched - - let base: Base +struct RangesSequence { + let input: Searcher.Searched let searcher: Searcher - private(set) public var startIndex: Index - init(base: Base, searcher: Searcher) { - self.base = base + init(input: Searcher.Searched, searcher: Searcher) { + self.input = input self.searcher = searcher - - var state = searcher.state(for: base, in: base.startIndex..: IteratorProtocol { - public typealias Base = Searcher.Searched - - let base: Base - let searcher: Searcher - var state: Searcher.State - - init(base: Base, searcher: Searcher) { - self.base = base - self.searcher = searcher - self.state = searcher.state(for: base, in: base.startIndex.. Range? { - searcher.search(base, &state) - } -} - -extension RangesCollection: Sequence { - public func makeIterator() -> RangesIterator { - Iterator(base: base, searcher: searcher) - } -} -extension RangesCollection: Collection { - // TODO: Custom `SubSequence` for the sake of more efficient slice iteration - - public struct Index { - var range: Range? + struct Iterator: IteratorProtocol { + let base: RangesSequence var state: Searcher.State - } - - public var endIndex: Index { - // TODO: Avoid calling `state(for:startingAt)` here - Index( - range: nil, - state: searcher.state(for: base, in: base.startIndex.. Index { - var index = index - formIndex(after: &index) - return index - } - public subscript(index: Index) -> Range { - guard let range = index.range else { - fatalError("Cannot subscript using endIndex") + init(_ base: RangesSequence) { + self.base = base + self.state = base.searcher.state(for: base.input, in: base.input.startIndex.. Bool { - switch (lhs.range, rhs.range) { - case (nil, nil): - return true - case (nil, _?), (_?, nil): - return false - case (let lhs?, let rhs?): - return lhs.lowerBound == rhs.lowerBound + public mutating func next() -> Range? { + base.searcher.search(base.input, &state) } } +} - static func < (lhs: Self, rhs: Self) -> Bool { - switch (lhs.range, rhs.range) { - case (nil, _): - return false - case (_, nil): - return true - case (let lhs?, let rhs?): - return lhs.lowerBound < rhs.lowerBound - } +extension RangesSequence: Sequence { + public func makeIterator() -> Iterator { + Iterator(self) } } @@ -122,8 +48,8 @@ extension RangesCollection.Index: Comparable { extension Collection { func _ranges( of searcher: S - ) -> RangesCollection where S.Searched == Self { - RangesCollection(base: self, searcher: searcher) + ) -> RangesSequence where S.Searched == Self { + RangesSequence(input: self, searcher: searcher) } } @@ -132,7 +58,7 @@ extension Collection { extension Collection where Element: Equatable { func _ranges( of other: C - ) -> RangesCollection> where C.Element == Element { + ) -> RangesSequence> where C.Element == Element { _ranges(of: ZSearcher(pattern: Array(other), by: ==)) } @@ -163,8 +89,8 @@ extension Collection where Element: Equatable { } @available(SwiftStdlib 5.7, *) -struct RegexRangesCollection { - let base: RegexMatchesCollection +struct RegexRangesSequence { + let base: RegexMatchesSequence init( input: String, @@ -181,9 +107,9 @@ struct RegexRangesCollection { } @available(SwiftStdlib 5.7, *) -extension RegexRangesCollection: Sequence { +extension RegexRangesSequence: Sequence { struct Iterator: IteratorProtocol { - var matchesBase: RegexMatchesCollection.Iterator + var matchesBase: RegexMatchesSequence.Iterator mutating func next() -> Range? { matchesBase.next().map(\.range) @@ -195,16 +121,6 @@ extension RegexRangesCollection: Sequence { } } -@available(SwiftStdlib 5.7, *) -extension RegexRangesCollection: Collection { - typealias Index = RegexMatchesCollection.Index - - var startIndex: Index { base.startIndex } - var endIndex: Index { base.endIndex } - func index(after i: Index) -> Index { base.index(after: i) } - subscript(position: Index) -> Range { base[position].range } -} - // MARK: Regex algorithms extension Collection where SubSequence == Substring { @@ -214,8 +130,8 @@ extension Collection where SubSequence == Substring { of regex: R, subjectBounds: Range, searchBounds: Range - ) -> RegexRangesCollection { - RegexRangesCollection( + ) -> RegexRangesSequence { + RegexRangesSequence( input: self[...].base, subjectBounds: subjectBounds, searchBounds: searchBounds, @@ -226,7 +142,7 @@ extension Collection where SubSequence == Substring { @_disfavoredOverload func _ranges( of regex: R - ) -> RegexRangesCollection { + ) -> RegexRangesSequence { _ranges( of: regex, subjectBounds: startIndex..( + func _replacing( _ ranges: Ranges, with replacement: Replacement, maxReplacements: Int = .max diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift index da4eecc60..b722932b9 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift @@ -11,15 +11,15 @@ // MARK: `SplitCollection` -struct SplitCollection { - public typealias Base = Searcher.Searched +struct SplitSequence { + public typealias Input = Searcher.Searched - let ranges: RangesCollection + let ranges: RangesSequence var maxSplits: Int var omittingEmptySubsequences: Bool init( - ranges: RangesCollection, + ranges: RangesSequence, maxSplits: Int, omittingEmptySubsequences: Bool) { @@ -29,53 +29,53 @@ struct SplitCollection { } init( - base: Base, + input: Input, searcher: Searcher, maxSplits: Int, omittingEmptySubsequences: Bool) { - self.ranges = base._ranges(of: searcher) + self.ranges = input._ranges(of: searcher) self.maxSplits = maxSplits self.omittingEmptySubsequences = omittingEmptySubsequences } } -extension SplitCollection: Sequence { +extension SplitSequence: Sequence { public struct Iterator: IteratorProtocol { - let base: Base - var index: Base.Index - var ranges: RangesCollection.Iterator - var maxSplits: Int - var omittingEmptySubsequences: Bool + var ranges: RangesSequence.Iterator + var index: Input.Index + var maxSplits: Int var splitCounter = 0 + var omittingEmptySubsequences: Bool var isDone = false + var input: Input { ranges.base.input } + init( - ranges: RangesCollection, + ranges: RangesSequence, maxSplits: Int, omittingEmptySubsequences: Bool ) { - self.base = ranges.base - self.index = base.startIndex + self.index = ranges.input.startIndex self.ranges = ranges.makeIterator() self.maxSplits = maxSplits self.omittingEmptySubsequences = omittingEmptySubsequences } - public mutating func next() -> Base.SubSequence? { + public mutating func next() -> Input.SubSequence? { guard !isDone else { return nil } /// Return the rest of base if it's non-empty or we're including /// empty subsequences. - func finish() -> Base.SubSequence? { + func finish() -> Input.SubSequence? { isDone = true - return index == base.endIndex && omittingEmptySubsequences + return index == input.endIndex && omittingEmptySubsequences ? nil - : base[index...] + : input[index...] } - if index == base.endIndex { + if index == input.endIndex { return finish() } @@ -96,7 +96,7 @@ extension SplitCollection: Sequence { } splitCounter += 1 - return base[index..( + func _split( by separator: Searcher, maxSplits: Int, omittingEmptySubsequences: Bool - ) -> SplitCollection where Searcher.Searched == Self { - SplitCollection( - base: self, + ) -> SplitSequence where Searcher.Searched == Self { + SplitSequence( + input: self, searcher: separator, maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences) @@ -126,12 +126,12 @@ extension Collection { extension Collection where Element: Equatable { @_disfavoredOverload - func split( + func _split( by separator: C, maxSplits: Int, omittingEmptySubsequences: Bool - ) -> SplitCollection> where C.Element == Element { - split(by: ZSearcher(pattern: Array(separator), by: ==), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences) + ) -> SplitSequence> where C.Element == Element { + _split(by: ZSearcher(pattern: Array(separator), by: ==), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences) } // FIXME: Return `some Collection` for SE-0346 @@ -159,7 +159,7 @@ extension Collection where Element: Equatable { return str._split(separator: sep, maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences) as! [SubSequence] default: - return Array(split( + return Array(_split( by: ZSearcher(pattern: Array(separator), by: ==), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)) @@ -186,7 +186,7 @@ extension StringProtocol where SubSequence == Substring { maxSplits: Int = .max, omittingEmptySubsequences: Bool = true ) -> [Substring] { - Array(self[...].split( + Array(self[...]._split( by: SubstringSearcher(text: "" as Substring, pattern: separator[...]), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)) @@ -199,7 +199,7 @@ extension StringProtocol where SubSequence == Substring { maxSplits: Int = .max, omittingEmptySubsequences: Bool = true ) -> [Substring] { - Array(self[...].split( + Array(self[...]._split( by: SubstringSearcher(text: "" as Substring, pattern: separator[...]), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift index e870e1493..ff385856c 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift @@ -44,7 +44,7 @@ extension RangeReplaceableCollection { // MARK: Predicate algorithms extension Collection { - fileprivate func endOfPrefix(while predicate: (Element) throws -> Bool) rethrows -> Index { + fileprivate func _endOfPrefix(while predicate: (Element) throws -> Bool) rethrows -> Index { try firstIndex(where: { try !predicate($0) }) ?? endIndex } @@ -52,7 +52,7 @@ extension Collection { public func trimmingPrefix( while predicate: (Element) throws -> Bool ) rethrows -> SubSequence { - let end = try endOfPrefix(while: predicate) + let end = try _endOfPrefix(while: predicate) return self[end...] } } @@ -62,7 +62,7 @@ extension Collection where SubSequence == Self { public mutating func trimPrefix( while predicate: (Element) throws -> Bool ) throws { - let end = try endOfPrefix(while: predicate) + let end = try _endOfPrefix(while: predicate) self = self[end...] } } @@ -73,7 +73,7 @@ extension RangeReplaceableCollection { public mutating func trimPrefix( while predicate: (Element) throws -> Bool ) rethrows { - let end = try endOfPrefix(while: predicate) + let end = try _endOfPrefix(while: predicate) removeSubrange(startIndex.. - ) -> Consumed.Index? -} - -extension BidirectionalCollectionConsumer { - func consumingBack(_ consumed: Consumed) -> Consumed.Index? { - consumingBack(consumed, in: consumed.startIndex.. Bool - where Consumed.SubSequence == Consumed - { - guard let index = consumingBack(consumed) else { return false } - consumed = consumed[.. - ) -> Consumed.Index? { - var index = range.upperBound - var patternIndex = pattern.endIndex - - while true { - if patternIndex == pattern.startIndex { - return index - } - - if index == range.lowerBound { - return nil - } - - consumed.formIndex(before: &index) - pattern.formIndex(before: &patternIndex) - - if consumed[index] != pattern[patternIndex] { - return nil - } - } - } -} diff --git a/Sources/_StringProcessing/Algorithms/Consumers/ManyConsumer.swift b/Sources/_StringProcessing/Algorithms/Consumers/ManyConsumer.swift deleted file mode 100644 index 10d9fd5c3..000000000 --- a/Sources/_StringProcessing/Algorithms/Consumers/ManyConsumer.swift +++ /dev/null @@ -1,47 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// This source file is part of the Swift.org open source project -// -// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors -// Licensed under Apache License v2.0 with Runtime Library Exception -// -// See https://swift.org/LICENSE.txt for license information -// -//===----------------------------------------------------------------------===// - -struct ManyConsumer { - let base: Base -} - -extension ManyConsumer: CollectionConsumer { - typealias Consumed = Base.Consumed - - func consuming( - _ consumed: Base.Consumed, - in range: Range - ) -> Base.Consumed.Index? { - var result = range.lowerBound - while let index = base.consuming(consumed, in: result.. - ) -> Base.Consumed.Index? { - var result = range.upperBound - while let index = base.consumingBack( - consumed, - in: range.lowerBound.. { - let predicate: (Consumed.Element) -> Bool -} - -extension PredicateConsumer: CollectionConsumer { - public func consuming( - _ consumed: Consumed, - in range: Range - ) -> Consumed.Index? { - let start = range.lowerBound - guard start != range.upperBound && predicate(consumed[start]) else { - return nil - } - return consumed.index(after: start) - } -} - -extension PredicateConsumer: BidirectionalCollectionConsumer - where Consumed: BidirectionalCollection -{ - func consumingBack( - _ consumed: Consumed, - in range: Range - ) -> Consumed.Index? { - let end = range.upperBound - guard end != range.lowerBound else { return nil } - let previous = consumed.index(before: end) - return predicate(consumed[previous]) ? previous : nil - } -} - -extension PredicateConsumer: StatelessCollectionSearcher { - public typealias Searched = Consumed - - public func search( - _ searched: Searched, - in range: Range - ) -> Range? { - // TODO: Make this reusable - guard let index = searched[range].firstIndex(where: predicate) else { - return nil - } - return index.. - ) -> Range? { - // TODO: Make this reusable - guard let index = searched[range].lastIndex(where: predicate) else { - return nil - } - return index..( - of searcher: S - ) -> _MatchResult? where S.Searched == Self { - var state = searcher.state(for: self, in: startIndex..( - of searcher: S - ) -> _BackwardMatchResult? - where S.BackwardSearched == Self - { - var state = searcher.backwardState(for: self, in: startIndex..( - _ searcher: Searcher, - with replacement: (_MatchResult) throws -> Replacement, - subrange: Range, - maxReplacements: Int = .max - ) rethrows -> Self where Searcher.Searched == SubSequence, - Replacement.Element == Element - { - precondition(maxReplacements >= 0) - - var index = subrange.lowerBound - var result = Self() - result.append(contentsOf: self[..( - _ searcher: Searcher, - with replacement: (_MatchResult) throws -> Replacement, - maxReplacements: Int = .max - ) rethrows -> Self where Searcher.Searched == SubSequence, - Replacement.Element == Element - { - try _replacing( - searcher, - with: replacement, - subrange: startIndex..( - _ searcher: Searcher, - with replacement: (_MatchResult) throws -> Replacement, - maxReplacements: Int = .max - ) rethrows where Searcher.Searched == SubSequence, - Replacement.Element == Element - { - self = try _replacing( - searcher, - with: replacement, - maxReplacements: maxReplacements) - } -} - // MARK: Regex algorithms extension RangeReplaceableCollection where SubSequence == Substring { diff --git a/Sources/_StringProcessing/Algorithms/Matching/MatchResult.swift b/Sources/_StringProcessing/Algorithms/Matching/MatchResult.swift index 94e6d8c3b..7d8157045 100644 --- a/Sources/_StringProcessing/Algorithms/Matching/MatchResult.swift +++ b/Sources/_StringProcessing/Algorithms/Matching/MatchResult.swift @@ -17,12 +17,3 @@ struct _MatchResult { match.startIndex.. { - let match: S.BackwardSearched.SubSequence - let result: S.Match - - var range: Range { - match.startIndex.. { - public typealias Base = Searcher.Searched - - let base: Base - let searcher: Searcher - private(set) public var startIndex: Index - - init(base: Base, searcher: Searcher) { - self.base = base - self.searcher = searcher - - var state = searcher.state(for: base, in: base.startIndex..: IteratorProtocol { - public typealias Base = Searcher.Searched - - let base: Base - let searcher: Searcher - var state: Searcher.State - - init(base: Base, searcher: Searcher) { - self.base = base - self.searcher = searcher - self.state = searcher.state(for: base, in: base.startIndex.. _MatchResult? { - searcher.matchingSearch(base, &state).map { range, result in - _MatchResult(match: base[range], result: result) - } - } -} - -extension MatchesCollection: Sequence { - public func makeIterator() -> MatchesIterator { - Iterator(base: base, searcher: searcher) - } -} - -extension MatchesCollection: Collection { - // TODO: Custom `SubSequence` for the sake of more efficient slice iteration - - struct Index { - var match: (range: Range, match: Searcher.Match)? - var state: Searcher.State - } - - public var endIndex: Index { - // TODO: Avoid calling `state(for:startingAt)` here - Index( - match: nil, - state: searcher.state(for: base, in: base.startIndex.. Index { - var index = index - formIndex(after: &index) - return index - } - - public subscript(index: Index) -> _MatchResult { - guard let (range, result) = index.match else { - fatalError("Cannot subscript using endIndex") - } - return _MatchResult(match: base[range], result: result) - } -} - -extension MatchesCollection.Index: Comparable { - public static func == (lhs: Self, rhs: Self) -> Bool { - switch (lhs.match?.range, rhs.match?.range) { - case (nil, nil): - return true - case (nil, _?), (_?, nil): - return false - case (let lhs?, let rhs?): - return lhs.lowerBound == rhs.lowerBound - } - } - - public static func < (lhs: Self, rhs: Self) -> Bool { - switch (lhs.match?.range, rhs.match?.range) { - case (nil, _): - return false - case (_, nil): - return true - case (let lhs?, let rhs?): - return lhs.lowerBound < rhs.lowerBound - } - } -} - -// MARK: `ReversedMatchesCollection` -// TODO: reversed matches - -struct ReversedMatchesCollection< - Searcher: BackwardMatchingCollectionSearcher -> { - public typealias Base = Searcher.BackwardSearched - - let base: Base - let searcher: Searcher - - init(base: Base, searcher: Searcher) { - self.base = base - self.searcher = searcher - } -} - -extension ReversedMatchesCollection: Sequence { - struct Iterator: IteratorProtocol { - let base: Base - let searcher: Searcher - var state: Searcher.BackwardState - - init(base: Base, searcher: Searcher) { - self.base = base - self.searcher = searcher - self.state = searcher.backwardState( - for: base, in: base.startIndex.. _BackwardMatchResult? { - searcher.matchingSearchBack(base, &state).map { range, result in - _BackwardMatchResult(match: base[range], result: result) - } - } - } - - public func makeIterator() -> Iterator { - Iterator(base: base, searcher: searcher) - } -} - -// TODO: `Collection` conformance - -// MARK: `CollectionSearcher` algorithms - -extension Collection { - func _matches( - of searcher: S - ) -> MatchesCollection where S.Searched == Self { - MatchesCollection(base: self, searcher: searcher) - } -} - -extension BidirectionalCollection { - func _matchesFromBack( - of searcher: S - ) -> ReversedMatchesCollection where S.BackwardSearched == Self { - ReversedMatchesCollection(base: self, searcher: searcher) - } -} - // MARK: Regex algorithms @available(SwiftStdlib 5.7, *) -struct RegexMatchesCollection { +struct RegexMatchesSequence { let input: String let subjectBounds: Range let searchBounds: Range let regex: Regex - let startIndex: Index - + init( input: String, subjectBounds: Range, @@ -201,15 +28,11 @@ struct RegexMatchesCollection { self.subjectBounds = subjectBounds self.searchBounds = searchBounds self.regex = regex - self.startIndex = (try? regex._firstMatch( - input, - subjectBounds: subjectBounds, - searchBounds: searchBounds)).map(Index.match) ?? .end } } @available(SwiftStdlib 5.7, *) -extension RegexMatchesCollection: Sequence { +extension RegexMatchesSequence: Sequence { /// Returns the index to start searching for the next match after `match`. fileprivate func searchIndex(after match: Regex.Match) -> String.Index? { if !match.range.isEmpty { @@ -218,7 +41,7 @@ extension RegexMatchesCollection: Sequence { // If the last match was an empty match, advance by one position and // run again, unless at the end of `input`. - if match.range.lowerBound == input.endIndex { + guard match.range.lowerBound < subjectBounds.upperBound else { return nil } @@ -231,29 +54,26 @@ extension RegexMatchesCollection: Sequence { } struct Iterator: IteratorProtocol { - let base: RegexMatchesCollection + let base: RegexMatchesSequence // Because `RegexMatchesCollection` eagerly computes the first match for // its `startIndex`, the iterator can use that match for its initial // iteration. For subsequent calls to `next()`, this value is `false`, and // `nextStart` is used to search for the next match. var initialIteration = true - var nextStart: String.Index? - - init(_ matches: RegexMatchesCollection) { + + // Set to nil when iteration is finished (because some regex can empty-match + // at the end of the subject). + var currentPosition: String.Index? + + init(_ matches: RegexMatchesSequence) { self.base = matches - self.nextStart = base.startIndex.match.flatMap(base.searchIndex(after:)) + self.currentPosition = base.subjectBounds.lowerBound } mutating func next() -> Regex.Match? { - // Initial case with pre-computed first match - if initialIteration { - initialIteration = false - return base.startIndex.match - } - - // `nextStart` is `nil` when iteration has completed - guard let start = nextStart, start <= base.searchBounds.upperBound else { + // `currentPosition` is `nil` when iteration has completed + guard let position = currentPosition, position <= base.searchBounds.upperBound else { return nil } @@ -261,8 +81,8 @@ extension RegexMatchesCollection: Sequence { let match = try? base.regex._firstMatch( base.input, subjectBounds: base.subjectBounds, - searchBounds: start...Match) - case end - - var match: Regex.Match? { - switch self { - case .match(let match): return match - case .end: return nil - } - } - - static func == (lhs: Self, rhs: Self) -> Bool { - switch (lhs, rhs) { - case (.match(let lhs), .match(let rhs)): - return lhs.range == rhs.range - case (.end, .end): - return true - case (.end, .match), (.match, .end): - return false - } - } - - static func < (lhs: Self, rhs: Self) -> Bool { - switch (lhs, rhs) { - case (.match(let lhs), .match(let rhs)): - // This implementation uses a tuple comparison so that an empty - // range `i.. Index { - guard let currentMatch = i.match else { - fatalError("Can't advance past the 'endIndex' of a match collection.") - } - - guard - let start = searchIndex(after: currentMatch), - start <= searchBounds.upperBound, - let nextMatch = try? regex._firstMatch( - input, - subjectBounds: subjectBounds, - searchBounds: start.. Regex.Match { - guard let match = position.match else { - fatalError("Can't subscript the 'endIndex' of a match collection.") - } - return match - } -} - extension BidirectionalCollection where SubSequence == Substring { @available(SwiftStdlib 5.7, *) @_disfavoredOverload func _matches( of regex: R - ) -> RegexMatchesCollection { - RegexMatchesCollection( + ) -> RegexMatchesSequence { + RegexMatchesSequence( input: self[...].base, subjectBounds: startIndex.. - ) -> (upperBound: Consumed.Index, match: Match)? -} - -extension MatchingCollectionConsumer { - func consuming( - _ consumed: Consumed, - in range: Range - ) -> Consumed.Index? { - matchingConsuming(consumed, in: range)?.upperBound - } -} - -// MARK: Consuming from the back - -protocol BidirectionalMatchingCollectionConsumer: - MatchingCollectionConsumer, BidirectionalCollectionConsumer -{ - func matchingConsumingBack( - _ consumed: Consumed, - in range: Range - ) -> (lowerBound: Consumed.Index, match: Match)? -} - -extension BidirectionalMatchingCollectionConsumer { - func consumingBack( - _ consumed: Consumed, - in range: Range - ) -> Consumed.Index? { - matchingConsumingBack(consumed, in: range)?.lowerBound - } -} - diff --git a/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionSearcher.swift b/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionSearcher.swift index 902d94591..b75f30c73 100644 --- a/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionSearcher.swift +++ b/Sources/_StringProcessing/Algorithms/Matching/MatchingCollectionSearcher.swift @@ -25,107 +25,3 @@ extension MatchingCollectionSearcher { matchingSearch(searched, &state)?.range } } - -protocol MatchingStatelessCollectionSearcher: - MatchingCollectionSearcher, StatelessCollectionSearcher -{ - func matchingSearch( - _ searched: Searched, - in range: Range - ) -> (range: Range, match: Match)? -} - -extension MatchingStatelessCollectionSearcher { - // for disambiguation between the `MatchingCollectionSearcher` and - // `StatelessCollectionSearcher` overloads - func search( - _ searched: Searched, - _ state: inout State - ) -> Range? { - matchingSearch(searched, &state)?.range - } - - func matchingSearch( - _ searched: Searched, - _ state: inout State - ) -> (range: Range, match: Match)? { - // TODO: deduplicate this logic with `StatelessCollectionSearcher`? - - guard - case .index(let index) = state.position, - let (range, value) = matchingSearch(searched, in: index.. - ) -> Range? { - matchingSearch(searched, in: range)?.range - } -} - -// MARK: Searching from the back - -protocol BackwardMatchingCollectionSearcher: BackwardCollectionSearcher { - associatedtype Match - func matchingSearchBack( - _ searched: BackwardSearched, - _ state: inout BackwardState - ) -> (range: Range, match: Match)? -} - -protocol BackwardMatchingStatelessCollectionSearcher: - BackwardMatchingCollectionSearcher, BackwardStatelessCollectionSearcher -{ - func matchingSearchBack( - _ searched: BackwardSearched, - in range: Range - ) -> (range: Range, match: Match)? -} - -extension BackwardMatchingStatelessCollectionSearcher { - func searchBack( - _ searched: BackwardSearched, - in range: Range - ) -> Range? { - matchingSearchBack(searched, in: range)?.range - } - - func matchingSearchBack( - _ searched: BackwardSearched, - _ state: inout BackwardState) -> (range: Range, match: Match)? - { - // TODO: deduplicate this logic with `StatelessBackwardCollectionSearcher`? - - guard - case .index(let index) = state.position, - let (range, value) = matchingSearchBack(searched, in: state.end..) -> BackwardState - func searchBack( - _ searched: BackwardSearched, - _ state: inout BackwardState - ) -> Range? -} - -protocol BackwardStatelessCollectionSearcher: BackwardCollectionSearcher - where BackwardState == DefaultSearcherState -{ - func searchBack( - _ searched: BackwardSearched, - in range: Range - ) -> Range? -} - -extension BackwardStatelessCollectionSearcher { - func backwardState( - for searched: BackwardSearched, - in range: Range - ) -> BackwardState { - BackwardState(position: .index(range.upperBound), end: range.lowerBound) - } - - func searchBack( - _ searched: BackwardSearched, - _ state: inout BackwardState) -> Range? { - guard - case .index(let index) = state.position, - let range = searchBack(searched, in: state.end.. { - let consumer: Consumer -} - -extension ConsumerSearcher: StatelessCollectionSearcher { - typealias Searched = Consumer.Consumed - - func search( - _ searched: Searched, - in range: Range - ) -> Range? { - var start = range.lowerBound - while true { - if let end = consumer.consuming(searched, in: start.. - ) -> Range? { - var end = range.upperBound - while true { - if let start = consumer.consumingBack( - searched, in: range.lowerBound.. - ) -> (range: Range, match: Consumer.Match)? { - var start = range.lowerBound - while true { - if let (end, value) = consumer.matchingConsuming( - searched, - in: start.. - ) -> (range: Range, match: Match)? { - var end = range.upperBound - while true { - if let (start, value) = consumer.matchingConsumingBack( - searched, in: range.lowerBound.. - where Searched.Element: Equatable, Pattern.Element == Searched.Element -{ - let pattern: Pattern -} - -extension NaivePatternSearcher: StatelessCollectionSearcher { - func search( - _ searched: Searched, - in range: Range - ) -> Range? { - var searchStart = range.lowerBound - - guard let patternFirst = pattern.first else { - return searchStart.. - ) -> Range? { - var searchEnd = range.upperBound - - guard let otherLastIndex = pattern.indices.last else { - return searchEnd.. { - let searcher: Searcher? -} - -extension PatternOrEmpty: CollectionSearcher { - typealias Searched = Searcher.Searched - - struct State { - enum Representation { - case state(Searcher.State) - case empty(index: Searched.Index, end: Searched.Index) - case emptyDone - } - - let representation: Representation - } - - func state( - for searched: Searcher.Searched, - in range: Range - ) -> State { - if let searcher = searcher { - return State( - representation: .state(searcher.state(for: searched, in: range))) - } else { - return State( - representation: .empty(index: range.lowerBound, end: range.upperBound)) - } - } - - func search( - _ searched: Searched, - _ state: inout State - ) -> Range? { - switch state.representation { - case .state(var s): - // TODO: Avoid a potential copy-on-write copy here - let result = searcher!.search(searched, &s) - state = State(representation: .state(s)) - return result - case .empty(let index, let end): - if index == end { - state = State(representation: .emptyDone) - } else { - state = State( - representation: .empty(index: searched.index(after: index), end: end)) - } - return index.. { - let predicate: (Searched.Element) -> Bool -} - -extension PredicateSearcher: StatelessCollectionSearcher { - func search( - _ searched: Searched, - in range: Range - ) -> Range? { - guard let index = searched[range].firstIndex(where: predicate) else { - return nil - } - return index.. - ) -> Range? { - guard let index = searched[range].lastIndex(where: predicate) else { - return nil - } - return index.. = matches + let _: RegexMatchesSequence = matches XCTAssertEqual(matches.map(\.output), expected) - - let i = matches.index(matches.startIndex, offsetBy: 3) - XCTAssertEqual(matches[i].output, expected[3]) - let j = matches.index(i, offsetBy: 5) - XCTAssertEqual(j, matches.endIndex) - - var index = matches.startIndex - while index < matches.endIndex { - XCTAssertEqual( - matches[index].output, - expected[matches.distance(from: matches.startIndex, to: index)]) - matches.formIndex(after: &index) - } } } From e0905ab7af5eb49ffcc4e6ce0f3f1d88b906249c Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Wed, 9 Oct 2024 14:32:37 -0600 Subject: [PATCH 2/7] wip: dont create array --- .../Algorithms/Algorithms/Replace.swift | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift index c67cfbfbd..fae7b88ed 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift @@ -49,13 +49,15 @@ extension RangeReplaceableCollection { var result = Self() var index = startIndex - - // `maxRanges` is a workaround for https://github.com/apple/swift/issues/59522 - let maxRanges = ranges.prefix(maxReplacements) - for range in maxRanges { + var replacements = 0 + + for range in ranges { + if replacements == maxReplacements { break } + result.append(contentsOf: self[index.. Date: Wed, 9 Oct 2024 14:49:16 -0600 Subject: [PATCH 3/7] Update Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift Co-authored-by: Nate Cook --- Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift index 57834a324..5f0f0479d 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift @@ -29,7 +29,7 @@ struct RangesSequence { self.state = base.searcher.state(for: base.input, in: base.input.startIndex.. Range? { + mutating func next() -> Range? { base.searcher.search(base.input, &state) } } From bb4b619bb0b5541be7e211596a1b880b8afd1b96 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Wed, 9 Oct 2024 14:49:25 -0600 Subject: [PATCH 4/7] Update Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift Co-authored-by: Nate Cook --- Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift index 5f0f0479d..a82fb875c 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift @@ -36,7 +36,7 @@ struct RangesSequence { } extension RangesSequence: Sequence { - public func makeIterator() -> Iterator { + func makeIterator() -> Iterator { Iterator(self) } } From f59dc411f4efc068b0bba991840f396f12df2a15 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Wed, 9 Oct 2024 14:49:53 -0600 Subject: [PATCH 5/7] Update Sources/_StringProcessing/Algorithms/Algorithms/Split.swift --- Sources/_StringProcessing/Algorithms/Algorithms/Split.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift index b722932b9..5b71ca433 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift @@ -12,7 +12,7 @@ // MARK: `SplitCollection` struct SplitSequence { - public typealias Input = Searcher.Searched + typealias Input = Searcher.Searched let ranges: RangesSequence var maxSplits: Int From 35465900c4ff780bbe103fbbbec97ce285b4bb64 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Wed, 9 Oct 2024 14:50:23 -0600 Subject: [PATCH 6/7] Update Sources/_StringProcessing/Algorithms/Algorithms/Split.swift --- Sources/_StringProcessing/Algorithms/Algorithms/Split.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift index 5b71ca433..7a4a3b283 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift @@ -41,7 +41,7 @@ struct SplitSequence { } extension SplitSequence: Sequence { - public struct Iterator: IteratorProtocol { + struct Iterator: IteratorProtocol { var ranges: RangesSequence.Iterator var index: Input.Index From 676c7dbd924e287fdf93176e6cd9e4229875975b Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Wed, 9 Oct 2024 14:53:53 -0600 Subject: [PATCH 7/7] remove more superfluous publics --- Sources/_StringProcessing/Algorithms/Algorithms/Split.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift index 7a4a3b283..de92c0d8a 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Split.swift @@ -63,7 +63,7 @@ extension SplitSequence: Sequence { self.omittingEmptySubsequences = omittingEmptySubsequences } - public mutating func next() -> Input.SubSequence? { + mutating func next() -> Input.SubSequence? { guard !isDone else { return nil } /// Return the rest of base if it's non-empty or we're including @@ -101,7 +101,7 @@ extension SplitSequence: Sequence { } } - public func makeIterator() -> Iterator { + func makeIterator() -> Iterator { Iterator(ranges: ranges, maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences) } }