Skip to content

Cleanup algorithms code #771

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Oct 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 25 additions & 109 deletions Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,107 +11,33 @@

// MARK: `RangesSequence`

struct RangesCollection<Searcher: CollectionSearcher> {
public typealias Base = Searcher.Searched

let base: Base
let searcher: Searcher
private(set) public var startIndex: Index

init(base: Base, searcher: Searcher) {
self.base = base
self.searcher = searcher

var state = searcher.state(for: base, in: base.startIndex..<base.endIndex)
self.startIndex = Index(range: nil, state: state)

if let range = searcher.search(base, &state) {
self.startIndex = Index(range: range, state: state)
} else {
self.startIndex = endIndex
}
}
}

struct RangesIterator<Searcher: CollectionSearcher>: IteratorProtocol {
public typealias Base = Searcher.Searched

let base: Base
struct RangesSequence<Searcher: CollectionSearcher> {
let input: Searcher.Searched
let searcher: Searcher
var state: Searcher.State

init(base: Base, searcher: Searcher) {
self.base = base
init(input: Searcher.Searched, searcher: Searcher) {
self.input = input
self.searcher = searcher
self.state = searcher.state(for: base, in: base.startIndex..<base.endIndex)
}

public mutating func next() -> Range<Base.Index>? {
searcher.search(base, &state)
}
}

extension RangesCollection: Sequence {
public func makeIterator() -> RangesIterator<Searcher> {
Iterator(base: base, searcher: searcher)
}
}

extension RangesCollection: Collection {
// TODO: Custom `SubSequence` for the sake of more efficient slice iteration

public struct Index {
var range: Range<Searcher.Searched.Index>?
struct Iterator: IteratorProtocol {
let base: RangesSequence
var state: Searcher.State
}

public var endIndex: Index {
// TODO: Avoid calling `state(for:startingAt)` here
Index(
range: nil,
state: searcher.state(for: base, in: base.startIndex..<base.endIndex))
}

public func formIndex(after index: inout Index) {
guard index != endIndex else { fatalError("Cannot advance past endIndex") }
index.range = searcher.search(base, &index.state)
}

public func index(after index: Index) -> Index {
var index = index
formIndex(after: &index)
return index
}

public subscript(index: Index) -> Range<Base.Index> {
guard let range = index.range else {
fatalError("Cannot subscript using endIndex")
init(_ base: RangesSequence) {
self.base = base
self.state = base.searcher.state(for: base.input, in: base.input.startIndex..<base.input.endIndex)
}
return range
}
}

extension RangesCollection.Index: Comparable {
static func == (lhs: Self, rhs: Self) -> Bool {
switch (lhs.range, rhs.range) {
case (nil, nil):
return true
case (nil, _?), (_?, nil):
return false
case (let lhs?, let rhs?):
return lhs.lowerBound == rhs.lowerBound
mutating func next() -> Range<Searcher.Searched.Index>? {
base.searcher.search(base.input, &state)
}
}
}

static func < (lhs: Self, rhs: Self) -> Bool {
switch (lhs.range, rhs.range) {
case (nil, _):
return false
case (_, nil):
return true
case (let lhs?, let rhs?):
return lhs.lowerBound < rhs.lowerBound
}
extension RangesSequence: Sequence {
func makeIterator() -> Iterator {
Iterator(self)
}
}

Expand All @@ -122,8 +48,8 @@ extension RangesCollection.Index: Comparable {
extension Collection {
func _ranges<S: CollectionSearcher>(
of searcher: S
) -> RangesCollection<S> where S.Searched == Self {
RangesCollection(base: self, searcher: searcher)
) -> RangesSequence<S> where S.Searched == Self {
RangesSequence(input: self, searcher: searcher)
}
}

Expand All @@ -132,7 +58,7 @@ extension Collection {
extension Collection where Element: Equatable {
func _ranges<C: Collection>(
of other: C
) -> RangesCollection<ZSearcher<Self>> where C.Element == Element {
) -> RangesSequence<ZSearcher<Self>> where C.Element == Element {
_ranges(of: ZSearcher(pattern: Array(other), by: ==))
}

Expand Down Expand Up @@ -163,8 +89,8 @@ extension Collection where Element: Equatable {
}

@available(SwiftStdlib 5.7, *)
struct RegexRangesCollection<Output> {
let base: RegexMatchesCollection<Output>
struct RegexRangesSequence<Output> {
let base: RegexMatchesSequence<Output>

init(
input: String,
Expand All @@ -181,9 +107,9 @@ struct RegexRangesCollection<Output> {
}

@available(SwiftStdlib 5.7, *)
extension RegexRangesCollection: Sequence {
extension RegexRangesSequence: Sequence {
struct Iterator: IteratorProtocol {
var matchesBase: RegexMatchesCollection<Output>.Iterator
var matchesBase: RegexMatchesSequence<Output>.Iterator

mutating func next() -> Range<String.Index>? {
matchesBase.next().map(\.range)
Expand All @@ -195,16 +121,6 @@ extension RegexRangesCollection: Sequence {
}
}

@available(SwiftStdlib 5.7, *)
extension RegexRangesCollection: Collection {
typealias Index = RegexMatchesCollection<Output>.Index

var startIndex: Index { base.startIndex }
var endIndex: Index { base.endIndex }
func index(after i: Index) -> Index { base.index(after: i) }
subscript(position: Index) -> Range<String.Index> { base[position].range }
}

// MARK: Regex algorithms

extension Collection where SubSequence == Substring {
Expand All @@ -214,8 +130,8 @@ extension Collection where SubSequence == Substring {
of regex: R,
subjectBounds: Range<String.Index>,
searchBounds: Range<String.Index>
) -> RegexRangesCollection<R.RegexOutput> {
RegexRangesCollection(
) -> RegexRangesSequence<R.RegexOutput> {
RegexRangesSequence(
input: self[...].base,
subjectBounds: subjectBounds,
searchBounds: searchBounds,
Expand All @@ -226,7 +142,7 @@ extension Collection where SubSequence == Substring {
@_disfavoredOverload
func _ranges<R: RegexComponent>(
of regex: R
) -> RegexRangesCollection<R.RegexOutput> {
) -> RegexRangesSequence<R.RegexOutput> {
_ranges(
of: regex,
subjectBounds: startIndex..<endIndex,
Expand Down
12 changes: 7 additions & 5 deletions Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ extension Substring {
}

extension RangeReplaceableCollection {
func _replacing<Ranges: Collection, Replacement: Collection>(
func _replacing<Ranges: Sequence, Replacement: Collection>(
_ ranges: Ranges,
with replacement: Replacement,
maxReplacements: Int = .max
Expand All @@ -49,13 +49,15 @@ extension RangeReplaceableCollection {

var result = Self()
var index = startIndex

// `maxRanges` is a workaround for https://github.com/apple/swift/issues/59522
let maxRanges = ranges.prefix(maxReplacements)
for range in maxRanges {
var replacements = 0

for range in ranges {
if replacements == maxReplacements { break }

result.append(contentsOf: self[index..<range.lowerBound])
result.append(contentsOf: replacement)
index = range.upperBound
replacements += 1
}

result.append(contentsOf: self[index...])
Expand Down
66 changes: 33 additions & 33 deletions Sources/_StringProcessing/Algorithms/Algorithms/Split.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@

// MARK: `SplitSequence`

struct SplitCollection<Searcher: CollectionSearcher> {
public typealias Base = Searcher.Searched
struct SplitSequence<Searcher: CollectionSearcher> {
typealias Input = Searcher.Searched

let ranges: RangesCollection<Searcher>
let ranges: RangesSequence<Searcher>
var maxSplits: Int
var omittingEmptySubsequences: Bool

init(
ranges: RangesCollection<Searcher>,
ranges: RangesSequence<Searcher>,
maxSplits: Int,
omittingEmptySubsequences: Bool)
{
Expand All @@ -29,53 +29,53 @@ struct SplitCollection<Searcher: CollectionSearcher> {
}

init(
base: Base,
input: Input,
searcher: Searcher,
maxSplits: Int,
omittingEmptySubsequences: Bool)
{
self.ranges = base._ranges(of: searcher)
self.ranges = input._ranges(of: searcher)
self.maxSplits = maxSplits
self.omittingEmptySubsequences = omittingEmptySubsequences
}
}

extension SplitCollection: Sequence {
public struct Iterator: IteratorProtocol {
let base: Base
var index: Base.Index
var ranges: RangesCollection<Searcher>.Iterator
var maxSplits: Int
var omittingEmptySubsequences: Bool
extension SplitSequence: Sequence {
struct Iterator: IteratorProtocol {
var ranges: RangesSequence<Searcher>.Iterator
var index: Input.Index

var maxSplits: Int
var splitCounter = 0
var omittingEmptySubsequences: Bool
var isDone = false

var input: Input { ranges.base.input }

init(
ranges: RangesCollection<Searcher>,
ranges: RangesSequence<Searcher>,
maxSplits: Int,
omittingEmptySubsequences: Bool
) {
self.base = ranges.base
self.index = base.startIndex
self.index = ranges.input.startIndex
self.ranges = ranges.makeIterator()
self.maxSplits = maxSplits
self.omittingEmptySubsequences = omittingEmptySubsequences
}

public mutating func next() -> Base.SubSequence? {
mutating func next() -> Input.SubSequence? {
guard !isDone else { return nil }

/// Return the rest of the input if it's non-empty or we're including
/// empty subsequences.
func finish() -> Base.SubSequence? {
func finish() -> Input.SubSequence? {
isDone = true
return index == base.endIndex && omittingEmptySubsequences
return index == input.endIndex && omittingEmptySubsequences
? nil
: base[index...]
: input[index...]
}

if index == base.endIndex {
if index == input.endIndex {
return finish()
}

Expand All @@ -96,26 +96,26 @@ extension SplitCollection: Sequence {
}

splitCounter += 1
return base[index..<range.lowerBound]
return input[index..<range.lowerBound]
}
}
}

public func makeIterator() -> Iterator {
func makeIterator() -> Iterator {
Iterator(ranges: ranges, maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)
}
}

// MARK: `CollectionSearcher` algorithms

extension Collection {
func split<Searcher: CollectionSearcher>(
func _split<Searcher: CollectionSearcher>(
by separator: Searcher,
maxSplits: Int,
omittingEmptySubsequences: Bool
) -> SplitCollection<Searcher> where Searcher.Searched == Self {
SplitCollection(
base: self,
) -> SplitSequence<Searcher> where Searcher.Searched == Self {
SplitSequence(
input: self,
searcher: separator,
maxSplits: maxSplits,
omittingEmptySubsequences: omittingEmptySubsequences)
Expand All @@ -126,12 +126,12 @@ extension Collection {

extension Collection where Element: Equatable {
@_disfavoredOverload
func split<C: Collection>(
func _split<C: Collection>(
by separator: C,
maxSplits: Int,
omittingEmptySubsequences: Bool
) -> SplitCollection<ZSearcher<Self>> where C.Element == Element {
split(by: ZSearcher(pattern: Array(separator), by: ==), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)
) -> SplitSequence<ZSearcher<Self>> where C.Element == Element {
_split(by: ZSearcher(pattern: Array(separator), by: ==), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)
}

// FIXME: Return `some Collection<SubSequence>` for SE-0346
Expand Down Expand Up @@ -159,7 +159,7 @@ extension Collection where Element: Equatable {
return str._split(separator: sep, maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences) as! [SubSequence]

default:
return Array(split(
return Array(_split(
by: ZSearcher(pattern: Array(separator), by: ==),
maxSplits: maxSplits,
omittingEmptySubsequences: omittingEmptySubsequences))
Expand All @@ -186,7 +186,7 @@ extension StringProtocol where SubSequence == Substring {
maxSplits: Int = .max,
omittingEmptySubsequences: Bool = true
) -> [Substring] {
Array(self[...].split(
Array(self[...]._split(
by: SubstringSearcher(text: "" as Substring, pattern: separator[...]),
maxSplits: maxSplits,
omittingEmptySubsequences: omittingEmptySubsequences))
Expand All @@ -199,7 +199,7 @@ extension StringProtocol where SubSequence == Substring {
maxSplits: Int = .max,
omittingEmptySubsequences: Bool = true
) -> [Substring] {
Array(self[...].split(
Array(self[...]._split(
by: SubstringSearcher(text: "" as Substring, pattern: separator[...]),
maxSplits: maxSplits,
omittingEmptySubsequences: omittingEmptySubsequences))
Expand Down
Loading