Skip to content

Commit

Permalink
Fix Source.lines so it works even if lines span multiple chunks (#186)
Browse files (browse the repository at this point in the history)
  • Loading branch information
wjoel authored Aug 5, 2024
1 parent d22828b commit 33bdd62
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 1 deletion.
2 changes: 1 addition & 1 deletion core/src/main/scala/ox/channels/SourceTextOps.scala
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ trait SourceTextOps[+T]:
if newlineIdx == -1 then (buf ++ chunk, acc)
else
val (chunk1, chunk2) = chunk.splitAt(newlineIdx)
splitChunksAtNewLine(Chunk.empty, chunk2.drop(1), acc :+ (if buffer != null then buffer ++ chunk1 else chunk1))
splitChunksAtNewLine(Chunk.empty, chunk2.drop(1), acc :+ (buf ++ chunk1))

val (newBuffer, toEmit) =
if nextChunk.length == 0 then (null, Vector.empty)
Expand Down
7 changes: 7 additions & 0 deletions core/src/test/scala/ox/channels/SourceTextOpsTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,13 @@ class SourceTextOpsTest extends AnyWordSpec with Matchers {
val chunk = Chunk.fromArray(inputBytes)
Source.fromValues(chunk).lines(Charset.forName("ISO-8859-2")).toList shouldBe List("zażółć", "gęślą", "jaźń")
}

"decode lines correctly across chunk boundaries" in supervised {
  // The input is 30 bytes long and is cut into fixed 5-byte chunks, so several lines
  // (e.g. "cccccccc") are spread over more than one chunk and one chunk contains no
  // newline at all.
  val expectedLines = List("aa", "bbbbb", "cccccccc", "ddd", "ee", "fffff")
  val encoded = expectedLines.mkString("\n").getBytes("UTF-8")
  val chunks = encoded.grouped(5).map(Chunk.fromArray)

  // Decoding must reassemble the original lines, in order, regardless of where the chunks were cut.
  val decoded = Source.fromIterator(chunks).lines(Charset.forName("UTF-8")).toList
  decoded should contain theSameElementsInOrderAs expectedLines
}
}

"decodeStringUtf8" should {
Expand Down

0 comments on commit 33bdd62

Please sign in to comment.