Fix Source.lines so it works even if lines span multiple chunks (#186)

softwaremill · Aug 5, 2024 · 33bdd62
1 parent d22828b
commit 33bdd62
Show file tree

Hide file tree

Showing 2 changed files with 8 additions and 1 deletion.
diff --git a/core/src/main/scala/ox/channels/SourceTextOps.scala b/core/src/main/scala/ox/channels/SourceTextOps.scala
@@ -29,7 +29,7 @@ trait SourceTextOps[+T]:
             if newlineIdx == -1 then (buf ++ chunk, acc)
             else
               val (chunk1, chunk2) = chunk.splitAt(newlineIdx)
-              splitChunksAtNewLine(Chunk.empty, chunk2.drop(1), acc :+ (if buffer != null then buffer ++ chunk1 else chunk1))
+              splitChunksAtNewLine(Chunk.empty, chunk2.drop(1), acc :+ (buf ++ chunk1))
 
           val (newBuffer, toEmit) =
             if nextChunk.length == 0 then (null, Vector.empty)

diff --git a/core/src/test/scala/ox/channels/SourceTextOpsTest.scala b/core/src/test/scala/ox/channels/SourceTextOpsTest.scala
@@ -73,6 +73,13 @@ class SourceTextOpsTest extends AnyWordSpec with Matchers {
       val chunk = Chunk.fromArray(inputBytes)
       Source.fromValues(chunk).lines(Charset.forName("ISO-8859-2")).toList shouldBe List("zażółć", "gęślą", "jaźń")
     }
+
+    "decode lines correctly across chunk boundaries" in supervised {
+      val lines = List("aa", "bbbbb", "cccccccc", "ddd", "ee", "fffff")
+      val inputBytes = lines.mkString("\n").getBytes("UTF-8")
+      val chunk = inputBytes.grouped(5).map(Chunk.fromArray)
+      Source.fromIterator(chunk).lines(Charset.forName("UTF-8")).toList should contain theSameElementsInOrderAs lines
+    }
   }
 
   "decodeStringUtf8" should {