From 8536d3cc3399f4c1e3b5c77ce202952915e54463 Mon Sep 17 00:00:00 2001 From: Adam Liliemark Date: Tue, 11 Feb 2025 20:09:57 +0100 Subject: [PATCH 1/4] Add bit_array.split_once --- src/gleam/bit_array.gleam | 18 +++++++++++++++++ src/gleam_stdlib.erl | 12 ++++++++++- src/gleam_stdlib.mjs | 24 ++++++++++++++++++++++ test/gleam/bit_array_test.gleam | 35 +++++++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 1 deletion(-) diff --git a/src/gleam/bit_array.gleam b/src/gleam/bit_array.gleam index df75be59..d2930436 100644 --- a/src/gleam/bit_array.gleam +++ b/src/gleam/bit_array.gleam @@ -59,6 +59,24 @@ pub fn slice( take length: Int, ) -> Result(BitArray, Nil) +/// Splits a bit array into two parts at the location of the pattern. +/// +/// The result will not include the pattern, and returns an error if the +/// pattern is not found. +/// +/// ## Examples +/// +/// ```gleam +/// split_once(from: <<1, 2, 3>>, on: <<2>>) +/// // -> Ok(#(<<1>>, <<3>>)) +/// ``` +@external(erlang, "gleam_stdlib", "bit_array_split_once") +@external(javascript, "../gleam_stdlib.mjs", "bit_array_split_once") +pub fn split_once( + from bits: BitArray, + on pattern: BitArray, +) -> Result(#(BitArray, BitArray), Nil) + /// Tests to see whether a bit array is valid UTF-8. /// pub fn is_utf8(bits: BitArray) -> Bool { diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl index 3fda5df9..3ce7a7ee 100644 --- a/src/gleam_stdlib.erl +++ b/src/gleam_stdlib.erl @@ -14,7 +14,7 @@ inspect/1, float_to_string/1, int_from_base_string/2, utf_codepoint_list_to_string/1, contains_string/2, crop_string/2, base16_encode/1, base16_decode/1, string_replace/3, slice/3, - bit_array_to_int_and_size/1, bit_array_pad_to_bytes/1 + bit_array_to_int_and_size/1, bit_array_pad_to_bytes/1, bit_array_split_once/2 ]). %% Taken from OTP's uri_string module @@ -231,6 +231,16 @@ bit_array_slice(Bin, Pos, Len) -> catch error:badarg -> {error, nil} end. 
+bit_array_split_once(Bin, Sub) -> + try + case binary:split(Bin, [Sub]) of + [<<>>, <<>>] -> {error, nil}; + [Part1, Part2] -> {ok, {Part1, Part2}}; + _ -> {error, nil} + end + catch error:badarg -> {error, nil} + end. + base_decode64(S) -> try {ok, base64:decode(S)} catch error:_ -> {error, nil} diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index 700a3620..6e743cd2 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -425,6 +425,30 @@ export function bit_array_slice(bits, position, length) { return new Ok(new BitArray(buffer)); } +export function bit_array_split_once(bits, pattern) { + try { + if (!(bits instanceof BitArray) || !(pattern instanceof BitArray) || pattern.buffer.length < 1 || pattern.buffer.length >= bits.buffer.length) { + return new Error(Nil); + } + + let i = 0; + const n = bits.buffer.length - pattern.buffer.length + 1; + + find: for (; i < n; i++) { + for (let j = 0; j < pattern.buffer.length; j++) { + if (bits.buffer[i + j] !== pattern.buffer[j]) continue find; + } + const before = bits.buffer.slice(0, i); + const after = bits.buffer.slice(i + pattern.buffer.length); + return new Ok([new BitArray(before), new BitArray(after)]); + } + + return new Error(Nil); + } catch (e) { + return new Error(Nil); + } +} + export function codepoint(int) { return new UtfCodepoint(int); } diff --git a/test/gleam/bit_array_test.gleam b/test/gleam/bit_array_test.gleam index 638a8b2d..2faac54d 100644 --- a/test/gleam/bit_array_test.gleam +++ b/test/gleam/bit_array_test.gleam @@ -202,6 +202,41 @@ pub fn slice_erlang_only_test() { |> should.equal(Error(Nil)) } +pub fn split_once_test() { + <<"hello":utf8>> + |> bit_array.split_once(<<"l":utf8>>) + |> should.equal(Ok(#(<<"he":utf8>>, <<"lo":utf8>>))) + + <<"hello":utf8>> + |> bit_array.split_once(<<"o":utf8>>) + |> should.equal(Ok(#(<<"hell":utf8>>, <<>>))) + + <<"hello":utf8>> + |> bit_array.split_once(<<"h":utf8>>) + |> should.equal(Ok(#(<<>>, <<"ello":utf8>>))) + + <<"hello":utf8>> + |> 
bit_array.split_once(<<1>>) + |> should.equal(Error(Nil)) + + <<"hello":utf8>> + |> bit_array.split_once(<<"":utf8>>) + |> should.equal(Error(Nil)) + + <<"hello":utf8>> + |> bit_array.split_once(<<"hello":utf8>>) + |> should.equal(Error(Nil)) +} + +// This test is target specific since it's using non byte-aligned BitArrays +// and those are not supported on the JavaScript target. +@target(erlang) +pub fn split_once_erlang_only_test() { + <<0, 1, 2:7>> + |> bit_array.split_once(<<1>>) + |> should.equal(Error(Nil)) +} + pub fn to_string_test() { <<>> |> bit_array.to_string From 14b8688190d1a2d1a8dc109b638202bdfdaf7328 Mon Sep 17 00:00:00 2001 From: Adam Liliemark Date: Wed, 12 Feb 2025 09:18:06 +0100 Subject: [PATCH 2/4] Add bit_array.split, rework tests for split_once --- CHANGELOG.md | 1 + src/gleam/bit_array.gleam | 28 +++++++++++++++ src/gleam_stdlib.erl | 10 ++++-- src/gleam_stdlib.mjs | 55 ++++++++++++++++++++++++++--- test/gleam/bit_array_test.gleam | 61 ++++++++++++++++++++++++++++++--- 5 files changed, 143 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 008d5043..6d1d2d4f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Unreleased +- The `bit_array` module gains the `split` and `split_once` functions. - The deprecated `drop_left`, `drop_right`, `pad_left`, `pad_right`, `trim_left`, and `trim_right` functions have been removed. - Fixed a bug that would result in `list.unique` having quadratic runtime. diff --git a/src/gleam/bit_array.gleam b/src/gleam/bit_array.gleam index d2930436..1411789a 100644 --- a/src/gleam/bit_array.gleam +++ b/src/gleam/bit_array.gleam @@ -64,11 +64,16 @@ pub fn slice( /// The result will not include the pattern, and returns an error if the /// pattern is not found. /// +/// This function runs in linear time. 
+/// /// ## Examples /// /// ```gleam /// split_once(from: <<1, 2, 3>>, on: <<2>>) /// // -> Ok(#(<<1>>, <<3>>)) +/// +/// split_once(from: <<0>>, on: <<1>>) +/// // -> Error(Nil) /// ``` @external(erlang, "gleam_stdlib", "bit_array_split_once") @external(javascript, "../gleam_stdlib.mjs", "bit_array_split_once") @@ -77,6 +82,29 @@ pub fn split_once( on pattern: BitArray, ) -> Result(#(BitArray, BitArray), Nil) +/// Splits a bit array into parts at the locations of the pattern. +/// +/// The result will not include the pattern, and returns an empty +/// list if the pattern is not found. +/// +/// This function runs in linear time. +/// +/// ## Examples +/// +/// ```gleam +/// split(from: <<0, 1, 0, 2, 0, 3>>, on: <<0>>) +/// // -> Ok([<<1>>, <<2>>, <<3>>]) +/// +/// split(from: <<0>>, on: <<1>>) +/// // -> Ok([]) +/// ``` +@external(erlang, "gleam_stdlib", "bit_array_split") +@external(javascript, "../gleam_stdlib.mjs", "bit_array_split") +pub fn split( + from bits: BitArray, + on pattern: BitArray, +) -> Result(List(BitArray), Nil) + /// Tests to see whether a bit array is valid UTF-8. /// pub fn is_utf8(bits: BitArray) -> Bool { diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl index 3ce7a7ee..5d0c4c86 100644 --- a/src/gleam_stdlib.erl +++ b/src/gleam_stdlib.erl @@ -14,7 +14,8 @@ inspect/1, float_to_string/1, int_from_base_string/2, utf_codepoint_list_to_string/1, contains_string/2, crop_string/2, base16_encode/1, base16_decode/1, string_replace/3, slice/3, - bit_array_to_int_and_size/1, bit_array_pad_to_bytes/1, bit_array_split_once/2 + bit_array_to_int_and_size/1, bit_array_pad_to_bytes/1, bit_array_split_once/2, + bit_array_split/2 ]). %% Taken from OTP's uri_string module @@ -235,12 +236,17 @@ bit_array_split_once(Bin, Sub) -> try case binary:split(Bin, [Sub]) of [<<>>, <<>>] -> {error, nil}; - [Part1, Part2] -> {ok, {Part1, Part2}}; + [A, B] -> {ok, {A, B}}; _ -> {error, nil} end catch error:badarg -> {error, nil} end. 
+bit_array_split(Bin, Sub) -> + try {ok, binary:split(Bin, [Sub], [global, trim_all])} + catch error:badarg -> {error, nil} + end. + base_decode64(S) -> try {ok, base64:decode(S)} catch error:_ -> {error, nil} diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index 6e743cd2..5c918979 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -14,6 +14,7 @@ import { DecodeError } from "./gleam/dynamic.mjs"; import { Some, None } from "./gleam/option.mjs"; import { Eq, Gt, Lt } from "./gleam/order.mjs"; import Dict from "./dict.mjs"; +import { Buffer } from 'node:buffer'; const Nil = undefined; const NOT_FOUND = {}; @@ -427,16 +428,19 @@ export function bit_array_slice(bits, position, length) { export function bit_array_split_once(bits, pattern) { try { - if (!(bits instanceof BitArray) || !(pattern instanceof BitArray) || pattern.buffer.length < 1 || pattern.buffer.length >= bits.buffer.length) { + if (!(bits instanceof BitArray) + || !(pattern instanceof BitArray) + || pattern.buffer.length < 1 + || pattern.buffer.length >= bits.buffer.length) { return new Error(Nil); } - let i = 0; const n = bits.buffer.length - pattern.buffer.length + 1; - - find: for (; i < n; i++) { + find: for (let i = 0; i < n; i++) { for (let j = 0; j < pattern.buffer.length; j++) { - if (bits.buffer[i + j] !== pattern.buffer[j]) continue find; + if (bits.buffer[i + j] !== pattern.buffer[j]) { + continue find; + } } const before = bits.buffer.slice(0, i); const after = bits.buffer.slice(i + pattern.buffer.length); @@ -449,6 +453,47 @@ export function bit_array_split_once(bits, pattern) { } } +export function bit_array_split(bits, pattern) { + try { + const patternEmpty = pattern.buffer.length < 1 + const incorrectArguments = !(bits instanceof BitArray) || !(pattern instanceof BitArray) + if (incorrectArguments || patternEmpty) { + return new Error(Nil); + } + + const bitsEqualToPattern = Buffer.compare(bits.buffer, pattern.buffer) === 0 + const bitsEmpty = bits.buffer.length === 
0 + if (bitsEqualToPattern || bitsEmpty) { + return new Ok(List.fromArray([])); + } + + const results = []; + let lastIndex = 0; + const n = bits.buffer.length - pattern.buffer.length + 1; + + find: for (let i = 0; i < n; i++) { + for (let j = 0; j < pattern.buffer.length; j++) { + if (bits.buffer[i + j] !== pattern.buffer[j]) { + continue find; + } + } + if (i > lastIndex) { + results.push(new BitArray(bits.buffer.slice(lastIndex, i))); + } + lastIndex = i + pattern.buffer.length; + i = lastIndex - 1; + } + + if (lastIndex < bits.buffer.length) { + results.push(new BitArray(bits.buffer.slice(lastIndex))); + } + + return new Ok(List.fromArray(results.length ? results : [bits])); + } catch (e) { + return new Error(Nil); + } +} + export function codepoint(int) { return new UtfCodepoint(int); } diff --git a/test/gleam/bit_array_test.gleam b/test/gleam/bit_array_test.gleam index 2faac54d..19328fd7 100644 --- a/test/gleam/bit_array_test.gleam +++ b/test/gleam/bit_array_test.gleam @@ -215,16 +215,24 @@ pub fn split_once_test() { |> bit_array.split_once(<<"h":utf8>>) |> should.equal(Ok(#(<<>>, <<"ello":utf8>>))) - <<"hello":utf8>> + <<0, 1, 0, 2, 0, 3>> + |> bit_array.split_once(<<0, 2>>) + |> should.equal(Ok(#(<<0, 1>>, <<0, 3>>))) + + <<0, 1, 2, 0, 3, 4, 5>> + |> bit_array.split_once(<<>>) + |> should.equal(Error(Nil)) + + <<>> |> bit_array.split_once(<<1>>) |> should.equal(Error(Nil)) - <<"hello":utf8>> - |> bit_array.split_once(<<"":utf8>>) + <<1>> + |> bit_array.split_once(<<1>>) |> should.equal(Error(Nil)) - <<"hello":utf8>> - |> bit_array.split_once(<<"hello":utf8>>) + <<0>> + |> bit_array.split_once(<<1>>) |> should.equal(Error(Nil)) } @@ -237,6 +245,49 @@ pub fn split_once_erlang_only_test() { |> should.equal(Error(Nil)) } +pub fn split_test() { + <<"hello":utf8>> + |> bit_array.split(<<"l":utf8>>) + |> should.equal(Ok([<<"he":utf8>>, <<"o":utf8>>])) + + <<0, 1, 0, 2, 0, 3>> + |> bit_array.split(<<0>>) + |> should.equal(Ok([<<1>>, <<2>>, <<3>>])) + + <<1, 0>> + 
|> bit_array.split(<<0>>) + |> should.equal(Ok([<<1>>])) + + <<0, 1, 0, 2, 0, 3>> + |> bit_array.split(<<0, 2>>) + |> should.equal(Ok([<<0, 1>>, <<0, 3>>])) + + <<1>> + |> bit_array.split(<<0>>) + |> should.equal(Ok([<<1>>])) + + <<1>> + |> bit_array.split(<<1>>) + |> should.equal(Ok([])) + + <<>> + |> bit_array.split(<<1>>) + |> should.equal(Ok([])) + + <<0, 1, 2, 0, 3, 4, 5>> + |> bit_array.split(<<>>) + |> should.equal(Error(Nil)) +} + +// This test is target specific since it's using non byte-aligned BitArrays +// and those are not supported on the JavaScript target. +@target(erlang) +pub fn split_erlang_only_test() { + <<0, 1, 2:7>> + |> bit_array.split(<<1>>) + |> should.equal(Error(Nil)) +} + pub fn to_string_test() { <<>> |> bit_array.to_string From 27128853984c2d98cb70a83157d87ab16b5e40df Mon Sep 17 00:00:00 2001 From: Adam Liliemark Date: Wed, 12 Feb 2025 12:59:15 +0100 Subject: [PATCH 3/4] minor refactor of js split_once --- src/gleam_stdlib.mjs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index 5c918979..859806e7 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -428,10 +428,10 @@ export function bit_array_slice(bits, position, length) { export function bit_array_split_once(bits, pattern) { try { - if (!(bits instanceof BitArray) - || !(pattern instanceof BitArray) - || pattern.buffer.length < 1 - || pattern.buffer.length >= bits.buffer.length) { + const patternEmpty = pattern.buffer.length < 1 + const patternLongerThanBits = pattern.buffer.length >= bits.buffer.length + const incorrectArguments = !(bits instanceof BitArray) || !(pattern instanceof BitArray) + if (incorrectArguments || patternEmpty || patternLongerThanBits) { return new Error(Nil); } From f34f0658cdb849ada1e447503b2c8dd31b5e9308 Mon Sep 17 00:00:00 2001 From: Adam Liliemark Date: Fri, 14 Feb 2025 09:28:41 +0100 Subject: [PATCH 4/4] simplify js version, remove Buffer dependency, align with string/split 
--- src/gleam/bit_array.gleam | 6 +++--- src/gleam_stdlib.mjs | 17 +++++++++++------ test/gleam/bit_array_test.gleam | 18 +++++++++--------- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/src/gleam/bit_array.gleam b/src/gleam/bit_array.gleam index 1411789a..5603dea0 100644 --- a/src/gleam/bit_array.gleam +++ b/src/gleam/bit_array.gleam @@ -84,8 +84,8 @@ pub fn split_once( /// Splits a bit array into parts at the locations of the pattern. /// -/// The result will not include the pattern, and returns an empty -/// list if the pattern is not found. +/// The result will not include the pattern or any empty parts. If the +/// pattern is not found, a non-empty input is returned as the only part. /// /// This function runs in linear time. /// @@ -96,7 +96,7 @@ pub fn split_once( /// // -> Ok([<<1>>, <<2>>, <<3>>]) /// /// split(from: <<0>>, on: <<1>>) -/// // -> Ok([]) +/// // -> Ok([<<0>>]) /// ``` @external(erlang, "gleam_stdlib", "bit_array_split") @external(javascript, "../gleam_stdlib.mjs", "bit_array_split") diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index 859806e7..a3b16c32 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -14,7 +14,6 @@ import { DecodeError } from "./gleam/dynamic.mjs"; import { Some, None } from "./gleam/option.mjs"; import { Eq, Gt, Lt } from "./gleam/order.mjs"; import Dict from "./dict.mjs"; -import { Buffer } from 'node:buffer'; const Nil = undefined; const NOT_FOUND = {}; @@ -461,10 +460,9 @@ export function bit_array_split(bits, pattern) { return new Error(Nil); } - const bitsEqualToPattern = Buffer.compare(bits.buffer, pattern.buffer) === 0 - const bitsEmpty = bits.buffer.length === 0 - if (bitsEqualToPattern || bitsEmpty) { - return new Ok(List.fromArray([])); + const bitsShorter = bits.buffer.length < pattern.buffer.length + if (bitsShorter) { + return new Ok(List.fromArray(bits.buffer.length === 0 ? [] : [bits])) } const results = []; @@ -477,9 +475,16 @@ export function bit_array_split(bits, pattern) { continue find; } } + + const bitsEqualsPattern = 
bits.buffer.length === pattern.buffer.length + if (bitsEqualsPattern) { + return new Ok(List.fromArray([])); + } + if (i > lastIndex) { results.push(new BitArray(bits.buffer.slice(lastIndex, i))); } + lastIndex = i + pattern.buffer.length; i = lastIndex - 1; } @@ -488,7 +493,7 @@ export function bit_array_split(bits, pattern) { results.push(new BitArray(bits.buffer.slice(lastIndex))); } - return new Ok(List.fromArray(results.length ? results : [bits])); + return new Ok(List.fromArray(results)) } catch (e) { return new Error(Nil); } diff --git a/test/gleam/bit_array_test.gleam b/test/gleam/bit_array_test.gleam index 19328fd7..c70a8c75 100644 --- a/test/gleam/bit_array_test.gleam +++ b/test/gleam/bit_array_test.gleam @@ -254,24 +254,24 @@ pub fn split_test() { |> bit_array.split(<<0>>) |> should.equal(Ok([<<1>>, <<2>>, <<3>>])) - <<1, 0>> - |> bit_array.split(<<0>>) - |> should.equal(Ok([<<1>>])) - <<0, 1, 0, 2, 0, 3>> |> bit_array.split(<<0, 2>>) |> should.equal(Ok([<<0, 1>>, <<0, 3>>])) - <<1>> + <<1, 0>> |> bit_array.split(<<0>>) |> should.equal(Ok([<<1>>])) - <<1>> + <<1, 0>> |> bit_array.split(<<1>>) - |> should.equal(Ok([])) + |> should.equal(Ok([<<0>>])) - <<>> - |> bit_array.split(<<1>>) + <<1>> + |> bit_array.split(<<0>>) + |> should.equal(Ok([<<1>>])) + + <<1, 2>> + |> bit_array.split(<<1, 2>>) |> should.equal(Ok([])) <<0, 1, 2, 0, 3, 4, 5>>