From 3a02e16bb34d43e8cc341682a12ff871c3b44729 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 12 Jan 2024 12:31:37 +0000 Subject: [PATCH 1/3] Remove DIRTY NIF flag https://www.erlang.org/doc/man/erl_nif.html documentation indicates the threshold for being a dirty NIF should be about 1ms. Generally compress/decompress is much quicker than this. Testing indicates a significant overhead from using this flag when compress/decompress is in fact very fast. --- c_src/zstd_nif.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/c_src/zstd_nif.c b/c_src/zstd_nif.c index 6997c61..1287d8e 100644 --- a/c_src/zstd_nif.c +++ b/c_src/zstd_nif.c @@ -405,8 +405,8 @@ static int zstd_on_upgrade(ErlNifEnv *env, void **priv, void **old, ERL_NIF_TERM } static ErlNifFunc nif_funcs[] = { - { "compress" , 2, zstd_nif_compress , ERL_DIRTY_JOB_CPU_BOUND }, - { "decompress" , 1, zstd_nif_decompress , ERL_DIRTY_JOB_CPU_BOUND }, + { "compress" , 2, zstd_nif_compress }, + { "decompress" , 1, zstd_nif_decompress }, { "new_compression_stream" , 0, zstd_nif_new_compression_stream }, { "new_decompression_stream" , 0, zstd_nif_new_decompression_stream }, From 04455c2d267291a68b62962624bd14556bd47d08 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Sat, 13 Jan 2024 00:20:33 +0000 Subject: [PATCH 2/3] Auto-select dirty nif based on byte_size Using a dirty_nif for smaller objects has a performance penalty so: - by default don't use the dirty nif if compressing < 250KB or uncompressing < 50KB - allow application to override and specifically request either dirty or quick method. 
--- c_src/zstd_nif.c | 6 ++-- src/zstd.erl | 32 +++++++++++++++++++-- test/zstd_tests.erl | 69 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 4 deletions(-) diff --git a/c_src/zstd_nif.c b/c_src/zstd_nif.c index 1287d8e..ddbade3 100644 --- a/c_src/zstd_nif.c +++ b/c_src/zstd_nif.c @@ -405,8 +405,10 @@ static int zstd_on_upgrade(ErlNifEnv *env, void **priv, void **old, ERL_NIF_TERM } static ErlNifFunc nif_funcs[] = { - { "compress" , 2, zstd_nif_compress }, - { "decompress" , 1, zstd_nif_decompress }, + { "dirty_compress" , 2, zstd_nif_compress , ERL_DIRTY_JOB_CPU_BOUND }, + { "dirty_decompress" , 1, zstd_nif_decompress , ERL_DIRTY_JOB_CPU_BOUND }, + { "quick_compress" , 2, zstd_nif_compress }, + { "quick_decompress" , 1, zstd_nif_decompress }, { "new_compression_stream" , 0, zstd_nif_new_compression_stream }, { "new_decompression_stream" , 0, zstd_nif_new_decompression_stream }, diff --git a/src/zstd.erl b/src/zstd.erl index 1080bec..e4485d5 100644 --- a/src/zstd.erl +++ b/src/zstd.erl @@ -2,6 +2,8 @@ -export([compress/1, compress/2]). -export([decompress/1]). +-export([quick_compress/2, quick_decompress/1]). +-export([dirty_compress/2, dirty_decompress/1]). -export([new_compression_stream/0, new_decompression_stream/0, compression_stream_init/1, compression_stream_init/2, decompression_stream_init/1, compression_stream_reset/2, compression_stream_reset/1, decompression_stream_reset/1, stream_flush/1, @@ -12,17 +14,43 @@ -define(APPNAME, zstd). -define(LIBNAME, zstd_nif). +% Thresholds at which it is preferable to use a dirty_nif +-define(UNCOMPRESSED_SIZE_DIRTY, 250000). +-define(COMPRESSED_SIZE_DIRTY, 50000). + -spec compress(Uncompressed :: binary()) -> Compressed :: binary(). compress(Binary) -> compress(Binary, 1). -spec compress(Uncompressed :: binary(), CompressionLevel :: 0..22) -> Compressed :: binary(). 
-compress(_, _) -> +compress(Uncompressed, Level) when byte_size(Uncompressed) > ?UNCOMPRESSED_SIZE_DIRTY -> + dirty_compress(Uncompressed, Level); +compress(Uncompressed, Level) -> + quick_compress(Uncompressed, Level). + +-spec dirty_compress( + Uncompressed :: binary(), CompressionLevel :: 0..22) -> Compressed :: binary(). +dirty_compress(_, _) -> + erlang:nif_error(?LINE). + +-spec quick_compress( + Uncompressed :: binary(), CompressionLevel :: 0..22) -> Compressed :: binary(). +quick_compress(_, _) -> erlang:nif_error(?LINE). -spec decompress(Compressed :: binary()) -> Uncompressed :: binary() | error. -decompress(_) -> +decompress(Compressed) when byte_size(Compressed) > ?COMPRESSED_SIZE_DIRTY -> + dirty_decompress(Compressed); +decompress(Compressed) -> + quick_decompress(Compressed). + +-spec dirty_decompress(Compressed :: binary()) -> Uncompressed :: binary() | error. +dirty_decompress(_) -> + erlang:nif_error(?LINE). + +-spec quick_decompress(Compressed :: binary()) -> Uncompressed :: binary() | error. +quick_decompress(_) -> erlang:nif_error(?LINE). -spec new_compression_stream() -> reference(). diff --git a/test/zstd_tests.erl b/test/zstd_tests.erl index a67ea1b..37113ba 100644 --- a/test/zstd_tests.erl +++ b/test/zstd_tests.erl @@ -20,3 +20,72 @@ zstd_stream_test() -> {ok, DBin1} = zstd:stream_decompress(DStream, CompressionBin), {ok, DBin2} = zstd:stream_decompress(DStream, FlushBin), ?assertEqual(Bin, <<DBin1/binary, DBin2/binary>>). + +generate_randomkeys(Count, BucketRangeLow, BucketRangeHigh) -> + generate_randomkeys(Count, [], BucketRangeLow, BucketRangeHigh). 
+ +generate_randomkeys(0, Acc, _BucketLow, _BucketHigh) -> + Acc; +generate_randomkeys(Count, Acc, BucketLow, BRange) -> + BNumber = + lists:flatten( + io_lib:format( + "~4..0B", [BucketLow + rand:uniform(BRange)])), + KNumber = + lists:flatten( + io_lib:format("~4..0B", [rand:uniform(1000)])), + K = {o, "Bucket" ++ BNumber, "Key" ++ KNumber, null}, + RandKey = + {K, {Count + 1, {active, infinity}, erlang:phash2(K), null}}, + generate_randomkeys(Count - 1, [RandKey|Acc], BucketLow, BRange). + + +compression_perf_test_() -> + {timeout, 60, fun compression_perf_testsizes/0}. + +compression_perf_testsizes() -> + compression_perf_tester(128), + compression_perf_tester(256), + compression_perf_tester(512), + compression_perf_tester(1024), + compression_perf_tester(2048), + compression_perf_tester(4096), + compression_perf_tester(8192). + +compression_perf_tester(N) -> + Loops = 100, + {TotalCS, TotalDS, TotalDC, TotalDD, TotalQC, TotalQD, TotalAC, TotalAD} = + lists:foldl( + fun(_A, {CST, DST, CTDT, DTDT, CTQT, DTQT, CTT, DTT}) -> + RB0 = + term_to_binary( + {base64:encode(crypto:strong_rand_bytes(N * 8)), + (generate_randomkeys(N, 1, 4))}), + {CTD0, CD0} = timer:tc(fun() -> zstd:dirty_compress(RB0, 1) end), + {DTD0, DD0} = timer:tc(fun() -> zstd:dirty_decompress(CD0) end), + {CTQ0, CQ0} = timer:tc(fun() -> zstd:quick_compress(RB0, 1) end), + {DTQ0, DQ0} = timer:tc(fun() -> zstd:quick_decompress(CQ0) end), + {CT0, C0} = timer:tc(fun() -> zstd:compress(RB0) end), + {DT0, D0} = timer:tc(fun() -> zstd:decompress(C0) end), + + ?assertMatch(RB0, DD0), + ?assertMatch(DD0, DQ0), + ?assertMatch(DQ0, D0), + + {CST + byte_size(RB0), DST + byte_size(C0), + CTDT + CTD0, DTDT + DTD0, CTQT + CTQ0, + DTQT + DTQ0, CTT + CT0, DTT + DT0} + end, + {0, 0, 0, 0, 0, 0, 0, 0}, + lists:seq(1, Loops) + ), + + io:format( + user, + "Over ~w loops tested size ~w compress_size ~w~n" + "mean compress time dirty_nif ~w quick_nif ~w auto_nif ~w~n" + "mean decompress time dirty_nif ~w quick_nif ~w 
auto_nif ~w~n~n", + [Loops, TotalCS div Loops, TotalDS div Loops, + TotalDC div Loops, TotalQC div Loops, TotalAC div Loops, + TotalDD div Loops, TotalQD div Loops, TotalAD div Loops] + ). \ No newline at end of file From d2f4eeffc1738f32d208b3f7db6ab0c102f0e488 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 23 Jan 2024 21:05:14 +0000 Subject: [PATCH 3/3] Add auto-formatting changes --- src/zstd.erl | 11 +++---- test/zstd_tests.erl | 79 +++++++++++++++++++++++---------------------- 2 files changed, 46 insertions(+), 44 deletions(-) diff --git a/src/zstd.erl b/src/zstd.erl index e4485d5..9c54f83 100644 --- a/src/zstd.erl +++ b/src/zstd.erl @@ -13,7 +13,6 @@ -define(APPNAME, zstd). -define(LIBNAME, zstd_nif). - % Thresholds at which it is preferable to use a dirty_nif -define(UNCOMPRESSED_SIZE_DIRTY, 250000). -define(COMPRESSED_SIZE_DIRTY, 50000). @@ -29,13 +28,13 @@ compress(Uncompressed, Level) when byte_size(Uncompressed) > ?UNCOMPRESSED_SIZE_ compress(Uncompressed, Level) -> quick_compress(Uncompressed, Level). --spec dirty_compress( - Uncompressed :: binary(), CompressionLevel :: 0..22) -> Compressed :: binary(). +-spec dirty_compress(Uncompressed :: binary(), CompressionLevel :: 0..22) -> + Compressed :: binary(). dirty_compress(_, _) -> erlang:nif_error(?LINE). --spec quick_compress( - Uncompressed :: binary(), CompressionLevel :: 0..22) -> Compressed :: binary(). +-spec quick_compress(Uncompressed :: binary(), CompressionLevel :: 0..22) -> + Compressed :: binary(). quick_compress(_, _) -> erlang:nif_error(?LINE). @@ -50,7 +49,7 @@ dirty_decompress(_) -> erlang:nif_error(?LINE). -spec quick_decompress(Compressed :: binary()) -> Uncompressed :: binary() | error. -quick_decompress(_) -> +quick_decompress(_) -> erlang:nif_error(?LINE). -spec new_compression_stream() -> reference(). 
diff --git a/test/zstd_tests.erl b/test/zstd_tests.erl index 37113ba..5bc1962 100644 --- a/test/zstd_tests.erl +++ b/test/zstd_tests.erl @@ -29,16 +29,13 @@ generate_randomkeys(0, Acc, _BucketLow, _BucketHigh) -> generate_randomkeys(Count, Acc, BucketLow, BRange) -> BNumber = lists:flatten( - io_lib:format( - "~4..0B", [BucketLow + rand:uniform(BRange)])), + io_lib:format("~4..0B", [BucketLow + rand:uniform(BRange)])), KNumber = lists:flatten( io_lib:format("~4..0B", [rand:uniform(1000)])), K = {o, "Bucket" ++ BNumber, "Key" ++ KNumber, null}, - RandKey = - {K, {Count + 1, {active, infinity}, erlang:phash2(K), null}}, - generate_randomkeys(Count - 1, [RandKey|Acc], BucketLow, BRange). - + RandKey = {K, {Count + 1, {active, infinity}, erlang:phash2(K), null}}, + generate_randomkeys(Count - 1, [RandKey | Acc], BucketLow, BRange). compression_perf_test_() -> {timeout, 60, fun compression_perf_testsizes/0}. @@ -51,41 +48,47 @@ compression_perf_testsizes() -> compression_perf_tester(2048), compression_perf_tester(4096), compression_perf_tester(8192). 
- + compression_perf_tester(N) -> Loops = 100, {TotalCS, TotalDS, TotalDC, TotalDD, TotalQC, TotalQD, TotalAC, TotalAD} = - lists:foldl( - fun(_A, {CST, DST, CTDT, DTDT, CTQT, DTQT, CTT, DTT}) -> - RB0 = - term_to_binary( - {base64:encode(crypto:strong_rand_bytes(N * 8)), - (generate_randomkeys(N, 1, 4))}), - {CTD0, CD0} = timer:tc(fun() -> zstd:dirty_compress(RB0, 1) end), - {DTD0, DD0} = timer:tc(fun() -> zstd:dirty_decompress(CD0) end), - {CTQ0, CQ0} = timer:tc(fun() -> zstd:quick_compress(RB0, 1) end), - {DTQ0, DQ0} = timer:tc(fun() -> zstd:quick_decompress(CQ0) end), - {CT0, C0} = timer:tc(fun() -> zstd:compress(RB0) end), - {DT0, D0} = timer:tc(fun() -> zstd:decompress(C0) end), + lists:foldl(fun(_A, {CST, DST, CTDT, DTDT, CTQT, DTQT, CTT, DTT}) -> + RB0 = term_to_binary({base64:encode( + crypto:strong_rand_bytes(N * 8)), + generate_randomkeys(N, 1, 4)}), + {CTD0, CD0} = timer:tc(fun() -> zstd:dirty_compress(RB0, 1) end), + {DTD0, DD0} = timer:tc(fun() -> zstd:dirty_decompress(CD0) end), + {CTQ0, CQ0} = timer:tc(fun() -> zstd:quick_compress(RB0, 1) end), + {DTQ0, DQ0} = timer:tc(fun() -> zstd:quick_decompress(CQ0) end), + {CT0, C0} = timer:tc(fun() -> zstd:compress(RB0) end), + {DT0, D0} = timer:tc(fun() -> zstd:decompress(C0) end), - ?assertMatch(RB0, DD0), - ?assertMatch(DD0, DQ0), - ?assertMatch(DQ0, D0), + ?assertMatch(RB0, DD0), + ?assertMatch(DD0, DQ0), + ?assertMatch(DQ0, D0), - {CST + byte_size(RB0), DST + byte_size(C0), - CTDT + CTD0, DTDT + DTD0, CTQT + CTQ0, - DTQT + DTQ0, CTT + CT0, DTT + DT0} - end, - {0, 0, 0, 0, 0, 0, 0, 0}, - lists:seq(1, Loops) - ), + {CST + byte_size(RB0), + DST + byte_size(C0), + CTDT + CTD0, + DTDT + DTD0, + CTQT + CTQ0, + DTQT + DTQ0, + CTT + CT0, + DTT + DT0} + end, + {0, 0, 0, 0, 0, 0, 0, 0}, + lists:seq(1, Loops)), - io:format( - user, - "Over ~w loops tested size ~w compress_size ~w~n" - "mean compress time dirty_nif ~w quick_nif ~w auto_nif ~w~n" - "mean decompress time dirty_nif ~w quick_nif ~w auto_nif ~w~n~n", - 
[Loops, TotalCS div Loops, TotalDS div Loops, - TotalDC div Loops, TotalQC div Loops, TotalAC div Loops, - TotalDD div Loops, TotalQD div Loops, TotalAD div Loops] - ). \ No newline at end of file + io:format(user, + "Over ~w loops tested size ~w compress_size ~w~n" + "mean compress time dirty_nif ~w quick_nif ~w auto_nif ~w~n" + "mean decompress time dirty_nif ~w quick_nif ~w auto_nif ~w~n~n", + [Loops, + TotalCS div Loops, + TotalDS div Loops, + TotalDC div Loops, + TotalQC div Loops, + TotalAC div Loops, + TotalDD div Loops, + TotalQD div Loops, + TotalAD div Loops]).