Skip to content

Commit 3071797

Browse files
committed
src: improve buffer.transcode performance
1 parent 20aff2b commit 3071797

File tree

2 files changed

+77
-63
lines changed

2 files changed

+77
-63
lines changed

benchmark/buffers/buffer-transcode.js

+29
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,29 @@
1+
'use strict';
2+
// Benchmark harness helpers (provides createBenchmark, used below).
const common = require('../common.js');
3+
const assert = require('node:assert');
4+
const buffer = require('node:buffer');
5+
6+
// Encoding names used for both the source and the target axis of the matrix.
const encodings = ['latin1', 'ascii', 'ucs2', 'utf8'];
7+
8+
// Registers `main` with a parameter matrix: every fromEncoding/toEncoding
// pair from `encodings`, three input lengths, and a single iteration count.
const bench = common.createBenchmark(main, {
9+
fromEncoding: encodings,
10+
toEncoding: encodings,
11+
length: [1, 10, 1000],
12+
n: [1e5],
13+
}, {
14+
// Returns false only for the ucs2 -> utf8 pair and true for every other
// pair. Presumably createBenchmark skips combinations for which this
// returns false -- confirm against ../common.js.
combinationFilter(p) {
15+
return !(p.fromEncoding === 'ucs2' && p.toEncoding === 'utf8');
16+
},
17+
});
18+
19+
// Benchmark body: calls buffer.transcode() `n` times on the same input and
// times only the loop (between bench.start() and bench.end(n)).
//   n            - number of transcode calls to time
//   fromEncoding - source encoding name passed to buffer.transcode()
//   toEncoding   - target encoding name passed to buffer.transcode()
//   length       - number of 'a' characters used to build the input
function main({ n, fromEncoding, toEncoding, length }) {
20+
// NOTE(review): the input is created without an explicit encoding argument,
// so it is not encoded in `fromEncoding` -- confirm this is intended,
// especially for fromEncoding === 'ucs2' with an odd `length`.
const input = Buffer.from('a'.repeat(length));
21+
let out = 0;
22+
bench.start();
23+
for (let i = 0; i < n; i++) {
24+
const dest = buffer.transcode(input, fromEncoding, toEncoding);
25+
// Accumulates the byteLength of `dest.buffer` (not `dest.length`), so each
// result is read after the call.
out += dest.buffer.byteLength;
26+
}
27+
bench.end(n);
28+
// Uses `out` after timing; presumably so the loop's results are not treated
// as dead code -- the check itself can only fail if `out` is negative or NaN.
assert.ok(out >= 0);
29+
}

src/node_i18n.cc

+48-63
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@
4242

4343
#include "node_i18n.h"
4444
#include "node_external_reference.h"
45+
#include "simdutf.h"
4546

4647
#if defined(NODE_HAVE_I18N_SUPPORT)
4748

@@ -147,7 +148,6 @@ MaybeLocal<Object> Transcode(Environment* env,
147148
const char* source,
148149
const size_t source_length,
149150
UErrorCode* status) {
150-
*status = U_ZERO_ERROR;
151151
MaybeLocal<Object> ret;
152152
MaybeStackBuffer<char> result;
153153
Converter to(toEncoding);
@@ -170,22 +170,21 @@ MaybeLocal<Object> Transcode(Environment* env,
170170
return ret;
171171
}
172172

// Diff hunk: lines marked "N-" are the removed TranscodeToUcs2 (ICU
// ucnv_toUChars path); lines marked "N+" are the added TranscodeLatin1ToUcs2
// (simdutf path). The two implementations are interleaved below.
173-
MaybeLocal<Object> TranscodeToUcs2(Environment* env,
174-
const char* fromEncoding,
175-
const char* toEncoding,
176-
const char* source,
177-
const size_t source_length,
178-
UErrorCode* status) {
179-
*status = U_ZERO_ERROR;
180-
MaybeLocal<Object> ret;
173+
// Added function: converts `source` (source_length bytes) with
// simdutf::convert_latin1_to_utf16 and returns the result via Buffer::New.
// On a zero-length conversion result it sets *status to
// U_INVALID_CHAR_FOUND and returns an empty MaybeLocal.
// `fromEncoding` and `toEncoding` are not read by the added body.
// NOTE(review): unlike the removed version, *status is no longer reset to
// U_ZERO_ERROR here -- presumably the caller initializes it; confirm.
MaybeLocal<Object> TranscodeLatin1ToUcs2(Environment* env,
174+
const char* fromEncoding,
175+
const char* toEncoding,
176+
const char* source,
177+
const size_t source_length,
178+
UErrorCode* status) {
181179
// Destination buffer constructed with source_length (one UChar per input
// byte); this line is shared by the old and the new implementation.
MaybeStackBuffer<UChar> destbuf(source_length);
182-
Converter from(fromEncoding);
183-
const size_t length_in_chars = source_length * sizeof(UChar);
184-
ucnv_toUChars(from.conv(), *destbuf, length_in_chars,
185-
source, source_length, status);
186-
if (U_SUCCESS(*status))
187-
ret = ToBufferEndian(env, &destbuf);
188-
return ret;
180+
auto actual_length =
181+
simdutf::convert_latin1_to_utf16(source, source_length, destbuf.out());
182+
// A return value of 0 is treated as a conversion failure.
// NOTE(review): if source_length can be 0 here, an empty input would also
// take this branch -- confirm that callers never pass empty input.
if (actual_length == 0) {
183+
*status = U_INVALID_CHAR_FOUND;
184+
return {};
185+
}
186+
187+
// NOTE(review): the removed code returned via ToBufferEndian; the added code
// uses Buffer::New directly -- confirm byte-order handling is equivalent.
return Buffer::New(env, &destbuf);
189188
}
190189

191190
MaybeLocal<Object> TranscodeFromUcs2(Environment* env,
@@ -194,13 +193,11 @@ MaybeLocal<Object> TranscodeFromUcs2(Environment* env,
194193
const char* source,
195194
const size_t source_length,
196195
UErrorCode* status) {
197-
*status = U_ZERO_ERROR;
198196
MaybeStackBuffer<UChar> sourcebuf;
199197
MaybeLocal<Object> ret;
200198
Converter to(toEncoding);
201199

202-
size_t sublen = ucnv_getMinCharSize(to.conv());
203-
std::string sub(sublen, '?');
200+
std::string sub(to.min_char_size(), '?');
204201
to.set_subst_chars(sub.c_str());
205202

206203
const size_t length_in_chars = source_length / sizeof(UChar);
@@ -221,26 +218,20 @@ MaybeLocal<Object> TranscodeUcs2FromUtf8(Environment* env,
221218
const char* source,
222219
const size_t source_length,
223220
UErrorCode* status) {
224-
*status = U_ZERO_ERROR;
225-
MaybeStackBuffer<UChar> destbuf;
226-
int32_t result_length;
227-
u_strFromUTF8(*destbuf, destbuf.capacity(), &result_length,
228-
source, source_length, status);
229-
MaybeLocal<Object> ret;
230-
if (U_SUCCESS(*status)) {
231-
destbuf.SetLength(result_length);
232-
ret = ToBufferEndian(env, &destbuf);
233-
} else if (*status == U_BUFFER_OVERFLOW_ERROR) {
234-
*status = U_ZERO_ERROR;
235-
destbuf.AllocateSufficientStorage(result_length);
236-
u_strFromUTF8(*destbuf, result_length, &result_length,
237-
source, source_length, status);
238-
if (U_SUCCESS(*status)) {
239-
destbuf.SetLength(result_length);
240-
ret = ToBufferEndian(env, &destbuf);
241-
}
221+
size_t expected_utf16_length =
222+
simdutf::utf16_length_from_utf8(source, source_length);
223+
MaybeStackBuffer<UChar> destbuf(expected_utf16_length);
224+
auto actual_length =
225+
simdutf::convert_utf8_to_utf16(source, source_length, destbuf.out());
226+
227+
if (actual_length == 0) {
228+
*status = U_INVALID_CHAR_FOUND;
229+
return {};
242230
}
243-
return ret;
231+
232+
CHECK_EQ(actual_length, expected_utf16_length);
233+
234+
return Buffer::New(env, &destbuf);
244235
}
245236

246237
MaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env,
@@ -249,32 +240,27 @@ MaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env,
249240
const char* source,
250241
const size_t source_length,
251242
UErrorCode* status) {
252-
*status = U_ZERO_ERROR;
253-
MaybeLocal<Object> ret;
254243
const size_t length_in_chars = source_length / sizeof(UChar);
255-
int32_t result_length;
256-
MaybeStackBuffer<UChar> sourcebuf;
257-
MaybeStackBuffer<char> destbuf;
258-
CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars);
259-
u_strToUTF8(*destbuf, destbuf.capacity(), &result_length,
260-
*sourcebuf, length_in_chars, status);
261-
if (U_SUCCESS(*status)) {
262-
destbuf.SetLength(result_length);
263-
ret = ToBufferEndian(env, &destbuf);
264-
} else if (*status == U_BUFFER_OVERFLOW_ERROR) {
265-
*status = U_ZERO_ERROR;
266-
destbuf.AllocateSufficientStorage(result_length);
267-
u_strToUTF8(*destbuf, result_length, &result_length, *sourcebuf,
268-
length_in_chars, status);
269-
if (U_SUCCESS(*status)) {
270-
destbuf.SetLength(result_length);
271-
ret = ToBufferEndian(env, &destbuf);
272-
}
244+
size_t expected_utf8_length = simdutf::utf8_length_from_utf16(
245+
reinterpret_cast<const char16_t*>(source), length_in_chars);
246+
247+
MaybeStackBuffer<char> destbuf(expected_utf8_length);
248+
auto actual_length =
249+
simdutf::convert_utf16_to_utf8(reinterpret_cast<const char16_t*>(source),
250+
length_in_chars,
251+
destbuf.out());
252+
253+
if (actual_length == 0) {
254+
*status = U_INVALID_CHAR_FOUND;
255+
return {};
273256
}
274-
return ret;
257+
258+
CHECK_EQ(actual_length, expected_utf8_length);
259+
260+
return Buffer::New(env, &destbuf);
275261
}
276262

277-
const char* EncodingName(const enum encoding encoding) {
263+
constexpr const char* EncodingName(const enum encoding encoding) {
278264
switch (encoding) {
279265
case ASCII: return "us-ascii";
280266
case LATIN1: return "iso8859-1";
@@ -284,7 +270,7 @@ const char* EncodingName(const enum encoding encoding) {
284270
}
285271
}
286272

287-
bool SupportedEncoding(const enum encoding encoding) {
273+
constexpr bool SupportedEncoding(const enum encoding encoding) {
288274
switch (encoding) {
289275
case ASCII:
290276
case LATIN1:
@@ -309,8 +295,7 @@ void Transcode(const FunctionCallbackInfo<Value>&args) {
309295
switch (fromEncoding) {
310296
case ASCII:
311297
case LATIN1:
312-
if (toEncoding == UCS2)
313-
tfn = &TranscodeToUcs2;
298+
if (toEncoding == UCS2) tfn = &TranscodeLatin1ToUcs2;
314299
break;
315300
case UTF8:
316301
if (toEncoding == UCS2)

0 commit comments

Comments
 (0)