Skip to content

Commit a4a445a

Browse files
committed
util: add a method to get string encoding info
Add a util method to check the encoding information of a string. The encoding information is from V8. Refs: #56090
1 parent c4aa34a commit a4a445a

File tree

5 files changed

+112
-0
lines changed

5 files changed

+112
-0
lines changed

doc/api/util.md

+20
Original file line numberDiff line numberDiff line change
@@ -3142,6 +3142,26 @@ util.isArray({});
31423142
// Returns: false
31433143
```
31443144
3145+
## `util.getStringEncodingInfo(content)`
3146+
3147+
<!-- YAML
3148+
added: REPLACEME
3149+
-->
3150+
3151+
* `content` {string}
3152+
* Returns: {Object}
3153+
3154+
Returns the encoding of `content` (`'latin1'` or `'utf16le'`) and its byte length in that encoding.
3155+
3156+
```js
3157+
const { getStringEncodingInfo } = require('node:util');
3158+
3159+
getStringEncodingInfo('hello world');
3160+
// Returns: { encoding: 'latin1', byteLength: 11 }
3161+
getStringEncodingInfo('你好');
3162+
// Returns: { encoding: 'utf16le', byteLength: 4 }
3163+
```
3164+
31453165
[Common System Errors]: errors.md#common-system-errors
31463166
[Custom inspection functions on objects]: #custom-inspection-functions-on-objects
31473167
[Custom promisified functions]: #custom-promisified-functions

lib/util.js

+24
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,29 @@ function getCallSites(frameCount = 10, options) {
419419
return binding.getCallSites(frameCount);
420420
};
421421

422+
/**
423+
* Get the encoding info of a string: 'latin1' when the native binding
424+
* reports the string as one-byte, 'utf16le' otherwise. Throws an exception
* when the content is not a string.
425+
* @param {string} content
426+
* @returns {{encoding: 'latin1' | 'utf16le', byteLength: number}}
427+
* @throws {ERR_INVALID_ARG_TYPE} When `content` is not a string.
*/
428+
function getStringEncodingInfo(content) {
429+
if (typeof content !== 'string') {
430+
throw new ERR_INVALID_ARG_TYPE('content', 'string', content);
431+
}
432+
const isOneByte = binding.isStringOneByte(content);
433+
if (isOneByte) {
434+
return {
435+
encoding: 'latin1',
436+
// One-byte string: the byte length equals the string length.
byteLength: content.length,
437+
};
438+
}
439+
return {
440+
encoding: 'utf16le',
441+
// Otherwise two bytes are counted per UTF-16 code unit.
byteLength: content.length * 2,
442+
};
443+
}
444+
422445
// Keep the `exports =` so that various functions can still be monkeypatched
423446
module.exports = {
424447
_errnoException,
@@ -470,6 +493,7 @@ module.exports = {
470493
},
471494
types,
472495
parseEnv,
496+
getStringEncodingInfo,
473497
};
474498

475499
defineLazyProperties(

src/node_external_reference.h

+4
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ namespace node {
1212

1313
using CFunctionCallbackWithOneByteString =
1414
uint32_t (*)(v8::Local<v8::Value>, const v8::FastOneByteString&);
15+
16+
using CFunctionCallbackReturnBool = bool (*)(v8::Local<v8::Value> unused,
17+
v8::Local<v8::Value> receiver);
1518
using CFunctionCallback = void (*)(v8::Local<v8::Value> unused,
1619
v8::Local<v8::Value> receiver);
1720
using CFunctionCallbackReturnDouble =
@@ -90,6 +93,7 @@ class ExternalReferenceRegistry {
9093
#define ALLOWED_EXTERNAL_REFERENCE_TYPES(V) \
9194
V(CFunctionCallback) \
9295
V(CFunctionCallbackWithOneByteString) \
96+
V(CFunctionCallbackReturnBool) \
9397
V(CFunctionCallbackReturnDouble) \
9498
V(CFunctionCallbackReturnInt32) \
9599
V(CFunctionCallbackValueReturnDouble) \

src/node_util.cc

+23
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,21 @@ static void GetCallSites(const FunctionCallbackInfo<Value>& args) {
298298
args.GetReturnValue().Set(callsites);
299299
}
300300

301+
// Binding callback: sets the return value to whether the single string
// argument reports IsOneByte(). CHECKs that exactly one argument was passed
// and that it is a string.
static void IsStringOneByte(const FunctionCallbackInfo<Value>& args) {
302+
CHECK_EQ(args.Length(), 1);
303+
CHECK(args[0]->IsString());
304+
bool is_one_byte = args[0].As<String>()->IsOneByte();
305+
args.GetReturnValue().Set(is_one_byte);
306+
}
307+
308+
// Fast API counterpart of IsStringOneByte: returns whether `target` reports
// IsOneByte(). CHECKs that `target` is a string; `receiver` is not used.
static bool FastIsStringOneByte(Local<Value> receiver,
309+
const Local<Value> target) {
310+
CHECK(target->IsString());
311+
return target.As<String>()->IsOneByte();
312+
}
313+
314+
// CFunction descriptor built from FastIsStringOneByte.
CFunction fast_is_string_one_byte_(CFunction::Make(FastIsStringOneByte));
315+
301316
static void IsInsideNodeModules(const FunctionCallbackInfo<Value>& args) {
302317
Isolate* isolate = args.GetIsolate();
303318
CHECK_EQ(args.Length(), 2);
@@ -356,6 +371,9 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
356371
registry->Register(fast_guess_handle_type_.GetTypeInfo());
357372
registry->Register(ParseEnv);
358373
registry->Register(IsInsideNodeModules);
374+
registry->Register(IsStringOneByte);
375+
registry->Register(FastIsStringOneByte);
376+
registry->Register(fast_is_string_one_byte_.GetTypeInfo());
359377
}
360378

361379
void Initialize(Local<Object> target,
@@ -471,6 +489,11 @@ void Initialize(Local<Object> target,
471489
"guessHandleType",
472490
GuessHandleType,
473491
&fast_guess_handle_type_);
492+
SetFastMethodNoSideEffect(context,
493+
target,
494+
"isStringOneByte",
495+
IsStringOneByte,
496+
&fast_is_string_one_byte_);
474497
}
475498

476499
} // namespace util
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// Flags: --expose-internals
2+
'use strict';
3+
require('../common');
4+
const assert = require('assert');
5+
const { getStringEncodingInfo } = require('util');
6+
7+
// Every non-string value must make getStringEncodingInfo() throw an error
// whose message names the "content" argument and the expected string type.
[
8+
undefined,
9+
null,
10+
false,
11+
5n,
12+
5,
13+
Symbol(),
14+
() => {},
15+
{},
16+
].forEach((value) => {
17+
assert.throws(
18+
() => { getStringEncodingInfo(value); },
19+
/The "content" argument must be of type string/
20+
);
21+
});
22+
23+
// An ASCII-only string is expected to be reported as 'latin1', with a byte
// length equal to its string length.
{
24+
const latin1String = 'hello world!';
25+
// Run this inside a for loop to trigger the fast API
26+
for (let i = 0; i < 10_000; i++) {
27+
const { encoding, byteLength } = getStringEncodingInfo(latin1String);
28+
assert.strictEqual(encoding, 'latin1');
29+
assert.strictEqual(byteLength, latin1String.length);
30+
}
31+
}
32+
33+
// A string containing CJK characters and emoji is expected to be reported as
// 'utf16le', with a byte length of twice its string length.
{
34+
const utf16String = '你好😀😃';
35+
// Run this inside a for loop to trigger the fast API
36+
for (let i = 0; i < 10_000; i++) {
37+
const { encoding, byteLength } = getStringEncodingInfo(utf16String);
38+
assert.strictEqual(encoding, 'utf16le');
39+
assert.strictEqual(byteLength, utf16String.length * 2);
40+
}
41+
}

0 commit comments

Comments
 (0)