File tree 5 files changed +73
-0
lines changed
guide/src/reference/types
5 files changed +73
-0
lines changed Original file line number Diff line number Diff line change @@ -3522,6 +3522,22 @@ impl JsString {
3522
3522
None
3523
3523
}
3524
3524
}
3525
+
3526
+ /// Returns whether this string is a valid UTF-16 string.
3527
+ ///
3528
+ /// This is useful for learning whether `as_string()` will return a lossless
3529
+ /// representation of the JS string. If this string contains lone surrogates
3530
+ /// then `as_string()` will succeed but it will be a lossy representation of
3531
+ /// the JS string because lone surrogates will become replacement
3532
+ /// characters.
3533
+ ///
3534
+ /// If this function returns `false` then to get a lossless representation
3535
+ /// of the string you'll need to manually use the `char_code_at` accessor to
3536
+ /// access the raw UTF-16 code units.
3537
+ pub fn is_valid_utf16 ( & self ) -> bool {
3538
+ let iter = ( 0 ..self . length ( ) ) . map ( |i| self . char_code_at ( i) as u16 ) ;
3539
+ std:: char:: decode_utf16 ( iter) . all ( |i| i. is_ok ( ) )
3540
+ }
3525
3541
}
3526
3542
3527
3543
impl PartialEq < str > for JsString {
Original file line number Diff line number Diff line change @@ -541,3 +541,15 @@ fn raw() {
541
541
) ;
542
542
assert ! ( JsString :: raw_0( & JsValue :: null( ) . unchecked_into( ) ) . is_err( ) ) ;
543
543
}
544
+
545
+ #[ wasm_bindgen_test]
546
+ fn is_valid_utf16 ( ) {
547
+ assert ! ( JsString :: from( "a" ) . is_valid_utf16( ) ) ;
548
+ assert ! ( JsString :: from( "" ) . is_valid_utf16( ) ) ;
549
+ assert ! ( JsString :: from( "🥑" ) . is_valid_utf16( ) ) ;
550
+ assert ! ( JsString :: from( "Why hello there this, 🥑, is 🥑 and is 🥑" ) . is_valid_utf16( ) ) ;
551
+
552
+ assert ! ( JsString :: from_char_code1( 0x00 ) . is_valid_utf16( ) ) ;
553
+ assert ! ( !JsString :: from_char_code1( 0xd800 ) . is_valid_utf16( ) ) ;
554
+ assert ! ( !JsString :: from_char_code1( 0xdc00 ) . is_valid_utf16( ) ) ;
555
+ }
Original file line number Diff line number Diff line change @@ -20,3 +20,24 @@ with handles to JavaScript string values, use the `js_sys::JsString` type.
20
20
``` js
21
21
{{#include ../../../../examples/guide-supported-types-examples/str.js}}
22
22
```
23
+
24
+ ## UTF-16 vs UTF-8
25
+
26
+ Strings in JavaScript are by default encoded as if they're almost UTF-16. They
27
+ may, however, contain lone surrogates (only one element of a two-u16 pair to
28
+ create one unicode code point).
29
+
30
+ When passing a string to Rust from JS the ` TextEncoder ` API will be used to
31
+ convert between utf-16 and utf-8. If there are no lone surrogates then both
32
+ strings will be equivalent in terms of the unicode code point sequences they
33
+ describe.
34
+
35
+ If the JS string has a lone surrogate, however, then the ` TextEncoder `
36
+ implementation will replace lone surrogates with a unicode replacement
37
+ character. This means that the string Rust receives is a lossy representation of
38
+ the string in JS.
39
+
40
+ If you want to guarantee a lossless representation of the JS string in
41
+ Rust it's recommended you use ` js_sys::JsString ` as an argument type, and then
42
+ afterwards use ` js_sys::JsString::is_valid_utf16 ` to determine whether
43
+ the string can be losslessly represented as ` String ` in Rust.
Original file line number Diff line number Diff line change @@ -8,6 +8,9 @@ Copies the string's contents back and forth between the JavaScript
8
8
garbage-collected heap and the Wasm linear memory with ` TextDecoder ` and
9
9
` TextEncoder `
10
10
11
+ > ** Note** : Be sure to check out the [ documentation for ` str ` ] ( str.html ) to
12
+ > learn about some caveats when working with strings between JS and Rust.
13
+
11
14
## Example Rust Usage
12
15
13
16
``` rust
Original file line number Diff line number Diff line change @@ -260,6 +260,27 @@ impl JsValue {
260
260
///
261
261
/// If this JS value is not an instance of a string or if it's not valid
262
262
/// utf-8 then this returns `None`.
263
+ ///
264
+ /// # UTF-16 vs UTF-8
265
+ ///
266
+ /// Strings in JavaScript are by default encoded as if they're almost
267
+ /// UTF-16. They may, however, contain lone surrogates (only one element of
268
+ /// a two-u16 pair to create one unicode code point).
269
+ ///
270
+ /// If the `JsValue` is a string, then `TextEncoder` will be used to convert
271
+ /// between utf-16 and utf-8. If there are no lone surrogates then both
272
+ /// strings will be equivalent in terms of the unicode code point sequences
273
+ /// they describe.
274
+ ///
275
+ /// If the JS string has a lone surrogate, however, then this function will
276
+ /// still return `Some`. The `TextEncoder` implementation will replace lone
277
+ /// surrogates with a unicode replacement character.
278
+ ///
279
+ /// If you want to guarantee a lossless representation of the JS string in
280
+ /// Rust it's recommended you use `js_sys::JsString::is_valid_utf16`.
281
+ /// If that returns `true` then this function is lossless. If that function
282
+ /// returns `false` then this function is lossy and you'll need to access
283
+ /// the raw u16 values instead.
263
284
#[ cfg( feature = "std" ) ]
264
285
pub fn as_string ( & self ) -> Option < String > {
265
286
unsafe {
You can’t perform that action at this time.
0 commit comments