Skip to content

Commit

Permalink
FIX: using replacement char for all chars outside the Basic Multilingual Plane
Browse files Browse the repository at this point in the history
  • Loading branch information
Oldes committed Oct 26, 2023
1 parent 3eb0b1b commit 3a72825
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/core/s-unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
}
*str = src;
if (state != UTF8_ACCEPT) return 0; //UNI_REPLACEMENT_CHAR;
if (codepoint >= UNI_MAX_BMP) return UNI_REPLACEMENT_CHAR;
return codepoint;
#else
const UTF8 *source = *str;
Expand Down
5 changes: 5 additions & 0 deletions src/tests/units/make-test.r3
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,11 @@ Rebol [
--assert "make bitset! #{FF}" = try [to string! quote #[bitset! #{FF}] ] ; bitset!
--assert "make image! [1x1 #{FFFFFF}]" = try [to string! quote #[image! 1x1 #{FFFFFF}] ] ; image!
--assert "integer! percent!" = try [to string! quote #[typeset! [integer! percent!]] ] ; typeset!

--test-- "to string! with chars outside the BMP"
;; Rebol currently supports only 16-bit Unicode, so chars outside the BMP become U+FFFD (#{EFBFBD} in UTF-8).
--assert #{EFBFBD} = to binary! to string! #{F09F989A}
--assert #{EFBFBD} = to binary! to string! #{F09F989C}
===end-group===

===start-group=== "make/to tag"
Expand Down

0 comments on commit 3a72825

Please sign in to comment.