Skip to content

Commit 07ff474

Browse files
author
Fabrice Bellard
committed
use Unicode normalization in String.prototype.localeCompare
1 parent 7cefa7b commit 07ff474

File tree

2 files changed

+100
-33
lines changed

2 files changed

+100
-33
lines changed

quickjs.c

+100-31
Original file line numberDiff line numberDiff line change
@@ -41109,26 +41109,6 @@ static BOOL test_final_sigma(JSString *p, int sigma_pos)
4110941109
return !lre_is_cased(c1);
4111041110
}
4111141111

41112-
/* String.prototype.localeCompare: convert 'this_val' and argv[0] to
   strings, compare them with js_string_compare() and return the
   result as an Int32. Returns JS_EXCEPTION if either conversion
   fails.
   NOTE(review): argv[0] is read without checking argc - presumably
   the caller guarantees at least one argument slot; confirm. */
static JSValue js_string_localeCompare(JSContext *ctx, JSValueConst this_val,
                                       int argc, JSValueConst *argv)
{
    JSValue lhs, rhs;
    int order;

    lhs = JS_ToStringCheckObject(ctx, this_val);
    if (JS_IsException(lhs))
        return JS_EXCEPTION;

    rhs = JS_ToString(ctx, argv[0]);
    if (!JS_IsException(rhs)) {
        order = js_string_compare(ctx, JS_VALUE_GET_STRING(lhs),
                                  JS_VALUE_GET_STRING(rhs));
        JS_FreeValue(ctx, lhs);
        JS_FreeValue(ctx, rhs);
        return JS_NewInt32(ctx, order);
    }

    /* the second conversion failed: only the first string is live */
    JS_FreeValue(ctx, lhs);
    return JS_EXCEPTION;
}
41131-
4113241112
static JSValue js_string_toLowerCase(JSContext *ctx, JSValueConst this_val,
4113341113
int argc, JSValueConst *argv, int to_lower)
4113441114
{
@@ -41214,23 +41194,38 @@ static JSValue JS_NewUTF32String(JSContext *ctx, const uint32_t *buf, int len)
4121441194
return JS_EXCEPTION;
4121541195
}
4121641196

41197+
static int js_string_normalize1(JSContext *ctx, uint32_t **pout_buf,
41198+
JSValueConst val,
41199+
UnicodeNormalizationEnum n_type)
41200+
{
41201+
int buf_len, out_len;
41202+
uint32_t *buf, *out_buf;
41203+
41204+
buf_len = JS_ToUTF32String(ctx, &buf, val);
41205+
if (buf_len < 0)
41206+
return -1;
41207+
out_len = unicode_normalize(&out_buf, buf, buf_len, n_type,
41208+
ctx->rt, (DynBufReallocFunc *)js_realloc_rt);
41209+
js_free(ctx, buf);
41210+
if (out_len < 0)
41211+
return -1;
41212+
*pout_buf = out_buf;
41213+
return out_len;
41214+
}
41215+
4121741216
static JSValue js_string_normalize(JSContext *ctx, JSValueConst this_val,
4121841217
int argc, JSValueConst *argv)
4121941218
{
4122041219
const char *form, *p;
4122141220
size_t form_len;
41222-
int is_compat, buf_len, out_len;
41221+
int is_compat, out_len;
4122341222
UnicodeNormalizationEnum n_type;
4122441223
JSValue val;
41225-
uint32_t *buf, *out_buf;
41224+
uint32_t *out_buf;
4122641225

4122741226
val = JS_ToStringCheckObject(ctx, this_val);
4122841227
if (JS_IsException(val))
4122941228
return val;
41230-
buf_len = JS_ToUTF32String(ctx, &buf, val);
41231-
JS_FreeValue(ctx, val);
41232-
if (buf_len < 0)
41233-
return JS_EXCEPTION;
4123441229

4123541230
if (argc == 0 || JS_IsUndefined(argv[0])) {
4123641231
n_type = UNICODE_NFC;
@@ -41256,22 +41251,96 @@ static JSValue js_string_normalize(JSContext *ctx, JSValueConst this_val,
4125641251
JS_FreeCString(ctx, form);
4125741252
JS_ThrowRangeError(ctx, "bad normalization form");
4125841253
fail1:
41259-
js_free(ctx, buf);
41254+
JS_FreeValue(ctx, val);
4126041255
return JS_EXCEPTION;
4126141256
}
4126241257
JS_FreeCString(ctx, form);
4126341258
}
4126441259

41265-
out_len = unicode_normalize(&out_buf, buf, buf_len, n_type,
41266-
ctx->rt, (DynBufReallocFunc *)js_realloc_rt);
41267-
js_free(ctx, buf);
41260+
out_len = js_string_normalize1(ctx, &out_buf, val, n_type);
41261+
JS_FreeValue(ctx, val);
4126841262
if (out_len < 0)
4126941263
return JS_EXCEPTION;
4127041264
val = JS_NewUTF32String(ctx, out_buf, out_len);
4127141265
js_free(ctx, out_buf);
4127241266
return val;
4127341267
}
41274-
#endif /* CONFIG_ALL_UNICODE */
41268+
41269+
/* Compare two UTF-32 buffers element by element.
   Return < 0, 0 or > 0. When an element differs within the common
   prefix, the result is the difference of the two elements;
   otherwise the shorter buffer orders first (-1 / 1) and equal
   lengths give 0. */
static int js_UTF32_compare(const uint32_t *buf1, int buf1_len,
                            const uint32_t *buf2, int buf2_len)
{
    int pos, common_len, diff;

    common_len = min_int(buf1_len, buf2_len);
    pos = 0;
    while (pos < common_len) {
        /* Note: range is limited so a subtraction is valid */
        diff = buf1[pos] - buf2[pos];
        if (diff != 0)
            return diff;
        pos++;
    }

    /* identical common prefix: decide on the lengths alone */
    if (buf1_len < buf2_len)
        return -1;
    return (buf1_len > buf2_len) ? 1 : 0;
}
41289+
41290+
/* String.prototype.localeCompare: convert 'this_val' and argv[0] to
   strings, normalize both with UNICODE_NFC through
   js_string_normalize1() and compare the resulting UTF-32 buffers
   with js_UTF32_compare(). The comparison result is returned as an
   Int32; JS_EXCEPTION is returned if a conversion or a normalization
   fails.
   NOTE(review): argv[0] is read without checking argc - presumably
   the caller guarantees at least one argument slot; confirm. */
static JSValue js_string_localeCompare(JSContext *ctx, JSValueConst this_val,
                                       int argc, JSValueConst *argv)
{
    JSValue this_str, that_str;
    uint32_t *this_buf, *that_buf;
    int this_len, that_len, order;

    this_str = JS_ToStringCheckObject(ctx, this_val);
    if (JS_IsException(this_str))
        return JS_EXCEPTION;
    that_str = JS_ToString(ctx, argv[0]);
    if (JS_IsException(that_str)) {
        JS_FreeValue(ctx, this_str);
        return JS_EXCEPTION;
    }

    /* each string value is dropped as soon as its normalized copy
       has been requested */
    this_len = js_string_normalize1(ctx, &this_buf, this_str, UNICODE_NFC);
    JS_FreeValue(ctx, this_str);
    if (this_len < 0) {
        JS_FreeValue(ctx, that_str);
        return JS_EXCEPTION;
    }

    that_len = js_string_normalize1(ctx, &that_buf, that_str, UNICODE_NFC);
    JS_FreeValue(ctx, that_str);
    if (that_len < 0) {
        /* only the first normalized buffer exists at this point */
        js_free(ctx, this_buf);
        return JS_EXCEPTION;
    }

    order = js_UTF32_compare(this_buf, this_len, that_buf, that_len);
    js_free(ctx, this_buf);
    js_free(ctx, that_buf);
    return JS_NewInt32(ctx, order);
}
41323+
#else /* CONFIG_ALL_UNICODE */
41324+
/* String.prototype.localeCompare, variant placed in the #else branch
   of CONFIG_ALL_UNICODE: convert 'this_val' and argv[0] to strings
   and compare them directly with js_string_compare(), without any
   normalization step. The result is returned as an Int32;
   JS_EXCEPTION is returned if either conversion fails.
   NOTE(review): argv[0] is read without checking argc - presumably
   the caller guarantees at least one argument slot; confirm. */
static JSValue js_string_localeCompare(JSContext *ctx, JSValueConst this_val,
                                       int argc, JSValueConst *argv)
{
    JSValue lhs, rhs;
    int order;

    lhs = JS_ToStringCheckObject(ctx, this_val);
    if (JS_IsException(lhs))
        return JS_EXCEPTION;

    rhs = JS_ToString(ctx, argv[0]);
    if (!JS_IsException(rhs)) {
        order = js_string_compare(ctx, JS_VALUE_GET_STRING(lhs),
                                  JS_VALUE_GET_STRING(rhs));
        JS_FreeValue(ctx, lhs);
        JS_FreeValue(ctx, rhs);
        return JS_NewInt32(ctx, order);
    }

    /* the second conversion failed: only the first string is live */
    JS_FreeValue(ctx, lhs);
    return JS_EXCEPTION;
}
41343+
#endif /* !CONFIG_ALL_UNICODE */
4127541344

4127641345
/* also used for String.prototype.valueOf */
4127741346
static JSValue js_string_toString(JSContext *ctx, JSValueConst this_val,

test262_errors.txt

-2
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@ test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: Test262Er
55
test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: strict mode: Test262Error: Expected [a, abc] and [a, undefined] to have the same contents. ? quantifier
66
test262/test/built-ins/RegExp/unicode_full_case_folding.js:20: Test262Error: \u0390 does not match \u1fd3
77
test262/test/built-ins/RegExp/unicode_full_case_folding.js:20: strict mode: Test262Error: \u0390 does not match \u1fd3
8-
test262/test/built-ins/String/prototype/localeCompare/15.5.4.9_CE.js:62: Test262Error: String.prototype.localeCompare considers ö (\u006f\u0308) ≠ ö (\u00f6).
9-
test262/test/built-ins/String/prototype/localeCompare/15.5.4.9_CE.js:62: strict mode: Test262Error: String.prototype.localeCompare considers ö (\u006f\u0308) ≠ ö (\u00f6).
108
test262/test/built-ins/TypedArray/prototype/sort/sort-tonumber.js:30: TypeError: ArrayBuffer is detached (Testing with Float64Array.)
119
test262/test/built-ins/TypedArray/prototype/sort/sort-tonumber.js:30: strict mode: TypeError: ArrayBuffer is detached (Testing with Float64Array.)
1210
test262/test/language/expressions/assignment/target-member-computed-reference-null.js:32: Test262Error: Expected a DummyError but got a TypeError

0 commit comments

Comments
 (0)