Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add RegExp.escape #687

Merged
merged 1 commit into from
Nov 15, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions libunicode-table.h
Original file line number Diff line number Diff line change
@@ -572,6 +572,16 @@ static const uint8_t unicode_prop_ID_Continue1_index[66] = {
0x01, 0x0e,
};

/* Packed data for the Unicode White_Space property.  It is only meant to
   be read through lre_is_in_table() together with
   unicode_prop_White_Space_index; the bytes are generated, do not edit
   them by hand. */
static const uint8_t unicode_prop_White_Space_table[22] = {
    0x88, 0x84, 0x91, 0x80, 0xe3, 0x80, 0x99, 0x80, 0x55, 0xde, 0x80,
    0x49, 0x7e, 0x8a, 0x9c, 0x0c, 0x80, 0xae, 0x80, 0x4f, 0x9f, 0x80,
};

/* Index companion of unicode_prop_White_Space_table.  lre_is_white_space()
   passes sizeof(this array) / 3 as the index length, so the array size
   must stay a multiple of 3. */
static const uint8_t unicode_prop_White_Space_index[3] = {
    0x01,
    0x30,
    0x00,
};

static const uint8_t unicode_cc_table[916] = {
0xb2, 0xcf, 0xd4, 0x00, 0xe8, 0x03, 0xdc, 0x00,
0xe8, 0x00, 0xd8, 0x04, 0xdc, 0x01, 0xca, 0x03,
@@ -4262,12 +4272,6 @@ static const uint8_t unicode_prop_Variation_Selector_table[13] = {
0x6d, 0x02, 0xef, 0x40, 0xef,
};

/* Generated, packed representation of the Unicode White_Space property
   (22 bytes).  Do not edit by hand. */
static const uint8_t unicode_prop_White_Space_table[22] = {
    0x88, 0x84, 0x91, 0x80, 0xe3, 0x80,
    0x99, 0x80, 0x55, 0xde, 0x80, 0x49,
    0x7e, 0x8a, 0x9c, 0x0c, 0x80, 0xae,
    0x80, 0x4f, 0x9f, 0x80,
};

static const uint8_t unicode_prop_Bidi_Mirrored_table[173] = {
0xa7, 0x81, 0x91, 0x00, 0x80, 0x9b, 0x00, 0x80,
0x9c, 0x00, 0x80, 0xac, 0x80, 0x8e, 0x80, 0x4e,
7 changes: 7 additions & 0 deletions libunicode.c
Original file line number Diff line number Diff line change
@@ -545,6 +545,13 @@ BOOL lre_is_id_continue(uint32_t c)
sizeof(unicode_prop_ID_Continue1_index) / 3);
}

/* Tell whether 'c' is found in the generated Unicode White_Space
   property table.  The result is whatever lre_is_in_table() reports. */
BOOL lre_is_white_space(uint32_t c)
{
    /* the index length is expressed in units of 3 bytes */
    const size_t index_len = sizeof(unicode_prop_White_Space_index) / 3;

    return lre_is_in_table(c,
                           unicode_prop_White_Space_table,
                           unicode_prop_White_Space_index,
                           index_len);
}

#define UNICODE_DECOMP_LEN_MAX 18

typedef enum {
1 change: 1 addition & 0 deletions libunicode.h
Original file line number Diff line number Diff line change
@@ -107,6 +107,7 @@ int cr_regexp_canonicalize(CharRange *cr, BOOL is_unicode);

/* Character classification predicates taking a single uint32_t 'c'
   (presumably a Unicode code point). */
LRE_BOOL lre_is_id_start(uint32_t c);
LRE_BOOL lre_is_id_continue(uint32_t c);
/* looks 'c' up in the Unicode White_Space property table */
LRE_BOOL lre_is_white_space(uint32_t c);

int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len,
UnicodeNormalizationEnum n_type,
48 changes: 48 additions & 0 deletions quickjs.c
Original file line number Diff line number Diff line change
@@ -43827,6 +43827,53 @@ void *lre_realloc(void *opaque, void *ptr, size_t size)
return js_realloc_rt(ctx->rt, ptr, size);
}

/*
 * RegExp.escape(string)
 *
 * Return a copy of argv[0] in which every character that could be
 * interpreted as regular expression syntax is escaped, following the
 * ECMAScript EncodeForRegExpEscape operation:
 *
 *   - TAB, LF, VT, FF, CR               -> \t \n \v \f \r
 *   - SyntaxCharacter and '/'           -> backslash + character
 *   - ,-=<>#&!%:;@~'`" , WhiteSpace,
 *     LineTerminator, lone surrogates   -> \xHH (<= 0xFF) or \uHHHH
 *   - an initial ASCII letter or digit  -> \xHH
 *   - everything else                   -> copied unchanged
 *
 * Throws a TypeError when argv[0] is not a string; otherwise returns
 * the value produced by string_buffer_end().
 */
static JSValue js_regexp_escape(JSContext *ctx, JSValue this_val,
                                int argc, JSValue *argv)
{
    StringBuffer b_s, *b = &b_s;
    JSString *p;
    uint32_t c, i;
    char s[16];

    if (!JS_IsString(argv[0]))
        return JS_ThrowTypeError(ctx, "not a string");
    p = JS_VALUE_GET_STRING(argv[0]);
    string_buffer_init2(ctx, b, 0, p->is_wide_char);
    for (i = 0; i < p->len; i++) {
        c = p->is_wide_char ? (uint32_t)p->u.str16[i] : (uint32_t)p->u.str8[i];
        if (c >= 9 && c <= 13) {
            /* ControlEscape: U+0009..U+000D map to t, n, v, f, r */
            string_buffer_putc8(b, '\\');
            string_buffer_putc8(b, "tnvfr"[c - 9]);
        } else if (c == 0x20 || c == 0xA0) {
            /* SPACE and NO-BREAK SPACE are the only WhiteSpace code points
               below 0x100 that are not ControlEscapes */
        hex2:
            snprintf(s, sizeof(s), "\\x%02x", c);
            string_buffer_puts8(b, s);
        } else if (c < 0x20 || c == 0x7F) {
            /* Remaining C0 controls and DEL are neither syntax characters
               nor white space: copy them as is. In particular DEL must
               not get a backslash, "\<DEL>" is an invalid identity escape
               in unicode mode. */
            string_buffer_putc8(b, c);
        } else if (c < 128) {
            if ((c >= '0' && c <= '9')
            ||  (c >= 'A' && c <= 'Z')
            ||  (c >= 'a' && c <= 'z')) {
                /* an initial alphanumeric is escaped so that the result
                   can safely follow e.g. "\0" or "\c" in a pattern */
                if (i == 0)
                    goto hex2;
            } else if (strchr(",-=<>#&!%:;@~'`\"", c)) {
                /* "other punctuators" */
                goto hex2;
            } else if (c != '_') {
                /* what is left in 0x21..0x7E is SyntaxCharacter or '/' */
                string_buffer_putc8(b, '\\');
            }
            string_buffer_putc8(b, c);
        } else if (c < 256) {
            /* Latin-1 characters other than U+00A0 are not escaped */
            string_buffer_putc8(b, c);
        } else if (c >= 0xD800 && c <= 0xDBFF && i + 1 < p->len
               &&  p->u.str16[i + 1] >= 0xDC00 && p->u.str16[i + 1] <= 0xDFFF) {
            /* A well-formed surrogate pair is a single supplementary code
               point: copy both code units. c >= 256 implies a wide
               string, so str16 is the active member. */
            string_buffer_putc16(b, c);
            i++;
            string_buffer_putc16(b, p->u.str16[i]);
        } else if (is_surrogate(c) || lre_is_white_space(c) || c == 0xFEFF) {
            /* Lone surrogates, WhiteSpace and LineTerminator.
               U+FEFF (Zero Width No-Break Space) is general category Cf
               rather than Zs, hence absent from the White_Space table,
               but ECMAScript counts it as WhiteSpace and test262
               explicitly tests for it. */
            snprintf(s, sizeof(s), "\\u%04x", c);
            string_buffer_puts8(b, s);
        } else {
            string_buffer_putc16(b, c);
        }
    }
    return string_buffer_end(b);
}

static JSValue js_regexp_exec(JSContext *ctx, JSValue this_val,
int argc, JSValue *argv)
{
@@ -44855,6 +44902,7 @@ static JSValue js_regexp_Symbol_split(JSContext *ctx, JSValue this_val,
}

/* Function list for the RegExp builtin (presumably installed on the
   RegExp constructor itself - confirm at the registration site):
   - "escape": C function js_regexp_escape, declared with length 1
   - "[Symbol.species]": getter js_get_this, no setter */
static const JSCFunctionListEntry js_regexp_funcs[] = {
JS_CFUNC_DEF("escape", 1, js_regexp_escape ),
JS_CGETSET_DEF("[Symbol.species]", js_get_this, NULL ),
};

2 changes: 1 addition & 1 deletion test262.conf
Original file line number Diff line number Diff line change
@@ -174,7 +174,7 @@ regexp-modifiers=skip
regexp-named-groups
regexp-unicode-property-escapes
regexp-v-flag
RegExp.escape=skip
RegExp.escape
resizable-arraybuffer
rest-parameters
Set
4 changes: 3 additions & 1 deletion unicode_gen.c
Original file line number Diff line number Diff line change
@@ -1574,6 +1574,7 @@ void build_flags_tables(FILE *f)
build_prop_table(f, PROP_Case_Ignorable, TRUE);
build_prop_table(f, PROP_ID_Start, TRUE);
build_prop_table(f, PROP_ID_Continue1, TRUE);
build_prop_table(f, PROP_White_Space, TRUE);
}

void dump_name_table(FILE *f, const char *cname, const char **tab_name, int len,
@@ -1813,7 +1814,8 @@ void build_prop_list_table(FILE *f)
for(i = 0; i < PROP_TABLE_COUNT; i++) {
if (i == PROP_ID_Start ||
i == PROP_Case_Ignorable ||
i == PROP_ID_Continue1) {
i == PROP_ID_Continue1 ||
i == PROP_White_Space) {
/* already generated */
} else {
build_prop_table(f, i, FALSE);
Loading