From 6f53dbb83ee2439aa77fd0889f707015f50e4ead Mon Sep 17 00:00:00 2001
From: Alex Dowad
Date: Fri, 20 Jan 2023 10:28:26 +0200
Subject: [PATCH] mb_scrub does not attempt to scrub known-valid UTF-8 strings

---
 ext/mbstring/mbstring.c          | 13 ++++++++-----
 ext/mbstring/tests/mb_scrub.phpt |  8 ++++++++
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index 630b926af4684..1ef21530b047e 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -5066,12 +5066,10 @@ PHP_FUNCTION(mb_chr)
 /* {{{ */
 PHP_FUNCTION(mb_scrub)
 {
-	char* str;
-	size_t str_len;
-	zend_string *enc_name = NULL;
+	zend_string *str, *enc_name = NULL;
 
 	ZEND_PARSE_PARAMETERS_START(1, 2)
-		Z_PARAM_STRING(str, str_len)
+		Z_PARAM_STR(str)
 		Z_PARAM_OPTIONAL
 		Z_PARAM_STR_OR_NULL(enc_name)
 	ZEND_PARSE_PARAMETERS_END();
@@ -5081,7 +5079,12 @@ PHP_FUNCTION(mb_scrub)
 		RETURN_THROWS();
 	}
 
-	RETURN_STR(php_mb_convert_encoding_ex(str, str_len, enc, enc));
+	if (enc == &mbfl_encoding_utf8 && (GC_FLAGS(str) & IS_STR_VALID_UTF8)) {
+		/* A valid UTF-8 string will not be changed by mb_scrub; so just increment the refcount and return it */
+		RETURN_STR_COPY(str);
+	}
+
+	RETURN_STR(php_mb_convert_encoding_ex(ZSTR_VAL(str), ZSTR_LEN(str), enc, enc));
 }
 /* }}} */
 
diff --git a/ext/mbstring/tests/mb_scrub.phpt b/ext/mbstring/tests/mb_scrub.phpt
index 1b2d8ab4e34e2..6eb580bf31cc9 100644
--- a/ext/mbstring/tests/mb_scrub.phpt
+++ b/ext/mbstring/tests/mb_scrub.phpt
@@ -8,7 +8,15 @@ var_dump(
     "?" === mb_scrub("\x80"),
     "?" === mb_scrub("\x80", 'UTF-8')
 );
+
+$utf8str = "abc 日本語 Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞";
+// Check $utf8str so it is marked as 'valid UTF-8'
+// This will enable optimized implementation of mb_scrub
+if (!mb_check_encoding($utf8str, 'UTF-8'))
+  die("Test string should be valid UTF-8");
+var_dump(mb_scrub($utf8str));
 ?>
 --EXPECT--
 bool(true)
 bool(true)
+string(122) "abc 日本語 Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞"