From 972c5501d76eb9a98058e1f823155711eddcab46 Mon Sep 17 00:00:00 2001
From: nemtrif
Date: Sat, 28 Oct 2023 11:14:26 -0400
Subject: [PATCH] Fix for issue #111

Fixing regression caused by the fix for #78, which leads to
utf8::unchecked::utf16to8() chopping off the last character in many cases.
---
 source/utf8/unchecked.h | 4 ++--
 tests/test_cpp11.cpp    | 5 +++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/source/utf8/unchecked.h b/source/utf8/unchecked.h
index 6f928b7..65d4948 100644
--- a/source/utf8/unchecked.h
+++ b/source/utf8/unchecked.h
@@ -183,10 +183,10 @@ namespace utf8
         {
             while (start != end) {
                 utfchar32_t cp = utf8::internal::mask16(*start++);
-                if (start == end)
-                    return result;
                 // Take care of surrogate pairs first
                 if (utf8::internal::is_lead_surrogate(cp)) {
+                    if (start == end)
+                        return result;
                     utfchar32_t trail_surrogate = utf8::internal::mask16(*start++);
                     cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
                 }
diff --git a/tests/test_cpp11.cpp b/tests/test_cpp11.cpp
index ee3518a..e521966 100644
--- a/tests/test_cpp11.cpp
+++ b/tests/test_cpp11.cpp
@@ -50,6 +50,11 @@ TEST(CPP11APITests, test_utf16to8)
     u16string utf16string = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e};
     string u = utf16to8(utf16string);
     EXPECT_EQ (u.size(), 10);
+
+    u16string h16 = u"h!";
+    string h8;
+    utf8::unchecked::utf16to8(h16.begin(), h16.end(), std::back_inserter(h8));
+    EXPECT_EQ (h8, "h!");
 }
 
 TEST(CPP11APITests, test_utf8to16)