From 3b56f6ddcb379b8f16ac34f52f415a68d604420a Mon Sep 17 00:00:00 2001 From: Jules Bertholet Date: Sun, 19 May 2024 23:13:03 -0400 Subject: [PATCH] Mark U+A8FA DEVANAGARI CARET as zero-width --- scripts/unicode.py | 4 ++++ src/lib.rs | 1 + src/tables.rs | 2 +- tests/tests.rs | 7 +++++-- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/scripts/unicode.py b/scripts/unicode.py index fd8e27f..f748711 100755 --- a/scripts/unicode.py +++ b/scripts/unicode.py @@ -245,6 +245,10 @@ def load_zero_widths() -> "list[bool]": # This is a `Prepended_Concatenation_Mark`, but unlike the others it's zero-width zw_map[0x070F] = True + # U+A8FA DEVANAGARI CARET + # https://www.unicode.org/versions/Unicode15.0.0/ch12.pdf#G667447 + zw_map[0xA8FA] = True + return zw_map diff --git a/src/lib.rs b/src/lib.rs index 23e75e7..eb3900e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -58,6 +58,7 @@ //! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BHangul_Syllable_Type%3DV%7D%5Cp%7BHangul_Syllable_Type%3DT%7D) //! with a [`Hangul_Syllable_Type`] of `Vowel_Jamo` (`V`) or `Trailing_Jamo` (`T`). //! - `'\u{070F}'` [SYRIAC] ABBREVIATION MARK. +//! - [`'\u{A8FA}'` DEVANAGARI CARET](https://util.unicode.org/UnicodeJsps/character.jsp?a=A8FA). //! 7. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D) //! with an [`East_Asian_Width`] of [`Fullwidth`] or [`Wide`] have width 2. //! 8. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DA%7D) diff --git a/src/tables.rs b/src/tables.rs index a987758..f4464f7 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -438,7 +438,7 @@ pub mod charwidth { 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x50, 0x55, 0x55, 0x55, 0x45, 0x45, 0x15, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x41, 0x55, 0x54, 0x55, 0x55, 0x55, 0x55, 0x55, 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, - 0x00, 0x00, 0x00, 0x00, 0x50, 0x55, 0x55, 0x15, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, + 0x00, 0x00, 0x00, 0x00, 0x50, 0x55, 0x45, 0x15, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x05, 0x00, 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x00, 0x00, 0x50, 0x55, 0x55, 0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x56, 0x40, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x05, 0x50, 0x50, 0x55, 0x55, 0x55, 0x55, diff --git a/tests/tests.rs b/tests/tests.rs index 451e003..97aa497 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -107,10 +107,8 @@ fn test_prepended_concatenation_marks() { #[test] fn test_syriac_abbreviation_mark() { assert_eq!('\u{070F}'.width(), Some(0)); - assert_eq!("\u{070F}".width(), 0); } - #[test] fn test_interlinear_annotation_chars() { assert_eq!('\u{FFF9}'.width(), Some(1)); @@ -137,6 +135,11 @@ fn test_marks() { assert_eq!('\u{09BE}'.width(), Some(0)); } +#[test] +fn test_devanagari_caret() { + assert_eq!('\u{A8FA}'.width(), Some(0)); +} + #[test] fn test_canonical_equivalence() { let norm_file = BufReader::new(