Skip to content

Commit

Permalink
Mark U+A8FA DEVANAGARI CARET as zero-width
Browse files Browse the repository at this point in the history
  • Loading branch information
Jules-Bertholet committed May 21, 2024
1 parent 934c875 commit 3b56f6d
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 3 deletions.
4 changes: 4 additions & 0 deletions scripts/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,10 @@ def load_zero_widths() -> "list[bool]":
# This is a `Prepended_Concatenation_Mark`, but unlike the others it's zero-width
zw_map[0x070F] = True

# U+A8FA DEVANAGARI CARET
# https://www.unicode.org/versions/Unicode15.0.0/ch12.pdf#G667447
zw_map[0xA8FA] = True

return zw_map


Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BHangul_Syllable_Type%3DV%7D%5Cp%7BHangul_Syllable_Type%3DT%7D)
//! with a [`Hangul_Syllable_Type`] of `Vowel_Jamo` (`V`) or `Trailing_Jamo` (`T`).
//! - `'\u{070F}'` [SYRIAC] ABBREVIATION MARK.
//! - [`'\u{A8FA}'` DEVANAGARI CARET](https://util.unicode.org/UnicodeJsps/character.jsp?a=A8FA).
//! 7. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D)
//! with an [`East_Asian_Width`] of [`Fullwidth`] or [`Wide`] have width 2.
//! 8. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DA%7D)
Expand Down
2 changes: 1 addition & 1 deletion src/tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ pub mod charwidth {
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
0x55, 0x55, 0x50, 0x55, 0x55, 0x55, 0x45, 0x45, 0x15, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
0x41, 0x55, 0x54, 0x55, 0x55, 0x55, 0x55, 0x55, 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
0x00, 0x00, 0x00, 0x00, 0x50, 0x55, 0x55, 0x15, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
0x00, 0x00, 0x00, 0x00, 0x50, 0x55, 0x45, 0x15, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
0x55, 0x55, 0x05, 0x00, 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x00, 0x00, 0x50, 0x55,
0x55, 0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x56, 0x40, 0x55, 0x55, 0x55, 0x55,
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x05, 0x50, 0x50, 0x55, 0x55, 0x55, 0x55,
Expand Down
7 changes: 5 additions & 2 deletions tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,8 @@ fn test_prepended_concatenation_marks() {
/// U+070F SYRIAC ABBREVIATION MARK must measure as zero-width, both when
/// queried as a single `char` and when queried as a one-character string.
#[test]
fn test_syriac_abbreviation_mark() {
    let mark_char = '\u{070F}';
    let mark_str = "\u{070F}";

    // The `char` API reports an optional width; the string API a plain total.
    assert_eq!(mark_char.width(), Some(0));
    assert_eq!(mark_str.width(), 0);
}


#[test]
fn test_interlinear_annotation_chars() {
assert_eq!('\u{FFF9}'.width(), Some(1));
Expand All @@ -137,6 +135,11 @@ fn test_marks() {
assert_eq!('\u{09BE}'.width(), Some(0));
}

/// U+A8FA DEVANAGARI CARET must measure as zero-width when queried as a `char`.
#[test]
fn test_devanagari_caret() {
    let caret = '\u{A8FA}';
    assert_eq!(caret.width(), Some(0));
}

#[test]
fn test_canonical_equivalence() {
let norm_file = BufReader::new(
Expand Down

0 comments on commit 3b56f6d

Please sign in to comment.