Publish 0.2.0 with newlines treated as width 1 (#68)

Manishearth · web-flow · commit 79eab0d9fc20 · 2024-09-19T20:59:50.000Z
* Revert "Treat newlines as width 0 in the 0.1 stream, publish 0.1.14 (#67)"

  This reverts commit 9eaafa5.
* Update readme
* Bump to 0.2
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 
 name = "unicode-width"
-version = "0.1.14"
+version = "0.2.0"
 authors = [
     "kwantam <kwantam@gmail.com>",
     "Manish Goregaokar <manishsmail@gmail.com>",
diff --git a/README.md b/README.md
@@ -55,3 +55,16 @@ to your `Cargo.toml`:
 [dependencies]
 unicode-width = "0.1.11"
 ```
+
+
+## Changelog
+
+
+### 0.2.0
+
+ - Treat `\n` as width 1 (#60)
+ - Treat ambiguous `Modifier_Letter`s as narrow (#63)
+ - Support `Grapheme_Cluster_Break=Prepend` (#62)
+ - Support lots of ligatures (#53)
+
+Note: If you are using `unicode-width` for line breaking, the change treating `\n` as width 1 _may cause behavior changes_. In such cases, it is recommended that you feed already line-segmented text to `unicode-width`. In other words, please apply higher-level, control-character-based line-breaking protocols before feeding text to `unicode-width`. Relying on any character producing a stable width in this crate is likely a sign of a bug.
diff --git a/scripts/unicode.py b/scripts/unicode.py
@@ -1281,10 +1281,7 @@ def lookup_fns(
     s += """
     if c <= '\\u{A0}' {
         match c {
-            // According to the spec, LF should be width 1, which is how it is often rendered when it is forced to have a single-line rendering
-            // However, this makes it harder to use this crate to calculate line breaks, and breaks assumptions of downstream crates.
-            // https://github.com/unicode-rs/unicode-width/issues/60
-            '\\n' => (0, WidthInfo::LINE_FEED),
+            '\\n' => (1, WidthInfo::LINE_FEED),
             '\\r' if next_info == WidthInfo::LINE_FEED => (0, WidthInfo::DEFAULT),
             _ => (1, WidthInfo::DEFAULT),
         }
diff --git a/src/tables.rs b/src/tables.rs
@@ -215,10 +215,7 @@ fn width_in_str(c: char, mut next_info: WidthInfo) -> (i8, WidthInfo) {
     }
     if c <= '\u{A0}' {
         match c {
-            // According to the spec, LF should be width 1, which is how it is often rendered when it is forced to have a single-line rendering
-            // However, this makes it harder to use this crate to calculate line breaks, and breaks assumptions of downstream crates.
-            // https://github.com/unicode-rs/unicode-width/issues/60
-            '\n' => (0, WidthInfo::LINE_FEED),
+            '\n' => (1, WidthInfo::LINE_FEED),
             '\r' if next_info == WidthInfo::LINE_FEED => (0, WidthInfo::DEFAULT),
             _ => (1, WidthInfo::DEFAULT),
         }
@@ -510,10 +507,7 @@ fn width_in_str_cjk(c: char, mut next_info: WidthInfo) -> (i8, WidthInfo) {
     }
     if c <= '\u{A0}' {
         match c {
-            // According to the spec, LF should be width 1, which is how it is often rendered when it is forced to have a single-line rendering
-            // However, this makes it harder to use this crate to calculate line breaks, and breaks assumptions of downstream crates.
-            // https://github.com/unicode-rs/unicode-width/issues/60
-            '\n' => (0, WidthInfo::LINE_FEED),
+            '\n' => (1, WidthInfo::LINE_FEED),
             '\r' if next_info == WidthInfo::LINE_FEED => (0, WidthInfo::DEFAULT),
             _ => (1, WidthInfo::DEFAULT),
         }
diff --git a/tests/tests.rs b/tests/tests.rs
@@ -214,23 +214,18 @@ fn test_control_line_break() {
     assert_width!('\r', None, None);
     assert_width!('\n', None, None);
     assert_width!("\r", 1, 1);
-    // This is 0 due to #60
-    assert_width!("\n", 0, 0);
-    assert_width!("\r\n", 0, 0);
+    assert_width!("\n", 1, 1);
+    assert_width!("\r\n", 1, 1);
     assert_width!("\0", 1, 1);
-    assert_width!("1\t2\r\n3\u{85}4", 6, 6);
-    assert_width!("\r\u{FE0F}\n", 1, 1);
-    assert_width!("\r\u{200D}\n", 1, 1);
+    assert_width!("1\t2\r\n3\u{85}4", 7, 7);
+    assert_width!("\r\u{FE0F}\n", 2, 2);
+    assert_width!("\r\u{200D}\n", 2, 2);
 }
 
 #[test]
 fn char_str_consistent() {
     let mut s = String::with_capacity(4);
     for c in '\0'..=char::MAX {
-        // Newlines are special cased (#60)
-        if c == '\n' {
-            continue;
-        }
         s.clear();
         s.push(c);
         assert_eq!(c.width().unwrap_or(1), s.width());
@@ -423,10 +418,6 @@ fn test_khmer_coeng() {
             assert_width!(format!("\u{17D2}{c}"), 0, 0);
             assert_width!(format!("\u{17D2}\u{200D}\u{200D}{c}"), 0, 0);
         } else {
-            // Newlines are special cased (#60)
-            if c == '\n' {
-                continue;
-            }
             assert_width!(
                 format!("\u{17D2}{c}"),
                 c.width().unwrap_or(1),
@@ -597,11 +588,6 @@ fn emoji_test_file() {
     }
 }
 
-#[test]
-fn test_newline_zero_issue_60() {
-    assert_width!("a\na", 2, 2);
-}
-
 // Test traits are unsealed
 
 #[cfg(feature = "cjk")]

Original file line number	Diff line number	Diff line change
`@@ -215,10 +215,7 @@ fn width_in_str(c: char, mut next_info: WidthInfo) -> (i8, WidthInfo) {`
`215`	`215`	`}`
`216`	`216`	`if c <= '\u{A0}' {`
`217`	`217`	`match c {`
`218`		`- // According to the spec, LF should be width 1, which is how it is often rendered when it is forced to have a single-line rendering`
`219`		`- // However, this makes it harder to use this crate to calculate line breaks, and breaks assumptions of downstream crates.`
`220`		`- // https://github.com/unicode-rs/unicode-width/issues/60`
`221`		`- '\n' => (0, WidthInfo::LINE_FEED),`
	`218`	`+ '\n' => (1, WidthInfo::LINE_FEED),`
`222`	`219`	`'\r' if next_info == WidthInfo::LINE_FEED => (0, WidthInfo::DEFAULT),`
`223`	`220`	`_ => (1, WidthInfo::DEFAULT),`
`224`	`221`	`}`
`@@ -510,10 +507,7 @@ fn width_in_str_cjk(c: char, mut next_info: WidthInfo) -> (i8, WidthInfo) {`
`510`	`507`	`}`
`511`	`508`	`if c <= '\u{A0}' {`
`512`	`509`	`match c {`
`513`		`- // According to the spec, LF should be width 1, which is how it is often rendered when it is forced to have a single-line rendering`
`514`		`- // However, this makes it harder to use this crate to calculate line breaks, and breaks assumptions of downstream crates.`
`515`		`- // https://github.com/unicode-rs/unicode-width/issues/60`
`516`		`- '\n' => (0, WidthInfo::LINE_FEED),`
	`510`	`+ '\n' => (1, WidthInfo::LINE_FEED),`
`517`	`511`	`'\r' if next_info == WidthInfo::LINE_FEED => (0, WidthInfo::DEFAULT),`
`518`	`512`	`_ => (1, WidthInfo::DEFAULT),`
`519`	`513`	`}`