From 1967b05020d5900e79788ee06eeb10a7faae5f85 Mon Sep 17 00:00:00 2001 From: thedavidmeister Date: Tue, 28 Nov 2023 13:53:45 +0400 Subject: [PATCH 1/7] always quote strings --- src/ser.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/ser.rs b/src/ser.rs index 23c761bb..4cf1b10d 100644 --- a/src/ser.rs +++ b/src/ser.rs @@ -340,11 +340,7 @@ where } fn visit_str(self, v: &str) -> Result { - Ok(if crate::de::digits_but_not_number(v) { - ScalarStyle::SingleQuoted - } else { - ScalarStyle::Any - }) + Ok(ScalarStyle::SingleQuoted) } fn visit_unit(self) -> Result { From c4b9ef5e7d200aba20839a703e7ef3a59cdf8acb Mon Sep 17 00:00:00 2001 From: thedavidmeister Date: Tue, 28 Nov 2023 13:59:44 +0400 Subject: [PATCH 2/7] fix tests --- src/ser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ser.rs b/src/ser.rs index 4cf1b10d..8a17cfbc 100644 --- a/src/ser.rs +++ b/src/ser.rs @@ -339,7 +339,7 @@ where Ok(ScalarStyle::SingleQuoted) } - fn visit_str(self, v: &str) -> Result { + fn visit_str(self, _v: &str) -> Result { Ok(ScalarStyle::SingleQuoted) } From c3fb5bc0869b545a3b296c027b0c5755eb5ffb9a Mon Sep 17 00:00:00 2001 From: thedavidmeister Date: Tue, 28 Nov 2023 14:51:56 +0400 Subject: [PATCH 3/7] tests and fix for long hexadecimal quoting --- src/de.rs | 14 ++++++++++++++ src/ser.rs | 8 ++++++-- tests/test_serde.rs | 21 +++++++++++++++++++++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/src/de.rs b/src/de.rs index 566f5f00..f0b467da 100644 --- a/src/de.rs +++ b/src/de.rs @@ -1096,6 +1096,20 @@ pub(crate) fn digits_but_not_number(scalar: &str) -> bool { scalar.len() > 1 && scalar.starts_with('0') && scalar[1..].bytes().all(|b| b.is_ascii_digit()) } +/// If a string looks like it could be parsed as some other type by some YAML +/// parser on the round trip, or could otherwise be ambiguous, then we should +/// serialize it with quotes to be safe. +/// This avoids the norway problem https://hitchdev.com/strictyaml/why/implicit-typing-removed/ +pub(crate) fn ambiguous_string(scalar: &str) -> bool { + parse_bool(scalar).is_some() + || parse_null(scalar.as_bytes()).is_some() + || scalar.len() == 0 + || scalar.bytes().nth(0).unwrap().is_ascii_digit() + || scalar.starts_with('-') + || scalar.starts_with('.') + || scalar.starts_with("+") +} + pub(crate) fn visit_int<'de, V>(visitor: V, v: &str) -> Result, V> where V: Visitor<'de>, diff --git a/src/ser.rs b/src/ser.rs index 8a17cfbc..08b0567a 100644 --- a/src/ser.rs +++ b/src/ser.rs @@ -339,8 +339,12 @@ where Ok(ScalarStyle::SingleQuoted) } - fn visit_str(self, _v: &str) -> Result { - Ok(ScalarStyle::SingleQuoted) + fn visit_str(self, v: &str) -> Result { + if crate::de::ambiguous_string(v) { + Ok(ScalarStyle::SingleQuoted) + } else { + Ok(ScalarStyle::Any) + } } fn visit_unit(self) -> Result { diff --git a/tests/test_serde.rs b/tests/test_serde.rs index f16e89c0..22651f08 100644 --- a/tests/test_serde.rs +++ b/tests/test_serde.rs @@ -314,6 +314,27 @@ fn test_strings_needing_quote() { leading_zeros: '007' "}; test_serde(&thing, yaml); + + #[derive(Serialize, Deserialize, PartialEq, Debug)] + struct Struct2 { + string: String, + } + // Long hex values that don't fit in a u64 need to be quoted. + let thing2 = Struct2 { + string: "0xffaed20B7B67e498A3bEEf97386ec1849EFeE6Ac".to_owned(), + }; + let yaml2 = indoc! {" + string: '0xffaed20B7B67e498A3bEEf97386ec1849EFeE6Ac' + "}; + test_serde(&thing2, yaml2); + + let thing3 = Struct2 { + string: "".to_owned(), + }; + let yaml3 = indoc! {" + string: '' + "}; + test_serde(&thing3, yaml3); } #[test] From e923a53e7739f9e2810243c4fb33463108fa957e Mon Sep 17 00:00:00 2001 From: thedavidmeister Date: Tue, 28 Nov 2023 15:10:10 +0400 Subject: [PATCH 4/7] handling for NO on serialization --- README.md | 5 +++-- src/de.rs | 21 ++++++++++++++------- src/lib.rs | 5 +++-- tests/test_serde.rs | 21 +++++++++++++++++++-- 4 files changed, 39 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 27c58d81..0425f57b 100644 --- a/README.md +++ b/README.md @@ -41,8 +41,9 @@ fn main() -> Result<(), serde_yaml::Error> { map.insert("y".to_string(), 2.0); // Serialize it to a YAML string. + // y is quoted to avoid ambiguity in parsers that might read it as `true`. let yaml = serde_yaml::to_string(&map)?; - assert_eq!(yaml, "x: 1.0\ny: 2.0\n"); + assert_eq!(yaml, "x: 1.0\n'y': 2.0\n"); // Deserialize it back to a Rust type. let deserialized_map: BTreeMap = serde_yaml::from_str(&yaml)?; @@ -75,7 +76,7 @@ fn main() -> Result<(), serde_yaml::Error> { let point = Point { x: 1.0, y: 2.0 }; let yaml = serde_yaml::to_string(&point)?; - assert_eq!(yaml, "x: 1.0\ny: 2.0\n"); + assert_eq!(yaml, "x: 1.0\n'y': 2.0\n"); let deserialized_point: Point = serde_yaml::from_str(&yaml)?; assert_eq!(point, deserialized_point); diff --git a/src/de.rs b/src/de.rs index f0b467da..571ae235 100644 --- a/src/de.rs +++ b/src/de.rs @@ -1101,13 +1101,20 @@ pub(crate) fn digits_but_not_number(scalar: &str) -> bool { /// serialize it with quotes to be safe. /// This avoids the norway problem https://hitchdev.com/strictyaml/why/implicit-typing-removed/ pub(crate) fn ambiguous_string(scalar: &str) -> bool { - parse_bool(scalar).is_some() - || parse_null(scalar.as_bytes()).is_some() - || scalar.len() == 0 - || scalar.bytes().nth(0).unwrap().is_ascii_digit() - || scalar.starts_with('-') - || scalar.starts_with('.') - || scalar.starts_with("+") + let lower_scalar = scalar.to_lowercase(); + parse_bool(&lower_scalar).is_some() + || parse_null(&lower_scalar.as_bytes()).is_some() + || lower_scalar.len() == 0 + || lower_scalar.bytes().nth(0).unwrap().is_ascii_digit() + || lower_scalar.starts_with('-') + || lower_scalar.starts_with('.') + || lower_scalar.starts_with("+") + // Things that we don't parse as bool but could be parsed as bool by + // other YAML parsers. + || lower_scalar == "y" + || lower_scalar == "yes" + || lower_scalar == "n" + || lower_scalar == "no" } pub(crate) fn visit_int<'de, V>(visitor: V, v: &str) -> Result, V> diff --git a/src/lib.rs b/src/lib.rs index f22cee9d..b1440bae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,8 +24,9 @@ //! map.insert("y".to_string(), 2.0); //! //! // Serialize it to a YAML string. +//! // 'y' is quoted to avoid ambiguity in parsers that might read it as `true`. //! let yaml = serde_yaml::to_string(&map)?; -//! assert_eq!(yaml, "x: 1.0\ny: 2.0\n"); +//! assert_eq!(yaml, "x: 1.0\n'y': 2.0\n"); //! //! // Deserialize it back to a Rust type. //! let deserialized_map: BTreeMap = serde_yaml::from_str(&yaml)?; @@ -55,7 +56,7 @@ //! let point = Point { x: 1.0, y: 2.0 }; //! //! let yaml = serde_yaml::to_string(&point)?; -//! assert_eq!(yaml, "x: 1.0\ny: 2.0\n"); +//! assert_eq!(yaml, "x: 1.0\n'y': 2.0\n"); //! //! let deserialized_point: Point = serde_yaml::from_str(&yaml)?; //! assert_eq!(point, deserialized_point); diff --git a/tests/test_serde.rs b/tests/test_serde.rs index 22651f08..40e93ba2 100644 --- a/tests/test_serde.rs +++ b/tests/test_serde.rs @@ -195,7 +195,7 @@ fn test_map() { thing.insert("y".to_owned(), 2); let yaml = indoc! {" x: 1 - y: 2 + 'y': 2 "}; test_serde(&thing, yaml); } @@ -238,7 +238,7 @@ fn test_basic_struct() { }; let yaml = indoc! {r#" x: -4 - y: "hi\tquoted" + 'y': "hi\tquoted" z: true "#}; test_serde(&thing, yaml); @@ -335,6 +335,23 @@ fn test_strings_needing_quote() { string: '' "}; test_serde(&thing3, yaml3); + + let thing4 = Struct2 { + string: " ".to_owned(), + }; + let yaml4 = indoc! {" + string: ' ' + "}; + test_serde(&thing4, yaml4); + + // The literal norway problem https://hitchdev.com/strictyaml/why/implicit-typing-removed/ + let thing5 = Struct2 { + string: "NO".to_owned(), + }; + let yaml5 = indoc! {" + string: 'NO' + "}; + test_serde(&thing5, yaml5); } #[test] From f0d9ff48c450f16381eb09bc2fadf7dc0bf32143 Mon Sep 17 00:00:00 2001 From: thedavidmeister Date: Tue, 28 Nov 2023 15:38:10 +0400 Subject: [PATCH 5/7] more tests --- src/de.rs | 8 +++++ tests/test_serde.rs | 86 +++++++++++++++++++++++++++------------------ 2 files changed, 60 insertions(+), 34 deletions(-) diff --git a/src/de.rs b/src/de.rs index 571ae235..806950e2 100644 --- a/src/de.rs +++ b/src/de.rs @@ -1115,6 +1115,14 @@ pub(crate) fn ambiguous_string(scalar: &str) -> bool { || lower_scalar == "yes" || lower_scalar == "n" || lower_scalar == "no" + || lower_scalar == "on" + || lower_scalar == "off" + || lower_scalar == "true" + || lower_scalar == "false" + || lower_scalar == "null" + || lower_scalar == "nil" + || lower_scalar == "~" + || lower_scalar == "nan" } pub(crate) fn visit_int<'de, V>(visitor: V, v: &str) -> Result, V> diff --git a/tests/test_serde.rs b/tests/test_serde.rs index 40e93ba2..ed24f2b9 100644 --- a/tests/test_serde.rs +++ b/tests/test_serde.rs @@ -314,44 +314,62 @@ fn test_strings_needing_quote() { leading_zeros: '007' "}; test_serde(&thing, yaml); +} +#[test] +fn test_moar_strings_needing_quote() { #[derive(Serialize, Deserialize, PartialEq, Debug)] - struct Struct2 { - string: String, + struct Struct { + s: String, } - // Long hex values that don't fit in a u64 need to be quoted. - let thing2 = Struct2 { - string: "0xffaed20B7B67e498A3bEEf97386ec1849EFeE6Ac".to_owned(), - }; - let yaml2 = indoc! {" - string: '0xffaed20B7B67e498A3bEEf97386ec1849EFeE6Ac' - "}; - test_serde(&thing2, yaml2); - - let thing3 = Struct2 { - string: "".to_owned(), - }; - let yaml3 = indoc! {" - string: '' - "}; - test_serde(&thing3, yaml3); - - let thing4 = Struct2 { - string: " ".to_owned(), - }; - let yaml4 = indoc! {" - string: ' ' - "}; - test_serde(&thing4, yaml4); - // The literal norway problem https://hitchdev.com/strictyaml/why/implicit-typing-removed/ - let thing5 = Struct2 { - string: "NO".to_owned(), - }; - let yaml5 = indoc! {" - string: 'NO' - "}; - test_serde(&thing5, yaml5); + for s in &[ + // Short hex values. + "0x0", + "0x1", + // Long hex values that don't fit in a u64 need to be quoted. + "0xffaed20B7B67e498A3bEEf97386ec1849EFeE6Ac", + // "empty" strings. + "", + " ", + // The norway problem https://hitchdev.com/strictyaml/why/implicit-typing-removed/ + "NO", + "no", + "No", + "Yes", + "YES", + "yes", + "True", + "TRUE", + "true", + "False", + "FALSE", + "false", + "y", + "Y", + "n", + "N", + "on", + "On", + "ON", + "off", + "Off", + "OFF", + "0", + "1", + "null", + "Null", + "NULL", + "nil", + "Nil", + "NIL", + ] { + let thing = Struct { + s: s.to_string(), + }; + let yaml = format!("s: '{}'\n", s); + test_serde(&thing, &yaml); + } } #[test] From 629dbd539eb4203867509fc263324811d5e1b400 Mon Sep 17 00:00:00 2001 From: thedavidmeister Date: Sat, 30 Dec 2023 01:39:47 +0400 Subject: [PATCH 6/7] more test cases --- tests/test_serde.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_serde.rs b/tests/test_serde.rs index ed24f2b9..c440b698 100644 --- a/tests/test_serde.rs +++ b/tests/test_serde.rs @@ -363,6 +363,10 @@ fn test_moar_strings_needing_quote() { "nil", "Nil", "NIL", + // https://hitchdev.com/strictyaml/why/implicit-typing-removed/#string-or-float + "9.3", + // https://github.com/dtolnay/serde-yaml/pull/398#discussion_r1432944356 + "2E234567", ] { let thing = Struct { s: s.to_string(), From 64afa739f39ded93bfbbe3fb8e49720eb4c6d12d Mon Sep 17 00:00:00 2001 From: thedavidmeister Date: Sat, 30 Dec 2023 01:46:13 +0400 Subject: [PATCH 7/7] more test cases --- src/de.rs | 1 + tests/test_serde.rs | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/src/de.rs b/src/de.rs index 806950e2..640e8795 100644 --- a/src/de.rs +++ b/src/de.rs @@ -1105,6 +1105,7 @@ pub(crate) fn ambiguous_string(scalar: &str) -> bool { parse_bool(&lower_scalar).is_some() || parse_null(&lower_scalar.as_bytes()).is_some() || lower_scalar.len() == 0 + // Can unwrap because we just checked the length. || lower_scalar.bytes().nth(0).unwrap().is_ascii_digit() || lower_scalar.starts_with('-') || lower_scalar.starts_with('.') diff --git a/tests/test_serde.rs b/tests/test_serde.rs index c440b698..bd1fd7fe 100644 --- a/tests/test_serde.rs +++ b/tests/test_serde.rs @@ -367,6 +367,18 @@ fn test_moar_strings_needing_quote() { "9.3", // https://github.com/dtolnay/serde-yaml/pull/398#discussion_r1432944356 "2E234567", + // https://yaml.org/spec/1.2.2/#1022-tag-resolution + "0o7", + "0x3A", + "+12.3", + "0.", + "-0.0", + "12e3", + "-2E+05", + "0", + "-0", + "3", + "-19", ] { let thing = Struct { s: s.to_string(),