diff --git a/README.md b/README.md
index 27c58d81..0425f57b 100644
--- a/README.md
+++ b/README.md
@@ -41,8 +41,9 @@ fn main() -> Result<(), serde_yaml::Error> {
     map.insert("y".to_string(), 2.0);
 
     // Serialize it to a YAML string.
+    // 'y' is quoted to avoid ambiguity in parsers that might read it as `true`.
     let yaml = serde_yaml::to_string(&map)?;
-    assert_eq!(yaml, "x: 1.0\ny: 2.0\n");
+    assert_eq!(yaml, "x: 1.0\n'y': 2.0\n");
 
     // Deserialize it back to a Rust type.
     let deserialized_map: BTreeMap<String, f64> = serde_yaml::from_str(&yaml)?;
@@ -75,7 +76,7 @@ fn main() -> Result<(), serde_yaml::Error> {
     let point = Point { x: 1.0, y: 2.0 };
 
     let yaml = serde_yaml::to_string(&point)?;
-    assert_eq!(yaml, "x: 1.0\ny: 2.0\n");
+    assert_eq!(yaml, "x: 1.0\n'y': 2.0\n");
 
     let deserialized_point: Point = serde_yaml::from_str(&yaml)?;
     assert_eq!(point, deserialized_point);
diff --git a/src/de.rs b/src/de.rs
index 566f5f00..640e8795 100644
--- a/src/de.rs
+++ b/src/de.rs
@@ -1096,6 +1096,28 @@ pub(crate) fn digits_but_not_number(scalar: &str) -> bool {
     scalar.len() > 1 && scalar.starts_with('0') && scalar[1..].bytes().all(|b| b.is_ascii_digit())
 }
 
+/// Returns `true` if `scalar` should be quoted when serialized as a string.
+///
+/// If a string looks like it could be parsed as some other type by some YAML
+/// parser on the round trip, or could otherwise be ambiguous, then we should
+/// serialize it with quotes to be safe. This avoids the Norway problem:
+/// <https://hitchdev.com/strictyaml/why/implicit-typing-removed/>
+pub(crate) fn ambiguous_string(scalar: &str) -> bool {
+    // Words that may be read as a bool, null or float by other YAML parsers.
+    // They are compared against the lowercased scalar, so any casing matches.
+    const AMBIGUOUS_WORDS: [&str; 12] = [
+        "y", "yes", "n", "no", "on", "off", "true", "false", "null", "nil", "~", "nan",
+    ];
+
+    let lower_scalar = scalar.to_lowercase();
+    parse_bool(&lower_scalar).is_some()
+        || parse_null(lower_scalar.as_bytes()).is_some()
+        || lower_scalar.is_empty()
+        // A leading digit, sign or dot could be the start of a number.
+        || lower_scalar.starts_with(|c: char| c.is_ascii_digit() || "-.+".contains(c))
+        || AMBIGUOUS_WORDS.contains(&lower_scalar.as_str())
+}
+
 pub(crate) fn visit_int<'de, V>(visitor: V, v: &str) -> Result<Result<V::Value>, V>
 where
     V: Visitor<'de>,
diff --git a/src/lib.rs b/src/lib.rs
index f22cee9d..b1440bae 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -24,8 +24,9 @@
 //!     map.insert("y".to_string(), 2.0);
 //!
 //!     // Serialize it to a YAML string.
+//!     // 'y' is quoted to avoid ambiguity in parsers that might read it as `true`.
 //!     let yaml = serde_yaml::to_string(&map)?;
-//!     assert_eq!(yaml, "x: 1.0\ny: 2.0\n");
+//!     assert_eq!(yaml, "x: 1.0\n'y': 2.0\n");
 //!
 //!     // Deserialize it back to a Rust type.
 //!     let deserialized_map: BTreeMap<String, f64> = serde_yaml::from_str(&yaml)?;
@@ -55,7 +56,7 @@
 //!     let point = Point { x: 1.0, y: 2.0 };
 //!
 //!     let yaml = serde_yaml::to_string(&point)?;
-//!     assert_eq!(yaml, "x: 1.0\ny: 2.0\n");
+//!     assert_eq!(yaml, "x: 1.0\n'y': 2.0\n");
 //!
 //!     let deserialized_point: Point = serde_yaml::from_str(&yaml)?;
 //!     assert_eq!(point, deserialized_point);
diff --git a/src/ser.rs b/src/ser.rs
index 23c761bb..08b0567a 100644
--- a/src/ser.rs
+++ b/src/ser.rs
@@ -340,11 +340,11 @@ where
             }
 
             fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> {
-                Ok(if crate::de::digits_but_not_number(v) {
+                Ok(if crate::de::ambiguous_string(v) {
                     ScalarStyle::SingleQuoted
                 } else {
                     ScalarStyle::Any
                 })
             }
 
             fn visit_unit<E>(self) -> Result<Self::Value, E> {
diff --git a/tests/test_serde.rs b/tests/test_serde.rs
index f16e89c0..bd1fd7fe 100644
--- a/tests/test_serde.rs
+++ b/tests/test_serde.rs
@@ -195,7 +195,7 @@ fn test_map() {
     thing.insert("y".to_owned(), 2);
     let yaml = indoc! {"
         x: 1
-        y: 2
+        'y': 2
     "};
     test_serde(&thing, yaml);
 }
@@ -238,7 +238,7 @@ fn test_basic_struct() {
     };
     let yaml = indoc! {r#"
         x: -4
-        y: "hi\tquoted"
+        'y': "hi\tquoted"
         z: true
     "#};
     test_serde(&thing, yaml);
@@ -316,6 +316,77 @@ fn test_strings_needing_quote() {
     test_serde(&thing, yaml);
 }
 
+#[test]
+fn test_moar_strings_needing_quote() {
+    #[derive(Serialize, Deserialize, PartialEq, Debug)]
+    struct Struct {
+        s: String,
+    }
+
+    for s in &[
+        // Short hex values.
+        "0x0",
+        "0x1",
+        // Long hex values that don't fit in a u64 need to be quoted.
+        "0xffaed20B7B67e498A3bEEf97386ec1849EFeE6Ac",
+        // "empty" strings.
+        "",
+        " ",
+        // The norway problem https://hitchdev.com/strictyaml/why/implicit-typing-removed/
+        "NO",
+        "no",
+        "No",
+        "Yes",
+        "YES",
+        "yes",
+        "True",
+        "TRUE",
+        "true",
+        "False",
+        "FALSE",
+        "false",
+        "y",
+        "Y",
+        "n",
+        "N",
+        "on",
+        "On",
+        "ON",
+        "off",
+        "Off",
+        "OFF",
+        "0",
+        "1",
+        "null",
+        "Null",
+        "NULL",
+        "nil",
+        "Nil",
+        "NIL",
+        // https://hitchdev.com/strictyaml/why/implicit-typing-removed/#string-or-float
+        "9.3",
+        // https://github.com/dtolnay/serde-yaml/pull/398#discussion_r1432944356
+        "2E234567",
+        // https://yaml.org/spec/1.2.2/#1022-tag-resolution
+        "0o7",
+        "0x3A",
+        "+12.3",
+        "0.",
+        "-0.0",
+        "12e3",
+        "-2E+05",
+        "-0",
+        "3",
+        "-19",
+    ] {
+        let thing = Struct {
+            s: s.to_string(),
+        };
+        let yaml = format!("s: '{}'\n", s);
+        test_serde(&thing, &yaml);
+    }
+}
+
 #[test]
 fn test_nested_vec() {
     let thing = vec![vec![1, 2, 3], vec![4, 5, 6]];