Skip to content

Commit

Permalink
Properly use xsi:nil to deserialize null values via serde
Browse files Browse the repository at this point in the history
This commit fixes an issue that causes `quick-xml` trying to deserialize
empty tags via the serde interface even if these tags were explicitly
marked as `xsi:nil="true"`

For example the following XML failed to deserialize before this commit:

```xml
<bar>
	<foo xsi:nil="true"/>
</bar>
```

into the following rust type:

```rust
#[derive(Deserialize)]
struct Bar {
    foo: Option<Inner>,
}

#[derive(Deserialize)]
struct Foo {
    baz: String,
}
```

Before this commit this failed to deserialize with an error message that
complained that the `baz` field was missing. After this commit this uses
the `xsi:nil` attribute to deserialize this into `foo: None` instead.
The standard (https://www.w3.org/TR/xmlschema-1/#xsi_nil) seems to
support this behaviour. 

Fix #497
  • Loading branch information
weiznich committed Jan 28, 2025
1 parent 8f91a9c commit fa3df2a
Show file tree
Hide file tree
Showing 4 changed files with 239 additions and 10 deletions.
10 changes: 9 additions & 1 deletion src/de/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -540,8 +540,16 @@ where
where
V: Visitor<'de>,
{
match self.map.de.peek()? {
let _ = self.map.de.peek()?;
match self.map.de.readable_peek().expect("This exists as we called peek before") {
DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
DeEvent::Start(start)
// if the `xsi:nil` attribute is set to true we got a none value
if start.has_nil_attr(&self.map.de.reader.reader) =>
{
self.map.de.skip_nil_tag()?;
visitor.visit_none()
}
_ => visitor.visit_some(self),
}
}
Expand Down
73 changes: 65 additions & 8 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2010,14 +2010,15 @@ pub use self::resolver::{EntityResolver, PredefinedEntityResolver};
pub use self::simple_type::SimpleTypeDeserializer;
pub use crate::errors::serialize::DeError;

use crate::name::{LocalName, Namespace, PrefixDeclaration, ResolveResult};
use crate::{
de::map::ElementMapAccess,
encoding::Decoder,
errors::Error,
events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
name::QName,
reader::Reader,
utils::CowRef,
NsReader,
};
use serde::de::{
self, Deserialize, DeserializeOwned, DeserializeSeed, IntoDeserializer, SeqAccess, Visitor,
Expand Down Expand Up @@ -2534,6 +2535,10 @@ where
}
}

fn readable_peek(&self) -> Option<&DeEvent<'de>> {
self.peek.as_ref()
}

fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
// Replay skipped or peeked events
#[cfg(feature = "overlapped-lists")]
Expand Down Expand Up @@ -2764,6 +2769,14 @@ where
}
self.reader.read_to_end(name)
}

fn skip_nil_tag(&mut self) -> Result<(), DeError> {
let DeEvent::Start(start) = self.next()? else {
unreachable!("Only call this if the next event is a start event")
};
let name = start.name();
self.read_to_end(name)
}
}

impl<'de> Deserializer<'de, SliceReader<'de>> {
Expand All @@ -2783,7 +2796,7 @@ where
/// Create new deserializer that will borrow data from the specified string
/// and use specified entity resolver.
pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
let mut reader = Reader::from_str(source);
let mut reader = NsReader::from_str(source);
let config = reader.config_mut();
config.expand_empty_elements = true;

Expand Down Expand Up @@ -2826,7 +2839,7 @@ where
/// will borrow instead of copy. If you have `&[u8]` which is known to represent
/// UTF-8, you can decode it first before using [`from_str`].
pub fn with_resolver(reader: R, entity_resolver: E) -> Self {
let mut reader = Reader::from_reader(reader);
let mut reader = NsReader::from_reader(reader);
let config = reader.config_mut();
config.expand_empty_elements = true;

Expand Down Expand Up @@ -2945,9 +2958,17 @@ where
where
V: Visitor<'de>,
{
match self.peek()? {
let _ = self.peek()?;
match self.readable_peek().expect("This exists as we called peek before") {
DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
DeEvent::Eof => visitor.visit_none(),
DeEvent::Start(start)
// if the `xsi:nil` attribute is set to true we got a none value
if start.has_nil_attr(&self.reader.reader) =>
{
self.skip_nil_tag()?;
visitor.visit_none()
}
_ => visitor.visit_some(self),
}
}
Expand Down Expand Up @@ -3071,14 +3092,22 @@ pub trait XmlRead<'i> {

/// A copy of the reader's decoder used to decode strings.
fn decoder(&self) -> Decoder;

/// Resolves a potentially qualified **attribute name** into _(namespace name, local name)_.
///
/// See [`NsReader::resolve_attribute`] for details
fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>);

/// Get the current default namespace
fn default_namespace(&self) -> Option<Namespace<'_>>;
}

/// XML input source that reads from a std::io input stream.
///
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_reader`]
pub struct IoReader<R: BufRead> {
reader: Reader<R>,
reader: NsReader<R>,
start_trimmer: StartTrimmer,
buf: Vec<u8>,
}
Expand Down Expand Up @@ -3113,7 +3142,7 @@ impl<R: BufRead> IoReader<R> {
/// assert_eq!(reader.error_position(), 28);
/// assert_eq!(reader.buffer_position(), 41);
/// ```
pub const fn get_ref(&self) -> &Reader<R> {
pub const fn get_ref(&self) -> &NsReader<R> {
&self.reader
}
}
Expand All @@ -3140,14 +3169,28 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
fn decoder(&self) -> Decoder {
self.reader.decoder()
}

fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
self.reader.resolve_attribute(name)
}

fn default_namespace(&self) -> Option<Namespace<'_>> {
self.reader.prefixes().find_map(|(key, value)| {
if PrefixDeclaration::Default == key {
Some(value)
} else {
None
}
})
}
}

/// XML input source that reads from a slice of bytes and can borrow from it.
///
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_str`].
pub struct SliceReader<'de> {
reader: Reader<&'de [u8]>,
reader: NsReader<&'de [u8]>,
start_trimmer: StartTrimmer,
}

Expand Down Expand Up @@ -3180,7 +3223,7 @@ impl<'de> SliceReader<'de> {
/// assert_eq!(reader.error_position(), 28);
/// assert_eq!(reader.buffer_position(), 41);
/// ```
pub const fn get_ref(&self) -> &Reader<&'de [u8]> {
pub const fn get_ref(&self) -> &NsReader<&'de [u8]> {
&self.reader
}
}
Expand All @@ -3205,6 +3248,20 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
fn decoder(&self) -> Decoder {
self.reader.decoder()
}

fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
self.reader.resolve_attribute(name)
}

fn default_namespace(&self) -> Option<Namespace<'_>> {
self.reader.prefixes().find_map(|(key, value)| {
if PrefixDeclaration::Default == key {
Some(value)
} else {
None
}
})
}
}

#[cfg(test)]
Expand Down
35 changes: 34 additions & 1 deletion src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,17 @@ use crate::errors::{Error, IllFormedError};
use crate::escape::{
escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with,
};
#[cfg(feature = "serialize")]
use crate::name::Namespace;
use crate::name::{LocalName, QName};
#[cfg(feature = "serialize")]
use crate::utils::CowRef;
use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes};
use attributes::{AttrError, Attribute, Attributes};

#[cfg(feature = "serialize")]
const XSI_NAMESPACE_URL: Namespace = Namespace(b"http://www.w3.org/2001/XMLSchema-instance");

/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
///
/// The name can be accessed using the [`name`] or [`local_name`] methods.
Expand Down Expand Up @@ -232,6 +237,34 @@ impl<'a> BytesStart<'a> {
Cow::Owned(ref o) => CowRef::Slice(&o[..self.name_len]),
}
}

/// This method checks if the current tag has a `xsi::nil` attribute
///
/// This attribute should be used for deciding if the value is not set
/// according to https://www.w3.org/TR/xmlschema-1/#xsi_nil
#[cfg(feature = "serialize")]
pub(crate) fn has_nil_attr(&self, reader: &dyn crate::de::XmlRead) -> bool {
use crate::name::ResolveResult;

let default_ns = reader.default_namespace();
self.attributes().any(|attr| {
if let Ok(attr) = attr {
let value_is_true = &*attr.value == b"true" || &*attr.value == b"1";
let might_be_nil_attr = attr.key.0.ends_with(b"nil");
if value_is_true && might_be_nil_attr {
let (res, local_name) = reader.resolve_attribute(attr.key);
(matches!(res, ResolveResult::Bound(XSI_NAMESPACE_URL))
|| (matches!(res, ResolveResult::Unbound)
&& default_ns == Some(XSI_NAMESPACE_URL)))
&& local_name.as_ref() == b"nil"
} else {
false
}
} else {
false
}
})
}
}

/// Attribute-related methods
Expand Down Expand Up @@ -278,7 +311,7 @@ impl<'a> BytesStart<'a> {
}

/// Returns an iterator over the attributes of this tag.
pub fn attributes(&self) -> Attributes {
pub fn attributes<'b>(&'b self) -> Attributes<'b> {
Attributes::wrap(&self.buf, self.name_len, false)
}

Expand Down
131 changes: 131 additions & 0 deletions tests/serde-de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1639,3 +1639,134 @@ mod xml_prolog {
);
}
}

mod nil {
use quick_xml::DeError;

#[derive(Debug, serde::Deserialize, PartialEq)]
struct Foo {
tag: String,
}

#[derive(Debug, serde::Deserialize, PartialEq)]
struct Bar {
foo: Option<Foo>,
}

#[derive(Debug, PartialEq, serde::Deserialize)]
struct Helper {
#[serde(rename = "@attr")]
attr: String,
#[serde(rename = "$value")]
inner: Option<Foo>,
}

#[derive(Debug, PartialEq, serde::Deserialize)]
struct BarWithHelper {
foo: Helper,
}

macro_rules! assert_error_matches {
($res: expr, $err: pat) => {
assert!(
matches!($res, Err($err)),
concat!("Expected `", stringify!($err), "`, but got `{:?}`"),
$res
);
};
}

#[test]
fn true_() {
let data = r#"<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:nil="true"/>"#;

let res = quick_xml::de::from_str::<Option<Foo>>(data).unwrap();
assert_eq!(res, None);

let data = r#"<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:nil="true" attr="value"/>"#;
let res = quick_xml::de::from_str::<Helper>(data).unwrap();
assert_eq!(
res,
Helper {
attr: String::from("value"),
inner: None
}
);

let data = r#"<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:nil="true"><tag>Foo</tag></foo>"#;
let res = quick_xml::de::from_str::<Option<Foo>>(data).unwrap();
assert_eq!(res, None);
}

#[test]
fn false_() {
let data =
r#"<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:nil="false"/>"#;
let res = quick_xml::de::from_str::<Option<Foo>>(data);
assert_error_matches!(res, DeError::Custom(_));
let data = r#"<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:nil="false" attr="value"/>"#;
let res = quick_xml::de::from_str::<Option<Foo>>(data);
assert_error_matches!(res, DeError::Custom(_));
let data = r#"<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:nil="false"><tag>Foo</tag></foo>"#;
let res = quick_xml::de::from_str::<Option<Foo>>(data).unwrap();
assert_eq!(
res,
Some(Foo {
tag: String::from("Foo")
})
);
}

#[test]
fn nested_nil_true() {
let data = r#"<bar xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><foo xsi:nil="true"/></bar>"#;
let res = quick_xml::de::from_str::<Bar>(data).unwrap();
assert_eq!(res, Bar { foo: None });

let data = r#"<bar xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><foo xsi:nil="true" attr="value"/></bar>"#;
let res = quick_xml::de::from_str::<BarWithHelper>(data).unwrap();
assert_eq!(
res,
BarWithHelper {
foo: Helper {
attr: String::from("value"),
inner: None
}
}
);

let data = r#"<bar xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><foo xsi:nil="true"><tag>Foo</tag></foo></bar>"#;
let res = quick_xml::de::from_str::<Bar>(data).unwrap();
assert_eq!(res, Bar { foo: None });
}

#[test]
fn nested_nil_false() {
let data = r#"<bar xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><foo xsi:nil="false"/></bar>"#;
let res = quick_xml::de::from_str::<Bar>(data);
assert_error_matches!(res, DeError::Custom(_));

let data = r#"<bar xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><foo xsi:nil="false" attr="value"/></bar>"#;
let res = quick_xml::de::from_str::<BarWithHelper>(data).unwrap();
assert_eq!(
res,
BarWithHelper {
foo: Helper {
attr: String::from("value"),
inner: None
}
}
);

let data = r#"<bar xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><foo xsi:nil="false"><tag>Foo</tag></foo></bar>"#;
let res = quick_xml::de::from_str::<Bar>(data).unwrap();
assert_eq!(
res,
Bar {
foo: Some(Foo {
tag: String::from("Foo")
})
}
);
}
}

0 comments on commit fa3df2a

Please # to comment.