From 1eefdc91676728f4d10fafdf08c76069a9e55965 Mon Sep 17 00:00:00 2001 From: Kornel Date: Mon, 22 May 2023 17:38:50 +0100 Subject: [PATCH] Require spaces between prolog attrs --- src/reader/parser.rs | 20 ++++++++++++++++++++ src/reader/parser/inside_declaration.rs | 14 +++++++++++++- tests/oasis.fail.txt | 1 - tests/xmltest.fail.txt | 1 - 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/reader/parser.rs b/src/reader/parser.rs index ff163cb4..53455525 100644 --- a/src/reader/parser.rs +++ b/src/reader/parser.rs @@ -249,10 +249,12 @@ pub enum DeclarationSubstate { InsideVersionValue, AfterVersionValue, + BeforeEncoding, InsideEncoding, AfterEncoding, InsideEncodingValue, + AfterEncodingValue, BeforeStandaloneDecl, InsideStandaloneDecl, @@ -730,6 +732,24 @@ mod tests { expect_event!(r, p, Ok(XmlEvent::EndDocument)); } + #[test] + fn malformed_declaration_attrs() { + let (mut r, mut p) = test_data!(r#""#); + expect_event!(r, p, Err(_)); + + let (mut r, mut p) = test_data!(r#""#); + expect_event!(r, p, Err(_)); + + let (mut r, mut p) = test_data!(r#""#); + expect_event!(r, p, Err(_)); + + let (mut r, mut p) = test_data!(r#""#); + expect_event!(r, p, Err(_)); + + let (mut r, mut p) = test_data!(r#""#); + expect_event!(r, p, Err(_)); + } + #[test] fn opening_tag_in_attribute_value() { use crate::reader::error::{SyntaxError, Error, ErrorKind}; diff --git a/src/reader/parser/inside_declaration.rs b/src/reader/parser/inside_declaration.rs index be986d46..4ff1427b 100644 --- a/src/reader/parser/inside_declaration.rs +++ b/src/reader/parser/inside_declaration.rs @@ -89,6 +89,12 @@ impl PullParser { }), DeclarationSubstate::AfterVersionValue => match t { + Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeEncoding)), + Token::ProcessingInstructionEnd => self.emit_start_document(), + _ => Some(self.error(SyntaxError::UnexpectedToken(t))), + }, + + DeclarationSubstate::BeforeEncoding => match t { Token::Character('e') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncoding)), Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)), Token::ProcessingInstructionEnd => self.emit_start_document(), @@ -114,9 +120,15 @@ impl PullParser { DeclarationSubstate::InsideEncodingValue => self.read_attribute_value(t, |this, value| { this.data.encoding = Some(value); - this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeStandaloneDecl)) + this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterEncodingValue)) }), + DeclarationSubstate::AfterEncodingValue => match t { + Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeStandaloneDecl)), + Token::ProcessingInstructionEnd => self.emit_start_document(), + _ => Some(self.error(SyntaxError::UnexpectedToken(t))), + }, + DeclarationSubstate::BeforeStandaloneDecl => match t { Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)), Token::ProcessingInstructionEnd => self.emit_start_document(), diff --git a/tests/oasis.fail.txt b/tests/oasis.fail.txt index ef7ccaa8..4c22812b 100644 --- a/tests/oasis.fail.txt +++ b/tests/oasis.fail.txt @@ -11,7 +11,6 @@ o-p12fail6 p12fail6.xml built-in entity refs excluded o-p12fail7 p12fail7.xml The public ID has a tab character, which is disallowed o-p30fail1 p30fail1.xml An XML declaration is not the same as a TextDecl o-p31fail1 p31fail1.xml external subset excludes doctypedecl -o-p32fail3 p32fail3.xml initial S is required o-p40fail1 p40fail1.xml S is required between attributes o-p44fail4 p44fail4.xml Whitespace required between attributes. o-p45fail2 p45fail2.xml S before contentspec is required. diff --git a/tests/xmltest.fail.txt b/tests/xmltest.fail.txt index f09412e9..d2b44c63 100644 --- a/tests/xmltest.fail.txt +++ b/tests/xmltest.fail.txt @@ -27,7 +27,6 @@ not-wf-sa-086 086.xml Public IDs may not contain "[". not-wf-sa-087 087.xml Public IDs may not contain "[". not-wf-sa-089 089.xml Parameter entities "are" always parsed; NDATA annotations are not permitted. not-wf-sa-091 091.xml Parameter entities "are" always parsed; NDATA annotations are not permitted. -not-wf-sa-096 096.xml Space is required before the standalone declaration. not-wf-sa-104 104.xml Internal general parsed entities are only well formed if they match the "content" production. not-wf-sa-115 115.xml The replacement text of this entity is an illegal character reference, which must be rejected when it is parsed in the context of an attribute value. not-wf-sa-116 116.xml Internal general parsed entities are only well formed if they match the "content" production. This is a partial character reference, not a full one.