diff --git a/codegen/src/main/kotlin/software/amazon/smithy/rust/codegen/smithy/RuntimeTypes.kt b/codegen/src/main/kotlin/software/amazon/smithy/rust/codegen/smithy/RuntimeTypes.kt index 5ed2ed1f35..ed4ebccd58 100644 --- a/codegen/src/main/kotlin/software/amazon/smithy/rust/codegen/smithy/RuntimeTypes.kt +++ b/codegen/src/main/kotlin/software/amazon/smithy/rust/codegen/smithy/RuntimeTypes.kt @@ -123,15 +123,15 @@ data class RuntimeType(val name: String?, val dependency: RustDependency?, val n fun Base64Encode(runtimeConfig: RuntimeConfig): RuntimeType = RuntimeType( "encode", - CargoDependency.SmithyHttp(runtimeConfig), - "${runtimeConfig.cratePrefix}_http::base64" + CargoDependency.SmithyTypes(runtimeConfig), + "${runtimeConfig.cratePrefix}_types::base64" ) fun Base64Decode(runtimeConfig: RuntimeConfig): RuntimeType = RuntimeType( "decode", - CargoDependency.SmithyHttp(runtimeConfig), - "${runtimeConfig.cratePrefix}_http::base64" + CargoDependency.SmithyTypes(runtimeConfig), + "${runtimeConfig.cratePrefix}_types::base64" ) fun TimestampFormat(runtimeConfig: RuntimeConfig, format: TimestampFormatTrait.Format): RuntimeType { diff --git a/rust-runtime/inlineable/src/blob_serde.rs b/rust-runtime/inlineable/src/blob_serde.rs index 823645b795..f07248f52b 100644 --- a/rust-runtime/inlineable/src/blob_serde.rs +++ b/rust-runtime/inlineable/src/blob_serde.rs @@ -5,8 +5,7 @@ use serde::de::{Error, Unexpected}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use smithy_http::base64; -use smithy_types::Blob; +use smithy_types::{base64, Blob}; pub struct BlobSer<'a>(pub &'a Blob); diff --git a/rust-runtime/smithy-http/Cargo.toml b/rust-runtime/smithy-http/Cargo.toml index 9154f55fd5..78bb8dc9bb 100644 --- a/rust-runtime/smithy-http/Cargo.toml +++ b/rust-runtime/smithy-http/Cargo.toml @@ -30,7 +30,6 @@ tokio-util = { version = "0.6", optional = true} [dev-dependencies] proptest = "1" -base64 = "0.13.0" tokio = {version = "1.6", features = ["macros", "rt", "fs", "io-util"]} tokio-stream = "0.1.5" tempfile = "3.2.0" diff --git a/rust-runtime/smithy-http/src/lib.rs b/rust-runtime/smithy-http/src/lib.rs index c1207efeaa..92837e3a16 100644 --- a/rust-runtime/smithy-http/src/lib.rs +++ b/rust-runtime/smithy-http/src/lib.rs @@ -5,7 +5,6 @@ #![cfg_attr(docsrs, feature(doc_cfg))] -pub mod base64; pub mod body; pub mod byte_stream; pub mod endpoint; diff --git a/rust-runtime/smithy-json/src/deserialize.rs b/rust-runtime/smithy-json/src/deserialize.rs index 7104f62f16..4f2240ce02 100644 --- a/rust-runtime/smithy-json/src/deserialize.rs +++ b/rust-runtime/smithy-json/src/deserialize.rs @@ -3,100 +3,31 @@ * SPDX-License-Identifier: Apache-2.0. */ -use crate::escape::unescape_string; use smithy_types::Number; -use std::borrow::Cow; -use std::fmt; -use std::str::Utf8Error; - -pub use crate::escape::Error as EscapeError; - -#[derive(Debug, PartialEq, Eq)] -pub enum ErrorReason { - InvalidUtf8, - InvalidEscape(char), - InvalidNumber, - ExpectedLiteral(String), - UnexpectedControlCharacter(u8), - UnexpectedToken(char, &'static str), - UnexpectedEOS, -} -use ErrorReason::*; - -#[derive(Debug, PartialEq, Eq)] -pub struct Error { - reason: ErrorReason, - offset: usize, -} - -impl std::error::Error for Error {} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "Error at offset {}: ", self.offset)?; - match &self.reason { - InvalidUtf8 => write!(f, "invalid UTF-8 codepoint in JSON stream"), - InvalidEscape(escape) => write!(f, "invalid JSON escape: \\{}", escape), - InvalidNumber => write!(f, "invalid number"), - ExpectedLiteral(literal) => write!(f, "expected literal: {}", literal), - UnexpectedControlCharacter(value) => write!( - f, - "encountered unescaped control character in string: 0x{:X}", - value - ), - UnexpectedToken(token, expected) => write!( - f, - "unexpected token '{}'. Expected one of {}", - token, expected - ), - UnexpectedEOS => write!(f, "unexpected end of stream"), - } - } -} - -impl From for ErrorReason { - fn from(_: Utf8Error) -> Self { - InvalidUtf8 - } -} - -/// New-type around `&str` that indicates the string is an escaped JSON string. -/// Provides functions for retrieving the string in either form. -#[derive(Debug, PartialEq, Eq)] -pub struct EscapedStr<'a>(&'a str); -impl<'a> EscapedStr<'a> { - pub fn new(value: &'a str) -> EscapedStr<'a> { - EscapedStr(value) - } +mod error; +pub mod token; - /// Returns the escaped string value - pub fn as_escaped_str(&self) -> &str { - self.0 - } +pub use error::{Error, ErrorReason}; +pub use token::{EscapeError, EscapedStr, Offset, Token}; - /// Unescapes the string and returns it. - /// If the string doesn't need unescaping, it will be returned directly. - pub fn to_unescaped(&self) -> Result, EscapeError> { - unescape_string(self.0) - } -} - -/// Enum representing the different JSON tokens that can be returned by [json_token_iter]. -#[derive(Debug, PartialEq)] -pub enum Token<'a> { - StartArray, - EndArray, - ObjectKey(EscapedStr<'a>), - StartObject, - EndObject, - ValueBool(bool), - ValueNull, - ValueNumber(Number), - ValueString(EscapedStr<'a>), -} +use ErrorReason::*; -/// Returns an Iterator of `Result` over an slice of bytes. +/// ### JSON token parser as a Rust iterator +/// +/// This parser will parse and yield exactly one [Token] per iterator `next()` call. +/// Validation is done on the fly, so it is possible for it to parse an invalid JSON document +/// until it gets to the first [Error]. +/// +/// JSON string values are left escaped in the [Token::ValueString] as an [EscapedStr], +/// which is a new type around a slice of original `input` bytes so that the caller can decide +/// when to unescape and allocate into a [String]. +/// +/// ### Note: +/// +/// The parser *will* accept multiple valid JSON values. For example, `b"null true"` will yield +/// `ValueNull` and `ValueTrue`. It is the responsibility of the caller +/// to handle this for their use-case. pub fn json_token_iter(input: &[u8]) -> JsonTokenIterator { JsonTokenIterator { input, @@ -168,7 +99,7 @@ impl<'a> JsonTokenIterator<'a> { /// Creates an error at the given `offset` in the stream. fn error_at(&self, offset: usize, reason: ErrorReason) -> Error { - Error { reason, offset } + Error::new(reason, Some(offset)) } /// Creates an error at the current offset in the stream. @@ -199,40 +130,49 @@ impl<'a> JsonTokenIterator<'a> { self.state_stack.push(state); } + /// Returns current offset + fn offset(&self) -> Offset { + Offset(self.index) + } + /// Discards the '{' character and pushes the `ObjectFirstKeyOrEnd` state. fn start_object(&mut self) -> Token<'a> { + let offset = self.offset(); let byte = self.next_byte(); debug_assert_eq!(byte, Some(b'{')); self.state_stack.push(State::ObjectFirstKeyOrEnd); - Token::StartObject + Token::StartObject { offset } } /// Discards the '}' character and pops the current state. fn end_object(&mut self) -> Token<'a> { + let offset = self.offset(); let (byte, state) = (self.next_byte(), self.state_stack.pop()); debug_assert_eq!(byte, Some(b'}')); debug_assert!( state == Some(State::ObjectFirstKeyOrEnd) || state == Some(State::ObjectNextKeyOrEnd) ); - Token::EndObject + Token::EndObject { offset } } /// Discards the '[' character and pushes the `ArrayFirstValueOrEnd` state. fn start_array(&mut self) -> Token<'a> { + let offset = self.offset(); let byte = self.next_byte(); debug_assert_eq!(byte, Some(b'[')); self.state_stack.push(State::ArrayFirstValueOrEnd); - Token::StartArray + Token::StartArray { offset } } /// Discards the ']' character and pops the current state. fn end_array(&mut self) -> Token<'a> { + let offset = self.offset(); let (byte, state) = (self.next_byte(), self.state_stack.pop()); debug_assert_eq!(byte, Some(b']')); debug_assert!( state == Some(State::ArrayFirstValueOrEnd) || state == Some(State::ArrayNextValueOrEnd) ); - Token::EndArray + Token::EndArray { offset } } /// Reads a JSON string out of the stream. @@ -285,22 +225,32 @@ impl<'a> JsonTokenIterator<'a> { /// Expects a literal `null` next in the stream. fn expect_null(&mut self) -> Result, Error> { + let offset = self.offset(); self.expect_literal(b"null")?; - Ok(Token::ValueNull) + Ok(Token::ValueNull { offset }) } /// Expects a boolean `true` / `false` to be next in the stream and returns its value. fn expect_bool(&mut self) -> Result, Error> { + let offset = self.offset(); match self.peek_expect()? { b't' => { self.expect_literal(b"true")?; - Ok(Token::ValueBool(true)) + Ok(Token::ValueBool { + offset, + value: true, + }) } b'f' => { self.expect_literal(b"false")?; - Ok(Token::ValueBool(false)) + Ok(Token::ValueBool { + offset, + value: false, + }) } - _ => unreachable!(), + _ => unreachable!( + "this function must only be called when the next character is 't' or 'f'" + ), } } @@ -363,6 +313,7 @@ impl<'a> JsonTokenIterator<'a> { /// Expects a number in the stream, and returns its value. fn expect_number(&mut self) -> Result, Error> { + let offset = self.offset(); let (start, end, negative, floating) = self.scan_number(); let number_slice = &self.input[start..end]; @@ -371,25 +322,28 @@ impl<'a> JsonTokenIterator<'a> { let number_str = unsafe { std::str::from_utf8_unchecked(number_slice) }; use std::str::FromStr; - Ok(Token::ValueNumber(if floating { - Number::Float( - f64::from_str(&number_str).map_err(|_| self.error_at(start, InvalidNumber))?, - ) - } else if negative { - // If the negative value overflows, then stuff it into an f64 - let positive = - u64::from_str(&number_str[1..]).map_err(|_| self.error_at(start, InvalidNumber))?; - let negative = positive.wrapping_neg() as i64; - if negative > 0 { - Number::Float(-(positive as f64)) + Ok(Token::ValueNumber { + offset, + value: if floating { + Number::Float( + f64::from_str(&number_str).map_err(|_| self.error_at(start, InvalidNumber))?, + ) + } else if negative { + // If the negative value overflows, then stuff it into an f64 + let positive = u64::from_str(&number_str[1..]) + .map_err(|_| self.error_at(start, InvalidNumber))?; + let negative = positive.wrapping_neg() as i64; + if negative > 0 { + Number::Float(-(positive as f64)) + } else { + Number::NegInt(negative as i64) + } } else { - Number::NegInt(negative as i64) - } - } else { - Number::PosInt( - u64::from_str(&number_str).map_err(|_| self.error_at(start, InvalidNumber))?, - ) - })) + Number::PosInt( + u64::from_str(&number_str).map_err(|_| self.error_at(start, InvalidNumber))?, + ) + }, + }) } /// Reads a value from the stream and returns the next token. For objects and arrays, @@ -397,12 +351,14 @@ impl<'a> JsonTokenIterator<'a> { /// will be returned. fn read_value(&mut self) -> Result, Error> { self.discard_whitespace(); + let offset = self.offset(); match self.peek_expect()? { b'{' => Ok(self.start_object()), b'[' => Ok(self.start_array()), - b'"' => self - .read_string() - .map(|s| Token::ValueString(EscapedStr(s))), + b'"' => self.read_string().map(|s| Token::ValueString { + offset, + value: EscapedStr::new(s), + }), byte => { let value = match byte { b'n' => self.expect_null(), @@ -455,10 +411,14 @@ impl<'a> JsonTokenIterator<'a> { /// Expects an object key. fn object_key(&mut self) -> Result, Error> { + let offset = self.offset(); match self.peek_expect()? { b'"' => { self.replace_state(State::ObjectFieldValue); - self.read_string().map(|s| Token::ObjectKey(EscapedStr(s))) + self.read_string().map(|s| Token::ObjectKey { + offset, + key: EscapedStr::new(s), + }) } byte => Err(self.error(UnexpectedToken(byte.into(), "'\"'"))), } @@ -526,6 +486,10 @@ impl<'a> Iterator for JsonTokenIterator<'a> { #[cfg(test)] mod tests { + use crate::deserialize::token::test::{ + end_array, end_object, object_key, start_array, start_object, value_bool, value_null, + value_number, value_string, + }; use crate::deserialize::{json_token_iter, Error, ErrorReason, EscapedStr, Token}; use proptest::prelude::*; use smithy_types::Number; @@ -540,48 +504,41 @@ mod tests { #[test] fn test_empty_string() { let mut iter = json_token_iter(b"\"\""); - assert_eq!(Some(Ok(Token::ValueString(EscapedStr("")))), iter.next()); + assert_eq!(value_string(0, ""), iter.next()); assert_eq!(None, iter.next()); let mut iter = json_token_iter(b" \r\n\t \"\" "); - assert_eq!(Some(Ok(Token::ValueString(EscapedStr("")))), iter.next()); + assert_eq!(value_string(5, ""), iter.next()); assert_eq!(None, iter.next()); } #[test] fn test_empty_array() { let mut iter = json_token_iter(b"[]"); - assert_eq!(Some(Ok(Token::StartArray)), iter.next()); - assert_eq!(Some(Ok(Token::EndArray)), iter.next()); + assert_eq!(start_array(0), iter.next()); + assert_eq!(end_array(1), iter.next()); assert_eq!(None, iter.next()); } #[test] fn test_empty_object() { let mut iter = json_token_iter(b"{}"); - assert_eq!(Some(Ok(Token::StartObject)), iter.next()); - assert_eq!(Some(Ok(Token::EndObject)), iter.next()); + assert_eq!(start_object(0), iter.next()); + assert_eq!(end_object(1), iter.next()); assert_eq!(None, iter.next()); } #[test] fn test_null() { - assert_eq!( - Some(Ok(Token::ValueNull)), - json_token_iter(b" null ").next() - ); - - let tokens: Result, Error> = json_token_iter(b"[null, null,null]").collect(); - assert_eq!( - vec![ - Token::StartArray, - Token::ValueNull, - Token::ValueNull, - Token::ValueNull, - Token::EndArray - ], - tokens.unwrap() - ); + assert_eq!(value_null(1), json_token_iter(b" null ").next()); + + let mut iter = json_token_iter(b"[null, null,null]"); + assert_eq!(start_array(0), iter.next()); + assert_eq!(value_null(1), iter.next()); + assert_eq!(value_null(7), iter.next()); + assert_eq!(value_null(12), iter.next()); + assert_eq!(end_array(16), iter.next()); + assert_eq!(None, iter.next()); assert!(json_token_iter(b"n").next().unwrap().is_err()); assert!(json_token_iter(b"nul").next().unwrap().is_err()); @@ -594,34 +551,23 @@ mod tests { assert!(json_token_iter(b"truee").next().unwrap().is_err()); assert!(json_token_iter(b"f").next().unwrap().is_err()); assert!(json_token_iter(b"falsee").next().unwrap().is_err()); - assert_eq!( - Some(Ok(Token::ValueBool(true))), - json_token_iter(b" true ").next() - ); - assert_eq!( - Some(Ok(Token::ValueBool(false))), - json_token_iter(b"false").next() - ); - - let tokens: Result, Error> = json_token_iter(b"[true,false]").collect(); - assert_eq!( - vec![ - Token::StartArray, - Token::ValueBool(true), - Token::ValueBool(false), - Token::EndArray - ], - tokens.unwrap() - ); + assert_eq!(value_bool(1, true), json_token_iter(b" true ").next()); + assert_eq!(value_bool(0, false), json_token_iter(b"false").next()); + + let mut iter = json_token_iter(b"[true,false]"); + assert_eq!(start_array(0), iter.next()); + assert_eq!(value_bool(1, true), iter.next()); + assert_eq!(value_bool(6, false), iter.next()); + assert_eq!(end_array(11), iter.next()); + assert_eq!(None, iter.next()); } proptest! { #[test] fn string_prop_test(input in ".*") { let json: String = serde_json::to_string(&input).unwrap(); - let mut iter = json_token_iter(json.as_bytes()); - assert_eq!(Some(Ok(Token::ValueString(EscapedStr(&json[1..(json.len()-1)])))), iter.next()); + assert_eq!(value_string(0, &json[1..(json.len() - 1)]), iter.next()); assert_eq!(None, iter.next()); } @@ -629,12 +575,12 @@ mod tests { fn integer_prop_test(input: i64) { let json = serde_json::to_string(&input).unwrap(); let mut iter = json_token_iter(json.as_bytes()); - assert_eq!(Some(Ok(Token::ValueNumber( - if input < 0 { - Number::NegInt(input) - } else { - Number::PosInt(input as u64) - }))), iter.next()); + let expected = if input < 0 { + Number::NegInt(input) + } else { + Number::PosInt(input as u64) + }; + assert_eq!(value_number(0, expected), iter.next()); assert_eq!(None, iter.next()); } @@ -642,7 +588,7 @@ mod tests { fn float_prop_test(input: f64) { let json = serde_json::to_string(&input).unwrap(); let mut iter = json_token_iter(json.as_bytes()); - assert_eq!(Some(Ok(Token::ValueNumber(Number::Float(input)))), iter.next()); + assert_eq!(value_number(0, Number::Float(input)), iter.next()); assert_eq!(None, iter.next()); } } @@ -650,10 +596,7 @@ mod tests { #[test] fn valid_numbers() { let expect = |number, input| { - assert_eq!( - Token::ValueNumber(number), - json_token_iter(input).next().unwrap().unwrap() - ); + assert_eq!(value_number(0, number), json_token_iter(input).next()); }; expect(Number::Float(0.0), b"0."); expect(Number::Float(0.0), b"0e0"); @@ -674,10 +617,7 @@ mod tests { #[test] fn invalid_numbers_we_are_intentionally_accepting() { let expect = |number, input| { - assert_eq!( - Token::ValueNumber(number), - json_token_iter(input).next().unwrap().unwrap() - ); + assert_eq!(value_number(0, number), json_token_iter(input).next()); }; expect(Number::NegInt(-1), b"-01"); @@ -696,10 +636,10 @@ mod tests { let unexpected_token = |input, token, offset, msg| { let tokens: Vec> = json_token_iter(input).collect(); assert_eq!( - vec![Err(Error { - reason: ErrorReason::UnexpectedToken(token, msg), - offset - }),], + vec![Err(Error::new( + ErrorReason::UnexpectedToken(token, msg), + Some(offset) + ))], tokens, "input: \"{}\"", std::str::from_utf8(input).unwrap(), @@ -709,10 +649,7 @@ mod tests { let invalid_number = |input, offset| { let tokens: Vec> = json_token_iter(input).collect(); assert_eq!( - vec![Err(Error { - reason: ErrorReason::InvalidNumber, - offset - })], + vec![Err(Error::new(ErrorReason::InvalidNumber, Some(offset)))], tokens, "input: \"{}\"", std::str::from_utf8(input).unwrap(), @@ -741,91 +678,74 @@ mod tests { #[test] fn test_unclosed_array() { let mut iter = json_token_iter(br#" [null "#); - assert_eq!(Some(Ok(Token::StartArray)), iter.next()); - assert_eq!(Some(Ok(Token::ValueNull)), iter.next()); + assert_eq!(start_array(1), iter.next()); + assert_eq!(value_null(2), iter.next()); assert_eq!( - Some(Err(Error { - reason: ErrorReason::UnexpectedEOS, - offset: 7 - })), + Some(Err(Error::new(ErrorReason::UnexpectedEOS, Some(7)))), iter.next() ); } #[test] fn test_array_with_items() { - let tokens: Result, Error> = json_token_iter(b"[[], {}, \"test\"]").collect(); - assert_eq!( - vec![ - Token::StartArray, - Token::StartArray, - Token::EndArray, - Token::StartObject, - Token::EndObject, - Token::ValueString(EscapedStr("test")), - Token::EndArray, - ], - tokens.unwrap() - ) + let mut iter = json_token_iter(b"[[], {}, \"test\"]"); + assert_eq!(start_array(0), iter.next()); + assert_eq!(start_array(1), iter.next()); + assert_eq!(end_array(2), iter.next()); + assert_eq!(start_object(5), iter.next()); + assert_eq!(end_object(6), iter.next()); + assert_eq!(value_string(9, "test"), iter.next()); + assert_eq!(end_array(15), iter.next()); + assert_eq!(None, iter.next()); } #[test] fn test_object_with_items() { - let tokens: Result, Error> = json_token_iter( - br#" - { "some_int": 5, - "some_float": 5.2, - "some_negative": -5, - "some_negative_float": -2.4, - "some_string": "test", - "some_struct": { "nested": "asdf" }, - "some_array": ["one", "two"] } - "#, - ) - .collect(); - assert_eq!( - vec![ - Token::StartObject, - Token::ObjectKey(EscapedStr("some_int")), - Token::ValueNumber(Number::PosInt(5)), - Token::ObjectKey(EscapedStr("some_float")), - Token::ValueNumber(Number::Float(5.2)), - Token::ObjectKey(EscapedStr("some_negative")), - Token::ValueNumber(Number::NegInt(-5)), - Token::ObjectKey(EscapedStr("some_negative_float")), - Token::ValueNumber(Number::Float(-2.4)), - Token::ObjectKey(EscapedStr("some_string")), - Token::ValueString(EscapedStr("test")), - Token::ObjectKey(EscapedStr("some_struct")), - Token::StartObject, - Token::ObjectKey(EscapedStr("nested")), - Token::ValueString(EscapedStr("asdf")), - Token::EndObject, - Token::ObjectKey(EscapedStr("some_array")), - Token::StartArray, - Token::ValueString(EscapedStr("one")), - Token::ValueString(EscapedStr("two")), - Token::EndArray, - Token::EndObject, - ], - tokens.unwrap() - ) + let mut tokens = json_token_iter( + br#"{ "some_int": 5, + "some_float": 5.2, + "some_negative": -5, + "some_negative_float": -2.4, + "some_string": "test", + "some_struct": { "nested": "asdf" }, + "some_array": ["one", "two"] }"#, + ); + assert_eq!(start_object(0), tokens.next()); + assert_eq!(object_key(2, "some_int"), tokens.next()); + assert_eq!(value_number(14, Number::PosInt(5)), tokens.next()); + assert_eq!(object_key(35, "some_float"), tokens.next()); + assert_eq!(value_number(49, Number::Float(5.2)), tokens.next()); + assert_eq!(object_key(72, "some_negative"), tokens.next()); + assert_eq!(value_number(89, Number::NegInt(-5)), tokens.next()); + assert_eq!(object_key(111, "some_negative_float"), tokens.next()); + assert_eq!(value_number(134, Number::Float(-2.4)), tokens.next()); + assert_eq!(object_key(158, "some_string"), tokens.next()); + assert_eq!(value_string(173, "test"), tokens.next()); + assert_eq!(object_key(199, "some_struct"), tokens.next()); + assert_eq!(start_object(214), tokens.next()); + assert_eq!(object_key(216, "nested"), tokens.next()); + assert_eq!(value_string(226, "asdf"), tokens.next()); + assert_eq!(end_object(233), tokens.next()); + assert_eq!(object_key(254, "some_array"), tokens.next()); + assert_eq!(start_array(268), tokens.next()); + assert_eq!(value_string(269, "one"), tokens.next()); + assert_eq!(value_string(276, "two"), tokens.next()); + assert_eq!(end_array(281), tokens.next()); + assert_eq!(end_object(283), tokens.next()); + assert_eq!(None, tokens.next()); } #[test] fn test_object_trailing_comma() { let mut iter = json_token_iter(br#" { "test": "trailing", } "#); - assert_eq!(Some(Ok(Token::StartObject)), iter.next()); - assert_eq!(Some(Ok(Token::ObjectKey(EscapedStr("test")))), iter.next()); - assert_eq!( - Some(Ok(Token::ValueString(EscapedStr("trailing")))), - iter.next() - ); + assert_eq!(start_object(1), iter.next()); + assert_eq!(object_key(3, "test"), iter.next()); + assert_eq!(value_string(11, "trailing"), iter.next()); assert_eq!( - Some(Err(Error { - reason: ErrorReason::UnexpectedToken('}', "'\"'"), - offset: 23, - })), + Some(Err(Error::new( + ErrorReason::UnexpectedToken('}', "'\"'"), + Some(23), + ))), iter.next() ); assert_eq!(None, iter.next()); @@ -834,13 +754,10 @@ mod tests { #[test] fn test_object_no_colon() { let mut iter = json_token_iter(br#" {"test" "#); - assert_eq!(Some(Ok(Token::StartObject)), iter.next()); - assert_eq!(Some(Ok(Token::ObjectKey(EscapedStr("test")))), iter.next()); + assert_eq!(start_object(1), iter.next()); + assert_eq!(object_key(2, "test"), iter.next()); assert_eq!( - Some(Err(Error { - reason: ErrorReason::UnexpectedEOS, - offset: 9, - })), + Some(Err(Error::new(ErrorReason::UnexpectedEOS, Some(9),))), iter.next() ); assert_eq!(None, iter.next()); diff --git a/rust-runtime/smithy-json/src/deserialize/error.rs b/rust-runtime/smithy-json/src/deserialize/error.rs new file mode 100644 index 0000000000..22f58b5645 --- /dev/null +++ b/rust-runtime/smithy-json/src/deserialize/error.rs @@ -0,0 +1,84 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ + +use crate::escape::Error as EscapeError; +use std::borrow::Cow; +use std::fmt; +use std::str::Utf8Error; + +#[derive(Debug, PartialEq, Eq)] +pub enum ErrorReason { + Custom(Cow<'static, str>), + ExpectedLiteral(String), + InvalidEscape(char), + InvalidNumber, + InvalidUtf8, + UnescapeFailed(EscapeError), + UnexpectedControlCharacter(u8), + UnexpectedEOS, + UnexpectedToken(char, &'static str), +} +use ErrorReason::*; + +#[derive(Debug, PartialEq, Eq)] +pub struct Error { + reason: ErrorReason, + offset: Option, +} + +impl Error { + pub fn new(reason: ErrorReason, offset: Option) -> Self { + Error { reason, offset } + } + + /// Returns a custom error without an offset. + pub fn custom(message: &'static str) -> Error { + Error::new(ErrorReason::Custom(message.into()), None) + } +} + +impl std::error::Error for Error {} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(offset) = self.offset { + write!(f, "Error at offset {}: ", offset)?; + } + match &self.reason { + Custom(msg) => write!(f, "failed to parse JSON: {}", msg), + ExpectedLiteral(literal) => write!(f, "expected literal: {}", literal), + InvalidEscape(escape) => write!(f, "invalid JSON escape: \\{}", escape), + InvalidNumber => write!(f, "invalid number"), + InvalidUtf8 => write!(f, "invalid UTF-8 codepoint in JSON stream"), + UnescapeFailed(err) => write!(f, "failed to unescape JSON string: {}", err), + UnexpectedControlCharacter(value) => write!( + f, + "encountered unescaped control character in string: 0x{:X}", + value + ), + UnexpectedToken(token, expected) => write!( + f, + "unexpected token '{}'. Expected one of {}", + token, expected + ), + UnexpectedEOS => write!(f, "unexpected end of stream"), + } + } +} + +impl From for ErrorReason { + fn from(_: Utf8Error) -> Self { + InvalidUtf8 + } +} + +impl From for Error { + fn from(err: EscapeError) -> Self { + Error { + reason: ErrorReason::UnescapeFailed(err), + offset: None, + } + } +} diff --git a/rust-runtime/smithy-json/src/deserialize/token.rs b/rust-runtime/smithy-json/src/deserialize/token.rs new file mode 100644 index 0000000000..2011814863 --- /dev/null +++ b/rust-runtime/smithy-json/src/deserialize/token.rs @@ -0,0 +1,476 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ + +use crate::deserialize::error::{Error, ErrorReason}; +use crate::escape::unescape_string; +use smithy_types::instant::Format; +use smithy_types::{base64, Blob, Instant, Number}; +use std::borrow::Cow; + +pub use crate::escape::Error as EscapeError; + +/// New-type around `&str` that indicates the string is an escaped JSON string. +/// Provides functions for retrieving the string in either form. +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +pub struct EscapedStr<'a>(&'a str); + +impl<'a> EscapedStr<'a> { + pub fn new(value: &'a str) -> EscapedStr<'a> { + EscapedStr(value) + } + + /// Returns the escaped string value + pub fn as_escaped_str(&self) -> &str { + self.0 + } + + /// Unescapes the string and returns it. + /// If the string doesn't need unescaping, it will be returned directly. + pub fn to_unescaped(&self) -> Result, EscapeError> { + unescape_string(self.0) + } +} + +/// Represents the location of a token +#[derive(Debug, Eq, PartialEq, Copy, Clone)] +pub struct Offset(pub usize); + +impl Offset { + /// Creates a custom error from the offset + pub fn error(&self, msg: Cow<'static, str>) -> Error { + Error::new(ErrorReason::Custom(msg), Some(self.0)) + } +} + +/// Enum representing the different JSON tokens that can be returned by json_token_iter. +#[derive(Debug, PartialEq)] +pub enum Token<'a> { + StartArray { + offset: Offset, + }, + EndArray { + offset: Offset, + }, + ObjectKey { + offset: Offset, + key: EscapedStr<'a>, + }, + StartObject { + offset: Offset, + }, + EndObject { + offset: Offset, + }, + ValueBool { + offset: Offset, + value: bool, + }, + ValueNull { + offset: Offset, + }, + ValueNumber { + offset: Offset, + value: Number, + }, + ValueString { + offset: Offset, + value: EscapedStr<'a>, + }, +} + +impl<'a> Token<'a> { + pub fn offset(&self) -> Offset { + use Token::*; + *match self { + StartArray { offset } => offset, + EndArray { offset } => offset, + ObjectKey { offset, .. } => offset, + StartObject { offset } => offset, + EndObject { offset } => offset, + ValueBool { offset, .. } => offset, + ValueNull { offset } => offset, + ValueNumber { offset, .. } => offset, + ValueString { offset, .. } => offset, + } + } + + /// Builds an error from the token's offset + pub fn error(&self, msg: Cow<'static, str>) -> Error { + self.offset().error(msg) + } +} + +macro_rules! expect_fn { + ($name:ident, $token:ident, $doc:tt) => { + #[doc=$doc] + pub fn $name(token_result: Option, Error>>) -> Result<(), Error> { + match token_result.transpose()? { + Some(Token::$token { .. }) => Ok(()), + Some(token) => { + Err(token.error(Cow::Borrowed(concat!("expected ", stringify!($token))))) + } + None => Err(Error::new( + ErrorReason::Custom(Cow::Borrowed(concat!("expected ", stringify!($token)))), + None, + )), + } + } + }; +} + +expect_fn!( + expect_start_object, + StartObject, + "Expects a [Token::StartObject] token and returns an error if it's not present." +); +expect_fn!( + expect_start_array, + StartArray, + "Expects a [Token::StartArray] token and returns an error if it's not present." +); + +macro_rules! expect_value_or_null_fn { + ($name:ident, $token:ident, $typ:ident, $doc:tt) => { + #[doc=$doc] + pub fn $name(token: Option>) -> Result, Error> { + match token.transpose()? { + Some(Token::ValueNull { .. }) => Ok(None), + Some(Token::$token { value, .. }) => Ok(Some(value)), + _ => Err(Error::custom(concat!( + "expected ", + stringify!($token), + " or ValueNull" + ))), + } + } + }; +} + +expect_value_or_null_fn!(expect_bool_or_null, ValueBool, bool, "Expects a [Token::ValueBool] or [Token::ValueNull], and returns the bool value if it's not null."); +expect_value_or_null_fn!(expect_number_or_null, ValueNumber, Number, "Expects a [Token::ValueNumber] or [Token::ValueNull], and returns the [Number] value if it's not null."); +expect_value_or_null_fn!(expect_string_or_null, ValueString, EscapedStr, "Expects a [Token::ValueString] or [Token::ValueNull], and returns the [EscapedStr] value if it's not null."); + +/// Expects a [Token::ValueString] or [Token::ValueNull]. If the value is a string, its **unescaped** value will be returned. +pub fn expect_unescaped_string_or_null( + token: Option, Error>>, +) -> Result, Error> { + Ok(match expect_string_or_null(token)? { + Some(value) => Some(value.to_unescaped()?.to_string()), + None => None, + }) +} + +/// Expects a [Token::ValueString] or [Token::ValueNull]. If the value is a string, it interprets it as a base64 encoded [Blob] value. +pub fn expect_blob_or_null(token: Option, Error>>) -> Result, Error> { + Ok(match expect_string_or_null(token)? { + Some(value) => Some(Blob::new(base64::decode(value.as_escaped_str()).map_err( + |err| { + Error::new( + ErrorReason::Custom(Cow::Owned(format!("failed to decode base64: {}", err))), + None, + ) + }, + )?)), + None => None, + }) +} + +/// Expects a [Token::ValueNull], [Token::ValueString], or [Token::ValueNumber] depending +/// on the passed in `timestamp_format`. If there is a non-null value, it interprets it as an +/// [Instant] in the requested format. +pub fn expect_timestamp_or_null( + token: Option, Error>>, + timestamp_format: Format, +) -> Result, Error> { + Ok(match timestamp_format { + Format::EpochSeconds => { + expect_number_or_null(token)?.map(|v| Instant::from_f64(v.to_f64())) + } + Format::DateTime | Format::HttpDate => expect_string_or_null(token)? + .map(|v| Instant::from_str(v.as_escaped_str(), timestamp_format)) + .transpose() + .map_err(|err| { + Error::new( + ErrorReason::Custom(Cow::Owned(format!("failed to parse timestamp: {}", err))), + None, + ) + })?, + }) +} + +/// Skips an entire value in the token stream. Errors if it isn't a value. +pub fn skip_value<'a>( + tokens: &mut impl Iterator, Error>>, +) -> Result<(), Error> { + skip_inner(false, tokens) +} + +fn skip_inner<'a>( + inside_obj_or_array: bool, + tokens: &mut impl Iterator, Error>>, +) -> Result<(), Error> { + loop { + match tokens.next().transpose()? { + Some(Token::StartObject { .. }) | Some(Token::StartArray { .. }) => { + skip_inner(true, tokens)?; + if !inside_obj_or_array { + break; + } + } + Some(Token::EndObject { .. }) | Some(Token::EndArray { .. }) => { + debug_assert!(inside_obj_or_array); + break; + } + Some(Token::ValueNull { .. }) + | Some(Token::ValueBool { .. }) + | Some(Token::ValueNumber { .. }) + | Some(Token::ValueString { .. }) => { + if !inside_obj_or_array { + break; + } + } + Some(Token::ObjectKey { .. }) => {} + _ => return Err(Error::custom("expected value")), + } + } + Ok(()) +} + +#[cfg(test)] +pub mod test { + use super::*; + use crate::deserialize::error::ErrorReason::UnexpectedToken; + use crate::deserialize::json_token_iter; + + pub fn start_array<'a>(offset: usize) -> Option, Error>> { + Some(Ok(Token::StartArray { + offset: Offset(offset), + })) + } + + pub fn end_array<'a>(offset: usize) -> Option, Error>> { + Some(Ok(Token::EndArray { + offset: Offset(offset), + })) + } + + pub fn start_object<'a>(offset: usize) -> Option, Error>> { + Some(Ok(Token::StartObject { + offset: Offset(offset), + })) + } + + pub fn end_object<'a>(offset: usize) -> Option, Error>> { + Some(Ok(Token::EndObject { + offset: Offset(offset), + })) + } + + pub fn object_key(offset: usize, key: &str) -> Option> { + Some(Ok(Token::ObjectKey { + offset: Offset(offset), + key: EscapedStr::new(key), + })) + } + + pub fn value_bool<'a>(offset: usize, boolean: bool) -> Option, Error>> { + Some(Ok(Token::ValueBool { + offset: Offset(offset), + value: boolean, + })) + } + + pub fn value_number<'a>(offset: usize, number: Number) -> Option, Error>> { + Some(Ok(Token::ValueNumber { + offset: Offset(offset), + value: number, + })) + } + + pub fn value_null<'a>(offset: usize) -> Option, Error>> { + Some(Ok(Token::ValueNull { + offset: Offset(offset), + })) + } + + pub fn value_string(offset: usize, string: &str) -> Option> { + Some(Ok(Token::ValueString { + offset: Offset(offset), + value: EscapedStr::new(string), + })) + } + + #[test] + fn skip_simple_value() { + let mut tokens = json_token_iter(b"null true"); + skip_value(&mut tokens).unwrap(); + assert!(matches!( + tokens.next(), + Some(Ok(Token::ValueBool { value: true, .. })) + )) + } + + #[test] + fn skip_array() { + let mut tokens = json_token_iter(b"[1, 2, 3, 4] true"); + skip_value(&mut tokens).unwrap(); + assert!(matches!( + tokens.next(), + Some(Ok(Token::ValueBool { value: true, .. })) + )) + } + + #[test] + fn skip_object() { + let mut tokens = json_token_iter(b"{\"one\": 5, \"two\": 3} true"); + skip_value(&mut tokens).unwrap(); + assert!(matches!( + tokens.next(), + Some(Ok(Token::ValueBool { value: true, .. })) + )) + } + + #[test] + fn mismatched_braces() { + // The skip_value function doesn't need to explicitly handle these cases since + // token iterator's parser handles them. This test confirms that assumption. + assert_eq!( + Err(Error::new(UnexpectedToken(']', "'}', ','"), Some(10),)), + skip_value(&mut json_token_iter(br#"[{"foo": 5]}"#)) + ); + assert_eq!( + Err(Error::new(UnexpectedToken(']', "'}', ','"), Some(9),)), + skip_value(&mut json_token_iter(br#"{"foo": 5]}"#)) + ); + assert_eq!( + Err(Error::new(UnexpectedToken('}', "']', ','"), Some(4),)), + skip_value(&mut json_token_iter(br#"[5,6}"#)) + ); + } + + #[test] + fn skip_nested() { + let mut tokens = json_token_iter( + br#" + {"struct": {"foo": 5, "bar": 11, "arr": [1, 2, 3, {}, 5, []]}, + "arr": [[], [[]], [{"arr":[]}]], + "simple": "foo"} + true + "#, + ); + skip_value(&mut tokens).unwrap(); + assert!(matches!( + tokens.next(), + Some(Ok(Token::ValueBool { value: true, .. })) + )) + } + + #[test] + fn test_expect_start_object() { + assert_eq!( + Err(Error::new( + ErrorReason::Custom("expected StartObject".into()), + Some(2) + )), + expect_start_object(value_bool(2, true)) + ); + assert_eq!(Ok(()), expect_start_object(start_object(0))); + } + + #[test] + fn test_expect_start_array() { + assert_eq!( + Err(Error::new( + ErrorReason::Custom("expected StartArray".into()), + Some(2) + )), + expect_start_array(value_bool(2, true)) + ); + assert_eq!(Ok(()), expect_start_array(start_array(0))); + } + + #[test] + fn test_expect_string_or_null() { + assert_eq!(Ok(None), expect_string_or_null(value_null(0))); + assert_eq!( + Ok(Some(EscapedStr("test\\n"))), + expect_string_or_null(value_string(0, "test\\n")) + ); + assert_eq!( + Err(Error::custom("expected ValueString or ValueNull")), + expect_string_or_null(value_bool(0, true)) + ); + } + + #[test] + fn test_expect_unescaped_string_or_null() { + assert_eq!(Ok(None), expect_unescaped_string_or_null(value_null(0))); + assert_eq!( + Ok(Some("test\n".to_string())), + expect_unescaped_string_or_null(value_string(0, "test\\n")) + ); + assert_eq!( + Err(Error::custom("expected ValueString or ValueNull")), + expect_unescaped_string_or_null(value_bool(0, true)) + ); + } + + #[test] + fn test_expect_number_or_null() { + assert_eq!(Ok(None), expect_number_or_null(value_null(0))); + assert_eq!( + Ok(Some(Number::PosInt(5))), + expect_number_or_null(value_number(0, Number::PosInt(5))) + ); + assert_eq!( + Err(Error::custom("expected ValueNumber or ValueNull")), + expect_number_or_null(value_bool(0, true)) + ); + } + + #[test] + fn test_expect_blob_or_null() { + assert_eq!(Ok(None), expect_blob_or_null(value_null(0))); + assert_eq!( + Ok(Some(Blob::new(b"hello!".to_vec()))), + expect_blob_or_null(value_string(0, "aGVsbG8h")) + ); + assert_eq!( + Err(Error::custom("expected ValueString or ValueNull")), + expect_blob_or_null(value_bool(0, true)) + ); + } + + #[test] + fn test_expect_timestamp_or_null() { + assert_eq!( + Ok(None), + expect_timestamp_or_null(value_null(0), Format::HttpDate) + ); + assert_eq!( + Ok(Some(Instant::from_f64(2048.0))), + expect_timestamp_or_null(value_number(0, Number::Float(2048.0)), Format::EpochSeconds) + ); + assert_eq!( + Ok(Some(Instant::from_f64(1445412480.0))), + expect_timestamp_or_null( + value_string(0, "Wed, 21 Oct 2015 07:28:00 GMT"), + Format::HttpDate + ) + ); + assert_eq!( + Ok(Some(Instant::from_f64(1445412480.0))), + expect_timestamp_or_null(value_string(0, "2015-10-21T07:28:00Z"), Format::DateTime) + ); + assert_eq!( + Err(Error::custom("expected ValueNumber or ValueNull")), + expect_timestamp_or_null(value_string(0, "wrong"), Format::EpochSeconds) + ); + assert_eq!( + Err(Error::custom("expected ValueString or ValueNull")), + expect_timestamp_or_null(value_number(0, Number::Float(0.0)), Format::DateTime) + ); + } +} diff --git a/rust-runtime/smithy-types/Cargo.toml b/rust-runtime/smithy-types/Cargo.toml index d38af73f28..636bb28d18 100644 --- a/rust-runtime/smithy-types/Cargo.toml +++ b/rust-runtime/smithy-types/Cargo.toml @@ -13,5 +13,6 @@ default = ["chrono-conversions"] chrono = { version = "0.4", default-features = false, features = [] } [dev-dependencies] +base64 = "0.13.0" chrono = { version = "0.4", default-features = false, features = ["alloc"] } proptest = "1" diff --git a/rust-runtime/smithy-http/src/base64.rs b/rust-runtime/smithy-types/src/base64.rs similarity index 94% rename from rust-runtime/smithy-http/src/base64.rs rename to rust-runtime/smithy-types/src/base64.rs index c718d16d24..ca9ab53660 100644 --- a/rust-runtime/smithy-http/src/base64.rs +++ b/rust-runtime/smithy-types/src/base64.rs @@ -3,8 +3,10 @@ * SPDX-License-Identifier: Apache-2.0. */ -/// A correct, small, but not especially fast -/// base64 implementation +//! A correct, small, but not especially fast base64 implementation + +use std::error::Error; +use std::fmt; const BASE64_ENCODE_TABLE: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; @@ -86,6 +88,19 @@ pub enum DecodeError { InvalidLength, } +impl Error for DecodeError {} + +impl fmt::Display for DecodeError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use DecodeError::*; + match self { + InvalidByte => write!(f, "invalid byte"), + InvalidPadding => write!(f, "invalid padding"), + InvalidLength => write!(f, "invalid length"), + } + } +} + fn decode_inner(inp: &str) -> Result, DecodeError> { // one base64 character is only 6 bits so it can't produce valid data. if inp.len() == 1 { diff --git a/rust-runtime/smithy-types/src/instant/format.rs b/rust-runtime/smithy-types/src/instant/format.rs index 490154a26b..46e67695dc 100644 --- a/rust-runtime/smithy-types/src/instant/format.rs +++ b/rust-runtime/smithy-types/src/instant/format.rs @@ -3,6 +3,9 @@ * SPDX-License-Identifier: Apache-2.0. */ +use std::error::Error; +use std::fmt; + const NANOS_PER_SECOND: u32 = 1_000_000_000; #[non_exhaustive] @@ -12,6 +15,18 @@ pub enum DateParseError { IntParseError, } +impl Error for DateParseError {} + +impl fmt::Display for DateParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use DateParseError::*; + match self { + Invalid(msg) => write!(f, "invalid date: {}", msg), + IntParseError => write!(f, "failed to parse int"), + } + } +} + pub mod http_date { use std::str::FromStr; diff --git a/rust-runtime/smithy-types/src/lib.rs b/rust-runtime/smithy-types/src/lib.rs index 2e84366d2e..733d1fc2fe 100644 --- a/rust-runtime/smithy-types/src/lib.rs +++ b/rust-runtime/smithy-types/src/lib.rs @@ -3,6 +3,7 @@ * SPDX-License-Identifier: Apache-2.0. */ +pub mod base64; pub mod instant; pub mod retry; @@ -16,8 +17,10 @@ pub struct Blob { } impl Blob { - pub fn new>>(inp: T) -> Self { - Blob { inner: inp.into() } + pub fn new>>(input: T) -> Self { + Blob { + inner: input.into(), + } } pub fn into_inner(self) -> Vec { self.inner @@ -57,6 +60,34 @@ pub enum Number { Float(f64), } +macro_rules! to_num_fn { + ($name:ident, $typ:ident) => { + /// Converts to a `$typ`. This conversion may be lossy. + pub fn $name(&self) -> $typ { + match self { + Number::PosInt(val) => *val as $typ, + Number::NegInt(val) => *val as $typ, + Number::Float(val) => *val as $typ, + } + } + }; +} + +impl Number { + to_num_fn!(to_f32, f32); + to_num_fn!(to_f64, f64); + + to_num_fn!(to_i8, i8); + to_num_fn!(to_i16, i16); + to_num_fn!(to_i32, i32); + to_num_fn!(to_i64, i64); + + to_num_fn!(to_u8, u8); + to_num_fn!(to_u16, u16); + to_num_fn!(to_u32, u32); + to_num_fn!(to_u64, u64); +} + /* ANCHOR_END: document */ pub use error::Error;