|
1 | 1 | //! Utility functions for Header implementations.
|
2 | 2 |
|
| 3 | +use language_tags::LanguageTag; |
3 | 4 | use std::str;
|
| 5 | +use std::str::FromStr; |
4 | 6 | use std::fmt::{self, Display};
|
| 7 | +use url::percent_encoding; |
| 8 | + |
| 9 | +use header::shared::Charset; |
5 | 10 |
|
6 | 11 | /// Reads a single raw string when parsing a header.
|
7 | 12 | pub fn from_one_raw_str<T: str::FromStr>(raw: &[Vec<u8>]) -> ::Result<T> {
|
@@ -48,3 +53,131 @@ pub fn fmt_comma_delimited<T: Display>(f: &mut fmt::Formatter, parts: &[T]) -> f
|
48 | 53 | }
|
49 | 54 | Ok(())
|
50 | 55 | }
|
| 56 | + |
| 57 | +/// An extended header parameter value (i.e., tagged with a character set and optionally, |
| 58 | +/// a language), as defined in [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2). |
| 59 | +pub struct ExtendedValue { |
| 60 | + pub charset: Charset, |
| 61 | + pub language_tag: Option<LanguageTag>, |
| 62 | + pub value: Vec<u8>, |
| 63 | +} |
| 64 | + |
| 65 | +/// Parses extended header parameter values (`ext-value`), as defined in |
| 66 | +/// [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2). |
| 67 | +/// |
| 68 | +/// Extended values are denoted by parameter names that end with `*`. |
| 69 | +/// |
| 70 | +/// ## ABNF |
| 71 | +/// ```plain |
| 72 | +/// ext-value = charset "'" [ language ] "'" value-chars |
| 73 | +/// ; like RFC 2231's <extended-initial-value> |
| 74 | +/// ; (see [RFC2231], Section 7) |
| 75 | +/// |
| 76 | +/// charset = "UTF-8" / "ISO-8859-1" / mime-charset |
| 77 | +/// |
| 78 | +/// mime-charset = 1*mime-charsetc |
| 79 | +/// mime-charsetc = ALPHA / DIGIT |
| 80 | +/// / "!" / "#" / "$" / "%" / "&" |
| 81 | +/// / "+" / "-" / "^" / "_" / "`" |
| 82 | +/// / "{" / "}" / "~" |
| 83 | +/// ; as <mime-charset> in Section 2.3 of [RFC2978] |
| 84 | +/// ; except that the single quote is not included |
| 85 | +/// ; SHOULD be registered in the IANA charset registry |
| 86 | +/// |
| 87 | +/// language = <Language-Tag, defined in [RFC5646], Section 2.1> |
| 88 | +/// |
| 89 | +/// value-chars = *( pct-encoded / attr-char ) |
| 90 | +/// |
| 91 | +/// pct-encoded = "%" HEXDIG HEXDIG |
| 92 | +/// ; see [RFC3986], Section 2.1 |
| 93 | +/// |
| 94 | +/// attr-char = ALPHA / DIGIT |
| 95 | +/// / "!" / "#" / "$" / "&" / "+" / "-" / "." |
| 96 | +/// / "^" / "_" / "`" / "|" / "~" |
| 97 | +/// ; token except ( "*" / "'" / "%" ) |
| 98 | +/// ``` |
| 99 | +pub fn parse_extended_value(val: &str) -> ::Result<ExtendedValue> { |
| 100 | + |
| 101 | + // Break into three pieces separated by the single-quote character |
| 102 | + let mut parts = val.splitn(3,'\''); |
| 103 | + |
| 104 | + // Interpret the first piece as a Charset |
| 105 | + let charset: Charset = match parts.next() { |
| 106 | + None => return Err(::Error::Header), |
| 107 | + Some(n) => try!(FromStr::from_str(n)), |
| 108 | + }; |
| 109 | + |
| 110 | + // Interpret the second piece as a language tag |
| 111 | + let lang: Option<LanguageTag> = match parts.next() { |
| 112 | + None => return Err(::Error::Header), |
| 113 | + Some("") => None, |
| 114 | + Some(s) => match s.parse() { |
| 115 | + Ok(lt) => Some(lt), |
| 116 | + Err(_) => return Err(::Error::Header), |
| 117 | + } |
| 118 | + }; |
| 119 | + |
| 120 | + // Interpret the third piece as a sequence of value characters |
| 121 | + let value: Vec<u8> = match parts.next() { |
| 122 | + None => return Err(::Error::Header), |
| 123 | + Some(v) => percent_encoding::percent_decode(v.as_bytes()), |
| 124 | + }; |
| 125 | + |
| 126 | + Ok(ExtendedValue { |
| 127 | + charset: charset, |
| 128 | + language_tag: lang, |
| 129 | + value: value, |
| 130 | + }) |
| 131 | +} |
| 132 | + |
#[cfg(test)]
mod tests {
    use header::shared::Charset;
    use super::parse_extended_value;

    /// RFC 5987, Section 3.2.2: extended notation using the Unicode
    /// character U+00A3 (POUND SIGN), with both a charset and a language.
    #[test]
    fn test_parse_extended_value_with_encoding_and_language_tag() {
        let parsed = parse_extended_value("iso-8859-1'en'%A3%20rates").unwrap();

        assert_eq!(Charset::Iso_8859_1, parsed.charset);
        assert_eq!(Some(langtag!(en)), parsed.language_tag);
        // 0xA3 is the ISO-8859-1 encoding of the pound sign.
        assert_eq!(b"\xA3 rates".to_vec(), parsed.value);
    }

    /// RFC 5987, Section 3.2.2: extended notation using the Unicode
    /// characters U+00A3 (POUND SIGN) and U+20AC (EURO SIGN), without a
    /// language tag.
    #[test]
    fn test_parse_extended_value_with_encoding() {
        let parsed = parse_extended_value("UTF-8''%c2%a3%20and%20%e2%82%ac%20rates").unwrap();

        assert_eq!(Charset::Ext("UTF-8".to_string()), parsed.charset);
        assert_eq!(None, parsed.language_tag);
        // C2 A3 and E2 82 AC are the UTF-8 encodings of the two signs.
        assert_eq!(b"\xC2\xA3 and \xE2\x82\xAC rates".to_vec(), parsed.value);
    }

    /// A plain value without any single quotes is not an `ext-value`.
    /// From: https://greenbytes.de/tech/tc2231/#attwithfn2231quot2
    #[test]
    fn test_parse_extended_value_missing_language_tag_and_encoding() {
        assert!(parse_extended_value("foo%20bar.html").is_err());
    }

    /// Only one single quote: the third piece is missing.
    #[test]
    fn test_parse_extended_value_partially_formatted() {
        assert!(parse_extended_value("UTF-8'missing third part").is_err());
    }

    /// Only one single quote, with nothing after it.
    #[test]
    fn test_parse_extended_value_partially_formatted_blank() {
        assert!(parse_extended_value("blank second part'").is_err());
    }
}
0 commit comments