Skip to content

Commit 402fb76

Browse files
committed
feat(headers): add extended parameter parser to the public API
Move the extended parameter parser from the Content-Disposition header implementation into the common header parsing module. This allows crates that use Hyper to parse RFC 5987-compliant header parameter values. Add tests based on the examples given in the RFC.
1 parent 23f20c7 commit 402fb76

File tree

2 files changed

+136
-65
lines changed

2 files changed

+136
-65
lines changed

src/header/common/content_disposition.rs

+3-65
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@
88

99
use language_tags::LanguageTag;
1010
use std::fmt;
11-
use std::str::FromStr;
1211
use unicase::UniCase;
1312
use url::percent_encoding;
1413

1514
use header::{Header, HeaderFormat, parsing};
15+
use header::parsing::parse_extended_value;
1616
use header::shared::Charset;
1717

1818
/// The implied disposition of the content of the HTTP body
@@ -133,8 +133,8 @@ impl Header for ContentDisposition {
133133
Charset::Ext("UTF-8".to_owned()), None,
134134
val.trim_matches('"').as_bytes().to_owned())
135135
} else if UniCase(&*key) == UniCase("filename*") {
136-
let (charset, opt_language, value) = try!(parse_ext_value(val));
137-
DispositionParam::Filename(charset, opt_language, value)
136+
let extended_value = try!(parse_extended_value(val));
137+
DispositionParam::Filename(extended_value.charset, extended_value.language_tag, extended_value.value)
138138
} else {
139139
DispositionParam::Ext(key.to_owned(), val.trim_matches('"').to_owned())
140140
}
@@ -195,68 +195,6 @@ impl fmt::Display for ContentDisposition {
195195
}
196196
}
197197

198-
/// Parsing of `ext-value`
199-
/// https://tools.ietf.org/html/rfc5987#section-3.2
200-
///
201-
/// # ABNF
202-
/// ```plain
203-
/// ext-value = charset "'" [ language ] "'" value-chars
204-
/// ; like RFC 2231's <extended-initial-value>
205-
/// ; (see [RFC2231], Section 7)
206-
///
207-
/// charset = "UTF-8" / "ISO-8859-1" / mime-charset
208-
///
209-
/// mime-charset = 1*mime-charsetc
210-
/// mime-charsetc = ALPHA / DIGIT
211-
/// / "!" / "#" / "$" / "%" / "&"
212-
/// / "+" / "-" / "^" / "_" / "`"
213-
/// / "{" / "}" / "~"
214-
/// ; as <mime-charset> in Section 2.3 of [RFC2978]
215-
/// ; except that the single quote is not included
216-
/// ; SHOULD be registered in the IANA charset registry
217-
///
218-
/// language = <Language-Tag, defined in [RFC5646], Section 2.1>
219-
///
220-
/// value-chars = *( pct-encoded / attr-char )
221-
///
222-
/// pct-encoded = "%" HEXDIG HEXDIG
223-
/// ; see [RFC3986], Section 2.1
224-
///
225-
/// attr-char = ALPHA / DIGIT
226-
/// / "!" / "#" / "$" / "&" / "+" / "-" / "."
227-
/// / "^" / "_" / "`" / "|" / "~"
228-
/// ; token except ( "*" / "'" / "%" )
229-
/// ```
230-
fn parse_ext_value(val: &str) -> ::Result<(Charset, Option<LanguageTag>, Vec<u8>)> {
231-
232-
// Break into three pieces separated by the single-quote character
233-
let mut parts = val.splitn(3,'\'');
234-
235-
// Interpret the first piece as a Charset
236-
let charset: Charset = match parts.next() {
237-
None => return Err(::Error::Header),
238-
Some(n) => try!(FromStr::from_str(n)),
239-
};
240-
241-
// Interpret the second piece as a language tag
242-
let lang: Option<LanguageTag> = match parts.next() {
243-
None => return Err(::Error::Header),
244-
Some("") => None,
245-
Some(s) => match s.parse() {
246-
Ok(lt) => Some(lt),
247-
Err(_) => return Err(::Error::Header),
248-
}
249-
};
250-
251-
// Interpret the third piece as a sequence of value characters
252-
let value: Vec<u8> = match parts.next() {
253-
None => return Err(::Error::Header),
254-
Some(v) => percent_encoding::percent_decode(v.as_bytes()),
255-
};
256-
257-
Ok( (charset, lang, value) )
258-
}
259-
260198
#[cfg(test)]
261199
mod tests {
262200
use super::{ContentDisposition,DispositionType,DispositionParam};

src/header/parsing.rs

+133
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
//! Utility functions for Header implementations.
22
3+
use language_tags::LanguageTag;
34
use std::str;
5+
use std::str::FromStr;
46
use std::fmt::{self, Display};
7+
use url::percent_encoding;
8+
9+
use header::shared::Charset;
510

611
/// Reads a single raw string when parsing a header.
712
pub fn from_one_raw_str<T: str::FromStr>(raw: &[Vec<u8>]) -> ::Result<T> {
@@ -48,3 +53,131 @@ pub fn fmt_comma_delimited<T: Display>(f: &mut fmt::Formatter, parts: &[T]) -> f
4853
}
4954
Ok(())
5055
}
56+
57+
/// An extended header parameter value (i.e., tagged with a character set and optionally,
58+
/// a language), as defined in [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2).
59+
pub struct ExtendedValue {
60+
pub charset: Charset,
61+
pub language_tag: Option<LanguageTag>,
62+
pub value: Vec<u8>,
63+
}
64+
65+
/// Parses extended header parameter values (`ext-value`), as defined in
66+
/// [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2).
67+
///
68+
/// Extended values are denoted by parameter names that end with `*`.
69+
///
70+
/// ## ABNF
71+
/// ```plain
72+
/// ext-value = charset "'" [ language ] "'" value-chars
73+
/// ; like RFC 2231's <extended-initial-value>
74+
/// ; (see [RFC2231], Section 7)
75+
///
76+
/// charset = "UTF-8" / "ISO-8859-1" / mime-charset
77+
///
78+
/// mime-charset = 1*mime-charsetc
79+
/// mime-charsetc = ALPHA / DIGIT
80+
/// / "!" / "#" / "$" / "%" / "&"
81+
/// / "+" / "-" / "^" / "_" / "`"
82+
/// / "{" / "}" / "~"
83+
/// ; as <mime-charset> in Section 2.3 of [RFC2978]
84+
/// ; except that the single quote is not included
85+
/// ; SHOULD be registered in the IANA charset registry
86+
///
87+
/// language = <Language-Tag, defined in [RFC5646], Section 2.1>
88+
///
89+
/// value-chars = *( pct-encoded / attr-char )
90+
///
91+
/// pct-encoded = "%" HEXDIG HEXDIG
92+
/// ; see [RFC3986], Section 2.1
93+
///
94+
/// attr-char = ALPHA / DIGIT
95+
/// / "!" / "#" / "$" / "&" / "+" / "-" / "."
96+
/// / "^" / "_" / "`" / "|" / "~"
97+
/// ; token except ( "*" / "'" / "%" )
98+
/// ```
99+
pub fn parse_extended_value(val: &str) -> ::Result<ExtendedValue> {
100+
101+
// Break into three pieces separated by the single-quote character
102+
let mut parts = val.splitn(3,'\'');
103+
104+
// Interpret the first piece as a Charset
105+
let charset: Charset = match parts.next() {
106+
None => return Err(::Error::Header),
107+
Some(n) => try!(FromStr::from_str(n)),
108+
};
109+
110+
// Interpret the second piece as a language tag
111+
let lang: Option<LanguageTag> = match parts.next() {
112+
None => return Err(::Error::Header),
113+
Some("") => None,
114+
Some(s) => match s.parse() {
115+
Ok(lt) => Some(lt),
116+
Err(_) => return Err(::Error::Header),
117+
}
118+
};
119+
120+
// Interpret the third piece as a sequence of value characters
121+
let value: Vec<u8> = match parts.next() {
122+
None => return Err(::Error::Header),
123+
Some(v) => percent_encoding::percent_decode(v.as_bytes()),
124+
};
125+
126+
Ok(ExtendedValue {
127+
charset: charset,
128+
language_tag: lang,
129+
value: value,
130+
})
131+
}
132+
133+
#[cfg(test)]
mod tests {
    use header::shared::Charset;
    use super::parse_extended_value;

    /// RFC 5987, Section 3.2.2: extended notation using the Unicode
    /// character U+00A3 (POUND SIGN), with both charset and language given.
    #[test]
    fn test_parse_extended_value_with_encoding_and_language_tag() {
        let parsed = parse_extended_value("iso-8859-1'en'%A3%20rates")
            .expect("a well-formed ext-value should parse");

        assert_eq!(Charset::Iso_8859_1, parsed.charset);
        assert_eq!(Some(langtag!(en)), parsed.language_tag);
        // 0xA3 is the ISO-8859-1 encoding of the pound sign.
        assert_eq!(b"\xA3 rates".to_vec(), parsed.value);
    }

    /// RFC 5987, Section 3.2.2: extended notation using the Unicode
    /// characters U+00A3 (POUND SIGN) and U+20AC (EURO SIGN), with an empty
    /// language part.
    #[test]
    fn test_parse_extended_value_with_encoding() {
        let parsed = parse_extended_value("UTF-8''%c2%a3%20and%20%e2%82%ac%20rates")
            .expect("a well-formed ext-value should parse");

        assert_eq!(Charset::Ext("UTF-8".to_string()), parsed.charset);
        assert!(parsed.language_tag.is_none());
        // The UTF-8 encodings of the pound and euro signs, left undecoded.
        assert_eq!(b"\xC2\xA3 and \xE2\x82\xAC rates".to_vec(), parsed.value);
    }

    /// A value with no single quotes at all has neither charset nor language.
    /// From: https://greenbytes.de/tech/tc2231/#attwithfn2231quot2
    #[test]
    fn test_parse_extended_value_missing_language_tag_and_encoding() {
        assert!(parse_extended_value("foo%20bar.html").is_err());
    }

    /// Only one single quote, followed by text: the third part is missing.
    #[test]
    fn test_parse_extended_value_partially_formatted() {
        assert!(parse_extended_value("UTF-8'missing third part").is_err());
    }

    /// Only one single quote, at the very end: the second part is blank and
    /// the third part is missing.
    #[test]
    fn test_parse_extended_value_partially_formatted_blank() {
        assert!(parse_extended_value("blank second part'").is_err());
    }
}

0 commit comments

Comments
 (0)