Skip to content

Commit 79d2782

Browse files
committed
RUST-786 Update collation fields to use custom types (C-CUSTOM-TYPE)
1 parent 18cfdb3 commit 79d2782

File tree

1 file changed

+241
-5
lines changed

1 file changed

+241
-5
lines changed

src/collation.rs

+241-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
1+
use std::convert::TryFrom;
2+
13
use serde::{Deserialize, Serialize};
24
use typed_builder::TypedBuilder;
35

6+
use crate::error::{Error, ErrorKind};
7+
48
/// A collation configuration. See the official MongoDB
59
/// [documentation](https://docs.mongodb.com/manual/reference/collation/) for more information on
610
/// each of the fields.
@@ -16,15 +20,15 @@ pub struct Collation {
1620

1721
/// The level of comparison to perform. Corresponds to [ICU Comparison Levels](http://userguide.icu-project.org/collation/concepts#TOC-Comparison-Levels).
1822
#[builder(default, setter(strip_option))]
19-
pub strength: Option<i32>,
23+
pub strength: Option<CollationStrength>,
2024

21-
/// Whether to include case comparison when `strength` is level 1 or 2.
25+
/// Whether to include a separate level for case differences. See [ICU Collation: CaseLevel](http://userguide.icu-project.org/collation/concepts#TOC-CaseLevel) for more information.
2226
#[builder(default, setter(strip_option))]
2327
pub case_level: Option<bool>,
2428

2529
/// The sort order of case differences during tertiary level comparisons.
2630
#[builder(default, setter(strip_option))]
27-
pub case_first: Option<String>,
31+
pub case_first: Option<CollationCaseFirst>,
2832

2933
/// Whether to compare numeric strings as numbers or strings.
3034
#[builder(default, setter(strip_option))]
@@ -33,12 +37,12 @@ pub struct Collation {
3337
/// Whether collation should consider whitespace and punctuation as base characters for
3438
/// purposes of comparison.
3539
#[builder(default, setter(strip_option))]
36-
pub alternate: Option<String>,
40+
pub alternate: Option<CollationAlternate>,
3741

3842
/// Up to which characters are considered ignorable when `alternate` is "shifted". Has no
3943
/// effect if `alternate` is set to "non-ignorable".
4044
#[builder(default, setter(strip_option))]
41-
pub max_variable: Option<String>,
45+
pub max_variable: Option<CollationMaxVariable>,
4246

4347
/// Whether to check if text requires normalization and to perform it.
4448
#[builder(default, setter(strip_option))]
@@ -48,3 +52,235 @@ pub struct Collation {
4852
#[builder(default, setter(strip_option))]
4953
pub backwards: Option<bool>,
5054
}
55+
56+
/// The level of comparison to perform. Corresponds to [ICU Comparison Levels](http://userguide.icu-project.org/collation/concepts#TOC-Comparison-Levels).
///
/// Each variant corresponds to a numeric level from 1 (`Primary`) through 5 (`Identical`); the
/// `From<CollationStrength> for u32` and `TryFrom<u32>` conversions translate between the two
/// representations.
///
/// The type is a plain field-less enum, so it is cheap to copy, compare and hash.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum CollationStrength {
    /// Typically, this is used to denote differences between base characters (for example, "a" <
    /// "b").
    ///
    /// This is also called the level-1 strength.
    Primary,

    /// Accents in the characters are considered secondary differences (for example, "as" < "às" <
    /// "at").
    ///
    /// This is also called the level-2 strength.
    Secondary,

    /// Upper and lower case differences in characters are distinguished at the tertiary level (for
    /// example, "ao" < "Ao" < "aò").
    ///
    /// This is also called the level-3 strength.
    Tertiary,

    /// When punctuation is ignored at level 1-3, an additional level can be used to distinguish
    /// words with and without punctuation (for example, "ab" < "a-b" < "aB").
    ///
    /// This is also called the level-4 strength.
    Quaternary,

    /// When all other levels are equal, the identical level is used as a tiebreaker. The Unicode
    /// code point values of the NFD form of each string are compared at this level, just in
    /// case there is no difference at levels 1-4.
    ///
    /// This is also called the level-5 strength.
    Identical,
}
91+
92+
impl From<CollationStrength> for u32 {
93+
fn from(strength: CollationStrength) -> Self {
94+
match strength {
95+
CollationStrength::Primary => 1,
96+
CollationStrength::Secondary => 2,
97+
CollationStrength::Tertiary => 3,
98+
CollationStrength::Quaternary => 4,
99+
CollationStrength::Identical => 5,
100+
}
101+
}
102+
}
103+
104+
impl TryFrom<u32> for CollationStrength {
105+
type Error = Error;
106+
107+
fn try_from(level: u32) -> Result<Self, Self::Error> {
108+
Ok(match level {
109+
1 => CollationStrength::Primary,
110+
2 => CollationStrength::Secondary,
111+
3 => CollationStrength::Tertiary,
112+
4 => CollationStrength::Quaternary,
113+
5 => CollationStrength::Identical,
114+
_ => {
115+
return Err(ErrorKind::InvalidArgument {
116+
message: (format!("invalid collation strength: {}", level)),
117+
}
118+
.into())
119+
}
120+
})
121+
}
122+
}
123+
124+
impl Serialize for CollationStrength {
125+
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
126+
where
127+
S: serde::Serializer,
128+
{
129+
let level = u32::from(*self);
130+
serializer.serialize_i32(level as i32)
131+
}
132+
}
133+
134+
impl<'de> Deserialize<'de> for CollationStrength {
135+
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
136+
where
137+
D: serde::Deserializer<'de>,
138+
{
139+
let level = u32::deserialize(deserializer)?;
140+
Self::try_from(level).map_err(serde::de::Error::custom)
141+
}
142+
}
143+
144+
impl std::fmt::Display for CollationStrength {
145+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
146+
std::fmt::Display::fmt(&u32::from(*self), f)
147+
}
148+
}
149+
150+
/// Setting that determines sort order of case differences during case tertiary level comparisons.
151+
/// For more info, see http://userguide.icu-project.org/collation/customization.
152+
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
153+
#[serde(rename_all = "kebab-case")]
154+
#[non_exhaustive]
155+
pub enum CollationCaseFirst {
156+
/// Uppercase sorts before lowercase.
157+
Upper,
158+
159+
/// Lowercase sorts before uppercase.
160+
Lower,
161+
162+
/// Default value. Similar to `Lower` with slight differences.
163+
/// See http://userguide.icu-project.org/collation/customization for details of differences.
164+
Off,
165+
}
166+
167+
impl std::str::FromStr for CollationCaseFirst {
168+
type Err = Error;
169+
170+
fn from_str(s: &str) -> Result<Self, Self::Err> {
171+
match s {
172+
"upper" => Ok(CollationCaseFirst::Upper),
173+
"lower" => Ok(CollationCaseFirst::Lower),
174+
"off" => Ok(CollationCaseFirst::Off),
175+
_ => Err(ErrorKind::InvalidArgument {
176+
message: format!("invalid CollationCaseFirst: {}", s),
177+
}
178+
.into()),
179+
}
180+
}
181+
}
182+
183+
impl CollationCaseFirst {
184+
/// Returns this [`CollationCaseFirst`] as a `&'static str`.
185+
pub fn as_str(&self) -> &'static str {
186+
match self {
187+
CollationCaseFirst::Upper => "upper",
188+
CollationCaseFirst::Lower => "lower",
189+
CollationCaseFirst::Off => "off",
190+
}
191+
}
192+
}
193+
194+
impl std::fmt::Display for CollationCaseFirst {
195+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
196+
std::fmt::Display::fmt(self.as_str(), f)
197+
}
198+
}
199+
200+
/// Setting that determines whether collation should consider whitespace and punctuation as base
201+
/// characters for purposes of comparison.
202+
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
203+
#[serde(rename_all = "kebab-case")]
204+
#[non_exhaustive]
205+
pub enum CollationAlternate {
206+
/// Whitespace and punctuation are considered base characters.
207+
NonIgnorable,
208+
209+
/// Whitespace and punctuation are not considered base characters and are only distinguished at
210+
/// strength levels greater than 3.
211+
Shifted,
212+
}
213+
214+
impl std::str::FromStr for CollationAlternate {
215+
type Err = Error;
216+
217+
fn from_str(s: &str) -> Result<Self, Self::Err> {
218+
match s {
219+
"non-ignorable" => Ok(CollationAlternate::NonIgnorable),
220+
"shifted" => Ok(CollationAlternate::Shifted),
221+
_ => Err(ErrorKind::InvalidArgument {
222+
message: format!("invalid collation alternate: {}", s),
223+
}
224+
.into()),
225+
}
226+
}
227+
}
228+
229+
impl CollationAlternate {
230+
/// Returns this [`CollationAlternate`] as a `&'static str`.
231+
pub fn as_str(&self) -> &'static str {
232+
match self {
233+
CollationAlternate::NonIgnorable => "non-ignorable",
234+
CollationAlternate::Shifted => "shifted",
235+
}
236+
}
237+
}
238+
239+
impl std::fmt::Display for CollationAlternate {
240+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
241+
std::fmt::Display::fmt(self.as_str(), f)
242+
}
243+
}
244+
245+
/// Field that determines up to which characters are considered ignorable when alternate: "shifted".
246+
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
247+
#[serde(rename_all = "kebab-case")]
248+
#[non_exhaustive]
249+
pub enum CollationMaxVariable {
250+
/// Both whitespace and punctuation are "ignorable", i.e. not considered base characters.
251+
Punct,
252+
253+
/// Whitespace are "ignorable", i.e. not considered base characters
254+
Space,
255+
}
256+
257+
impl std::str::FromStr for CollationMaxVariable {
258+
type Err = Error;
259+
260+
fn from_str(s: &str) -> Result<Self, Self::Err> {
261+
match s {
262+
"punct" => Ok(CollationMaxVariable::Punct),
263+
"space" => Ok(CollationMaxVariable::Space),
264+
_ => Err(ErrorKind::InvalidArgument {
265+
message: format!("invalid collation max variable: {}", s),
266+
}
267+
.into()),
268+
}
269+
}
270+
}
271+
272+
impl CollationMaxVariable {
273+
/// Returns this [`CollationMaxVariable`] as a `&'static str`.
274+
pub fn as_str(&self) -> &'static str {
275+
match self {
276+
CollationMaxVariable::Punct => "punct",
277+
CollationMaxVariable::Space => "space",
278+
}
279+
}
280+
}
281+
282+
impl std::fmt::Display for CollationMaxVariable {
283+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
284+
std::fmt::Display::fmt(self.as_str(), f)
285+
}
286+
}

0 commit comments

Comments
 (0)