Skip to content

Commit

Permalink
feat(error): Provide ParseError::char_span
Browse files Browse the repository at this point in the history
This simplifies adapting parse errors to application errors.
  • Loading branch information
epage committed Feb 19, 2025
1 parent 2e94c45 commit 8021856
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 3 deletions.
5 changes: 2 additions & 3 deletions src/_tutorial/chapter_7.rs
Original file line number Diff line number Diff line change
Expand Up @@ -625,13 +625,12 @@
//! // customized as well to better fit your needs.
//! let message = error.inner().to_string();
//! let input = input.to_owned();
//! let start = error.offset();
//! // Assume the error span is only for the first `char`.
//! // Semantic errors are free to choose the entire span returned by `Parser::with_span`.
//! let end = (start + 1..).find(|e| input.is_char_boundary(*e)).unwrap_or(start);
//! let span = error.char_span();
//! Self {
//! message,
//! span: start..end,
//! span,
//! input,
//! }
//! }
Expand Down
90 changes: 90 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1263,6 +1263,8 @@ impl<I, E> ParseError<I, E> {

/// The location in [`ParseError::input`] where parsing failed
///
/// To get the span for the `char` this points to, see [`ParseError::char_span`].
///
/// <div class="warning">
///
/// **Note:** This is an offset, not an index, and may point to the end of input
Expand All @@ -1287,6 +1289,48 @@ impl<I, E> ParseError<I, E> {
}
}

impl<I: AsBStr, E> ParseError<I, E> {
    /// The byte indices for the `char` at [`ParseError::offset`]
    ///
    /// The input is viewed as bytes (via [`AsBStr`]) and the returned range covers the whole
    /// UTF-8 encoded `char` that the offset falls in, even when the offset points into the
    /// middle of a multi-byte sequence.
    ///
    /// When the offset is at the end of the input, the range is empty (`offset..offset`).
    #[inline]
    pub fn char_span(&self) -> core::ops::Range<usize> {
        // `core::` rather than `std::` so this also resolves when built without `std`
        char_boundary(self.input.as_bstr(), self.offset())
    }
}

Check warning

Code scanning / clippy

used import from std instead of core Warning

used import from std instead of core

Check warning

Code scanning / clippy

used import from std instead of core Warning

used import from std instead of core

/// Find the byte range of the UTF-8 encoded `char` that contains `offset`
///
/// - If `offset` is exactly `input.len()` (end of input), the empty range `offset..offset` is
///   returned.
/// - Otherwise, the range starts at the closest char boundary at or before `offset` (falling
///   back to `0`) and ends at the first char boundary after `offset` (falling back to
///   `input.len()`).
///
/// An `offset` past the end of `input` is clamped, yielding the span of the last `char`.
fn char_boundary(input: &[u8], offset: usize) -> core::ops::Range<usize> {
    let len = input.len();
    if offset == len {
        return offset..offset;
    }

    // `true` when `i` is in bounds and the byte there starts a `char`
    let is_boundary = |i: &usize| matches!(input.get(*i), Some(&b) if is_utf8_char_boundary(b));

    // Saturate so that `offset == usize::MAX` cannot overflow
    let next = offset.saturating_add(1);

    // Walk backwards from `offset` (inclusive) to the leading byte of the current `char`
    let start = (0..next.min(len)).rev().find(is_boundary).unwrap_or(0);
    // Walk forwards from the byte after `offset` to the leading byte of the next `char`
    let end = (next..len).find(is_boundary).unwrap_or(len);
    start..end
}

/// Whether `b` is the first byte of a UTF-8 encoded `char` (i.e. not a continuation byte)
///
/// Taken from `core::num`
const fn is_utf8_char_boundary(b: u8) -> bool {
    // This is bit magic equivalent to: b < 128 || b >= 192
    (b as i8) >= -0x40
}

impl<I, E> core::fmt::Display for ParseError<I, E>
where
I: AsBStr,
Expand Down Expand Up @@ -1384,6 +1428,52 @@ fn translate_position(input: &[u8], index: usize) -> (usize, usize) {
(line, column)
}

#[cfg(test)]
mod test_char_boundary {
    use super::*;

    /// Check `char_boundary` against every `(offset, expected span)` pair for `input`
    fn assert_spans(input: &str, cases: &[(usize, core::ops::Range<usize>)]) {
        let bytes = input.as_bytes();
        for (offset, expected) in cases {
            let actual = char_boundary(bytes, *offset);
            assert_eq!(
                actual, *expected,
                "input={input:?}, offset={offset:?}"
            );
        }
    }

    /// Single-byte chars: each offset maps to a one-byte span, end of input to an empty span
    #[test]
    fn ascii() {
        assert_spans("hi", &[(0, 0..1), (1, 1..2), (2, 2..2)]);
    }

    /// Two-byte chars: both bytes of a char map to the same two-byte span
    #[test]
    fn utf8() {
        let input = "βèƒôřè";
        assert_eq!(input.len(), 12);
        assert_spans(
            input,
            &[
                (0, 0..2),
                (1, 0..2),
                (2, 2..4),
                (3, 2..4),
                (4, 4..6),
                (5, 4..6),
                (6, 6..8),
                (7, 6..8),
                (8, 8..10),
                (9, 8..10),
                (10, 10..12),
                (11, 10..12),
                (12, 12..12),
            ],
        );
    }
}

#[cfg(test)]
#[cfg(feature = "std")]
mod test_parse_error {
Expand Down

0 comments on commit 8021856

Please sign in to comment.