Skip to content

Commit df4d490

Browse files
Minimize unsafety in encode_utf8
Use slice patterns so that the bounds are checked by the pattern match itself, removing the need for `get_unchecked_mut`
1 parent 9ae6ced commit df4d490

File tree

2 files changed

+30
-30
lines changed

2 files changed

+30
-30
lines changed

src/libcore/char/methods.rs

+29-30
Original file line numberDiff line numberDiff line change
@@ -434,36 +434,35 @@ impl char {
434434
#[inline]
435435
pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
436436
let code = self as u32;
437-
// SAFETY: each arm checks the size of the slice and only uses `get_unchecked` unsafe ops
438-
unsafe {
439-
let len = if code < MAX_ONE_B && !dst.is_empty() {
440-
*dst.get_unchecked_mut(0) = code as u8;
441-
1
442-
} else if code < MAX_TWO_B && dst.len() >= 2 {
443-
*dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
444-
*dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT;
445-
2
446-
} else if code < MAX_THREE_B && dst.len() >= 3 {
447-
*dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
448-
*dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
449-
*dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT;
450-
3
451-
} else if dst.len() >= 4 {
452-
*dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
453-
*dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT;
454-
*dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
455-
*dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT;
456-
4
457-
} else {
458-
panic!(
459-
"encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
460-
from_u32_unchecked(code).len_utf8(),
461-
code,
462-
dst.len(),
463-
)
464-
};
465-
from_utf8_unchecked_mut(dst.get_unchecked_mut(..len))
466-
}
437+
let len = self.len_utf8();
438+
match (len, &mut dst[..]) {
439+
(1, [a, ..]) => {
440+
*a = code as u8;
441+
}
442+
(2, [a, b, ..]) => {
443+
*a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
444+
*b = (code & 0x3F) as u8 | TAG_CONT;
445+
}
446+
(3, [a, b, c, ..]) => {
447+
*a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
448+
*b = (code >> 6 & 0x3F) as u8 | TAG_CONT;
449+
*c = (code & 0x3F) as u8 | TAG_CONT;
450+
}
451+
(4, [a, b, c, d, ..]) => {
452+
*a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
453+
*b = (code >> 12 & 0x3F) as u8 | TAG_CONT;
454+
*c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
455+
*d = (code & 0x3F) as u8 | TAG_CONT;
456+
}
457+
_ => panic!(
458+
"encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
459+
len,
460+
code,
461+
dst.len(),
462+
),
463+
};
464+
// SAFETY: the match above wrote exactly `len` bytes of valid UTF-8 into `dst[..len]`, so viewing that prefix as a `str` is sound.
465+
unsafe { from_utf8_unchecked_mut(&mut dst[..len]) }
467466
}
468467

469468
/// Encodes this character as UTF-16 into the provided `u16` buffer,

src/libcore/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@
129129
#![feature(associated_type_bounds)]
130130
#![feature(const_type_id)]
131131
#![feature(const_caller_location)]
132+
#![feature(slice_patterns)]
132133

133134
#[prelude_import]
134135
#[allow(unused)]

0 commit comments

Comments
 (0)