|
18 | 18 | use char_private::is_printable;
|
19 | 19 | use convert::TryFrom;
|
20 | 20 | use fmt;
|
| 21 | +use slice; |
21 | 22 | use iter::FusedIterator;
|
22 | 23 | use mem::transmute;
|
23 | 24 |
|
@@ -327,9 +328,9 @@ pub trait CharExt {
|
327 | 328 | #[stable(feature = "core", since = "1.6.0")]
|
328 | 329 | fn len_utf16(self) -> usize;
|
329 | 330 | #[unstable(feature = "unicode", issue = "27784")]
|
330 |
| - fn encode_utf8(self) -> EncodeUtf8; |
| 331 | + fn encode_utf8(self, dst: &mut [u8]) -> &mut str; |
331 | 332 | #[unstable(feature = "unicode", issue = "27784")]
|
332 |
| - fn encode_utf16(self) -> EncodeUtf16; |
| 333 | + fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16]; |
333 | 334 | }
|
334 | 335 |
|
335 | 336 | #[stable(feature = "core", since = "1.6.0")]
|
@@ -419,47 +420,59 @@ impl CharExt for char {
|
419 | 420 | }
|
420 | 421 |
|
421 | 422 | #[inline]
|
422 |
| - fn encode_utf8(self) -> EncodeUtf8 { |
| 423 | + fn encode_utf8(self, dst: &mut [u8]) -> &mut str { |
423 | 424 | let code = self as u32;
|
424 |
| - let mut buf = [0; 4]; |
425 |
| - let pos = if code < MAX_ONE_B { |
426 |
| - buf[3] = code as u8; |
427 |
| - 3 |
428 |
| - } else if code < MAX_TWO_B { |
429 |
| - buf[2] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; |
430 |
| - buf[3] = (code & 0x3F) as u8 | TAG_CONT; |
431 |
| - 2 |
432 |
| - } else if code < MAX_THREE_B { |
433 |
| - buf[1] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; |
434 |
| - buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; |
435 |
| - buf[3] = (code & 0x3F) as u8 | TAG_CONT; |
436 |
| - 1 |
437 |
| - } else { |
438 |
| - buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; |
439 |
| - buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT; |
440 |
| - buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; |
441 |
| - buf[3] = (code & 0x3F) as u8 | TAG_CONT; |
442 |
| - 0 |
443 |
| - }; |
444 |
| - EncodeUtf8 { buf: buf, pos: pos } |
| 425 | + unsafe { |
| 426 | + let len = |
| 427 | + if code < MAX_ONE_B && !dst.is_empty() { |
| 428 | + *dst.get_unchecked_mut(0) = code as u8; |
| 429 | + 1 |
| 430 | + } else if code < MAX_TWO_B && dst.len() >= 2 { |
| 431 | + *dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; |
| 432 | + *dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT; |
| 433 | + 2 |
| 434 | + } else if code < MAX_THREE_B && dst.len() >= 3 { |
| 435 | + *dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; |
| 436 | + *dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT; |
| 437 | + *dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT; |
| 438 | + 3 |
| 439 | + } else if dst.len() >= 4 { |
| 440 | + *dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; |
| 441 | + *dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT; |
| 442 | + *dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT; |
| 443 | + *dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT; |
| 444 | + 4 |
| 445 | + } else { |
| 446 | + panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}", |
| 447 | + from_u32_unchecked(code).len_utf8(), |
| 448 | + code, |
| 449 | + dst.len()) |
| 450 | + }; |
| 451 | + transmute(slice::from_raw_parts_mut(dst.as_mut_ptr(), len)) |
| 452 | + } |
445 | 453 | }
|
446 | 454 |
|
447 | 455 | #[inline]
|
448 |
| - fn encode_utf16(self) -> EncodeUtf16 { |
449 |
| - let mut buf = [0; 2]; |
| 456 | + fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] { |
450 | 457 | let mut code = self as u32;
|
451 |
| - let pos = if (code & 0xFFFF) == code { |
452 |
| - // The BMP falls through (assuming non-surrogate, as it should) |
453 |
| - buf[1] = code as u16; |
454 |
| - 1 |
455 |
| - } else { |
456 |
| - // Supplementary planes break into surrogates. |
457 |
| - code -= 0x1_0000; |
458 |
| - buf[0] = 0xD800 | ((code >> 10) as u16); |
459 |
| - buf[1] = 0xDC00 | ((code as u16) & 0x3FF); |
460 |
| - 0 |
461 |
| - }; |
462 |
| - EncodeUtf16 { buf: buf, pos: pos } |
| 458 | + unsafe { |
| 459 | + if (code & 0xFFFF) == code && !dst.is_empty() { |
| 460 | + // The BMP falls through (assuming non-surrogate, as it should) |
| 461 | + *dst.get_unchecked_mut(0) = code as u16; |
| 462 | + slice::from_raw_parts_mut(dst.as_mut_ptr(), 1) |
| 463 | + } else if dst.len() >= 2 { |
| 464 | + // Supplementary planes break into surrogates. |
| 465 | + code -= 0x1_0000; |
| 466 | + *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16); |
| 467 | + *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF); |
| 468 | + slice::from_raw_parts_mut(dst.as_mut_ptr(), 2) |
| 469 | + } else { |
| 470 | + panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}", |
| 471 | + from_u32_unchecked(code).len_utf16(), |
| 472 | + code, |
| 473 | + dst.len()) |
| 474 | + } |
| 475 | + } |
463 | 476 | }
|
464 | 477 | }
|
465 | 478 |
|
@@ -702,88 +715,7 @@ impl ExactSizeIterator for EscapeDebug { }
|
702 | 715 | #[unstable(feature = "fused", issue = "35602")]
|
703 | 716 | impl FusedIterator for EscapeDebug {}
|
704 | 717 |
|
705 |
| -/// An iterator over `u8` entries represending the UTF-8 encoding of a `char` |
706 |
| -/// value. |
707 |
| -/// |
708 |
| -/// Constructed via the `.encode_utf8()` method on `char`. |
709 |
| -#[unstable(feature = "unicode", issue = "27784")] |
710 |
| -#[derive(Debug)] |
711 |
| -pub struct EncodeUtf8 { |
712 |
| - buf: [u8; 4], |
713 |
| - pos: usize, |
714 |
| -} |
715 |
| - |
716 |
| -impl EncodeUtf8 { |
717 |
| - /// Returns the remaining bytes of this iterator as a slice. |
718 |
| - #[unstable(feature = "unicode", issue = "27784")] |
719 |
| - pub fn as_slice(&self) -> &[u8] { |
720 |
| - &self.buf[self.pos..] |
721 |
| - } |
722 |
| -} |
723 |
| - |
724 |
| -#[unstable(feature = "unicode", issue = "27784")] |
725 |
| -impl Iterator for EncodeUtf8 { |
726 |
| - type Item = u8; |
727 |
| - |
728 |
| - fn next(&mut self) -> Option<u8> { |
729 |
| - if self.pos == self.buf.len() { |
730 |
| - None |
731 |
| - } else { |
732 |
| - let ret = Some(self.buf[self.pos]); |
733 |
| - self.pos += 1; |
734 |
| - ret |
735 |
| - } |
736 |
| - } |
737 |
| - |
738 |
| - fn size_hint(&self) -> (usize, Option<usize>) { |
739 |
| - self.as_slice().iter().size_hint() |
740 |
| - } |
741 |
| -} |
742 |
| - |
743 |
| -#[unstable(feature = "fused", issue = "35602")] |
744 |
| -impl FusedIterator for EncodeUtf8 {} |
745 |
| - |
746 |
| -/// An iterator over `u16` entries represending the UTF-16 encoding of a `char` |
747 |
| -/// value. |
748 |
| -/// |
749 |
| -/// Constructed via the `.encode_utf16()` method on `char`. |
750 |
| -#[unstable(feature = "unicode", issue = "27784")] |
751 |
| -#[derive(Debug)] |
752 |
| -pub struct EncodeUtf16 { |
753 |
| - buf: [u16; 2], |
754 |
| - pos: usize, |
755 |
| -} |
756 |
| - |
757 |
| -impl EncodeUtf16 { |
758 |
| - /// Returns the remaining bytes of this iterator as a slice. |
759 |
| - #[unstable(feature = "unicode", issue = "27784")] |
760 |
| - pub fn as_slice(&self) -> &[u16] { |
761 |
| - &self.buf[self.pos..] |
762 |
| - } |
763 |
| -} |
764 |
| - |
765 |
| - |
766 |
| -#[unstable(feature = "unicode", issue = "27784")] |
767 |
| -impl Iterator for EncodeUtf16 { |
768 |
| - type Item = u16; |
769 |
| - |
770 |
| - fn next(&mut self) -> Option<u16> { |
771 |
| - if self.pos == self.buf.len() { |
772 |
| - None |
773 |
| - } else { |
774 |
| - let ret = Some(self.buf[self.pos]); |
775 |
| - self.pos += 1; |
776 |
| - ret |
777 |
| - } |
778 |
| - } |
779 |
| - |
780 |
| - fn size_hint(&self) -> (usize, Option<usize>) { |
781 |
| - self.as_slice().iter().size_hint() |
782 |
| - } |
783 |
| -} |
784 | 718 |
|
785 |
| -#[unstable(feature = "fused", issue = "35602")] |
786 |
| -impl FusedIterator for EncodeUtf16 {} |
787 | 719 |
|
788 | 720 | /// An iterator over an iterator of bytes of the characters the bytes represent
|
789 | 721 | /// as UTF-8
|
|
0 commit comments