From 03eed121687b4a489bb4d6648ea448dd562165bd Mon Sep 17 00:00:00 2001 From: may Date: Mon, 3 Feb 2025 23:25:24 +0100 Subject: [PATCH 1/2] implement inherent str constructors --- library/core/src/str/mod.rs | 176 ++++++++++++++++++ .../suggest-std-when-using-type.fixed | 5 +- .../suggest-std-when-using-type.rs | 5 +- .../suggest-std-when-using-type.stderr | 16 +- 4 files changed, 180 insertions(+), 22 deletions(-) diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs index 39fa6c1a25fe9..17fb43b7830eb 100644 --- a/library/core/src/str/mod.rs +++ b/library/core/src/str/mod.rs @@ -160,6 +160,182 @@ impl str { self.len() == 0 } + /// Converts a slice of bytes to a string slice. + /// + /// A string slice ([`&str`]) is made of bytes ([`u8`]), and a byte slice + /// ([`&[u8]`][byteslice]) is made of bytes, so this function converts between + /// the two. Not all byte slices are valid string slices, however: [`&str`] requires + /// that it is valid UTF-8. `from_utf8()` checks to ensure that the bytes are valid + /// UTF-8, and then does the conversion. + /// + /// [`&str`]: str + /// [byteslice]: slice + /// + /// If you are sure that the byte slice is valid UTF-8, and you don't want to + /// incur the overhead of the validity check, there is an unsafe version of + /// this function, [`from_utf8_unchecked`], which has the same + /// behavior but skips the check. + /// + /// If you need a `String` instead of a `&str`, consider + /// [`String::from_utf8`][string]. + /// + /// [string]: ../std/string/struct.String.html#method.from_utf8 + /// + /// Because you can stack-allocate a `[u8; N]`, and you can take a + /// [`&[u8]`][byteslice] of it, this function is one way to have a + /// stack-allocated string. There is an example of this in the + /// examples section below. + /// + /// [byteslice]: slice + /// + /// # Errors + /// + /// Returns `Err` if the slice is not UTF-8 with a description as to why the + /// provided slice is not UTF-8. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::str; + /// + /// // some bytes, in a vector + /// let sparkle_heart = vec![240, 159, 146, 150]; + /// + /// // We can use the ? (try) operator to check if the bytes are valid + /// let sparkle_heart = str::from_utf8(&sparkle_heart)?; + /// + /// assert_eq!("💖", sparkle_heart); + /// # Ok::<_, str::Utf8Error>(()) + /// ``` + /// + /// Incorrect bytes: + /// + /// ``` + /// use std::str; + /// + /// // some invalid bytes, in a vector + /// let sparkle_heart = vec![0, 159, 146, 150]; + /// + /// assert!(str::from_utf8(&sparkle_heart).is_err()); + /// ``` + /// + /// See the docs for [`Utf8Error`] for more details on the kinds of + /// errors that can be returned. + /// + /// A "stack allocated string": + /// + /// ``` + /// use std::str; + /// + /// // some bytes, in a stack-allocated array + /// let sparkle_heart = [240, 159, 146, 150]; + /// + /// // We know these bytes are valid, so just use `unwrap()`. + /// let sparkle_heart: &str = str::from_utf8(&sparkle_heart).unwrap(); + /// + /// assert_eq!("💖", sparkle_heart); + /// ``` + #[unstable(feature = "inherent_str_constructors", issue = "131114")] + pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> { + converts::from_utf8(v) + } + + /// Converts a mutable slice of bytes to a mutable string slice. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::str; + /// + /// // "Hello, Rust!" as a mutable vector + /// let mut hellorust = vec![72, 101, 108, 108, 111, 44, 32, 82, 117, 115, 116, 33]; + /// + /// // As we know these bytes are valid, we can use `unwrap()` + /// let outstr = str::from_utf8_mut(&mut hellorust).unwrap(); + /// + /// assert_eq!("Hello, Rust!", outstr); + /// ``` + /// + /// Incorrect bytes: + /// + /// ``` + /// use std::str; + /// + /// // Some invalid bytes in a mutable vector + /// let mut invalid = vec![128, 223]; + /// + /// assert!(str::from_utf8_mut(&mut invalid).is_err()); + /// ``` + /// See the docs for [`Utf8Error`] for more details on the kinds of + /// errors that can be returned. + #[unstable(feature = "inherent_str_constructors", issue = "131114")] + #[rustc_const_unstable(feature = "const_str_from_utf8", issue = "91006")] + pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> { + converts::from_utf8_mut(v) + } + + /// Converts a slice of bytes to a string slice without checking + /// that the string contains valid UTF-8. + /// + /// See the safe version, [`from_utf8`], for more information. + /// + /// # Safety + /// + /// The bytes passed in must be valid UTF-8. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::str; + /// + /// // some bytes, in a vector + /// let sparkle_heart = vec![240, 159, 146, 150]; + /// + /// let sparkle_heart = unsafe { + /// str::from_utf8_unchecked(&sparkle_heart) + /// }; + /// + /// assert_eq!("💖", sparkle_heart); + /// ``` + #[inline] + #[must_use] + #[unstable(feature = "inherent_str_constructors", issue = "131114")] + pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str { + // SAFETY: converts::from_utf8_unchecked has the same safety requirements as this function. + unsafe { converts::from_utf8_unchecked(v) } + } + + /// Converts a slice of bytes to a string slice without checking + /// that the string contains valid UTF-8; mutable version. + /// + /// See the immutable version, [`from_utf8_unchecked()`] for more information. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::str; + /// + /// let mut heart = vec![240, 159, 146, 150]; + /// let heart = unsafe { str::from_utf8_unchecked_mut(&mut heart) }; + /// + /// assert_eq!("💖", heart); + /// ``` + #[inline] + #[must_use] + #[unstable(feature = "inherent_str_constructors", issue = "131114")] + pub const unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str { + // SAFETY: converts::from_utf8_unchecked_mut has the same safety requirements as this function. + unsafe { converts::from_utf8_unchecked_mut(v) } + } + /// Checks that `index`-th byte is the first byte in a UTF-8 code point /// sequence or the end of the string. /// diff --git a/tests/ui/suggestions/suggest-std-when-using-type.fixed b/tests/ui/suggestions/suggest-std-when-using-type.fixed index ebbb854175bda..0547940f94d31 100644 --- a/tests/ui/suggestions/suggest-std-when-using-type.fixed +++ b/tests/ui/suggestions/suggest-std-when-using-type.fixed @@ -1,8 +1,5 @@ //@ run-rustfix fn main() { let pi = std::f32::consts::PI; //~ ERROR ambiguous associated type - let bytes = "hello world".as_bytes(); - let string = std::str::from_utf8(bytes).unwrap(); - //~^ ERROR no function or associated item named `from_utf8` found - println!("{pi} {bytes:?} {string}"); + println!("{pi}"); } diff --git a/tests/ui/suggestions/suggest-std-when-using-type.rs b/tests/ui/suggestions/suggest-std-when-using-type.rs index 06aab63d68856..cd55c5d13bd18 100644 --- a/tests/ui/suggestions/suggest-std-when-using-type.rs +++ b/tests/ui/suggestions/suggest-std-when-using-type.rs @@ -1,8 +1,5 @@ //@ run-rustfix fn main() { let pi = f32::consts::PI; //~ ERROR ambiguous associated type - let bytes = "hello world".as_bytes(); - let string = str::from_utf8(bytes).unwrap(); - //~^ ERROR no function or associated item named `from_utf8` found - println!("{pi} {bytes:?} {string}"); + println!("{pi}"); } diff --git a/tests/ui/suggestions/suggest-std-when-using-type.stderr b/tests/ui/suggestions/suggest-std-when-using-type.stderr index 6f890b87b24bd..7f8545f461d9e 100644 --- a/tests/ui/suggestions/suggest-std-when-using-type.stderr +++ b/tests/ui/suggestions/suggest-std-when-using-type.stderr @@ -9,18 +9,6 @@ help: you are looking for the module in `std`, not the primitive type LL | let pi = std::f32::consts::PI; | +++++ -error[E0599]: no function or associated item named `from_utf8` found for type `str` in the current scope - --> $DIR/suggest-std-when-using-type.rs:5:23 - | -LL | let string = str::from_utf8(bytes).unwrap(); - | ^^^^^^^^^ function or associated item not found in `str` - | -help: you are looking for the module in `std`, not the primitive type - | -LL | let string = std::str::from_utf8(bytes).unwrap(); - | +++++ - -error: aborting due to 2 previous errors +error: aborting due to 1 previous error -Some errors have detailed explanations: E0223, E0599. -For more information about an error, try `rustc --explain E0223`. +For more information about this error, try `rustc --explain E0223`. From 15adc38ffc4608855c959ec521fe0124e1dffb2c Mon Sep 17 00:00:00 2001 From: may Date: Tue, 4 Feb 2025 00:34:42 +0100 Subject: [PATCH 2/2] specify a prim@slice in docs i am not quite sure how this failure is in any way related to this pr, since i am only touching inherent functions on str? but sure. --- library/core/src/str/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs index 17fb43b7830eb..5b258a7c844fe 100644 --- a/library/core/src/str/mod.rs +++ b/library/core/src/str/mod.rs @@ -169,7 +169,7 @@ impl str { /// UTF-8, and then does the conversion. /// /// [`&str`]: str - /// [byteslice]: slice + /// [byteslice]: prim@slice /// /// If you are sure that the byte slice is valid UTF-8, and you don't want to /// incur the overhead of the validity check, there is an unsafe version of