From fde8a8d518e3bd9124bfe4028e3c45f7dcf34593 Mon Sep 17 00:00:00 2001 From: Tobias Bucher Date: Mon, 13 May 2024 20:32:33 +0200 Subject: [PATCH 1/2] Optimize `Seek::stream_len` impl for `File` It uses the file metadata on Unix with a fallback for files incorrectly reported as zero-sized. It uses `GetFileSizeEx` on Windows. This reduces the number of syscalls needed for determining the file size of an open file from 3 to 1. --- library/std/src/fs.rs | 36 +++++++++++++++++++ library/std/src/io/mod.rs | 24 +++++++------ library/std/src/sys/fs/hermit.rs | 4 +++ library/std/src/sys/fs/solid.rs | 4 +++ library/std/src/sys/fs/uefi.rs | 4 +++ library/std/src/sys/fs/unix.rs | 9 +++++ library/std/src/sys/fs/unsupported.rs | 4 +++ library/std/src/sys/fs/wasi.rs | 4 +++ library/std/src/sys/fs/windows.rs | 8 +++++ .../std/src/sys/pal/windows/c/bindings.txt | 1 + .../std/src/sys/pal/windows/c/windows_sys.rs | 1 + 11 files changed, 89 insertions(+), 10 deletions(-) diff --git a/library/std/src/fs.rs b/library/std/src/fs.rs index 711efc7d011c1..6cbf8301e01b9 100644 --- a/library/std/src/fs.rs +++ b/library/std/src/fs.rs @@ -1311,9 +1311,39 @@ impl Write for &File { } #[stable(feature = "rust1", since = "1.0.0")] impl Seek for &File { + /// Seek to an offset, in bytes in a file. + /// + /// See [`Seek::seek`] docs for more info. + /// + /// # Platform-specific behavior + /// + /// This function currently corresponds to the `lseek64` function on Unix + /// and the `SetFilePointerEx` function on Windows. Note that this [may + /// change in the future][changes]. + /// + /// [changes]: io#platform-specific-behavior fn seek(&mut self, pos: SeekFrom) -> io::Result { self.inner.seek(pos) } + + /// Returns the length of this file (in bytes). + /// + /// See [`Seek::stream_len`] docs for more info. + /// + /// # Platform-specific behavior + /// + /// This function currently corresponds to the `statx` function on Linux + /// (with fallbacks) and the `GetFileSizeEx` function on Windows. Note that + /// this [may change in the future][changes]. + /// + /// [changes]: io#platform-specific-behavior + fn stream_len(&mut self) -> io::Result { + if let Some(result) = self.inner.size() { + return result; + } + io::stream_len_default(self) + } + fn stream_position(&mut self) -> io::Result { self.inner.tell() } @@ -1363,6 +1393,9 @@ impl Seek for File { fn seek(&mut self, pos: SeekFrom) -> io::Result { (&*self).seek(pos) } + fn stream_len(&mut self) -> io::Result { + (&*self).stream_len() + } fn stream_position(&mut self) -> io::Result { (&*self).stream_position() } @@ -1412,6 +1445,9 @@ impl Seek for Arc { fn seek(&mut self, pos: SeekFrom) -> io::Result { (&**self).seek(pos) } + fn stream_len(&mut self) -> io::Result { + (&**self).stream_len() + } fn stream_position(&mut self) -> io::Result { (&**self).stream_position() } diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs index 03f5f838311a9..859f189a6c84e 100644 --- a/library/std/src/io/mod.rs +++ b/library/std/src/io/mod.rs @@ -2062,16 +2062,7 @@ pub trait Seek { /// ``` #[unstable(feature = "seek_stream_len", issue = "59359")] fn stream_len(&mut self) -> Result { - let old_pos = self.stream_position()?; - let len = self.seek(SeekFrom::End(0))?; - - // Avoid seeking a third time when we were already at the end of the - // stream. The branch is usually way cheaper than a seek operation. - if old_pos != len { - self.seek(SeekFrom::Start(old_pos))?; - } - - Ok(len) + stream_len_default(self) } /// Returns the current seek position from the start of the stream. @@ -2132,6 +2123,19 @@ pub trait Seek { } } +pub(crate) fn stream_len_default(self_: &mut T) -> Result { + let old_pos = self_.stream_position()?; + let len = self_.seek(SeekFrom::End(0))?; + + // Avoid seeking a third time when we were already at the end of the + // stream. The branch is usually way cheaper than a seek operation. + if old_pos != len { + self_.seek(SeekFrom::Start(old_pos))?; + } + + Ok(len) +} + /// Enumeration of possible methods to seek within an I/O object. /// /// It is used by the [`Seek`] trait. diff --git a/library/std/src/sys/fs/hermit.rs b/library/std/src/sys/fs/hermit.rs index a9774bef9e338..175d919c289dd 100644 --- a/library/std/src/sys/fs/hermit.rs +++ b/library/std/src/sys/fs/hermit.rs @@ -422,6 +422,10 @@ impl File { self.0.seek(pos) } + pub fn size(&self) -> Option> { + None + } + pub fn tell(&self) -> io::Result { self.0.tell() } diff --git a/library/std/src/sys/fs/solid.rs b/library/std/src/sys/fs/solid.rs index 3bfb39bac95bc..808a95829114e 100644 --- a/library/std/src/sys/fs/solid.rs +++ b/library/std/src/sys/fs/solid.rs @@ -459,6 +459,10 @@ impl File { self.tell() } + pub fn size(&self) -> Option> { + None + } + pub fn tell(&self) -> io::Result { unsafe { let mut out_offset = MaybeUninit::uninit(); diff --git a/library/std/src/sys/fs/uefi.rs b/library/std/src/sys/fs/uefi.rs index 416c90b98b6d3..5763d7862f5ae 100644 --- a/library/std/src/sys/fs/uefi.rs +++ b/library/std/src/sys/fs/uefi.rs @@ -280,6 +280,10 @@ impl File { self.0 } + pub fn size(&self) -> Option> { + self.0 + } + pub fn tell(&self) -> io::Result { self.0 } diff --git a/library/std/src/sys/fs/unix.rs b/library/std/src/sys/fs/unix.rs index a3e520fdeef43..dc278274f00f4 100644 --- a/library/std/src/sys/fs/unix.rs +++ b/library/std/src/sys/fs/unix.rs @@ -1464,6 +1464,15 @@ impl File { Ok(n as u64) } + pub fn size(&self) -> Option> { + match self.file_attr().map(|attr| attr.size()) { + // Fall back to default implementation if the returned size is 0, + // we might be in a proc mount. + Ok(0) => None, + result => Some(result), + } + } + pub fn tell(&self) -> io::Result { self.seek(SeekFrom::Current(0)) } diff --git a/library/std/src/sys/fs/unsupported.rs b/library/std/src/sys/fs/unsupported.rs index 0ff9533c04734..efaddb51b3751 100644 --- a/library/std/src/sys/fs/unsupported.rs +++ b/library/std/src/sys/fs/unsupported.rs @@ -259,6 +259,10 @@ impl File { self.0 } + pub fn size(&self) -> Option> { + self.0 + } + pub fn tell(&self) -> io::Result { self.0 } diff --git a/library/std/src/sys/fs/wasi.rs b/library/std/src/sys/fs/wasi.rs index ebfc7377a2ead..b65d86de12a3d 100644 --- a/library/std/src/sys/fs/wasi.rs +++ b/library/std/src/sys/fs/wasi.rs @@ -516,6 +516,10 @@ impl File { self.fd.seek(pos) } + pub fn size(&self) -> Option> { + None + } + pub fn tell(&self) -> io::Result { self.fd.tell() } diff --git a/library/std/src/sys/fs/windows.rs b/library/std/src/sys/fs/windows.rs index d01a572ac7332..a95709b489143 100644 --- a/library/std/src/sys/fs/windows.rs +++ b/library/std/src/sys/fs/windows.rs @@ -616,6 +616,14 @@ impl File { Ok(newpos as u64) } + pub fn size(&self) -> Option> { + let mut result = 0; + Some( + cvt(unsafe { c::GetFileSizeEx(self.handle.as_raw_handle(), &mut result) }) + .map(|_| result as u64), + ) + } + pub fn tell(&self) -> io::Result { self.seek(SeekFrom::Current(0)) } diff --git a/library/std/src/sys/pal/windows/c/bindings.txt b/library/std/src/sys/pal/windows/c/bindings.txt index d5fbb453c6f96..a99c474c763c5 100644 --- a/library/std/src/sys/pal/windows/c/bindings.txt +++ b/library/std/src/sys/pal/windows/c/bindings.txt @@ -2156,6 +2156,7 @@ GetExitCodeProcess GetFileAttributesW GetFileInformationByHandle GetFileInformationByHandleEx +GetFileSizeEx GetFileType GETFINALPATHNAMEBYHANDLE_FLAGS GetFinalPathNameByHandleW diff --git a/library/std/src/sys/pal/windows/c/windows_sys.rs b/library/std/src/sys/pal/windows/c/windows_sys.rs index eb2914b864473..95bf8040229d0 100644 --- a/library/std/src/sys/pal/windows/c/windows_sys.rs +++ b/library/std/src/sys/pal/windows/c/windows_sys.rs @@ -44,6 +44,7 @@ windows_targets::link!("kernel32.dll" "system" fn GetExitCodeProcess(hprocess : windows_targets::link!("kernel32.dll" "system" fn GetFileAttributesW(lpfilename : PCWSTR) -> u32); windows_targets::link!("kernel32.dll" "system" fn GetFileInformationByHandle(hfile : HANDLE, lpfileinformation : *mut BY_HANDLE_FILE_INFORMATION) -> BOOL); windows_targets::link!("kernel32.dll" "system" fn GetFileInformationByHandleEx(hfile : HANDLE, fileinformationclass : FILE_INFO_BY_HANDLE_CLASS, lpfileinformation : *mut core::ffi::c_void, dwbuffersize : u32) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn GetFileSizeEx(hfile : HANDLE, lpfilesize : *mut i64) -> BOOL); windows_targets::link!("kernel32.dll" "system" fn GetFileType(hfile : HANDLE) -> FILE_TYPE); windows_targets::link!("kernel32.dll" "system" fn GetFinalPathNameByHandleW(hfile : HANDLE, lpszfilepath : PWSTR, cchfilepath : u32, dwflags : GETFINALPATHNAMEBYHANDLE_FLAGS) -> u32); windows_targets::link!("kernel32.dll" "system" fn GetFullPathNameW(lpfilename : PCWSTR, nbufferlength : u32, lpbuffer : PWSTR, lpfilepart : *mut PWSTR) -> u32); From 4b1e28b21f78491d4ae187a52ebd5436edef6281 Mon Sep 17 00:00:00 2001 From: Tobias Bucher Date: Thu, 9 Jan 2025 10:31:03 +0100 Subject: [PATCH 2/2] Clarify description of `Seek::stream_len` It can only describe the inner workings of the default implementation, other implementations might not be implemented using seeks at all. --- library/std/src/io/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs index 859f189a6c84e..20c82b64bcc16 100644 --- a/library/std/src/io/mod.rs +++ b/library/std/src/io/mod.rs @@ -2028,7 +2028,7 @@ pub trait Seek { /// Returns the length of this stream (in bytes). /// - /// This method is implemented using up to three seek operations. If this + /// The default implementation uses up to three seek operations. If this /// method returns successfully, the seek position is unchanged (i.e. the /// position before calling this method is the same as afterwards). /// However, if this method returns an error, the seek position is