diff --git a/Cargo.lock b/Cargo.lock index 51c569a12..91e8849b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "adler2" @@ -1326,6 +1326,7 @@ name = "posixutils-file" version = "0.2.1" dependencies = [ "clap", + "ftw", "gettext-rs", "libc", "plib", diff --git a/file/Cargo.toml b/file/Cargo.toml index 0c5732da0..d6aa57052 100644 --- a/file/Cargo.toml +++ b/file/Cargo.toml @@ -8,6 +8,7 @@ edition.workspace = true [dependencies] plib = { path = "../plib" } +ftw = { path = "../ftw" } clap.workspace = true gettext-rs.workspace = true libc.workspace = true diff --git a/file/file.rs b/file/file.rs index ddc0f3a92..3c7b9af20 100644 --- a/file/file.rs +++ b/file/file.rs @@ -10,16 +10,12 @@ mod magic; use crate::magic::{get_type_from_magic_file_dbs, DEFAULT_MAGIC_FILE}; +use ftw::{FileType, Metadata}; use clap::Parser; use gettextrs::{bind_textdomain_codeset, gettext, setlocale, textdomain, LocaleCategory}; use plib::PROJECT_NAME; -use std::{ - fs::{self, read_link}, - io, - os::unix::fs::FileTypeExt, - path::PathBuf, -}; +use std::{fs::read_link, io, path::PathBuf}; #[derive(Parser)] #[command( @@ -112,83 +108,46 @@ fn get_magic_files(args: &Args) -> Vec { magic_files } -fn analyze_file(mut path: String, args: &Args, magic_files: &Vec) { - if path == "-" { - path = String::new(); - io::stdin().read_line(&mut path).unwrap(); - path = path.trim().to_string(); - } - - let met = match fs::symlink_metadata(&path) { - Ok(met) => met, - Err(_) => { - println!("{path}: cannot open"); - return; - } +fn analyze_file(path: &String, args: &Args, magic_files: &Vec) -> String { + let meta = match Metadata::symlink_metadata(&path) { + Ok(meta) => meta, + Err(_) => return gettext("cannot open"), }; - let file_type = met.file_type(); - - if file_type.is_symlink() { - if args.identify_as_symbolic_link { - println!("{path}: symbolic link"); - return; - } - match read_link(&path) { - Ok(file_p) => { - // trace the file pointed by symbolic link - if file_p.exists() { - println!("{path}: symbolic link to {}", file_p.to_str().unwrap()); - } else { - println!( - "{path}: broken symbolic link to {}", - file_p.to_str().unwrap() - ); - } + match meta.file_type() { + FileType::Socket => gettext("socket"), + FileType::BlockDevice => gettext("block special"), + FileType::Directory => gettext("directory"), + FileType::CharacterDevice => gettext("character special"), + FileType::Fifo => gettext("fifo"), + FileType::SymbolicLink => { + if args.identify_as_symbolic_link { + return gettext("symbolic link"); } - Err(_) => { - println!("{path}: symbolic link"); + match read_link(&path) { + Ok(file_p) => { + // trace the file pointed by symbolic link + if file_p.exists() { + gettext!("symbolic link to {}", file_p.to_str().unwrap()) + } else { + gettext!("broken symbolic link to {}", file_p.to_str().unwrap()) + } + } + Err(_) => return gettext("symbolic link"), } } - return; - } - if file_type.is_char_device() { - println!("{path}: character special"); - return; - } - if file_type.is_dir() { - println!("{path}: directory"); - return; - } - if file_type.is_fifo() { - println!("{path}: fifo"); - return; - } - if file_type.is_socket() { - println!("{path}: socket"); - return; - } - if file_type.is_block_device() { - println!("{path}: block special"); - return; - } - if file_type.is_file() { - if args.no_further_file_classification { - assert!(magic_files.is_empty()); - println!("{path}: regular file"); - return; - } - if met.len() == 0 { - println!("{path}: empty"); - return; - } - match get_type_from_magic_file_dbs(&PathBuf::from(&path), &magic_files) { - Some(f_type) => println!("{path}: {f_type}"), - None => println!("{path}: data"), + FileType::RegularFile => { + if args.no_further_file_classification { + assert!(magic_files.is_empty()); + return gettext("regular file"); + } + if meta.len() == 0 { + return gettext("empty"); + } + get_type_from_magic_file_dbs(&PathBuf::from(&path), &magic_files) + .unwrap_or_else(|| gettext("data")) } - return; } - unreachable!(); } fn main() -> Result<(), Box> { @@ -202,7 +161,13 @@ fn main() -> Result<(), Box> { let magic_files = get_magic_files(&args); for file in &args.files { - analyze_file(file.clone(), &args, &magic_files); + let mut file = file.clone(); + if file == "-" { + file = String::new(); + io::stdin().read_line(&mut file).unwrap(); + file = file.trim().to_string(); + } + println!("{}: {}", &file, analyze_file(&file, &args, &magic_files)); } Ok(()) diff --git a/ftw/src/lib.rs b/ftw/src/lib.rs index 50b5dc222..0e4cabcab 100644 --- a/ftw/src/lib.rs +++ b/ftw/src/lib.rs @@ -1,6 +1,9 @@ mod dir; +mod small_c_string; + +use crate::dir::{DeferredDir, HybridDir, OwnedDir}; +use crate::small_c_string::run_path_with_cstr; -use dir::{DeferredDir, HybridDir, OwnedDir}; use std::{ ffi::{CStr, CString, OsStr}, fmt, io, @@ -107,7 +110,9 @@ impl AsRawFd for FileDescriptor { } } -/// Metadata of an entry. This is analogous to `std::fs::Metadata`. +/// Metadata information about a file. +/// +/// This is analogous to [`std::fs::Metadata`]. #[derive(Clone)] pub struct Metadata(libc::stat); @@ -120,7 +125,7 @@ impl fmt::Debug for Metadata { impl Metadata { /// Create a new `Metadata`. /// - /// `dirfd` could be the special value `libc::AT_FDCWD` to query the metadata of a file at the + /// `dirfd` could be the special value [`libc::AT_FDCWD`] to query the metadata of a file at the /// process' current working directory. pub fn new( dirfd: libc::c_int, @@ -140,18 +145,25 @@ impl Metadata { Ok(Metadata(unsafe { statbuf.assume_init() })) } + /// Queries the metadata about a file without following symlinks. + /// + /// This is analogous to [`std::fs::symlink_metadata`]. + pub fn symlink_metadata>(path: P) -> io::Result { + let p: &Path = path.as_ref(); + run_path_with_cstr(p, &|p| Self::new(libc::AT_FDCWD, p, false)) + } + + /// Returns the size of the file, in bytes, this metadata is for. + /// + /// This is analogous to [`std::fs::Metadata::len`]. + #[must_use] + pub fn len(&self) -> u64 { + self.0.st_size as u64 + } + /// Query the file type. pub fn file_type(&self) -> FileType { - match self.0.st_mode & libc::S_IFMT { - libc::S_IFSOCK => FileType::Socket, - libc::S_IFLNK => FileType::SymbolicLink, - libc::S_IFREG => FileType::RegularFile, - libc::S_IFBLK => FileType::BlockDevice, - libc::S_IFDIR => FileType::Directory, - libc::S_IFCHR => FileType::CharacterDevice, - libc::S_IFIFO => FileType::Fifo, - _ => unreachable!(), - } + FileType::new(self.0.st_mode) } // These are "effective" IDs and not "real" to allow for things like sudo @@ -277,7 +289,10 @@ impl unix::fs::MetadataExt for Metadata { } } -/// File type of an entry. Returned by `Metadata::file_type`. +/// A structure representing a type of file with accessors for each file type. +/// It is returned by [`Metadata::file_type`] method. +/// +/// This is analogous to [`std::fs::FileType`]. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum FileType { Socket, @@ -290,6 +305,19 @@ pub enum FileType { } impl FileType { + fn new(value: libc::mode_t) -> Self { + match value & libc::S_IFMT { + libc::S_IFSOCK => FileType::Socket, + libc::S_IFLNK => FileType::SymbolicLink, + libc::S_IFREG => FileType::RegularFile, + libc::S_IFBLK => FileType::BlockDevice, + libc::S_IFDIR => FileType::Directory, + libc::S_IFCHR => FileType::CharacterDevice, + libc::S_IFIFO => FileType::Fifo, + _ => unreachable!(), + } + } + /// Tests whether this file type represents a directory. pub fn is_dir(&self) -> bool { *self == FileType::Directory diff --git a/ftw/src/small_c_string.rs b/ftw/src/small_c_string.rs new file mode 100644 index 000000000..a2db4cac1 --- /dev/null +++ b/ftw/src/small_c_string.rs @@ -0,0 +1,68 @@ +//! Copied from [`std::sys::common::small_c_string::run_path_with_cstr`] + +use std::ffi::{CStr, CString}; +use std::mem::MaybeUninit; +use std::path::Path; +use std::{io, ptr, slice}; + +// Make sure to stay under 4096 so the compiler doesn't insert a probe frame: +// https://docs.rs/compiler_builtins/latest/compiler_builtins/probestack/index.html +#[cfg(not(target_os = "espidf"))] +const MAX_STACK_ALLOCATION: usize = 384; +#[cfg(target_os = "espidf")] +const MAX_STACK_ALLOCATION: usize = 32; + +// const NUL_ERR: io::Error = +// io::const_io_error!(io::ErrorKind::InvalidInput, "file name contained an unexpected NUL byte"); + +#[inline] +pub fn run_path_with_cstr(path: &Path, f: &dyn Fn(&CStr) -> io::Result) -> io::Result { + run_with_cstr(path.as_os_str().as_encoded_bytes(), f) +} + +#[inline] +pub fn run_with_cstr(bytes: &[u8], f: &dyn Fn(&CStr) -> io::Result) -> io::Result { + // Dispatch and dyn erase the closure type to prevent mono bloat. + // See https://github.com/rust-lang/rust/pull/121101. + if bytes.len() >= MAX_STACK_ALLOCATION { + run_with_cstr_allocating(bytes, f) + } else { + unsafe { run_with_cstr_stack(bytes, f) } + } +} + +/// # Safety +/// +/// `bytes` must have a length less than `MAX_STACK_ALLOCATION`. +unsafe fn run_with_cstr_stack( + bytes: &[u8], + f: &dyn Fn(&CStr) -> io::Result, +) -> io::Result { + let mut buf = MaybeUninit::<[u8; MAX_STACK_ALLOCATION]>::uninit(); + let buf_ptr = buf.as_mut_ptr() as *mut u8; + + unsafe { + ptr::copy_nonoverlapping(bytes.as_ptr(), buf_ptr, bytes.len()); + buf_ptr.add(bytes.len()).write(0); + } + + match CStr::from_bytes_with_nul(unsafe { slice::from_raw_parts(buf_ptr, bytes.len() + 1) }) { + Ok(s) => f(s), + Err(_) => Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "file name contained an unexpected NUL byte", + )), + } +} + +#[cold] +#[inline(never)] +fn run_with_cstr_allocating(bytes: &[u8], f: &dyn Fn(&CStr) -> io::Result) -> io::Result { + match CString::new(bytes) { + Ok(s) => f(&s), + Err(_) => Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "file name contained an unexpected NUL byte", + )), + } +}