diff --git a/Cargo.toml b/Cargo.toml index 5dc00dd..c3a7d15 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,13 +14,18 @@ version = "0.0.1" [dependencies] cfg-if = "1.0.0" +cvt = "0.1.1" [dev-dependencies] tempfile = "3.3.0" [target.'cfg(not(windows))'.dependencies] -cvt = "0.1.1" libc = "0.2.121" +# Saves nontrivial unsafe and platform specific code (Darwin vs other Unixes, +# MAX_PATH and more) : consider it weak and something we can remove if expedient +# later. +nix = { version = "0.24.2", default-features = false, features = ["dir"] } + [target.'cfg(windows)'.dependencies] ntapi = "0.3.7" diff --git a/README.md b/README.md index 8f08ee0..2150823 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ filesystem code, since otherwise the state of the filesystem path that operations are executed against can change silently, leading to TOC-TOU race conditions. For Unix these calls are readily available in the libc crate, but for Windows some more plumbing is needed. This crate provides a unified -Rust-y interface to these calls. +Rust-y and safe interface to these calls. ## MSRV policy diff --git a/src/lib.rs b/src/lib.rs index ebd05d6..933f676 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,6 +14,7 @@ //! unified Rust-y interface to these calls. use std::{ + ffi::OsStr, fs::File, io::{Error, ErrorKind, Result}, path::Path, @@ -23,11 +24,11 @@ cfg_if::cfg_if! { if #[cfg(windows)] { mod win; - use win::OpenOptionsImpl; + use win::{OpenOptionsImpl, ReadDirImpl, DirEntryImpl}; } else { mod unix; - use unix::OpenOptionsImpl; + use unix::{OpenOptionsImpl, ReadDirImpl, DirEntryImpl}; } } @@ -183,6 +184,11 @@ impl OpenOptions { /// This will honour the options set for creation/append etc, but will only /// operate relative to d. To open a file with an absolute path, use the /// stdlib fs::OpenOptions. 
+ /// + /// Note: On Windows this uses low level APIs that do not perform path + /// separator translation: if passing a path containing a separator, it must + /// be a platform native one. e.g. `foo\\bar` on Windows, vs `foo/bar` on + /// most other OS's. pub fn open_at>(&self, d: &mut File, p: P) -> Result { self._impl.open_at(d, OpenOptions::ensure_root(p.as_ref())?) } @@ -198,6 +204,66 @@ impl OpenOptions { } } +/// Iterate over the contents of a directory. Created by calling read_dir() on +/// an opened directory. Each item yielded by the iterator is an io::Result to +/// allow communication of io errors as the iterator is advanced. +/// +/// To the greatest extent possible the underlying OS semantics are preserved. +/// That means that `.` and `..` entries are exposed, and that no sort order is +/// guaranteed by the iterator. +#[derive(Debug)] +pub struct ReadDir<'a> { + _impl: ReadDirImpl<'a>, +} + +impl<'a> ReadDir<'a> { + pub fn new(d: &'a mut File) -> Result { + Ok(ReadDir { + _impl: ReadDirImpl::new(d)?, + }) + } +} + +impl Iterator for ReadDir<'_> { + type Item = Result; + + fn next(&mut self) -> Option> { + self._impl + .next() + .map(|entry| entry.map(|_impl| DirEntry { _impl })) + } +} + +/// The returned type for each entry found by [`read_dir`]. +/// +/// Each entry represents a single entry inside the directory. Platforms that +/// provide rich metadata may in future expose this through methods or extension +/// traits on DirEntry. +/// +/// For now however, only the [`name()`] is exposed. This does not imply any +/// additional IO for most workloads: metadata returned from a directory listing +/// is inherently racy: presuming that what was a dir, or symlink etc when the +/// directory was listed, will still be the same when opened is fallible. +/// Instead, use open_at to open the contents, and then process based on the +/// type of content found. 
+#[derive(Debug)] +pub struct DirEntry { + _impl: DirEntryImpl, +} + +impl DirEntry { + pub fn name(&self) -> &OsStr { + self._impl.name() + } +} + +/// Read the children of the directory d. +/// +/// See [`ReadDir`] and [`DirEntry`] for details. +pub fn read_dir(d: &mut File) -> Result { + ReadDir::new(d) +} + pub mod os { cfg_if::cfg_if! { if #[cfg(windows)] { @@ -214,6 +280,7 @@ pub mod testsupport; #[cfg(test)] mod tests { use std::{ + ffi::OsStr, fs::{rename, File}, io::{Error, ErrorKind, Result, Seek, SeekFrom, Write}, path::PathBuf, @@ -221,7 +288,7 @@ mod tests { use tempfile::TempDir; - use crate::{testsupport::open_dir, OpenOptions, OpenOptionsWriteMode}; + use crate::{read_dir, testsupport::open_dir, DirEntry, OpenOptions, OpenOptionsWriteMode}; /// Create a directory parent, open it, then rename it to renamed-parent and /// create another directory in its place. returns the file handle and the @@ -464,4 +531,46 @@ mod tests { } Ok(()) } + + #[test] + fn readdir() -> Result<()> { + let (_tmp, mut parent_dir, _pathname) = setup()?; + assert_eq!( + 2, // . and .. + read_dir(&mut parent_dir)? + .collect::>>()? 
+ .len() + ); + let dir_present = + |children: &Vec, name: &OsStr| children.iter().any(|e| e.name() == name); + + let mut options = OpenOptions::default(); + options.create_new(true).write(OpenOptionsWriteMode::Write); + options.open_at(&mut parent_dir, "1")?; + options.open_at(&mut parent_dir, "2")?; + options.open_at(&mut options.mkdir_at(&mut parent_dir, "child")?, "3")?; + let children = read_dir(&mut parent_dir)?.collect::>>()?; + assert_eq!( + 5, + children.len(), + "directory contains 5 entries (., .., 1, 2, child)" + ); + assert!(dir_present(&children, OsStr::new("1")), "{:?}", children); + assert!(dir_present(&children, OsStr::new("2")), "{:?}", children); + assert!( + dir_present(&children, OsStr::new("child")), + "{:?}", + children + ); + + { + let mut child = OpenOptions::default() + .read(true) + .open_at(&mut parent_dir, "child")?; + let children = read_dir(&mut child)?.collect::>>()?; + assert_eq!(3, children.len(), "{:?}", children); + assert!(dir_present(&children, OsStr::new("3")), "{:?}", children); + } + Ok(()) + } } diff --git a/src/unix.rs b/src/unix.rs index 4bec331..f2ea1a0 100644 --- a/src/unix.rs +++ b/src/unix.rs @@ -1,9 +1,11 @@ use std::{ - ffi::CString, + ffi::{CString, OsStr, OsString}, fs::File, io::Result, + marker::PhantomData, os::unix::prelude::{AsRawFd, FromRawFd, OsStrExt}, path::Path, + ptr, }; // This will probably take a few iterations to get right. The idea: always use @@ -180,6 +182,109 @@ impl OpenOptionsExt for OpenOptions { } } +#[derive(Debug)] +pub(crate) struct ReadDirImpl<'a> { + // Since we clone the FD, the original FD is now separate. In theory. + // However for Windows we use the File directly, thus here we need to + // pretend. + _phantom: PhantomData<&'a File>, + // Set to None after we closedir it. Perhaps we should we impl Send and Sync + // because the data referenced is owned by libc ? 
+ dir: Option<ptr::NonNull<libc::DIR>>, +} + +impl<'a> ReadDirImpl<'a> { + pub fn new(dir_file: &'a mut File) -> Result<Self> { + // closedir closes the FD; make a new one that we can close when done with. + let new_fd = + cvt_r(|| unsafe { libc::fcntl(dir_file.as_raw_fd(), libc::F_DUPFD_CLOEXEC, 0) })?; + let mut dir = Some( + ptr::NonNull::new(unsafe { libc::fdopendir(new_fd) }).ok_or_else(|| { + let _droppable = unsafe { File::from_raw_fd(new_fd) }; + std::io::Error::last_os_error() + })?, + ); + + // If dir_file has had operations on it - such as open_at - its pointer + // might not be at the start of the dir, and fdopendir is documented + // (e.g. BSD man pages) to not rewind the fd - and our cloned fd + // inherits the pointer. + if let Some(d) = dir.as_mut() { + unsafe { libc::rewinddir(d.as_mut()) }; + } + + Ok(ReadDirImpl { + _phantom: PhantomData, + dir, + }) + } + + fn close_dir(&mut self) -> Result<()> { + if let Some(ref mut dir) = self.dir { + let result = unsafe { libc::closedir(dir.as_mut()) }; + // call made, clear state; closedir must never be retried, even on EINTR + self.dir = None; + cvt::cvt(result)?; + } + Ok(()) + } +} + +impl Drop for ReadDirImpl<'_> { + fn drop(&mut self) { + // like the stdlib, we eat errors occurring during drop, as there is no + // way to get error handling. + let _ = self.close_dir(); + } +} + +impl Iterator for ReadDirImpl<'_> { + type Item = Result<DirEntryImpl>; + + fn next(&mut self) -> Option<Self::Item> { + let dir = unsafe { self.dir?.as_mut() }; + // the readdir result is only guaranteed valid within the same thread + // and until other calls are made on the same dir stream. Thus we + // perform the required work inside next, allowing the next call to + // readdir to be managed by the single mutable borrower rule in Rust. + // readdir requires errno set to zero. + nix::Error::clear(); + ptr::NonNull::new(unsafe { libc::readdir(dir) }) + .map(|e| { + Ok(DirEntryImpl { + name: unsafe { + // Step one: C pointer to CStr - referenced data, length not known. 
+ let c_str = std::ffi::CStr::from_ptr(e.as_ref().d_name.as_ptr()); + // Step two: OsStr: referenced data, length calculated + let os_str = OsStr::from_bytes(c_str.to_bytes()); + // Step three: owned copy + os_str.to_os_string() + }, + }) + }) + .or_else(|| { + // NULL result, an error IFF errno has been set. + let err = std::io::Error::last_os_error(); + if err.raw_os_error() == Some(0) { + None + } else { + Some(Err(err)) + } + }) + } +} + +#[derive(Debug)] +pub(crate) struct DirEntryImpl { + name: OsString, +} + +impl DirEntryImpl { + pub fn name(&self) -> &OsStr { + &self.name + } +} + #[cfg(test)] mod tests { use std::{ diff --git a/src/win.rs b/src/win.rs index 09110a1..4ac8519 100644 --- a/src/win.rs +++ b/src/win.rs @@ -1,33 +1,40 @@ mod sugar; use std::{ - ffi::c_void, + ffi::{c_void, OsStr, OsString}, fmt, fs::File, io::Result, mem::{size_of, zeroed, MaybeUninit}, - os::windows::prelude::{AsRawHandle, FromRawHandle, OsStrExt}, + os::windows::prelude::{AsRawHandle, FromRawHandle, OsStrExt, OsStringExt}, path::Path, ptr::null_mut, + slice, }; use ntapi::ntioapi::{ - FILE_CREATE, FILE_CREATED, FILE_DIRECTORY_FILE, FILE_DOES_NOT_EXIST, FILE_EXISTS, - FILE_NON_DIRECTORY_FILE, FILE_OPENED, FILE_OPEN_IF, FILE_OVERWRITE_IF, FILE_OVERWRITTEN, - FILE_SUPERSEDED, FILE_SYNCHRONOUS_IO_NONALERT, + FILE_CREATE, FILE_CREATED, FILE_DIRECTORY_FILE, FILE_DOES_NOT_EXIST, FILE_EXISTS, FILE_OPEN, + FILE_OPENED, FILE_OPEN_IF, FILE_OVERWRITE_IF, FILE_OVERWRITTEN, FILE_SUPERSEDED, + FILE_SYNCHRONOUS_IO_NONALERT, }; use winapi::{ ctypes, shared::{ - minwindef::ULONG, - ntdef::{NULL, OBJECT_ATTRIBUTES, OBJ_CASE_INSENSITIVE, PLARGE_INTEGER, PVOID}, - winerror::ERROR_INVALID_PARAMETER, + minwindef::{LPVOID, ULONG}, + ntdef::{HANDLE, NULL, OBJECT_ATTRIBUTES, OBJ_CASE_INSENSITIVE, PLARGE_INTEGER, PVOID}, + winerror::{ERROR_INVALID_PARAMETER, ERROR_NO_MORE_FILES}, }, - um::winnt::{ - DELETE, FILE_ATTRIBUTE_NORMAL, FILE_GENERIC_WRITE, FILE_LIST_DIRECTORY, FILE_SHARE_DELETE, - 
FILE_SHARE_READ, FILE_SHARE_WRITE, FILE_TRAVERSE, FILE_WRITE_DATA, GENERIC_READ, - GENERIC_WRITE, PSECURITY_QUALITY_OF_SERVICE, SECURITY_CONTEXT_TRACKING_MODE, - SECURITY_DESCRIPTOR, SECURITY_QUALITY_OF_SERVICE, SYNCHRONIZE, + um::{ + fileapi::FILE_ID_BOTH_DIR_INFO, + minwinbase::{FileIdBothDirectoryInfo, FileIdBothDirectoryRestartInfo}, + winbase::GetFileInformationByHandleEx, + winnt::{ + DELETE, FILE_ATTRIBUTE_NORMAL, FILE_GENERIC_WRITE, FILE_LIST_DIRECTORY, + FILE_SHARE_DELETE, FILE_SHARE_READ, FILE_SHARE_WRITE, FILE_TRAVERSE, FILE_WRITE_DATA, + GENERIC_READ, GENERIC_WRITE, PSECURITY_QUALITY_OF_SERVICE, + SECURITY_CONTEXT_TRACKING_MODE, SECURITY_DESCRIPTOR, SECURITY_QUALITY_OF_SERVICE, + SYNCHRONIZE, + }, }, }; @@ -226,8 +233,10 @@ impl OpenOptionsImpl { // create options needs to be controlled through OpenOptions too. // FILE_SYNCHRONOUS_IO_NONALERT is set by CreateFile with the options // Rust itself uses - this lets the OS position tracker work. It also - // requires SYNCHRONIZE on the access mode. - let create_options = CreateOptions(FILE_NON_DIRECTORY_FILE | FILE_SYNCHRONOUS_IO_NONALERT); + // requires SYNCHRONIZE on the access mode. We should permit users to + // expect particular types, but until we make that explicit, we need to + // open any kind of file when requested # FILE_NON_DIRECTORY_FILE | + let create_options = CreateOptions(FILE_SYNCHRONOUS_IO_NONALERT); self.do_create_file(f, path, desired_access, create_disposition, create_options) } @@ -244,9 +253,11 @@ impl OpenOptionsImpl { // its poor ergonomics otherwise. Ok(FileDisposition(FILE_CREATE)) } else { - Err(std::io::Error::from_raw_os_error( - ERROR_INVALID_PARAMETER as i32, - )) + // just open the existing file. 
+ Ok(FileDisposition(FILE_OPEN)) + // Err(std::io::Error::from_raw_os_error( + // ERROR_INVALID_PARAMETER as i32, + // )) } } @@ -519,6 +530,123 @@ impl OpenOptionsExt for OpenOptions { } } +#[derive(Debug)] +pub(crate) struct ReadDirImpl<'a> { + /// FILE_ID_BOTH_DIR_INFO is a variable-length struct, otherwise this would + /// be a vec of that. None indicates end of iterator from the OS. + buffer: Option<Vec<u8>>, + d: &'a mut File, + // byte offset in buffer to next entry to yield + offset: usize, +} + +impl<'a> ReadDirImpl<'a> { + /// Begin listing `d`. The first query uses the `...RestartInfo` class so + /// that (per the Win32 docs) enumeration starts from the first entry. + pub fn new(d: &'a mut File) -> Result<Self> { + let mut result = ReadDirImpl { + // Start with a page, can always grow it statically or dynamically if + // needed. + buffer: Some(vec![0_u8; 4096]), + d, + offset: 0, + }; + // Even the first query can report ERROR_NO_MORE_FILES (nothing to list); + // record end-of-directory so next() never parses the untouched buffer. + if result.fill_buffer(FileIdBothDirectoryRestartInfo)? { + result.buffer = None; + } + Ok(result) + } + + /// Ask the OS for the next batch of entries. Returns Ok(true) once the + /// directory is exhausted, Ok(false) when the query succeeded. + fn fill_buffer(&mut self, class: ULONG) -> Result<bool> { + let buffer = self.buffer.as_mut().ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::Other, + "Attempt to fill buffer after end of dir", + ) + })?; + // Implement + // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-findnextfilea + // without ever doing path resolution... the docs for + // GetFileInformationByHandleEx do not mention how to detect end of dir, + // but FindNextFile does: + // + // ``` + //If the function fails because no more matching files can be found, + //the GetLastError function returns ERROR_NO_MORE_FILES. + // ``` + let result = cvt::cvt(unsafe { + GetFileInformationByHandleEx( + self.d.as_raw_handle() as HANDLE, + class, + buffer.as_mut_ptr() as LPVOID, + buffer.len() as u32, + ) + }); + match result { + Ok(_) => Ok(false), + Err(e) if e.raw_os_error() == Some(ERROR_NO_MORE_FILES as i32) => Ok(true), + Err(e) => Err(e), + } + } +} + +impl Iterator for ReadDirImpl<'_> { + type Item = Result<DirEntryImpl>; + + fn next(&mut self) -> Option<Self::Item> { + // if the buffer is empty, fill it; if the buffer is None, exit early. 
+ if self.offset >= self.buffer.as_ref()?.len() { + match self.fill_buffer(FileIdBothDirectoryInfo) { + Ok(false) => { + self.offset = 0; + } + Ok(true) => { + self.buffer = None; + return None; + } + Err(e) => return Some(Err(e)), + } + } + // offset is now valid. Dereference into a struct. + let struct_mem = &self.buffer.as_ref()?[self.offset..]; + let info = unsafe { &*struct_mem.as_ptr().cast::() }; + self.offset = if info.NextEntryOffset == 0 { + self.buffer.as_ref()?.len() + } else { + info.NextEntryOffset as usize + self.offset + }; + + let name = OsString::from_wide(unsafe { + slice::from_raw_parts( + info.FileName.as_ptr(), + info.FileNameLength as usize / size_of::(), + ) + }); + Some(Ok(DirEntryImpl { name })) + // + // + // Read Attributes, Delete, Synchronize + // Disposition: Open + // Options: Synchronous IO Non-Alert, Open Reparse Point + // + } +} + +#[derive(Debug)] +pub(crate) struct DirEntryImpl { + name: OsString, +} + +impl DirEntryImpl { + pub fn name(&self) -> &OsStr { + &self.name + } +} + #[cfg(test)] mod tests { use std::{fs::rename, io::Result};