From 37ca6edfdc6c3f6da4fd906b7f095772e7d8ddd0 Mon Sep 17 00:00:00 2001 From: electricboogie <32370782+electricboogie@users.noreply.github.com> Date: Sat, 15 Jan 2022 22:39:07 -0600 Subject: [PATCH 01/12] Fix display of bad fd errors --- src/uu/ls/src/ls.rs | 177 ++++++++++++++++++++++++++++----------- tests/by-util/test_ls.rs | 41 ++++++++- 2 files changed, 167 insertions(+), 51 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index fd3e41adb..226dcd1bf 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -21,7 +21,6 @@ use lscolors::LsColors; use number_prefix::NumberPrefix; use once_cell::unsync::OnceCell; use quoting_style::{escape_name, QuotingStyle}; -use std::ffi::OsString; #[cfg(windows)] use std::os::windows::fs::MetadataExt; use std::{ @@ -39,6 +38,7 @@ use std::{ os::unix::fs::{FileTypeExt, MetadataExt}, time::Duration, }; +use std::{ffi::OsString, fs::ReadDir}; use term_grid::{Cell, Direction, Filling, Grid, GridOptions}; use uucore::{ display::Quotable, @@ -165,7 +165,7 @@ impl Display for LsError { LsError::IOError(e) => write!(f, "general io error: {}", e), LsError::IOErrorContext(e, p) => { let error_kind = e.kind(); - let raw_os_error = e.raw_os_error().unwrap_or(13i32); + let errno = e.raw_os_error().unwrap_or(1i32); match error_kind { // No such file or directory @@ -180,7 +180,7 @@ impl Display for LsError { ErrorKind::PermissionDenied => { #[allow(clippy::wildcard_in_or_patterns)] - match raw_os_error { + match errno { 1i32 => { write!( f, @@ -205,12 +205,23 @@ impl Display for LsError { } } } - _ => write!( - f, - "unknown io error: '{:?}', '{:?}'", - p.to_string_lossy(), - e - ), + _ => match errno { + 9i32 => { + write!( + f, + "cannot access '{}': Bad file descriptor", + p.to_string_lossy(), + ) + } + _ => { + write!( + f, + "unknown io error: '{:?}', '{:?}'", + p.to_string_lossy(), + e + ) + } + }, } } } @@ -1342,8 +1353,19 @@ impl PathData { || match get_metadata(self.p_buf.as_path(), self.must_dereference) { Err(err) => { let _ = out.flush(); - show!(LsError::IOErrorContext(err, self.p_buf.clone(),)); - None + let errno = err.raw_os_error().unwrap_or(1i32); + // Wait to enter "directory" to print error for a bad fd + if self.must_dereference && errno.eq(&9i32) { + if let Ok(md) = get_metadata(&self.p_buf, false) { + Some(md) + } else { + show!(LsError::IOErrorContext(err, self.p_buf.clone(),)); + None + } + } else { + show!(LsError::IOErrorContext(err, self.p_buf.clone(),)); + None + } } Ok(md) => Some(md), }, @@ -1351,6 +1373,10 @@ impl PathData { .as_ref() } + fn set_md(&self, md: Metadata) { + self.md.get_or_init(|| Some(md)).as_ref(); + } + fn file_type(&self, out: &mut BufWriter) -> Option<&FileType> { self.ft .get_or_init(|| self.md(out).map(|md| md.file_type())) @@ -1397,16 +1423,28 @@ fn list(locs: Vec<&Path>, config: Config) -> UResult<()> { display_items(&files, &config, &mut out); - for (pos, dir) in dirs.iter().enumerate() { + for (pos, path_data) in dirs.iter().enumerate() { + // Do read_dir call here to match GNU semantics by printing + // read_dir errors before directory headings, names and totals + let read_dir = match fs::read_dir(&path_data.p_buf) { + Err(err) => { + // flush stdout buffer before the error to preserve formatting and order + let _ = out.flush(); + show!(LsError::IOErrorContext(err, path_data.p_buf.clone())); + continue; + } + Ok(rd) => rd, + }; + // Print dir heading - name... 'total' comes after error display if initial_locs_len > 1 || config.recursive { if pos.eq(&0usize) && files.is_empty() { - let _ = writeln!(out, "{}:", dir.p_buf.display()); + let _ = writeln!(out, "{}:", path_data.p_buf.display()); } else { - let _ = writeln!(out, "\n{}:", dir.p_buf.display()); + let _ = writeln!(out, "\n{}:", path_data.p_buf.display()); } } - enter_directory(dir, &config, &mut out); + enter_directory(path_data, read_dir, &config, &mut out); } Ok(()) @@ -1475,12 +1513,29 @@ fn should_display(entry: &DirEntry, config: &Config) -> bool { true } -fn enter_directory(dir: &PathData, config: &Config, out: &mut BufWriter) { +fn enter_directory( + path_data: &PathData, + read_dir: ReadDir, + config: &Config, + out: &mut BufWriter, +) { // Create vec of entries with initial dot files let mut entries: Vec = if config.files == Files::All { vec![ - PathData::new(dir.p_buf.clone(), None, Some(".".into()), config, false), - PathData::new(dir.p_buf.join(".."), None, Some("..".into()), config, false), + PathData::new( + path_data.p_buf.clone(), + None, + Some(".".into()), + config, + false, + ), + PathData::new( + path_data.p_buf.join(".."), + None, + Some("..".into()), + config, + false, + ), ] } else { vec![] @@ -1489,19 +1544,8 @@ fn enter_directory(dir: &PathData, config: &Config, out: &mut BufWriter) // Convert those entries to the PathData struct let mut vec_path_data = Vec::new(); - // check for errors early, and ignore entries with errors - let read_dir = match fs::read_dir(&dir.p_buf) { - Err(err) => { - // flush buffer because the error may get printed in the wrong order - let _ = out.flush(); - show!(LsError::IOErrorContext(err, dir.p_buf.clone())); - return; - } - Ok(res) => res, - }; - - for entry in read_dir { - let unwrapped = match entry { + for raw_entry in read_dir { + let dir_entry = match raw_entry { Ok(path) => path, Err(err) => { let _ = out.flush(); @@ -1510,27 +1554,40 @@ fn enter_directory(dir: &PathData, config: &Config, out: &mut BufWriter) } }; - if should_display(&unwrapped, config) { - // why check the DirEntry file_type()? B/c the call is - // nearly free compared to a metadata() or file_type() call on a dir/file - let path_data = match unwrapped.file_type() { - Err(err) => { - let _ = out.flush(); - show!(LsError::IOErrorContext(err, unwrapped.path())); - continue; - } - Ok(dir_ft) => { - PathData::new(unwrapped.path(), Some(Ok(dir_ft)), None, config, false) + if should_display(&dir_entry, config) { + // Why prefer to check the DirEntry file_type()? B/c the call is + // nearly free compared to a metadata() or file_type() call on a dir/file. + // + // Why not print an error here? If we wait for the metadata() call, we make + // certain we print the error once. This also seems to match GNU behavior. + let entry_path_data = match dir_entry.file_type() { + Ok(ft) => { + let res = PathData::new(dir_entry.path(), Some(Ok(ft)), None, config, false); + // metadata returned from a DirEntry matches GNU metadata for + // non-dereferenced files, and is *different* from the + // metadata call on the path, see, for example, bad fds + if !res.must_dereference + && ((config.format == Format::Long) + || (config.sort == Sort::Name) + || (config.sort == Sort::None) + || config.inode) + { + if let Ok(md) = dir_entry.metadata() { + res.set_md(md) + } + } + res } + Err(_) => PathData::new(dir_entry.path(), None, None, config, false), }; - vec_path_data.push(path_data); + vec_path_data.push(entry_path_data); }; } sort_entries(&mut vec_path_data, config, out); entries.append(&mut vec_path_data); - // ...and total + // Print total after any error display if config.format == Format::Long { display_total(&entries, config, out); } @@ -1546,8 +1603,17 @@ fn enter_directory(dir: &PathData, config: &Config, out: &mut BufWriter) .filter(|p| p.ft.get().unwrap().is_some()) .filter(|p| p.ft.get().unwrap().unwrap().is_dir()) { - let _ = writeln!(out, "\n{}:", e.p_buf.display()); - enter_directory(e, config, out); + match fs::read_dir(&e.p_buf) { + Err(err) => { + let _ = out.flush(); + show!(LsError::IOErrorContext(err, e.p_buf.clone())); + continue; + } + Ok(rd) => { + let _ = writeln!(out, "\n{}:", e.p_buf.display()); + enter_directory(e, rd, config, out); + } + } } } } @@ -1974,7 +2040,24 @@ fn display_item_long( let _ = write!( out, "{}{} {}", - "l?????????".to_string(), + format_args!( + "{}?????????", + if item.ft.get().is_some() && item.ft.get().unwrap().is_some() { + if item.ft.get().unwrap().unwrap().is_char_device() { + "c" + } else if item.ft.get().unwrap().unwrap().is_block_device() { + "b" + } else if item.ft.get().unwrap().unwrap().is_symlink() { + "l" + } else if item.ft.get().unwrap().unwrap().is_dir() { + "d" + } else { + "-" + } + } else { + "-" + }, + ), if item.security_context.len() > 1 { // GNU `ls` uses a "." character to indicate a file with a security context, // but not other alternate access method. diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index b5d49337d..85b336a26 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -7,6 +7,11 @@ use crate::common::util::*; extern crate regex; use self::regex::Regex; +#[cfg(unix)] +use nix::unistd::{close, dup2}; +#[cfg(unix)] +use std::os::unix::io::AsRawFd; + use std::collections::HashMap; use std::path::Path; use std::thread::sleep; @@ -128,10 +133,7 @@ fn test_ls_io_errors() { at.symlink_file("does_not_exist", "some-dir2/dangle"); at.mkdir("some-dir3"); at.mkdir("some-dir3/some-dir4"); - at.mkdir("some-dir3/some-dir5"); - at.mkdir("some-dir3/some-dir6"); - at.mkdir("some-dir3/some-dir7"); - at.mkdir("some-dir3/some-dir8"); + at.mkdir("some-dir4"); scene.ccmd("chmod").arg("000").arg("some-dir1").succeeds(); @@ -177,6 +179,37 @@ fn test_ls_io_errors() { .stderr_does_not_contain( "ls: cannot access 'some-dir2/dangle': No such file or directory\nls: cannot access 'some-dir2/dangle': No such file or directory" ); + + #[cfg(unix)] + { + at.touch("some-dir4/bad-fd.txt"); + let fd1 = at.open("some-dir4/bad-fd.txt").as_raw_fd(); + let fd2 = 25000; + let rv1 = dup2(fd1, fd2); + let rv2 = close(fd1); + + scene + .ucmd() + .arg("-alR") + .arg(format!("/dev/fd/{}", fd2)) + .fails() + .stderr_contains(format!( + "cannot access '/dev/fd/{}': Bad file descriptor", + fd2 + )) + .stdout_does_not_contain(format!("{}:\n", fd2)); + + scene + .ucmd() + .arg("-RiL") + .arg(format!("/dev/fd/{}", fd2)) + .fails() + .stderr_contains(format!("cannot access '/dev/fd/{}': Bad file descriptor", fd2)) + // test we only print bad fd error once + .stderr_does_not_contain(format!("ls: cannot access '/dev/fd/{fd}': Bad file descriptor\nls: cannot access '/dev/fd/{fd}': Bad file descriptor", fd = fd2)); + + let _ = close(fd2); + } } #[test] From 15efba54c52db99530264a40aa766865247ff786 Mon Sep 17 00:00:00 2001 From: electricboogie <32370782+electricboogie@users.noreply.github.com> Date: Sun, 16 Jan 2022 10:20:50 -0600 Subject: [PATCH 02/12] Use dir_entry metadata for dereferenced bad fds to match GNU, add comments, clippy lints --- build.rs | 4 ++-- src/uu/ls/src/ls.rs | 30 ++++++++++++++++++---------- tests/by-util/test_ls.rs | 43 +++++++++++++++++++++------------------- 3 files changed, 44 insertions(+), 33 deletions(-) diff --git a/build.rs b/build.rs index 293d2e65f..03563730f 100644 --- a/build.rs +++ b/build.rs @@ -83,7 +83,7 @@ pub fn main() { mf.write_all( format!( "\tmap.insert(\"{k}\", ({krate}::uumain, {krate}::uu_app));\n", - k = krate[override_prefix.len()..].to_string(), + k = krate[override_prefix.len()..].to_owned(), krate = krate ) .as_bytes(), @@ -92,7 +92,7 @@ pub fn main() { tf.write_all( format!( "#[path=\"{dir}/test_{k}.rs\"]\nmod test_{k};\n", - k = krate[override_prefix.len()..].to_string(), + k = krate[override_prefix.len()..].to_owned(), dir = util_tests_dir, ) .as_bytes(), diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 226dcd1bf..2da4fa328 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -207,9 +207,10 @@ impl Display for LsError { } _ => match errno { 9i32 => { + // only should ever occur on a read_dir on a bad fd write!( f, - "cannot access '{}': Bad file descriptor", + "cannot open directory '{}': Bad file descriptor", p.to_string_lossy(), ) } @@ -1354,18 +1355,24 @@ impl PathData { Err(err) => { let _ = out.flush(); let errno = err.raw_os_error().unwrap_or(1i32); - // Wait to enter "directory" to print error for a bad fd + // Wait to enter "directory" to print error for any bad fd if self.must_dereference && errno.eq(&9i32) { - if let Ok(md) = get_metadata(&self.p_buf, false) { - Some(md) - } else { - show!(LsError::IOErrorContext(err, self.p_buf.clone(),)); - None + if let Some(parent) = self.p_buf.parent() { + if let Ok(read_dir) = fs::read_dir(parent) { + // this dir_entry metadata is different from the metadata call on the path + let res = read_dir + .filter_map(|x| x.ok()) + .filter(|x| self.p_buf.eq(&x.path())) + .filter_map(|x| x.metadata().ok()) + .next(); + if let Some(md) = res { + return Some(md); + } + } } - } else { - show!(LsError::IOErrorContext(err, self.p_buf.clone(),)); - None } + show!(LsError::IOErrorContext(err, self.p_buf.clone(),)); + None } Ok(md) => Some(md), }, @@ -1565,7 +1572,8 @@ fn enter_directory( let res = PathData::new(dir_entry.path(), Some(Ok(ft)), None, config, false); // metadata returned from a DirEntry matches GNU metadata for // non-dereferenced files, and is *different* from the - // metadata call on the path, see, for example, bad fds + // metadata call on the path, see, for example, bad fds, + // so we use here when we will need metadata later anyway if !res.must_dereference && ((config.format == Format::Long) || (config.sort == Sort::Name) diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index 85b336a26..bc840173f 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -170,7 +170,7 @@ fn test_ls_io_errors() { .stderr_contains("Permission denied") .stdout_contains("some-dir4"); - // test we don't double print on dangling link metadata errors + // don't double print on dangling link metadata errors scene .ucmd() .arg("-iRL") @@ -188,26 +188,29 @@ fn test_ls_io_errors() { let rv1 = dup2(fd1, fd2); let rv2 = close(fd1); - scene - .ucmd() - .arg("-alR") - .arg(format!("/dev/fd/{}", fd2)) - .fails() - .stderr_contains(format!( - "cannot access '/dev/fd/{}': Bad file descriptor", - fd2 - )) - .stdout_does_not_contain(format!("{}:\n", fd2)); - - scene - .ucmd() - .arg("-RiL") - .arg(format!("/dev/fd/{}", fd2)) - .fails() - .stderr_contains(format!("cannot access '/dev/fd/{}': Bad file descriptor", fd2)) - // test we only print bad fd error once - .stderr_does_not_contain(format!("ls: cannot access '/dev/fd/{fd}': Bad file descriptor\nls: cannot access '/dev/fd/{fd}': Bad file descriptor", fd = fd2)); + // dup and close work on the mac, but doesn't work in some linux containers + // so check to see that return values are non-error before proceeding + if rv1.is_ok() && rv2.is_ok() { + scene + .ucmd() + .arg("-alR") + .arg(format!("/dev/fd/{}", fd = fd2)) + .fails() + .stderr_contains(format!( + "cannot open directory '/dev/fd/{fd}': Bad file descriptor", + fd = fd2 + )) + .stdout_does_not_contain(format!("{fd}:\n", fd = fd2)); + scene + .ucmd() + .arg("-RiL") + .arg(format!("/dev/fd/{fd}", fd = fd2)) + .fails() + .stderr_contains(format!("cannot open directory '/dev/fd/{fd}': Bad file descriptor", fd = fd2)) + // don't double print bad fd errors + .stderr_does_not_contain(format!("ls: cannot open directory '/dev/fd/{fd}': Bad file descriptor\nls: cannot open directory '/dev/fd/{fd}': Bad file descriptor", fd = fd2)); + } let _ = close(fd2); } } From e6ce049d2ccffa100bbc0dc20d8754d5d9ba55b7 Mon Sep 17 00:00:00 2001 From: electricboogie <32370782+electricboogie@users.noreply.github.com> Date: Sun, 16 Jan 2022 11:07:22 -0600 Subject: [PATCH 03/12] Fix Windows lints/build errors --- src/uu/ls/src/ls.rs | 83 ++++++++++++++++++++++++++++++++------------- 1 file changed, 59 insertions(+), 24 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 2da4fa328..e6297cfb7 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1573,15 +1573,31 @@ fn enter_directory( // metadata returned from a DirEntry matches GNU metadata for // non-dereferenced files, and is *different* from the // metadata call on the path, see, for example, bad fds, - // so we use here when we will need metadata later anyway - if !res.must_dereference - && ((config.format == Format::Long) - || (config.sort == Sort::Name) - || (config.sort == Sort::None) - || config.inode) + // so we use dir_entry metadata here when we know we + // will need metadata later anyway + #[cfg(unix)] { - if let Ok(md) = dir_entry.metadata() { - res.set_md(md) + if !res.must_dereference + && ((config.format == Format::Long) + || (config.sort == Sort::Name) + || (config.sort == Sort::None) + || config.inode) + { + if let Ok(md) = dir_entry.metadata() { + res.set_md(md) + } + } + } + #[cfg(not(unix))] + { + if !res.must_dereference + && ((config.format == Format::Long) + || (config.sort == Sort::Name) + || (config.sort == Sort::None)) + { + if let Ok(md) = dir_entry.metadata() { + res.set_md(md) + } } } res @@ -2045,26 +2061,45 @@ fn display_item_long( } } + #[cfg(unix)] + let leading_char = { + if item.ft.get().is_some() && item.ft.get().unwrap().is_some() { + if item.ft.get().unwrap().unwrap().is_char_device() { + "c" + } else if item.ft.get().unwrap().unwrap().is_block_device() { + "b" + } else if item.ft.get().unwrap().unwrap().is_symlink() { + "l" + } else if item.ft.get().unwrap().unwrap().is_dir() { + "d" + } else { + "-" + } + } else { + "-" + } + }; + #[cfg(not(unix))] + let leading_char = { + if item.ft.get().is_some() && item.ft.get().unwrap().is_some() { + if item.ft.get().unwrap().unwrap().is_symlink() { + "l" + } else if item.ft.get().unwrap().unwrap().is_dir() { + "d" + } else { + "-" + } + } else { + "-" + } + }; + let _ = write!( out, "{}{} {}", format_args!( - "{}?????????", - if item.ft.get().is_some() && item.ft.get().unwrap().is_some() { - if item.ft.get().unwrap().unwrap().is_char_device() { - "c" - } else if item.ft.get().unwrap().unwrap().is_block_device() { - "b" - } else if item.ft.get().unwrap().unwrap().is_symlink() { - "l" - } else if item.ft.get().unwrap().unwrap().is_dir() { - "d" - } else { - "-" - } - } else { - "-" - }, + "{}?????????", leading_char + ), if item.security_context.len() > 1 { // GNU `ls` uses a "." character to indicate a file with a security context, From 16b7b38b92b210142d2e7adf920bdd0292445a3c Mon Sep 17 00:00:00 2001 From: electricboogie <32370782+electricboogie@users.noreply.github.com> Date: Sun, 16 Jan 2022 11:17:43 -0600 Subject: [PATCH 04/12] Run cargo fmt --- src/uu/ls/src/ls.rs | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index e6297cfb7..561f278a0 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1573,27 +1573,27 @@ fn enter_directory( // metadata returned from a DirEntry matches GNU metadata for // non-dereferenced files, and is *different* from the // metadata call on the path, see, for example, bad fds, - // so we use dir_entry metadata here when we know we + // so we use dir_entry metadata here when we know we // will need metadata later anyway - #[cfg(unix)] + #[cfg(unix)] { if !res.must_dereference - && ((config.format == Format::Long) - || (config.sort == Sort::Name) - || (config.sort == Sort::None) - || config.inode) + && ((config.format == Format::Long) + || (config.sort == Sort::Name) + || (config.sort == Sort::None) + || config.inode) { if let Ok(md) = dir_entry.metadata() { res.set_md(md) } } } - #[cfg(not(unix))] + #[cfg(not(unix))] { if !res.must_dereference - && ((config.format == Format::Long) - || (config.sort == Sort::Name) - || (config.sort == Sort::None)) + && ((config.format == Format::Long) + || (config.sort == Sort::Name) + || (config.sort == Sort::None)) { if let Ok(md) = dir_entry.metadata() { res.set_md(md) @@ -2097,10 +2097,7 @@ fn display_item_long( let _ = write!( out, "{}{} {}", - format_args!( - "{}?????????", leading_char - - ), + format_args!("{}?????????", leading_char), if item.security_context.len() > 1 { // GNU `ls` uses a "." character to indicate a file with a security context, // but not other alternate access method. From 4bee6526bf88a43eb1cc3a5f66a1fa13ef1ef1e4 Mon Sep 17 00:00:00 2001 From: electricboogie <32370782+electricboogie@users.noreply.github.com> Date: Mon, 17 Jan 2022 13:14:43 -0600 Subject: [PATCH 05/12] Add new test, never print error on deref-ed bad fd --- tests/by-util/test_ls.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index bc840173f..372a1c89f 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -210,6 +210,12 @@ fn test_ls_io_errors() { .stderr_contains(format!("cannot open directory '/dev/fd/{fd}': Bad file descriptor", fd = fd2)) // don't double print bad fd errors .stderr_does_not_contain(format!("ls: cannot open directory '/dev/fd/{fd}': Bad file descriptor\nls: cannot open directory '/dev/fd/{fd}': Bad file descriptor", fd = fd2)); + + scene + .ucmd() + .arg("-alL") + .arg(format!("/dev/fd/{fd}", fd = fd2)) + .succeeds(); } let _ = close(fd2); } From 0b53cd8c4a88ab434dc9d2e9e3f4cdc29b7eafea Mon Sep 17 00:00:00 2001 From: electricboogie <32370782+electricboogie@users.noreply.github.com> Date: Thu, 20 Jan 2022 14:52:45 -0600 Subject: [PATCH 06/12] Make suggested changes --- src/uu/ls/src/ls.rs | 80 ++++++++++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 26 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 561f278a0..f77278255 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1294,6 +1294,7 @@ impl PathData { fn new( p_buf: PathBuf, file_type: Option>, + metadata: Option>, file_name: Option, config: &Config, command_line: bool, @@ -1332,6 +1333,17 @@ impl PathData { None => OnceCell::new(), }; + let md = match metadata { + Some(md) => { + if !must_dereference { + OnceCell::from(md.ok()) + } else { + OnceCell::new() + } + } + None => OnceCell::new(), + }; + let security_context = if config.context { get_security_context(config, &p_buf, must_dereference) } else { @@ -1339,8 +1351,8 @@ impl PathData { }; Self { - md: OnceCell::new(), ft, + md, display_name, p_buf, must_dereference, @@ -1380,10 +1392,6 @@ impl PathData { .as_ref() } - fn set_md(&self, md: Metadata) { - self.md.get_or_init(|| Some(md)).as_ref(); - } - fn file_type(&self, out: &mut BufWriter) -> Option<&FileType> { self.ft .get_or_init(|| self.md(out).map(|md| md.file_type())) @@ -1398,7 +1406,7 @@ fn list(locs: Vec<&Path>, config: Config) -> UResult<()> { let initial_locs_len = locs.len(); for loc in locs { - let path_data = PathData::new(PathBuf::from(loc), None, None, &config, true); + let path_data = PathData::new(PathBuf::from(loc), None, None, None, &config, true); // Getting metadata here is no big deal as it's just the CWD // and we really just want to know if the strings exist as files/dirs @@ -1532,6 +1540,7 @@ fn enter_directory( PathData::new( path_data.p_buf.clone(), None, + None, Some(".".into()), config, false, @@ -1539,6 +1548,7 @@ fn enter_directory( PathData::new( path_data.p_buf.join(".."), None, + None, Some("..".into()), config, false, @@ -1569,7 +1579,6 @@ fn enter_directory( // certain we print the error once. This also seems to match GNU behavior. let entry_path_data = match dir_entry.file_type() { Ok(ft) => { - let res = PathData::new(dir_entry.path(), Some(Ok(ft)), None, config, false); // metadata returned from a DirEntry matches GNU metadata for // non-dereferenced files, and is *different* from the // metadata call on the path, see, for example, bad fds, @@ -1577,32 +1586,51 @@ fn enter_directory( // will need metadata later anyway #[cfg(unix)] { - if !res.must_dereference - && ((config.format == Format::Long) - || (config.sort == Sort::Name) - || (config.sort == Sort::None) - || config.inode) + if (config.format == Format::Long) + || (config.sort == Sort::Name) + || (config.sort == Sort::None) + || config.inode { if let Ok(md) = dir_entry.metadata() { - res.set_md(md) + PathData::new( + dir_entry.path(), + Some(Ok(ft)), + Some(Ok(md)), + None, + config, + false, + ) + } else { + PathData::new(dir_entry.path(), None, None, None, config, false) } + } else { + PathData::new(dir_entry.path(), None, None, None, config, false) } } #[cfg(not(unix))] { - if !res.must_dereference - && ((config.format == Format::Long) - || (config.sort == Sort::Name) - || (config.sort == Sort::None)) + if (config.format == Format::Long) + || (config.sort == Sort::Name) + || (config.sort == Sort::None) { if let Ok(md) = dir_entry.metadata() { - res.set_md(md) + PathData::new( + dir_entry.path(), + Some(Ok(ft)), + Some(Ok(md)), + None, + config, + false, + ) + } else { + PathData::new(dir_entry.path(), None, None, None, config, false) } + } else { + PathData::new(dir_entry.path(), None, None, None, config, false) } } - res } - Err(_) => PathData::new(dir_entry.path(), None, None, config, false), + Err(_) => PathData::new(dir_entry.path(), None, None, None, config, false), }; vec_path_data.push(entry_path_data); }; @@ -2063,14 +2091,14 @@ fn display_item_long( #[cfg(unix)] let leading_char = { - if item.ft.get().is_some() && item.ft.get().unwrap().is_some() { - if item.ft.get().unwrap().unwrap().is_char_device() { + if let Some(Some(ft)) = item.ft.get() { + if ft.is_char_device() { "c" - } else if item.ft.get().unwrap().unwrap().is_block_device() { + } else if ft.is_block_device() { "b" - } else if item.ft.get().unwrap().unwrap().is_symlink() { + } else if ft.is_symlink() { "l" - } else if item.ft.get().unwrap().unwrap().is_dir() { + } else if ft.is_dir() { "d" } else { "-" @@ -2466,7 +2494,7 @@ fn display_file_name( } } - let target_data = PathData::new(absolute_target, None, None, config, false); + let target_data = PathData::new(absolute_target, None, None, None, config, false); // If we have a symlink to a valid file, we use the metadata of said file. // Because we use an absolute path, we can assume this is guaranteed to exist. From 1d629d8d665fb0ede65765dacdc5d106717a5a9b Mon Sep 17 00:00:00 2001 From: electricboogie <32370782+electricboogie@users.noreply.github.com> Date: Wed, 26 Jan 2022 16:41:52 -0600 Subject: [PATCH 07/12] Move more logic into md function, make enter_directory function clearer --- src/uu/ls/src/ls.rs | 125 ++++++++++++++++++++------------------------ 1 file changed, 56 insertions(+), 69 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index f77278255..542cf1252 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1283,6 +1283,7 @@ struct PathData { md: OnceCell>, ft: OnceCell>, // Name of the file - will be empty for . or .. + de: OnceCell>, display_name: OsString, // PathBuf that all above data corresponds to p_buf: PathBuf, @@ -1295,6 +1296,7 @@ impl PathData { p_buf: PathBuf, file_type: Option>, metadata: Option>, + dir_entry: Option>, file_name: Option, config: &Config, command_line: bool, @@ -1328,6 +1330,11 @@ impl PathData { Dereference::None => false, }; + let de = match dir_entry { + Some(de) => OnceCell::from(de.ok()), + None => OnceCell::new(), + }; + let ft = match file_type { Some(ft) => OnceCell::from(ft.ok()), None => OnceCell::new(), @@ -1353,6 +1360,7 @@ impl PathData { Self { ft, md, + de, display_name, p_buf, must_dereference, @@ -1362,33 +1370,30 @@ impl PathData { fn md(&self, out: &mut BufWriter) -> Option<&Metadata> { self.md - .get_or_init( - || match get_metadata(self.p_buf.as_path(), self.must_dereference) { + .get_or_init(|| { + // check if we can use DirEntry metadata + if !self.must_dereference { + if let Some(Some(dir_entry)) = self.de.get() { + return dir_entry.metadata().ok(); + } + } + + // if not, check if we can use path metadata + match get_metadata(self.p_buf.as_path(), self.must_dereference) { Err(err) => { let _ = out.flush(); let errno = err.raw_os_error().unwrap_or(1i32); - // Wait to enter "directory" to print error for any bad fd if self.must_dereference && errno.eq(&9i32) { - if let Some(parent) = self.p_buf.parent() { - if let Ok(read_dir) = fs::read_dir(parent) { - // this dir_entry metadata is different from the metadata call on the path - let res = read_dir - .filter_map(|x| x.ok()) - .filter(|x| self.p_buf.eq(&x.path())) - .filter_map(|x| x.metadata().ok()) - .next(); - if let Some(md) = res { - return Some(md); - } - } + if let Some(Some(dir_entry)) = self.de.get() { + return dir_entry.metadata().ok(); } } show!(LsError::IOErrorContext(err, self.p_buf.clone(),)); None } Ok(md) => Some(md), - }, - ) + } + }) .as_ref() } @@ -1406,7 +1411,7 @@ fn list(locs: Vec<&Path>, config: Config) -> UResult<()> { let initial_locs_len = locs.len(); for loc in locs { - let path_data = PathData::new(PathBuf::from(loc), None, None, None, &config, true); + let path_data = PathData::new(PathBuf::from(loc), None, None, None, None, &config, true); // Getting metadata here is no big deal as it's just the CWD // and we really just want to know if the strings exist as files/dirs @@ -1541,6 +1546,7 @@ fn enter_directory( path_data.p_buf.clone(), None, None, + None, Some(".".into()), config, false, @@ -1549,6 +1555,7 @@ fn enter_directory( path_data.p_buf.join(".."), None, None, + None, Some("..".into()), config, false, @@ -1579,58 +1586,37 @@ fn enter_directory( // certain we print the error once. This also seems to match GNU behavior. let entry_path_data = match dir_entry.file_type() { Ok(ft) => { - // metadata returned from a DirEntry matches GNU metadata for - // non-dereferenced files, and is *different* from the - // metadata call on the path, see, for example, bad fds, - // so we use dir_entry metadata here when we know we - // will need metadata later anyway - #[cfg(unix)] - { - if (config.format == Format::Long) - || (config.sort == Sort::Name) - || (config.sort == Sort::None) - || config.inode - { - if let Ok(md) = dir_entry.metadata() { - PathData::new( - dir_entry.path(), - Some(Ok(ft)), - Some(Ok(md)), - None, - config, - false, - ) - } else { - PathData::new(dir_entry.path(), None, None, None, config, false) - } - } else { - PathData::new(dir_entry.path(), None, None, None, config, false) - } - } - #[cfg(not(unix))] - { - if (config.format == Format::Long) - || (config.sort == Sort::Name) - || (config.sort == Sort::None) - { - if let Ok(md) = dir_entry.metadata() { - PathData::new( - dir_entry.path(), - Some(Ok(ft)), - Some(Ok(md)), - None, - config, - false, - ) - } else { - PathData::new(dir_entry.path(), None, None, None, config, false) - } - } else { - PathData::new(dir_entry.path(), None, None, None, config, false) - } + if let Ok(md) = dir_entry.metadata() { + PathData::new( + dir_entry.path(), + Some(Ok(ft)), + Some(Ok(md)), + Some(Ok(dir_entry)), + None, + config, + false, + ) + } else { + PathData::new( + dir_entry.path(), + Some(Ok(ft)), + None, + Some(Ok(dir_entry)), + None, + config, + false, + ) } } - Err(_) => PathData::new(dir_entry.path(), None, None, None, config, false), + Err(_) => PathData::new( + dir_entry.path(), + None, + None, + Some(Ok(dir_entry)), + None, + config, + false, + ), }; vec_path_data.push(entry_path_data); }; @@ -2494,7 +2480,8 @@ fn display_file_name( } } - let target_data = PathData::new(absolute_target, None, None, None, config, false); + let target_data = + PathData::new(absolute_target, None, None, None, None, config, false); // If we have a symlink to a valid file, we use the metadata of said file. // Because we use an absolute path, we can assume this is guaranteed to exist. From 7512200ba2966599d7742a0a49f9bddeea7a0426 Mon Sep 17 00:00:00 2001 From: electricboogie <32370782+electricboogie@users.noreply.github.com> Date: Wed, 26 Jan 2022 16:52:05 -0600 Subject: [PATCH 08/12] Cleanup comments --- src/uu/ls/src/ls.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 542cf1252..a5cba5d11 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1378,7 +1378,7 @@ impl PathData { } } - // if not, check if we can use path metadata + // if not, check if we can use Path metadata match get_metadata(self.p_buf.as_path(), self.must_dereference) { Err(err) => { let _ = out.flush(); From 463c1ac2ff735768dd67228b8b0cc471a339ce53 Mon Sep 17 00:00:00 2001 From: electricboogie <32370782+electricboogie@users.noreply.github.com> Date: Thu, 27 Jan 2022 11:57:19 -0600 Subject: [PATCH 09/12] Make suggested changes: Move logic into PathData struct, etc. --- src/uu/ls/src/ls.rs | 84 ++++++++++++--------------------------------- 1 file changed, 22 insertions(+), 62 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index a5cba5d11..e5c0b562f 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1282,8 +1282,8 @@ struct PathData { // Result got from symlink_metadata() or metadata() based on config md: OnceCell>, ft: OnceCell>, + de: Option, // Name of the file - will be empty for . or .. - de: OnceCell>, display_name: OsString, // PathBuf that all above data corresponds to p_buf: PathBuf, @@ -1294,8 +1294,6 @@ struct PathData { impl PathData { fn new( p_buf: PathBuf, - file_type: Option>, - metadata: Option>, dir_entry: Option>, file_name: Option, config: &Config, @@ -1331,19 +1329,14 @@ impl PathData { }; let de = match dir_entry { - Some(de) => OnceCell::from(de.ok()), - None => OnceCell::new(), + Some(de) => de.ok(), + None => None, }; - let ft = match file_type { - Some(ft) => OnceCell::from(ft.ok()), - None => OnceCell::new(), - }; - - let md = match metadata { - Some(md) => { - if !must_dereference { - OnceCell::from(md.ok()) + let ft = match de { + Some(ref de) => { + if let Ok(ft_de) = de.file_type() { + OnceCell::from(Some(ft_de)) } else { OnceCell::new() } @@ -1359,7 +1352,7 @@ impl PathData { Self { ft, - md, + md: OnceCell::new(), de, display_name, p_buf, @@ -1373,7 +1366,7 @@ impl PathData { .get_or_init(|| { // check if we can use DirEntry metadata if !self.must_dereference { - if let Some(Some(dir_entry)) = self.de.get() { + if let Some(dir_entry) = &self.de { return dir_entry.metadata().ok(); } } @@ -1383,8 +1376,12 @@ impl PathData { Err(err) => { let _ = out.flush(); let errno = err.raw_os_error().unwrap_or(1i32); - if self.must_dereference && errno.eq(&9i32) { - if let Some(Some(dir_entry)) = self.de.get() { + // a bad fd will throw an error when dereferenced, + // but GNU will not throw an error until a bad fd "dir" + // is entered, here we match that GNU behavior, by handing + // back the non-dereferenced metadata upon an EBADF + if self.must_dereference && errno == 9i32 { + if let Some(dir_entry) = &self.de { return dir_entry.metadata().ok(); } } @@ -1411,7 +1408,7 @@ fn list(locs: Vec<&Path>, config: Config) -> UResult<()> { let initial_locs_len = locs.len(); for loc in locs { - let path_data = PathData::new(PathBuf::from(loc), None, None, None, None, &config, true); + let path_data = PathData::new(PathBuf::from(loc), None, None, &config, true); // Getting metadata here is no big deal as it's just the CWD // and we really just want to know if the strings exist as files/dirs @@ -1545,8 +1542,6 @@ fn enter_directory( PathData::new( path_data.p_buf.clone(), None, - None, - None, Some(".".into()), config, false, @@ -1554,8 +1549,6 @@ fn enter_directory( PathData::new( path_data.p_buf.join(".."), None, - None, - None, Some("..".into()), config, false, @@ -1584,40 +1577,8 @@ fn enter_directory( // // Why not print an error here? If we wait for the metadata() call, we make // certain we print the error once. This also seems to match GNU behavior. - let entry_path_data = match dir_entry.file_type() { - Ok(ft) => { - if let Ok(md) = dir_entry.metadata() { - PathData::new( - dir_entry.path(), - Some(Ok(ft)), - Some(Ok(md)), - Some(Ok(dir_entry)), - None, - config, - false, - ) - } else { - PathData::new( - dir_entry.path(), - Some(Ok(ft)), - None, - Some(Ok(dir_entry)), - None, - config, - false, - ) - } - } - Err(_) => PathData::new( - dir_entry.path(), - None, - None, - Some(Ok(dir_entry)), - None, - config, - false, - ), - }; + let entry_path_data = + PathData::new(dir_entry.path(), Some(Ok(dir_entry)), None, config, false); vec_path_data.push(entry_path_data); }; } @@ -2095,10 +2056,10 @@ fn display_item_long( }; #[cfg(not(unix))] let leading_char = { - if item.ft.get().is_some() && item.ft.get().unwrap().is_some() { - if item.ft.get().unwrap().unwrap().is_symlink() { + if let Some(Some(ft)) = item.ft.get() { + if ft.is_symlink() { "l" - } else if item.ft.get().unwrap().unwrap().is_dir() { + } else if ft.is_dir() { "d" } else { "-" @@ -2480,8 +2441,7 @@ fn display_file_name( } } - let target_data = - PathData::new(absolute_target, None, None, None, None, config, false); + let target_data = PathData::new(absolute_target, None, None, config, false); // If we have a symlink to a valid file, we use the metadata of said file. // Because we use an absolute path, we can assume this is guaranteed to exist. From 8b6c1337aa6d54d219d62eeb0e2297a14529a349 Mon Sep 17 00:00:00 2001 From: electricboogie <32370782+electricboogie@users.noreply.github.com> Date: Thu, 27 Jan 2022 15:36:55 -0600 Subject: [PATCH 10/12] Fix broken test in certain Linux containers --- tests/by-util/test_ls.rs | 52 ++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index 372a1c89f..b40173522 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -191,25 +191,41 @@ fn test_ls_io_errors() { // dup and close work on the mac, but doesn't work in some linux containers // so check to see that return values are non-error before proceeding if rv1.is_ok() && rv2.is_ok() { - scene - .ucmd() - .arg("-alR") - .arg(format!("/dev/fd/{}", fd = fd2)) - .fails() - .stderr_contains(format!( - "cannot open directory '/dev/fd/{fd}': Bad file descriptor", - fd = fd2 - )) - .stdout_does_not_contain(format!("{fd}:\n", fd = fd2)); + // on the mac and in certain Linux containers bad fds are typed as dirs, + // however sometimes bad fds are typed as links and directory entry on links won't fail + if PathBuf::from(format!("/dev/fd/{fd}", fd = fd2)).is_dir() { + scene + .ucmd() + .arg("-alR") + .arg(format!("/dev/fd/{fd}", fd = fd2)) + .fails() + .stderr_contains(format!( + "cannot open directory '/dev/fd/{fd}': Bad file descriptor", + fd = fd2 + )) + .stdout_does_not_contain(format!("{fd}:\n", fd = fd2)); - scene - .ucmd() - .arg("-RiL") - .arg(format!("/dev/fd/{fd}", fd = fd2)) - .fails() - .stderr_contains(format!("cannot open directory '/dev/fd/{fd}': Bad file descriptor", fd = fd2)) - // don't double print bad fd errors - .stderr_does_not_contain(format!("ls: cannot open directory '/dev/fd/{fd}': Bad file descriptor\nls: cannot open directory '/dev/fd/{fd}': Bad file descriptor", fd = fd2)); + scene + .ucmd() + .arg("-RiL") + .arg(format!("/dev/fd/{fd}", fd = fd2)) + .fails() + .stderr_contains(format!("cannot open directory '/dev/fd/{fd}': Bad file descriptor", fd = fd2)) + // don't double print bad fd errors + .stderr_does_not_contain(format!("ls: cannot open directory '/dev/fd/{fd}': Bad file descriptor\nls: cannot open directory '/dev/fd/{fd}': Bad file descriptor", fd = fd2)); + } else { + scene + .ucmd() + .arg("-alR") + .arg(format!("/dev/fd/{fd}", fd = fd2)) + .succeeds(); + + scene + .ucmd() + .arg("-RiL") + .arg(format!("/dev/fd/{fd}", fd = fd2)) + .succeeds(); + } scene .ucmd() From 5e82d6069f21df37819cb8a77432e2bfeeeb9101 Mon Sep 17 00:00:00 2001 From: electricboogie <32370782+electricboogie@users.noreply.github.com> Date: Thu, 27 Jan 2022 16:12:34 -0600 Subject: [PATCH 11/12] Fix comments --- src/uu/ls/src/ls.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index e5c0b562f..88bcff5a7 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1333,6 +1333,8 @@ impl PathData { None => None, }; + // Why prefer to check the DirEntry file_type()? B/c the call is + // nearly free compared to a metadata() or file_type() call on a dir/file. let ft = match de { Some(ref de) => { if let Ok(ft_de) = de.file_type() { @@ -1572,11 +1574,6 @@ fn enter_directory( }; if should_display(&dir_entry, config) { - // Why prefer to check the DirEntry file_type()? B/c the call is - // nearly free compared to a metadata() or file_type() call on a dir/file. - // - // Why not print an error here? If we wait for the metadata() call, we make - // certain we print the error once. This also seems to match GNU behavior. let entry_path_data = PathData::new(dir_entry.path(), Some(Ok(dir_entry)), None, config, false); vec_path_data.push(entry_path_data); From 72c53219e357315b4454bb74056577081778f091 Mon Sep 17 00:00:00 2001 From: electricboogie <32370782+electricboogie@users.noreply.github.com> Date: Thu, 27 Jan 2022 22:07:07 -0600 Subject: [PATCH 12/12] Prevent potential unwrap on a None value --- src/uu/ls/src/ls.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 88bcff5a7..a3620b213 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1328,17 +1328,19 @@ impl PathData { Dereference::None => false, }; - let de = match dir_entry { + let de: Option = match dir_entry { Some(de) => de.ok(), None => None, }; // Why prefer to check the DirEntry file_type()? B/c the call is - // nearly free compared to a metadata() or file_type() call on a dir/file. + // nearly free compared to a metadata() call on a Path let ft = match de { Some(ref de) => { if let Ok(ft_de) = de.file_type() { OnceCell::from(Some(ft_de)) + } else if let Ok(md_pb) = p_buf.metadata() { + OnceCell::from(Some(md_pb.file_type())) } else { OnceCell::new() } @@ -1353,8 +1355,8 @@ impl PathData { }; Self { - ft, md: OnceCell::new(), + ft, de, display_name, p_buf, @@ -1594,8 +1596,7 @@ fn enter_directory( for e in entries .iter() .skip(if config.files == Files::All { 2 } else { 0 }) - // Already requested file_type for the dir_entries above. So we know the OnceCell is set. - // And can unwrap again because we tested whether path has is_some here + .filter(|p| p.ft.get().is_some()) .filter(|p| p.ft.get().unwrap().is_some()) .filter(|p| p.ft.get().unwrap().unwrap().is_dir()) {