From 054ca4a6b517cd86893a92362f152b7cde0071a1 Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Sun, 26 Nov 2023 17:01:22 -0500 Subject: [PATCH] wc: better handle files in pseudo-filesystems --- src/uu/wc/src/count_fast.rs | 26 +++++++++++++++++++++----- tests/by-util/test_wc.rs | 8 ++++++++ 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/uu/wc/src/count_fast.rs b/src/uu/wc/src/count_fast.rs index 863625921..3872e60bf 100644 --- a/src/uu/wc/src/count_fast.rs +++ b/src/uu/wc/src/count_fast.rs @@ -2,6 +2,8 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. + +// cSpell:ignore sysconf use crate::word_count::WordCount; use super::WordCountable; @@ -11,7 +13,7 @@ use std::fs::OpenOptions; use std::io::{self, ErrorKind, Read}; #[cfg(unix)] -use libc::S_IFREG; +use libc::{sysconf, S_IFREG, _SC_PAGESIZE}; #[cfg(unix)] use nix::sys::stat; #[cfg(any(target_os = "linux", target_os = "android"))] @@ -87,11 +89,25 @@ pub(crate) fn count_bytes_fast<T: WordCountable>(handle: &mut T) -> (usize, Opti // If stat.st_size = 0 then // - either the size is 0 // - or the size is unknown. - // The second case happens for files in pseudo-filesystems. For - // example with /proc/version and /sys/kernel/profiling. So, - // if it is 0 we don't report that and instead do a full read. + // The second case happens for files in pseudo-filesystems. + // For example with /proc/version. + // So, if it is 0 we don't report that and instead do a full read. + // + // Another thing to consider for files in pseudo-filesystems like /proc, /sys + // and similar is that they could report `st_size` greater than actual content. + // For example /sys/kernel/profiling could report `st_size` equal to + // system page size (typically 4096 on 64-bit systems), while its file content + // would count up only to a couple of bytes. 
+ // This condition usually occurs for files in pseudo-filesystems like /proc, /sys + // that report `st_size` in multiples of the system page size. + // In such cases, fall back on a full read. if (stat.st_mode as libc::mode_t & S_IFREG) != 0 && stat.st_size > 0 { - return (stat.st_size as usize, None); + let sys_page_size = unsafe { sysconf(_SC_PAGESIZE) as usize }; + if stat.st_size as usize % sys_page_size > 0 { + // regular file or file from /proc, /sys and similar pseudo-filesystems + // with size that is NOT a multiple of system page size + return (stat.st_size as usize, None); + } } #[cfg(any(target_os = "linux", target_os = "android"))] { diff --git a/tests/by-util/test_wc.rs b/tests/by-util/test_wc.rs index 8358a542a..c365b13b9 100644 --- a/tests/by-util/test_wc.rs +++ b/tests/by-util/test_wc.rs @@ -419,6 +419,14 @@ fn test_files_from_pseudo_filesystem() { use pretty_assertions::assert_ne; let result = new_ucmd!().arg("-c").arg("/proc/cpuinfo").succeeds(); assert_ne!(result.stdout_str(), "0 /proc/cpuinfo\n"); + + let (at, mut ucmd) = at_and_ucmd!(); + let result = ucmd.arg("-c").arg("/sys/kernel/profiling").succeeds(); + let actual = at.read("/sys/kernel/profiling").len(); + assert_eq!( + result.stdout_str(), + format!("{} /sys/kernel/profiling\n", actual) + ); } #[test]