mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
Merge pull request #5612 from zhitkoff/wc-proc
wc: pass GNU test wc-proc and Windows optimization
This commit is contained in:
commit
f88209249e
3 changed files with 107 additions and 6 deletions
|
@ -2,6 +2,8 @@
|
|||
//
|
||||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
// cSpell:ignore sysconf
|
||||
use crate::word_count::WordCount;
|
||||
|
||||
use super::WordCountable;
|
||||
|
@ -11,11 +13,19 @@ use std::fs::OpenOptions;
|
|||
use std::io::{self, ErrorKind, Read};
|
||||
|
||||
#[cfg(unix)]
|
||||
use libc::S_IFREG;
|
||||
use libc::{sysconf, S_IFREG, _SC_PAGESIZE};
|
||||
#[cfg(unix)]
|
||||
use nix::sys::stat;
|
||||
#[cfg(unix)]
|
||||
use std::io::{Seek, SeekFrom};
|
||||
#[cfg(any(target_os = "linux", target_os = "android"))]
|
||||
use std::os::unix::io::AsRawFd;
|
||||
#[cfg(windows)]
|
||||
use std::os::windows::fs::MetadataExt;
|
||||
#[cfg(windows)]
|
||||
const FILE_ATTRIBUTE_ARCHIVE: u32 = 32;
|
||||
#[cfg(windows)]
|
||||
const FILE_ATTRIBUTE_NORMAL: u32 = 128;
|
||||
|
||||
#[cfg(any(target_os = "linux", target_os = "android"))]
|
||||
use libc::S_IFIFO;
|
||||
|
@ -72,6 +82,8 @@ fn count_bytes_using_splice(fd: &impl AsRawFd) -> Result<usize, usize> {
|
|||
/// 1. On Unix, we can simply `stat` the file if it is regular.
|
||||
/// 2. On Linux -- if the above did not work -- we can use splice to count
|
||||
/// the number of bytes if the file is a FIFO.
|
||||
/// 3. On Windows we can use `std::os::windows::fs::MetadataExt` to get file size
|
||||
/// for regular files
|
||||
/// 4. Otherwise, we just read normally, but without the overhead of counting
|
||||
/// other things such as lines and words.
|
||||
#[inline]
|
||||
|
@ -87,11 +99,60 @@ pub(crate) fn count_bytes_fast<T: WordCountable>(handle: &mut T) -> (usize, Opti
|
|||
// If stat.st_size = 0 then
|
||||
// - either the size is 0
|
||||
// - or the size is unknown.
|
||||
// The second case happens for files in pseudo-filesystems. For
|
||||
// example with /proc/version and /sys/kernel/profiling. So,
|
||||
// if it is 0 we don't report that and instead do a full read.
|
||||
if (stat.st_mode as libc::mode_t & S_IFREG) != 0 && stat.st_size > 0 {
|
||||
return (stat.st_size as usize, None);
|
||||
// The second case happens for files in pseudo-filesystems.
|
||||
// For example with /proc/version.
|
||||
// So, if it is 0 we don't report that and instead do a full read.
|
||||
//
|
||||
// Another thing to consider for files in pseudo-filesystems like /proc, /sys
|
||||
// and similar is that they could report `st_size` greater than actual content.
|
||||
// For example /sys/kernel/profiling could report `st_size` equal to
|
||||
// system page size (typically 4096 on 64-bit systems), while its file content
|
||||
// would count up only to a couple of bytes.
|
||||
// This condition usually occurs for files in pseudo-filesystems like /proc, /sys
|
||||
// that report `st_size` in multiples of the system page size.
|
||||
// In such cases - attempt `seek()` almost to the end of the file
|
||||
// and then fall back on read to count the rest.
|
||||
//
|
||||
// And finally a special case of input redirection in *nix shell:
|
||||
// `( wc -c ; wc -c ) < file` should return
|
||||
// ```
|
||||
// size_of_file
|
||||
// 0
|
||||
// ```
|
||||
// Similarly
|
||||
// `( head -c1 ; wc -c ) < file` should return
|
||||
// ```
|
||||
// first_byte_of_file
|
||||
// size_of_file - 1
|
||||
// ```
|
||||
// Since the input stream from file is treated as continuous across both commands inside ().
|
||||
// In cases like this, due to `<` redirect, the `stat.st_mode` would report input as a regular file
|
||||
// and `stat.st_size` would report the size of file on disk
|
||||
// and NOT the remaining number of bytes in the input stream.
|
||||
// However, the raw file descriptor in this situation would be equal to `0`
|
||||
// for STDIN in both invocations.
|
||||
// Therefore we cannot rely on `st_size` here and should fall back on full read.
|
||||
if fd > 0 && (stat.st_mode as libc::mode_t & S_IFREG) != 0 && stat.st_size > 0 {
|
||||
let sys_page_size = unsafe { sysconf(_SC_PAGESIZE) as usize };
|
||||
if stat.st_size as usize % sys_page_size > 0 {
|
||||
// regular file or file from /proc, /sys and similar pseudo-filesystems
|
||||
// with size that is NOT a multiple of system page size
|
||||
return (stat.st_size as usize, None);
|
||||
} else if let Some(file) = handle.inner_file() {
|
||||
// On some platforms `stat.st_blksize` and `stat.st_size`
|
||||
// are of different types: i64 vs i32
|
||||
// e.g. macOS on Apple Silicon (aarch64-apple-darwin),
|
||||
// Debian Linux on ARM (aarch64-unknown-linux-gnu),
|
||||
// 32bit i686 targets, etc.
|
||||
// While on the others they are of the same type.
|
||||
#[allow(clippy::unnecessary_cast)]
|
||||
let offset =
|
||||
stat.st_size as i64 - stat.st_size as i64 % (stat.st_blksize as i64 + 1);
|
||||
|
||||
if let Ok(n) = file.seek(SeekFrom::Start(offset as u64)) {
|
||||
byte_count = n as usize;
|
||||
}
|
||||
}
|
||||
}
|
||||
#[cfg(any(target_os = "linux", target_os = "android"))]
|
||||
{
|
||||
|
@ -107,6 +168,21 @@ pub(crate) fn count_bytes_fast<T: WordCountable>(handle: &mut T) -> (usize, Opti
|
|||
}
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
{
|
||||
if let Some(file) = handle.inner_file() {
|
||||
if let Ok(metadata) = file.metadata() {
|
||||
let attributes = metadata.file_attributes();
|
||||
|
||||
if (attributes & FILE_ATTRIBUTE_ARCHIVE) != 0
|
||||
|| (attributes & FILE_ATTRIBUTE_NORMAL) != 0
|
||||
{
|
||||
return (metadata.file_size() as usize, None);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back on `read`, but without the overhead of counting words and lines.
|
||||
let mut buf = [0_u8; BUF_SIZE];
|
||||
loop {
|
||||
|
|
|
@ -17,12 +17,14 @@ use std::os::unix::io::AsRawFd;
|
|||
pub trait WordCountable: AsRawFd + Read {
|
||||
type Buffered: BufRead;
|
||||
fn buffered(self) -> Self::Buffered;
|
||||
fn inner_file(&mut self) -> Option<&mut File>;
|
||||
}
|
||||
|
||||
#[cfg(not(unix))]
|
||||
pub trait WordCountable: Read {
|
||||
type Buffered: BufRead;
|
||||
fn buffered(self) -> Self::Buffered;
|
||||
fn inner_file(&mut self) -> Option<&mut File>;
|
||||
}
|
||||
|
||||
impl WordCountable for StdinLock<'_> {
|
||||
|
@ -31,6 +33,9 @@ impl WordCountable for StdinLock<'_> {
|
|||
fn buffered(self) -> Self::Buffered {
|
||||
self
|
||||
}
|
||||
fn inner_file(&mut self) -> Option<&mut File> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl WordCountable for File {
|
||||
|
@ -39,4 +44,8 @@ impl WordCountable for File {
|
|||
fn buffered(self) -> Self::Buffered {
|
||||
BufReader::new(self)
|
||||
}
|
||||
|
||||
fn inner_file(&mut self) -> Option<&mut File> {
|
||||
Some(self)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -243,6 +243,14 @@ fn test_single_only_lines() {
|
|||
.stdout_is("18 moby_dick.txt\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_single_only_bytes() {
|
||||
new_ucmd!()
|
||||
.args(&["-c", "lorem_ipsum.txt"])
|
||||
.run()
|
||||
.stdout_is("772 lorem_ipsum.txt\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_single_all_counts() {
|
||||
new_ucmd!()
|
||||
|
@ -419,6 +427,14 @@ fn test_files_from_pseudo_filesystem() {
|
|||
use pretty_assertions::assert_ne;
|
||||
let result = new_ucmd!().arg("-c").arg("/proc/cpuinfo").succeeds();
|
||||
assert_ne!(result.stdout_str(), "0 /proc/cpuinfo\n");
|
||||
|
||||
let (at, mut ucmd) = at_and_ucmd!();
|
||||
let result = ucmd.arg("-c").arg("/sys/kernel/profiling").succeeds();
|
||||
let actual = at.read("/sys/kernel/profiling").len();
|
||||
assert_eq!(
|
||||
result.stdout_str(),
|
||||
format!("{} /sys/kernel/profiling\n", actual)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue