1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

Rewrote head (#1911)

See https://github.com/uutils/coreutils/pull/1911
for the details
This commit is contained in:
Mikadore 2021-03-29 13:08:48 +02:00 committed by GitHub
parent da5f2f3a6c
commit 8320b1ec5f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 1235 additions and 256 deletions

View file

@ -15,7 +15,7 @@ edition = "2018"
path = "src/head.rs" path = "src/head.rs"
[dependencies] [dependencies]
libc = "0.2.42" clap = "2.33"
uucore = { version=">=0.0.7", package="uucore", path="../../uucore" } uucore = { version=">=0.0.7", package="uucore", path="../../uucore" }
uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }

View file

@ -1,240 +1,642 @@
// * This file is part of the uutils coreutils package. use clap::{App, Arg};
// * use std::convert::TryFrom;
// * (c) Alan Andrade <alan.andradec@gmail.com> use std::ffi::OsString;
// * use std::io::{ErrorKind, Read, Seek, SeekFrom, Write};
// * For the full copyright and license information, please view the LICENSE use uucore::{crash, executable, show_error};
// * file that was distributed with this source code.
// *
// * Synced with: https://raw.github.com/avsm/src/master/usr.bin/head/head.c
#[macro_use] const EXIT_FAILURE: i32 = 1;
extern crate uucore; const EXIT_SUCCESS: i32 = 0;
const BUF_SIZE: usize = 65536;
use std::collections::VecDeque; const VERSION: &str = env!("CARGO_PKG_VERSION");
use std::fs::File; const ABOUT: &str = "\
use std::io::{stdin, BufRead, BufReader, Read}; Print the first 10 lines of each FILE to standard output.\n\
use std::path::Path; With more than one FILE, precede each with a header giving the file name.\n\
use std::str::from_utf8; \n\
With no FILE, or when FILE is -, read standard input.\n\
\n\
Mandatory arguments to long flags are mandatory for short flags too.\
";
const USAGE: &str = "head [FLAG]... [FILE]...";
static SYNTAX: &str = ""; mod options {
static SUMMARY: &str = ""; pub const BYTES_NAME: &str = "BYTES";
static LONG_HELP: &str = ""; pub const LINES_NAME: &str = "LINES";
pub const QUIET_NAME: &str = "QUIET";
pub const VERBOSE_NAME: &str = "VERBOSE";
pub const ZERO_NAME: &str = "ZERO";
pub const FILES_NAME: &str = "FILE";
}
mod parse;
mod split;
enum FilterMode { fn app<'a>() -> App<'a, 'a> {
Bytes(usize), App::new(executable!())
.version(VERSION)
.about(ABOUT)
.usage(USAGE)
.arg(
Arg::with_name(options::BYTES_NAME)
.short("c")
.long("bytes")
.value_name("[-]NUM")
.takes_value(true)
.help(
"\
print the first NUM bytes of each file;\n\
with the leading '-', print all but the last\n\
NUM bytes of each file\
",
)
.overrides_with_all(&[options::BYTES_NAME, options::LINES_NAME])
.allow_hyphen_values(true),
)
.arg(
Arg::with_name(options::LINES_NAME)
.short("n")
.long("lines")
.value_name("[-]NUM")
.takes_value(true)
.help(
"\
print the first NUM lines instead of the first 10;\n\
with the leading '-', print all but the last\n\
NUM lines of each file\
",
)
.overrides_with_all(&[options::LINES_NAME, options::BYTES_NAME])
.allow_hyphen_values(true),
)
.arg(
Arg::with_name(options::QUIET_NAME)
.short("q")
.long("--quiet")
.visible_alias("silent")
.help("never print headers giving file names")
.overrides_with_all(&[options::VERBOSE_NAME, options::QUIET_NAME]),
)
.arg(
Arg::with_name(options::VERBOSE_NAME)
.short("v")
.long("verbose")
.help("always print headers giving file names")
.overrides_with_all(&[options::QUIET_NAME, options::VERBOSE_NAME]),
)
.arg(
Arg::with_name(options::ZERO_NAME)
.short("z")
.long("zero-terminated")
.help("line delimiter is NUL, not newline")
.overrides_with(options::ZERO_NAME),
)
.arg(Arg::with_name(options::FILES_NAME).multiple(true))
}
#[derive(PartialEq, Debug, Clone, Copy)]
enum Modes {
Lines(usize), Lines(usize),
NLines(usize), Bytes(usize),
} }
struct Settings { fn parse_mode<F>(src: &str, closure: F) -> Result<(Modes, bool), String>
mode: FilterMode, where
verbose: bool, F: FnOnce(usize) -> Modes,
zero_terminated: bool, {
match parse::parse_num(src) {
Ok((n, last)) => Ok((closure(n), last)),
Err(reason) => match reason {
parse::ParseError::Syntax => Err(format!("'{}'", src)),
parse::ParseError::Overflow => {
Err(format!("'{}': Value too large for defined datatype", src))
}
},
}
} }
impl Default for Settings { fn arg_iterate<'a>(
fn default() -> Settings { mut args: impl uucore::Args + 'a,
Settings { ) -> Result<Box<dyn Iterator<Item = OsString> + 'a>, String> {
mode: FilterMode::Lines(10), // argv[0] is always present
verbose: false, let first = args.next().unwrap();
zero_terminated: false, if let Some(second) = args.next() {
if let Some(s) = second.to_str() {
match parse::parse_obsolete(s) {
Some(Ok(iter)) => Ok(Box::new(vec![first].into_iter().chain(iter).chain(args))),
Some(Err(e)) => match e {
parse::ParseError::Syntax => Err(format!("bad argument format: '{}'", s)),
parse::ParseError::Overflow => Err(format!(
"invalid argument: '{}' Value too large for defined datatype",
s
)),
},
None => Ok(Box::new(vec![first, second].into_iter().chain(args))),
}
} else {
Err("bad argument encoding".to_owned())
} }
} else {
Ok(Box::new(vec![first].into_iter()))
}
}
/// Fully resolved settings for one invocation of `head`.
#[derive(Debug, PartialEq)]
struct HeadOptions {
    /// `-q` / `--quiet` / `--silent` was given.
    pub quiet: bool,
    /// `-v` / `--verbose` was given.
    pub verbose: bool,
    /// `-z` / `--zero-terminated` was given: lines end in NUL, not newline.
    pub zeroed: bool,
    /// The count was written with a leading `-`, i.e. "all but the last NUM".
    pub all_but_last: bool,
    /// Whether to count lines or bytes, and how many.
    pub mode: Modes,
    /// Inputs to process, in order; `-` stands for standard input.
    pub files: Vec<String>,
}

impl HeadOptions {
    /// Returns the settings used when no flag is given: the first 10 lines,
    /// no header flags, newline-terminated, and an empty file list.
    pub fn new() -> HeadOptions {
        HeadOptions {
            quiet: false,
            verbose: false,
            zeroed: false,
            all_but_last: false,
            mode: Modes::Lines(10),
            files: Vec::new(),
        }
    }

    /// Builds the settings from raw command-line arguments.
    ///
    /// The arguments are first passed through `arg_iterate` and then matched
    /// against `app()`. A `--bytes` value takes precedence over a `--lines`
    /// value; with neither, the mode is the first 10 lines. When no FILE
    /// operand is given the file list is `["-"]`.
    ///
    /// # Errors
    ///
    /// Returns the message produced by `arg_iterate`, or an
    /// `invalid number of bytes: ...` / `invalid number of lines: ...`
    /// message when the count cannot be parsed.
    pub fn get_from(args: impl uucore::Args) -> Result<Self, String> {
        let matches = app().get_matches_from(arg_iterate(args)?);

        let (mode, all_but_last) = if let Some(bytes) = matches.value_of(options::BYTES_NAME) {
            parse_mode(bytes, Modes::Bytes)
                .map_err(|err| format!("invalid number of bytes: {}", err))?
        } else if let Some(lines) = matches.value_of(options::LINES_NAME) {
            parse_mode(lines, Modes::Lines)
                .map_err(|err| format!("invalid number of lines: {}", err))?
        } else {
            (Modes::Lines(10), false)
        };

        let files = match matches.values_of(options::FILES_NAME) {
            Some(names) => names.map(str::to_owned).collect(),
            None => vec!["-".to_owned()],
        };

        Ok(HeadOptions {
            quiet: matches.is_present(options::QUIET_NAME),
            verbose: matches.is_present(options::VERBOSE_NAME),
            zeroed: matches.is_present(options::ZERO_NAME),
            all_but_last,
            mode,
            files,
        })
    }
}

// Clippy expects a `Default` impl next to an argument-less `new`.
impl Default for HeadOptions {
    fn default() -> Self {
        Self::new()
    }
}
/// Copies at most `n` bytes from `input` to standard output.
///
/// Stops early, without error, if `input` reaches end-of-file first.
/// Reads that fail with `ErrorKind::Interrupted` are retried; any other read
/// or write error is returned.
fn rbuf_n_bytes(input: &mut impl std::io::BufRead, n: usize) -> std::io::Result<()> {
    if n == 0 {
        return Ok(());
    }
    let mut chunk = [0u8; BUF_SIZE];
    let mut remaining = n;
    let out = std::io::stdout();
    let mut out = out.lock();
    while remaining > 0 {
        let filled = match input.read(&mut chunk) {
            // End of input before `n` bytes were seen: not an error.
            Ok(0) => return Ok(()),
            Ok(count) => count,
            Err(ref err) if err.kind() == ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        };
        // The last chunk may contain more than is still wanted.
        let wanted = filled.min(remaining);
        out.write_all(&chunk[..wanted])?;
        remaining -= wanted;
    }
    Ok(())
}
/// Copies the first `n` lines of `input` to standard output.
///
/// Lines end in NUL when `zero` is true and in `\n` otherwise. If the input
/// holds fewer than `n` delimiters, everything read is written.
fn rbuf_n_lines(input: &mut impl std::io::BufRead, n: usize, zero: bool) -> std::io::Result<()> {
    if n == 0 {
        return Ok(());
    }
    let out = std::io::stdout();
    let mut out = out.lock();
    let mut seen = 0usize;
    split::walk_lines(input, zero, |event| {
        Ok(match event {
            split::Event::Data(bytes) => {
                out.write_all(bytes)?;
                true
            }
            split::Event::Line => {
                seen += 1;
                // Keep going until the n-th line has been completed.
                seen != n
            }
        })
    })
}
/// Copies everything except the last `n` bytes of `input` to standard output.
///
/// The most recent `n` bytes are always held back in `held`; a byte is only
/// written once `n` newer bytes have been read after it. If the input is
/// shorter than `n` bytes nothing is written.
fn rbuf_but_last_n_bytes(input: &mut impl std::io::BufRead, n: usize) -> std::io::Result<()> {
    if n == 0 {
        // Nothing to hold back: print everything.
        return rbuf_n_bytes(input, std::usize::MAX);
    }
    let out = std::io::stdout();
    let mut out = out.lock();

    // Prime the hold-back buffer with the first `n` bytes.
    let mut held = vec![0u8; n];
    match input.read_exact(&mut held) {
        Ok(()) => {}
        Err(ref err) if err.kind() == ErrorKind::UnexpectedEof => return Ok(()),
        Err(err) => return Err(err),
    }

    let mut chunk = [0u8; BUF_SIZE];
    loop {
        let filled = match input.read(&mut chunk) {
            Ok(0) => return Ok(()),
            Ok(count) => count,
            Err(ref err) if err.kind() == ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        };
        if filled >= n {
            // Every held byte is now at least `n` bytes from the end, as is
            // the front of the new chunk; the chunk's tail becomes the new
            // hold-back buffer.
            out.write_all(&held)?;
            out.write_all(&chunk[..filled - n])?;
            held.copy_from_slice(&chunk[filled - n..filled]);
        } else {
            // Only the oldest `filled` held bytes can be released; shift the
            // rest to the front and append the new bytes behind them.
            out.write_all(&held[..filled])?;
            held.copy_within(filled.., 0);
            held[n - filled..].copy_from_slice(&chunk[..filled]);
        }
    }
}
/// Copies everything except the last `n` lines of `input` to standard output.
///
/// Lines end in NUL when `zero` is true and in `\n` otherwise. A final line
/// that is not terminated by the delimiter still counts as one of the last
/// `n` lines, as in GNU `head`. If the input holds `n` lines or fewer,
/// nothing is written.
fn rbuf_but_last_n_lines(
    input: &mut impl std::io::BufRead,
    n: usize,
    zero: bool,
) -> std::io::Result<()> {
    if n == 0 {
        // Nothing to hold back: print everything.
        return rbuf_n_bytes(input, std::usize::MAX);
    }
    let stdout = std::io::stdout();
    let mut stdout = stdout.lock();
    // The most recent complete lines, oldest first, never more than `n`.
    // A deque grows with the input instead of reserving `n` slots up front,
    // and dropping the oldest line is O(1).
    let mut held: std::collections::VecDeque<Vec<u8>> = std::collections::VecDeque::new();
    // Bytes of the line currently being assembled.
    let mut line = Vec::new();
    split::walk_lines(input, zero, |e| match e {
        split::Event::Data(dat) => {
            line.extend_from_slice(dat);
            Ok(true)
        }
        split::Event::Line => {
            if held.len() == n {
                // `n` newer lines now follow the oldest one, so release it.
                if let Some(oldest) = held.pop_front() {
                    stdout.write_all(&oldest)?;
                }
            }
            held.push_back(std::mem::replace(&mut line, Vec::new()));
            Ok(true)
        }
    })?;
    // Leftover bytes form an unterminated final line. It is one of the last
    // `n` lines, so when the hold-back window is full its oldest entry is no
    // longer among them and must be printed.
    if !line.is_empty() && held.len() == n {
        if let Some(oldest) = held.front() {
            stdout.write_all(oldest)?;
        }
    }
    Ok(())
}
/// Prints all but the last N bytes or lines of a seekable file.
///
/// The file is scanned backwards from its end to find how many leading bytes
/// must be kept; the file is then rewound and that many bytes are copied to
/// standard output.
///
/// In line mode the scan walks the file in `BUF_SIZE` chunks, each read from
/// an absolute offset so that consecutive chunks never overlap. A final line
/// without a trailing delimiter counts as a line, as in GNU `head`.
///
/// # Panics
///
/// Panics if `options.all_but_last` is false, or if the file size does not
/// fit in `usize`.
fn head_backwards_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> {
    assert!(options.all_but_last);
    let size = input.seek(SeekFrom::End(0))?;
    let size = usize::try_from(size).unwrap();
    // Number of leading bytes of the file that must be printed.
    let keep = match options.mode {
        Modes::Bytes(n) => size.saturating_sub(n),
        // Dropping zero lines keeps the whole file, terminated or not.
        Modes::Lines(0) => size,
        Modes::Lines(n) => {
            let delimiter = if options.zeroed { 0u8 } else { b'\n' };
            let mut buffer = [0u8; BUF_SIZE];
            // Lines that still have to be dropped from the end.
            let mut to_skip = n;
            // Everything at or after this offset has already been scanned.
            let mut unscanned = size;
            let mut keep = 0usize;
            'scan: while unscanned > 0 {
                let len = BUF_SIZE.min(unscanned);
                let start = unscanned - len;
                let chunk = &mut buffer[..len];
                // `usize` to `u64` never truncates on supported targets.
                input.seek(SeekFrom::Start(start as u64))?;
                input.read_exact(chunk)?;
                if unscanned == size && chunk[len - 1] != delimiter {
                    // Unterminated last line: it is the first line dropped
                    // (`n >= 1` here, so this cannot underflow).
                    to_skip -= 1;
                }
                for (offset, byte) in chunk.iter().enumerate().rev() {
                    if *byte == delimiter {
                        if to_skip == 0 {
                            // This delimiter ends the last line to keep.
                            keep = start + offset + 1;
                            break 'scan;
                        }
                        to_skip -= 1;
                    }
                }
                unscanned = start;
            }
            // Still 0 if the file has no more than `n` lines.
            keep
        }
    };
    if keep == 0 {
        return Ok(());
    }
    input.seek(SeekFrom::Start(0))?;
    rbuf_n_bytes(
        &mut std::io::BufReader::with_capacity(BUF_SIZE, input),
        keep,
    )
}
/// Applies `options` to an opened file and writes the result to standard
/// output.
///
/// "All but the last N" requests go to `head_backwards_file`; plain counts
/// are served by streaming the file through a `BUF_SIZE` buffered reader.
fn head_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> {
    if options.all_but_last {
        return head_backwards_file(input, options);
    }
    let mut reader = std::io::BufReader::with_capacity(BUF_SIZE, input);
    match options.mode {
        Modes::Bytes(n) => rbuf_n_bytes(&mut reader, n),
        Modes::Lines(n) => rbuf_n_lines(&mut reader, n, options.zeroed),
    }
}
fn uu_head(options: &HeadOptions) {
let mut first = true;
for fname in &options.files {
let res = match fname.as_str() {
"-" => {
if options.verbose {
if !first {
println!();
}
println!("==> standard input <==")
}
let stdin = std::io::stdin();
let mut stdin = stdin.lock();
match options.mode {
Modes::Bytes(n) => {
if options.all_but_last {
rbuf_but_last_n_bytes(&mut stdin, n)
} else {
rbuf_n_bytes(&mut stdin, n)
}
}
Modes::Lines(n) => {
if options.all_but_last {
rbuf_but_last_n_lines(&mut stdin, n, options.zeroed)
} else {
rbuf_n_lines(&mut stdin, n, options.zeroed)
}
}
}
}
name => {
let mut file = match std::fs::File::open(name) {
Ok(f) => f,
Err(err) => match err.kind() {
ErrorKind::NotFound => {
crash!(
EXIT_FAILURE,
"head: cannot open '{}' for reading: No such file or directory",
name
);
}
ErrorKind::PermissionDenied => {
crash!(
EXIT_FAILURE,
"head: cannot open '{}' for reading: Permission denied",
name
);
}
_ => {
crash!(
EXIT_FAILURE,
"head: cannot open '{}' for reading: {}",
name,
err
);
}
},
};
if (options.files.len() > 1 && !options.quiet) || options.verbose {
println!("==> {} <==", name)
}
head_file(&mut file, options)
}
};
if res.is_err() {
if fname.as_str() == "-" {
crash!(
EXIT_FAILURE,
"head: error reading standard input: Input/output error"
);
} else {
crash!(
EXIT_FAILURE,
"head: error reading {}: Input/output error",
fname
);
}
}
first = false;
} }
} }
pub fn uumain(args: impl uucore::Args) -> i32 { pub fn uumain(args: impl uucore::Args) -> i32 {
let args = args.collect_str(); let args = match HeadOptions::get_from(args) {
Ok(o) => o,
let mut settings: Settings = Default::default(); Err(s) => {
crash!(EXIT_FAILURE, "head: {}", s);
// handle obsolete -number syntax
let new_args = match obsolete(&args[0..]) {
(args, Some(n)) => {
settings.mode = FilterMode::Lines(n);
args
}
(args, None) => args,
};
let matches = app!(SYNTAX, SUMMARY, LONG_HELP)
.optopt(
"c",
"bytes",
"Print the first K bytes. With the leading '-', print all but the last K bytes",
"[-]K",
)
.optopt(
"n",
"lines",
"Print the first K lines. With the leading '-', print all but the last K lines",
"[-]K",
)
.optflag("q", "quiet", "never print headers giving file names")
.optflag("v", "verbose", "always print headers giving file names")
.optflag("z", "zero-terminated", "line delimiter is NUL, not newline")
.optflag("h", "help", "display this help and exit")
.optflag("V", "version", "output version information and exit")
.parse(new_args);
let use_bytes = matches.opt_present("c");
// TODO: suffixes (e.g. b, kB, etc.)
match matches.opt_str("n") {
Some(n) => {
if use_bytes {
show_error!("cannot specify both --bytes and --lines.");
return 1;
}
match n.parse::<isize>() {
Ok(m) => {
settings.mode = if m < 0 {
let m: usize = m.abs() as usize;
FilterMode::NLines(m)
} else {
let m: usize = m.abs() as usize;
FilterMode::Lines(m)
}
}
Err(e) => {
show_error!("invalid line count '{}': {}", n, e);
return 1;
}
}
}
None => {
if let Some(count) = matches.opt_str("c") {
match count.parse::<usize>() {
Ok(m) => settings.mode = FilterMode::Bytes(m),
Err(e) => {
show_error!("invalid byte count '{}': {}", count, e);
return 1;
}
}
}
} }
}; };
uu_head(&args);
let quiet = matches.opt_present("q"); EXIT_SUCCESS
let verbose = matches.opt_present("v");
settings.zero_terminated = matches.opt_present("z");
let files = matches.free;
// GNU implementation allows multiple declarations of "-q" and "-v" with the
// last flag winning. This can't be simulated with the getopts cargo unless
// we manually parse the arguments. Given the declaration of both flags,
// verbose mode always wins. This is a potential future improvement.
if files.len() > 1 && !quiet && !verbose {
settings.verbose = true;
}
if quiet {
settings.verbose = false;
}
if verbose {
settings.verbose = true;
}
if files.is_empty() {
let mut buffer = BufReader::new(stdin());
head(&mut buffer, &settings);
} else {
let mut first_time = true;
for file in &files {
if settings.verbose {
if !first_time {
println!();
}
println!("==> {} <==", file);
}
first_time = false;
let path = Path::new(file);
if path.is_dir() || !path.metadata().is_ok() {
eprintln!(
"cannot open '{}' for reading: No such file or directory",
&path.to_str().unwrap()
);
continue;
}
let reader = File::open(&path).unwrap();
let mut buffer = BufReader::new(reader);
if !head(&mut buffer, &settings) {
break;
}
}
}
0
} }
// It searches for an option in the form of -123123 #[cfg(test)]
// mod tests {
// In case is found, the options vector will get rid of that object so that use std::ffi::OsString;
// getopts works correctly.
fn obsolete(options: &[String]) -> (Vec<String>, Option<usize>) {
let mut options: Vec<String> = options.to_vec();
let mut a = 1;
let b = options.len();
while a < b { use super::*;
let previous = options[a - 1].clone(); fn options(args: &str) -> Result<HeadOptions, String> {
let current = options[a].clone(); let combined = "head ".to_owned() + args;
let current = current.as_bytes(); let args = combined.split_whitespace();
HeadOptions::get_from(args.map(|s| OsString::from(s)))
if previous != "-n" && current.len() > 1 && current[0] == b'-' {
let len = current.len();
for pos in 1..len {
// Ensure that the argument is only made out of digits
if !(current[pos] as char).is_numeric() {
break;
}
// If this is the last number
if pos == len - 1 {
options.remove(a);
let number: Option<usize> =
from_utf8(&current[1..len]).unwrap().parse::<usize>().ok();
return (options, Some(number.unwrap()));
}
}
}
a += 1;
} }
#[test]
fn test_args_modes() {
let args = options("-n -10M -vz").unwrap();
assert!(args.zeroed);
assert!(args.verbose);
assert!(args.all_but_last);
assert_eq!(args.mode, Modes::Lines(10 * 1024 * 1024));
}
#[test]
fn test_gnu_compatibility() {
let args = options("-n 1 -c 1 -n 5 -c kiB -vqvqv").unwrap();
assert!(args.mode == Modes::Bytes(1024));
assert!(args.verbose);
assert_eq!(options("-5").unwrap().mode, Modes::Lines(5));
assert_eq!(options("-2b").unwrap().mode, Modes::Bytes(1024));
assert_eq!(options("-5 -c 1").unwrap().mode, Modes::Bytes(1));
}
#[test]
fn all_args_test() {
assert!(options("--silent").unwrap().quiet);
assert!(options("--quiet").unwrap().quiet);
assert!(options("-q").unwrap().quiet);
assert!(options("--verbose").unwrap().verbose);
assert!(options("-v").unwrap().verbose);
assert!(options("--zero-terminated").unwrap().zeroed);
assert!(options("-z").unwrap().zeroed);
assert_eq!(options("--lines 15").unwrap().mode, Modes::Lines(15));
assert_eq!(options("-n 15").unwrap().mode, Modes::Lines(15));
assert_eq!(options("--bytes 15").unwrap().mode, Modes::Bytes(15));
assert_eq!(options("-c 15").unwrap().mode, Modes::Bytes(15));
}
#[test]
fn test_options_errors() {
assert!(options("-n IsThisTheRealLife?").is_err());
assert!(options("-c IsThisJustFantasy").is_err());
}
#[test]
fn test_options_correct_defaults() {
let opts = HeadOptions::new();
let opts2: HeadOptions = Default::default();
(options, None) assert_eq!(opts, opts2);
}
// TODO: handle errors on read assert!(opts.verbose == false);
fn head<T: Read>(reader: &mut BufReader<T>, settings: &Settings) -> bool { assert!(opts.quiet == false);
match settings.mode { assert!(opts.zeroed == false);
FilterMode::Bytes(count) => { assert!(opts.all_but_last == false);
for byte in reader.bytes().take(count) { assert_eq!(opts.mode, Modes::Lines(10));
print!("{}", byte.unwrap() as char); assert!(opts.files.is_empty());
} }
} #[test]
FilterMode::Lines(count) => { fn test_parse_mode() {
if settings.zero_terminated { assert_eq!(
for line in reader.split(0).take(count) { parse_mode("123", Modes::Lines),
print!("{}\0", String::from_utf8(line.unwrap()).unwrap()) Ok((Modes::Lines(123), false))
} );
} else { assert_eq!(
for line in reader.lines().take(count) { parse_mode("-456", Modes::Bytes),
println!("{}", line.unwrap()); Ok((Modes::Bytes(456), true))
} );
} assert!(parse_mode("Nonsensical Nonsense", Modes::Bytes).is_err());
} #[cfg(target_pointer_width = "64")]
FilterMode::NLines(count) => { assert!(parse_mode("1Y", Modes::Lines).is_err());
let mut vector: VecDeque<String> = VecDeque::new(); #[cfg(target_pointer_width = "32")]
assert!(parse_mode("1T", Modes::Bytes).is_err());
for line in reader.lines() { }
vector.push_back(line.unwrap()); fn arg_outputs(src: &str) -> Result<String, String> {
if vector.len() <= count { let split = src.split_whitespace().map(|x| OsString::from(x));
continue; match arg_iterate(split) {
} Ok(args) => {
println!("{}", vector.pop_front().unwrap()); let vec = args
.map(|s| s.to_str().unwrap().to_owned())
.collect::<Vec<_>>();
Ok(vec.join(" "))
} }
Err(e) => Err(e),
} }
} }
true #[test]
fn test_arg_iterate() {
// test that normal args remain unchanged
assert_eq!(
arg_outputs("head -n -5 -zv"),
Ok("head -n -5 -zv".to_owned())
);
// tests that nonsensical args are unchanged
assert_eq!(
arg_outputs("head -to_be_or_not_to_be,..."),
Ok("head -to_be_or_not_to_be,...".to_owned())
);
//test that the obsolete syntax is unrolled
assert_eq!(
arg_outputs("head -123qvqvqzc"),
Ok("head -q -z -c 123".to_owned())
);
//test that bad obsoletes are an error
assert!(arg_outputs("head -123FooBar").is_err());
//test overflow
assert!(arg_outputs("head -100000000000000000000000000000000000000000").is_err());
//test that empty args remain unchanged
assert_eq!(arg_outputs("head"), Ok("head".to_owned()));
}
#[test]
#[cfg(linux)]
fn test_arg_iterate_bad_encoding() {
let invalid = unsafe { std::str::from_utf8_unchecked(b"\x80\x81") };
// this arises from a conversion from OsString to &str
assert!(
arg_iterate(vec![OsString::from("head"), OsString::from(invalid)].into_iter()).is_err()
);
}
#[test]
fn rbuf_early_exit() {
let mut empty = std::io::BufReader::new(std::io::Cursor::new(Vec::new()));
assert!(rbuf_n_bytes(&mut empty, 0).is_ok());
assert!(rbuf_n_lines(&mut empty, 0, false).is_ok());
}
} }

282
src/uu/head/src/parse.rs Normal file
View file

@ -0,0 +1,282 @@
use std::convert::TryFrom;
use std::ffi::OsString;
/// Reasons a count argument can be rejected.
#[derive(PartialEq, Debug)]
pub enum ParseError {
    /// The argument does not have the expected form.
    Syntax,
    /// The number, or the number times its multiplier, does not fit in `usize`.
    Overflow,
}

/// Parses the obsolete `head -NUM[bckmqvz]...` syntax.
///
/// Returns `None` when `src` is not a `-` followed by at least one ASCII
/// digit, i.e. when it is not obsolete syntax at all. Otherwise returns the
/// equivalent modern options, always in the order
/// `[-q | -v] [-z] (-c BYTES | -n LINES)`:
///
/// * `q` and `v` override each other; the last one given wins.
/// * `c`, `b`, `k` and `m` switch to byte mode with a multiplier of 1, 512,
///   1024 and 1024 * 1024; the last one given wins.
///
/// # Errors
///
/// * `ParseError::Overflow` if the number, or the number times the
///   multiplier, does not fit in `usize`.
/// * `ParseError::Syntax` if a character other than the flags above follows
///   the number.
pub fn parse_obsolete(src: &str) -> Option<Result<impl Iterator<Item = OsString>, ParseError>> {
    if !src.starts_with('-') {
        return None;
    }
    let rest = &src[1..];
    // Only ASCII digits are accepted: they are what `usize::from_str`
    // understands, and being one byte each they keep the split below on a
    // character boundary.
    let digits_len = rest
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or(rest.len());
    if digits_len == 0 {
        return None;
    }
    let (digits, flags) = rest.split_at(digits_len);
    let num = match digits.parse::<usize>() {
        Ok(num) => num,
        // The input is all ASCII digits, so the only possible failure is overflow.
        Err(_) => return Some(Err(ParseError::Overflow)),
    };

    let mut quiet = false;
    let mut verbose = false;
    let mut zero_terminated = false;
    let mut multiplier: Option<usize> = None;
    // Note that here, we only match lower case 'k', 'c', and 'm'.
    for flag in flags.chars() {
        match flag {
            'q' => {
                quiet = true;
                verbose = false;
            }
            'v' => {
                verbose = true;
                quiet = false;
            }
            'z' => zero_terminated = true,
            'c' => multiplier = Some(1),
            'b' => multiplier = Some(512),
            'k' => multiplier = Some(1024),
            'm' => multiplier = Some(1024 * 1024),
            _ => return Some(Err(ParseError::Syntax)),
        }
    }

    let mut options = Vec::new();
    if quiet {
        options.push(OsString::from("-q"));
    }
    if verbose {
        options.push(OsString::from("-v"));
    }
    if zero_terminated {
        options.push(OsString::from("-z"));
    }
    let count = match multiplier {
        Some(factor) => {
            options.push(OsString::from("-c"));
            match num.checked_mul(factor) {
                Some(bytes) => bytes,
                None => return Some(Err(ParseError::Overflow)),
            }
        }
        None => {
            options.push(OsString::from("-n"));
            num
        }
    };
    options.push(OsString::from(count.to_string()));
    Some(Ok(options.into_iter()))
}

/// Parses the value of a `-c` or `-n` argument: `[-][NUM][UNIT]`.
///
/// The returned `bool` is true when `src` starts with `-`, meaning "all but
/// the last NUM". `NUM` defaults to 1 when only a unit is given.
///
/// `UNIT` is one of the letters `b` (512), `k`, `m`, `g`, `t`, `p`, `e`, `z`,
/// `y` in either case. The letter alone, or followed by `iB`, uses powers of
/// 1024; followed by `B` it uses powers of 1000. `b` takes no `B`/`iB` suffix.
///
/// # Errors
///
/// * `ParseError::Syntax` if `src` has neither a number nor a unit, or has
///   anything that is not a valid unit after the number.
/// * `ParseError::Overflow` if the number, the multiplier, or their product
///   does not fit in `usize`.
pub fn parse_num(src: &str) -> Result<(usize, bool), ParseError> {
    let all_but_last = src.starts_with('-');
    let rest = if all_but_last { &src[1..] } else { src };

    // ASCII digits only; see `parse_obsolete` for why.
    let digits_len = rest
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or(rest.len());
    let (digits, suffix) = rest.split_at(digits_len);
    let num = if digits.is_empty() {
        None
    } else {
        match digits.parse::<usize>() {
            Ok(n) => Some(n),
            Err(_) => return Err(ParseError::Overflow),
        }
    };

    let mut suffix_chars = suffix.chars();
    let unit = match suffix_chars.next() {
        Some(unit) => unit,
        // No unit: a bare number is fine, an empty value is not.
        None => return num.map(|n| (n, all_but_last)).ok_or(ParseError::Syntax),
    };

    // What follows the unit letter selects decimal or binary multiples.
    let base: u128 = match suffix_chars.next() {
        None => 1024,
        Some('B') if unit != 'b' => 1000,
        Some('i') if unit != 'b' => {
            if suffix_chars.next() != Some('B') {
                return Err(ParseError::Syntax);
            }
            1024
        }
        Some(_) => return Err(ParseError::Syntax),
    };
    if suffix_chars.next().is_some() {
        return Err(ParseError::Syntax);
    }

    // `u128` holds even 1024^8; the narrowing to `usize` is checked below.
    let mul = match unit.to_lowercase().next().unwrap_or(unit) {
        'b' => 512,
        'k' => base.pow(1),
        'm' => base.pow(2),
        'g' => base.pow(3),
        't' => base.pow(4),
        'p' => base.pow(5),
        'e' => base.pow(6),
        'z' => base.pow(7),
        'y' => base.pow(8),
        _ => return Err(ParseError::Syntax),
    };
    let mul = match usize::try_from(mul) {
        Ok(n) => n,
        Err(_) => return Err(ParseError::Overflow),
    };
    match num.unwrap_or(1).checked_mul(mul) {
        Some(n) => Ok((n, all_but_last)),
        None => Err(ParseError::Overflow),
    }
}
// Unit tests for `parse_num` and `parse_obsolete`.
#[cfg(test)]
mod tests {
use super::*;
// Runs `parse_obsolete` and collects the yielded arguments into `String`s so
// the result can be compared with `assert_eq!`.
fn obsolete(src: &str) -> Option<Result<Vec<String>, ParseError>> {
let r = parse_obsolete(src);
match r {
Some(s) => match s {
Ok(v) => Some(Ok(v.map(|s| s.to_str().unwrap().to_owned()).collect())),
Err(e) => Some(Err(e)),
},
None => None,
}
}
// Builds the `Some(Ok(..))` value expected from `obsolete` out of string slices.
fn obsolete_result(src: &[&str]) -> Option<Result<Vec<String>, ParseError>> {
Some(Ok(src.iter().map(|s| s.to_string()).collect()))
}
// Values expected to be reported as overflow on every target whose pointer
// width is not 128 bits.
#[test]
#[cfg(not(target_pointer_width = "128"))]
fn test_parse_overflow_x64() {
assert_eq!(parse_num("1Y"), Err(ParseError::Overflow));
assert_eq!(parse_num("1Z"), Err(ParseError::Overflow));
assert_eq!(parse_num("100E"), Err(ParseError::Overflow));
assert_eq!(parse_num("100000P"), Err(ParseError::Overflow));
assert_eq!(parse_num("1000000000T"), Err(ParseError::Overflow));
assert_eq!(
parse_num("10000000000000000000000"),
Err(ParseError::Overflow)
);
}
// Values that only overflow when `usize` is 32 bits wide.
#[test]
#[cfg(target_pointer_width = "32")]
fn test_parse_overflow_x32() {
assert_eq!(parse_num("1T"), Err(ParseError::Overflow));
assert_eq!(parse_num("1000G"), Err(ParseError::Overflow));
}
// Malformed values: trailing text, wrong-case suffix, `b` with a suffix,
// a lone `-`, and the empty string.
#[test]
fn test_parse_bad_syntax() {
assert_eq!(parse_num("5MiB nonsense"), Err(ParseError::Syntax));
assert_eq!(parse_num("Nonsense string"), Err(ParseError::Syntax));
assert_eq!(parse_num("5mib"), Err(ParseError::Syntax));
assert_eq!(parse_num("biB"), Err(ParseError::Syntax));
assert_eq!(parse_num("-"), Err(ParseError::Syntax));
assert_eq!(parse_num(""), Err(ParseError::Syntax));
}
// Well-formed values, including a unit without a number (the number
// defaults to 1) and the leading `-` that sets the returned flag.
#[test]
fn test_parse_numbers() {
assert_eq!(parse_num("k"), Ok((1024, false)));
assert_eq!(parse_num("MiB"), Ok((1024 * 1024, false)));
assert_eq!(parse_num("-5"), Ok((5, true)));
assert_eq!(parse_num("b"), Ok((512, false)));
assert_eq!(parse_num("-2GiB"), Ok((2 * 1024 * 1024 * 1024, true)));
assert_eq!(parse_num("5M"), Ok((5 * 1024 * 1024, false)));
assert_eq!(parse_num("5MB"), Ok((5 * 1000 * 1000, false)));
}
// Obsolete `-NUM[flags]` forms and the modern options they expand to; when
// flags repeat or conflict, the last one given wins.
#[test]
fn test_parse_numbers_obsolete() {
assert_eq!(obsolete("-5"), obsolete_result(&["-n", "5"]));
assert_eq!(obsolete("-100"), obsolete_result(&["-n", "100"]));
assert_eq!(obsolete("-5m"), obsolete_result(&["-c", "5242880"]));
assert_eq!(obsolete("-1k"), obsolete_result(&["-c", "1024"]));
assert_eq!(obsolete("-2b"), obsolete_result(&["-c", "1024"]));
assert_eq!(obsolete("-1mmk"), obsolete_result(&["-c", "1024"]));
assert_eq!(obsolete("-1vz"), obsolete_result(&["-v", "-z", "-n", "1"]));
assert_eq!(
obsolete("-1vzqvq"),
obsolete_result(&["-q", "-z", "-n", "1"])
);
assert_eq!(obsolete("-1vzc"), obsolete_result(&["-v", "-z", "-c", "1"]));
assert_eq!(
obsolete("-105kzm"),
obsolete_result(&["-z", "-c", "110100480"])
);
}
// An unknown flag letter, or a digit after the flags, is a syntax error.
#[test]
fn test_parse_errors_obsolete() {
assert_eq!(obsolete("-5n"), Some(Err(ParseError::Syntax)));
assert_eq!(obsolete("-5c5"), Some(Err(ParseError::Syntax)));
}
// Arguments without a `-NUM` prefix are not obsolete syntax at all.
#[test]
fn test_parse_obsolete_nomatch() {
assert_eq!(obsolete("-k"), None);
assert_eq!(obsolete("asd"), None);
}
// Overflow of the product and of the number itself with a 64-bit `usize`.
#[test]
#[cfg(target_pointer_width = "64")]
fn test_parse_obsolete_overflow_x64() {
assert_eq!(
obsolete("-1000000000000000m"),
Some(Err(ParseError::Overflow))
);
assert_eq!(
obsolete("-10000000000000000000000"),
Some(Err(ParseError::Overflow))
);
}
// The same two overflow cases with a 32-bit `usize`.
#[test]
#[cfg(target_pointer_width = "32")]
fn test_parse_obsolete_overflow_x32() {
assert_eq!(obsolete("-42949672960"), Some(Err(ParseError::Overflow)));
assert_eq!(obsolete("-42949672k"), Some(Err(ParseError::Overflow)));
}
}

60
src/uu/head/src/split.rs Normal file
View file

@ -0,0 +1,60 @@
/// An item passed by `walk_lines` to its callback.
#[derive(Debug)]
pub enum Event<'a> {
/// A run of bytes read from the input. It ends with the line delimiter
/// when a `Line` event follows it; otherwise it is the unterminated
/// remainder of the current read and may be empty.
Data(&'a [u8]),
/// The `Data` slice passed immediately before this event ended with the
/// line delimiter.
Line,
}
/// Feeds the contents of a `BufRead` to a callback, split at line delimiters.
///
/// # Arguments
/// * `input` the reader to consume
/// * `zero` when true, lines end in a NUL byte (`0u8`); otherwise in `\n`
/// * `on_event` a closure receiving either a slice of bytes just read
///              (`Event::Data`) or a notification that a line just ended
///              (`Event::Line`). `Event::Line` is always delivered *directly*
///              after the `Event::Data` slice whose last byte is the
///              delimiter. Whatever is left of a read after its last
///              delimiter is delivered as one more `Event::Data`, which may
///              be empty.
///
/// The `bool` returned for an `Event::Line` tells whether to keep going:
/// `false` stops the walk at once. The `bool` returned for an `Event::Data`
/// is ignored. An `Err` from the callback, or any read error other than
/// `ErrorKind::Interrupted` (which is retried), ends the walk with that error.
pub fn walk_lines<F>(
    input: &mut impl std::io::BufRead,
    zero: bool,
    mut on_event: F,
) -> std::io::Result<()>
where
    F: FnMut(Event) -> std::io::Result<bool>,
{
    let delimiter = if zero { 0u8 } else { b'\n' };
    let mut chunk = [0u8; super::BUF_SIZE];
    loop {
        let filled = match input.read(&mut chunk) {
            Ok(0) => return Ok(()),
            Ok(count) => count,
            Err(ref err) if err.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        };
        // The part of this read that has not been handed out yet.
        let mut pending = &chunk[..filled];
        while let Some(pos) = pending.iter().position(|&byte| byte == delimiter) {
            let (line, rest) = pending.split_at(pos + 1);
            on_event(Event::Data(line))?;
            pending = rest;
            if !on_event(Event::Line)? {
                return Ok(());
            }
        }
        on_event(Event::Data(pending))?;
    }
}

105
tests/by-util/test_head.rs Normal file → Executable file
View file

@ -86,88 +86,74 @@ fn test_verbose() {
.stdout_is_fixture("lorem_ipsum_verbose.expected"); .stdout_is_fixture("lorem_ipsum_verbose.expected");
} }
#[test]
fn test_zero_terminated() {
new_ucmd!()
.args(&["-z", "zero_terminated.txt"])
.run()
.stdout_is_fixture("zero_terminated.expected");
}
#[test] #[test]
#[ignore] #[ignore]
fn test_spams_newline() { fn test_spams_newline() {
//this test is does not mirror what GNU does
new_ucmd!().pipe_in("a").succeeds().stdout_is("a\n"); new_ucmd!().pipe_in("a").succeeds().stdout_is("a\n");
} }
#[test] #[test]
#[ignore] fn test_byte_syntax() {
fn test_unsupported_byte_syntax() {
new_ucmd!() new_ucmd!()
.args(&["-1c"]) .args(&["-1c"])
.pipe_in("abc") .pipe_in("abc")
.fails() .run()
//GNU head returns "a" .stdout_is("a");
.stdout_is("")
.stderr_is("head: error: Unrecognized option: \'1\'");
} }
#[test] #[test]
#[ignore] fn test_line_syntax() {
fn test_unsupported_line_syntax() {
new_ucmd!() new_ucmd!()
.args(&["-n", "2048m"]) .args(&["-n", "2048m"])
.pipe_in("a\n") .pipe_in("a\n")
.fails() .run()
//.stdout_is("a\n"); What GNU head returns. .stdout_is("a\n");
.stdout_is("")
.stderr_is("head: error: invalid line count \'2048m\': invalid digit found in string");
} }
#[test] #[test]
#[ignore] fn test_zero_terminated_syntax() {
fn test_unsupported_zero_terminated_syntax() {
new_ucmd!() new_ucmd!()
.args(&["-z -n 1"]) .args(&["-z", "-n", "1"])
.pipe_in("x\0y") .pipe_in("x\0y")
.fails() .run()
//GNU Head returns "x\0" .stdout_is("x\0");
.stderr_is("head: error: Unrecognized option: \'z\'");
} }
#[test] #[test]
#[ignore] fn test_zero_terminated_syntax_2() {
fn test_unsupported_zero_terminated_syntax_2() {
new_ucmd!() new_ucmd!()
.args(&["-z -n 2"]) .args(&["-z", "-n", "2"])
.pipe_in("x\0y") .pipe_in("x\0y")
.fails() .run()
//GNU Head returns "x\0y" .stdout_is("x\0y");
.stderr_is("head: error: Unrecognized option: \'z\'");
} }
#[test] #[test]
#[ignore] fn test_negative_byte_syntax() {
fn test_unsupported_negative_byte_syntax() {
new_ucmd!() new_ucmd!()
.args(&["--bytes=-2"]) .args(&["--bytes=-2"])
.pipe_in("a\n") .pipe_in("a\n")
.fails() .run()
//GNU Head returns "" .stdout_is("");
.stderr_is("head: error: invalid byte count \'-2\': invalid digit found in string");
} }
#[test] #[test]
#[ignore] fn test_negative_zero_lines() {
fn test_bug_in_negative_zero_lines() {
new_ucmd!() new_ucmd!()
.args(&["--lines=-0"]) .args(&["--lines=-0"])
.pipe_in("a\nb\n") .pipe_in("a\nb\n")
.succeeds() .succeeds()
//GNU Head returns "a\nb\n" .stdout_is("a\nb\n");
.stdout_is(""); }
#[test]
fn test_negative_zero_bytes() {
new_ucmd!()
.args(&["--bytes=-0"])
.pipe_in("qwerty")
.succeeds()
.stdout_is("qwerty");
} }
#[test] #[test]
fn test_no_such_file_or_directory() { fn test_no_such_file_or_directory() {
let result = new_ucmd!().arg("no_such_file.toml").run(); let result = new_ucmd!().arg("no_such_file.toml").run();
@ -179,3 +165,38 @@ fn test_no_such_file_or_directory() {
.contains("cannot open 'no_such_file.toml' for reading: No such file or directory") .contains("cannot open 'no_such_file.toml' for reading: No such file or directory")
) )
} }
// there was a bug not caught by previous tests
// where for negative n > 3, the total amount of lines
// was correct, but it would eat from the second line
#[test]
fn test_sequence_fixture() {
new_ucmd!()
.args(&["-n", "-10", "sequence"])
.run()
.stdout_is_fixture("sequence.expected");
}
#[test]
fn test_file_backwards() {
new_ucmd!()
.args(&["-c", "-10", "lorem_ipsum.txt"])
.run()
.stdout_is_fixture("lorem_ipsum_backwards_file.expected");
}
#[test]
fn test_zero_terminated() {
new_ucmd!()
.args(&["-z", "zero_terminated.txt"])
.run()
.stdout_is_fixture("zero_terminated.expected");
}
#[test]
fn test_obsolete_extras() {
new_ucmd!()
.args(&["-5zv"])
.pipe_in("1\02\03\04\05\06")
.succeeds()
.stdout_is("==> standard input <==\n1\02\03\04\05\0");
}

View file

@ -0,0 +1,24 @@
Lorem ipsum dolor sit amet,
consectetur adipiscing elit.
Nunc interdum suscipit sem vel ornare.
Proin euismod,
justo sed mollis dictum,
eros urna ultricies augue,
eu pharetra mi ex id ante.
Duis convallis porttitor aliquam.
Nunc vitae tincidunt ex.
Suspendisse iaculis ligula ac diam consectetur lacinia.
Donec vel velit dui.
Etiam fringilla,
dolor quis tempor vehicula,
lacus turpis bibendum velit,
et pellentesque elit odio a magna.
Cras vulputate tortor non libero vehicula euismod.
Aliquam tincidunt nisl eget enim cursus,
viverra sagittis magna commodo.
Cras rhoncus egestas leo nec blandit.
Suspendisse potenti.
Etiam ullamcorper leo vel lacus vestibulum,
cursus semper eros efficitur.
In hac habitasse platea dictumst.
Phasellus scelerisque vehicula f

100
tests/fixtures/head/sequence vendored Normal file
View file

@ -0,0 +1,100 @@
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100

90
tests/fixtures/head/sequence.expected vendored Normal file
View file

@ -0,0 +1,90 @@
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90