From 8320b1ec5f58a94372edce2477480fb0116df262 Mon Sep 17 00:00:00 2001 From: Mikadore Date: Mon, 29 Mar 2021 13:08:48 +0200 Subject: [PATCH] Rewrote head (#1911) See https://github.com/uutils/coreutils/pull/1911 for the details --- src/uu/head/Cargo.toml | 2 +- src/uu/head/src/head.rs | 828 +++++++++++++----- src/uu/head/src/parse.rs | 282 ++++++ src/uu/head/src/split.rs | 60 ++ tests/by-util/test_head.rs | 105 ++- .../head/lorem_ipsum_backwards_file.expected | 24 + tests/fixtures/head/sequence | 100 +++ tests/fixtures/head/sequence.expected | 90 ++ 8 files changed, 1235 insertions(+), 256 deletions(-) create mode 100644 src/uu/head/src/parse.rs create mode 100644 src/uu/head/src/split.rs mode change 100644 => 100755 tests/by-util/test_head.rs create mode 100644 tests/fixtures/head/lorem_ipsum_backwards_file.expected create mode 100644 tests/fixtures/head/sequence create mode 100644 tests/fixtures/head/sequence.expected diff --git a/src/uu/head/Cargo.toml b/src/uu/head/Cargo.toml index adcce2726..1cd075113 100644 --- a/src/uu/head/Cargo.toml +++ b/src/uu/head/Cargo.toml @@ -15,7 +15,7 @@ edition = "2018" path = "src/head.rs" [dependencies] -libc = "0.2.42" +clap = "2.33" uucore = { version=">=0.0.7", package="uucore", path="../../uucore" } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index 0036dbba9..a8f519f6b 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -1,240 +1,642 @@ -// * This file is part of the uutils coreutils package. -// * -// * (c) Alan Andrade -// * -// * For the full copyright and license information, please view the LICENSE -// * file that was distributed with this source code. -// * -// * Synced with: https://raw.github.com/avsm/src/master/usr.bin/head/head.c +use clap::{App, Arg}; +use std::convert::TryFrom; +use std::ffi::OsString; +use std::io::{ErrorKind, Read, Seek, SeekFrom, Write}; +use uucore::{crash, executable, show_error}; -#[macro_use] -extern crate uucore; +const EXIT_FAILURE: i32 = 1; +const EXIT_SUCCESS: i32 = 0; +const BUF_SIZE: usize = 65536; -use std::collections::VecDeque; -use std::fs::File; -use std::io::{stdin, BufRead, BufReader, Read}; -use std::path::Path; -use std::str::from_utf8; +const VERSION: &str = env!("CARGO_PKG_VERSION"); +const ABOUT: &str = "\ + Print the first 10 lines of each FILE to standard output.\n\ + With more than one FILE, precede each with a header giving the file name.\n\ + \n\ + With no FILE, or when FILE is -, read standard input.\n\ + \n\ + Mandatory arguments to long flags are mandatory for short flags too.\ + "; +const USAGE: &str = "head [FLAG]... [FILE]..."; -static SYNTAX: &str = ""; -static SUMMARY: &str = ""; -static LONG_HELP: &str = ""; +mod options { + pub const BYTES_NAME: &str = "BYTES"; + pub const LINES_NAME: &str = "LINES"; + pub const QUIET_NAME: &str = "QUIET"; + pub const VERBOSE_NAME: &str = "VERBOSE"; + pub const ZERO_NAME: &str = "ZERO"; + pub const FILES_NAME: &str = "FILE"; +} +mod parse; +mod split; -enum FilterMode { - Bytes(usize), +fn app<'a>() -> App<'a, 'a> { + App::new(executable!()) + .version(VERSION) + .about(ABOUT) + .usage(USAGE) + .arg( + Arg::with_name(options::BYTES_NAME) + .short("c") + .long("bytes") + .value_name("[-]NUM") + .takes_value(true) + .help( + "\ + print the first NUM bytes of each file;\n\ + with the leading '-', print all but the last\n\ + NUM bytes of each file\ + ", + ) + .overrides_with_all(&[options::BYTES_NAME, options::LINES_NAME]) + .allow_hyphen_values(true), + ) + .arg( + Arg::with_name(options::LINES_NAME) + .short("n") + .long("lines") + .value_name("[-]NUM") + .takes_value(true) + .help( + "\ + print the first NUM lines instead of the first 10;\n\ + with the leading '-', print all but the last\n\ + NUM lines of each file\ + ", + ) + .overrides_with_all(&[options::LINES_NAME, options::BYTES_NAME]) + .allow_hyphen_values(true), + ) + .arg( + Arg::with_name(options::QUIET_NAME) + .short("q") + .long("--quiet") + .visible_alias("silent") + .help("never print headers giving file names") + .overrides_with_all(&[options::VERBOSE_NAME, options::QUIET_NAME]), + ) + .arg( + Arg::with_name(options::VERBOSE_NAME) + .short("v") + .long("verbose") + .help("always print headers giving file names") + .overrides_with_all(&[options::QUIET_NAME, options::VERBOSE_NAME]), + ) + .arg( + Arg::with_name(options::ZERO_NAME) + .short("z") + .long("zero-terminated") + .help("line delimiter is NUL, not newline") + .overrides_with(options::ZERO_NAME), + ) + .arg(Arg::with_name(options::FILES_NAME).multiple(true)) +} +#[derive(PartialEq, Debug, Clone, Copy)] +enum Modes { Lines(usize), - NLines(usize), + Bytes(usize), } -struct Settings { - mode: FilterMode, - verbose: bool, - zero_terminated: bool, +fn parse_mode(src: &str, closure: F) -> Result<(Modes, bool), String> +where + F: FnOnce(usize) -> Modes, +{ + match parse::parse_num(src) { + Ok((n, last)) => Ok((closure(n), last)), + Err(reason) => match reason { + parse::ParseError::Syntax => Err(format!("'{}'", src)), + parse::ParseError::Overflow => { + Err(format!("'{}': Value too large for defined datatype", src)) + } + }, + } } -impl Default for Settings { - fn default() -> Settings { - Settings { - mode: FilterMode::Lines(10), - verbose: false, - zero_terminated: false, +fn arg_iterate<'a>( + mut args: impl uucore::Args + 'a, +) -> Result + 'a>, String> { + // argv[0] is always present + let first = args.next().unwrap(); + if let Some(second) = args.next() { + if let Some(s) = second.to_str() { + match parse::parse_obsolete(s) { + Some(Ok(iter)) => Ok(Box::new(vec![first].into_iter().chain(iter).chain(args))), + Some(Err(e)) => match e { + parse::ParseError::Syntax => Err(format!("bad argument format: '{}'", s)), + parse::ParseError::Overflow => Err(format!( + "invalid argument: '{}' Value too large for defined datatype", + s + )), + }, + None => Ok(Box::new(vec![first, second].into_iter().chain(args))), + } + } else { + Err("bad argument encoding".to_owned()) } + } else { + Ok(Box::new(vec![first].into_iter())) + } +} + +#[derive(Debug, PartialEq)] +struct HeadOptions { + pub quiet: bool, + pub verbose: bool, + pub zeroed: bool, + pub all_but_last: bool, + pub mode: Modes, + pub files: Vec, +} + +impl HeadOptions { + pub fn new() -> HeadOptions { + HeadOptions { + quiet: false, + verbose: false, + zeroed: false, + all_but_last: false, + mode: Modes::Lines(10), + files: Vec::new(), + } + } + + ///Construct options from matches + pub fn get_from(args: impl uucore::Args) -> Result { + let matches = app().get_matches_from(arg_iterate(args)?); + + let mut options = HeadOptions::new(); + + options.quiet = matches.is_present(options::QUIET_NAME); + options.verbose = matches.is_present(options::VERBOSE_NAME); + options.zeroed = matches.is_present(options::ZERO_NAME); + + let mode_and_from_end = if let Some(v) = matches.value_of(options::BYTES_NAME) { + match parse_mode(v, Modes::Bytes) { + Ok(v) => v, + Err(err) => { + return Err(format!("invalid number of bytes: {}", err)); + } + } + } else if let Some(v) = matches.value_of(options::LINES_NAME) { + match parse_mode(v, Modes::Lines) { + Ok(v) => v, + Err(err) => { + return Err(format!("invalid number of lines: {}", err)); + } + } + } else { + (Modes::Lines(10), false) + }; + + options.mode = mode_and_from_end.0; + options.all_but_last = mode_and_from_end.1; + + options.files = match matches.values_of(options::FILES_NAME) { + Some(v) => v.map(|s| s.to_owned()).collect(), + None => vec!["-".to_owned()], + }; + //println!("{:#?}", options); + Ok(options) + } +} +// to make clippy shut up +impl Default for HeadOptions { + fn default() -> Self { + Self::new() + } +} + +fn rbuf_n_bytes(input: &mut impl std::io::BufRead, n: usize) -> std::io::Result<()> { + if n == 0 { + return Ok(()); + } + let mut readbuf = [0u8; BUF_SIZE]; + let mut i = 0usize; + + let stdout = std::io::stdout(); + let mut stdout = stdout.lock(); + + loop { + let read = loop { + match input.read(&mut readbuf) { + Ok(n) => break n, + Err(e) => match e.kind() { + ErrorKind::Interrupted => {} + _ => return Err(e), + }, + } + }; + if read == 0 { + // might be unexpected if + // we haven't read `n` bytes + // but this mirrors GNU's behavior + return Ok(()); + } + stdout.write_all(&readbuf[..read.min(n - i)])?; + i += read.min(n - i); + if i == n { + return Ok(()); + } + } +} + +fn rbuf_n_lines(input: &mut impl std::io::BufRead, n: usize, zero: bool) -> std::io::Result<()> { + if n == 0 { + return Ok(()); + } + let stdout = std::io::stdout(); + let mut stdout = stdout.lock(); + let mut lines = 0usize; + split::walk_lines(input, zero, |e| match e { + split::Event::Data(dat) => { + stdout.write_all(dat)?; + Ok(true) + } + split::Event::Line => { + lines += 1; + if lines == n { + Ok(false) + } else { + Ok(true) + } + } + }) +} + +fn rbuf_but_last_n_bytes(input: &mut impl std::io::BufRead, n: usize) -> std::io::Result<()> { + if n == 0 { + //prints everything + return rbuf_n_bytes(input, std::usize::MAX); + } + let stdout = std::io::stdout(); + let mut stdout = stdout.lock(); + + let mut ringbuf = vec![0u8; n]; + + // first we fill the ring buffer + if let Err(e) = input.read_exact(&mut ringbuf) { + if e.kind() == ErrorKind::UnexpectedEof { + return Ok(()); + } else { + return Err(e); + } + } + let mut buffer = [0u8; BUF_SIZE]; + loop { + let read = loop { + match input.read(&mut buffer) { + Ok(n) => break n, + Err(e) => match e.kind() { + ErrorKind::Interrupted => {} + _ => return Err(e), + }, + } + }; + if read == 0 { + return Ok(()); + } else if read >= n { + stdout.write_all(&ringbuf)?; + stdout.write_all(&buffer[..read - n])?; + for i in 0..n { + ringbuf[i] = buffer[read - n + i]; + } + } else { + stdout.write_all(&ringbuf[..read])?; + for i in 0..n - read { + ringbuf[i] = ringbuf[read + i]; + } + ringbuf[n - read..].copy_from_slice(&buffer[..read]); + } + } +} + +fn rbuf_but_last_n_lines( + input: &mut impl std::io::BufRead, + n: usize, + zero: bool, +) -> std::io::Result<()> { + if n == 0 { + //prints everything + return rbuf_n_bytes(input, std::usize::MAX); + } + let mut ringbuf = vec![Vec::new(); n]; + let stdout = std::io::stdout(); + let mut stdout = stdout.lock(); + let mut line = Vec::new(); + let mut lines = 0usize; + split::walk_lines(input, zero, |e| match e { + split::Event::Data(dat) => { + line.extend_from_slice(dat); + Ok(true) + } + split::Event::Line => { + if lines < n { + ringbuf[lines] = std::mem::replace(&mut line, Vec::new()); + lines += 1; + } else { + stdout.write_all(&ringbuf[0])?; + ringbuf.rotate_left(1); + ringbuf[n - 1] = std::mem::replace(&mut line, Vec::new()); + } + Ok(true) + } + }) +} + +fn head_backwards_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> { + assert!(options.all_but_last); + let size = input.seek(SeekFrom::End(0))?; + let size = usize::try_from(size).unwrap(); + match options.mode { + Modes::Bytes(n) => { + if n >= size { + return Ok(()); + } else { + input.seek(SeekFrom::Start(0))?; + rbuf_n_bytes( + &mut std::io::BufReader::with_capacity(BUF_SIZE, input), + size - n, + )?; + } + } + Modes::Lines(n) => { + let mut buffer = [0u8; BUF_SIZE]; + let buffer = &mut buffer[..BUF_SIZE.min(size)]; + let mut i = 0usize; + let mut lines = 0usize; + + let found = 'o: loop { + // the casts here are ok, `buffer.len()` should never be above a few k + input.seek(SeekFrom::Current( + -((buffer.len() as i64).min((size - i) as i64)), + ))?; + input.read_exact(buffer)?; + for byte in buffer.iter().rev() { + match byte { + b'\n' if !options.zeroed => { + lines += 1; + } + 0u8 if options.zeroed => { + lines += 1; + } + _ => {} + } + // if it were just `n`, + if lines == n + 1 { + break 'o i; + } + i += 1; + } + if size - i == 0 { + return Ok(()); + } + }; + input.seek(SeekFrom::Start(0))?; + rbuf_n_bytes( + &mut std::io::BufReader::with_capacity(BUF_SIZE, input), + size - found, + )?; + } + } + Ok(()) +} + +fn head_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> { + if options.all_but_last { + head_backwards_file(input, options) + } else { + match options.mode { + Modes::Bytes(n) => { + rbuf_n_bytes(&mut std::io::BufReader::with_capacity(BUF_SIZE, input), n) + } + Modes::Lines(n) => rbuf_n_lines( + &mut std::io::BufReader::with_capacity(BUF_SIZE, input), + n, + options.zeroed, + ), + } + } +} + +fn uu_head(options: &HeadOptions) { + let mut first = true; + for fname in &options.files { + let res = match fname.as_str() { + "-" => { + if options.verbose { + if !first { + println!(); + } + println!("==> standard input <==") + } + let stdin = std::io::stdin(); + let mut stdin = stdin.lock(); + match options.mode { + Modes::Bytes(n) => { + if options.all_but_last { + rbuf_but_last_n_bytes(&mut stdin, n) + } else { + rbuf_n_bytes(&mut stdin, n) + } + } + Modes::Lines(n) => { + if options.all_but_last { + rbuf_but_last_n_lines(&mut stdin, n, options.zeroed) + } else { + rbuf_n_lines(&mut stdin, n, options.zeroed) + } + } + } + } + name => { + let mut file = match std::fs::File::open(name) { + Ok(f) => f, + Err(err) => match err.kind() { + ErrorKind::NotFound => { + crash!( + EXIT_FAILURE, + "head: cannot open '{}' for reading: No such file or directory", + name + ); + } + ErrorKind::PermissionDenied => { + crash!( + EXIT_FAILURE, + "head: cannot open '{}' for reading: Permission denied", + name + ); + } + _ => { + crash!( + EXIT_FAILURE, + "head: cannot open '{}' for reading: {}", + name, + err + ); + } + }, + }; + if (options.files.len() > 1 && !options.quiet) || options.verbose { + println!("==> {} <==", name) + } + head_file(&mut file, options) + } + }; + if res.is_err() { + if fname.as_str() == "-" { + crash!( + EXIT_FAILURE, + "head: error reading standard input: Input/output error" + ); + } else { + crash!( + EXIT_FAILURE, + "head: error reading {}: Input/output error", + fname + ); + } + } + first = false; } } pub fn uumain(args: impl uucore::Args) -> i32 { - let args = args.collect_str(); - - let mut settings: Settings = Default::default(); - - // handle obsolete -number syntax - let new_args = match obsolete(&args[0..]) { - (args, Some(n)) => { - settings.mode = FilterMode::Lines(n); - args - } - (args, None) => args, - }; - - let matches = app!(SYNTAX, SUMMARY, LONG_HELP) - .optopt( - "c", - "bytes", - "Print the first K bytes. With the leading '-', print all but the last K bytes", - "[-]K", - ) - .optopt( - "n", - "lines", - "Print the first K lines. With the leading '-', print all but the last K lines", - "[-]K", - ) - .optflag("q", "quiet", "never print headers giving file names") - .optflag("v", "verbose", "always print headers giving file names") - .optflag("z", "zero-terminated", "line delimiter is NUL, not newline") - .optflag("h", "help", "display this help and exit") - .optflag("V", "version", "output version information and exit") - .parse(new_args); - - let use_bytes = matches.opt_present("c"); - // TODO: suffixes (e.g. b, kB, etc.) - match matches.opt_str("n") { - Some(n) => { - if use_bytes { - show_error!("cannot specify both --bytes and --lines."); - return 1; - } - - match n.parse::() { - Ok(m) => { - settings.mode = if m < 0 { - let m: usize = m.abs() as usize; - FilterMode::NLines(m) - } else { - let m: usize = m.abs() as usize; - FilterMode::Lines(m) - } - } - Err(e) => { - show_error!("invalid line count '{}': {}", n, e); - return 1; - } - } - } - None => { - if let Some(count) = matches.opt_str("c") { - match count.parse::() { - Ok(m) => settings.mode = FilterMode::Bytes(m), - Err(e) => { - show_error!("invalid byte count '{}': {}", count, e); - return 1; - } - } - } + let args = match HeadOptions::get_from(args) { + Ok(o) => o, + Err(s) => { + crash!(EXIT_FAILURE, "head: {}", s); } }; + uu_head(&args); - let quiet = matches.opt_present("q"); - let verbose = matches.opt_present("v"); - settings.zero_terminated = matches.opt_present("z"); - let files = matches.free; - - // GNU implementation allows multiple declarations of "-q" and "-v" with the - // last flag winning. This can't be simulated with the getopts cargo unless - // we manually parse the arguments. Given the declaration of both flags, - // verbose mode always wins. This is a potential future improvement. - if files.len() > 1 && !quiet && !verbose { - settings.verbose = true; - } - if quiet { - settings.verbose = false; - } - if verbose { - settings.verbose = true; - } - - if files.is_empty() { - let mut buffer = BufReader::new(stdin()); - head(&mut buffer, &settings); - } else { - let mut first_time = true; - - for file in &files { - if settings.verbose { - if !first_time { - println!(); - } - println!("==> {} <==", file); - } - first_time = false; - - let path = Path::new(file); - if path.is_dir() || !path.metadata().is_ok() { - eprintln!( - "cannot open '{}' for reading: No such file or directory", - &path.to_str().unwrap() - ); - continue; - } - let reader = File::open(&path).unwrap(); - let mut buffer = BufReader::new(reader); - if !head(&mut buffer, &settings) { - break; - } - } - } - - 0 + EXIT_SUCCESS } -// It searches for an option in the form of -123123 -// -// In case is found, the options vector will get rid of that object so that -// getopts works correctly. -fn obsolete(options: &[String]) -> (Vec, Option) { - let mut options: Vec = options.to_vec(); - let mut a = 1; - let b = options.len(); +#[cfg(test)] +mod tests { + use std::ffi::OsString; - while a < b { - let previous = options[a - 1].clone(); - let current = options[a].clone(); - let current = current.as_bytes(); - - if previous != "-n" && current.len() > 1 && current[0] == b'-' { - let len = current.len(); - for pos in 1..len { - // Ensure that the argument is only made out of digits - if !(current[pos] as char).is_numeric() { - break; - } - - // If this is the last number - if pos == len - 1 { - options.remove(a); - let number: Option = - from_utf8(¤t[1..len]).unwrap().parse::().ok(); - return (options, Some(number.unwrap())); - } - } - } - - a += 1; + use super::*; + fn options(args: &str) -> Result { + let combined = "head ".to_owned() + args; + let args = combined.split_whitespace(); + HeadOptions::get_from(args.map(|s| OsString::from(s))) } + #[test] + fn test_args_modes() { + let args = options("-n -10M -vz").unwrap(); + assert!(args.zeroed); + assert!(args.verbose); + assert!(args.all_but_last); + assert_eq!(args.mode, Modes::Lines(10 * 1024 * 1024)); + } + #[test] + fn test_gnu_compatibility() { + let args = options("-n 1 -c 1 -n 5 -c kiB -vqvqv").unwrap(); + assert!(args.mode == Modes::Bytes(1024)); + assert!(args.verbose); + assert_eq!(options("-5").unwrap().mode, Modes::Lines(5)); + assert_eq!(options("-2b").unwrap().mode, Modes::Bytes(1024)); + assert_eq!(options("-5 -c 1").unwrap().mode, Modes::Bytes(1)); + } + #[test] + fn all_args_test() { + assert!(options("--silent").unwrap().quiet); + assert!(options("--quiet").unwrap().quiet); + assert!(options("-q").unwrap().quiet); + assert!(options("--verbose").unwrap().verbose); + assert!(options("-v").unwrap().verbose); + assert!(options("--zero-terminated").unwrap().zeroed); + assert!(options("-z").unwrap().zeroed); + assert_eq!(options("--lines 15").unwrap().mode, Modes::Lines(15)); + assert_eq!(options("-n 15").unwrap().mode, Modes::Lines(15)); + assert_eq!(options("--bytes 15").unwrap().mode, Modes::Bytes(15)); + assert_eq!(options("-c 15").unwrap().mode, Modes::Bytes(15)); + } + #[test] + fn test_options_errors() { + assert!(options("-n IsThisTheRealLife?").is_err()); + assert!(options("-c IsThisJustFantasy").is_err()); + } + #[test] + fn test_options_correct_defaults() { + let opts = HeadOptions::new(); + let opts2: HeadOptions = Default::default(); - (options, None) -} + assert_eq!(opts, opts2); -// TODO: handle errors on read -fn head(reader: &mut BufReader, settings: &Settings) -> bool { - match settings.mode { - FilterMode::Bytes(count) => { - for byte in reader.bytes().take(count) { - print!("{}", byte.unwrap() as char); - } - } - FilterMode::Lines(count) => { - if settings.zero_terminated { - for line in reader.split(0).take(count) { - print!("{}\0", String::from_utf8(line.unwrap()).unwrap()) - } - } else { - for line in reader.lines().take(count) { - println!("{}", line.unwrap()); - } - } - } - FilterMode::NLines(count) => { - let mut vector: VecDeque = VecDeque::new(); - - for line in reader.lines() { - vector.push_back(line.unwrap()); - if vector.len() <= count { - continue; - } - println!("{}", vector.pop_front().unwrap()); + assert!(opts.verbose == false); + assert!(opts.quiet == false); + assert!(opts.zeroed == false); + assert!(opts.all_but_last == false); + assert_eq!(opts.mode, Modes::Lines(10)); + assert!(opts.files.is_empty()); + } + #[test] + fn test_parse_mode() { + assert_eq!( + parse_mode("123", Modes::Lines), + Ok((Modes::Lines(123), false)) + ); + assert_eq!( + parse_mode("-456", Modes::Bytes), + Ok((Modes::Bytes(456), true)) + ); + assert!(parse_mode("Nonsensical Nonsense", Modes::Bytes).is_err()); + #[cfg(target_pointer_width = "64")] + assert!(parse_mode("1Y", Modes::Lines).is_err()); + #[cfg(target_pointer_width = "32")] + assert!(parse_mode("1T", Modes::Bytes).is_err()); + } + fn arg_outputs(src: &str) -> Result { + let split = src.split_whitespace().map(|x| OsString::from(x)); + match arg_iterate(split) { + Ok(args) => { + let vec = args + .map(|s| s.to_str().unwrap().to_owned()) + .collect::>(); + Ok(vec.join(" ")) } + Err(e) => Err(e), } } - true + #[test] + fn test_arg_iterate() { + // test that normal args remain unchanged + assert_eq!( + arg_outputs("head -n -5 -zv"), + Ok("head -n -5 -zv".to_owned()) + ); + // tests that nonsensical args are unchanged + assert_eq!( + arg_outputs("head -to_be_or_not_to_be,..."), + Ok("head -to_be_or_not_to_be,...".to_owned()) + ); + //test that the obsolete syntax is unrolled + assert_eq!( + arg_outputs("head -123qvqvqzc"), + Ok("head -q -z -c 123".to_owned()) + ); + //test that bad obsoletes are an error + assert!(arg_outputs("head -123FooBar").is_err()); + //test overflow + assert!(arg_outputs("head -100000000000000000000000000000000000000000").is_err()); + //test that empty args remain unchanged + assert_eq!(arg_outputs("head"), Ok("head".to_owned())); + } + #[test] + #[cfg(linux)] + fn test_arg_iterate_bad_encoding() { + let invalid = unsafe { std::str::from_utf8_unchecked(b"\x80\x81") }; + // this arises from a conversion from OsString to &str + assert!( + arg_iterate(vec![OsString::from("head"), OsString::from(invalid)].into_iter()).is_err() + ); + } + #[test] + fn rbuf_early_exit() { + let mut empty = std::io::BufReader::new(std::io::Cursor::new(Vec::new())); + assert!(rbuf_n_bytes(&mut empty, 0).is_ok()); + assert!(rbuf_n_lines(&mut empty, 0, false).is_ok()); + } } diff --git a/src/uu/head/src/parse.rs b/src/uu/head/src/parse.rs new file mode 100644 index 000000000..470d821e0 --- /dev/null +++ b/src/uu/head/src/parse.rs @@ -0,0 +1,282 @@ +use std::convert::TryFrom; +use std::ffi::OsString; + +#[derive(PartialEq, Debug)] +pub enum ParseError { + Syntax, + Overflow, +} +/// Parses obsolete syntax +/// head -NUM[kmzv] +pub fn parse_obsolete(src: &str) -> Option, ParseError>> { + let mut chars = src.char_indices(); + if let Some((_, '-')) = chars.next() { + let mut num_end = 0usize; + let mut has_num = false; + let mut last_char = 0 as char; + while let Some((n, c)) = chars.next() { + if c.is_numeric() { + has_num = true; + num_end = n; + } else { + last_char = c; + break; + } + } + if has_num { + match src[1..=num_end].parse::() { + Ok(num) => { + let mut quiet = false; + let mut verbose = false; + let mut zero_terminated = false; + let mut multiplier = None; + let mut c = last_char; + loop { + // not that here, we only match lower case 'k', 'c', and 'm' + match c { + // we want to preserve order + // this also saves us 1 heap allocation + 'q' => { + quiet = true; + verbose = false + } + 'v' => { + verbose = true; + quiet = false + } + 'z' => zero_terminated = true, + 'c' => multiplier = Some(1), + 'b' => multiplier = Some(512), + 'k' => multiplier = Some(1024), + 'm' => multiplier = Some(1024 * 1024), + '\0' => {} + _ => return Some(Err(ParseError::Syntax)), + } + if let Some((_, next)) = chars.next() { + c = next + } else { + break; + } + } + let mut options = Vec::new(); + if quiet { + options.push(OsString::from("-q")) + } + if verbose { + options.push(OsString::from("-v")) + } + if zero_terminated { + options.push(OsString::from("-z")) + } + if let Some(n) = multiplier { + options.push(OsString::from("-c")); + let num = match num.checked_mul(n) { + Some(n) => n, + None => return Some(Err(ParseError::Overflow)), + }; + options.push(OsString::from(format!("{}", num))); + } else { + options.push(OsString::from("-n")); + options.push(OsString::from(format!("{}", num))); + } + Some(Ok(options.into_iter())) + } + Err(_) => Some(Err(ParseError::Overflow)), + } + } else { + None + } + } else { + None + } +} +/// Parses an -c or -n argument, +/// the bool specifies whether to read from the end +pub fn parse_num(src: &str) -> Result<(usize, bool), ParseError> { + let mut num_start = 0; + let mut chars = src.char_indices(); + let (mut chars, all_but_last) = match chars.next() { + Some((_, c)) => { + if c == '-' { + num_start += 1; + (chars, true) + } else { + (src.char_indices(), false) + } + } + None => return Err(ParseError::Syntax), + }; + let mut num_end = 0usize; + let mut last_char = 0 as char; + let mut num_count = 0usize; + while let Some((n, c)) = chars.next() { + if c.is_numeric() { + num_end = n; + num_count += 1; + } else { + last_char = c; + break; + } + } + + let num = if num_count > 0 { + match src[num_start..=num_end].parse::() { + Ok(n) => Some(n), + Err(_) => return Err(ParseError::Overflow), + } + } else { + None + }; + + if last_char == 0 as char { + if let Some(n) = num { + Ok((n, all_but_last)) + } else { + Err(ParseError::Syntax) + } + } else { + let base: u128 = match chars.next() { + Some((_, c)) => { + let b = match c { + 'B' if last_char != 'b' => 1000, + 'i' if last_char != 'b' => { + if let Some((_, 'B')) = chars.next() { + 1024 + } else { + return Err(ParseError::Syntax); + } + } + _ => return Err(ParseError::Syntax), + }; + if chars.next().is_some() { + return Err(ParseError::Syntax); + } else { + b + } + } + None => 1024, + }; + let mul = match last_char.to_lowercase().next().unwrap() { + 'b' => 512, + 'k' => base.pow(1), + 'm' => base.pow(2), + 'g' => base.pow(3), + 't' => base.pow(4), + 'p' => base.pow(5), + 'e' => base.pow(6), + 'z' => base.pow(7), + 'y' => base.pow(8), + _ => return Err(ParseError::Syntax), + }; + let mul = match usize::try_from(mul) { + Ok(n) => n, + Err(_) => return Err(ParseError::Overflow), + }; + match num.unwrap_or(1).checked_mul(mul) { + Some(n) => Ok((n, all_but_last)), + None => Err(ParseError::Overflow), + } + } +} +#[cfg(test)] +mod tests { + use super::*; + fn obsolete(src: &str) -> Option, ParseError>> { + let r = parse_obsolete(src); + match r { + Some(s) => match s { + Ok(v) => Some(Ok(v.map(|s| s.to_str().unwrap().to_owned()).collect())), + Err(e) => Some(Err(e)), + }, + None => None, + } + } + fn obsolete_result(src: &[&str]) -> Option, ParseError>> { + Some(Ok(src.iter().map(|s| s.to_string()).collect())) + } + #[test] + #[cfg(not(target_pointer_width = "128"))] + fn test_parse_overflow_x64() { + assert_eq!(parse_num("1Y"), Err(ParseError::Overflow)); + assert_eq!(parse_num("1Z"), Err(ParseError::Overflow)); + assert_eq!(parse_num("100E"), Err(ParseError::Overflow)); + assert_eq!(parse_num("100000P"), Err(ParseError::Overflow)); + assert_eq!(parse_num("1000000000T"), Err(ParseError::Overflow)); + assert_eq!( + parse_num("10000000000000000000000"), + Err(ParseError::Overflow) + ); + } + #[test] + #[cfg(target_pointer_width = "32")] + fn test_parse_overflow_x32() { + assert_eq!(parse_num("1T"), Err(ParseError::Overflow)); + assert_eq!(parse_num("1000G"), Err(ParseError::Overflow)); + } + #[test] + fn test_parse_bad_syntax() { + assert_eq!(parse_num("5MiB nonsense"), Err(ParseError::Syntax)); + assert_eq!(parse_num("Nonsense string"), Err(ParseError::Syntax)); + assert_eq!(parse_num("5mib"), Err(ParseError::Syntax)); + assert_eq!(parse_num("biB"), Err(ParseError::Syntax)); + assert_eq!(parse_num("-"), Err(ParseError::Syntax)); + assert_eq!(parse_num(""), Err(ParseError::Syntax)); + } + #[test] + fn test_parse_numbers() { + assert_eq!(parse_num("k"), Ok((1024, false))); + assert_eq!(parse_num("MiB"), Ok((1024 * 1024, false))); + assert_eq!(parse_num("-5"), Ok((5, true))); + assert_eq!(parse_num("b"), Ok((512, false))); + assert_eq!(parse_num("-2GiB"), Ok((2 * 1024 * 1024 * 1024, true))); + assert_eq!(parse_num("5M"), Ok((5 * 1024 * 1024, false))); + assert_eq!(parse_num("5MB"), Ok((5 * 1000 * 1000, false))); + } + #[test] + fn test_parse_numbers_obsolete() { + assert_eq!(obsolete("-5"), obsolete_result(&["-n", "5"])); + assert_eq!(obsolete("-100"), obsolete_result(&["-n", "100"])); + assert_eq!(obsolete("-5m"), obsolete_result(&["-c", "5242880"])); + assert_eq!(obsolete("-1k"), obsolete_result(&["-c", "1024"])); + assert_eq!(obsolete("-2b"), obsolete_result(&["-c", "1024"])); + assert_eq!(obsolete("-1mmk"), obsolete_result(&["-c", "1024"])); + assert_eq!(obsolete("-1vz"), obsolete_result(&["-v", "-z", "-n", "1"])); + assert_eq!( + obsolete("-1vzqvq"), + obsolete_result(&["-q", "-z", "-n", "1"]) + ); + assert_eq!(obsolete("-1vzc"), obsolete_result(&["-v", "-z", "-c", "1"])); + assert_eq!( + obsolete("-105kzm"), + obsolete_result(&["-z", "-c", "110100480"]) + ); + } + #[test] + fn test_parse_errors_obsolete() { + assert_eq!(obsolete("-5n"), Some(Err(ParseError::Syntax))); + assert_eq!(obsolete("-5c5"), Some(Err(ParseError::Syntax))); + } + #[test] + fn test_parse_obsolete_nomatch() { + assert_eq!(obsolete("-k"), None); + assert_eq!(obsolete("asd"), None); + } + #[test] + #[cfg(target_pointer_width = "64")] + fn test_parse_obsolete_overflow_x64() { + assert_eq!( + obsolete("-1000000000000000m"), + Some(Err(ParseError::Overflow)) + ); + assert_eq!( + obsolete("-10000000000000000000000"), + Some(Err(ParseError::Overflow)) + ); + } + #[test] + #[cfg(target_pointer_width = "32")] + fn test_parse_obsolete_overflow_x32() { + assert_eq!(obsolete("-42949672960"), Some(Err(ParseError::Overflow))); + assert_eq!(obsolete("-42949672k"), Some(Err(ParseError::Overflow))); + } +} diff --git a/src/uu/head/src/split.rs b/src/uu/head/src/split.rs new file mode 100644 index 000000000..9e9a0c685 --- /dev/null +++ b/src/uu/head/src/split.rs @@ -0,0 +1,60 @@ +#[derive(Debug)] +pub enum Event<'a> { + Data(&'a [u8]), + Line, +} +/// Loops over the lines read from a BufRead. +/// # Arguments +/// * `input` the ReadBuf to read from +/// * `zero` whether to use 0u8 as a line delimiter +/// * `on_event` a closure receiving some bytes read in a slice, or +/// event signalling a line was just read. +/// this is guaranteed to be signalled *directly* after the +/// slice containing the (CR on win)LF / 0 is passed +/// +/// Return whether to continue +pub fn walk_lines( + input: &mut impl std::io::BufRead, + zero: bool, + mut on_event: F, +) -> std::io::Result<()> +where + F: FnMut(Event) -> std::io::Result, +{ + let mut buffer = [0u8; super::BUF_SIZE]; + loop { + let read = loop { + match input.read(&mut buffer) { + Ok(n) => break n, + Err(e) => match e.kind() { + std::io::ErrorKind::Interrupted => {} + _ => return Err(e), + }, + } + }; + if read == 0 { + return Ok(()); + } + let mut base = 0usize; + for (i, byte) in buffer[..read].iter().enumerate() { + match byte { + b'\n' if !zero => { + on_event(Event::Data(&buffer[base..=i]))?; + base = i + 1; + if !on_event(Event::Line)? { + return Ok(()); + } + } + 0u8 if zero => { + on_event(Event::Data(&buffer[base..=i]))?; + base = i + 1; + if !on_event(Event::Line)? { + return Ok(()); + } + } + _ => {} + } + } + on_event(Event::Data(&buffer[base..read]))?; + } +} diff --git a/tests/by-util/test_head.rs b/tests/by-util/test_head.rs old mode 100644 new mode 100755 index a1086c004..d91cc1289 --- a/tests/by-util/test_head.rs +++ b/tests/by-util/test_head.rs @@ -86,88 +86,74 @@ fn test_verbose() { .stdout_is_fixture("lorem_ipsum_verbose.expected"); } -#[test] -fn test_zero_terminated() { - new_ucmd!() - .args(&["-z", "zero_terminated.txt"]) - .run() - .stdout_is_fixture("zero_terminated.expected"); -} - #[test] #[ignore] fn test_spams_newline() { + //this test is does not mirror what GNU does new_ucmd!().pipe_in("a").succeeds().stdout_is("a\n"); } #[test] -#[ignore] -fn test_unsupported_byte_syntax() { +fn test_byte_syntax() { new_ucmd!() .args(&["-1c"]) .pipe_in("abc") - .fails() - //GNU head returns "a" - .stdout_is("") - .stderr_is("head: error: Unrecognized option: \'1\'"); + .run() + .stdout_is("a"); } #[test] -#[ignore] -fn test_unsupported_line_syntax() { +fn test_line_syntax() { new_ucmd!() .args(&["-n", "2048m"]) .pipe_in("a\n") - .fails() - //.stdout_is("a\n"); What GNU head returns. - .stdout_is("") - .stderr_is("head: error: invalid line count \'2048m\': invalid digit found in string"); + .run() + .stdout_is("a\n"); } #[test] -#[ignore] -fn test_unsupported_zero_terminated_syntax() { +fn test_zero_terminated_syntax() { new_ucmd!() - .args(&["-z -n 1"]) + .args(&["-z", "-n", "1"]) .pipe_in("x\0y") - .fails() - //GNU Head returns "x\0" - .stderr_is("head: error: Unrecognized option: \'z\'"); + .run() + .stdout_is("x\0"); } #[test] -#[ignore] -fn test_unsupported_zero_terminated_syntax_2() { +fn test_zero_terminated_syntax_2() { new_ucmd!() - .args(&["-z -n 2"]) + .args(&["-z", "-n", "2"]) .pipe_in("x\0y") - .fails() - //GNU Head returns "x\0y" - .stderr_is("head: error: Unrecognized option: \'z\'"); + .run() + .stdout_is("x\0y"); } #[test] -#[ignore] -fn test_unsupported_negative_byte_syntax() { +fn test_negative_byte_syntax() { new_ucmd!() .args(&["--bytes=-2"]) .pipe_in("a\n") - .fails() - //GNU Head returns "" - .stderr_is("head: error: invalid byte count \'-2\': invalid digit found in string"); + .run() + .stdout_is(""); } #[test] -#[ignore] -fn test_bug_in_negative_zero_lines() { +fn test_negative_zero_lines() { new_ucmd!() .args(&["--lines=-0"]) .pipe_in("a\nb\n") .succeeds() - //GNU Head returns "a\nb\n" - .stdout_is(""); + .stdout_is("a\nb\n"); +} +#[test] +fn test_negative_zero_bytes() { + new_ucmd!() + .args(&["--bytes=-0"]) + .pipe_in("qwerty") + .succeeds() + .stdout_is("qwerty"); } - #[test] fn test_no_such_file_or_directory() { let result = new_ucmd!().arg("no_such_file.toml").run(); @@ -179,3 +165,38 @@ fn test_no_such_file_or_directory() { .contains("cannot open 'no_such_file.toml' for reading: No such file or directory") ) } + +// there was a bug not caught by previous tests +// where for negative n > 3, the total amount of lines +// was correct, but it would eat from the second line +#[test] +fn test_sequence_fixture() { + new_ucmd!() + .args(&["-n", "-10", "sequence"]) + .run() + .stdout_is_fixture("sequence.expected"); +} +#[test] +fn test_file_backwards() { + new_ucmd!() + .args(&["-c", "-10", "lorem_ipsum.txt"]) + .run() + .stdout_is_fixture("lorem_ipsum_backwards_file.expected"); +} + +#[test] +fn test_zero_terminated() { + new_ucmd!() + .args(&["-z", "zero_terminated.txt"]) + .run() + .stdout_is_fixture("zero_terminated.expected"); +} + +#[test] +fn test_obsolete_extras() { + new_ucmd!() + .args(&["-5zv"]) + .pipe_in("1\02\03\04\05\06") + .succeeds() + .stdout_is("==> standard input <==\n1\02\03\04\05\0"); +} diff --git a/tests/fixtures/head/lorem_ipsum_backwards_file.expected b/tests/fixtures/head/lorem_ipsum_backwards_file.expected new file mode 100644 index 000000000..fcf432187 --- /dev/null +++ b/tests/fixtures/head/lorem_ipsum_backwards_file.expected @@ -0,0 +1,24 @@ +Lorem ipsum dolor sit amet, +consectetur adipiscing elit. +Nunc interdum suscipit sem vel ornare. +Proin euismod, +justo sed mollis dictum, +eros urna ultricies augue, +eu pharetra mi ex id ante. +Duis convallis porttitor aliquam. +Nunc vitae tincidunt ex. +Suspendisse iaculis ligula ac diam consectetur lacinia. +Donec vel velit dui. +Etiam fringilla, +dolor quis tempor vehicula, +lacus turpis bibendum velit, +et pellentesque elit odio a magna. +Cras vulputate tortor non libero vehicula euismod. +Aliquam tincidunt nisl eget enim cursus, +viverra sagittis magna commodo. +Cras rhoncus egestas leo nec blandit. +Suspendisse potenti. +Etiam ullamcorper leo vel lacus vestibulum, +cursus semper eros efficitur. +In hac habitasse platea dictumst. +Phasellus scelerisque vehicula f \ No newline at end of file diff --git a/tests/fixtures/head/sequence b/tests/fixtures/head/sequence new file mode 100644 index 000000000..190423f88 --- /dev/null +++ b/tests/fixtures/head/sequence @@ -0,0 +1,100 @@ +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 diff --git a/tests/fixtures/head/sequence.expected b/tests/fixtures/head/sequence.expected new file mode 100644 index 000000000..17d2a1390 --- /dev/null +++ b/tests/fixtures/head/sequence.expected @@ -0,0 +1,90 @@ +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90