1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2026-01-16 18:21:01 +00:00
uutils-coreutils/src/uu/head/src/head.rs
2021-06-11 23:26:58 +09:00

605 lines
20 KiB
Rust

// spell-checker:ignore (vars) zlines
use clap::{crate_version, App, Arg};
use std::convert::TryFrom;
use std::ffi::OsString;
use std::io::{self, ErrorKind, Read, Seek, SeekFrom, Write};
use uucore::{crash, executable, show_error, show_error_custom_description};
/// Exit status used when argument parsing fails or at least one file could not be processed.
const EXIT_FAILURE: i32 = 1;
/// Exit status used when every input was processed without error.
const EXIT_SUCCESS: i32 = 0;
/// Size in bytes of the stack buffers and `BufReader` capacities used in this file.
const BUF_SIZE: usize = 65536;
/// Description text passed to clap's `about` in `app`.
const ABOUT: &str = "\
Print the first 10 lines of each FILE to standard output.\n\
With more than one FILE, precede each with a header giving the file name.\n\
\n\
With no FILE, or when FILE is -, read standard input.\n\
\n\
Mandatory arguments to long flags are mandatory for short flags too.\
";
/// Usage line passed to clap's `usage` in `app`.
const USAGE: &str = "head [FLAG]... [FILE]...";
/// Names under which each argument is registered with clap in `app` and
/// later looked up in the resulting matches.
mod options {
// Value of `-c` / `--bytes`.
pub const BYTES_NAME: &str = "BYTES";
// Value of `-n` / `--lines`.
pub const LINES_NAME: &str = "LINES";
// Flag `-q` / `--quiet` / `--silent`.
pub const QUIET_NAME: &str = "QUIET";
// Flag `-v` / `--verbose`.
pub const VERBOSE_NAME: &str = "VERBOSE";
// Flag `-z` / `--zero-terminated`.
pub const ZERO_NAME: &str = "ZERO";
// Positional FILE arguments (may be given multiple times).
pub const FILES_NAME: &str = "FILE";
}
mod lines;
mod parse;
mod split;
mod take;
use lines::zlines;
use take::take_all_but;
/// Builds the clap command-line definition for `head`.
///
/// * `-c/--bytes [-]NUM` and `-n/--lines [-]NUM` take a value that may start
///   with a hyphen; each overrides itself and the other, so the last one
///   given on the command line wins.
/// * `-q/--quiet` (alias `--silent`) and `-v/--verbose` override each other.
/// * `-z/--zero-terminated` may be repeated.
/// * Any number of positional FILE arguments are accepted.
fn app<'a>() -> App<'a, 'a> {
    App::new(executable!())
        .version(crate_version!())
        .about(ABOUT)
        .usage(USAGE)
        .arg(
            Arg::with_name(options::BYTES_NAME)
                .short("c")
                .long("bytes")
                .value_name("[-]NUM")
                .takes_value(true)
                .help(
                    "\
                     print the first NUM bytes of each file;\n\
                     with the leading '-', print all but the last\n\
                     NUM bytes of each file\
                     ",
                )
                .overrides_with_all(&[options::BYTES_NAME, options::LINES_NAME])
                .allow_hyphen_values(true),
        )
        .arg(
            Arg::with_name(options::LINES_NAME)
                .short("n")
                .long("lines")
                .value_name("[-]NUM")
                .takes_value(true)
                .help(
                    "\
                     print the first NUM lines instead of the first 10;\n\
                     with the leading '-', print all but the last\n\
                     NUM lines of each file\
                     ",
                )
                .overrides_with_all(&[options::LINES_NAME, options::BYTES_NAME])
                .allow_hyphen_values(true),
        )
        .arg(
            Arg::with_name(options::QUIET_NAME)
                .short("q")
                // Written without leading dashes like every other long name
                // here; clap strips leading '-' anyway, so the accepted flag
                // is unchanged.
                .long("quiet")
                .visible_alias("silent")
                .help("never print headers giving file names")
                .overrides_with_all(&[options::VERBOSE_NAME, options::QUIET_NAME]),
        )
        .arg(
            Arg::with_name(options::VERBOSE_NAME)
                .short("v")
                .long("verbose")
                .help("always print headers giving file names")
                .overrides_with_all(&[options::QUIET_NAME, options::VERBOSE_NAME]),
        )
        .arg(
            Arg::with_name(options::ZERO_NAME)
                .short("z")
                .long("zero-terminated")
                .help("line delimiter is NUL, not newline")
                .overrides_with(options::ZERO_NAME),
        )
        .arg(Arg::with_name(options::FILES_NAME).multiple(true))
}
/// What `head` counts, together with the requested count.
#[derive(PartialEq, Debug, Clone, Copy)]
enum Modes {
/// Count lines (selected by `-n` / `--lines`, and the default).
Lines(usize),
/// Count bytes (selected by `-c` / `--bytes`).
Bytes(usize),
}
/// Parses the value of `-c`/`-n` into a mode plus an "all but last" flag.
///
/// `src` is handed to `parse::parse_num`; on success the number is wrapped
/// with `closure` (e.g. `Modes::Lines`) and returned together with the
/// boolean that `parse_num` produced.
///
/// # Errors
///
/// Returns the quoted input for a syntax error, and the quoted input followed
/// by `: Value too large for defined datatype` for an overflow.
fn parse_mode<F>(src: &str, closure: F) -> Result<(Modes, bool), String>
where
    F: FnOnce(usize) -> Modes,
{
    parse::parse_num(src)
        .map(|(count, from_end)| (closure(count), from_end))
        .map_err(|reason| match reason {
            parse::ParseError::Overflow => {
                format!("'{}': Value too large for defined datatype", src)
            }
            parse::ParseError::Syntax => format!("'{}'", src),
        })
}
/// Rewrites the argument list so that an obsolete-style first option
/// (as recognised by `parse::parse_obsolete`) is replaced by its expansion.
///
/// Only the argument directly after the program name is inspected. When
/// `parse_obsolete` returns `None` for it, the arguments are passed through
/// unchanged.
///
/// # Errors
///
/// Returns a message when the inspected argument is not valid Unicode, or
/// when `parse_obsolete` reports a syntax or overflow error for it.
///
/// # Panics
///
/// Panics if `args` is empty (the program name is expected to be present).
fn arg_iterate<'a>(
    mut args: impl uucore::Args + 'a,
) -> Result<Box<dyn Iterator<Item = OsString> + 'a>, String> {
    // argv[0] is always present
    let program = args.next().unwrap();

    let candidate = match args.next() {
        Some(arg) => arg,
        // Nothing but the program name: nothing to rewrite.
        None => return Ok(Box::new(vec![program].into_iter())),
    };

    let text = match candidate.to_str() {
        Some(text) => text,
        None => return Err("bad argument encoding".to_owned()),
    };

    match parse::parse_obsolete(text) {
        None => Ok(Box::new(vec![program, candidate].into_iter().chain(args))),
        Some(Ok(expanded)) => Ok(Box::new(
            vec![program].into_iter().chain(expanded).chain(args),
        )),
        Some(Err(parse::ParseError::Syntax)) => {
            Err(format!("bad argument format: '{}'", text))
        }
        Some(Err(parse::ParseError::Overflow)) => Err(format!(
            "invalid argument: '{}' Value too large for defined datatype",
            text
        )),
    }
}
/// Fully parsed command-line settings for one invocation of `head`.
#[derive(Debug, PartialEq)]
struct HeadOptions {
/// Set by `-q` / `--quiet` / `--silent`: never print file-name headers.
pub quiet: bool,
/// Set by `-v` / `--verbose`: always print file-name headers.
pub verbose: bool,
/// Set by `-z` / `--zero-terminated`: lines end with NUL instead of newline.
pub zeroed: bool,
/// When true, print everything except the last `mode` bytes/lines
/// instead of the first ones.
pub all_but_last: bool,
/// Whether bytes or lines are counted, and how many.
pub mode: Modes,
/// Inputs to process in order; `"-"` stands for standard input.
pub files: Vec<String>,
}
impl HeadOptions {
    /// Returns the default settings: first 10 lines, no flags set, and an
    /// empty file list.
    pub fn new() -> HeadOptions {
        HeadOptions {
            mode: Modes::Lines(10),
            all_but_last: false,
            quiet: false,
            verbose: false,
            zeroed: false,
            files: Vec::new(),
        }
    }

    /// Construct options from matches
    ///
    /// The raw arguments first go through `arg_iterate`, then through the
    /// clap definition from `app`. A byte count takes precedence over a line
    /// count when both are present in the matches; with neither, the default
    /// of the first 10 lines applies. With no FILE arguments the file list
    /// is `["-"]`.
    ///
    /// # Errors
    ///
    /// Returns a message when `arg_iterate` rejects the arguments or when the
    /// byte/line count cannot be parsed.
    pub fn get_from(args: impl uucore::Args) -> Result<Self, String> {
        let matches = app().get_matches_from(arg_iterate(args)?);

        let requested_bytes = matches.value_of(options::BYTES_NAME);
        let requested_lines = matches.value_of(options::LINES_NAME);
        let (mode, all_but_last) = match (requested_bytes, requested_lines) {
            (Some(v), _) => parse_mode(v, Modes::Bytes)
                .map_err(|err| format!("invalid number of bytes: {}", err))?,
            (None, Some(v)) => parse_mode(v, Modes::Lines)
                .map_err(|err| format!("invalid number of lines: {}", err))?,
            (None, None) => (Modes::Lines(10), false),
        };

        let files = matches
            .values_of(options::FILES_NAME)
            .map(|names| names.map(str::to_owned).collect::<Vec<String>>())
            .unwrap_or_else(|| vec!["-".to_owned()]);

        Ok(HeadOptions {
            quiet: matches.is_present(options::QUIET_NAME),
            verbose: matches.is_present(options::VERBOSE_NAME),
            zeroed: matches.is_present(options::ZERO_NAME),
            all_but_last,
            mode,
            files,
        })
    }
}
// to make clippy shut up
impl Default for HeadOptions {
fn default() -> Self {
Self::new()
}
}
/// Copies at most the first `n` bytes of `input` to standard output.
///
/// Fewer bytes are written when `input` ends early.
///
/// # Errors
///
/// Propagates any error from reading `input` or writing to stdout.
fn read_n_bytes<R>(input: R, n: usize) -> std::io::Result<()>
where
    R: Read,
{
    // Cap the reader so that the copy stops after `n` bytes.
    let mut limited = input.take(n as u64);
    let out = std::io::stdout();
    let mut out = out.lock();
    io::copy(&mut limited, &mut out).map(|_| ())
}
/// Writes the first `n` lines of `input` to standard output.
///
/// The input is driven through `split::walk_lines` with the `zero` flag
/// forwarded unchanged; every `Data` event is written to stdout and every
/// `Line` event is counted. The walk is asked to stop once `n` lines have
/// been counted. With `n == 0` nothing is read or written.
fn read_n_lines(input: &mut impl std::io::BufRead, n: usize, zero: bool) -> std::io::Result<()> {
    if n == 0 {
        return Ok(());
    }
    let out = std::io::stdout();
    let mut out = out.lock();
    let mut seen = 0usize;
    split::walk_lines(input, zero, |event| match event {
        split::Event::Line => {
            seen += 1;
            // `false` tells the walker to stop.
            Ok(seen != n)
        }
        split::Event::Data(chunk) => {
            out.write_all(chunk)?;
            Ok(true)
        }
    })
}
/// Writes everything from `input` except its last `n` bytes to stdout.
///
/// Works on non-seekable input by always holding back the most recent `n`
/// bytes: a byte is only written once `n` newer bytes have been read. If the
/// input is shorter than `n` bytes nothing is written. With `n == 0` the
/// whole input is copied.
fn read_but_last_n_bytes(input: &mut impl std::io::BufRead, n: usize) -> std::io::Result<()> {
    if n == 0 {
        //prints everything
        return read_n_bytes(input, std::usize::MAX);
    }
    let out = std::io::stdout();
    let mut out = out.lock();

    // The `n` most recently read bytes, oldest first.
    let mut held_back = vec![0u8; n];
    match input.read_exact(&mut held_back) {
        Ok(()) => {}
        // Fewer than `n` bytes in total: all of them are "the last n".
        Err(e) if e.kind() == ErrorKind::UnexpectedEof => return Ok(()),
        Err(e) => return Err(e),
    }

    let mut chunk = [0u8; BUF_SIZE];
    loop {
        let got = match input.read(&mut chunk) {
            Ok(0) => return Ok(()),
            Ok(count) => count,
            Err(e) if e.kind() == ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        if got >= n {
            // Everything held back is now safe to print, as is all of the
            // new chunk except its final `n` bytes, which become the new
            // held-back window.
            out.write_all(&held_back)?;
            out.write_all(&chunk[..got - n])?;
            held_back.copy_from_slice(&chunk[got - n..got]);
        } else {
            // Only the oldest `got` held-back bytes can be released; slide
            // the rest to the front and append the new bytes.
            out.write_all(&held_back[..got])?;
            for src in got..n {
                held_back[src - got] = held_back[src];
            }
            held_back[n - got..].copy_from_slice(&chunk[..got]);
        }
    }
}
/// Writes every line of `input` except the last `n` to standard output.
///
/// With `zero` set, records come from `zlines`; otherwise the input is split
/// on `\n`. In both cases the bytes are written exactly as read.
///
/// The newline branch deliberately avoids `BufRead::lines()` + `println!`:
/// that combination rejects non-UTF-8 input, turns `\r\n` into `\n`, and
/// appends a newline to a final line that had none.
///
/// # Errors
///
/// Propagates any error from reading `input` or writing to stdout.
fn read_but_last_n_lines(
    input: impl std::io::BufRead,
    n: usize,
    zero: bool,
) -> std::io::Result<()> {
    let stdout = std::io::stdout();
    let mut stdout = stdout.lock();
    if zero {
        for bytes in take_all_but(zlines(input), n) {
            stdout.write_all(&bytes?)?;
        }
    } else {
        let mut source = input;
        let mut finished = false;
        // Yields each line as raw bytes, terminator included (the last line
        // may lack one). Stops for good after end of input or the first
        // error so a persistently failing reader cannot loop forever.
        let raw_lines = std::iter::from_fn(move || {
            if finished {
                return None;
            }
            let mut line = Vec::new();
            match source.read_until(b'\n', &mut line) {
                Ok(0) => {
                    finished = true;
                    None
                }
                Ok(_) => Some(Ok(line)),
                Err(e) => {
                    finished = true;
                    Some(Err(e))
                }
            }
        });
        for line in take_all_but(raw_lines, n) {
            stdout.write_all(&line?)?;
        }
    }
    Ok(())
}
fn head_backwards_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> {
assert!(options.all_but_last);
let size = input.seek(SeekFrom::End(0))?;
let size = usize::try_from(size).unwrap();
match options.mode {
Modes::Bytes(n) => {
if n >= size {
return Ok(());
} else {
input.seek(SeekFrom::Start(0))?;
read_n_bytes(
&mut std::io::BufReader::with_capacity(BUF_SIZE, input),
size - n,
)?;
}
}
Modes::Lines(n) => {
let mut buffer = [0u8; BUF_SIZE];
let buffer = &mut buffer[..BUF_SIZE.min(size)];
let mut i = 0usize;
let mut lines = 0usize;
let found = 'o: loop {
// the casts here are ok, `buffer.len()` should never be above a few k
input.seek(SeekFrom::Current(
-((buffer.len() as i64).min((size - i) as i64)),
))?;
input.read_exact(buffer)?;
for byte in buffer.iter().rev() {
match byte {
b'\n' if !options.zeroed => {
lines += 1;
}
0u8 if options.zeroed => {
lines += 1;
}
_ => {}
}
// if it were just `n`,
if lines == n + 1 {
break 'o i;
}
i += 1;
}
if size - i == 0 {
return Ok(());
}
};
input.seek(SeekFrom::Start(0))?;
read_n_bytes(
&mut std::io::BufReader::with_capacity(BUF_SIZE, input),
size - found,
)?;
}
}
Ok(())
}
/// Prints the requested part of an opened file to standard output.
///
/// "All but the last N" requests are delegated to `head_backwards_file`;
/// otherwise the file is wrapped in a `BUF_SIZE` buffered reader and the
/// first N bytes or lines are printed, depending on `options.mode`.
fn head_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> {
    if options.all_but_last {
        return head_backwards_file(input, options);
    }
    let mut reader = std::io::BufReader::with_capacity(BUF_SIZE, input);
    match options.mode {
        Modes::Lines(n) => read_n_lines(&mut reader, n, options.zeroed),
        Modes::Bytes(n) => read_n_bytes(&mut reader, n),
    }
}
/// Processes every entry of `options.files` in order.
///
/// `"-"` reads from standard input; any other entry is opened as a file.
/// A `==> name <==` header precedes each input when `--verbose` is set, or
/// when more than one file was given and `--quiet` is not set. Headers after
/// the first processed input are preceded by an empty line.
///
/// Failures are reported on the spot and do not stop the remaining inputs.
///
/// # Errors
///
/// Returns the number of inputs that could not be opened or read.
fn uu_head(options: &HeadOptions) -> Result<(), u32> {
    let with_headers = options.verbose || (options.files.len() > 1 && !options.quiet);
    let mut failures = 0;
    let mut first = true;
    for file in &options.files {
        let reading_stdin = file.as_str() == "-";
        let res = if reading_stdin {
            if with_headers {
                if !first {
                    println!();
                }
                println!("==> standard input <==");
            }
            let stdin = std::io::stdin();
            let mut stdin = stdin.lock();
            match (options.mode, options.all_but_last) {
                (Modes::Bytes(n), true) => read_but_last_n_bytes(&mut stdin, n),
                (Modes::Bytes(n), false) => read_n_bytes(&mut stdin, n),
                (Modes::Lines(n), true) => read_but_last_n_lines(&mut stdin, n, options.zeroed),
                (Modes::Lines(n), false) => read_n_lines(&mut stdin, n, options.zeroed),
            }
        } else {
            let name = file.as_str();
            let mut handle = match std::fs::File::open(name) {
                Ok(f) => f,
                Err(err) => {
                    let prefix = format!("cannot open '{}' for reading", name);
                    match err.kind() {
                        ErrorKind::NotFound => {
                            show_error_custom_description!(prefix, "No such file or directory");
                        }
                        ErrorKind::PermissionDenied => {
                            show_error_custom_description!(prefix, "Permission denied");
                        }
                        _ => {
                            show_error_custom_description!(prefix, "{}", err);
                        }
                    }
                    failures += 1;
                    // Note: `first` is intentionally left untouched here, as
                    // no header was printed for this input.
                    continue;
                }
            };
            if with_headers {
                if !first {
                    println!();
                }
                println!("==> {} <==", name);
            }
            head_file(&mut handle, options)
        };
        if res.is_err() {
            let name = if reading_stdin {
                "standard input"
            } else {
                file.as_str()
            };
            let prefix = format!("error reading {}", name);
            show_error_custom_description!(prefix, "Input/output error");
            failures += 1;
        }
        first = false;
    }
    match failures {
        0 => Ok(()),
        count => Err(count),
    }
}
/// Entry point of the `head` utility.
///
/// Parses `args` into [`HeadOptions`]; a parse failure is reported through
/// `crash!` with `EXIT_FAILURE`. Otherwise returns `EXIT_SUCCESS` when every
/// input was processed and `EXIT_FAILURE` when at least one failed.
pub fn uumain(args: impl uucore::Args) -> i32 {
    let options = match HeadOptions::get_from(args) {
        Err(reason) => {
            crash!(EXIT_FAILURE, "head: {}", reason);
        }
        Ok(parsed) => parsed,
    };
    if uu_head(&options).is_ok() {
        EXIT_SUCCESS
    } else {
        EXIT_FAILURE
    }
}
/// Unit tests for argument handling and the early-exit paths of the readers.
#[cfg(test)]
mod tests {
    use std::ffi::OsString;

    use super::*;

    /// Parses `args` (whitespace separated) as if given after the program name.
    fn options(args: &str) -> Result<HeadOptions, String> {
        let combined = "head ".to_owned() + args;
        let args = combined.split_whitespace();
        HeadOptions::get_from(args.map(OsString::from))
    }

    #[test]
    fn test_args_modes() {
        let args = options("-n -10M -vz").unwrap();
        assert!(args.zeroed);
        assert!(args.verbose);
        assert!(args.all_but_last);
        assert_eq!(args.mode, Modes::Lines(10 * 1024 * 1024));
    }

    #[test]
    fn test_gnu_compatibility() {
        let args = options("-n 1 -c 1 -n 5 -c kiB -vqvqv").unwrap(); // spell-checker:disable-line
        assert_eq!(args.mode, Modes::Bytes(1024));
        assert!(args.verbose);
        assert_eq!(options("-5").unwrap().mode, Modes::Lines(5));
        assert_eq!(options("-2b").unwrap().mode, Modes::Bytes(1024));
        assert_eq!(options("-5 -c 1").unwrap().mode, Modes::Bytes(1));
    }

    #[test]
    fn all_args_test() {
        assert!(options("--silent").unwrap().quiet);
        assert!(options("--quiet").unwrap().quiet);
        assert!(options("-q").unwrap().quiet);
        assert!(options("--verbose").unwrap().verbose);
        assert!(options("-v").unwrap().verbose);
        assert!(options("--zero-terminated").unwrap().zeroed);
        assert!(options("-z").unwrap().zeroed);
        assert_eq!(options("--lines 15").unwrap().mode, Modes::Lines(15));
        assert_eq!(options("-n 15").unwrap().mode, Modes::Lines(15));
        assert_eq!(options("--bytes 15").unwrap().mode, Modes::Bytes(15));
        assert_eq!(options("-c 15").unwrap().mode, Modes::Bytes(15));
    }

    #[test]
    fn test_options_errors() {
        assert!(options("-n IsThisTheRealLife?").is_err());
        assert!(options("-c IsThisJustFantasy").is_err());
    }

    #[test]
    fn test_options_correct_defaults() {
        let opts = HeadOptions::new();
        let opts2: HeadOptions = Default::default();
        assert_eq!(opts, opts2);
        assert!(!opts.verbose);
        assert!(!opts.quiet);
        assert!(!opts.zeroed);
        assert!(!opts.all_but_last);
        assert_eq!(opts.mode, Modes::Lines(10));
        assert!(opts.files.is_empty());
    }

    #[test]
    fn test_parse_mode() {
        assert_eq!(
            parse_mode("123", Modes::Lines),
            Ok((Modes::Lines(123), false))
        );
        assert_eq!(
            parse_mode("-456", Modes::Bytes),
            Ok((Modes::Bytes(456), true))
        );
        assert!(parse_mode("Nonsensical Nonsense", Modes::Bytes).is_err());
        #[cfg(target_pointer_width = "64")]
        assert!(parse_mode("1Y", Modes::Lines).is_err());
        #[cfg(target_pointer_width = "32")]
        assert!(parse_mode("1T", Modes::Bytes).is_err());
    }

    /// Runs `arg_iterate` on whitespace-separated `src` and joins the result
    /// with single spaces.
    fn arg_outputs(src: &str) -> Result<String, String> {
        let split = src.split_whitespace().map(OsString::from);
        arg_iterate(split).map(|args| {
            args.map(|s| s.to_str().unwrap().to_owned())
                .collect::<Vec<_>>()
                .join(" ")
        })
    }

    #[test]
    fn test_arg_iterate() {
        // test that normal args remain unchanged
        assert_eq!(
            arg_outputs("head -n -5 -zv"),
            Ok("head -n -5 -zv".to_owned())
        );
        // tests that nonsensical args are unchanged
        assert_eq!(
            arg_outputs("head -to_be_or_not_to_be,..."),
            Ok("head -to_be_or_not_to_be,...".to_owned())
        );
        //test that the obsolete syntax is unrolled
        assert_eq!(
            arg_outputs("head -123qvqvqzc"), // spell-checker:disable-line
            Ok("head -q -z -c 123".to_owned())
        );
        //test that bad obsoletes are an error
        assert!(arg_outputs("head -123FooBar").is_err());
        //test overflow
        assert!(arg_outputs("head -100000000000000000000000000000000000000000").is_err());
        //test that empty args remain unchanged
        assert_eq!(arg_outputs("head"), Ok("head".to_owned()));
    }

    #[test]
    #[cfg(target_os = "linux")]
    fn test_arg_iterate_bad_encoding() {
        use std::os::unix::ffi::OsStringExt;
        // Build the non-UTF-8 argument directly from bytes; constructing an
        // invalid `&str` with `from_utf8_unchecked` would be undefined behavior.
        let invalid = OsString::from_vec(vec![0x80, 0x81]);
        // this arises from a conversion from OsString to &str
        assert!(arg_iterate(vec![OsString::from("head"), invalid].into_iter()).is_err());
    }

    #[test]
    fn read_early_exit() {
        let mut empty = std::io::BufReader::new(std::io::Cursor::new(Vec::new()));
        assert!(read_n_bytes(&mut empty, 0).is_ok());
        assert!(read_n_lines(&mut empty, 0, false).is_ok());
    }
}