diff --git a/Makefile b/Makefile index 5b0478d88..e390530f9 100644 --- a/Makefile +++ b/Makefile @@ -22,6 +22,7 @@ PROGS := \ fold \ md5sum \ mkdir \ + nl \ paste \ printenv \ pwd \ @@ -78,6 +79,7 @@ INSTALLEES := \ TEST_PROGS := \ cat \ mkdir \ + nl \ seq \ tr \ truncate \ diff --git a/nl/fixtures/joinblanklines.txt b/nl/fixtures/joinblanklines.txt new file mode 100644 index 000000000..197859644 --- /dev/null +++ b/nl/fixtures/joinblanklines.txt @@ -0,0 +1,27 @@ +Nonempty +Nonempty +Followed by 10x empty + + + + + + + + + + +Followed by 5x empty + + + + + +Followed by 4x empty + + + + +Nonempty +Nonempty +Nonempty. diff --git a/nl/fixtures/section.txt b/nl/fixtures/section.txt new file mode 100644 index 000000000..62359e7bf --- /dev/null +++ b/nl/fixtures/section.txt @@ -0,0 +1,18 @@ +\:\:\: +HEADER1 +HEADER2 +\:\: +BODY1 +BODY2 +\: +FOOTER1 +FOOTER2 +\:\:\: +NEXTHEADER1 +NEXTHEADER2 +\:\: +NEXTBODY1 +NEXTBODY2 +\: +NEXTFOOTER1 +NEXTFOOTER2 diff --git a/nl/fixtures/simple.txt b/nl/fixtures/simple.txt new file mode 100644 index 000000000..b168ae840 --- /dev/null +++ b/nl/fixtures/simple.txt @@ -0,0 +1,15 @@ +L1 +L2 +L3 +L4 +L5 +L6 +L7 +L8 +L9 +L10 +L11 +L12 +L13 +L14 +L15 diff --git a/nl/helper.rs b/nl/helper.rs new file mode 100644 index 000000000..e3401cb06 --- /dev/null +++ b/nl/helper.rs @@ -0,0 +1,122 @@ +extern crate getopts; +extern crate regex; + +use std::str; + +// parse_style parses a style string into a NumberingStyle. 
+fn parse_style(chars: &[char]) -> Result<::NumberingStyle, String> { + match chars { + ['a'] => { Ok(::NumberForAll) }, + ['t'] => { Ok(::NumberForNonEmpty) }, + ['n'] => { Ok(::NumberForNone) }, + ['p', ..rest] => { + match regex::Regex::new(str::from_chars(rest).as_slice()) { + Ok(re) => Ok(::NumberForRegularExpression(re)), + Err(_) => Err(String::from_str("Illegal regular expression")), + } + } + _ => { + Err(String::from_str("Illegal style encountered")) + }, + } +} + +// parse_options loads the options into the settings, returning an array of +// error messages. +pub fn parse_options(settings: &mut ::Settings, opts: &getopts::Matches) -> Vec<String> { + // This vector holds error messages encountered. + let mut errs: Vec<String> = vec![]; + settings.renumber = !opts.opt_present("p"); + match opts.opt_str("s") { + None => {}, + Some(val) => { settings.number_separator = val; } + } + match opts.opt_str("n") { + None => {}, + Some(val) => match val.as_slice() { + "ln" => { settings.number_format = ::Left; }, + "rn" => { settings.number_format = ::Right; }, + "rz" => { settings.number_format = ::RightZero; }, + _ => { errs.push(String::from_str("Illegal value for -n")); }, + } + } + match opts.opt_str("b") { + None => {}, + Some(val) => { + let chars: Vec<char> = val.as_slice().chars().collect(); + match parse_style(chars.as_slice()) { + Ok(s) => { settings.body_numbering = s; } + Err(message) => { errs.push(message); } + } + } + } + match opts.opt_str("f") { + None => {}, + Some(val) => { + let chars: Vec<char> = val.as_slice().chars().collect(); + match parse_style(chars.as_slice()) { + Ok(s) => { settings.footer_numbering = s; } + Err(message) => { errs.push(message); } + } + } + } + match opts.opt_str("h") { + None => {}, + Some(val) => { + let chars: Vec<char> = val.as_slice().chars().collect(); + match parse_style(chars.as_slice()) { + Ok(s) => { settings.header_numbering = s; } + Err(message) => { errs.push(message); } + } + } + } + match opts.opt_str("i") { + None => {} + Some(val) => {
+ let conv: Option<u64> = from_str(val.as_slice()); + match conv { + None => { + errs.push(String::from_str("Illegal value for -i")); + } + Some(num) => { settings.line_increment = num } + } + } + } + match opts.opt_str("w") { + None => {} + Some(val) => { + let conv: Option<uint> = from_str(val.as_slice()); + match conv { + None => { + errs.push(String::from_str("Illegal value for -w")); + } + Some(num) => { settings.number_width = num } + } + } + } + match opts.opt_str("v") { + None => {} + Some(val) => { + let conv: Option<u64> = from_str(val.as_slice()); + match conv { + None => { + errs.push(String::from_str("Illegal value for -v")); + } + Some(num) => { settings.starting_line_number = num } + } + } + } + match opts.opt_str("l") { + None => {} + Some(val) => { + let conv: Option<u64> = from_str(val.as_slice()); + match conv { + None => { + errs.push(String::from_str("Illegal value for -l")); + } + Some(num) => { settings.join_blank_lines = num } + } + } + } + errs +} diff --git a/nl/nl.rs b/nl/nl.rs new file mode 100644 index 000000000..91b4636c4 --- /dev/null +++ b/nl/nl.rs @@ -0,0 +1,334 @@ +#![crate_id(name="nl", vers="1.0.0", author="Tobias Schottdorf")] +#![feature(macro_rules)] +/* + * This file is part of the uutils coreutils package. + * + * (c) Tobias Bohumir Schottdorf + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + * + */ + +#![feature(phase)] +#[phase(plugin)] +extern crate regex_macros; +extern crate regex; +extern crate getopts; + +use std::os; +use std::io::{stdin}; +use std::io::BufferedReader; +use std::io::fs::File; +use std::path::Path; +use getopts::{optopt, optflag, getopts, usage, OptGroup}; + +#[path="../common/util.rs"] +mod util; +mod helper; + +static NAME: &'static str = "nl"; +static USAGE: &'static str = "nl [OPTION]... [FILE]..."; +// A regular expression matching everything.
+static REGEX_DUMMY: &'static regex::Regex = &regex!(r".?"); + +// Settings store options used by nl to produce its output. +struct Settings { + // The variables corresponding to the options -h, -b, and -f. + header_numbering: NumberingStyle, + body_numbering: NumberingStyle, + footer_numbering: NumberingStyle, + // The variable corresponding to -d + section_delimiter: [char, ..2], + // The variables corresponding to the options -v, -i, -l, -w. + starting_line_number: u64, + line_increment: u64, + join_blank_lines: u64, + number_width: uint, // Used with String::from_char, hence uint. + // The format of the number and the (default value for) + // renumbering each page. + number_format: NumberFormat, + renumber: bool, + // The string appended to each line number output. + number_separator: String +} + +// NumberingStyle stores which lines are to be numbered. +// The possible options are: +// 1. Number all lines +// 2. Number only nonempty lines +// 3. Don't number any lines at all +// 4. Number all lines that match a basic regular expression. +enum NumberingStyle { + NumberForAll, + NumberForNonEmpty, + NumberForNone, + NumberForRegularExpression(regex::Regex) +} + +// NumberFormat specifies how line numbers are output within their allocated +// space. They are justified to the left or right, in the latter case with +// the option of having all unused space to its left turned into leading zeroes.
+enum NumberFormat { + Left, + Right, + RightZero, +} + +fn main () { + os::set_exit_status(uumain(os::args())); +} + +pub fn uumain(args: Vec<String>) -> int { + let possible_options = [ + optopt("b", "body-numbering", "use STYLE for numbering body lines", "STYLE"), + optopt("d", "section-delimiter", "use CC for separating logical pages", "CC"), + optopt("f", "footer-numbering", "use STYLE for numbering footer lines", "STYLE"), + optopt("h", "header-numbering", "use STYLE for numbering header lines", "STYLE"), + optopt("i", "line-increment", "line number increment at each line", ""), + optopt("l", "join-blank-lines", "group of NUMBER empty lines counted as one", "NUMBER"), + optopt("n", "number-format", "insert line numbers according to FORMAT", "FORMAT"), + optflag("p", "no-renumber", "do not reset line numbers at logical pages"), + optopt("s", "number-separator", "add STRING after (possible) line number", "STRING"), + optopt("v", "starting-line-number", "first line number on each logical page", "NUMBER"), + optopt("w", "number-width", "use NUMBER columns for line numbers", "NUMBER"), + optflag("", "help", "display this help and exit"), + optflag("V", "version", "version"), + ]; + + // A mutable settings object, initialized with the defaults.
+ let mut settings = Settings { + header_numbering: NumberForNone, + body_numbering: NumberForAll, + footer_numbering: NumberForNone, + section_delimiter: ['\\', ':'], + starting_line_number: 1, + line_increment: 1, + join_blank_lines: 1, + number_width: 6, + number_format: Right, + renumber: true, + number_separator: String::from_str("\t"), + }; + + let given_options = match getopts(args.tail(), possible_options) { + Ok (m) => { m } + Err(f) => { + show_error!("{}", f.to_err_msg()); + print_usage(possible_options); + return 1 + } + }; + + if given_options.opt_present("help") { + print_usage(possible_options); + return 0; + } + if given_options.opt_present("version") { version(); return 0; } + + // Update the settings from the command line options, and terminate the + // program if some options could not successfully be parsed. + let parse_errors = helper::parse_options(&mut settings, &given_options); + if parse_errors.len() > 0 { + show_error!("Invalid arguments supplied."); + for message in parse_errors.iter() { + println!("{}", message.as_slice()); + } + return 1; + } + + let files = given_options.free; + let mut read_stdin = files.is_empty(); + + for file in files.iter() { + if file.as_slice() == "-" { + // If both file names and '-' are specified, we choose to treat first all + // regular files, and then read from stdin last. + read_stdin = true; + continue + } + let path = Path::new(file.as_slice()); + let reader = File::open(&path).unwrap(); + let mut buffer = BufferedReader::new(reader); + nl(&mut buffer, &settings); + } + + if read_stdin { + let mut buffer = BufferedReader::new(stdin()); + nl(&mut buffer, &settings); + } + 0 +} + +// nl implements the main functionality for an individual buffer. +fn nl<T: Reader> (reader: &mut BufferedReader<T>, settings: &Settings) { + let mut line_no = settings.starting_line_number; + // The current line number's width as a string. Using to_str is inefficient + // but since we only do it once, it should not hurt.
+ let mut line_no_width = line_no.to_str().len(); + let line_no_width_initial = line_no_width; + // Stores the smallest integer with one more digit than line_no, so that + // when line_no >= line_no_threshold, we need to use one more digit. + let mut line_no_threshold = std::num::pow(10, line_no_width) as u64; + let mut empty_line_count: u64 = 0; + let fill_char = match settings.number_format { + RightZero => '0', + _ => ' ' + }; + // Initially, we use the body's line counting settings + let mut regex_filter = match settings.body_numbering { + NumberForRegularExpression(ref re) => re, + _ => REGEX_DUMMY, + }; + let mut line_filter = pass_regex; + for mut l in reader.lines().map(|r| r.unwrap()) { + // Sanitize the string. We want to print the newline ourselves. + if l.as_slice().chars().rev().next().unwrap() == '\n' { + l.pop_char(); + } + // Next we iterate through the individual chars to see if this + // is one of the special lines starting a new "section" in the + // document. + let line = l.as_slice(); + let mut odd = false; + // matched_group counts how many copies of section_delimiter + // this string consists of (0 if there's anything else) + let mut matched_groups = 0u8; + for c in line.chars() { + // If this is a newline character, the loop should end. + if c == '\n' { + break; + } + // If we have already seen three groups (corresponding to + // a header) or the current char does not form part of + // a new group, then this line is not a segment indicator. + if matched_groups >= 3 + || settings.section_delimiter[std::bool::to_bit::<uint>(odd)] != c { + matched_groups = 0; + break; + } + if odd { + // We have seen a new group and count it. + matched_groups += 1; + } + odd = !odd; + } + + // See how many groups we matched. That will tell us if this is + // a line starting a new segment, and the number of groups + // indicates what type of segment. + if matched_groups > 0 { + // The current line is a section delimiter, so we output + // a blank line.
+ println!(""); + // However the line does not count as a blank line, so we + // reset the counter used for --join-blank-lines. + empty_line_count = 0; + match *match matched_groups { + 3 => { + // This is a header, so we may need to reset the + // line number and the line width + if settings.renumber { + line_no = settings.starting_line_number; + line_no_width = line_no_width_initial; + line_no_threshold = std::num::pow(10, line_no_width) as u64; + } + &settings.header_numbering + }, + 1 => { + &settings.footer_numbering + }, + // The only option left is 2, but rust wants + // a catch-all here. + _ => { + &settings.body_numbering + } + } { + NumberForAll => { + line_filter = pass_all; + }, + NumberForNonEmpty => { + line_filter = pass_nonempty; + }, + NumberForNone => { + line_filter = pass_none; + } + NumberForRegularExpression(ref re) => { + line_filter = pass_regex; + regex_filter = re; + } + } + continue; + } + // From this point on we format and print a "regular" line. + if line == "" { + // The line is empty, which means that we have to care + // about the --join-blank-lines parameter. + empty_line_count += 1; + } else { + // This saves us from having to check for an empty string + // in the next selector. + empty_line_count = 0; + } + if !line_filter(line, regex_filter) + || ( empty_line_count > 0 && empty_line_count < settings.join_blank_lines) { + // No number is printed for this line. Either we did not + // want to print one in the first place, or it is a blank + // line but we are still collecting more blank lines via + // the option --join-blank-lines. + println!("{}", line); + continue; + } + // If we make it here, then either we are printing a non-empty + // line or assigning a line number to an empty line. Either + // way, start counting empties from zero once more. + empty_line_count = 0; + // A line number is to be printed. 
+ let mut w: uint = 0; + if settings.number_width > line_no_width { + w = settings.number_width - line_no_width; + } + let fill = String::from_char(w, fill_char); + match settings.number_format { + Left => { + println!("{1}{0}{2}{3}", fill, line_no, settings.number_separator, line) + }, + _ => { + println!("{0}{1}{2}{3}", fill, line_no, settings.number_separator, line) + } + } + // Now update the variables for the (potential) next + // line. + line_no += settings.line_increment; + while line_no >= line_no_threshold { + // The line number just got longer. + line_no_threshold *= 10; + line_no_width += 1; + } + + } +} + +fn pass_regex(line: &str, re: &regex::Regex) -> bool { + re.is_match(line) +} + +fn pass_nonempty(line: &str, _: &regex::Regex) -> bool { + line.len() > 0 +} + +fn pass_none(_: &str, _: &regex::Regex) -> bool { + false +} + +fn pass_all(_: &str, _: &regex::Regex) -> bool { + true +} + +fn print_usage(opts: &[OptGroup]) { + println!("{:s}", usage(USAGE, opts)); +} + +fn version () { + println!("{} version 1.0.0", NAME); +} diff --git a/nl/test.rs b/nl/test.rs new file mode 100644 index 000000000..9ed7668bb --- /dev/null +++ b/nl/test.rs @@ -0,0 +1,63 @@ +use std::io::process::Command; +use std::str; + +#[test] +fn test_stdin_nonewline() { + + let mut process = Command::new("build/nl").spawn().unwrap(); + process.stdin.take_unwrap().write(bytes!("No Newline")).unwrap(); + let po = process.wait_with_output().unwrap(); + let out = str::from_utf8(po.output.as_slice()).unwrap(); + + assert_eq!(out, " 1\tNo Newline\n"); +} +#[test] +fn test_stdin_newline() { + + let mut process = Command::new("build/nl").arg("-s").arg("-") + .arg("-w").arg("1").spawn().unwrap(); + + process.stdin.take_unwrap().write(bytes!("Line One\nLine Two\n")).unwrap(); + let po = process.wait_with_output().unwrap(); + let out = str::from_utf8(po.output.as_slice()).unwrap(); + + assert_eq!(out, "1-Line One\n2-Line Two\n"); +} + +#[test] +fn test_padding_without_overflow() { + let po =
Command::new("build/nl").arg("-i").arg("1000").arg("-s").arg("x") + .arg("-n").arg("rz").arg("nl/fixtures/simple.txt").output().unwrap(); + + let out = str::from_utf8(po.output.as_slice()).unwrap(); + assert_eq!(out, "000001xL1\n001001xL2\n002001xL3\n003001xL4\n004001xL5\n005001xL6\n006001xL7\n007001xL8\n008001xL9\n009001xL10\n010001xL11\n011001xL12\n012001xL13\n013001xL14\n014001xL15\n"); +} + +#[test] +fn test_padding_with_overflow() { + let po = Command::new("build/nl").arg("-i").arg("1000").arg("-s").arg("x") + .arg("-n").arg("rz").arg("-w").arg("4") + .arg("nl/fixtures/simple.txt").output().unwrap(); + + let out = str::from_utf8(po.output.as_slice()).unwrap(); + assert_eq!(out, "0001xL1\n1001xL2\n2001xL3\n3001xL4\n4001xL5\n5001xL6\n6001xL7\n7001xL8\n8001xL9\n9001xL10\n10001xL11\n11001xL12\n12001xL13\n13001xL14\n14001xL15\n"); +} + +#[test] +fn test_sections_and_styles() { + for &(fixture, output) in [ + ( + "nl/fixtures/section.txt", + "\nHEADER1\nHEADER2\n\n1 |BODY1\n2 |BODY2\n\nFOOTER1\nFOOTER2\n\nNEXTHEADER1\nNEXTHEADER2\n\n1 |NEXTBODY1\n2 |NEXTBODY2\n\nNEXTFOOTER1\nNEXTFOOTER2\n" + ), + ( + "nl/fixtures/joinblanklines.txt", + "1 |Nonempty\n2 |Nonempty\n3 |Followed by 10x empty\n\n\n\n\n4 |\n\n\n\n\n5 |\n6 |Followed by 5x empty\n\n\n\n\n7 |\n8 |Followed by 4x empty\n\n\n\n\n9 |Nonempty\n10 |Nonempty\n11 |Nonempty.\n" + ), + ].iter() { + let po = Command::new("build/nl").arg("-s").arg("|").arg("-n").arg("ln") + .arg("-w").arg("3").arg("-b").arg("a").arg("-l").arg("5") + .arg(fixture).output().unwrap(); + assert_eq!(str::from_utf8(po.output.as_slice()).unwrap(), output); + } +}