1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 19:47:45 +00:00
This commit is contained in:
Tobias Schottdorf 2014-06-14 16:43:39 +02:00
parent 637d2c72d0
commit 2e097d659e
7 changed files with 581 additions and 0 deletions

View file

@ -22,6 +22,7 @@ PROGS := \
fold \
md5sum \
mkdir \
nl \
paste \
printenv \
pwd \
@ -78,6 +79,7 @@ INSTALLEES := \
TEST_PROGS := \
cat \
mkdir \
nl \
seq \
tr \
truncate \

View file

@ -0,0 +1,27 @@
Nonempty
Nonempty
Followed by 10x empty
Followed by 5x empty
Followed by 4x empty
Nonempty
Nonempty
Nonempty.

18
nl/fixtures/section.txt Normal file
View file

@ -0,0 +1,18 @@
\:\:\:
HEADER1
HEADER2
\:\:
BODY1
BODY2
\:
FOOTER1
FOOTER2
\:\:\:
NEXTHEADER1
NEXTHEADER2
\:\:
NEXTBODY1
NEXTBODY2
\:
NEXTFOOTER1
NEXTFOOTER2

15
nl/fixtures/simple.txt Normal file
View file

@ -0,0 +1,15 @@
L1
L2
L3
L4
L5
L6
L7
L8
L9
L10
L11
L12
L13
L14
L15

122
nl/helper.rs Normal file
View file

@ -0,0 +1,122 @@
extern crate getopts;
extern crate regex;
use std::str;
// parse_style parses a style string into a NumberingStyle.
fn parse_style(chars: &[char]) -> Result<::NumberingStyle, String> {
match chars {
['a'] => { Ok(::NumberForAll) },
['t'] => { Ok(::NumberForNonEmpty) },
['n'] => { Ok(::NumberForNone) },
['p', ..rest] => {
match regex::Regex::new(str::from_chars(rest).as_slice()) {
Ok(re) => Ok(::NumberForRegularExpression(re)),
Err(_) => Err(String::from_str("Illegal regular expression")),
}
}
_ => {
Err(String::from_str("Illegal style encountered"))
},
}
}
// parse_options loads the options into the settings, returning an array of
// error messages.
//
// `settings.renumber` is always overwritten (it is the negation of -p). Every
// other field is only overwritten when its option is present in `opts`, so
// the defaults supplied by the caller survive for options that were not given.
// A value that cannot be interpreted does not stop parsing: a message is
// pushed onto the returned vector and the remaining options are still
// examined. An empty vector therefore means that all options were accepted.
pub fn parse_options(settings: &mut ::Settings, opts: &getopts::Matches) -> Vec<String> {
    // This vector holds error messages encountered.
    let mut errs: Vec<String> = vec![];
    settings.renumber = !opts.opt_present("p");
    match opts.opt_str("s") {
        None => {},
        Some(val) => { settings.number_separator = val; }
    }
    match opts.opt_str("n") {
        None => {},
        Some(val) => match val.as_slice() {
            "ln" => { settings.number_format = ::Left; },
            "rn" => { settings.number_format = ::Right; },
            "rz" => { settings.number_format = ::RightZero; },
            _ => { errs.push(String::from_str("Illegal value for -n")); },
        }
    }
    // -d takes one or two characters. With a single character only the first
    // delimiter character is replaced and the second keeps its current value.
    match opts.opt_str("d") {
        None => {},
        Some(val) => {
            let chars: Vec<char> = val.as_slice().chars().collect();
            let delim = chars.as_slice();
            match delim.len() {
                1 => {
                    settings.section_delimiter[0] = delim[0];
                },
                2 => {
                    settings.section_delimiter[0] = delim[0];
                    settings.section_delimiter[1] = delim[1];
                },
                _ => { errs.push(String::from_str("Illegal value for -d")); },
            }
        }
    }
    // -b, -f and -h all share the STYLE syntax understood by parse_style.
    match opts.opt_str("b") {
        None => {},
        Some(val) => {
            let chars: Vec<char> = val.as_slice().chars().collect();
            match parse_style(chars.as_slice()) {
                Ok(s) => { settings.body_numbering = s; }
                Err(message) => { errs.push(message); }
            }
        }
    }
    match opts.opt_str("f") {
        None => {},
        Some(val) => {
            let chars: Vec<char> = val.as_slice().chars().collect();
            match parse_style(chars.as_slice()) {
                Ok(s) => { settings.footer_numbering = s; }
                Err(message) => { errs.push(message); }
            }
        }
    }
    match opts.opt_str("h") {
        None => {},
        Some(val) => {
            let chars: Vec<char> = val.as_slice().chars().collect();
            match parse_style(chars.as_slice()) {
                Ok(s) => { settings.header_numbering = s; }
                Err(message) => { errs.push(message); }
            }
        }
    }
    // The remaining options are unsigned numbers; from_str yields None for
    // anything that does not parse as one.
    match opts.opt_str("i") {
        None => {}
        Some(val) => {
            let conv: Option<u64> = from_str(val.as_slice());
            match conv {
                None => {
                    errs.push(String::from_str("Illegal value for -i"));
                }
                Some(num) => { settings.line_increment = num }
            }
        }
    }
    match opts.opt_str("w") {
        None => {}
        Some(val) => {
            let conv: Option<uint> = from_str(val.as_slice());
            match conv {
                None => {
                    errs.push(String::from_str("Illegal value for -w"));
                }
                Some(num) => { settings.number_width = num }
            }
        }
    }
    match opts.opt_str("v") {
        None => {}
        Some(val) => {
            let conv: Option<u64> = from_str(val.as_slice());
            match conv {
                None => {
                    errs.push(String::from_str("Illegal value for -v"));
                }
                Some(num) => { settings.starting_line_number = num }
            }
        }
    }
    match opts.opt_str("l") {
        None => {}
        Some(val) => {
            let conv: Option<u64> = from_str(val.as_slice());
            match conv {
                None => {
                    errs.push(String::from_str("Illegal value for -l"));
                }
                Some(num) => { settings.join_blank_lines = num }
            }
        }
    }
    errs
}

334
nl/nl.rs Normal file
View file

@ -0,0 +1,334 @@
#![crate_id(name="nl", vers="1.0.0", author="Tobias Schottdorf")]
#![feature(macro_rules)]
/*
* This file is part of the uutils coreutils package.
*
* (c) Tobias Bohumir Schottdorf <tobias.schottdorf@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*
*/
#![feature(phase)]
#[phase(plugin)]
extern crate regex_macros;
extern crate regex;
extern crate getopts;
use std::os;
use std::io::{stdin};
use std::io::BufferedReader;
use std::io::fs::File;
use std::path::Path;
use getopts::{optopt, optflag, getopts, usage, OptGroup};
#[path="../common/util.rs"]
mod util;
mod helper;
// The program name, printed by version().
static NAME: &'static str = "nl";
// The synopsis line handed to getopts' usage() by print_usage().
static USAGE: &'static str = "nl [OPTION]... [FILE]...";
// A regular expression matching everything: ".?" accepts zero or one
// character, so it also matches the empty string. nl() uses it as the
// placeholder regex whenever the active numbering style carries no
// expression of its own.
static REGEX_DUMMY: &'static regex::Regex = &regex!(r".?");
// Settings stores the options used by nl to produce its output. uumain builds
// one value with the defaults, hands it to helper::parse_options to be
// updated from the command line, and then passes it read-only to nl().
struct Settings {
// The variables corresponding to the options -h, -b, and -f: the numbering
// style applied to header, body and footer lines respectively.
header_numbering: NumberingStyle,
body_numbering: NumberingStyle,
footer_numbering: NumberingStyle,
// The variable corresponding to -d: the pair of characters that, repeated
// one to three times, makes up a section delimiter line.
section_delimiter: [char, ..2],
// The variables corresponding to the options -v, -i, -l, -w.
// starting_line_number is the first number used, line_increment is added
// after every numbered line, and join_blank_lines is how many consecutive
// empty lines must accumulate before one of them receives a number.
starting_line_number: u64,
line_increment: u64,
join_blank_lines: u64,
number_width: uint, // Used with String::from_char, hence uint.
// The format of the number and the (default value for)
// renumbering each page.
number_format: NumberFormat,
renumber: bool,
// The string appended to each line number output.
number_separator: String
}
// NumberingStyle stores which lines are to be numbered.
// The possible options are:
// 1. Number all lines
// 2. Number only nonempty lines
// 3. Don't number any lines at all
// 4. Number all lines that match a regular expression; the compiled
//    expression is carried inside the variant.
enum NumberingStyle {
NumberForAll,
NumberForNonEmpty,
NumberForNone,
NumberForRegularExpression(regex::Regex)
}
// NumberFormat specifies how line numbers are output within their allocated
// space. They are justified to the left or right, in the latter case with
// the option of having all unused space to its left turned into leading zeroes.
// The variants are selected by the -n values "ln", "rn" and "rz".
enum NumberFormat {
Left,
Right,
RightZero,
}
// main forwards the process arguments to uumain and records the value it
// returns as the exit status of the process.
fn main() {
    let arguments = os::args();
    let status = uumain(arguments);
    os::set_exit_status(status);
}
// uumain parses the command line in `args` (the first element is skipped as
// the program name), then numbers every named file in order and finally
// standard input if no file was named or "-" appeared among the names.
//
// Returns the exit status: 0 on success, 1 if the options could not be
// parsed or if any input file could not be opened. A file that cannot be
// opened is reported and skipped; the remaining inputs are still processed.
pub fn uumain(args: Vec<String>) -> int {
    let possible_options = [
        optopt("b", "body-numbering", "use STYLE for numbering body lines", "STYLE"),
        optopt("d", "section-delimiter", "use CC for separating logical pages", "CC"),
        optopt("f", "footer-numbering", "use STYLE for numbering footer lines", "STYLE"),
        optopt("h", "header-numbering", "use STYLE for numbering header lines", "STYLE"),
        optopt("i", "line-increment", "line number increment at each line", ""),
        optopt("l", "join-blank-lines", "group of NUMBER empty lines counted as one", "NUMBER"),
        optopt("n", "number-format", "insert line numbers according to FORMAT", "FORMAT"),
        optflag("p", "no-renumber", "do not reset line numbers at logical pages"),
        optopt("s", "number-separator", "add STRING after (possible) line number", "STRING"),
        optopt("v", "starting-line-number", "first line number on each logical page", "NUMBER"),
        optopt("w", "number-width", "use NUMBER columns for line numbers", "NUMBER"),
        optflag("", "help", "display this help and exit"),
        optflag("V", "version", "version"),
    ];
    // A mutable settings object, initialized with the defaults.
    let mut settings = Settings {
        header_numbering: NumberForNone,
        body_numbering: NumberForAll,
        footer_numbering: NumberForNone,
        section_delimiter: ['\\', ':'],
        starting_line_number: 1,
        line_increment: 1,
        join_blank_lines: 1,
        number_width: 6,
        number_format: Right,
        renumber: true,
        number_separator: String::from_str("\t"),
    };
    let given_options = match getopts(args.tail(), possible_options) {
        Ok (m) => { m }
        Err(f) => {
            show_error!("{}", f.to_err_msg());
            print_usage(possible_options);
            return 1
        }
    };
    if given_options.opt_present("help") {
        print_usage(possible_options);
        return 0;
    }
    if given_options.opt_present("version") { version(); return 0; }
    // Update the settings from the command line options, and terminate the
    // program if some options could not successfully be parsed.
    let parse_errors = helper::parse_options(&mut settings, &given_options);
    if parse_errors.len() > 0 {
        show_error!("Invalid arguments supplied.");
        for message in parse_errors.iter() {
            println!("{}", message.as_slice());
        }
        return 1;
    }
    let files = given_options.free;
    let mut read_stdin = files.is_empty();
    // Becomes 1 as soon as one input cannot be opened.
    let mut exit_status = 0;
    for file in files.iter() {
        if file.as_slice() == "-" {
            // If both file names and '-' are specified, we choose to treat first all
            // regular files, and then read from stdin last.
            read_stdin = true;
            continue
        }
        let path = Path::new(file.as_slice());
        match File::open(&path) {
            Ok(reader) => {
                let mut buffer = BufferedReader::new(reader);
                nl(&mut buffer, &settings);
            }
            Err(e) => {
                // Report the failure instead of panicking, and keep going
                // with the other inputs.
                show_error!("{}: {}", file, e);
                exit_status = 1;
            }
        }
    }
    if read_stdin {
        let mut buffer = BufferedReader::new(stdin());
        nl(&mut buffer, &settings);
    }
    exit_status
}
// nl implements the main functionality for an individual buffer.
//
// Every line read from `reader` is written to standard output, preceded by a
// line number and the separator when the numbering style of the current
// section selects it. A line made up solely of one, two or three copies of
// the section delimiter pair switches to footer, body or header numbering
// respectively and is itself replaced by an empty output line.
//
// NOTE(review): a read error other than end of input still panics through
// the unwrap() on each line; the signature leaves no way to report it.
fn nl<T: Reader> (reader: &mut BufferedReader<T>, settings: &Settings) {
    let mut line_no = settings.starting_line_number;
    // The current line number's width as a string. Using to_str is inefficient
    // but since we only do it once, it should not hurt.
    let mut line_no_width = line_no.to_str().len();
    let line_no_width_initial = line_no_width;
    // Stores the smallest integer with one more digit than line_no, so that
    // when line_no >= line_no_threshold, we need to use one more digit.
    let mut line_no_threshold = std::num::pow(10, line_no_width) as u64;
    let mut empty_line_count: u64 = 0;
    let fill_char = match settings.number_format {
        RightZero => '0',
        _ => ' '
    };
    // Initially, we use the body's line counting settings. The regex is only
    // consulted by pass_regex; the other filters ignore it, so the dummy
    // serves as a placeholder for them.
    let mut regex_filter = match settings.body_numbering {
        NumberForRegularExpression(ref re) => re,
        _ => REGEX_DUMMY,
    };
    // The filter has to follow the body style as well. Starting out with
    // pass_regex unconditionally would number every line for the styles
    // "nonempty" and "none" until the first section delimiter is seen.
    let mut line_filter = match settings.body_numbering {
        NumberForAll => pass_all,
        NumberForNonEmpty => pass_nonempty,
        NumberForNone => pass_none,
        NumberForRegularExpression(_) => pass_regex,
    };
    for mut l in reader.lines().map(|r| r.unwrap()) {
        // Sanitize the string. We want to print the newline ourselves.
        if l.as_slice().ends_with("\n") {
            l.pop_char();
        }
        // Next we iterate through the individual chars to see if this
        // is one of the special lines starting a new "section" in the
        // document.
        let line = l.as_slice();
        let mut odd = false;
        // matched_group counts how many copies of section_delimiter
        // this string consists of (0 if there's anything else)
        let mut matched_groups = 0u8;
        for c in line.chars() {
            // If this is a newline character, the loop should end.
            if c == '\n' {
                break;
            }
            // If we have already seen three groups (corresponding to
            // a header) or the current char does not form part of
            // a new group, then this line is not a segment indicator.
            if matched_groups >= 3
                || settings.section_delimiter[std::bool::to_bit::<uint>(odd)] != c {
                matched_groups = 0;
                break;
            }
            if odd {
                // We have seen a new group and count it.
                matched_groups += 1;
            }
            odd = !odd;
        }
        // A dangling first half of a delimiter pair (for example "\:\")
        // means the line is not made up of whole pairs only, so it is an
        // ordinary line after all.
        if odd {
            matched_groups = 0;
        }
        // See how many groups we matched. That will tell us if this is
        // a line starting a new segment, and the number of groups
        // indicates what type of segment.
        if matched_groups > 0 {
            // The current line is a section delimiter, so we output
            // a blank line.
            println!("");
            // However the line does not count as a blank line, so we
            // reset the counter used for --join-blank-lines.
            empty_line_count = 0;
            let style = match matched_groups {
                3 => {
                    // This is a header, so we may need to reset the
                    // line number and the line width
                    if settings.renumber {
                        line_no = settings.starting_line_number;
                        line_no_width = line_no_width_initial;
                        line_no_threshold = std::num::pow(10, line_no_width) as u64;
                    }
                    &settings.header_numbering
                },
                1 => {
                    &settings.footer_numbering
                },
                // The only option left is 2, but rust wants
                // a catch-all here.
                _ => {
                    &settings.body_numbering
                }
            };
            match *style {
                NumberForAll => {
                    line_filter = pass_all;
                },
                NumberForNonEmpty => {
                    line_filter = pass_nonempty;
                },
                NumberForNone => {
                    line_filter = pass_none;
                }
                NumberForRegularExpression(ref re) => {
                    line_filter = pass_regex;
                    regex_filter = re;
                }
            }
            continue;
        }
        // From this point on we format and print a "regular" line.
        if line == "" {
            // The line is empty, which means that we have to care
            // about the --join-blank-lines parameter.
            empty_line_count += 1;
        } else {
            // This saves us from having to check for an empty string
            // in the next selector.
            empty_line_count = 0;
        }
        if !line_filter(line, regex_filter)
            || ( empty_line_count > 0 && empty_line_count < settings.join_blank_lines) {
            // No number is printed for this line. Either we did not
            // want to print one in the first place, or it is a blank
            // line but we are still collecting more blank lines via
            // the option --join-blank-lines.
            println!("{}", line);
            continue;
        }
        // If we make it here, then either we are printing a non-empty
        // line or assigning a line number to an empty line. Either
        // way, start counting empties from zero once more.
        empty_line_count = 0;
        // A line number is to be printed. The fill makes up the difference
        // between the requested width and the digits of the number; a number
        // wider than the field is printed in full without any fill.
        let mut w: uint = 0;
        if settings.number_width > line_no_width {
            w = settings.number_width - line_no_width;
        }
        let fill = String::from_char(w, fill_char);
        match settings.number_format {
            Left => {
                println!("{1}{0}{2}{3}", fill, line_no, settings.number_separator, line)
            },
            _ => {
                println!("{0}{1}{2}{3}", fill, line_no, settings.number_separator, line)
            }
        }
        // Now update the variables for the (potential) next
        // line.
        line_no += settings.line_increment;
        while line_no >= line_no_threshold {
            // The line number just got longer.
            line_no_threshold *= 10;
            line_no_width += 1;
        }
    }
}
// pass_regex is the line filter for regular-expression numbering: a line
// passes when `re` matches it.
fn pass_regex(line: &str, re: &regex::Regex) -> bool {
re.is_match(line)
}
// pass_nonempty is the line filter that lets through every line containing
// at least one character. The regex argument only exists so that all filters
// share one signature; it is not looked at.
fn pass_nonempty(line: &str, _: &regex::Regex) -> bool {
    line != ""
}
// pass_none is the line filter that rejects every line; both arguments are
// ignored and exist only to match the signature of the other filters.
fn pass_none(_: &str, _: &regex::Regex) -> bool {
false
}
// pass_all is the line filter that accepts every line; both arguments are
// ignored and exist only to match the signature of the other filters.
fn pass_all(_: &str, _: &regex::Regex) -> bool {
true
}
// print_usage writes the help text that getopts generates from the synopsis
// line and the given option descriptions to standard output.
fn print_usage(opts: &[OptGroup]) {
    let help_text = usage(USAGE, opts);
    println!("{:s}", help_text);
}
// version prints the program name followed by the hard-coded version string.
fn version () {
println!("{} version 1.0.0", NAME);
}

63
nl/test.rs Normal file
View file

@ -0,0 +1,63 @@
use std::io::process::Command;
use std::str;
// Input on stdin that lacks a trailing newline is still numbered, and the
// output line is newline-terminated. With the default settings the number is
// right-aligned in a field of width 6, i.e. five spaces precede the "1", and
// it is followed by a tab.
#[test]
fn test_stdin_nonewline() {
    let mut process = Command::new("build/nl").spawn().unwrap();
    process.stdin.take_unwrap().write(bytes!("No Newline")).unwrap();
    let po = process.wait_with_output().unwrap();
    let out = str::from_utf8(po.output.as_slice()).unwrap();
    assert_eq!(out, "     1\tNo Newline\n");
}
// With "-s -" and "-w 1" every line read from stdin is prefixed by its bare
// number and a dash, without any padding.
#[test]
fn test_stdin_newline() {
    let mut child = Command::new("build/nl")
        .arg("-s").arg("-")
        .arg("-w").arg("1")
        .spawn().unwrap();
    child.stdin.take_unwrap().write(bytes!("Line One\nLine Two\n")).unwrap();
    let result = child.wait_with_output().unwrap();
    let text = str::from_utf8(result.output.as_slice()).unwrap();
    assert_eq!(text, "1-Line One\n2-Line Two\n");
}
// With the default width of 6 and an increment of 1000, all fifteen numbers
// fit into the field and are zero-padded on the left ("-n rz").
#[test]
fn test_padding_without_overflow() {
    let result = Command::new("build/nl")
        .arg("-i").arg("1000")
        .arg("-s").arg("x")
        .arg("-n").arg("rz")
        .arg("nl/fixtures/simple.txt")
        .output().unwrap();
    let text = str::from_utf8(result.output.as_slice()).unwrap();
    assert_eq!(text, "000001xL1\n001001xL2\n002001xL3\n003001xL4\n004001xL5\n005001xL6\n006001xL7\n007001xL8\n008001xL9\n009001xL10\n010001xL11\n011001xL12\n012001xL13\n013001xL14\n014001xL15\n");
}
// With a width of 4 the numbers from 10001 onwards no longer fit; they must
// be printed in full rather than truncated.
#[test]
fn test_padding_with_overflow() {
    let result = Command::new("build/nl")
        .arg("-i").arg("1000")
        .arg("-s").arg("x")
        .arg("-n").arg("rz")
        .arg("-w").arg("4")
        .arg("nl/fixtures/simple.txt")
        .output().unwrap();
    let text = str::from_utf8(result.output.as_slice()).unwrap();
    assert_eq!(text, "0001xL1\n1001xL2\n2001xL3\n3001xL4\n4001xL5\n5001xL6\n6001xL7\n7001xL8\n8001xL9\n9001xL10\n10001xL11\n11001xL12\n12001xL13\n13001xL14\n14001xL15\n");
}
// Runs nl with "-s | -n ln -w 3 -b a -l 5" over two fixtures and compares the
// complete output.
//
// The first fixture checks that section delimiter lines become empty lines,
// that header and footer lines stay unnumbered and that numbering restarts
// with each logical page. The second checks that only every fifth
// consecutive empty line receives a number.
//
// Numbers are left-aligned in a field of width 3, so a one-digit number is
// followed by two spaces and a two-digit number by one space before the
// separator.
#[test]
fn test_sections_and_styles() {
    for &(fixture, output) in [
        (
            "nl/fixtures/section.txt",
            "\nHEADER1\nHEADER2\n\n1  |BODY1\n2  |BODY2\n\nFOOTER1\nFOOTER2\n\nNEXTHEADER1\nNEXTHEADER2\n\n1  |NEXTBODY1\n2  |NEXTBODY2\n\nNEXTFOOTER1\nNEXTFOOTER2\n"
        ),
        (
            "nl/fixtures/joinblanklines.txt",
            "1  |Nonempty\n2  |Nonempty\n3  |Followed by 10x empty\n\n\n\n\n4  |\n\n\n\n\n5  |\n6  |Followed by 5x empty\n\n\n\n\n7  |\n8  |Followed by 4x empty\n\n\n\n\n9  |Nonempty\n10 |Nonempty\n11 |Nonempty.\n"
        ),
    ].iter() {
        let po = Command::new("build/nl").arg("-s").arg("|").arg("-n").arg("ln")
            .arg("-w").arg("3").arg("-b").arg("a").arg("-l").arg("5")
            .arg(fixture).output().unwrap();
        assert_eq!(str::from_utf8(po.output.as_slice()).unwrap(), output);
    }
}