From cbab8677e672f562db1b990426d31dc853cb00f6 Mon Sep 17 00:00:00 2001 From: Sebastian Bentmar Holgersson Date: Tue, 3 Jan 2023 18:23:28 +0000 Subject: [PATCH 1/5] expand: improve handing of + --- src/uu/expand/src/expand.rs | 18 +++++----- tests/by-util/test_expand.rs | 66 ++++++++++++++++++++++++++++++++++-- 2 files changed, 73 insertions(+), 11 deletions(-) diff --git a/src/uu/expand/src/expand.rs b/src/uu/expand/src/expand.rs index cbb5593f9..8ba9d74db 100644 --- a/src/uu/expand/src/expand.rs +++ b/src/uu/expand/src/expand.rs @@ -38,7 +38,7 @@ static DEFAULT_TABSTOP: usize = 8; /// The mode to use when replacing tabs beyond the last one specified in /// the `--tabs` argument. -#[derive(PartialEq)] +#[derive(PartialEq, Debug)] enum RemainingMode { None, Slash, @@ -126,12 +126,8 @@ fn tabstops_parse(s: &str) -> Result<(RemainingMode, Vec), ParseError> { let bytes = word.as_bytes(); for i in 0..bytes.len() { match bytes[i] { - b'+' => { - remaining_mode = RemainingMode::Plus; - } - b'/' => { - remaining_mode = RemainingMode::Slash; - } + b'+' => remaining_mode = RemainingMode::Plus, + b'/' => remaining_mode = RemainingMode::Slash, _ => { // Parse a number from the byte sequence. let s = from_utf8(&bytes[i..]).unwrap(); @@ -190,10 +186,16 @@ fn tabstops_parse(s: &str) -> Result<(RemainingMode, Vec), ParseError> { // then just use the default tabstops. if nums.is_empty() { nums = vec![DEFAULT_TABSTOP]; + remaining_mode = RemainingMode::None; + } + + if nums.len() == 1 { + remaining_mode = RemainingMode::None; } Ok((remaining_mode, nums)) } +#[derive(Debug)] struct Options { files: Vec, tabstops: Vec, @@ -374,7 +376,7 @@ enum CharType { fn expand(options: &Options) -> std::io::Result<()> { use self::CharType::*; - + println!("{:?}", options); let mut output = BufWriter::new(stdout()); let ts = options.tabstops.as_ref(); let mut buf = Vec::new(); diff --git a/tests/by-util/test_expand.rs b/tests/by-util/test_expand.rs index ac9eb1fad..c4da7503f 100644 --- a/tests/by-util/test_expand.rs +++ b/tests/by-util/test_expand.rs @@ -275,7 +275,7 @@ fn test_tabs_shortcut() { .args(&["-2", "-5", "-7"]) .pipe_in("\ta\tb\tc") .succeeds() - // 01234567890 + // 01234567890 .stdout_is(" a b c"); } @@ -285,7 +285,7 @@ fn test_comma_separated_tabs_shortcut() { .args(&["-2,5", "-7"]) .pipe_in("\ta\tb\tc") .succeeds() - // 01234567890 + // 01234567890 .stdout_is(" a b c"); } @@ -295,6 +295,66 @@ fn test_tabs_and_tabs_shortcut_mixed() { .args(&["-2", "--tabs=5", "-7"]) .pipe_in("\ta\tb\tc") .succeeds() - // 01234567890 + // 01234567890 .stdout_is(" a b c"); } + +#[test] +fn test_ignore_initial_plus() { + new_ucmd!() + .args(&["--tabs=+3"]) + .pipe_in("\ta\tb\tc") + .succeeds() + // 01234567890 + .stdout_is(" a b c"); +} + +#[test] +fn test_ignore_initial_pluses() { + new_ucmd!() + .args(&["--tabs=++3"]) + .pipe_in("\ta\tb\tc") + .succeeds() + // 01234567890 + .stdout_is(" a b c"); +} + +#[test] +fn test_ignore_initial_slash() { + new_ucmd!() + .args(&["--tabs=/3"]) + .pipe_in("\ta\tb\tc") + .succeeds() + // 01234567890 + .stdout_is(" a b c"); +} + +#[test] +fn test_ignore_initial_slashes() { + new_ucmd!() + .args(&["--tabs=//3"]) + .pipe_in("\ta\tb\tc") + .succeeds() + // 01234567890 + .stdout_is(" a b c"); +} + +#[test] +fn test_ignore_initial_plus_slash_combination() { + new_ucmd!() + .args(&["--tabs=+/3"]) + .pipe_in("\ta\tb\tc") + .succeeds() + // 01234567890 + .stdout_is(" a b c"); +} + +#[test] +fn test_comma_with_plus_and_multi_character_values() { + new_ucmd!() + .args(&["--tabs=3,+6"]) + .pipe_in("\taaa\tbbbb\tcccc") + .succeeds() + // 01234567890 + .stdout_is(" aaa bbb ccc"); +} From ddc6fabca043847791df03e3b21cf81fa2934498 Mon Sep 17 00:00:00 2001 From: Sebastian Bentmar Holgersson Date: Tue, 3 Jan 2023 20:49:20 +0000 Subject: [PATCH 2/5] expand: all perl tests passing --- src/uu/expand/src/expand.rs | 13 +++++++++++-- tests/by-util/test_expand.rs | 12 +++++++++++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/uu/expand/src/expand.rs b/src/uu/expand/src/expand.rs index 8ba9d74db..4377f3083 100644 --- a/src/uu/expand/src/expand.rs +++ b/src/uu/expand/src/expand.rs @@ -348,7 +348,16 @@ fn next_tabstop(tabstops: &[usize], col: usize, remaining_mode: &RemainingMode) match remaining_mode { RemainingMode::Plus => match tabstops[0..num_tabstops - 1].iter().find(|&&t| t > col) { Some(t) => t - col, - None => tabstops[num_tabstops - 1] - 1, + None => { + let step_size = tabstops[num_tabstops - 1]; + let last_before_repeating = tabstops[num_tabstops-2]; + let mut r = last_before_repeating+step_size; + + while col >= r { + r += step_size; + } + r - col + } }, RemainingMode::Slash => match tabstops[0..num_tabstops - 1].iter().find(|&&t| t > col) { Some(t) => t - col, @@ -376,7 +385,7 @@ enum CharType { fn expand(options: &Options) -> std::io::Result<()> { use self::CharType::*; - println!("{:?}", options); + let mut output = BufWriter::new(stdout()); let ts = options.tabstops.as_ref(); let mut buf = Vec::new(); diff --git a/tests/by-util/test_expand.rs b/tests/by-util/test_expand.rs index c4da7503f..d4fbf9f95 100644 --- a/tests/by-util/test_expand.rs +++ b/tests/by-util/test_expand.rs @@ -353,7 +353,17 @@ fn test_ignore_initial_plus_slash_combination() { fn test_comma_with_plus_and_multi_character_values() { new_ucmd!() .args(&["--tabs=3,+6"]) - .pipe_in("\taaa\tbbbb\tcccc") + .pipe_in("\taaa\tbbb\tccc") + .succeeds() + // 01234567890 + .stdout_is(" aaa bbb ccc"); +} + +#[test] +fn test_comma_with_plus_and_multi_character_values() { + new_ucmd!() + .args(&["--tabs=3,+6"]) + .pipe_in("\taaa\tbbb\tccc") .succeeds() // 01234567890 .stdout_is(" aaa bbb ccc"); From 69d87295983cc5562a8f13c932c9dbacdf6d1bdf Mon Sep 17 00:00:00 2001 From: Sebastian Bentmar Holgersson Date: Tue, 3 Jan 2023 21:27:20 +0000 Subject: [PATCH 3/5] expand: replace while loop with constant time algorithm --- src/uu/expand/src/expand.rs | 19 ++++++------------- tests/by-util/test_expand.rs | 34 +++++++++++++++++++++++++++------- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/src/uu/expand/src/expand.rs b/src/uu/expand/src/expand.rs index 4377f3083..2094631b7 100644 --- a/src/uu/expand/src/expand.rs +++ b/src/uu/expand/src/expand.rs @@ -338,11 +338,6 @@ fn open(path: &str) -> BufReader> { /// in the `tabstops` slice is interpreted as a relative number of /// spaces, which this function will return for every input value of /// `col` beyond the end of the second-to-last element of `tabstops`. -/// -/// If `remaining_mode` is [`RemainingMode::Plus`], then the last entry -/// in the `tabstops` slice is interpreted as a relative number of -/// spaces, which this function will return for every input value of -/// `col` beyond the end of the second-to-last element of `tabstops`. fn next_tabstop(tabstops: &[usize], col: usize, remaining_mode: &RemainingMode) -> usize { let num_tabstops = tabstops.len(); match remaining_mode { @@ -350,13 +345,11 @@ fn next_tabstop(tabstops: &[usize], col: usize, remaining_mode: &RemainingMode) Some(t) => t - col, None => { let step_size = tabstops[num_tabstops - 1]; - let last_before_repeating = tabstops[num_tabstops-2]; - let mut r = last_before_repeating+step_size; + let last_fixed_tabstop = tabstops[num_tabstops-2]; + let characters_since_last_tabstop = col-last_fixed_tabstop; - while col >= r { - r += step_size; - } - r - col + let steps_required = 1 + characters_since_last_tabstop/step_size; + steps_required*step_size-characters_since_last_tabstop } }, RemainingMode::Slash => match tabstops[0..num_tabstops - 1].iter().find(|&&t| t > col) { @@ -498,8 +491,8 @@ mod tests { #[test] fn test_next_tabstop_remaining_mode_plus() { assert_eq!(next_tabstop(&[1, 5], 0, &RemainingMode::Plus), 1); - assert_eq!(next_tabstop(&[1, 5], 3, &RemainingMode::Plus), 4); - assert_eq!(next_tabstop(&[1, 5], 6, &RemainingMode::Plus), 4); + assert_eq!(next_tabstop(&[1, 5], 3, &RemainingMode::Plus), 3); + assert_eq!(next_tabstop(&[1, 5], 6, &RemainingMode::Plus), 5); } #[test] diff --git a/tests/by-util/test_expand.rs b/tests/by-util/test_expand.rs index d4fbf9f95..3436b02e9 100644 --- a/tests/by-util/test_expand.rs +++ b/tests/by-util/test_expand.rs @@ -350,21 +350,41 @@ fn test_ignore_initial_plus_slash_combination() { } #[test] -fn test_comma_with_plus_and_multi_character_values() { +fn test_comma_with_plus_1() { new_ucmd!() .args(&["--tabs=3,+6"]) - .pipe_in("\taaa\tbbb\tccc") + .pipe_in("\t111\t222\t333") .succeeds() // 01234567890 - .stdout_is(" aaa bbb ccc"); + .stdout_is(" 111 222 333"); } #[test] -fn test_comma_with_plus_and_multi_character_values() { +fn test_comma_with_plus_2() { new_ucmd!() - .args(&["--tabs=3,+6"]) - .pipe_in("\taaa\tbbb\tccc") + .args(&["--tabs=1,+5"]) + .pipe_in("\ta\tb\tc") .succeeds() // 01234567890 - .stdout_is(" aaa bbb ccc"); + .stdout_is(" a b c"); +} + +#[test] +fn test_comma_with_plus_3() { + new_ucmd!() + .args(&["--tabs=2,+5"]) + .pipe_in("a\tb\tc") + .succeeds() + // 01234567890 + .stdout_is("a b c"); +} + +#[test] +fn test_comma_with_plus_4() { + new_ucmd!() + .args(&["--tabs=1,3,+5"]) + .pipe_in("a\tb\tc") + .succeeds() + // 01234567890 + .stdout_is("a b c"); } From 1bf723fe402bfa6c36ff4a7d72e875ce5634afae Mon Sep 17 00:00:00 2001 From: Sebastian Bentmar Holgersson Date: Tue, 3 Jan 2023 22:06:40 +0000 Subject: [PATCH 4/5] expand: fix formatting issues and cleanup --- src/uu/expand/src/expand.rs | 13 ++++++------- tests/by-util/test_expand.rs | 24 ++++++++++++------------ 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/uu/expand/src/expand.rs b/src/uu/expand/src/expand.rs index 2094631b7..6e3f6e364 100644 --- a/src/uu/expand/src/expand.rs +++ b/src/uu/expand/src/expand.rs @@ -38,7 +38,7 @@ static DEFAULT_TABSTOP: usize = 8; /// The mode to use when replacing tabs beyond the last one specified in /// the `--tabs` argument. -#[derive(PartialEq, Debug)] +#[derive(PartialEq)] enum RemainingMode { None, Slash, @@ -195,7 +195,6 @@ fn tabstops_parse(s: &str) -> Result<(RemainingMode, Vec), ParseError> { Ok((remaining_mode, nums)) } -#[derive(Debug)] struct Options { files: Vec, tabstops: Vec, @@ -345,11 +344,11 @@ fn next_tabstop(tabstops: &[usize], col: usize, remaining_mode: &RemainingMode) Some(t) => t - col, None => { let step_size = tabstops[num_tabstops - 1]; - let last_fixed_tabstop = tabstops[num_tabstops-2]; - let characters_since_last_tabstop = col-last_fixed_tabstop; + let last_fixed_tabstop = tabstops[num_tabstops - 2]; + let characters_since_last_tabstop = col - last_fixed_tabstop; - let steps_required = 1 + characters_since_last_tabstop/step_size; - steps_required*step_size-characters_since_last_tabstop + let steps_required = 1 + characters_since_last_tabstop / step_size; + steps_required * step_size - characters_since_last_tabstop } }, RemainingMode::Slash => match tabstops[0..num_tabstops - 1].iter().find(|&&t| t > col) { @@ -378,7 +377,7 @@ enum CharType { fn expand(options: &Options) -> std::io::Result<()> { use self::CharType::*; - + let mut output = BufWriter::new(stdout()); let ts = options.tabstops.as_ref(); let mut buf = Vec::new(); diff --git a/tests/by-util/test_expand.rs b/tests/by-util/test_expand.rs index 3436b02e9..5b4f269ea 100644 --- a/tests/by-util/test_expand.rs +++ b/tests/by-util/test_expand.rs @@ -275,7 +275,7 @@ fn test_tabs_shortcut() { .args(&["-2", "-5", "-7"]) .pipe_in("\ta\tb\tc") .succeeds() - // 01234567890 + // 01234567890 .stdout_is(" a b c"); } @@ -285,7 +285,7 @@ fn test_comma_separated_tabs_shortcut() { .args(&["-2,5", "-7"]) .pipe_in("\ta\tb\tc") .succeeds() - // 01234567890 + // 01234567890 .stdout_is(" a b c"); } @@ -295,7 +295,7 @@ fn test_tabs_and_tabs_shortcut_mixed() { .args(&["-2", "--tabs=5", "-7"]) .pipe_in("\ta\tb\tc") .succeeds() - // 01234567890 + // 01234567890 .stdout_is(" a b c"); } @@ -305,7 +305,7 @@ fn test_ignore_initial_plus() { .args(&["--tabs=+3"]) .pipe_in("\ta\tb\tc") .succeeds() - // 01234567890 + // 01234567890 .stdout_is(" a b c"); } @@ -315,7 +315,7 @@ fn test_ignore_initial_pluses() { .args(&["--tabs=++3"]) .pipe_in("\ta\tb\tc") .succeeds() - // 01234567890 + // 01234567890 .stdout_is(" a b c"); } @@ -325,7 +325,7 @@ fn test_ignore_initial_slash() { .args(&["--tabs=/3"]) .pipe_in("\ta\tb\tc") .succeeds() - // 01234567890 + // 01234567890 .stdout_is(" a b c"); } @@ -335,7 +335,7 @@ fn test_ignore_initial_slashes() { .args(&["--tabs=//3"]) .pipe_in("\ta\tb\tc") .succeeds() - // 01234567890 + // 01234567890 .stdout_is(" a b c"); } @@ -345,7 +345,7 @@ fn test_ignore_initial_plus_slash_combination() { .args(&["--tabs=+/3"]) .pipe_in("\ta\tb\tc") .succeeds() - // 01234567890 + // 01234567890 .stdout_is(" a b c"); } @@ -355,7 +355,7 @@ fn test_comma_with_plus_1() { .args(&["--tabs=3,+6"]) .pipe_in("\t111\t222\t333") .succeeds() - // 01234567890 + // 01234567890 .stdout_is(" 111 222 333"); } @@ -365,7 +365,7 @@ fn test_comma_with_plus_2() { .args(&["--tabs=1,+5"]) .pipe_in("\ta\tb\tc") .succeeds() - // 01234567890 + // 01234567890 .stdout_is(" a b c"); } @@ -375,7 +375,7 @@ fn test_comma_with_plus_3() { .args(&["--tabs=2,+5"]) .pipe_in("a\tb\tc") .succeeds() - // 01234567890 + // 01234567890 .stdout_is("a b c"); } @@ -385,6 +385,6 @@ fn test_comma_with_plus_4() { .args(&["--tabs=1,3,+5"]) .pipe_in("a\tb\tc") .succeeds() - // 01234567890 + // 01234567890 .stdout_is("a b c"); } From 999957791a4dac1dda49f7045cdfeee6315bee66 Mon Sep 17 00:00:00 2001 From: Sebastian Bentmar Holgersson Date: Tue, 3 Jan 2023 22:44:59 +0000 Subject: [PATCH 5/5] expand: minor simplification of mode logic --- src/uu/expand/src/expand.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/uu/expand/src/expand.rs b/src/uu/expand/src/expand.rs index 6e3f6e364..7976a0a97 100644 --- a/src/uu/expand/src/expand.rs +++ b/src/uu/expand/src/expand.rs @@ -186,10 +186,9 @@ fn tabstops_parse(s: &str) -> Result<(RemainingMode, Vec), ParseError> { // then just use the default tabstops. if nums.is_empty() { nums = vec![DEFAULT_TABSTOP]; - remaining_mode = RemainingMode::None; } - if nums.len() == 1 { + if nums.len() < 2 { remaining_mode = RemainingMode::None; } Ok((remaining_mode, nums))