From fc00b6bfc9c5649b428f2420a2d0e23209ee1ae1 Mon Sep 17 00:00:00 2001 From: clint Date: Wed, 8 Nov 2023 23:36:46 -0500 Subject: [PATCH] expand: move logic to expand a single line into its own function --- src/uu/expand/src/expand.rs | 181 ++++++++++++++++++------------------ 1 file changed, 91 insertions(+), 90 deletions(-) diff --git a/src/uu/expand/src/expand.rs b/src/uu/expand/src/expand.rs index 5791fca9f..4815c5f68 100644 --- a/src/uu/expand/src/expand.rs +++ b/src/uu/expand/src/expand.rs @@ -366,10 +366,98 @@ enum CharType { Other, } -#[allow(clippy::cognitive_complexity)] -fn expand(options: &Options) -> UResult<()> { +fn expand_line( + buf: &mut Vec, + output: &mut BufWriter, + ts: &[usize], + options: &Options, +) -> std::io::Result<()> { use self::CharType::*; + let mut col = 0; + let mut byte = 0; + let mut init = true; + + while byte < buf.len() { + let (ctype, cwidth, nbytes) = if options.uflag { + let nbytes = char::from(buf[byte]).len_utf8(); + + if byte + nbytes > buf.len() { + // don't overrun buffer because of invalid UTF-8 + (Other, 1, 1) + } else if let Ok(t) = from_utf8(&buf[byte..byte + nbytes]) { + match t.chars().next() { + Some('\t') => (Tab, 0, nbytes), + Some('\x08') => (Backspace, 0, nbytes), + Some(c) => (Other, UnicodeWidthChar::width(c).unwrap_or(0), nbytes), + None => { + // no valid char at start of t, so take 1 byte + (Other, 1, 1) + } + } + } else { + (Other, 1, 1) // implicit assumption: non-UTF-8 char is 1 col wide + } + } else { + ( + match buf[byte] { + // always take exactly 1 byte in strict ASCII mode + 0x09 => Tab, + 0x08 => Backspace, + _ => Other, + }, + 1, + 1, + ) + }; + + // figure out how many columns this char takes up + match ctype { + Tab => { + // figure out how many spaces to the next tabstop + let nts = next_tabstop(ts, col, &options.remaining_mode); + col += nts; + + // now dump out either spaces if we're expanding, or a literal tab if we're not + if init || !options.iflag { + if nts <= options.tspaces.len() { + output.write_all(options.tspaces[..nts].as_bytes())?; + } else { + output.write_all(" ".repeat(nts).as_bytes())?; + }; + } else { + output.write_all(&buf[byte..byte + nbytes])?; + } + } + _ => { + col = if ctype == Other { + col + cwidth + } else if col > 0 { + col - 1 + } else { + 0 + }; + + // if we're writing anything other than a space, then we're + // done with the line's leading spaces + if buf[byte] != 0x20 { + init = false; + } + + output.write_all(&buf[byte..byte + nbytes])?; + } + } + + byte += nbytes; // advance the pointer + } + + output.flush()?; + buf.truncate(0); // clear the buffer + + Ok(()) +} + +fn expand(options: &Options) -> UResult<()> { let mut output = BufWriter::new(stdout()); let ts = options.tabstops.as_ref(); let mut buf = Vec::new(); @@ -381,95 +469,8 @@ fn expand(options: &Options) -> UResult<()> { Ok(s) => s > 0, Err(_) => buf.is_empty(), } { - let mut col = 0; - let mut byte = 0; - let mut init = true; - - while byte < buf.len() { - let (ctype, cwidth, nbytes) = if options.uflag { - let nbytes = char::from(buf[byte]).len_utf8(); - - if byte + nbytes > buf.len() { - // don't overrun buffer because of invalid UTF-8 - (Other, 1, 1) - } else if let Ok(t) = from_utf8(&buf[byte..byte + nbytes]) { - match t.chars().next() { - Some('\t') => (Tab, 0, nbytes), - Some('\x08') => (Backspace, 0, nbytes), - Some(c) => (Other, UnicodeWidthChar::width(c).unwrap_or(0), nbytes), - None => { - // no valid char at start of t, so take 1 byte - (Other, 1, 1) - } - } - } else { - (Other, 1, 1) // implicit assumption: non-UTF-8 char is 1 col wide - } - } else { - ( - match buf[byte] { - // always take exactly 1 byte in strict ASCII mode - 0x09 => Tab, - 0x08 => Backspace, - _ => Other, - }, - 1, - 1, - ) - }; - - // figure out how many columns this char takes up - match ctype { - Tab => { - // figure out how many spaces to the next tabstop - let nts = next_tabstop(ts, col, &options.remaining_mode); - col += nts; - - // now dump out either spaces if we're expanding, or a literal tab if we're not - if init || !options.iflag { - if nts <= options.tspaces.len() { - output - .write_all(options.tspaces[..nts].as_bytes()) - .map_err_context(|| "failed to write output".to_string())?; - } else { - output - .write_all(" ".repeat(nts).as_bytes()) - .map_err_context(|| "failed to write output".to_string())?; - }; - } else { - output - .write_all(&buf[byte..byte + nbytes]) - .map_err_context(|| "failed to write output".to_string())?; - } - } - _ => { - col = if ctype == Other { - col + cwidth - } else if col > 0 { - col - 1 - } else { - 0 - }; - - // if we're writing anything other than a space, then we're - // done with the line's leading spaces - if buf[byte] != 0x20 { - init = false; - } - - output - .write_all(&buf[byte..byte + nbytes]) - .map_err_context(|| "failed to write output".to_string())?; - } - } - - byte += nbytes; // advance the pointer - } - - output - .flush() + expand_line(&mut buf, &mut output, ts, options) .map_err_context(|| "failed to write output".to_string())?; - buf.truncate(0); // clear the buffer } } Ok(())