From 01c32a5220ef036bdc1d9bae8928336a815db619 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 28 Nov 2023 11:40:33 +0100 Subject: [PATCH] fmt: clean up some small bits --- src/uu/fmt/src/linebreak.rs | 37 +++--- src/uu/fmt/src/parasplit.rs | 232 ++++++++++++++++++------------------ 2 files changed, 132 insertions(+), 137 deletions(-) diff --git a/src/uu/fmt/src/linebreak.rs b/src/uu/fmt/src/linebreak.rs index fbd990fff..7cd65d861 100644 --- a/src/uu/fmt/src/linebreak.rs +++ b/src/uu/fmt/src/linebreak.rs @@ -46,7 +46,7 @@ pub fn break_lines( ostream: &mut BufWriter, ) -> std::io::Result<()> { // indent - let p_indent = &para.indent_str[..]; + let p_indent = &para.indent_str; let p_indent_len = para.indent_len; // words @@ -55,14 +55,12 @@ pub fn break_lines( // the first word will *always* appear on the first line // make sure of this here - let (w, w_len) = match p_words_words.next() { - Some(winfo) => (winfo.word, winfo.word_nchars), - None => { - return ostream.write_all(b"\n"); - } + let Some(winfo) = p_words_words.next() else { + return ostream.write_all(b"\n"); }; + // print the init, if it exists, and get its length - let p_init_len = w_len + let p_init_len = winfo.word_nchars + if opts.crown || opts.tagged { // handle "init" portion ostream.write_all(para.init_str.as_bytes())?; @@ -75,8 +73,9 @@ pub fn break_lines( // except that mail headers get no indent at all 0 }; + // write first word after writing init - ostream.write_all(w.as_bytes())?; + ostream.write_all(winfo.word.as_bytes())?; // does this paragraph require uniform spacing? 
let uniform = para.mail_header || opts.uniform; @@ -103,15 +102,16 @@ fn break_simple<'a, T: Iterator>>( mut iter: T, args: &mut BreakArgs<'a>, ) -> std::io::Result<()> { - iter.try_fold((args.init_len, false), |l, winfo| { - accum_words_simple(args, l, winfo) + iter.try_fold((args.init_len, false), |(l, prev_punct), winfo| { + accum_words_simple(args, l, prev_punct, winfo) })?; args.ostream.write_all(b"\n") } fn accum_words_simple<'a>( args: &mut BreakArgs<'a>, - (l, prev_punct): (usize, bool), + l: usize, + prev_punct: bool, winfo: &'a WordInfo<'a>, ) -> std::io::Result<(usize, bool)> { // compute the length of this word, considering how tabs will expand at this position on the line @@ -233,14 +233,14 @@ fn find_kp_breakpoints<'a, T: Iterator>>( linebreak: None, break_before: false, demerits: 0, - prev_rat: 0.0f32, + prev_rat: 0.0, length: args.init_len, fresh: false, }]; // this vec holds the current active linebreaks; next_ holds the breaks that will be active for // the next word - let active_breaks = &mut vec![0]; - let next_active_breaks = &mut vec![]; + let mut active_breaks = vec![0]; + let mut next_active_breaks = vec![]; let stretch = (args.opts.width - args.opts.goal) as isize; let minlength = args.opts.goal - stretch as usize; @@ -248,10 +248,7 @@ fn find_kp_breakpoints<'a, T: Iterator>>( let mut is_sentence_start = false; let mut least_demerits = 0; loop { - let w = match iter.next() { - None => break, - Some(w) => w, - }; + let Some(w) = iter.next() else { break }; // if this is the last word, we don't add additional demerits for this break let (is_last_word, is_sentence_end) = match iter.peek() { @@ -358,13 +355,13 @@ fn find_kp_breakpoints<'a, T: Iterator>>( least_demerits = cmp::max(ld_next, 0); } // swap in new list of active breaks - mem::swap(active_breaks, next_active_breaks); + mem::swap(&mut active_breaks, &mut next_active_breaks); // If this was the last word in a sentence, the next one must be the first in the next. 
is_sentence_start = is_sentence_end; } // return the best path - build_best_path(&linebreaks, active_breaks) + build_best_path(&linebreaks, &active_breaks) } fn build_best_path<'a>(paths: &[LineBreak<'a>], active: &[usize]) -> Vec<(&'a WordInfo<'a>, bool)> { diff --git a/src/uu/fmt/src/parasplit.rs b/src/uu/fmt/src/parasplit.rs index 68c8f78fa..311ddbc9b 100644 --- a/src/uu/fmt/src/parasplit.rs +++ b/src/uu/fmt/src/parasplit.rs @@ -52,18 +52,22 @@ impl Line { } } -// each line's prefix has to be considered to know whether to merge it with -// the next line or not +/// Each line's prefix has to be considered to know whether to merge it with +/// the next line or not #[derive(Debug)] pub struct FileLine { line: String, - indent_end: usize, // the end of the indent, always the start of the text - pfxind_end: usize, // the end of the PREFIX's indent, that is, the spaces before the prefix - indent_len: usize, // display length of indent taking into account tabs - prefix_len: usize, // PREFIX indent length taking into account tabs + /// The end of the indent, always the start of the text + indent_end: usize, + /// The end of the PREFIX's indent, that is, the spaces before the prefix + pfxind_end: usize, + /// Display length of indent taking into account tabs + indent_len: usize, + /// PREFIX indent length taking into account tabs + prefix_len: usize, } -// iterator that produces a stream of Lines from a file +/// Iterator that produces a stream of Lines from a file pub struct FileLines<'a> { opts: &'a FmtOptions, lines: Lines<&'a mut FileOrStdReader>, @@ -74,7 +78,7 @@ impl<'a> FileLines<'a> { FileLines { opts, lines } } - // returns true if this line should be formatted + /// returns true if this line should be formatted fn match_prefix(&self, line: &str) -> (bool, usize) { if !self.opts.use_prefix { return (true, 0); @@ -83,7 +87,7 @@ impl<'a> FileLines<'a> { FileLines::match_prefix_generic(&self.opts.prefix[..], line, self.opts.xprefix) } - // returns true if this 
line should be formatted + /// returns true if this line should be formatted fn match_anti_prefix(&self, line: &str) -> bool { if !self.opts.use_anti_prefix { return true; @@ -148,13 +152,7 @@ impl<'a> Iterator for FileLines<'a> { type Item = Line; fn next(&mut self) -> Option { - let n = match self.lines.next() { - Some(t) => match t { - Ok(tt) => tt, - Err(_) => return None, - }, - None => return None, - }; + let n = self.lines.next()?.ok()?; // if this line is entirely whitespace, // emit a blank line @@ -205,24 +203,33 @@ impl<'a> Iterator for FileLines<'a> { } } -// a paragraph : a collection of FileLines that are to be formatted -// plus info about the paragraph's indentation -// (but we only retain the String from the FileLine; the other info -// is only there to help us in deciding how to merge lines into Paragraphs +/// A paragraph : a collection of FileLines that are to be formatted +/// plus info about the paragraph's indentation +/// +/// We only retain the String from the FileLine; the other info +/// is only there to help us in deciding how to merge lines into Paragraphs #[derive(Debug)] pub struct Paragraph { - lines: Vec, // the lines of the file - pub init_str: String, // string representing the init, that is, the first line's indent - pub init_len: usize, // printable length of the init string considering TABWIDTH - init_end: usize, // byte location of end of init in first line String - pub indent_str: String, // string representing indent - pub indent_len: usize, // length of above - indent_end: usize, // byte location of end of indent (in crown and tagged mode, only applies to 2nd line and onward) - pub mail_header: bool, // we need to know if this is a mail header because we do word splitting differently in that case + /// the lines of the file + lines: Vec, + /// string representing the init, that is, the first line's indent + pub init_str: String, + /// printable length of the init string considering TABWIDTH + pub init_len: usize, + /// byte 
location of end of init in first line String + init_end: usize, + /// string representing indent + pub indent_str: String, + /// length of above + pub indent_len: usize, + /// byte location of end of indent (in crown and tagged mode, only applies to 2nd line and onward) + indent_end: usize, + /// we need to know if this is a mail header because we do word splitting differently in that case + pub mail_header: bool, } -// an iterator producing a stream of paragraphs from a stream of lines -// given a set of options. +/// An iterator producing a stream of paragraphs from a stream of lines +/// given a set of options. pub struct ParagraphStream<'a> { lines: Peekable>, next_mail: bool, @@ -240,7 +247,7 @@ impl<'a> ParagraphStream<'a> { } } - // detect RFC822 mail header + /// Detect RFC822 mail header fn is_mail_header(line: &FileLine) -> bool { // a mail header begins with either "From " (envelope sender line) // or with a sequence of printable ASCII chars (33 to 126, inclusive, @@ -276,12 +283,9 @@ impl<'a> Iterator for ParagraphStream<'a> { #[allow(clippy::cognitive_complexity)] fn next(&mut self) -> Option> { // return a NoFormatLine in an Err; it should immediately be output - let noformat = match self.lines.peek() { - None => return None, - Some(l) => match *l { - Line::FormatLine(_) => false, - Line::NoFormatLine(_, _) => true, - }, + let noformat = match self.lines.peek()? { + Line::FormatLine(_) => false, + Line::NoFormatLine(_, _) => true, }; // found a NoFormatLine, immediately dump it out @@ -305,95 +309,89 @@ impl<'a> Iterator for ParagraphStream<'a> { let mut in_mail = false; let mut second_done = false; // for when we use crown or tagged mode loop { - { - // peek ahead - // need to explicitly force fl out of scope before we can call self.lines.next() - let fl = match self.lines.peek() { - None => break, - Some(l) => match *l { - Line::FormatLine(ref x) => x, - Line::NoFormatLine(..) 
=> break, - }, - }; + // peek ahead + // need to explicitly force fl out of scope before we can call self.lines.next() + let Some(Line::FormatLine(fl)) = self.lines.peek() else { + break; + }; - if p_lines.is_empty() { - // first time through the loop, get things set up - // detect mail header - if self.opts.mail && self.next_mail && ParagraphStream::is_mail_header(fl) { - in_mail = true; - // there can't be any indent or pfxind because otherwise is_mail_header - // would fail since there cannot be any whitespace before the colon in a - // valid header field - indent_str.push_str(" "); - indent_len = 2; + if p_lines.is_empty() { + // first time through the loop, get things set up + // detect mail header + if self.opts.mail && self.next_mail && ParagraphStream::is_mail_header(fl) { + in_mail = true; + // there can't be any indent or pfxind because otherwise is_mail_header + // would fail since there cannot be any whitespace before the colon in a + // valid header field + indent_str.push_str(" "); + indent_len = 2; + } else { + if self.opts.crown || self.opts.tagged { + init_str.push_str(&fl.line[..fl.indent_end]); + init_len = fl.indent_len; + init_end = fl.indent_end; } else { - if self.opts.crown || self.opts.tagged { - init_str.push_str(&fl.line[..fl.indent_end]); - init_len = fl.indent_len; - init_end = fl.indent_end; - } else { - second_done = true; - } - - // these will be overwritten in the 2nd line of crown or tagged mode, but - // we are not guaranteed to get to the 2nd line, e.g., if the next line - // is a NoFormatLine or None. 
Thus, we set sane defaults the 1st time around - indent_str.push_str(&fl.line[..fl.indent_end]); - indent_len = fl.indent_len; - indent_end = fl.indent_end; - - // save these to check for matching lines - prefix_len = fl.prefix_len; - pfxind_end = fl.pfxind_end; - - // in tagged mode, add 4 spaces of additional indenting by default - // (gnu fmt's behavior is different: it seems to find the closest column to - // indent_end that is divisible by 3. But honestly that behavior seems - // pretty arbitrary. - // Perhaps a better default would be 1 TABWIDTH? But ugh that's so big. - if self.opts.tagged { - indent_str.push_str(" "); - indent_len += 4; - } - } - } else if in_mail { - // lines following mail headers must begin with spaces - if fl.indent_end == 0 || (self.opts.use_prefix && fl.pfxind_end == 0) { - break; // this line does not begin with spaces - } - } else if !second_done { - // now we have enough info to handle crown margin and tagged mode - - // in both crown and tagged modes we require that prefix_len is the same - if prefix_len != fl.prefix_len || pfxind_end != fl.pfxind_end { - break; + second_done = true; } - // in tagged mode, indent has to be *different* on following lines - if self.opts.tagged - && indent_len - 4 == fl.indent_len - && indent_end == fl.indent_end - { - break; - } - - // this is part of the same paragraph, get the indent info from this line - indent_str.clear(); + // these will be overwritten in the 2nd line of crown or tagged mode, but + // we are not guaranteed to get to the 2nd line, e.g., if the next line + // is a NoFormatLine or None. 
Thus, we set sane defaults the 1st time around indent_str.push_str(&fl.line[..fl.indent_end]); indent_len = fl.indent_len; indent_end = fl.indent_end; - second_done = true; - } else { - // detect mismatch - if indent_end != fl.indent_end - || pfxind_end != fl.pfxind_end - || indent_len != fl.indent_len - || prefix_len != fl.prefix_len - { - break; + // save these to check for matching lines + prefix_len = fl.prefix_len; + pfxind_end = fl.pfxind_end; + + // in tagged mode, add 4 spaces of additional indenting by default + // (gnu fmt's behavior is different: it seems to find the closest column to + // indent_end that is divisible by 3. But honestly that behavior seems + // pretty arbitrary. + // Perhaps a better default would be 1 TABWIDTH? But ugh that's so big. + if self.opts.tagged { + indent_str.push_str(" "); + indent_len += 4; } } + } else if in_mail { + // lines following mail headers must begin with spaces + if fl.indent_end == 0 || (self.opts.use_prefix && fl.pfxind_end == 0) { + break; // this line does not begin with spaces + } + } else if !second_done { + // now we have enough info to handle crown margin and tagged mode + + // in both crown and tagged modes we require that prefix_len is the same + if prefix_len != fl.prefix_len || pfxind_end != fl.pfxind_end { + break; + } + + // in tagged mode, indent has to be *different* on following lines + if self.opts.tagged + && indent_len - 4 == fl.indent_len + && indent_end == fl.indent_end + { + break; + } + + // this is part of the same paragraph, get the indent info from this line + indent_str.clear(); + indent_str.push_str(&fl.line[..fl.indent_end]); + indent_len = fl.indent_len; + indent_end = fl.indent_end; + + second_done = true; + } else { + // detect mismatch + if indent_end != fl.indent_end + || pfxind_end != fl.pfxind_end + || indent_len != fl.indent_len + || prefix_len != fl.prefix_len + { + break; + } } p_lines.push(self.lines.next().unwrap().get_formatline().line); @@ -429,7 +427,7 @@ pub struct 
ParaWords<'a> { } impl<'a> ParaWords<'a> { - pub fn new<'b>(opts: &'b FmtOptions, para: &'b Paragraph) -> ParaWords<'b> { + pub fn new(opts: &'a FmtOptions, para: &'a Paragraph) -> Self { let mut pw = ParaWords { opts, para,