1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-30 20:47:46 +00:00

Merge pull request #5592 from tertsdiepraam/fmt-small-refactor

`fmt`: a collection of small refactors
This commit is contained in:
Daniel Hofstetter 2023-11-28 14:15:30 +01:00 committed by GitHub
commit 32bc2e4bc8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 263 additions and 306 deletions

View file

@@ -5,40 +5,42 @@
// spell-checker:ignore (ToDO) PSKIP linebreak ostream parasplit tabwidth xanti xprefix // spell-checker:ignore (ToDO) PSKIP linebreak ostream parasplit tabwidth xanti xprefix
use clap::{crate_version, Arg, ArgAction, Command}; use clap::{crate_version, Arg, ArgAction, ArgMatches, Command};
use std::cmp;
use std::fs::File; use std::fs::File;
use std::io::{stdin, stdout, Write}; use std::io::{stdin, stdout, BufReader, BufWriter, Read, Stdout, Write};
use std::io::{BufReader, BufWriter, Read, Stdout};
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::error::{FromIo, UResult, USimpleError}; use uucore::error::{FromIo, UResult, USimpleError};
use uucore::{format_usage, help_about, help_usage, show_warning}; use uucore::{format_usage, help_about, help_usage, show_warning};
use self::linebreak::break_lines; use linebreak::break_lines;
use self::parasplit::ParagraphStream; use parasplit::ParagraphStream;
mod linebreak; mod linebreak;
mod parasplit; mod parasplit;
static ABOUT: &str = help_about!("fmt.md"); const ABOUT: &str = help_about!("fmt.md");
const USAGE: &str = help_usage!("fmt.md"); const USAGE: &str = help_usage!("fmt.md");
static MAX_WIDTH: usize = 2500; const MAX_WIDTH: usize = 2500;
static OPT_CROWN_MARGIN: &str = "crown-margin"; mod options {
static OPT_TAGGED_PARAGRAPH: &str = "tagged-paragraph"; pub const CROWN_MARGIN: &str = "crown-margin";
static OPT_PRESERVE_HEADERS: &str = "preserve-headers"; pub const TAGGED_PARAGRAPH: &str = "tagged-paragraph";
static OPT_SPLIT_ONLY: &str = "split-only"; pub const PRESERVE_HEADERS: &str = "preserve-headers";
static OPT_UNIFORM_SPACING: &str = "uniform-spacing"; pub const SPLIT_ONLY: &str = "split-only";
static OPT_PREFIX: &str = "prefix"; pub const UNIFORM_SPACING: &str = "uniform-spacing";
static OPT_SKIP_PREFIX: &str = "skip-prefix"; pub const PREFIX: &str = "prefix";
static OPT_EXACT_PREFIX: &str = "exact-prefix"; pub const SKIP_PREFIX: &str = "skip-prefix";
static OPT_EXACT_SKIP_PREFIX: &str = "exact-skip-prefix"; pub const EXACT_PREFIX: &str = "exact-prefix";
static OPT_WIDTH: &str = "width"; pub const EXACT_SKIP_PREFIX: &str = "exact-skip-prefix";
static OPT_GOAL: &str = "goal"; pub const WIDTH: &str = "width";
static OPT_QUICK: &str = "quick"; pub const GOAL: &str = "goal";
static OPT_TAB_WIDTH: &str = "tab-width"; pub const QUICK: &str = "quick";
pub const TAB_WIDTH: &str = "tab-width";
pub const FILES: &str = "files";
}
static ARG_FILES: &str = "files"; // by default, goal is 93% of width
const DEFAULT_GOAL_TO_WIDTH_RATIO: usize = 93;
pub type FileOrStdReader = BufReader<Box<dyn Read + 'static>>; pub type FileOrStdReader = BufReader<Box<dyn Read + 'static>>;
pub struct FmtOptions { pub struct FmtOptions {
@@ -46,11 +48,9 @@ pub struct FmtOptions {
tagged: bool, tagged: bool,
mail: bool, mail: bool,
split_only: bool, split_only: bool,
use_prefix: bool, prefix: Option<String>,
prefix: String,
xprefix: bool, xprefix: bool,
use_anti_prefix: bool, anti_prefix: Option<String>,
anti_prefix: String,
xanti_prefix: bool, xanti_prefix: bool,
uniform: bool, uniform: bool,
quick: bool, quick: bool,
@@ -59,131 +59,92 @@ pub struct FmtOptions {
tabwidth: usize, tabwidth: usize,
} }
impl Default for FmtOptions { impl FmtOptions {
fn default() -> Self { fn from_matches(matches: &ArgMatches) -> UResult<Self> {
Self { let mut tagged = matches.get_flag(options::TAGGED_PARAGRAPH);
crown: false, let mut crown = matches.get_flag(options::CROWN_MARGIN);
tagged: false,
mail: false, let mail = matches.get_flag(options::PRESERVE_HEADERS);
uniform: false, let uniform = matches.get_flag(options::UNIFORM_SPACING);
quick: false, let quick = matches.get_flag(options::QUICK);
split_only: false, let split_only = matches.get_flag(options::SPLIT_ONLY);
use_prefix: false,
prefix: String::new(), if crown {
xprefix: false, tagged = false;
use_anti_prefix: false, }
anti_prefix: String::new(), if split_only {
xanti_prefix: false, crown = false;
width: 75, tagged = false;
goal: 70,
tabwidth: 8,
} }
}
}
/// Parse the command line arguments and return the list of files and formatting options. let xprefix = matches.contains_id(options::EXACT_PREFIX);
/// let xanti_prefix = matches.contains_id(options::SKIP_PREFIX);
/// # Arguments
///
/// * `args` - Command line arguments.
///
/// # Returns
///
/// A tuple containing a vector of file names and a `FmtOptions` struct.
#[allow(clippy::cognitive_complexity)]
#[allow(clippy::field_reassign_with_default)]
fn parse_arguments(args: impl uucore::Args) -> UResult<(Vec<String>, FmtOptions)> {
// by default, goal is 93% of width
const DEFAULT_GOAL_TO_WIDTH_RATIO: usize = 93;
let matches = uu_app().try_get_matches_from(args)?; let prefix = matches.get_one::<String>(options::PREFIX).map(String::from);
let anti_prefix = matches
.get_one::<String>(options::SKIP_PREFIX)
.map(String::from);
let mut files: Vec<String> = matches let width_opt = matches.get_one::<usize>(options::WIDTH);
.get_many::<String>(ARG_FILES) let goal_opt = matches.get_one::<usize>(options::GOAL);
.map(|v| v.map(ToString::to_string).collect()) let (width, goal) = match (width_opt, goal_opt) {
.unwrap_or_default(); (Some(&w), Some(&g)) => {
if g > w {
return Err(USimpleError::new(1, "GOAL cannot be greater than WIDTH."));
}
(w, g)
}
(Some(&w), None) => {
let g = (w * DEFAULT_GOAL_TO_WIDTH_RATIO / 100).min(w - 3);
(w, g)
}
(None, Some(&g)) => {
let w = (g * 100 / DEFAULT_GOAL_TO_WIDTH_RATIO).max(g + 3);
(w, g)
}
(None, None) => (75, 70),
};
let mut fmt_opts = FmtOptions::default(); if width > MAX_WIDTH {
fmt_opts.tagged = matches.get_flag(OPT_TAGGED_PARAGRAPH);
if matches.get_flag(OPT_CROWN_MARGIN) {
fmt_opts.crown = true;
fmt_opts.tagged = false;
}
fmt_opts.mail = matches.get_flag(OPT_PRESERVE_HEADERS);
fmt_opts.uniform = matches.get_flag(OPT_UNIFORM_SPACING);
fmt_opts.quick = matches.get_flag(OPT_QUICK);
if matches.get_flag(OPT_SPLIT_ONLY) {
fmt_opts.split_only = true;
fmt_opts.crown = false;
fmt_opts.tagged = false;
}
fmt_opts.xprefix = matches.contains_id(OPT_EXACT_PREFIX);
fmt_opts.xanti_prefix = matches.contains_id(OPT_SKIP_PREFIX);
if let Some(s) = matches.get_one::<String>(OPT_PREFIX).map(String::from) {
fmt_opts.prefix = s;
fmt_opts.use_prefix = true;
};
if let Some(s) = matches.get_one::<String>(OPT_SKIP_PREFIX).map(String::from) {
fmt_opts.anti_prefix = s;
fmt_opts.use_anti_prefix = true;
};
if let Some(width) = matches.get_one::<usize>(OPT_WIDTH) {
fmt_opts.width = *width;
if fmt_opts.width > MAX_WIDTH {
return Err(USimpleError::new( return Err(USimpleError::new(
1, 1,
format!( format!("invalid width: '{}': Numerical result out of range", width),
"invalid width: '{}': Numerical result out of range",
fmt_opts.width,
),
)); ));
} }
fmt_opts.goal = cmp::min(
fmt_opts.width * DEFAULT_GOAL_TO_WIDTH_RATIO / 100,
fmt_opts.width - 3,
);
};
if let Some(goal) = matches.get_one::<usize>(OPT_GOAL) { let mut tabwidth = 8;
fmt_opts.goal = *goal; if let Some(s) = matches.get_one::<String>(options::TAB_WIDTH) {
if !matches.contains_id(OPT_WIDTH) { tabwidth = match s.parse::<usize>() {
fmt_opts.width = cmp::max( Ok(t) => t,
fmt_opts.goal * 100 / DEFAULT_GOAL_TO_WIDTH_RATIO, Err(e) => {
fmt_opts.goal + 3, return Err(USimpleError::new(
); 1,
} else if fmt_opts.goal > fmt_opts.width { format!("Invalid TABWIDTH specification: {}: {}", s.quote(), e),
return Err(USimpleError::new(1, "GOAL cannot be greater than WIDTH.")); ));
} }
}; };
if let Some(s) = matches.get_one::<String>(OPT_TAB_WIDTH) {
fmt_opts.tabwidth = match s.parse::<usize>() {
Ok(t) => t,
Err(e) => {
return Err(USimpleError::new(
1,
format!("Invalid TABWIDTH specification: {}: {}", s.quote(), e),
));
}
}; };
};
if fmt_opts.tabwidth < 1 { if tabwidth < 1 {
fmt_opts.tabwidth = 1; tabwidth = 1;
}
Ok(Self {
crown,
tagged,
mail,
uniform,
quick,
split_only,
prefix,
xprefix,
anti_prefix,
xanti_prefix,
width,
goal,
tabwidth,
})
} }
// immutable now
let fmt_opts = fmt_opts;
if files.is_empty() {
files.push("-".to_owned());
}
Ok((files, fmt_opts))
} }
/// Process the content of a file and format it according to the provided options. /// Process the content of a file and format it according to the provided options.
@@ -239,7 +200,14 @@ fn process_file(
#[uucore::main] #[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> { pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let (files, fmt_opts) = parse_arguments(args)?; let matches = uu_app().try_get_matches_from(args)?;
let files: Vec<String> = matches
.get_many::<String>(options::FILES)
.map(|v| v.map(ToString::to_string).collect())
.unwrap_or(vec!["-".into()]);
let fmt_opts = FmtOptions::from_matches(&matches)?;
let mut ostream = BufWriter::new(stdout()); let mut ostream = BufWriter::new(stdout());
@@ -257,9 +225,9 @@ pub fn uu_app() -> Command {
.override_usage(format_usage(USAGE)) .override_usage(format_usage(USAGE))
.infer_long_args(true) .infer_long_args(true)
.arg( .arg(
Arg::new(OPT_CROWN_MARGIN) Arg::new(options::CROWN_MARGIN)
.short('c') .short('c')
.long(OPT_CROWN_MARGIN) .long(options::CROWN_MARGIN)
.help( .help(
"First and second line of paragraph \ "First and second line of paragraph \
may have different indentations, in which \ may have different indentations, in which \
@@ -269,7 +237,7 @@ pub fn uu_app() -> Command {
.action(ArgAction::SetTrue), .action(ArgAction::SetTrue),
) )
.arg( .arg(
Arg::new(OPT_TAGGED_PARAGRAPH) Arg::new(options::TAGGED_PARAGRAPH)
.short('t') .short('t')
.long("tagged-paragraph") .long("tagged-paragraph")
.help( .help(
@@ -279,7 +247,7 @@ pub fn uu_app() -> Command {
.action(ArgAction::SetTrue), .action(ArgAction::SetTrue),
) )
.arg( .arg(
Arg::new(OPT_PRESERVE_HEADERS) Arg::new(options::PRESERVE_HEADERS)
.short('m') .short('m')
.long("preserve-headers") .long("preserve-headers")
.help( .help(
@@ -289,14 +257,14 @@ pub fn uu_app() -> Command {
.action(ArgAction::SetTrue), .action(ArgAction::SetTrue),
) )
.arg( .arg(
Arg::new(OPT_SPLIT_ONLY) Arg::new(options::SPLIT_ONLY)
.short('s') .short('s')
.long("split-only") .long("split-only")
.help("Split lines only, do not reflow.") .help("Split lines only, do not reflow.")
.action(ArgAction::SetTrue), .action(ArgAction::SetTrue),
) )
.arg( .arg(
Arg::new(OPT_UNIFORM_SPACING) Arg::new(options::UNIFORM_SPACING)
.short('u') .short('u')
.long("uniform-spacing") .long("uniform-spacing")
.help( .help(
@@ -309,7 +277,7 @@ pub fn uu_app() -> Command {
.action(ArgAction::SetTrue), .action(ArgAction::SetTrue),
) )
.arg( .arg(
Arg::new(OPT_PREFIX) Arg::new(options::PREFIX)
.short('p') .short('p')
.long("prefix") .long("prefix")
.help( .help(
@@ -321,7 +289,7 @@ pub fn uu_app() -> Command {
.value_name("PREFIX"), .value_name("PREFIX"),
) )
.arg( .arg(
Arg::new(OPT_SKIP_PREFIX) Arg::new(options::SKIP_PREFIX)
.short('P') .short('P')
.long("skip-prefix") .long("skip-prefix")
.help( .help(
@@ -332,7 +300,7 @@ pub fn uu_app() -> Command {
.value_name("PSKIP"), .value_name("PSKIP"),
) )
.arg( .arg(
Arg::new(OPT_EXACT_PREFIX) Arg::new(options::EXACT_PREFIX)
.short('x') .short('x')
.long("exact-prefix") .long("exact-prefix")
.help( .help(
@@ -342,7 +310,7 @@ pub fn uu_app() -> Command {
.action(ArgAction::SetTrue), .action(ArgAction::SetTrue),
) )
.arg( .arg(
Arg::new(OPT_EXACT_SKIP_PREFIX) Arg::new(options::EXACT_SKIP_PREFIX)
.short('X') .short('X')
.long("exact-skip-prefix") .long("exact-skip-prefix")
.help( .help(
@@ -352,7 +320,7 @@ pub fn uu_app() -> Command {
.action(ArgAction::SetTrue), .action(ArgAction::SetTrue),
) )
.arg( .arg(
Arg::new(OPT_WIDTH) Arg::new(options::WIDTH)
.short('w') .short('w')
.long("width") .long("width")
.help("Fill output lines up to a maximum of WIDTH columns, default 75.") .help("Fill output lines up to a maximum of WIDTH columns, default 75.")
@@ -360,7 +328,7 @@ pub fn uu_app() -> Command {
.value_parser(clap::value_parser!(usize)), .value_parser(clap::value_parser!(usize)),
) )
.arg( .arg(
Arg::new(OPT_GOAL) Arg::new(options::GOAL)
.short('g') .short('g')
.long("goal") .long("goal")
.help("Goal width, default of 93% of WIDTH. Must be less than WIDTH.") .help("Goal width, default of 93% of WIDTH. Must be less than WIDTH.")
@@ -368,7 +336,7 @@ pub fn uu_app() -> Command {
.value_parser(clap::value_parser!(usize)), .value_parser(clap::value_parser!(usize)),
) )
.arg( .arg(
Arg::new(OPT_QUICK) Arg::new(options::QUICK)
.short('q') .short('q')
.long("quick") .long("quick")
.help( .help(
@@ -378,7 +346,7 @@ pub fn uu_app() -> Command {
.action(ArgAction::SetTrue), .action(ArgAction::SetTrue),
) )
.arg( .arg(
Arg::new(OPT_TAB_WIDTH) Arg::new(options::TAB_WIDTH)
.short('T') .short('T')
.long("tab-width") .long("tab-width")
.help( .help(
@@ -389,7 +357,7 @@ pub fn uu_app() -> Command {
.value_name("TABWIDTH"), .value_name("TABWIDTH"),
) )
.arg( .arg(
Arg::new(ARG_FILES) Arg::new(options::FILES)
.action(ArgAction::Append) .action(ArgAction::Append)
.value_hint(clap::ValueHint::FilePath), .value_hint(clap::ValueHint::FilePath),
) )

View file

@@ -5,10 +5,8 @@
// spell-checker:ignore (ToDO) INFTY MULT accum breakwords linebreak linebreaking linebreaks linelen maxlength minlength nchars ostream overlen parasplit plass posn powf punct signum slen sstart tabwidth tlen underlen winfo wlen wordlen // spell-checker:ignore (ToDO) INFTY MULT accum breakwords linebreak linebreaking linebreaks linelen maxlength minlength nchars ostream overlen parasplit plass posn powf punct signum slen sstart tabwidth tlen underlen winfo wlen wordlen
use std::cmp;
use std::i64;
use std::io::{BufWriter, Stdout, Write}; use std::io::{BufWriter, Stdout, Write};
use std::mem; use std::{cmp, i64, mem};
use uucore::crash; use uucore::crash;
@@ -46,7 +44,7 @@ pub fn break_lines(
ostream: &mut BufWriter<Stdout>, ostream: &mut BufWriter<Stdout>,
) -> std::io::Result<()> { ) -> std::io::Result<()> {
// indent // indent
let p_indent = &para.indent_str[..]; let p_indent = &para.indent_str;
let p_indent_len = para.indent_len; let p_indent_len = para.indent_len;
// words // words
@@ -55,14 +53,12 @@ pub fn break_lines(
// the first word will *always* appear on the first line // the first word will *always* appear on the first line
// make sure of this here // make sure of this here
let (w, w_len) = match p_words_words.next() { let Some(winfo) = p_words_words.next() else {
Some(winfo) => (winfo.word, winfo.word_nchars), return ostream.write_all(b"\n");
None => {
return ostream.write_all(b"\n");
}
}; };
// print the init, if it exists, and get its length // print the init, if it exists, and get its length
let p_init_len = w_len let p_init_len = winfo.word_nchars
+ if opts.crown || opts.tagged { + if opts.crown || opts.tagged {
// handle "init" portion // handle "init" portion
ostream.write_all(para.init_str.as_bytes())?; ostream.write_all(para.init_str.as_bytes())?;
@@ -75,8 +71,9 @@ pub fn break_lines(
// except that mail headers get no indent at all // except that mail headers get no indent at all
0 0
}; };
// write first word after writing init // write first word after writing init
ostream.write_all(w.as_bytes())?; ostream.write_all(winfo.word.as_bytes())?;
// does this paragraph require uniform spacing? // does this paragraph require uniform spacing?
let uniform = para.mail_header || opts.uniform; let uniform = para.mail_header || opts.uniform;
@@ -103,15 +100,16 @@ fn break_simple<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
mut iter: T, mut iter: T,
args: &mut BreakArgs<'a>, args: &mut BreakArgs<'a>,
) -> std::io::Result<()> { ) -> std::io::Result<()> {
iter.try_fold((args.init_len, false), |l, winfo| { iter.try_fold((args.init_len, false), |(l, prev_punct), winfo| {
accum_words_simple(args, l, winfo) accum_words_simple(args, l, prev_punct, winfo)
})?; })?;
args.ostream.write_all(b"\n") args.ostream.write_all(b"\n")
} }
fn accum_words_simple<'a>( fn accum_words_simple<'a>(
args: &mut BreakArgs<'a>, args: &mut BreakArgs<'a>,
(l, prev_punct): (usize, bool), l: usize,
prev_punct: bool,
winfo: &'a WordInfo<'a>, winfo: &'a WordInfo<'a>,
) -> std::io::Result<(usize, bool)> { ) -> std::io::Result<(usize, bool)> {
// compute the length of this word, considering how tabs will expand at this position on the line // compute the length of this word, considering how tabs will expand at this position on the line
@@ -233,14 +231,14 @@ fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
linebreak: None, linebreak: None,
break_before: false, break_before: false,
demerits: 0, demerits: 0,
prev_rat: 0.0f32, prev_rat: 0.0,
length: args.init_len, length: args.init_len,
fresh: false, fresh: false,
}]; }];
// this vec holds the current active linebreaks; next_ holds the breaks that will be active for // this vec holds the current active linebreaks; next_ holds the breaks that will be active for
// the next word // the next word
let active_breaks = &mut vec![0]; let mut active_breaks = vec![0];
let next_active_breaks = &mut vec![]; let mut next_active_breaks = vec![];
let stretch = (args.opts.width - args.opts.goal) as isize; let stretch = (args.opts.width - args.opts.goal) as isize;
let minlength = args.opts.goal - stretch as usize; let minlength = args.opts.goal - stretch as usize;
@@ -248,10 +246,7 @@ fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
let mut is_sentence_start = false; let mut is_sentence_start = false;
let mut least_demerits = 0; let mut least_demerits = 0;
loop { loop {
let w = match iter.next() { let Some(w) = iter.next() else { break };
None => break,
Some(w) => w,
};
// if this is the last word, we don't add additional demerits for this break // if this is the last word, we don't add additional demerits for this break
let (is_last_word, is_sentence_end) = match iter.peek() { let (is_last_word, is_sentence_end) = match iter.peek() {
@@ -358,13 +353,13 @@ fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
least_demerits = cmp::max(ld_next, 0); least_demerits = cmp::max(ld_next, 0);
} }
// swap in new list of active breaks // swap in new list of active breaks
mem::swap(active_breaks, next_active_breaks); mem::swap(&mut active_breaks, &mut next_active_breaks);
// If this was the last word in a sentence, the next one must be the first in the next. // If this was the last word in a sentence, the next one must be the first in the next.
is_sentence_start = is_sentence_end; is_sentence_start = is_sentence_end;
} }
// return the best path // return the best path
build_best_path(&linebreaks, active_breaks) build_best_path(&linebreaks, &active_breaks)
} }
fn build_best_path<'a>(paths: &[LineBreak<'a>], active: &[usize]) -> Vec<(&'a WordInfo<'a>, bool)> { fn build_best_path<'a>(paths: &[LineBreak<'a>], active: &[usize]) -> Vec<(&'a WordInfo<'a>, bool)> {

View file

@@ -52,18 +52,22 @@ impl Line {
} }
} }
// each line's prefix has to be considered to know whether to merge it with /// Each line's prefix has to be considered to know whether to merge it with
// the next line or not /// the next line or not
#[derive(Debug)] #[derive(Debug)]
pub struct FileLine { pub struct FileLine {
line: String, line: String,
indent_end: usize, // the end of the indent, always the start of the text /// The end of the indent, always the start of the text
pfxind_end: usize, // the end of the PREFIX's indent, that is, the spaces before the prefix indent_end: usize,
indent_len: usize, // display length of indent taking into account tabs /// The end of the PREFIX's indent, that is, the spaces before the prefix
prefix_len: usize, // PREFIX indent length taking into account tabs pfxind_end: usize,
/// Display length of indent taking into account tabs
indent_len: usize,
/// PREFIX indent length taking into account tabs
prefix_len: usize,
} }
// iterator that produces a stream of Lines from a file /// Iterator that produces a stream of Lines from a file
pub struct FileLines<'a> { pub struct FileLines<'a> {
opts: &'a FmtOptions, opts: &'a FmtOptions,
lines: Lines<&'a mut FileOrStdReader>, lines: Lines<&'a mut FileOrStdReader>,
@@ -74,26 +78,22 @@ impl<'a> FileLines<'a> {
FileLines { opts, lines } FileLines { opts, lines }
} }
// returns true if this line should be formatted /// returns true if this line should be formatted
fn match_prefix(&self, line: &str) -> (bool, usize) { fn match_prefix(&self, line: &str) -> (bool, usize) {
if !self.opts.use_prefix { let Some(prefix) = &self.opts.prefix else {
return (true, 0); return (true, 0);
} };
FileLines::match_prefix_generic(&self.opts.prefix[..], line, self.opts.xprefix) FileLines::match_prefix_generic(prefix, line, self.opts.xprefix)
} }
// returns true if this line should be formatted /// returns true if this line should be formatted
fn match_anti_prefix(&self, line: &str) -> bool { fn match_anti_prefix(&self, line: &str) -> bool {
if !self.opts.use_anti_prefix { let Some(anti_prefix) = &self.opts.anti_prefix else {
return true; return true;
} };
match FileLines::match_prefix_generic( match FileLines::match_prefix_generic(anti_prefix, line, self.opts.xanti_prefix) {
&self.opts.anti_prefix[..],
line,
self.opts.xanti_prefix,
) {
(true, _) => false, (true, _) => false,
(_, _) => true, (_, _) => true,
} }
@@ -148,13 +148,7 @@ impl<'a> Iterator for FileLines<'a> {
type Item = Line; type Item = Line;
fn next(&mut self) -> Option<Line> { fn next(&mut self) -> Option<Line> {
let n = match self.lines.next() { let n = self.lines.next()?.ok()?;
Some(t) => match t {
Ok(tt) => tt,
Err(_) => return None,
},
None => return None,
};
// if this line is entirely whitespace, // if this line is entirely whitespace,
// emit a blank line // emit a blank line
@@ -178,7 +172,7 @@ impl<'a> Iterator for FileLines<'a> {
// not truly blank we will not allow mail headers on the // not truly blank we will not allow mail headers on the
// following line) // following line)
if pmatch if pmatch
&& n[poffset + self.opts.prefix.len()..] && n[poffset + self.opts.prefix.as_ref().map_or(0, |s| s.len())..]
.chars() .chars()
.all(char::is_whitespace) .all(char::is_whitespace)
{ {
@@ -192,7 +186,7 @@ impl<'a> Iterator for FileLines<'a> {
} }
// figure out the indent, prefix, and prefixindent ending points // figure out the indent, prefix, and prefixindent ending points
let prefix_end = poffset + self.opts.prefix.len(); let prefix_end = poffset + self.opts.prefix.as_ref().map_or(0, |s| s.len());
let (indent_end, prefix_len, indent_len) = self.compute_indent(&n[..], prefix_end); let (indent_end, prefix_len, indent_len) = self.compute_indent(&n[..], prefix_end);
Some(Line::FormatLine(FileLine { Some(Line::FormatLine(FileLine {
@@ -205,24 +199,33 @@ impl<'a> Iterator for FileLines<'a> {
} }
} }
// a paragraph : a collection of FileLines that are to be formatted /// A paragraph : a collection of FileLines that are to be formatted
// plus info about the paragraph's indentation /// plus info about the paragraph's indentation
// (but we only retain the String from the FileLine; the other info ///
// is only there to help us in deciding how to merge lines into Paragraphs /// We only retain the String from the FileLine; the other info
/// is only there to help us in deciding how to merge lines into Paragraphs
#[derive(Debug)] #[derive(Debug)]
pub struct Paragraph { pub struct Paragraph {
lines: Vec<String>, // the lines of the file /// the lines of the file
pub init_str: String, // string representing the init, that is, the first line's indent lines: Vec<String>,
pub init_len: usize, // printable length of the init string considering TABWIDTH /// string representing the init, that is, the first line's indent
init_end: usize, // byte location of end of init in first line String pub init_str: String,
pub indent_str: String, // string representing indent /// printable length of the init string considering TABWIDTH
pub indent_len: usize, // length of above pub init_len: usize,
indent_end: usize, // byte location of end of indent (in crown and tagged mode, only applies to 2nd line and onward) /// byte location of end of init in first line String
pub mail_header: bool, // we need to know if this is a mail header because we do word splitting differently in that case init_end: usize,
/// string representing indent
pub indent_str: String,
/// length of above
pub indent_len: usize,
/// byte location of end of indent (in crown and tagged mode, only applies to 2nd line and onward)
indent_end: usize,
/// we need to know if this is a mail header because we do word splitting differently in that case
pub mail_header: bool,
} }
// an iterator producing a stream of paragraphs from a stream of lines /// An iterator producing a stream of paragraphs from a stream of lines
// given a set of options. /// given a set of options.
pub struct ParagraphStream<'a> { pub struct ParagraphStream<'a> {
lines: Peekable<FileLines<'a>>, lines: Peekable<FileLines<'a>>,
next_mail: bool, next_mail: bool,
@@ -240,7 +243,7 @@ impl<'a> ParagraphStream<'a> {
} }
} }
// detect RFC822 mail header /// Detect RFC822 mail header
fn is_mail_header(line: &FileLine) -> bool { fn is_mail_header(line: &FileLine) -> bool {
// a mail header begins with either "From " (envelope sender line) // a mail header begins with either "From " (envelope sender line)
// or with a sequence of printable ASCII chars (33 to 126, inclusive, // or with a sequence of printable ASCII chars (33 to 126, inclusive,
@@ -276,12 +279,9 @@ impl<'a> Iterator for ParagraphStream<'a> {
#[allow(clippy::cognitive_complexity)] #[allow(clippy::cognitive_complexity)]
fn next(&mut self) -> Option<Result<Paragraph, String>> { fn next(&mut self) -> Option<Result<Paragraph, String>> {
// return a NoFormatLine in an Err; it should immediately be output // return a NoFormatLine in an Err; it should immediately be output
let noformat = match self.lines.peek() { let noformat = match self.lines.peek()? {
None => return None, Line::FormatLine(_) => false,
Some(l) => match *l { Line::NoFormatLine(_, _) => true,
Line::FormatLine(_) => false,
Line::NoFormatLine(_, _) => true,
},
}; };
// found a NoFormatLine, immediately dump it out // found a NoFormatLine, immediately dump it out
@@ -305,95 +305,89 @@ impl<'a> Iterator for ParagraphStream<'a> {
let mut in_mail = false; let mut in_mail = false;
let mut second_done = false; // for when we use crown or tagged mode let mut second_done = false; // for when we use crown or tagged mode
loop { loop {
{ // peek ahead
// peek ahead // need to explicitly force fl out of scope before we can call self.lines.next()
// need to explicitly force fl out of scope before we can call self.lines.next() let Some(Line::FormatLine(fl)) = self.lines.peek() else {
let fl = match self.lines.peek() { break;
None => break, };
Some(l) => match *l {
Line::FormatLine(ref x) => x,
Line::NoFormatLine(..) => break,
},
};
if p_lines.is_empty() { if p_lines.is_empty() {
// first time through the loop, get things set up // first time through the loop, get things set up
// detect mail header // detect mail header
if self.opts.mail && self.next_mail && ParagraphStream::is_mail_header(fl) { if self.opts.mail && self.next_mail && ParagraphStream::is_mail_header(fl) {
in_mail = true; in_mail = true;
// there can't be any indent or pfxind because otherwise is_mail_header // there can't be any indent or pfxind because otherwise is_mail_header
// would fail since there cannot be any whitespace before the colon in a // would fail since there cannot be any whitespace before the colon in a
// valid header field // valid header field
indent_str.push_str(" "); indent_str.push_str(" ");
indent_len = 2; indent_len = 2;
} else {
if self.opts.crown || self.opts.tagged {
init_str.push_str(&fl.line[..fl.indent_end]);
init_len = fl.indent_len;
init_end = fl.indent_end;
} else { } else {
if self.opts.crown || self.opts.tagged { second_done = true;
init_str.push_str(&fl.line[..fl.indent_end]);
init_len = fl.indent_len;
init_end = fl.indent_end;
} else {
second_done = true;
}
// these will be overwritten in the 2nd line of crown or tagged mode, but
// we are not guaranteed to get to the 2nd line, e.g., if the next line
// is a NoFormatLine or None. Thus, we set sane defaults the 1st time around
indent_str.push_str(&fl.line[..fl.indent_end]);
indent_len = fl.indent_len;
indent_end = fl.indent_end;
// save these to check for matching lines
prefix_len = fl.prefix_len;
pfxind_end = fl.pfxind_end;
// in tagged mode, add 4 spaces of additional indenting by default
// (gnu fmt's behavior is different: it seems to find the closest column to
// indent_end that is divisible by 3. But honestly that behavior seems
// pretty arbitrary.
// Perhaps a better default would be 1 TABWIDTH? But ugh that's so big.
if self.opts.tagged {
indent_str.push_str(" ");
indent_len += 4;
}
}
} else if in_mail {
// lines following mail headers must begin with spaces
if fl.indent_end == 0 || (self.opts.use_prefix && fl.pfxind_end == 0) {
break; // this line does not begin with spaces
}
} else if !second_done {
// now we have enough info to handle crown margin and tagged mode
// in both crown and tagged modes we require that prefix_len is the same
if prefix_len != fl.prefix_len || pfxind_end != fl.pfxind_end {
break;
} }
// in tagged mode, indent has to be *different* on following lines // these will be overwritten in the 2nd line of crown or tagged mode, but
if self.opts.tagged // we are not guaranteed to get to the 2nd line, e.g., if the next line
&& indent_len - 4 == fl.indent_len // is a NoFormatLine or None. Thus, we set sane defaults the 1st time around
&& indent_end == fl.indent_end
{
break;
}
// this is part of the same paragraph, get the indent info from this line
indent_str.clear();
indent_str.push_str(&fl.line[..fl.indent_end]); indent_str.push_str(&fl.line[..fl.indent_end]);
indent_len = fl.indent_len; indent_len = fl.indent_len;
indent_end = fl.indent_end; indent_end = fl.indent_end;
second_done = true; // save these to check for matching lines
} else { prefix_len = fl.prefix_len;
// detect mismatch pfxind_end = fl.pfxind_end;
if indent_end != fl.indent_end
|| pfxind_end != fl.pfxind_end // in tagged mode, add 4 spaces of additional indenting by default
|| indent_len != fl.indent_len // (gnu fmt's behavior is different: it seems to find the closest column to
|| prefix_len != fl.prefix_len // indent_end that is divisible by 3. But honestly that behavior seems
{ // pretty arbitrary.
break; // Perhaps a better default would be 1 TABWIDTH? But ugh that's so big.
if self.opts.tagged {
indent_str.push_str(" ");
indent_len += 4;
} }
} }
} else if in_mail {
// lines following mail headers must begin with spaces
if fl.indent_end == 0 || (self.opts.prefix.is_some() && fl.pfxind_end == 0) {
break; // this line does not begin with spaces
}
} else if !second_done {
// now we have enough info to handle crown margin and tagged mode
// in both crown and tagged modes we require that prefix_len is the same
if prefix_len != fl.prefix_len || pfxind_end != fl.pfxind_end {
break;
}
// in tagged mode, indent has to be *different* on following lines
if self.opts.tagged
&& indent_len - 4 == fl.indent_len
&& indent_end == fl.indent_end
{
break;
}
// this is part of the same paragraph, get the indent info from this line
indent_str.clear();
indent_str.push_str(&fl.line[..fl.indent_end]);
indent_len = fl.indent_len;
indent_end = fl.indent_end;
second_done = true;
} else {
// detect mismatch
if indent_end != fl.indent_end
|| pfxind_end != fl.pfxind_end
|| indent_len != fl.indent_len
|| prefix_len != fl.prefix_len
{
break;
}
} }
p_lines.push(self.lines.next().unwrap().get_formatline().line); p_lines.push(self.lines.next().unwrap().get_formatline().line);
@@ -429,7 +423,7 @@ pub struct ParaWords<'a> {
} }
impl<'a> ParaWords<'a> { impl<'a> ParaWords<'a> {
pub fn new<'b>(opts: &'b FmtOptions, para: &'b Paragraph) -> ParaWords<'b> { pub fn new(opts: &'a FmtOptions, para: &'a Paragraph) -> Self {
let mut pw = ParaWords { let mut pw = ParaWords {
opts, opts,
para, para,