mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-30 04:27:45 +00:00
Merge pull request #5592 from tertsdiepraam/fmt-small-refactor
`fmt`: a collection of small refactors
This commit is contained in:
commit
32bc2e4bc8
3 changed files with 263 additions and 306 deletions
|
@ -5,40 +5,42 @@
|
|||
|
||||
// spell-checker:ignore (ToDO) PSKIP linebreak ostream parasplit tabwidth xanti xprefix
|
||||
|
||||
use clap::{crate_version, Arg, ArgAction, Command};
|
||||
use std::cmp;
|
||||
use clap::{crate_version, Arg, ArgAction, ArgMatches, Command};
|
||||
use std::fs::File;
|
||||
use std::io::{stdin, stdout, Write};
|
||||
use std::io::{BufReader, BufWriter, Read, Stdout};
|
||||
use std::io::{stdin, stdout, BufReader, BufWriter, Read, Stdout, Write};
|
||||
use uucore::display::Quotable;
|
||||
use uucore::error::{FromIo, UResult, USimpleError};
|
||||
use uucore::{format_usage, help_about, help_usage, show_warning};
|
||||
|
||||
use self::linebreak::break_lines;
|
||||
use self::parasplit::ParagraphStream;
|
||||
use linebreak::break_lines;
|
||||
use parasplit::ParagraphStream;
|
||||
|
||||
mod linebreak;
|
||||
mod parasplit;
|
||||
|
||||
static ABOUT: &str = help_about!("fmt.md");
|
||||
const ABOUT: &str = help_about!("fmt.md");
|
||||
const USAGE: &str = help_usage!("fmt.md");
|
||||
static MAX_WIDTH: usize = 2500;
|
||||
const MAX_WIDTH: usize = 2500;
|
||||
|
||||
static OPT_CROWN_MARGIN: &str = "crown-margin";
|
||||
static OPT_TAGGED_PARAGRAPH: &str = "tagged-paragraph";
|
||||
static OPT_PRESERVE_HEADERS: &str = "preserve-headers";
|
||||
static OPT_SPLIT_ONLY: &str = "split-only";
|
||||
static OPT_UNIFORM_SPACING: &str = "uniform-spacing";
|
||||
static OPT_PREFIX: &str = "prefix";
|
||||
static OPT_SKIP_PREFIX: &str = "skip-prefix";
|
||||
static OPT_EXACT_PREFIX: &str = "exact-prefix";
|
||||
static OPT_EXACT_SKIP_PREFIX: &str = "exact-skip-prefix";
|
||||
static OPT_WIDTH: &str = "width";
|
||||
static OPT_GOAL: &str = "goal";
|
||||
static OPT_QUICK: &str = "quick";
|
||||
static OPT_TAB_WIDTH: &str = "tab-width";
|
||||
mod options {
|
||||
pub const CROWN_MARGIN: &str = "crown-margin";
|
||||
pub const TAGGED_PARAGRAPH: &str = "tagged-paragraph";
|
||||
pub const PRESERVE_HEADERS: &str = "preserve-headers";
|
||||
pub const SPLIT_ONLY: &str = "split-only";
|
||||
pub const UNIFORM_SPACING: &str = "uniform-spacing";
|
||||
pub const PREFIX: &str = "prefix";
|
||||
pub const SKIP_PREFIX: &str = "skip-prefix";
|
||||
pub const EXACT_PREFIX: &str = "exact-prefix";
|
||||
pub const EXACT_SKIP_PREFIX: &str = "exact-skip-prefix";
|
||||
pub const WIDTH: &str = "width";
|
||||
pub const GOAL: &str = "goal";
|
||||
pub const QUICK: &str = "quick";
|
||||
pub const TAB_WIDTH: &str = "tab-width";
|
||||
pub const FILES: &str = "files";
|
||||
}
|
||||
|
||||
static ARG_FILES: &str = "files";
|
||||
// by default, goal is 93% of width
|
||||
const DEFAULT_GOAL_TO_WIDTH_RATIO: usize = 93;
|
||||
|
||||
pub type FileOrStdReader = BufReader<Box<dyn Read + 'static>>;
|
||||
pub struct FmtOptions {
|
||||
|
@ -46,11 +48,9 @@ pub struct FmtOptions {
|
|||
tagged: bool,
|
||||
mail: bool,
|
||||
split_only: bool,
|
||||
use_prefix: bool,
|
||||
prefix: String,
|
||||
prefix: Option<String>,
|
||||
xprefix: bool,
|
||||
use_anti_prefix: bool,
|
||||
anti_prefix: String,
|
||||
anti_prefix: Option<String>,
|
||||
xanti_prefix: bool,
|
||||
uniform: bool,
|
||||
quick: bool,
|
||||
|
@ -59,131 +59,92 @@ pub struct FmtOptions {
|
|||
tabwidth: usize,
|
||||
}
|
||||
|
||||
impl Default for FmtOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
crown: false,
|
||||
tagged: false,
|
||||
mail: false,
|
||||
uniform: false,
|
||||
quick: false,
|
||||
split_only: false,
|
||||
use_prefix: false,
|
||||
prefix: String::new(),
|
||||
xprefix: false,
|
||||
use_anti_prefix: false,
|
||||
anti_prefix: String::new(),
|
||||
xanti_prefix: false,
|
||||
width: 75,
|
||||
goal: 70,
|
||||
tabwidth: 8,
|
||||
impl FmtOptions {
|
||||
fn from_matches(matches: &ArgMatches) -> UResult<Self> {
|
||||
let mut tagged = matches.get_flag(options::TAGGED_PARAGRAPH);
|
||||
let mut crown = matches.get_flag(options::CROWN_MARGIN);
|
||||
|
||||
let mail = matches.get_flag(options::PRESERVE_HEADERS);
|
||||
let uniform = matches.get_flag(options::UNIFORM_SPACING);
|
||||
let quick = matches.get_flag(options::QUICK);
|
||||
let split_only = matches.get_flag(options::SPLIT_ONLY);
|
||||
|
||||
if crown {
|
||||
tagged = false;
|
||||
}
|
||||
if split_only {
|
||||
crown = false;
|
||||
tagged = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse the command line arguments and return the list of files and formatting options.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `args` - Command line arguments.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// A tuple containing a vector of file names and a `FmtOptions` struct.
|
||||
#[allow(clippy::cognitive_complexity)]
|
||||
#[allow(clippy::field_reassign_with_default)]
|
||||
fn parse_arguments(args: impl uucore::Args) -> UResult<(Vec<String>, FmtOptions)> {
|
||||
// by default, goal is 93% of width
|
||||
const DEFAULT_GOAL_TO_WIDTH_RATIO: usize = 93;
|
||||
let xprefix = matches.contains_id(options::EXACT_PREFIX);
|
||||
let xanti_prefix = matches.contains_id(options::SKIP_PREFIX);
|
||||
|
||||
let matches = uu_app().try_get_matches_from(args)?;
|
||||
let prefix = matches.get_one::<String>(options::PREFIX).map(String::from);
|
||||
let anti_prefix = matches
|
||||
.get_one::<String>(options::SKIP_PREFIX)
|
||||
.map(String::from);
|
||||
|
||||
let mut files: Vec<String> = matches
|
||||
.get_many::<String>(ARG_FILES)
|
||||
.map(|v| v.map(ToString::to_string).collect())
|
||||
.unwrap_or_default();
|
||||
let width_opt = matches.get_one::<usize>(options::WIDTH);
|
||||
let goal_opt = matches.get_one::<usize>(options::GOAL);
|
||||
let (width, goal) = match (width_opt, goal_opt) {
|
||||
(Some(&w), Some(&g)) => {
|
||||
if g > w {
|
||||
return Err(USimpleError::new(1, "GOAL cannot be greater than WIDTH."));
|
||||
}
|
||||
(w, g)
|
||||
}
|
||||
(Some(&w), None) => {
|
||||
let g = (w * DEFAULT_GOAL_TO_WIDTH_RATIO / 100).min(w - 3);
|
||||
(w, g)
|
||||
}
|
||||
(None, Some(&g)) => {
|
||||
let w = (g * 100 / DEFAULT_GOAL_TO_WIDTH_RATIO).max(g + 3);
|
||||
(w, g)
|
||||
}
|
||||
(None, None) => (75, 70),
|
||||
};
|
||||
|
||||
let mut fmt_opts = FmtOptions::default();
|
||||
|
||||
fmt_opts.tagged = matches.get_flag(OPT_TAGGED_PARAGRAPH);
|
||||
if matches.get_flag(OPT_CROWN_MARGIN) {
|
||||
fmt_opts.crown = true;
|
||||
fmt_opts.tagged = false;
|
||||
}
|
||||
fmt_opts.mail = matches.get_flag(OPT_PRESERVE_HEADERS);
|
||||
fmt_opts.uniform = matches.get_flag(OPT_UNIFORM_SPACING);
|
||||
fmt_opts.quick = matches.get_flag(OPT_QUICK);
|
||||
if matches.get_flag(OPT_SPLIT_ONLY) {
|
||||
fmt_opts.split_only = true;
|
||||
fmt_opts.crown = false;
|
||||
fmt_opts.tagged = false;
|
||||
}
|
||||
fmt_opts.xprefix = matches.contains_id(OPT_EXACT_PREFIX);
|
||||
fmt_opts.xanti_prefix = matches.contains_id(OPT_SKIP_PREFIX);
|
||||
|
||||
if let Some(s) = matches.get_one::<String>(OPT_PREFIX).map(String::from) {
|
||||
fmt_opts.prefix = s;
|
||||
fmt_opts.use_prefix = true;
|
||||
};
|
||||
|
||||
if let Some(s) = matches.get_one::<String>(OPT_SKIP_PREFIX).map(String::from) {
|
||||
fmt_opts.anti_prefix = s;
|
||||
fmt_opts.use_anti_prefix = true;
|
||||
};
|
||||
|
||||
if let Some(width) = matches.get_one::<usize>(OPT_WIDTH) {
|
||||
fmt_opts.width = *width;
|
||||
if fmt_opts.width > MAX_WIDTH {
|
||||
if width > MAX_WIDTH {
|
||||
return Err(USimpleError::new(
|
||||
1,
|
||||
format!(
|
||||
"invalid width: '{}': Numerical result out of range",
|
||||
fmt_opts.width,
|
||||
),
|
||||
format!("invalid width: '{}': Numerical result out of range", width),
|
||||
));
|
||||
}
|
||||
fmt_opts.goal = cmp::min(
|
||||
fmt_opts.width * DEFAULT_GOAL_TO_WIDTH_RATIO / 100,
|
||||
fmt_opts.width - 3,
|
||||
);
|
||||
};
|
||||
|
||||
if let Some(goal) = matches.get_one::<usize>(OPT_GOAL) {
|
||||
fmt_opts.goal = *goal;
|
||||
if !matches.contains_id(OPT_WIDTH) {
|
||||
fmt_opts.width = cmp::max(
|
||||
fmt_opts.goal * 100 / DEFAULT_GOAL_TO_WIDTH_RATIO,
|
||||
fmt_opts.goal + 3,
|
||||
);
|
||||
} else if fmt_opts.goal > fmt_opts.width {
|
||||
return Err(USimpleError::new(1, "GOAL cannot be greater than WIDTH."));
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(s) = matches.get_one::<String>(OPT_TAB_WIDTH) {
|
||||
fmt_opts.tabwidth = match s.parse::<usize>() {
|
||||
Ok(t) => t,
|
||||
Err(e) => {
|
||||
return Err(USimpleError::new(
|
||||
1,
|
||||
format!("Invalid TABWIDTH specification: {}: {}", s.quote(), e),
|
||||
));
|
||||
}
|
||||
let mut tabwidth = 8;
|
||||
if let Some(s) = matches.get_one::<String>(options::TAB_WIDTH) {
|
||||
tabwidth = match s.parse::<usize>() {
|
||||
Ok(t) => t,
|
||||
Err(e) => {
|
||||
return Err(USimpleError::new(
|
||||
1,
|
||||
format!("Invalid TABWIDTH specification: {}: {}", s.quote(), e),
|
||||
));
|
||||
}
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
if fmt_opts.tabwidth < 1 {
|
||||
fmt_opts.tabwidth = 1;
|
||||
if tabwidth < 1 {
|
||||
tabwidth = 1;
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
crown,
|
||||
tagged,
|
||||
mail,
|
||||
uniform,
|
||||
quick,
|
||||
split_only,
|
||||
prefix,
|
||||
xprefix,
|
||||
anti_prefix,
|
||||
xanti_prefix,
|
||||
width,
|
||||
goal,
|
||||
tabwidth,
|
||||
})
|
||||
}
|
||||
|
||||
// immutable now
|
||||
let fmt_opts = fmt_opts;
|
||||
|
||||
if files.is_empty() {
|
||||
files.push("-".to_owned());
|
||||
}
|
||||
|
||||
Ok((files, fmt_opts))
|
||||
}
|
||||
|
||||
/// Process the content of a file and format it according to the provided options.
|
||||
|
@ -239,7 +200,14 @@ fn process_file(
|
|||
|
||||
#[uucore::main]
|
||||
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||
let (files, fmt_opts) = parse_arguments(args)?;
|
||||
let matches = uu_app().try_get_matches_from(args)?;
|
||||
|
||||
let files: Vec<String> = matches
|
||||
.get_many::<String>(options::FILES)
|
||||
.map(|v| v.map(ToString::to_string).collect())
|
||||
.unwrap_or(vec!["-".into()]);
|
||||
|
||||
let fmt_opts = FmtOptions::from_matches(&matches)?;
|
||||
|
||||
let mut ostream = BufWriter::new(stdout());
|
||||
|
||||
|
@ -257,9 +225,9 @@ pub fn uu_app() -> Command {
|
|||
.override_usage(format_usage(USAGE))
|
||||
.infer_long_args(true)
|
||||
.arg(
|
||||
Arg::new(OPT_CROWN_MARGIN)
|
||||
Arg::new(options::CROWN_MARGIN)
|
||||
.short('c')
|
||||
.long(OPT_CROWN_MARGIN)
|
||||
.long(options::CROWN_MARGIN)
|
||||
.help(
|
||||
"First and second line of paragraph \
|
||||
may have different indentations, in which \
|
||||
|
@ -269,7 +237,7 @@ pub fn uu_app() -> Command {
|
|||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
.arg(
|
||||
Arg::new(OPT_TAGGED_PARAGRAPH)
|
||||
Arg::new(options::TAGGED_PARAGRAPH)
|
||||
.short('t')
|
||||
.long("tagged-paragraph")
|
||||
.help(
|
||||
|
@ -279,7 +247,7 @@ pub fn uu_app() -> Command {
|
|||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
.arg(
|
||||
Arg::new(OPT_PRESERVE_HEADERS)
|
||||
Arg::new(options::PRESERVE_HEADERS)
|
||||
.short('m')
|
||||
.long("preserve-headers")
|
||||
.help(
|
||||
|
@ -289,14 +257,14 @@ pub fn uu_app() -> Command {
|
|||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
.arg(
|
||||
Arg::new(OPT_SPLIT_ONLY)
|
||||
Arg::new(options::SPLIT_ONLY)
|
||||
.short('s')
|
||||
.long("split-only")
|
||||
.help("Split lines only, do not reflow.")
|
||||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
.arg(
|
||||
Arg::new(OPT_UNIFORM_SPACING)
|
||||
Arg::new(options::UNIFORM_SPACING)
|
||||
.short('u')
|
||||
.long("uniform-spacing")
|
||||
.help(
|
||||
|
@ -309,7 +277,7 @@ pub fn uu_app() -> Command {
|
|||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
.arg(
|
||||
Arg::new(OPT_PREFIX)
|
||||
Arg::new(options::PREFIX)
|
||||
.short('p')
|
||||
.long("prefix")
|
||||
.help(
|
||||
|
@ -321,7 +289,7 @@ pub fn uu_app() -> Command {
|
|||
.value_name("PREFIX"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new(OPT_SKIP_PREFIX)
|
||||
Arg::new(options::SKIP_PREFIX)
|
||||
.short('P')
|
||||
.long("skip-prefix")
|
||||
.help(
|
||||
|
@ -332,7 +300,7 @@ pub fn uu_app() -> Command {
|
|||
.value_name("PSKIP"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new(OPT_EXACT_PREFIX)
|
||||
Arg::new(options::EXACT_PREFIX)
|
||||
.short('x')
|
||||
.long("exact-prefix")
|
||||
.help(
|
||||
|
@ -342,7 +310,7 @@ pub fn uu_app() -> Command {
|
|||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
.arg(
|
||||
Arg::new(OPT_EXACT_SKIP_PREFIX)
|
||||
Arg::new(options::EXACT_SKIP_PREFIX)
|
||||
.short('X')
|
||||
.long("exact-skip-prefix")
|
||||
.help(
|
||||
|
@ -352,7 +320,7 @@ pub fn uu_app() -> Command {
|
|||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
.arg(
|
||||
Arg::new(OPT_WIDTH)
|
||||
Arg::new(options::WIDTH)
|
||||
.short('w')
|
||||
.long("width")
|
||||
.help("Fill output lines up to a maximum of WIDTH columns, default 75.")
|
||||
|
@ -360,7 +328,7 @@ pub fn uu_app() -> Command {
|
|||
.value_parser(clap::value_parser!(usize)),
|
||||
)
|
||||
.arg(
|
||||
Arg::new(OPT_GOAL)
|
||||
Arg::new(options::GOAL)
|
||||
.short('g')
|
||||
.long("goal")
|
||||
.help("Goal width, default of 93% of WIDTH. Must be less than WIDTH.")
|
||||
|
@ -368,7 +336,7 @@ pub fn uu_app() -> Command {
|
|||
.value_parser(clap::value_parser!(usize)),
|
||||
)
|
||||
.arg(
|
||||
Arg::new(OPT_QUICK)
|
||||
Arg::new(options::QUICK)
|
||||
.short('q')
|
||||
.long("quick")
|
||||
.help(
|
||||
|
@ -378,7 +346,7 @@ pub fn uu_app() -> Command {
|
|||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
.arg(
|
||||
Arg::new(OPT_TAB_WIDTH)
|
||||
Arg::new(options::TAB_WIDTH)
|
||||
.short('T')
|
||||
.long("tab-width")
|
||||
.help(
|
||||
|
@ -389,7 +357,7 @@ pub fn uu_app() -> Command {
|
|||
.value_name("TABWIDTH"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new(ARG_FILES)
|
||||
Arg::new(options::FILES)
|
||||
.action(ArgAction::Append)
|
||||
.value_hint(clap::ValueHint::FilePath),
|
||||
)
|
||||
|
|
|
@ -5,10 +5,8 @@
|
|||
|
||||
// spell-checker:ignore (ToDO) INFTY MULT accum breakwords linebreak linebreaking linebreaks linelen maxlength minlength nchars ostream overlen parasplit plass posn powf punct signum slen sstart tabwidth tlen underlen winfo wlen wordlen
|
||||
|
||||
use std::cmp;
|
||||
use std::i64;
|
||||
use std::io::{BufWriter, Stdout, Write};
|
||||
use std::mem;
|
||||
use std::{cmp, i64, mem};
|
||||
|
||||
use uucore::crash;
|
||||
|
||||
|
@ -46,7 +44,7 @@ pub fn break_lines(
|
|||
ostream: &mut BufWriter<Stdout>,
|
||||
) -> std::io::Result<()> {
|
||||
// indent
|
||||
let p_indent = &para.indent_str[..];
|
||||
let p_indent = &para.indent_str;
|
||||
let p_indent_len = para.indent_len;
|
||||
|
||||
// words
|
||||
|
@ -55,14 +53,12 @@ pub fn break_lines(
|
|||
|
||||
// the first word will *always* appear on the first line
|
||||
// make sure of this here
|
||||
let (w, w_len) = match p_words_words.next() {
|
||||
Some(winfo) => (winfo.word, winfo.word_nchars),
|
||||
None => {
|
||||
return ostream.write_all(b"\n");
|
||||
}
|
||||
let Some(winfo) = p_words_words.next() else {
|
||||
return ostream.write_all(b"\n");
|
||||
};
|
||||
|
||||
// print the init, if it exists, and get its length
|
||||
let p_init_len = w_len
|
||||
let p_init_len = winfo.word_nchars
|
||||
+ if opts.crown || opts.tagged {
|
||||
// handle "init" portion
|
||||
ostream.write_all(para.init_str.as_bytes())?;
|
||||
|
@ -75,8 +71,9 @@ pub fn break_lines(
|
|||
// except that mail headers get no indent at all
|
||||
0
|
||||
};
|
||||
|
||||
// write first word after writing init
|
||||
ostream.write_all(w.as_bytes())?;
|
||||
ostream.write_all(winfo.word.as_bytes())?;
|
||||
|
||||
// does this paragraph require uniform spacing?
|
||||
let uniform = para.mail_header || opts.uniform;
|
||||
|
@ -103,15 +100,16 @@ fn break_simple<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
|
|||
mut iter: T,
|
||||
args: &mut BreakArgs<'a>,
|
||||
) -> std::io::Result<()> {
|
||||
iter.try_fold((args.init_len, false), |l, winfo| {
|
||||
accum_words_simple(args, l, winfo)
|
||||
iter.try_fold((args.init_len, false), |(l, prev_punct), winfo| {
|
||||
accum_words_simple(args, l, prev_punct, winfo)
|
||||
})?;
|
||||
args.ostream.write_all(b"\n")
|
||||
}
|
||||
|
||||
fn accum_words_simple<'a>(
|
||||
args: &mut BreakArgs<'a>,
|
||||
(l, prev_punct): (usize, bool),
|
||||
l: usize,
|
||||
prev_punct: bool,
|
||||
winfo: &'a WordInfo<'a>,
|
||||
) -> std::io::Result<(usize, bool)> {
|
||||
// compute the length of this word, considering how tabs will expand at this position on the line
|
||||
|
@ -233,14 +231,14 @@ fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
|
|||
linebreak: None,
|
||||
break_before: false,
|
||||
demerits: 0,
|
||||
prev_rat: 0.0f32,
|
||||
prev_rat: 0.0,
|
||||
length: args.init_len,
|
||||
fresh: false,
|
||||
}];
|
||||
// this vec holds the current active linebreaks; next_ holds the breaks that will be active for
|
||||
// the next word
|
||||
let active_breaks = &mut vec![0];
|
||||
let next_active_breaks = &mut vec![];
|
||||
let mut active_breaks = vec![0];
|
||||
let mut next_active_breaks = vec![];
|
||||
|
||||
let stretch = (args.opts.width - args.opts.goal) as isize;
|
||||
let minlength = args.opts.goal - stretch as usize;
|
||||
|
@ -248,10 +246,7 @@ fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
|
|||
let mut is_sentence_start = false;
|
||||
let mut least_demerits = 0;
|
||||
loop {
|
||||
let w = match iter.next() {
|
||||
None => break,
|
||||
Some(w) => w,
|
||||
};
|
||||
let Some(w) = iter.next() else { break };
|
||||
|
||||
// if this is the last word, we don't add additional demerits for this break
|
||||
let (is_last_word, is_sentence_end) = match iter.peek() {
|
||||
|
@ -358,13 +353,13 @@ fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
|
|||
least_demerits = cmp::max(ld_next, 0);
|
||||
}
|
||||
// swap in new list of active breaks
|
||||
mem::swap(active_breaks, next_active_breaks);
|
||||
mem::swap(&mut active_breaks, &mut next_active_breaks);
|
||||
// If this was the last word in a sentence, the next one must be the first in the next.
|
||||
is_sentence_start = is_sentence_end;
|
||||
}
|
||||
|
||||
// return the best path
|
||||
build_best_path(&linebreaks, active_breaks)
|
||||
build_best_path(&linebreaks, &active_breaks)
|
||||
}
|
||||
|
||||
fn build_best_path<'a>(paths: &[LineBreak<'a>], active: &[usize]) -> Vec<(&'a WordInfo<'a>, bool)> {
|
||||
|
|
|
@ -52,18 +52,22 @@ impl Line {
|
|||
}
|
||||
}
|
||||
|
||||
// each line's prefix has to be considered to know whether to merge it with
|
||||
// the next line or not
|
||||
/// Each line's prefix has to be considered to know whether to merge it with
|
||||
/// the next line or not
|
||||
#[derive(Debug)]
|
||||
pub struct FileLine {
|
||||
line: String,
|
||||
indent_end: usize, // the end of the indent, always the start of the text
|
||||
pfxind_end: usize, // the end of the PREFIX's indent, that is, the spaces before the prefix
|
||||
indent_len: usize, // display length of indent taking into account tabs
|
||||
prefix_len: usize, // PREFIX indent length taking into account tabs
|
||||
/// The end of the indent, always the start of the text
|
||||
indent_end: usize,
|
||||
/// The end of the PREFIX's indent, that is, the spaces before the prefix
|
||||
pfxind_end: usize,
|
||||
/// Display length of indent taking into account tabs
|
||||
indent_len: usize,
|
||||
/// PREFIX indent length taking into account tabs
|
||||
prefix_len: usize,
|
||||
}
|
||||
|
||||
// iterator that produces a stream of Lines from a file
|
||||
/// Iterator that produces a stream of Lines from a file
|
||||
pub struct FileLines<'a> {
|
||||
opts: &'a FmtOptions,
|
||||
lines: Lines<&'a mut FileOrStdReader>,
|
||||
|
@ -74,26 +78,22 @@ impl<'a> FileLines<'a> {
|
|||
FileLines { opts, lines }
|
||||
}
|
||||
|
||||
// returns true if this line should be formatted
|
||||
/// returns true if this line should be formatted
|
||||
fn match_prefix(&self, line: &str) -> (bool, usize) {
|
||||
if !self.opts.use_prefix {
|
||||
let Some(prefix) = &self.opts.prefix else {
|
||||
return (true, 0);
|
||||
}
|
||||
};
|
||||
|
||||
FileLines::match_prefix_generic(&self.opts.prefix[..], line, self.opts.xprefix)
|
||||
FileLines::match_prefix_generic(prefix, line, self.opts.xprefix)
|
||||
}
|
||||
|
||||
// returns true if this line should be formatted
|
||||
/// returns true if this line should be formatted
|
||||
fn match_anti_prefix(&self, line: &str) -> bool {
|
||||
if !self.opts.use_anti_prefix {
|
||||
let Some(anti_prefix) = &self.opts.anti_prefix else {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
match FileLines::match_prefix_generic(
|
||||
&self.opts.anti_prefix[..],
|
||||
line,
|
||||
self.opts.xanti_prefix,
|
||||
) {
|
||||
match FileLines::match_prefix_generic(anti_prefix, line, self.opts.xanti_prefix) {
|
||||
(true, _) => false,
|
||||
(_, _) => true,
|
||||
}
|
||||
|
@ -148,13 +148,7 @@ impl<'a> Iterator for FileLines<'a> {
|
|||
type Item = Line;
|
||||
|
||||
fn next(&mut self) -> Option<Line> {
|
||||
let n = match self.lines.next() {
|
||||
Some(t) => match t {
|
||||
Ok(tt) => tt,
|
||||
Err(_) => return None,
|
||||
},
|
||||
None => return None,
|
||||
};
|
||||
let n = self.lines.next()?.ok()?;
|
||||
|
||||
// if this line is entirely whitespace,
|
||||
// emit a blank line
|
||||
|
@ -178,7 +172,7 @@ impl<'a> Iterator for FileLines<'a> {
|
|||
// not truly blank we will not allow mail headers on the
|
||||
// following line)
|
||||
if pmatch
|
||||
&& n[poffset + self.opts.prefix.len()..]
|
||||
&& n[poffset + self.opts.prefix.as_ref().map_or(0, |s| s.len())..]
|
||||
.chars()
|
||||
.all(char::is_whitespace)
|
||||
{
|
||||
|
@ -192,7 +186,7 @@ impl<'a> Iterator for FileLines<'a> {
|
|||
}
|
||||
|
||||
// figure out the indent, prefix, and prefixindent ending points
|
||||
let prefix_end = poffset + self.opts.prefix.len();
|
||||
let prefix_end = poffset + self.opts.prefix.as_ref().map_or(0, |s| s.len());
|
||||
let (indent_end, prefix_len, indent_len) = self.compute_indent(&n[..], prefix_end);
|
||||
|
||||
Some(Line::FormatLine(FileLine {
|
||||
|
@ -205,24 +199,33 @@ impl<'a> Iterator for FileLines<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
// a paragraph : a collection of FileLines that are to be formatted
|
||||
// plus info about the paragraph's indentation
|
||||
// (but we only retain the String from the FileLine; the other info
|
||||
// is only there to help us in deciding how to merge lines into Paragraphs
|
||||
/// A paragraph : a collection of FileLines that are to be formatted
|
||||
/// plus info about the paragraph's indentation
|
||||
///
|
||||
/// We only retain the String from the FileLine; the other info
|
||||
/// is only there to help us in deciding how to merge lines into Paragraphs
|
||||
#[derive(Debug)]
|
||||
pub struct Paragraph {
|
||||
lines: Vec<String>, // the lines of the file
|
||||
pub init_str: String, // string representing the init, that is, the first line's indent
|
||||
pub init_len: usize, // printable length of the init string considering TABWIDTH
|
||||
init_end: usize, // byte location of end of init in first line String
|
||||
pub indent_str: String, // string representing indent
|
||||
pub indent_len: usize, // length of above
|
||||
indent_end: usize, // byte location of end of indent (in crown and tagged mode, only applies to 2nd line and onward)
|
||||
pub mail_header: bool, // we need to know if this is a mail header because we do word splitting differently in that case
|
||||
/// the lines of the file
|
||||
lines: Vec<String>,
|
||||
/// string representing the init, that is, the first line's indent
|
||||
pub init_str: String,
|
||||
/// printable length of the init string considering TABWIDTH
|
||||
pub init_len: usize,
|
||||
/// byte location of end of init in first line String
|
||||
init_end: usize,
|
||||
/// string representing indent
|
||||
pub indent_str: String,
|
||||
/// length of above
|
||||
pub indent_len: usize,
|
||||
/// byte location of end of indent (in crown and tagged mode, only applies to 2nd line and onward)
|
||||
indent_end: usize,
|
||||
/// we need to know if this is a mail header because we do word splitting differently in that case
|
||||
pub mail_header: bool,
|
||||
}
|
||||
|
||||
// an iterator producing a stream of paragraphs from a stream of lines
|
||||
// given a set of options.
|
||||
/// An iterator producing a stream of paragraphs from a stream of lines
|
||||
/// given a set of options.
|
||||
pub struct ParagraphStream<'a> {
|
||||
lines: Peekable<FileLines<'a>>,
|
||||
next_mail: bool,
|
||||
|
@ -240,7 +243,7 @@ impl<'a> ParagraphStream<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
// detect RFC822 mail header
|
||||
/// Detect RFC822 mail header
|
||||
fn is_mail_header(line: &FileLine) -> bool {
|
||||
// a mail header begins with either "From " (envelope sender line)
|
||||
// or with a sequence of printable ASCII chars (33 to 126, inclusive,
|
||||
|
@ -276,12 +279,9 @@ impl<'a> Iterator for ParagraphStream<'a> {
|
|||
#[allow(clippy::cognitive_complexity)]
|
||||
fn next(&mut self) -> Option<Result<Paragraph, String>> {
|
||||
// return a NoFormatLine in an Err; it should immediately be output
|
||||
let noformat = match self.lines.peek() {
|
||||
None => return None,
|
||||
Some(l) => match *l {
|
||||
Line::FormatLine(_) => false,
|
||||
Line::NoFormatLine(_, _) => true,
|
||||
},
|
||||
let noformat = match self.lines.peek()? {
|
||||
Line::FormatLine(_) => false,
|
||||
Line::NoFormatLine(_, _) => true,
|
||||
};
|
||||
|
||||
// found a NoFormatLine, immediately dump it out
|
||||
|
@ -305,95 +305,89 @@ impl<'a> Iterator for ParagraphStream<'a> {
|
|||
let mut in_mail = false;
|
||||
let mut second_done = false; // for when we use crown or tagged mode
|
||||
loop {
|
||||
{
|
||||
// peek ahead
|
||||
// need to explicitly force fl out of scope before we can call self.lines.next()
|
||||
let fl = match self.lines.peek() {
|
||||
None => break,
|
||||
Some(l) => match *l {
|
||||
Line::FormatLine(ref x) => x,
|
||||
Line::NoFormatLine(..) => break,
|
||||
},
|
||||
};
|
||||
// peek ahead
|
||||
// need to explicitly force fl out of scope before we can call self.lines.next()
|
||||
let Some(Line::FormatLine(fl)) = self.lines.peek() else {
|
||||
break;
|
||||
};
|
||||
|
||||
if p_lines.is_empty() {
|
||||
// first time through the loop, get things set up
|
||||
// detect mail header
|
||||
if self.opts.mail && self.next_mail && ParagraphStream::is_mail_header(fl) {
|
||||
in_mail = true;
|
||||
// there can't be any indent or pfxind because otherwise is_mail_header
|
||||
// would fail since there cannot be any whitespace before the colon in a
|
||||
// valid header field
|
||||
indent_str.push_str("  ");
|
||||
indent_len = 2;
|
||||
if p_lines.is_empty() {
|
||||
// first time through the loop, get things set up
|
||||
// detect mail header
|
||||
if self.opts.mail && self.next_mail && ParagraphStream::is_mail_header(fl) {
|
||||
in_mail = true;
|
||||
// there can't be any indent or pfxind because otherwise is_mail_header
|
||||
// would fail since there cannot be any whitespace before the colon in a
|
||||
// valid header field
|
||||
indent_str.push_str("  ");
|
||||
indent_len = 2;
|
||||
} else {
|
||||
if self.opts.crown || self.opts.tagged {
|
||||
init_str.push_str(&fl.line[..fl.indent_end]);
|
||||
init_len = fl.indent_len;
|
||||
init_end = fl.indent_end;
|
||||
} else {
|
||||
if self.opts.crown || self.opts.tagged {
|
||||
init_str.push_str(&fl.line[..fl.indent_end]);
|
||||
init_len = fl.indent_len;
|
||||
init_end = fl.indent_end;
|
||||
} else {
|
||||
second_done = true;
|
||||
}
|
||||
|
||||
// these will be overwritten in the 2nd line of crown or tagged mode, but
|
||||
// we are not guaranteed to get to the 2nd line, e.g., if the next line
|
||||
// is a NoFormatLine or None. Thus, we set sane defaults the 1st time around
|
||||
indent_str.push_str(&fl.line[..fl.indent_end]);
|
||||
indent_len = fl.indent_len;
|
||||
indent_end = fl.indent_end;
|
||||
|
||||
// save these to check for matching lines
|
||||
prefix_len = fl.prefix_len;
|
||||
pfxind_end = fl.pfxind_end;
|
||||
|
||||
// in tagged mode, add 4 spaces of additional indenting by default
|
||||
// (gnu fmt's behavior is different: it seems to find the closest column to
|
||||
// indent_end that is divisible by 3. But honestly that behavior seems
|
||||
// pretty arbitrary.
|
||||
// Perhaps a better default would be 1 TABWIDTH? But ugh that's so big.
|
||||
if self.opts.tagged {
|
||||
indent_str.push_str("    ");
|
||||
indent_len += 4;
|
||||
}
|
||||
}
|
||||
} else if in_mail {
|
||||
// lines following mail headers must begin with spaces
|
||||
if fl.indent_end == 0 || (self.opts.use_prefix && fl.pfxind_end == 0) {
|
||||
break; // this line does not begin with spaces
|
||||
}
|
||||
} else if !second_done {
|
||||
// now we have enough info to handle crown margin and tagged mode
|
||||
|
||||
// in both crown and tagged modes we require that prefix_len is the same
|
||||
if prefix_len != fl.prefix_len || pfxind_end != fl.pfxind_end {
|
||||
break;
|
||||
second_done = true;
|
||||
}
|
||||
|
||||
// in tagged mode, indent has to be *different* on following lines
|
||||
if self.opts.tagged
|
||||
&& indent_len - 4 == fl.indent_len
|
||||
&& indent_end == fl.indent_end
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
// this is part of the same paragraph, get the indent info from this line
|
||||
indent_str.clear();
|
||||
// these will be overwritten in the 2nd line of crown or tagged mode, but
|
||||
// we are not guaranteed to get to the 2nd line, e.g., if the next line
|
||||
// is a NoFormatLine or None. Thus, we set sane defaults the 1st time around
|
||||
indent_str.push_str(&fl.line[..fl.indent_end]);
|
||||
indent_len = fl.indent_len;
|
||||
indent_end = fl.indent_end;
|
||||
|
||||
second_done = true;
|
||||
} else {
|
||||
// detect mismatch
|
||||
if indent_end != fl.indent_end
|
||||
|| pfxind_end != fl.pfxind_end
|
||||
|| indent_len != fl.indent_len
|
||||
|| prefix_len != fl.prefix_len
|
||||
{
|
||||
break;
|
||||
// save these to check for matching lines
|
||||
prefix_len = fl.prefix_len;
|
||||
pfxind_end = fl.pfxind_end;
|
||||
|
||||
// in tagged mode, add 4 spaces of additional indenting by default
|
||||
// (gnu fmt's behavior is different: it seems to find the closest column to
|
||||
// indent_end that is divisible by 3. But honestly that behavior seems
|
||||
// pretty arbitrary.
|
||||
// Perhaps a better default would be 1 TABWIDTH? But ugh that's so big.
|
||||
if self.opts.tagged {
|
||||
indent_str.push_str("    ");
|
||||
indent_len += 4;
|
||||
}
|
||||
}
|
||||
} else if in_mail {
|
||||
// lines following mail headers must begin with spaces
|
||||
if fl.indent_end == 0 || (self.opts.prefix.is_some() && fl.pfxind_end == 0) {
|
||||
break; // this line does not begin with spaces
|
||||
}
|
||||
} else if !second_done {
|
||||
// now we have enough info to handle crown margin and tagged mode
|
||||
|
||||
// in both crown and tagged modes we require that prefix_len is the same
|
||||
if prefix_len != fl.prefix_len || pfxind_end != fl.pfxind_end {
|
||||
break;
|
||||
}
|
||||
|
||||
// in tagged mode, indent has to be *different* on following lines
|
||||
if self.opts.tagged
|
||||
&& indent_len - 4 == fl.indent_len
|
||||
&& indent_end == fl.indent_end
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
// this is part of the same paragraph, get the indent info from this line
|
||||
indent_str.clear();
|
||||
indent_str.push_str(&fl.line[..fl.indent_end]);
|
||||
indent_len = fl.indent_len;
|
||||
indent_end = fl.indent_end;
|
||||
|
||||
second_done = true;
|
||||
} else {
|
||||
// detect mismatch
|
||||
if indent_end != fl.indent_end
|
||||
|| pfxind_end != fl.pfxind_end
|
||||
|| indent_len != fl.indent_len
|
||||
|| prefix_len != fl.prefix_len
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
p_lines.push(self.lines.next().unwrap().get_formatline().line);
|
||||
|
@ -429,7 +423,7 @@ pub struct ParaWords<'a> {
|
|||
}
|
||||
|
||||
impl<'a> ParaWords<'a> {
|
||||
pub fn new<'b>(opts: &'b FmtOptions, para: &'b Paragraph) -> ParaWords<'b> {
|
||||
pub fn new(opts: &'a FmtOptions, para: &'a Paragraph) -> Self {
|
||||
let mut pw = ParaWords {
|
||||
opts,
|
||||
para,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue