mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-30 04:27:45 +00:00
fmt: style modifications suggested by Arcterus
This commit is contained in:
parent
6228bb4b85
commit
5d2a2b6a0b
2 changed files with 194 additions and 175 deletions
159
fmt/fmt.rs
159
fmt/fmt.rs
|
@ -24,8 +24,8 @@ use parasplit::{ParagraphStream, ParaWords};
|
||||||
macro_rules! silent_unwrap(
|
macro_rules! silent_unwrap(
|
||||||
($exp:expr) => (
|
($exp:expr) => (
|
||||||
match $exp {
|
match $exp {
|
||||||
Ok(_) => (),
|
Ok(_) => (),
|
||||||
Err(_) => unsafe { ::libc::exit(1) }
|
Err(_) => unsafe { ::libc::exit(1) }
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
@ -39,21 +39,21 @@ static NAME: &'static str = "fmt";
|
||||||
static VERSION: &'static str = "0.0.1";
|
static VERSION: &'static str = "0.0.1";
|
||||||
|
|
||||||
struct FmtOptions {
|
struct FmtOptions {
|
||||||
crown : bool,
|
crown : bool,
|
||||||
tagged : bool,
|
tagged : bool,
|
||||||
mail : bool,
|
mail : bool,
|
||||||
split_only : bool,
|
split_only : bool,
|
||||||
use_prefix : bool,
|
use_prefix : bool,
|
||||||
prefix : String,
|
prefix : String,
|
||||||
xprefix : bool,
|
xprefix : bool,
|
||||||
prefix_len : uint,
|
prefix_len : uint,
|
||||||
use_anti_prefix : bool,
|
use_anti_prefix : bool,
|
||||||
anti_prefix : String,
|
anti_prefix : String,
|
||||||
xanti_prefix: bool,
|
xanti_prefix : bool,
|
||||||
uniform : bool,
|
uniform : bool,
|
||||||
width : uint,
|
width : uint,
|
||||||
goal : uint,
|
goal : uint,
|
||||||
tabwidth : uint,
|
tabwidth : uint,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
|
@ -97,71 +97,86 @@ fn uumain(args: Vec<String>) -> int {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut fmt_opts = FmtOptions { crown : false
|
let mut fmt_opts = FmtOptions {
|
||||||
, tagged : false
|
crown : false,
|
||||||
, mail : false
|
tagged : false,
|
||||||
, uniform : false
|
mail : false,
|
||||||
, split_only : false
|
uniform : false,
|
||||||
, use_prefix : false
|
split_only : false,
|
||||||
, prefix : String::new()
|
use_prefix : false,
|
||||||
, xprefix : false
|
prefix : String::new(),
|
||||||
, prefix_len : 0
|
xprefix : false,
|
||||||
, use_anti_prefix : false
|
prefix_len : 0,
|
||||||
, anti_prefix : String::new()
|
use_anti_prefix : false,
|
||||||
, xanti_prefix: false
|
anti_prefix : String::new(),
|
||||||
, width : 78
|
xanti_prefix : false,
|
||||||
, goal : 72
|
width : 78,
|
||||||
, tabwidth : 8
|
goal : 72,
|
||||||
};
|
tabwidth : 8,
|
||||||
|
};
|
||||||
|
|
||||||
if matches.opt_present("t") { fmt_opts.tagged = true; }
|
if matches.opt_present("t") { fmt_opts.tagged = true; }
|
||||||
if matches.opt_present("c") { fmt_opts.crown = true; fmt_opts.tagged = false; }
|
if matches.opt_present("c") { fmt_opts.crown = true; fmt_opts.tagged = false; }
|
||||||
if matches.opt_present("m") { fmt_opts.mail = true; }
|
if matches.opt_present("m") { fmt_opts.mail = true; }
|
||||||
if matches.opt_present("u") { fmt_opts.uniform = true; }
|
if matches.opt_present("u") { fmt_opts.uniform = true; }
|
||||||
if matches.opt_present("s") { fmt_opts.split_only = true; fmt_opts.crown = false; fmt_opts.tagged = false; }
|
if matches.opt_present("s") { fmt_opts.split_only = true; fmt_opts.crown = false; fmt_opts.tagged = false; }
|
||||||
if matches.opt_present("x") { fmt_opts.xprefix = true; }
|
if matches.opt_present("x") { fmt_opts.xprefix = true; }
|
||||||
if matches.opt_present("X") { fmt_opts.xanti_prefix = true; }
|
if matches.opt_present("X") { fmt_opts.xanti_prefix = true; }
|
||||||
|
|
||||||
match matches.opt_str("p") {
|
match matches.opt_str("p") {
|
||||||
Some(s) => { fmt_opts.prefix = s; fmt_opts.use_prefix = true; fmt_opts.prefix_len = fmt_opts.prefix.as_slice().char_len() },
|
Some(s) => {
|
||||||
None => ()
|
fmt_opts.prefix = s;
|
||||||
|
fmt_opts.use_prefix = true;
|
||||||
|
fmt_opts.prefix_len = fmt_opts.prefix.as_slice().char_len()
|
||||||
|
}
|
||||||
|
None => ()
|
||||||
};
|
};
|
||||||
|
|
||||||
match matches.opt_str("P") {
|
match matches.opt_str("P") {
|
||||||
Some(s) => { fmt_opts.anti_prefix = s; fmt_opts.use_anti_prefix = true; },
|
Some(s) => {
|
||||||
None => ()
|
fmt_opts.anti_prefix = s;
|
||||||
|
fmt_opts.use_anti_prefix = true;
|
||||||
|
}
|
||||||
|
None => ()
|
||||||
};
|
};
|
||||||
|
|
||||||
match matches.opt_str("w") {
|
match matches.opt_str("w") {
|
||||||
Some(s) => { fmt_opts.width = match from_str(s.as_slice()) {
|
Some(s) => {
|
||||||
Some(t) => t,
|
fmt_opts.width =
|
||||||
None => { crash!(1, "Invalid WIDTH specification: `{}'", s); }
|
match from_str(s.as_slice()) {
|
||||||
};
|
Some(t) => t,
|
||||||
fmt_opts.goal = std::cmp::min(fmt_opts.width * 92 / 100, fmt_opts.width - 4);
|
None => { crash!(1, "Invalid WIDTH specification: `{}'", s); }
|
||||||
},
|
};
|
||||||
None => ()
|
fmt_opts.goal = std::cmp::min(fmt_opts.width * 92 / 100, fmt_opts.width - 4);
|
||||||
|
}
|
||||||
|
None => ()
|
||||||
};
|
};
|
||||||
|
|
||||||
match matches.opt_str("g") {
|
match matches.opt_str("g") {
|
||||||
Some(s) => { fmt_opts.goal = match from_str(s.as_slice()) {
|
Some(s) => {
|
||||||
Some(t) => t,
|
fmt_opts.goal =
|
||||||
None => { crash!(1, "Invalid GOAL specification: `{}'", s); }
|
match from_str(s.as_slice()) {
|
||||||
};
|
Some(t) => t,
|
||||||
if ! matches.opt_present("w") {
|
None => { crash!(1, "Invalid GOAL specification: `{}'", s); }
|
||||||
fmt_opts.width = std::cmp::max(fmt_opts.goal * 100 / 92, fmt_opts.goal + 4);
|
};
|
||||||
} else if fmt_opts.goal > fmt_opts.width {
|
if !matches.opt_present("w") {
|
||||||
crash!(1, "GOAL cannot be greater than WIDTH.");
|
fmt_opts.width = std::cmp::max(fmt_opts.goal * 100 / 92, fmt_opts.goal + 4);
|
||||||
}
|
} else if fmt_opts.goal > fmt_opts.width {
|
||||||
},
|
crash!(1, "GOAL cannot be greater than WIDTH.");
|
||||||
None => ()
|
}
|
||||||
|
}
|
||||||
|
None => ()
|
||||||
};
|
};
|
||||||
|
|
||||||
match matches.opt_str("T") {
|
match matches.opt_str("T") {
|
||||||
Some(s) => fmt_opts.tabwidth = match from_str(s.as_slice()) {
|
Some(s) => {
|
||||||
Some(t) => t,
|
fmt_opts.tabwidth =
|
||||||
None => { crash!(1, "Invalid TABWIDTH specification: `{}'", s); }
|
match from_str(s.as_slice()) {
|
||||||
},
|
Some(t) => t,
|
||||||
None => ()
|
None => { crash!(1, "Invalid TABWIDTH specification: `{}'", s); }
|
||||||
|
};
|
||||||
|
}
|
||||||
|
None => ()
|
||||||
};
|
};
|
||||||
|
|
||||||
if fmt_opts.tabwidth < 1 {
|
if fmt_opts.tabwidth < 1 {
|
||||||
|
@ -179,17 +194,19 @@ fn uumain(args: Vec<String>) -> int {
|
||||||
let mut ostream = box BufferedWriter::new(stdout_raw()) as Box<Writer>;
|
let mut ostream = box BufferedWriter::new(stdout_raw()) as Box<Writer>;
|
||||||
|
|
||||||
for i in files.iter().map(|x| x.as_slice()) {
|
for i in files.iter().map(|x| x.as_slice()) {
|
||||||
let mut fp = match open_file(i) {
|
let mut fp =
|
||||||
Err(e) => { show_warning!("{}: {}",i,e);
|
match open_file(i) {
|
||||||
continue;
|
Err(e) => {
|
||||||
}
|
show_warning!("{}: {}",i,e);
|
||||||
Ok(f) => f
|
continue;
|
||||||
};
|
}
|
||||||
|
Ok(f) => f
|
||||||
|
};
|
||||||
let mut pStream = ParagraphStream::new(&fmt_opts, &mut fp);
|
let mut pStream = ParagraphStream::new(&fmt_opts, &mut fp);
|
||||||
for paraResult in pStream {
|
for paraResult in pStream {
|
||||||
match paraResult {
|
match paraResult {
|
||||||
Err(s) => silent_unwrap!(ostream.write(s.as_bytes())),
|
Err(s) => silent_unwrap!(ostream.write(s.as_bytes())),
|
||||||
Ok(para) => {
|
Ok(para) => {
|
||||||
// indent
|
// indent
|
||||||
let pIndent = para.pfxind_str.clone().append(fmt_opts.prefix.as_slice()).append(para.indent_str.as_slice());
|
let pIndent = para.pfxind_str.clone().append(fmt_opts.prefix.as_slice()).append(para.indent_str.as_slice());
|
||||||
let pIndentLen = para.pfxind_len + fmt_opts.prefix_len + para.indent_len;
|
let pIndentLen = para.pfxind_len + fmt_opts.prefix_len + para.indent_len;
|
||||||
|
@ -204,7 +221,7 @@ fn uumain(args: Vec<String>) -> int {
|
||||||
// handle "init" portion
|
// handle "init" portion
|
||||||
silent_unwrap!(ostream.write(para.init_str.as_bytes()));
|
silent_unwrap!(ostream.write(para.init_str.as_bytes()));
|
||||||
para.init_len
|
para.init_len
|
||||||
} else if ! para.mail_header {
|
} else if !para.mail_header {
|
||||||
// for non-(crown, tagged) that's the same as a normal indent
|
// for non-(crown, tagged) that's the same as a normal indent
|
||||||
silent_unwrap!(ostream.write(pIndent.as_bytes()));
|
silent_unwrap!(ostream.write(pIndent.as_bytes()));
|
||||||
pIndentLen
|
pIndentLen
|
||||||
|
@ -243,8 +260,8 @@ fn open_file(filename: &str) -> IoResult<FileOrStdReader> {
|
||||||
Ok(BufferedReader::new(box stdin_raw() as Box<Reader>))
|
Ok(BufferedReader::new(box stdin_raw() as Box<Reader>))
|
||||||
} else {
|
} else {
|
||||||
match File::open(&Path::new(filename)) {
|
match File::open(&Path::new(filename)) {
|
||||||
Ok(f) => Ok(BufferedReader::new(box f as Box<Reader>)),
|
Ok(f) => Ok(BufferedReader::new(box f as Box<Reader>)),
|
||||||
Err(e) => return Err(e)
|
Err(e) => return Err(e)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
208
fmt/parasplit.rs
208
fmt/parasplit.rs
|
@ -26,16 +26,16 @@ impl Line {
|
||||||
// when we know that it's a FormatLine, as in the ParagraphStream iterator
|
// when we know that it's a FormatLine, as in the ParagraphStream iterator
|
||||||
fn get_fileline(self) -> FileLine {
|
fn get_fileline(self) -> FileLine {
|
||||||
match self {
|
match self {
|
||||||
FormatLine(fl) => fl,
|
FormatLine(fl) => fl,
|
||||||
NoFormatLine(..) => fail!("Found NoFormatLine when expecting FormatLine")
|
NoFormatLine(..) => fail!("Found NoFormatLine when expecting FormatLine")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// when we know that it's a NoFormatLine, as in the ParagraphStream iterator
|
// when we know that it's a NoFormatLine, as in the ParagraphStream iterator
|
||||||
fn get_noformatline(self) -> (String, bool) {
|
fn get_noformatline(self) -> (String, bool) {
|
||||||
match self {
|
match self {
|
||||||
NoFormatLine(s, b) => (s, b),
|
NoFormatLine(s, b) => (s, b),
|
||||||
FormatLine(..) => fail!("Found FormatLine when expecting NoFormatLine")
|
FormatLine(..) => fail!("Found FormatLine when expecting NoFormatLine")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -44,18 +44,18 @@ impl Line {
|
||||||
// the next line or not
|
// the next line or not
|
||||||
#[deriving(Show)]
|
#[deriving(Show)]
|
||||||
struct FileLine {
|
struct FileLine {
|
||||||
line : String,
|
line : String,
|
||||||
indent_end : uint, // the end of the indent, always the start of the text
|
indent_end : uint, // the end of the indent, always the start of the text
|
||||||
prefix_end : uint, // the end of the PREFIX
|
prefix_end : uint, // the end of the PREFIX
|
||||||
pfxind_end : uint, // the end of the PREFIX's indent, that is, the spaces before the prefix
|
pfxind_end : uint, // the end of the PREFIX's indent, that is, the spaces before the prefix
|
||||||
indent_len : uint, // display length of indent taking into account TABWIDTH
|
indent_len : uint, // display length of indent taking into account TABWIDTH
|
||||||
pfxind_len : uint, // PREFIX indent length taking into account TABWIDTH
|
pfxind_len : uint, // PREFIX indent length taking into account TABWIDTH
|
||||||
}
|
}
|
||||||
|
|
||||||
// iterator that produces a stream of Lines from a file
|
// iterator that produces a stream of Lines from a file
|
||||||
struct FileLines<'a> {
|
struct FileLines<'a> {
|
||||||
opts : &'a FmtOptions,
|
opts : &'a FmtOptions,
|
||||||
lines : Lines<'a, FileOrStdReader>,
|
lines : Lines<'a, FileOrStdReader>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> FileLines<'a> {
|
impl<'a> FileLines<'a> {
|
||||||
|
@ -65,18 +65,18 @@ impl<'a> FileLines<'a> {
|
||||||
|
|
||||||
// returns true if this line should be formatted
|
// returns true if this line should be formatted
|
||||||
fn match_prefix(&self, line: &str) -> (bool, uint) {
|
fn match_prefix(&self, line: &str) -> (bool, uint) {
|
||||||
if ! self.opts.use_prefix { return (true, 0u); }
|
if !self.opts.use_prefix { return (true, 0u); }
|
||||||
|
|
||||||
FileLines::match_prefix_generic(self.opts.prefix.as_slice(), line, self.opts.xprefix)
|
FileLines::match_prefix_generic(self.opts.prefix.as_slice(), line, self.opts.xprefix)
|
||||||
}
|
}
|
||||||
|
|
||||||
// returns true if this line should be formatted
|
// returns true if this line should be formatted
|
||||||
fn match_anti_prefix(&self, line: &str) -> bool {
|
fn match_anti_prefix(&self, line: &str) -> bool {
|
||||||
if ! self.opts.use_anti_prefix { return true; }
|
if !self.opts.use_anti_prefix { return true; }
|
||||||
|
|
||||||
match FileLines::match_prefix_generic(self.opts.anti_prefix.as_slice(), line, self.opts.xanti_prefix) {
|
match FileLines::match_prefix_generic(self.opts.anti_prefix.as_slice(), line, self.opts.xanti_prefix) {
|
||||||
(true, _) => false,
|
(true, _) => false,
|
||||||
(_ , _) => true
|
(_ , _) => true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -85,7 +85,7 @@ impl<'a> FileLines<'a> {
|
||||||
return (true, 0);
|
return (true, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ! exact {
|
if !exact {
|
||||||
// we do it this way rather than byte indexing to support unicode whitespace chars
|
// we do it this way rather than byte indexing to support unicode whitespace chars
|
||||||
let mut i = 0u;
|
let mut i = 0u;
|
||||||
while (i < line.len()) && line.char_at(i).is_whitespace() {
|
while (i < line.len()) && line.char_at(i).is_whitespace() {
|
||||||
|
@ -98,6 +98,10 @@ impl<'a> FileLines<'a> {
|
||||||
|
|
||||||
(false, 0)
|
(false, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn displayed_length(&self, s: &str) -> uint {
|
||||||
|
s.char_len() + (self.opts.tabwidth - 1) * s.chars().filter(|x| x == &'\t').count()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Iterator<Line> for FileLines<'a> {
|
impl<'a> Iterator<Line> for FileLines<'a> {
|
||||||
|
@ -105,10 +109,10 @@ impl<'a> Iterator<Line> for FileLines<'a> {
|
||||||
let mut n =
|
let mut n =
|
||||||
match self.lines.next() {
|
match self.lines.next() {
|
||||||
Some(t) => match t {
|
Some(t) => match t {
|
||||||
Ok(tt) => tt,
|
Ok(tt) => tt,
|
||||||
Err(_) => return None
|
Err(_) => return None
|
||||||
},
|
},
|
||||||
None => return None
|
None => return None
|
||||||
};
|
};
|
||||||
|
|
||||||
// if this line is entirely whitespace,
|
// if this line is entirely whitespace,
|
||||||
|
@ -122,13 +126,13 @@ impl<'a> Iterator<Line> for FileLines<'a> {
|
||||||
// if this line does not match the prefix,
|
// if this line does not match the prefix,
|
||||||
// emit the line unprocessed and iterate again
|
// emit the line unprocessed and iterate again
|
||||||
let (pmatch, poffset) = self.match_prefix(n.as_slice());
|
let (pmatch, poffset) = self.match_prefix(n.as_slice());
|
||||||
if ! pmatch {
|
if !pmatch {
|
||||||
return Some(NoFormatLine(n, false));
|
return Some(NoFormatLine(n, false));
|
||||||
}
|
}
|
||||||
|
|
||||||
// if this line matches the anti_prefix
|
// if this line matches the anti_prefix
|
||||||
// (NOTE definition of match_anti_prefix is TRUE if we should process)
|
// (NOTE definition of match_anti_prefix is TRUE if we should process)
|
||||||
if ! self.match_anti_prefix(n.as_slice()) {
|
if !self.match_anti_prefix(n.as_slice()) {
|
||||||
return Some(NoFormatLine(n, false));
|
return Some(NoFormatLine(n, false));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -154,19 +158,16 @@ impl<'a> Iterator<Line> for FileLines<'a> {
|
||||||
let pfxEnd = poffset + self.opts.prefix.len();
|
let pfxEnd = poffset + self.opts.prefix.len();
|
||||||
let nSlice = n.as_slice().slice_from(pfxEnd);
|
let nSlice = n.as_slice().slice_from(pfxEnd);
|
||||||
let nSlice2 = nSlice.trim_left();
|
let nSlice2 = nSlice.trim_left();
|
||||||
|
|
||||||
(pfxEnd + nSlice.len() - nSlice2.len(), pfxEnd, poffset)
|
(pfxEnd + nSlice.len() - nSlice2.len(), pfxEnd, poffset)
|
||||||
} else {
|
} else {
|
||||||
let nSlice = n.as_slice().trim_left();
|
let nSlice = n.as_slice().trim_left();
|
||||||
|
|
||||||
(nLen - nSlice.len(), 0, 0)
|
(nLen - nSlice.len(), 0, 0)
|
||||||
};
|
};
|
||||||
|
|
||||||
// indent length
|
// indent length
|
||||||
let indLen =
|
let indLen =
|
||||||
if indEnd > 0 {
|
if indEnd > 0 {
|
||||||
let nSlice = n.as_slice().slice(pfxEnd, indEnd);
|
self.displayed_length(n.as_slice().slice(pfxEnd, indEnd))
|
||||||
nSlice.char_len() + (self.opts.tabwidth - 1) * nSlice.chars().filter(|x| x == &'\t').count()
|
|
||||||
} else {
|
} else {
|
||||||
0
|
0
|
||||||
};
|
};
|
||||||
|
@ -174,8 +175,7 @@ impl<'a> Iterator<Line> for FileLines<'a> {
|
||||||
// prefix indent length
|
// prefix indent length
|
||||||
let pfxIndLen =
|
let pfxIndLen =
|
||||||
if pfxIndEnd > 0 {
|
if pfxIndEnd > 0 {
|
||||||
let nSlice = n.as_slice().slice_to(pfxIndEnd);
|
self.displayed_length(n.as_slice().slice_to(pfxIndEnd))
|
||||||
nSlice.char_len() + (self.opts.tabwidth - 1) * nSlice.chars().filter(|x| x == &'\t').count()
|
|
||||||
} else {
|
} else {
|
||||||
0
|
0
|
||||||
};
|
};
|
||||||
|
@ -185,7 +185,7 @@ impl<'a> Iterator<Line> for FileLines<'a> {
|
||||||
// [?!.]\t\t is. We could expand tabs to two spaces to force detection of tab as
|
// [?!.]\t\t is. We could expand tabs to two spaces to force detection of tab as
|
||||||
// sentence ending
|
// sentence ending
|
||||||
if self.opts.uniform {
|
if self.opts.uniform {
|
||||||
let tabinds: Vec<uint> = n.as_slice().slice_from(indEnd).char_indices().filter_map(|(i,c)| if c == '\t' { Some(i) } else { None }).collect();
|
let tabinds: Vec<uint> = n.as_slice().slice_from(indEnd).char_indices().filter_map(|(i, c)| if c == '\t' { Some(i) } else { None }).collect();
|
||||||
unsafe {
|
unsafe {
|
||||||
let nmut = n.as_mut_bytes();
|
let nmut = n.as_mut_bytes();
|
||||||
for i in tabinds.iter() {
|
for i in tabinds.iter() {
|
||||||
|
@ -194,13 +194,14 @@ impl<'a> Iterator<Line> for FileLines<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Some(FormatLine(FileLine { line: n
|
Some(FormatLine(FileLine {
|
||||||
, indent_end: indEnd
|
line : n,
|
||||||
, prefix_end: pfxEnd
|
indent_end : indEnd,
|
||||||
, pfxind_end: pfxIndEnd
|
prefix_end : pfxEnd,
|
||||||
, indent_len: indLen
|
pfxind_end : pfxIndEnd,
|
||||||
, pfxind_len: pfxIndLen
|
indent_len : indLen,
|
||||||
}))
|
pfxind_len : pfxIndLen,
|
||||||
|
}))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -210,16 +211,16 @@ impl<'a> Iterator<Line> for FileLines<'a> {
|
||||||
// is only there to help us in deciding how to merge lines into Paragraphs
|
// is only there to help us in deciding how to merge lines into Paragraphs
|
||||||
#[deriving(Show)]
|
#[deriving(Show)]
|
||||||
pub struct Paragraph {
|
pub struct Paragraph {
|
||||||
lines : Vec<String>, // the lines of the file
|
lines : Vec<String>, // the lines of the file
|
||||||
pub init_str : String, // string representing the init, that is, the first line's indent
|
pub init_str : String, // string representing the init, that is, the first line's indent
|
||||||
pub init_len : uint, // printable length of the init string considering TABWIDTH
|
pub init_len : uint, // printable length of the init string considering TABWIDTH
|
||||||
init_end : uint, // byte location of end of init in first line String
|
init_end : uint, // byte location of end of init in first line String
|
||||||
pub indent_str : String, // string representing indent
|
pub indent_str : String, // string representing indent
|
||||||
pub indent_len : uint, // length of above
|
pub indent_len : uint, // length of above
|
||||||
indent_end : uint, // byte location of end of indent (in crown and tagged mode, only applies to 2nd line and onward)
|
indent_end : uint, // byte location of end of indent (in crown and tagged mode, only applies to 2nd line and onward)
|
||||||
pub pfxind_str : String, // string representing the prefix indent
|
pub pfxind_str : String, // string representing the prefix indent
|
||||||
pub pfxind_len : uint, // length of above
|
pub pfxind_len : uint, // length of above
|
||||||
pub mail_header : bool // we need to know if this is a mail header because we do word splitting differently in that case
|
pub mail_header : bool // we need to know if this is a mail header because we do word splitting differently in that case
|
||||||
}
|
}
|
||||||
|
|
||||||
// an iterator producing a stream of paragraphs from a stream of lines
|
// an iterator producing a stream of paragraphs from a stream of lines
|
||||||
|
@ -227,9 +228,9 @@ pub struct Paragraph {
|
||||||
// NOTE as you iterate through the paragraphs, any NoFormatLines are
|
// NOTE as you iterate through the paragraphs, any NoFormatLines are
|
||||||
// immediately dumped to stdout!
|
// immediately dumped to stdout!
|
||||||
pub struct ParagraphStream<'a> {
|
pub struct ParagraphStream<'a> {
|
||||||
lines : Peekable<Line,FileLines<'a>>,
|
lines : Peekable<Line,FileLines<'a>>,
|
||||||
next_mail : bool,
|
next_mail : bool,
|
||||||
opts : &'a FmtOptions,
|
opts : &'a FmtOptions,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> ParagraphStream<'a> {
|
impl<'a> ParagraphStream<'a> {
|
||||||
|
@ -245,26 +246,25 @@ impl<'a> ParagraphStream<'a> {
|
||||||
// or with a sequence of printable ASCII chars (33 to 126, inclusive,
|
// or with a sequence of printable ASCII chars (33 to 126, inclusive,
|
||||||
// except colon) followed by a colon.
|
// except colon) followed by a colon.
|
||||||
if line.indent_end > 0 {
|
if line.indent_end > 0 {
|
||||||
return false;
|
false
|
||||||
} else {
|
} else {
|
||||||
let lSlice = line.line.as_slice();
|
let lSlice = line.line.as_slice();
|
||||||
if lSlice.starts_with("From ") {
|
if lSlice.starts_with("From ") {
|
||||||
return true;
|
true
|
||||||
} else {
|
} else {
|
||||||
let colonPosn =
|
let colonPosn =
|
||||||
match lSlice.find(':') {
|
match lSlice.find(':') {
|
||||||
Some(n) => n,
|
Some(n) => n,
|
||||||
None => return false
|
None => return false
|
||||||
};
|
};
|
||||||
|
|
||||||
// header field must be nonzero length
|
// header field must be nonzero length
|
||||||
if colonPosn == 0 { return false; }
|
if colonPosn == 0 { return false; }
|
||||||
|
|
||||||
return lSlice.slice_to(colonPosn).chars()
|
return lSlice.slice_to(colonPosn).chars().all(|x| match x as uint {
|
||||||
.all(|x| match x as uint {
|
y if y < 33 || y > 126 => false,
|
||||||
y if y < 33 || y > 126 => false,
|
_ => true
|
||||||
_ => true
|
});
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -275,9 +275,9 @@ impl<'a> Iterator<Result<Paragraph,String>> for ParagraphStream<'a> {
|
||||||
// return a NoFormatLine in an Err; it should immediately be output
|
// return a NoFormatLine in an Err; it should immediately be output
|
||||||
let noformat =
|
let noformat =
|
||||||
match self.lines.peek() {
|
match self.lines.peek() {
|
||||||
None => return None,
|
None => return None,
|
||||||
Some(l) => match l {
|
Some(l) => match l {
|
||||||
&FormatLine(_) => false,
|
&FormatLine(_) => false,
|
||||||
&NoFormatLine(_, _) => true
|
&NoFormatLine(_, _) => true
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -307,11 +307,11 @@ impl<'a> Iterator<Result<Paragraph,String>> for ParagraphStream<'a> {
|
||||||
// need to explicitly force fl out of scope before we can call self.lines.next()
|
// need to explicitly force fl out of scope before we can call self.lines.next()
|
||||||
let fl =
|
let fl =
|
||||||
match self.lines.peek() {
|
match self.lines.peek() {
|
||||||
None => break,
|
None => break,
|
||||||
Some(l) => {
|
Some(l) => {
|
||||||
match l {
|
match l {
|
||||||
&FormatLine(ref x) => x,
|
&FormatLine(ref x) => x,
|
||||||
&NoFormatLine(..) => break
|
&NoFormatLine(..) => break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -335,7 +335,7 @@ impl<'a> Iterator<Result<Paragraph,String>> for ParagraphStream<'a> {
|
||||||
// these will be overwritten in the 2nd line of crown or tagged mode, but
|
// these will be overwritten in the 2nd line of crown or tagged mode, but
|
||||||
// we are not guaranteed to get to the 2nd line, e.g., if the next line
|
// we are not guaranteed to get to the 2nd line, e.g., if the next line
|
||||||
// is a NoFormatLine or None. Thus, we set sane defaults the 1st time around
|
// is a NoFormatLine or None. Thus, we set sane defaults the 1st time around
|
||||||
indent_str.push_str(fl.line.as_slice().slice(fl.prefix_end,fl.indent_end));
|
indent_str.push_str(fl.line.as_slice().slice(fl.prefix_end, fl.indent_end));
|
||||||
indent_len = fl.indent_len;
|
indent_len = fl.indent_len;
|
||||||
indent_end = fl.indent_end;
|
indent_end = fl.indent_end;
|
||||||
|
|
||||||
|
@ -356,10 +356,10 @@ impl<'a> Iterator<Result<Paragraph,String>> for ParagraphStream<'a> {
|
||||||
}
|
}
|
||||||
} else if in_mail {
|
} else if in_mail {
|
||||||
// lines following mail headers must begin with spaces
|
// lines following mail headers must begin with spaces
|
||||||
if (self.opts.use_prefix && fl.pfxind_end == 0) || (! self.opts.use_prefix && fl.indent_end == 0) {
|
if (self.opts.use_prefix && fl.pfxind_end == 0) || (!self.opts.use_prefix && fl.indent_end == 0) {
|
||||||
break; // this line does not begin with spaces
|
break; // this line does not begin with spaces
|
||||||
}
|
}
|
||||||
} else if ! second_done && (self.opts.crown || self.opts.tagged) {
|
} else if !second_done && (self.opts.crown || self.opts.tagged) {
|
||||||
// now we have enough info to handle crown margin and tagged mode
|
// now we have enough info to handle crown margin and tagged mode
|
||||||
if pfxind_len != fl.pfxind_len {
|
if pfxind_len != fl.pfxind_len {
|
||||||
// in both crown and tagged modes we require that pfxind is the same
|
// in both crown and tagged modes we require that pfxind is the same
|
||||||
|
@ -370,7 +370,7 @@ impl<'a> Iterator<Result<Paragraph,String>> for ParagraphStream<'a> {
|
||||||
} else {
|
} else {
|
||||||
// this is part of the same paragraph, get the indent info from this line
|
// this is part of the same paragraph, get the indent info from this line
|
||||||
indent_str.clear();
|
indent_str.clear();
|
||||||
indent_str.push_str(fl.line.as_slice().slice(fl.prefix_end,fl.indent_end));
|
indent_str.push_str(fl.line.as_slice().slice(fl.prefix_end, fl.indent_end));
|
||||||
indent_len = fl.indent_len;
|
indent_len = fl.indent_len;
|
||||||
indent_end = fl.indent_end;
|
indent_end = fl.indent_end;
|
||||||
}
|
}
|
||||||
|
@ -396,24 +396,25 @@ impl<'a> Iterator<Result<Paragraph,String>> for ParagraphStream<'a> {
|
||||||
// NoFormatLine.
|
// NoFormatLine.
|
||||||
self.next_mail = in_mail;
|
self.next_mail = in_mail;
|
||||||
|
|
||||||
Some(Ok(Paragraph { lines: pLines
|
Some(Ok(Paragraph {
|
||||||
, init_str: init_str
|
lines : pLines,
|
||||||
, init_len: init_len
|
init_str : init_str,
|
||||||
, init_end: init_end
|
init_len : init_len,
|
||||||
, indent_str: indent_str
|
init_end : init_end,
|
||||||
, indent_len: indent_len
|
indent_str : indent_str,
|
||||||
, indent_end: indent_end
|
indent_len : indent_len,
|
||||||
, pfxind_str: pfxind_str
|
indent_end : indent_end,
|
||||||
, pfxind_len: pfxind_len
|
pfxind_str : pfxind_str,
|
||||||
, mail_header: in_mail
|
pfxind_len : pfxind_len,
|
||||||
}))
|
mail_header : in_mail
|
||||||
|
}))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct ParaWords<'a> {
|
pub struct ParaWords<'a> {
|
||||||
opts : &'a FmtOptions,
|
opts : &'a FmtOptions,
|
||||||
para : &'a Paragraph,
|
para : &'a Paragraph,
|
||||||
words : Vec<&'a str>
|
words : Vec<&'a str>
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> ParaWords<'a> {
|
impl<'a> ParaWords<'a> {
|
||||||
|
@ -455,10 +456,10 @@ impl<'a> ParaWords<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
struct WordSplit<'a> {
|
struct WordSplit<'a> {
|
||||||
uniform : bool,
|
uniform : bool,
|
||||||
string : &'a str,
|
string : &'a str,
|
||||||
length : uint,
|
length : uint,
|
||||||
position : uint
|
position : uint
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> WordSplit<'a> {
|
impl<'a> WordSplit<'a> {
|
||||||
|
@ -471,7 +472,7 @@ impl<'a> WordSplit<'a> {
|
||||||
fn is_punctuation(c: char) -> bool {
|
fn is_punctuation(c: char) -> bool {
|
||||||
match c {
|
match c {
|
||||||
'!' | '.' | '?' => true,
|
'!' | '.' | '?' => true,
|
||||||
_ => false
|
_ => false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -487,7 +488,7 @@ impl<'a> Iterator<&'a str> for WordSplit<'a> {
|
||||||
// find the start of the next whitespace segment
|
// find the start of the next whitespace segment
|
||||||
let ws_start =
|
let ws_start =
|
||||||
match self.string.slice_from(old_position).find(|x: char| x.is_whitespace()) {
|
match self.string.slice_from(old_position).find(|x: char| x.is_whitespace()) {
|
||||||
None => self.length,
|
None => self.length,
|
||||||
Some(s) => s + old_position
|
Some(s) => s + old_position
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -500,31 +501,32 @@ impl<'a> Iterator<&'a str> for WordSplit<'a> {
|
||||||
// note that this preserves the invariant that self.position points to
|
// note that this preserves the invariant that self.position points to
|
||||||
// non-whitespace character OR end of string
|
// non-whitespace character OR end of string
|
||||||
self.position =
|
self.position =
|
||||||
match self.string.slice_from(ws_start).find(|x: char| ! x.is_whitespace()) {
|
match self.string.slice_from(ws_start).find(|x: char| !x.is_whitespace()) {
|
||||||
None => self.length,
|
None => self.length,
|
||||||
Some(s) => s + ws_start
|
Some(s) => s + ws_start
|
||||||
};
|
};
|
||||||
|
|
||||||
let is_sentence_end = match self.string.char_range_at_reverse(ws_start) {
|
let is_sentence_end = match self.string.char_range_at_reverse(ws_start) {
|
||||||
CharRange { ch, next: _ } if WordSplit::is_punctuation(ch) => self.position - ws_start > 2,
|
CharRange { ch, next: _ } if WordSplit::is_punctuation(ch) => self.position - ws_start > 2,
|
||||||
_ => false
|
_ => false
|
||||||
};
|
};
|
||||||
|
|
||||||
Some(
|
Some(
|
||||||
if self.uniform {
|
if self.uniform {
|
||||||
// if the last non-whitespace character is a [?!.] and
|
// if the last non-whitespace character is a [?!.] and
|
||||||
// there are two or more spaces, this is the end of a
|
// there are two or more spaces, this is the end of a
|
||||||
// sentence, so keep one extra space.
|
// sentence, so keep one extra space.
|
||||||
if is_sentence_end {
|
if is_sentence_end {
|
||||||
self.string.slice(old_position, ws_start + 1)
|
self.string.slice(old_position, ws_start + 1)
|
||||||
|
} else {
|
||||||
|
self.string.slice(old_position, ws_start)
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
self.string.slice(old_position, ws_start)
|
// in non-uniform mode, we just keep the whole thing
|
||||||
|
// eventually we will want to annotate where the sentence boundaries are
|
||||||
|
// so that we can give preference to splitting lines appropriately
|
||||||
|
self.string.slice(old_position, self.position)
|
||||||
}
|
}
|
||||||
} else {
|
)
|
||||||
// in non-uniform mode, we just keep the whole thing
|
|
||||||
// eventually we will want to annotate where the sentence boundaries are
|
|
||||||
// so that we can give preference to splitting lines appropriately
|
|
||||||
self.string.slice(old_position, self.position)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue