From 5d2a2b6a0b1c37f0fd99c76a8e0dc369e18b7f07 Mon Sep 17 00:00:00 2001 From: kwantam Date: Wed, 18 Jun 2014 22:23:48 -0400 Subject: [PATCH] fmt: style modifications suggested by Arcterus --- fmt/fmt.rs | 161 ++++++++++++++++++++---------------- fmt/parasplit.rs | 208 ++++++++++++++++++++++++----------------------- 2 files changed, 194 insertions(+), 175 deletions(-) diff --git a/fmt/fmt.rs b/fmt/fmt.rs index 4517a3cff..206b89f26 100644 --- a/fmt/fmt.rs +++ b/fmt/fmt.rs @@ -24,8 +24,8 @@ use parasplit::{ParagraphStream, ParaWords}; macro_rules! silent_unwrap( ($exp:expr) => ( match $exp { - Ok(_) => (), - Err(_) => unsafe { ::libc::exit(1) } + Ok(_) => (), + Err(_) => unsafe { ::libc::exit(1) } } ) ) @@ -39,21 +39,21 @@ static NAME: &'static str = "fmt"; static VERSION: &'static str = "0.0.1"; struct FmtOptions { - crown : bool, - tagged : bool, - mail : bool, - split_only : bool, - use_prefix : bool, - prefix : String, - xprefix : bool, - prefix_len : uint, + crown : bool, + tagged : bool, + mail : bool, + split_only : bool, + use_prefix : bool, + prefix : String, + xprefix : bool, + prefix_len : uint, use_anti_prefix : bool, - anti_prefix : String, - xanti_prefix: bool, - uniform : bool, - width : uint, - goal : uint, - tabwidth : uint, + anti_prefix : String, + xanti_prefix : bool, + uniform : bool, + width : uint, + goal : uint, + tabwidth : uint, } #[allow(dead_code)] @@ -97,71 +97,86 @@ fn uumain(args: Vec) -> int { return 0 } - let mut fmt_opts = FmtOptions { crown : false - , tagged : false - , mail : false - , uniform : false - , split_only : false - , use_prefix : false - , prefix : String::new() - , xprefix : false - , prefix_len : 0 - , use_anti_prefix : false - , anti_prefix : String::new() - , xanti_prefix: false - , width : 78 - , goal : 72 - , tabwidth : 8 - }; - + let mut fmt_opts = FmtOptions { + crown : false, + tagged : false, + mail : false, + uniform : false, + split_only : false, + use_prefix : false, + prefix : String::new(), + xprefix : false, + prefix_len : 0, + use_anti_prefix : false, + anti_prefix : String::new(), + xanti_prefix : false, + width : 78, + goal : 72, + tabwidth : 8, + }; + if matches.opt_present("t") { fmt_opts.tagged = true; } if matches.opt_present("c") { fmt_opts.crown = true; fmt_opts.tagged = false; } if matches.opt_present("m") { fmt_opts.mail = true; } if matches.opt_present("u") { fmt_opts.uniform = true; } - if matches.opt_present("s") { fmt_opts.split_only = true; fmt_opts.crown = false; fmt_opts.tagged = false; } + if matches.opt_present("s") { fmt_opts.split_only = true; fmt_opts.crown = false; fmt_opts.tagged = false; } if matches.opt_present("x") { fmt_opts.xprefix = true; } if matches.opt_present("X") { fmt_opts.xanti_prefix = true; } match matches.opt_str("p") { - Some(s) => { fmt_opts.prefix = s; fmt_opts.use_prefix = true; fmt_opts.prefix_len = fmt_opts.prefix.as_slice().char_len() }, - None => () + Some(s) => { + fmt_opts.prefix = s; + fmt_opts.use_prefix = true; + fmt_opts.prefix_len = fmt_opts.prefix.as_slice().char_len() + } + None => () }; match matches.opt_str("P") { - Some(s) => { fmt_opts.anti_prefix = s; fmt_opts.use_anti_prefix = true; }, - None => () + Some(s) => { + fmt_opts.anti_prefix = s; + fmt_opts.use_anti_prefix = true; + } + None => () }; match matches.opt_str("w") { - Some(s) => { fmt_opts.width = match from_str(s.as_slice()) { - Some(t) => t, - None => { crash!(1, "Invalid WIDTH specification: `{}'", s); } - }; - fmt_opts.goal = std::cmp::min(fmt_opts.width * 92 / 100, fmt_opts.width - 4); - }, - None => () + Some(s) => { + fmt_opts.width = + match from_str(s.as_slice()) { + Some(t) => t, + None => { crash!(1, "Invalid WIDTH specification: `{}'", s); } + }; + fmt_opts.goal = std::cmp::min(fmt_opts.width * 92 / 100, fmt_opts.width - 4); + } + None => () }; match matches.opt_str("g") { - Some(s) => { fmt_opts.goal = match from_str(s.as_slice()) { - Some(t) => t, - None => { crash!(1, "Invalid GOAL specification: `{}'", s); } - }; - if ! matches.opt_present("w") { - fmt_opts.width = std::cmp::max(fmt_opts.goal * 100 / 92, fmt_opts.goal + 4); - } else if fmt_opts.goal > fmt_opts.width { - crash!(1, "GOAL cannot be greater than WIDTH."); - } - }, - None => () + Some(s) => { + fmt_opts.goal = + match from_str(s.as_slice()) { + Some(t) => t, + None => { crash!(1, "Invalid GOAL specification: `{}'", s); } + }; + if !matches.opt_present("w") { + fmt_opts.width = std::cmp::max(fmt_opts.goal * 100 / 92, fmt_opts.goal + 4); + } else if fmt_opts.goal > fmt_opts.width { + crash!(1, "GOAL cannot be greater than WIDTH."); + } + } + None => () }; match matches.opt_str("T") { - Some(s) => fmt_opts.tabwidth = match from_str(s.as_slice()) { - Some(t) => t, - None => { crash!(1, "Invalid TABWIDTH specification: `{}'", s); } - }, - None => () + Some(s) => { + fmt_opts.tabwidth = + match from_str(s.as_slice()) { + Some(t) => t, + None => { crash!(1, "Invalid TABWIDTH specification: `{}'", s); } + }; + } + None => () }; if fmt_opts.tabwidth < 1 { @@ -179,17 +194,19 @@ fn uumain(args: Vec) -> int { let mut ostream = box BufferedWriter::new(stdout_raw()) as Box; for i in files.iter().map(|x| x.as_slice()) { - let mut fp = match open_file(i) { - Err(e) => { show_warning!("{}: {}",i,e); - continue; - } - Ok(f) => f - }; + let mut fp = + match open_file(i) { + Err(e) => { + show_warning!("{}: {}",i,e); + continue; + } + Ok(f) => f + }; let mut pStream = ParagraphStream::new(&fmt_opts, &mut fp); for paraResult in pStream { match paraResult { - Err(s) => silent_unwrap!(ostream.write(s.as_bytes())), - Ok(para) => { + Err(s) => silent_unwrap!(ostream.write(s.as_bytes())), + Ok(para) => { // indent let pIndent = para.pfxind_str.clone().append(fmt_opts.prefix.as_slice()).append(para.indent_str.as_slice()); let pIndentLen = para.pfxind_len + fmt_opts.prefix_len + para.indent_len; @@ -204,7 +221,7 @@ fn uumain(args: Vec) -> int { // handle "init" portion silent_unwrap!(ostream.write(para.init_str.as_bytes())); para.init_len - } else if ! para.mail_header { + } else if !para.mail_header { // for non-(crown, tagged) that's the same as a normal indent silent_unwrap!(ostream.write(pIndent.as_bytes())); pIndentLen @@ -243,8 +260,8 @@ fn open_file(filename: &str) -> IoResult { Ok(BufferedReader::new(box stdin_raw() as Box)) } else { match File::open(&Path::new(filename)) { - Ok(f) => Ok(BufferedReader::new(box f as Box)), - Err(e) => return Err(e) + Ok(f) => Ok(BufferedReader::new(box f as Box)), + Err(e) => return Err(e) } } } diff --git a/fmt/parasplit.rs b/fmt/parasplit.rs index 76435868c..583bb306c 100644 --- a/fmt/parasplit.rs +++ b/fmt/parasplit.rs @@ -26,16 +26,16 @@ impl Line { // when we know that it's a FormatLine, as in the ParagraphStream iterator fn get_fileline(self) -> FileLine { match self { - FormatLine(fl) => fl, - NoFormatLine(..) => fail!("Found NoFormatLine when expecting FormatLine") + FormatLine(fl) => fl, + NoFormatLine(..) => fail!("Found NoFormatLine when expecting FormatLine") } } // when we know that it's a NoFormatLine, as in the ParagraphStream iterator fn get_noformatline(self) -> (String, bool) { match self { - NoFormatLine(s, b) => (s, b), - FormatLine(..) => fail!("Found FormatLine when expecting NoFormatLine") + NoFormatLine(s, b) => (s, b), + FormatLine(..) => fail!("Found FormatLine when expecting NoFormatLine") } } } @@ -44,18 +44,18 @@ impl Line { // the next line or not #[deriving(Show)] struct FileLine { - line : String, - indent_end : uint, // the end of the indent, always the start of the text - prefix_end : uint, // the end of the PREFIX - pfxind_end : uint, // the end of the PREFIX's indent, that is, the spaces before the prefix - indent_len : uint, // display length of indent taking into account TABWIDTH - pfxind_len : uint, // PREFIX indent length taking into account TABWIDTH + line : String, + indent_end : uint, // the end of the indent, always the start of the text + prefix_end : uint, // the end of the PREFIX + pfxind_end : uint, // the end of the PREFIX's indent, that is, the spaces before the prefix + indent_len : uint, // display length of indent taking into account TABWIDTH + pfxind_len : uint, // PREFIX indent length taking into account TABWIDTH } // iterator that produces a stream of Lines from a file struct FileLines<'a> { - opts : &'a FmtOptions, - lines : Lines<'a, FileOrStdReader>, + opts : &'a FmtOptions, + lines : Lines<'a, FileOrStdReader>, } impl<'a> FileLines<'a> { @@ -65,18 +65,18 @@ impl<'a> FileLines<'a> { // returns true if this line should be formatted fn match_prefix(&self, line: &str) -> (bool, uint) { - if ! self.opts.use_prefix { return (true, 0u); } + if !self.opts.use_prefix { return (true, 0u); } FileLines::match_prefix_generic(self.opts.prefix.as_slice(), line, self.opts.xprefix) } // returns true if this line should be formatted fn match_anti_prefix(&self, line: &str) -> bool { - if ! self.opts.use_anti_prefix { return true; } + if !self.opts.use_anti_prefix { return true; } match FileLines::match_prefix_generic(self.opts.anti_prefix.as_slice(), line, self.opts.xanti_prefix) { - (true, _) => false, - (_ , _) => true + (true, _) => false, + (_ , _) => true } } @@ -85,7 +85,7 @@ impl<'a> FileLines<'a> { return (true, 0); } - if ! exact { + if !exact { // we do it this way rather than byte indexing to support unicode whitespace chars let mut i = 0u; while (i < line.len()) && line.char_at(i).is_whitespace() { @@ -98,6 +98,10 @@ impl<'a> FileLines<'a> { (false, 0) } + + fn displayed_length(&self, s: &str) -> uint { + s.char_len() + (self.opts.tabwidth - 1) * s.chars().filter(|x| x == &'\t').count() + } } impl<'a> Iterator for FileLines<'a> { @@ -105,10 +109,10 @@ impl<'a> Iterator for FileLines<'a> { let mut n = match self.lines.next() { Some(t) => match t { - Ok(tt) => tt, - Err(_) => return None + Ok(tt) => tt, + Err(_) => return None }, - None => return None + None => return None }; // if this line is entirely whitespace, @@ -122,13 +126,13 @@ impl<'a> Iterator for FileLines<'a> { // if this line does not match the prefix, // emit the line unprocessed and iterate again let (pmatch, poffset) = self.match_prefix(n.as_slice()); - if ! pmatch { + if !pmatch { return Some(NoFormatLine(n, false)); } // if this line matches the anti_prefix // (NOTE definition of match_anti_prefix is TRUE if we should process) - if ! self.match_anti_prefix(n.as_slice()) { + if !self.match_anti_prefix(n.as_slice()) { return Some(NoFormatLine(n, false)); } @@ -154,19 +158,16 @@ impl<'a> Iterator for FileLines<'a> { let pfxEnd = poffset + self.opts.prefix.len(); let nSlice = n.as_slice().slice_from(pfxEnd); let nSlice2 = nSlice.trim_left(); - (pfxEnd + nSlice.len() - nSlice2.len(), pfxEnd, poffset) } else { let nSlice = n.as_slice().trim_left(); - (nLen - nSlice.len(), 0, 0) }; // indent length let indLen = if indEnd > 0 { - let nSlice = n.as_slice().slice(pfxEnd, indEnd); - nSlice.char_len() + (self.opts.tabwidth - 1) * nSlice.chars().filter(|x| x == &'\t').count() + self.displayed_length(n.as_slice().slice(pfxEnd, indEnd)) } else { 0 }; @@ -174,8 +175,7 @@ impl<'a> Iterator for FileLines<'a> { // prefix indent length let pfxIndLen = if pfxIndEnd > 0 { - let nSlice = n.as_slice().slice_to(pfxIndEnd); - nSlice.char_len() + (self.opts.tabwidth - 1) * nSlice.chars().filter(|x| x == &'\t').count() + self.displayed_length(n.as_slice().slice_to(pfxIndEnd)) } else { 0 }; @@ -185,7 +185,7 @@ impl<'a> Iterator for FileLines<'a> { // [?!.]\t\t is. We could expand tabs to two spaces to force detection of tab as // sentence ending if self.opts.uniform { - let tabinds: Vec = n.as_slice().slice_from(indEnd).char_indices().filter_map(|(i,c)| if c == '\t' { Some(i) } else { None }).collect(); + let tabinds: Vec = n.as_slice().slice_from(indEnd).char_indices().filter_map(|(i, c)| if c == '\t' { Some(i) } else { None }).collect(); unsafe { let nmut = n.as_mut_bytes(); for i in tabinds.iter() { @@ -194,13 +194,14 @@ impl<'a> Iterator for FileLines<'a> { } } - Some(FormatLine(FileLine { line: n - , indent_end: indEnd - , prefix_end: pfxEnd - , pfxind_end: pfxIndEnd - , indent_len: indLen - , pfxind_len: pfxIndLen - })) + Some(FormatLine(FileLine { + line : n, + indent_end : indEnd, + prefix_end : pfxEnd, + pfxind_end : pfxIndEnd, + indent_len : indLen, + pfxind_len : pfxIndLen, + })) } } @@ -210,16 +211,16 @@ impl<'a> Iterator for FileLines<'a> { // is only there to help us in deciding how to merge lines into Paragraphs #[deriving(Show)] pub struct Paragraph { - lines : Vec, // the lines of the file - pub init_str : String, // string representing the init, that is, the first line's indent - pub init_len : uint, // printable length of the init string considering TABWIDTH - init_end : uint, // byte location of end of init in first line String - pub indent_str : String, // string representing indent - pub indent_len : uint, // length of above - indent_end : uint, // byte location of end of indent (in crown and tagged mode, only applies to 2nd line and onward) - pub pfxind_str : String, // string representing the prefix indent - pub pfxind_len : uint, // length of above - pub mail_header : bool // we need to know if this is a mail header because we do word splitting differently in that case + lines : Vec, // the lines of the file + pub init_str : String, // string representing the init, that is, the first line's indent + pub init_len : uint, // printable length of the init string considering TABWIDTH + init_end : uint, // byte location of end of init in first line String + pub indent_str : String, // string representing indent + pub indent_len : uint, // length of above + indent_end : uint, // byte location of end of indent (in crown and tagged mode, only applies to 2nd line and onward) + pub pfxind_str : String, // string representing the prefix indent + pub pfxind_len : uint, // length of above + pub mail_header : bool // we need to know if this is a mail header because we do word splitting differently in that case } // an iterator producing a stream of paragraphs from a stream of lines @@ -227,9 +228,9 @@ pub struct Paragraph { // NOTE as you iterate through the paragraphs, any NoFormatLines are // immediately dumped to stdout! pub struct ParagraphStream<'a> { - lines : Peekable>, - next_mail : bool, - opts : &'a FmtOptions, + lines : Peekable>, + next_mail : bool, + opts : &'a FmtOptions, } impl<'a> ParagraphStream<'a> { @@ -245,26 +246,25 @@ impl<'a> ParagraphStream<'a> { // or with a sequence of printable ASCII chars (33 to 126, inclusive, // except colon) followed by a colon. if line.indent_end > 0 { - return false; + false } else { let lSlice = line.line.as_slice(); if lSlice.starts_with("From ") { - return true; + true } else { let colonPosn = match lSlice.find(':') { Some(n) => n, - None => return false + None => return false }; // header field must be nonzero length if colonPosn == 0 { return false; } - return lSlice.slice_to(colonPosn).chars() - .all(|x| match x as uint { - y if y < 33 || y > 126 => false, - _ => true - }); + return lSlice.slice_to(colonPosn).chars().all(|x| match x as uint { + y if y < 33 || y > 126 => false, + _ => true + }); } } } @@ -275,9 +275,9 @@ impl<'a> Iterator> for ParagraphStream<'a> { // return a NoFormatLine in an Err; it should immediately be output let noformat = match self.lines.peek() { - None => return None, + None => return None, Some(l) => match l { - &FormatLine(_) => false, + &FormatLine(_) => false, &NoFormatLine(_, _) => true } }; @@ -307,11 +307,11 @@ impl<'a> Iterator> for ParagraphStream<'a> { // need to explicitly force fl out of scope before we can call self.lines.next() let fl = match self.lines.peek() { - None => break, + None => break, Some(l) => { match l { - &FormatLine(ref x) => x, - &NoFormatLine(..) => break + &FormatLine(ref x) => x, + &NoFormatLine(..) => break } } }; @@ -335,7 +335,7 @@ impl<'a> Iterator> for ParagraphStream<'a> { // these will be overwritten in the 2nd line of crown or tagged mode, but // we are not guaranteed to get to the 2nd line, e.g., if the next line // is a NoFormatLine or None. Thus, we set sane defaults the 1st time around - indent_str.push_str(fl.line.as_slice().slice(fl.prefix_end,fl.indent_end)); + indent_str.push_str(fl.line.as_slice().slice(fl.prefix_end, fl.indent_end)); indent_len = fl.indent_len; indent_end = fl.indent_end; @@ -356,10 +356,10 @@ impl<'a> Iterator> for ParagraphStream<'a> { } } else if in_mail { // lines following mail headers must begin with spaces - if (self.opts.use_prefix && fl.pfxind_end == 0) || (! self.opts.use_prefix && fl.indent_end == 0) { + if (self.opts.use_prefix && fl.pfxind_end == 0) || (!self.opts.use_prefix && fl.indent_end == 0) { break; // this line does not begin with spaces } - } else if ! second_done && (self.opts.crown || self.opts.tagged) { + } else if !second_done && (self.opts.crown || self.opts.tagged) { // now we have enough info to handle crown margin and tagged mode if pfxind_len != fl.pfxind_len { // in both crown and tagged modes we require that pfxind is the same @@ -370,7 +370,7 @@ impl<'a> Iterator> for ParagraphStream<'a> { } else { // this is part of the same paragraph, get the indent info from this line indent_str.clear(); - indent_str.push_str(fl.line.as_slice().slice(fl.prefix_end,fl.indent_end)); + indent_str.push_str(fl.line.as_slice().slice(fl.prefix_end, fl.indent_end)); indent_len = fl.indent_len; indent_end = fl.indent_end; } @@ -396,24 +396,25 @@ impl<'a> Iterator> for ParagraphStream<'a> { // NoFormatLine. self.next_mail = in_mail; - Some(Ok(Paragraph { lines: pLines - , init_str: init_str - , init_len: init_len - , init_end: init_end - , indent_str: indent_str - , indent_len: indent_len - , indent_end: indent_end - , pfxind_str: pfxind_str - , pfxind_len: pfxind_len - , mail_header: in_mail - })) + Some(Ok(Paragraph { + lines : pLines, + init_str : init_str, + init_len : init_len, + init_end : init_end, + indent_str : indent_str, + indent_len : indent_len, + indent_end : indent_end, + pfxind_str : pfxind_str, + pfxind_len : pfxind_len, + mail_header : in_mail + })) } } pub struct ParaWords<'a> { - opts : &'a FmtOptions, - para : &'a Paragraph, - words : Vec<&'a str> + opts : &'a FmtOptions, + para : &'a Paragraph, + words : Vec<&'a str> } impl<'a> ParaWords<'a> { @@ -455,10 +456,10 @@ impl<'a> ParaWords<'a> { } struct WordSplit<'a> { - uniform : bool, - string : &'a str, - length : uint, - position : uint + uniform : bool, + string : &'a str, + length : uint, + position : uint } impl<'a> WordSplit<'a> { @@ -471,7 +472,7 @@ impl<'a> WordSplit<'a> { fn is_punctuation(c: char) -> bool { match c { '!' | '.' | '?' => true, - _ => false + _ => false } } } @@ -487,7 +488,7 @@ impl<'a> Iterator<&'a str> for WordSplit<'a> { // find the start of the next whitespace segment let ws_start = match self.string.slice_from(old_position).find(|x: char| x.is_whitespace()) { - None => self.length, + None => self.length, Some(s) => s + old_position }; @@ -500,31 +501,32 @@ impl<'a> Iterator<&'a str> for WordSplit<'a> { // note that this preserves the invariant that self.position points to // non-whitespace character OR end of string self.position = - match self.string.slice_from(ws_start).find(|x: char| ! x.is_whitespace()) { - None => self.length, + match self.string.slice_from(ws_start).find(|x: char| !x.is_whitespace()) { + None => self.length, Some(s) => s + ws_start }; let is_sentence_end = match self.string.char_range_at_reverse(ws_start) { - CharRange { ch, next: _ } if WordSplit::is_punctuation(ch) => self.position - ws_start > 2, - _ => false + CharRange { ch, next: _ } if WordSplit::is_punctuation(ch) => self.position - ws_start > 2, + _ => false }; Some( - if self.uniform { - // if the last non-whitespace character is a [?!.] and - // there are two or more spaces, this is the end of a - // sentence, so keep one extra space. - if is_sentence_end { - self.string.slice(old_position, ws_start + 1) + if self.uniform { + // if the last non-whitespace character is a [?!.] and + // there are two or more spaces, this is the end of a + // sentence, so keep one extra space. + if is_sentence_end { + self.string.slice(old_position, ws_start + 1) + } else { + self.string.slice(old_position, ws_start) + } } else { - self.string.slice(old_position, ws_start) + // in non-uniform mode, we just keep the whole thing + // eventually we will want to annotate where the sentence boundaries are + // so that we can give preference to splitting lines appropriately + self.string.slice(old_position, self.position) } - } else { - // in non-uniform mode, we just keep the whole thing - // eventually we will want to annotate where the sentence boundaries are - // so that we can give preference to splitting lines appropriately - self.string.slice(old_position, self.position) - }) + ) } }