From 9242ba1db6bf35aa93f8336b4b6558e4a8cea7ce Mon Sep 17 00:00:00 2001 From: Nathan Ross Date: Sun, 14 Feb 2016 14:04:25 -0500 Subject: [PATCH] printf: scaffolding for C99 hex float --- .../tokenize/num_format/format_field.rs | 1 + .../{base_conv.rs => base_conv/mod.rs} | 151 ++++++++++++++---- .../num_format/formatters/base_conv/tests.rs | 64 ++++++++ .../formatters/cninetyninehexfloatf.rs | 146 +++++++++++++++++ .../tokenize/num_format/formatters/decf.rs | 6 +- .../num_format/formatters/float_common.rs | 64 ++++++-- .../tokenize/num_format/formatters/floatf.rs | 4 +- .../tokenize/num_format/formatters/mod.rs | 1 + .../tokenize/num_format/formatters/scif.rs | 4 +- src/printf/tokenize/num_format/num_format.rs | 2 + src/printf/tokenize/sub.rs | 8 +- 11 files changed, 399 insertions(+), 52 deletions(-) rename src/printf/tokenize/num_format/formatters/{base_conv.rs => base_conv/mod.rs} (66%) create mode 100644 src/printf/tokenize/num_format/formatters/base_conv/tests.rs create mode 100644 src/printf/tokenize/num_format/formatters/cninetyninehexfloatf.rs diff --git a/src/printf/tokenize/num_format/format_field.rs b/src/printf/tokenize/num_format/format_field.rs index 65a69c35b..dadb067f7 100644 --- a/src/printf/tokenize/num_format/format_field.rs +++ b/src/printf/tokenize/num_format/format_field.rs @@ -4,6 +4,7 @@ pub enum FieldType { Strf, Floatf, + CninetyNineHexFloatf, Scif, Decf, Intf, diff --git a/src/printf/tokenize/num_format/formatters/base_conv.rs b/src/printf/tokenize/num_format/formatters/base_conv/mod.rs similarity index 66% rename from src/printf/tokenize/num_format/formatters/base_conv.rs rename to src/printf/tokenize/num_format/formatters/base_conv/mod.rs index 706bdf6cd..c6a540a2f 100644 --- a/src/printf/tokenize/num_format/formatters/base_conv.rs +++ b/src/printf/tokenize/num_format/formatters/base_conv/mod.rs @@ -1,5 +1,5 @@ pub fn arrnum_int_mult( - arrnum : &Vec, + arr_num : &Vec, basenum : u8, base_ten_int_fact : u8 ) -> Vec { @@ -10,7 +10,7 @@ pub 
fn arrnum_int_mult( let base : u16 = basenum as u16; let mut ret_rev : Vec = Vec::new(); - let mut it = arrnum.iter().rev(); + let mut it = arr_num.iter().rev(); loop { let i = it.next(); match i { @@ -35,41 +35,119 @@ pub fn arrnum_int_mult( ret } -pub struct Remainder { - position : usize, - replace : Option +pub struct Remainder<'a> { + pub position: usize, + pub replace: Vec, + pub arr_num: &'a Vec } -pub struct DivOut { - quotient : u8, - remainder: Remainder +pub struct DivOut<'a> { + pub quotient: u8, + pub remainder: Remainder<'a> } -pub fn arrnum_int_div( - arrnum : &Vec, - basenum : u8, - base_ten_int_divisor : u8, - rem_in : Remainder - ) -> DivOut { +pub fn arrnum_int_div_step<'a>( + rem_in: Remainder<'a>, + radix_in: u8, + base_ten_int_divisor: u8, + after_decimal: bool + ) -> DivOut<'a> { let mut rem_out = Remainder { position: rem_in.position, - replace : None + replace: Vec::new(), + arr_num: rem_in.arr_num }; - let mut bufferval : u16 = 0; - let base : u16 = basenum as u16; - let divisor : u16 = base_ten_int_divisor as u16; + let mut bufferval: u16 = 0; + let base: u16 = radix_in as u16; + let divisor: u16 = base_ten_int_divisor as u16; + let mut traversed = 0; let mut quotient = 0; - let mut u_cur : Option<&u8> = Some(match rem_in.replace { - Some(ref u) => { u } - None => { &arrnum[rem_in.position] } - }); + let refd_vals = &rem_in.arr_num[rem_in.position+rem_in.replace.len()..]; + let mut it_replace = rem_in.replace.iter(); + let mut it_f = refd_vals.iter(); + loop { + let u = match it_replace.next() { + Some(u_rep) => { u_rep.clone() as u16 } + None => { + match it_f.next() { + Some(u_orig) => { + u_orig.clone() as u16 + } + None => { + if !after_decimal { + break; + } + 0 + } + } + } + }; + traversed += 1; + bufferval += u; + if bufferval > divisor { + while bufferval >= divisor { + quotient+=1; + bufferval -= divisor; + } + rem_out.replace = if bufferval == 0 { + Vec::new() + } else { + let remainder_as_arrnum = unsigned_to_arrnum(bufferval); 
+ let remainder_as_base_arrnum = base_conv_vec( + &remainder_as_arrnum, + 10, + radix_in + ); + remainder_as_base_arrnum + }; + rem_out.position += 1+(traversed - rem_out.replace.len()); + break; + } else { + bufferval *= base; + } + } + DivOut { quotient: quotient, remainder: rem_out } +} +/* +pub struct ArrFloat { + pub leading_zeros: u8, + pub values: Vec, + pub basenum: u8 +} - let str_f = &arrnum[rem_in.position+1..]; - let mut it_f = str_f.iter(); - loop { +pub struct ArrFloatDivOut { + pub quotient: u8, + pub remainder: ArrFloat +} + +pub fn arrfloat_int_div( + arrfloat_in : &ArrFloat, + base_ten_int_divisor : u8, + precision : u16 +) -> DivOut { + + let mut remainder = ArrFloat { + basenum: arrfloat_in.basenum, + leading_zeros: arrfloat_in.leading_zeroes, + values: Vec::new() + } + let mut quotient = 0; + + let mut bufferval : u16 = 0; + let base : u16 = arrfloat_in.basenum as u16; + let divisor : u16 = base_ten_int_divisor as u16; + + let mut it_f = arrfloat_in.values.iter(); + let mut position = 0 + arrfloat_in.leading_zeroes as u16; + let mut at_end = false; + while position< precision { + let next_digit = match it_f.next() { + Some(c) => {} + None => { 0 } + } match u_cur { Some(u) => { bufferval += u.clone() as u16; @@ -95,9 +173,9 @@ pub fn arrnum_int_div( u_cur = it_f.next().clone(); rem_out.position+=1; } - DivOut { quotient: quotient, remainder: rem_out } + ArrFloatDivOut { quotient: quotient, remainder: remainder } } - +*/ pub fn arrnum_int_add( arrnum : &Vec, basenum : u8, @@ -153,7 +231,22 @@ pub fn base_conv_vec( result } +pub fn unsigned_to_arrnum( + src : u16 +) -> Vec { + let mut result : Vec = Vec::new(); + let mut src_tmp : u16 = src.clone(); + while src_tmp > 0 { + result.push((src_tmp % 10) as u8); + src_tmp /= 10; + } + result.reverse(); + result +} + +//temporary needs-improvement-function +#[allow(unused_variables)] pub fn base_conv_float( src : &Vec, radix_src : u8, @@ -165,11 +258,10 @@ pub fn base_conv_float( // of how it would 
work. let mut result : Vec = Vec::new(); result.push(0); - let mut factor : f64 = radix_dest as f64; + let mut factor : f64 = 1.; let radix_src_float : f64 = radix_src as f64; let mut i = 0; let mut r :f64 = 0 as f64; - factor /= 10.; for u in src { if i > 15 { break; } i+=1; @@ -271,3 +363,4 @@ impl RadixDef for RadixHex { } } +mod tests; diff --git a/src/printf/tokenize/num_format/formatters/base_conv/tests.rs b/src/printf/tokenize/num_format/formatters/base_conv/tests.rs new file mode 100644 index 000000000..c0e17fe2f --- /dev/null +++ b/src/printf/tokenize/num_format/formatters/base_conv/tests.rs @@ -0,0 +1,64 @@ +#[cfg(test)] + +use super::*; + +#[test] +fn test_arrnum_int_mult() { + //(in base 10) 12 * 4 = 48 + let factor : Vec = vec!(1, 2); + let base_num = 10; + let base_ten_int_fact : u8 = 4; + let should_output : Vec = vec![4, 8]; + + let product = arrnum_int_mult(&factor, + base_num, base_ten_int_fact); + assert!(product == should_output); +} + +#[test] +fn test_arrnum_int_non_base_10() { + //(in base 3) + // 5 * 4 = 20 + let factor : Vec = vec![1, 2]; + let base_num = 3; + let base_ten_int_fact : u8 = 4; + let should_output : Vec = vec![2,0,2]; + + let product = arrnum_int_mult(&factor, + base_num, base_ten_int_fact); + assert!(product == should_output); +} + +#[test] +fn test_arrnum_int_div_shortcircuit() { + //(in base 10) + let arrnum : Vec = vec![5,5,5,5,0]; + let base_num = 10; + let base_ten_int_divisor : u8 = 41; + let remainder_passed_in = Remainder { + position : 1, + replace : vec![1,3], + arr_num : &arrnum + }; + + //the "replace" should mean the number being divided + // is 1350, the first time you can get 41 to go into + // 1350, it's at 135, where you can get a quotient of + // 3 and a remainder of 12; + + let quotient_should_be : u8 = 3; + let remainder_position_should_be : usize = 3; + let remainder_replace_should_be = vec![1, 2]; + + let result = arrnum_int_div_step(remainder_passed_in, + base_num, + base_ten_int_divisor, + false + + ); +
assert!(quotient_should_be == result.quotient); + assert!(remainder_position_should_be == + result.remainder.position); + assert!(remainder_replace_should_be == + result.remainder.replace); +} diff --git a/src/printf/tokenize/num_format/formatters/cninetyninehexfloatf.rs b/src/printf/tokenize/num_format/formatters/cninetyninehexfloatf.rs new file mode 100644 index 000000000..d0e9e2f56 --- /dev/null +++ b/src/printf/tokenize/num_format/formatters/cninetyninehexfloatf.rs @@ -0,0 +1,146 @@ +//! formatter for %a %A C99 Hex-floating-point subs +use super::super::format_field::FormatField; +use super::super::formatter::{InPrefix,FormatPrimitive,Formatter}; +use super::float_common::{FloatAnalysis, + primitive_to_str_common}; +use super::base_conv; +use super::base_conv::{RadixDef}; + + +pub struct CninetyNineHexFloatf { + as_num : f64 +} +impl CninetyNineHexFloatf { + pub fn new() -> CninetyNineHexFloatf { + CninetyNineHexFloatf { as_num: 0.0 } + } +} + +impl Formatter for CninetyNineHexFloatf { + fn get_primitive( + &self, + field : &FormatField, + inprefix : &InPrefix, + str_in : &str + ) -> Option { + let second_field = field.second_field.unwrap_or(6)+1; + let analysis = FloatAnalysis::analyze( + &str_in, + inprefix, + Some(second_field as usize), + None, + true); + let f = get_primitive_hex( + inprefix, + &str_in[inprefix.offset..], + &analysis, + second_field as usize, + *field.field_char == 'A'); + Some(f) + } + fn primitive_to_str( + &self, + prim: &FormatPrimitive, + field: FormatField) -> String { + primitive_to_str_common( + prim, + &field + ) + } +} + +//c99 hex has unique requirements among all floating point subs in pretty much every part of building a primitive, from prefix and suffix to need for base conversion (in all other cases, if the input is not decimal it must be converted to decimal; here it's the other way around) + +// on the todo list is to have a trait for get_primitive that is implemented by each float formatter and can override a default.
When that happens we can take the parts of get_primitive_dec specific to dec and spin them out to their own functions that can be overridden. +#[allow(unused_variables)] +#[allow(unused_assignments)] +fn get_primitive_hex( + inprefix : &InPrefix, + str_in : &str, + analysis : &FloatAnalysis, + last_dec_place : usize, + capitalized : bool +) -> FormatPrimitive { + + let mut f : FormatPrimitive = Default::default(); + f.prefix = Some(String::from( + if inprefix.sign == -1 { "-0x" } else { "0x" })); + + // assign the digits before and after the decimal points + // to separate slices. If no digits after decimal point, + // assign 0 + let (mut first_segment_raw, second_segment_raw) = + match analysis.decimal_pos { + Some(pos) => { + (&str_in[..pos], &str_in[pos+1..]) + }, + None => { (&str_in[..], "0") } + }; + if first_segment_raw.len() == 0 { + first_segment_raw = "0"; + } + // convert to string, hexifying if input is in dec. + /*let (first_segment, second_segment) = + match inprefix.radix_in { + Base::Ten => { + (to_hex(first_segment_raw, true), + to_hex(second_segment_raw, false)) + } + _ => { + (String::from(first_segment_raw), + String::from(second_segment_raw)) + } + }; + + + f.pre_decimal = Some(first_segment); + f.post_decimal = Some(second_segment); + */ + //TODO actual conversion, make sure to get back mantissa. + // for hex to hex, it's really just a matter of moving the + // decimal point and calculating the mantissa by its initial + // position and its moves, with every position counting for + // the addition or subtraction of 4 (2**4, because 4 bits in a hex digit) + // to the exponent. + // decimal's going to be a little more complicated. correct simulation + // of glibc will require after-decimal division to a specified precision. + // the difficult part of this (arrnum_int_div_step) is already implemented. + + // the hex float name may be a bit misleading in terms of how to go about the + // conversion.
The best way to do it is to just convert the floatnum + // directly to base 2 and then at the end translate back to hex. + let mantissa=0; + f.suffix = Some({ + let ind = if capitalized { "P" } else { "p" }; + if mantissa >=0 { + format!("{}+{}", ind, mantissa) + } else { + format!("{}{}", ind, mantissa) + } + }); + f +} + +fn to_hex( + src: &str, + before_decimal: bool + ) -> String { + let rten = base_conv::RadixTen; + let rhex = base_conv::RadixHex; + if before_decimal { + base_conv::base_conv_str(src, &rten, &rhex) + } else { + let as_arrnum_ten =base_conv::str_to_arrnum(src, &rten); + let s = format!("{}", base_conv::base_conv_float( + &as_arrnum_ten, + rten.get_max(), + rhex.get_max() + )); + if s.len() > 2 { + String::from(&s[2..]) + } else { + //zero + s + } + } +} diff --git a/src/printf/tokenize/num_format/formatters/decf.rs b/src/printf/tokenize/num_format/formatters/decf.rs index 6a0a98816..c25aec8ab 100644 --- a/src/printf/tokenize/num_format/formatters/decf.rs +++ b/src/printf/tokenize/num_format/formatters/decf.rs @@ -32,12 +32,14 @@ impl Formatter for Decf { str_in : &str ) -> Option { let second_field = field.second_field.unwrap_or(6)+1; + //default to scif interp. 
so as to not truncate input vals + //(that would be displayed in scif) based on relation to decimal place let analysis = FloatAnalysis::analyze( str_in, inprefix, Some(second_field as usize+1), - None - ); + None, + false); let mut f_sci = get_primitive_dec( inprefix, &str_in[inprefix.offset..], diff --git a/src/printf/tokenize/num_format/formatters/float_common.rs b/src/printf/tokenize/num_format/formatters/float_common.rs index b17f21228..e2e85c9eb 100644 --- a/src/printf/tokenize/num_format/formatters/float_common.rs +++ b/src/printf/tokenize/num_format/formatters/float_common.rs @@ -13,12 +13,37 @@ pub struct FloatAnalysis { pub decimal_pos: Option, pub follow: Option } +fn has_enough_digits( + hex_input: bool, + hex_output: bool, + string_position: usize, + starting_position: usize, + limit: usize, +) -> bool { + //-1s are for rounding + if hex_output { + if hex_input { + ((string_position-1) - starting_position >= limit) + } else { + false //undecidable without converting + } + } else { + if hex_input { + ((((string_position-1) - starting_position)*9)/8 >= limit) + } else { + ((string_position-1) - starting_position >= limit) + } + } + +} + impl FloatAnalysis { pub fn analyze( str_in: &str, inprefix: &InPrefix, max_sd_opt: Option, max_after_dec_opt: Option, + hex_output: bool ) -> FloatAnalysis { // this fn assumes // the input string @@ -29,20 +54,26 @@ impl FloatAnalysis { decimal_pos: None, follow: None }; + let hex_input = match inprefix.radix_in { + Base::Hex => { true } + Base::Ten => { false } + Base::Octal => { panic!("this should never happen: floats should never receive octal input"); } + }; let mut i=0; + let mut pos_before_first_nonzero_after_decimal : Option = None; while let Some(c) = str_it.next() { match c{ e @ '0'...'9' | e @ 'A'...'F' | e @ 'a'...'f' => { - match inprefix.radix_in { - Base::Ten => { - match e { - '0'...'9' => {}, - _ => { - warn_incomplete_conv(str_in); - break; - } + if !hex_input { + match e { + '0'...'9' => {}, + _ => { + 
warn_incomplete_conv(str_in); + break; } } - _ => {} + } + if ret.decimal_pos.is_some() && pos_before_first_nonzero_after_decimal.is_none() && e != '0' { + pos_before_first_nonzero_after_decimal = Some(i-1); } if let Some(max_sd) = max_sd_opt { if i == max_sd { @@ -55,12 +86,18 @@ impl FloatAnalysis { break; } } - if let Some(p) = ret.decimal_pos { - if let Some(max_after_dec) = max_after_dec_opt { - if (i-1) - p == max_after_dec { - break + if let Some(max_after_dec) = max_after_dec_opt { + if let Some(p) = ret.decimal_pos { + if has_enough_digits(hex_input, hex_output, i, p, max_after_dec) { + break; } } + } else if let Some(max_sd) = max_sd_opt { + if let Some(p) = pos_before_first_nonzero_after_decimal { + if has_enough_digits(hex_input, hex_output, i, p, max_sd) { + break; + } + } } }, '.' => { @@ -72,7 +109,6 @@ impl FloatAnalysis { } } _ => { - println!("awarn2"); warn_incomplete_conv(str_in); break; } diff --git a/src/printf/tokenize/num_format/formatters/floatf.rs b/src/printf/tokenize/num_format/formatters/floatf.rs index 7fb2a8fbf..688b83afa 100644 --- a/src/printf/tokenize/num_format/formatters/floatf.rs +++ b/src/printf/tokenize/num_format/formatters/floatf.rs @@ -25,8 +25,8 @@ impl Formatter for Floatf { &str_in, inprefix, None, - Some(second_field as usize) - ); + Some(second_field as usize), + false); let f = get_primitive_dec( inprefix, &str_in[inprefix.offset..], diff --git a/src/printf/tokenize/num_format/formatters/mod.rs b/src/printf/tokenize/num_format/formatters/mod.rs index 243a19263..329e36d87 100644 --- a/src/printf/tokenize/num_format/formatters/mod.rs +++ b/src/printf/tokenize/num_format/formatters/mod.rs @@ -1,5 +1,6 @@ pub mod intf; pub mod floatf; +pub mod cninetyninehexfloatf; pub mod scif; pub mod decf; mod float_common; diff --git a/src/printf/tokenize/num_format/formatters/scif.rs b/src/printf/tokenize/num_format/formatters/scif.rs index 3755894c6..22d919117 100644 --- a/src/printf/tokenize/num_format/formatters/scif.rs +++ 
b/src/printf/tokenize/num_format/formatters/scif.rs @@ -25,8 +25,8 @@ impl Formatter for Scif { str_in, inprefix, Some(second_field as usize+1), - None - ); + None, + false); let f = get_primitive_dec( inprefix, &str_in[inprefix.offset..], diff --git a/src/printf/tokenize/num_format/num_format.rs b/src/printf/tokenize/num_format/num_format.rs index 126b14584..1b838242b 100644 --- a/src/printf/tokenize/num_format/num_format.rs +++ b/src/printf/tokenize/num_format/num_format.rs @@ -7,6 +7,7 @@ use super::format_field::{FormatField, FieldType}; use super::formatter::{Formatter, FormatPrimitive, InPrefix, Base}; use super::formatters::intf::Intf; use super::formatters::floatf::Floatf; +use super::formatters::cninetyninehexfloatf::CninetyNineHexFloatf; use super::formatters::scif::Scif; use super::formatters::decf::Decf; @@ -200,6 +201,7 @@ pub fn num_format( let fmtr : Box = match *field.field_type { FieldType::Intf => Box::new(Intf::new()), FieldType::Floatf => Box::new(Floatf::new()), + FieldType::CninetyNineHexFloatf => Box::new(CninetyNineHexFloatf::new()), FieldType::Scif => Box::new(Scif::new()), FieldType::Decf => Box::new(Decf::new()), _ => { panic!("asked to do num format with non-num fieldtype"); } diff --git a/src/printf/tokenize/sub.rs b/src/printf/tokenize/sub.rs index 045219082..be2aa4051 100644 --- a/src/printf/tokenize/sub.rs +++ b/src/printf/tokenize/sub.rs @@ -67,6 +67,7 @@ impl Sub { 's' | 'b' => FieldType::Strf, 'd' | 'i' | 'u' | 'o' | 'x' | 'X' => FieldType::Intf, 'f' | 'F' => FieldType::Floatf, + 'a' | 'A' => FieldType::CninetyNineHexFloatf, 'e' | 'E' => FieldType::Scif, 'g' | 'G' => FieldType::Decf, 'c' => FieldType::Charf, @@ -157,9 +158,10 @@ impl SubParser { // though, as we want to mimic the original behavior of printing // the field as interpreted up until the error in the field. 
- let mut legal_fields=vec!['b', 'c', 'd', 'e', 'E', - 'f', 'g', 'G', 'i', 'o', - 's', 'u', 'x', 'X']; + let mut legal_fields=vec![ + //'a', 'A', //c99 hex float implementation not yet complete + 'b', 'c', 'd', 'e', 'E', 'f', + 'F', 'g', 'G', 'i', 'o','s', 'u', 'x', 'X']; let mut specifiers=vec!['h', 'j', 'l', 'L', 't', 'z']; legal_fields.sort(); specifiers.sort();