From 78c04cb8b2a6a051d2de9228fecec4507f187b1b Mon Sep 17 00:00:00 2001 From: Ali Mohammad Pur Date: Mon, 30 Oct 2023 23:31:47 +0330 Subject: [PATCH] AK+LibPDF: Make Format print floats in a roundtrip-safe way by default Previously we assumed a default precision of 6, which made the printed values quite odd in some cases. This commit changes that default to print them with just enough precision to produce the exact same float when roundtripped. This commit adds some new tests that assert exact format outputs, which have to be modified if we decide to change the default behaviour. --- AK/Format.cpp | 156 ++++++++++++++++++++++++++- AK/Format.h | 22 +++- Tests/AK/TestFormat.cpp | 22 ++++ Userland/Libraries/LibPDF/Document.h | 2 +- 4 files changed, 193 insertions(+), 9 deletions(-) diff --git a/AK/Format.cpp b/AK/Format.cpp index 6f100e7f8d..ea42971656 100644 --- a/AK/Format.cpp +++ b/AK/Format.cpp @@ -30,6 +30,10 @@ # include #endif +#ifndef KERNEL +# include +#endif + namespace AK { class FormatParser : public GenericLexer { @@ -470,7 +474,7 @@ static ErrorOr round_up_digits(StringBuilder& digits_builder) return digits_builder.try_append(digits_buffer); } -ErrorOr FormatBuilder::put_f64( +ErrorOr FormatBuilder::put_f64_with_precision( double value, u8 base, bool upper_case, @@ -543,6 +547,130 @@ ErrorOr FormatBuilder::put_f64( return put_string(string_builder.string_view(), align, min_width, NumericLimits::max(), fill); } +template T> +ErrorOr FormatBuilder::put_f32_or_f64( + T value, + u8 base, + bool upper_case, + bool zero_pad, + bool use_separator, + Align align, + size_t min_width, + Optional precision, + char fill, + SignMode sign_mode, + RealNumberDisplayMode display_mode) +{ + if (precision.has_value() || base != 10) + return put_f64_with_precision(value, base, upper_case, zero_pad, use_separator, align, min_width, precision.value_or(6), fill, sign_mode, display_mode); + + // No precision specified, so pick the best precision with roundtrip guarantees. + StringBuilder builder; + + // Special cases: NaN, inf, -inf, 0 and -0. + auto const is_nan = isnan(value); + auto const is_inf = isinf(value); + auto const is_zero = value == static_cast(0.0) || value == static_cast(-0.0); + if (is_nan || is_inf || is_zero) { + if (value < 0) + TRY(builder.try_append('-')); + else if (sign_mode == SignMode::Always) + TRY(builder.try_append('+')); + else if (sign_mode == SignMode::Reserved) + TRY(builder.try_append(' ')); + + if (is_nan) + TRY(builder.try_append(upper_case ? "NAN"sv : "nan"sv)); + else if (is_inf) + TRY(builder.try_append(upper_case ? "INF"sv : "inf"sv)); + else + TRY(builder.try_append('0')); + + return put_string(builder.string_view(), align, min_width, NumericLimits::max(), fill); + } + + auto const [sign, mantissa, exponent] = convert_floating_point_to_decimal_exponential_form(value); + + auto convert_to_decimal_digits_array = [](auto x, auto& digits) -> size_t { + size_t length = 0; + for (; x; x /= 10) + digits[length++] = x % 10 | '0'; + for (size_t i = 0; 2 * i + 1 < length; ++i) + swap(digits[i], digits[length - i - 1]); + return length; + }; + + Array mantissa_digits; + auto mantissa_length = convert_to_decimal_digits_array(mantissa, mantissa_digits); + + if (sign) + TRY(builder.try_append('-')); + else if (sign_mode == SignMode::Always) + TRY(builder.try_append('+')); + else if (sign_mode == SignMode::Reserved) + TRY(builder.try_append(' ')); + + auto const n = exponent + static_cast(mantissa_length); + auto const mantissa_text = StringView { mantissa_digits.span().slice(0, mantissa_length) }; + size_t integral_part_end = 0; + // NOTE: Range from ECMA262, seems like an okay default. + if (n >= -5 && n <= 21) { + if (exponent >= 0) { + TRY(builder.try_append(mantissa_text)); + TRY(builder.try_append_repeated('0', exponent)); + integral_part_end = builder.length(); + } else if (n > 0) { + TRY(builder.try_append(mantissa_text.substring_view(0, n))); + integral_part_end = builder.length(); + TRY(builder.try_append('.')); + TRY(builder.try_append(mantissa_text.substring_view(n))); + } else { + TRY(builder.try_append("0."sv)); + TRY(builder.try_append_repeated('0', -n)); + TRY(builder.try_append(mantissa_text)); + integral_part_end = 1; + } + } else { + auto const exponent_sign = n < 0 ? '-' : '+'; + Array exponent_digits; + auto const exponent_length = convert_to_decimal_digits_array(abs(n - 1), exponent_digits); + auto const exponent_text = StringView { exponent_digits.span().slice(0, exponent_length) }; + integral_part_end = 1; + + if (mantissa_length == 1) { + // e + TRY(builder.try_append(mantissa_text)); + TRY(builder.try_append('e')); + TRY(builder.try_append(exponent_sign)); + TRY(builder.try_append(exponent_text)); + } else { + // .e + TRY(builder.try_append(mantissa_text.substring_view(0, 1))); + TRY(builder.try_append('.')); + TRY(builder.try_append(mantissa_text.substring_view(1))); + TRY(builder.try_append('e')); + TRY(builder.try_append(exponent_sign)); + TRY(builder.try_append(exponent_text)); + } + } + + if (use_separator && integral_part_end > 3) { + // Go backwards from the end of the integral part, inserting commas every 3 consecutive digits. + StringBuilder separated_builder; + auto const string_view = builder.string_view(); + for (size_t i = 0; i < integral_part_end; ++i) { + auto const index_from_end = integral_part_end - i - 1; + if (index_from_end > 0 && index_from_end != integral_part_end - 1 && index_from_end % 3 == 2) + TRY(separated_builder.try_append(',')); + TRY(separated_builder.try_append(string_view[i])); + } + TRY(separated_builder.try_append(string_view.substring_view(integral_part_end))); + builder = move(separated_builder); + } + + return put_string(builder.string_view(), align, min_width, NumericLimits::max(), fill); +} + ErrorOr FormatBuilder::put_f80( long double value, u8 base, @@ -909,15 +1037,33 @@ ErrorOr Formatter::format(FormatBuilder& builder, double value) } m_width = m_width.value_or(0); - m_precision = m_precision.value_or(6); - return builder.put_f64(value, base, upper_case, m_zero_pad, m_use_separator, m_align, m_width.value(), m_precision.value(), m_fill, m_sign_mode, real_number_display_mode); + return builder.put_f32_or_f64(value, base, upper_case, m_zero_pad, m_use_separator, m_align, m_width.value(), m_precision, m_fill, m_sign_mode, real_number_display_mode); } ErrorOr Formatter::format(FormatBuilder& builder, float value) { - Formatter formatter { *this }; - return formatter.format(builder, value); + u8 base; + bool upper_case; + FormatBuilder::RealNumberDisplayMode real_number_display_mode = FormatBuilder::RealNumberDisplayMode::General; + if (m_mode == Mode::Default || m_mode == Mode::FixedPoint) { + base = 10; + upper_case = false; + if (m_mode == Mode::FixedPoint) + real_number_display_mode = FormatBuilder::RealNumberDisplayMode::FixedPoint; + } else if (m_mode == Mode::Hexfloat) { + base = 16; + upper_case = false; + } else if (m_mode == Mode::HexfloatUppercase) { + base = 16; + upper_case = true; + } else { + VERIFY_NOT_REACHED(); + } + + m_width = m_width.value_or(0); + + return builder.put_f32_or_f64(value, base, upper_case, m_zero_pad, m_use_separator, m_align, m_width.value(), m_precision, m_fill, m_sign_mode, real_number_display_mode); } #endif diff --git a/AK/Format.h b/AK/Format.h index 801a453044..2648cc29b9 100644 --- a/AK/Format.h +++ b/AK/Format.h @@ -238,15 +238,16 @@ public: SignMode sign_mode = SignMode::OnlyIfNeeded, RealNumberDisplayMode = RealNumberDisplayMode::Default); - ErrorOr put_f64( - double value, + template T> + ErrorOr put_f32_or_f64( + T value, u8 base = 10, bool upper_case = false, bool zero_pad = false, bool use_separator = false, Align align = Align::Right, size_t min_width = 0, - size_t precision = 6, + Optional precision = {}, char fill = ' ', SignMode sign_mode = SignMode::OnlyIfNeeded, RealNumberDisplayMode = RealNumberDisplayMode::Default); @@ -265,6 +266,21 @@ public: private: StringBuilder& m_builder; + +#ifndef KERNEL + ErrorOr put_f64_with_precision( + double value, + u8 base, + bool upper_case, + bool zero_pad, + bool use_separator, + Align align, + size_t min_width, + size_t precision, + char fill, + SignMode sign_mode, + RealNumberDisplayMode); +#endif }; class TypeErasedFormatParams { diff --git a/Tests/AK/TestFormat.cpp b/Tests/AK/TestFormat.cpp index 823555f37d..32fae024a4 100644 --- a/Tests/AK/TestFormat.cpp +++ b/Tests/AK/TestFormat.cpp @@ -283,6 +283,28 @@ TEST_CASE(floating_point_numbers) EXPECT_EQ(DeprecatedString::formatted("{:x>5.1}", 1.12), "xx1.1"); } +TEST_CASE(floating_point_default_precision) +{ +#define EXPECT_FORMAT(lit, value) \ + EXPECT_EQ(DeprecatedString::formatted("{}", lit), value##sv) + + EXPECT_FORMAT(10.3f, "10.3"); + EXPECT_FORMAT(123e4f, "1230000"); + EXPECT_FORMAT(1.23e4f, "12300"); + EXPECT_FORMAT(134232544.4365f, "134232540"); + EXPECT_FORMAT(1.43e24, "1.43e+24"); + EXPECT_FORMAT(1.43e-24, "1.43e-24"); + EXPECT_FORMAT(0.0f, "0"); + EXPECT_FORMAT(3.14159265358979, "3.14159265358979"); + EXPECT_FORMAT(1.61803399, "1.61803399"); + EXPECT_FORMAT(2.71827, "2.71827"); + EXPECT_FORMAT(42.f, "42"); + EXPECT_FORMAT(123456.78, "123456.78"); + EXPECT_FORMAT(23456.78910, "23456.7891"); + +#undef EXPECT_FORMAT +} + TEST_CASE(no_precision_no_trailing_number) { EXPECT_EQ(DeprecatedString::formatted("{:.0}", 0.1), "0"); diff --git a/Userland/Libraries/LibPDF/Document.h b/Userland/Libraries/LibPDF/Document.h index 3328e40f49..e5cb41b9b1 100644 --- a/Userland/Libraries/LibPDF/Document.h +++ b/Userland/Libraries/LibPDF/Document.h @@ -232,7 +232,7 @@ struct Formatter : Formatter { TRY(builder.put_literal(" parameters="sv)); for (auto const& param : destination.parameters) { if (param.has_value()) - TRY(builder.put_f64(double(param.value()))); + TRY(builder.put_f32_or_f64(param.value())); else TRY(builder.put_literal("{{}}"sv)); TRY(builder.put_literal(" "sv));