diff --git a/Userland/Libraries/LibC/stdlib.cpp b/Userland/Libraries/LibC/stdlib.cpp index 81759f4b1f..36ba788412 100644 --- a/Userland/Libraries/LibC/stdlib.cpp +++ b/Userland/Libraries/LibC/stdlib.cpp @@ -5,6 +5,8 @@ */ #include +#include +#include #include #include #include @@ -182,6 +184,166 @@ inline int generate_unique_filename(char* pattern, size_t suffix_length, Callbac return EEXIST; } +static bool is_infinity_string(char* parse_ptr, char** endptr) +{ + if (is_either(parse_ptr, 0, 'i', 'I')) { + if (is_either(parse_ptr, 1, 'n', 'N')) { + if (is_either(parse_ptr, 2, 'f', 'F')) { + parse_ptr += 3; + if (is_either(parse_ptr, 0, 'i', 'I')) { + if (is_either(parse_ptr, 1, 'n', 'N')) { + if (is_either(parse_ptr, 2, 'i', 'I')) { + if (is_either(parse_ptr, 3, 't', 'T')) { + if (is_either(parse_ptr, 4, 'y', 'Y')) { + parse_ptr += 5; + } + } + } + } + } + if (endptr) + *endptr = parse_ptr; + + return true; + } + } + } + + return false; +} + +static bool is_nan_string(char* parse_ptr, char** endptr) +{ + // FIXME: Actually parse (or at least skip) the (n-char-sequenceopt) part + if (is_either(parse_ptr, 0, 'n', 'N')) { + if (is_either(parse_ptr, 1, 'a', 'A')) { + if (is_either(parse_ptr, 2, 'n', 'N')) { + if (endptr) + *endptr = parse_ptr + 3; + return true; + } + } + } + + return false; +} + +template +static T c_str_to_floating_point(char const* str, char** endptr) +{ + // First, they decompose the input string into three parts: + char* parse_ptr = const_cast(str); + + // An initial, possibly empty, sequence of white-space characters (as specified by isspace()) + strtons(parse_ptr, &parse_ptr); + + // A subject sequence interpreted as a floating-point constant or representing infinity or NaN + + if (*parse_ptr == '\0') { + if (endptr) + *endptr = const_cast(str); + return 0.; + } + + bool is_hex = [&] { + // A hexfloat must start with either 0x, 0X, -0x or -0X and have something after it + char const* parse_head = parse_ptr; + if (*parse_head == '-') + ++parse_head; + + if (*parse_head != '0') + return false; + + ++parse_head; + + if (*parse_head != 'x') + return false; + + ++parse_head; + + // We must have at least one digit but it can come after the "decimal" point. + + if (is_ascii_hex_digit(*parse_head)) + return true; + + if (*parse_head != '.') + return false; + + ++parse_head; + + return is_ascii_hex_digit(*parse_head); + }(); + + AK::FloatingPointParseResults double_parse_result; + if (is_hex) { + // A 0x or 0X, then a non-empty sequence of hexadecimal digits optionally containing a radix character; + // then an optional binary exponent part consisting of the character 'p' or the character 'P', + // optionally followed by a '+' or '-' character, and then followed by one or more decimal digits + + double_parse_result = AK::parse_first_hexfloat_until_zero_character(parse_ptr); + } else { + // A non-empty sequence of decimal digits optionally containing a radix character; + // then an optional exponent part consisting of the character 'e' or the character 'E', + // optionally followed by a '+' or '-' character, and then followed by one or more decimal digits + double_parse_result = AK::parse_first_floating_point_until_zero_character(parse_ptr); + } + + if (double_parse_result.error == AK::FloatingPointError::None) { + // The only way to get NaN (which we shouldn't) or infinities is rounding up to them so we + // have to set ERANGE in that case. + if (!__builtin_isfinite(double_parse_result.value)) + errno = ERANGE; + + if (endptr) + *endptr = const_cast(double_parse_result.end_ptr); + return double_parse_result.value; + } + + if (double_parse_result.error == AK::FloatingPointError::RoundedDownToZero || double_parse_result.error == AK::FloatingPointError::OutOfRange) { + // This is a special case for strtod, where we have a double so close to zero we had to round + // it to zero, in which case we have to set ERANGE + errno = ERANGE; + + if (endptr) + *endptr = const_cast(double_parse_result.end_ptr); + return double_parse_result.value; + } + + // The only way we are here is if the input was not valid for parse_first_floating_point or not a valid hex float + // So the only cases left are: + // - One of INF or INFINITY, ignoring case + // - One of NAN or NAN(n-char-sequenceopt), ignoring case in the NAN part + + const Sign sign = strtosign(parse_ptr, &parse_ptr); + + if (is_infinity_string(parse_ptr, endptr)) { + // Don't set errno to ERANGE here: + // The caller may want to distinguish between "input is + // literal infinity" and "input is not literal infinity + // but did not fit into double". + if (sign != Sign::Negative) + return static_cast(__builtin_huge_val()); + else + return static_cast(-__builtin_huge_val()); + } + + if (is_nan_string(parse_ptr, endptr)) { + errno = ERANGE; + // FIXME: Do we actually want to return "different" NaN bit values? + if (sign != Sign::Negative) + return static_cast(__builtin_nan("")); + else + return static_cast(-__builtin_nan("")); + } + + // If no conversion could be performed, 0 shall be returned, and errno may be set to [EINVAL]. + // FIXME: This is in the posix standard linked from strtod but not in implementations of strtod + // and not in the man pages for linux strtod. + if (endptr) + *endptr = const_cast(str); + return 0; +} + extern "C" { void exit(int status) @@ -398,283 +560,7 @@ void setprogname(char const* progname) // https://pubs.opengroup.org/onlinepubs/9699919799/functions/strtod.html double strtod(char const* str, char** endptr) { - // Parse spaces, sign, and base - char* parse_ptr = const_cast(str); - strtons(parse_ptr, &parse_ptr); - const Sign sign = strtosign(parse_ptr, &parse_ptr); - - // Parse inf/nan, if applicable. - if (is_either(parse_ptr, 0, 'i', 'I')) { - if (is_either(parse_ptr, 1, 'n', 'N')) { - if (is_either(parse_ptr, 2, 'f', 'F')) { - parse_ptr += 3; - if (is_either(parse_ptr, 0, 'i', 'I')) { - if (is_either(parse_ptr, 1, 'n', 'N')) { - if (is_either(parse_ptr, 2, 'i', 'I')) { - if (is_either(parse_ptr, 3, 't', 'T')) { - if (is_either(parse_ptr, 4, 'y', 'Y')) { - parse_ptr += 5; - } - } - } - } - } - if (endptr) - *endptr = parse_ptr; - // Don't set errno to ERANGE here: - // The caller may want to distinguish between "input is - // literal infinity" and "input is not literal infinity - // but did not fit into double". - if (sign != Sign::Negative) { - return __builtin_huge_val(); - } else { - return -__builtin_huge_val(); - } - } - } - } - if (is_either(parse_ptr, 0, 'n', 'N')) { - if (is_either(parse_ptr, 1, 'a', 'A')) { - if (is_either(parse_ptr, 2, 'n', 'N')) { - if (endptr) - *endptr = parse_ptr + 3; - errno = ERANGE; - if (sign != Sign::Negative) { - return __builtin_nan(""); - } else { - return -__builtin_nan(""); - } - } - } - } - - // Parse base - char exponent_lower; - char exponent_upper; - int base = 10; - if (*parse_ptr == '0') { - char const base_ch = *(parse_ptr + 1); - if (base_ch == 'x' || base_ch == 'X') { - base = 16; - parse_ptr += 2; - } - } - - if (base == 10) { - exponent_lower = 'e'; - exponent_upper = 'E'; - } else { - exponent_lower = 'p'; - exponent_upper = 'P'; - } - - // Parse "digits", possibly keeping track of the exponent offset. - // We parse the most significant digits and the position in the - // base-`base` representation separately. This allows us to handle - // numbers like `0.0000000000000000000000000000000000001234` or - // `1234567890123456789012345678901234567890` with ease. - LongLongParser digits { sign, base }; - bool digits_usable = false; - bool should_continue = true; - bool digits_overflow = false; - bool after_decimal = false; - int exponent = 0; - do { - if (!after_decimal && *parse_ptr == '.') { - after_decimal = true; - parse_ptr += 1; - continue; - } - - bool is_a_digit; - if (digits_overflow) { - is_a_digit = digits.parse_digit(*parse_ptr) != -1; - } else { - DigitConsumeDecision decision = digits.consume(*parse_ptr); - switch (decision) { - case DigitConsumeDecision::Consumed: - is_a_digit = true; - // The very first actual digit must pass here: - digits_usable = true; - break; - case DigitConsumeDecision::PosOverflow: - case DigitConsumeDecision::NegOverflow: - is_a_digit = true; - digits_overflow = true; - break; - case DigitConsumeDecision::Invalid: - is_a_digit = false; - break; - default: - VERIFY_NOT_REACHED(); - } - } - - if (is_a_digit) { - exponent -= after_decimal ? 1 : 0; - exponent += digits_overflow ? 1 : 0; - } - - should_continue = is_a_digit; - parse_ptr += should_continue; - } while (should_continue); - - if (!digits_usable) { - // No actual number value available. - if (endptr) - *endptr = const_cast(str); - return 0.0; - } - - // Parse exponent. - // We already know the next character is not a digit in the current base, - // nor a valid decimal point. Check whether it's an exponent sign. - if (*parse_ptr == exponent_lower || *parse_ptr == exponent_upper) { - // Need to keep the old parse_ptr around, in case of rollback. - char* old_parse_ptr = parse_ptr; - parse_ptr += 1; - - // Can't use atol or strtol here: Must accept excessive exponents, - // even exponents >64 bits. - Sign exponent_sign = strtosign(parse_ptr, &parse_ptr); - IntParser exponent_parser { exponent_sign, base }; - bool exponent_usable = false; - bool exponent_overflow = false; - should_continue = true; - do { - bool is_a_digit; - if (exponent_overflow) { - is_a_digit = exponent_parser.parse_digit(*parse_ptr) != -1; - } else { - DigitConsumeDecision decision = exponent_parser.consume(*parse_ptr); - switch (decision) { - case DigitConsumeDecision::Consumed: - is_a_digit = true; - // The very first actual digit must pass here: - exponent_usable = true; - break; - case DigitConsumeDecision::PosOverflow: - case DigitConsumeDecision::NegOverflow: - is_a_digit = true; - exponent_overflow = true; - break; - case DigitConsumeDecision::Invalid: - is_a_digit = false; - break; - default: - VERIFY_NOT_REACHED(); - } - } - - should_continue = is_a_digit; - parse_ptr += should_continue; - } while (should_continue); - - if (!exponent_usable) { - parse_ptr = old_parse_ptr; - } else if (exponent_overflow) { - // Technically this is wrong. If someone gives us 5GB of digits, - // and then an exponent of -5_000_000_000, the resulting exponent - // should be around 0. - // However, I think it's safe to assume that we never have to deal - // with that many digits anyway. - if (sign != Sign::Negative) { - exponent = INT_MIN; - } else { - exponent = INT_MAX; - } - } else { - // Literal exponent is usable and fits in an int. - // However, `exponent + exponent_parser.number()` might overflow an int. - // This would result in the wrong sign of the exponent! - long long new_exponent = static_cast(exponent) + static_cast(exponent_parser.number()); - if (new_exponent < INT_MIN) { - exponent = INT_MIN; - } else if (new_exponent > INT_MAX) { - exponent = INT_MAX; - } else { - exponent = static_cast(new_exponent); - } - } - } - - // Parsing finished. now we only have to compute the result. - if (endptr) - *endptr = const_cast(parse_ptr); - - // If `digits` is zero, we don't even have to look at `exponent`. - if (digits.number() == 0) { - if (sign != Sign::Negative) { - return 0.0; - } else { - return -0.0; - } - } - - // Deal with extreme exponents. - // The smallest normal is 2^-1022. - // The smallest denormal is 2^-1074. - // The largest number in `digits` is 2^63 - 1. - // Therefore, if "base^exponent" is smaller than 2^-(1074+63), the result is 0.0 anyway. - // This threshold is roughly 5.3566 * 10^-343. - // So if the resulting exponent is -344 or lower (closer to -inf), - // the result is 0.0 anyway. - // We only need to avoid false positives, so we can ignore base 16. - if (exponent <= -344) { - errno = ERANGE; - // Definitely can't be represented more precisely. - // I lied, sometimes the result is +0.0, and sometimes -0.0. - if (sign != Sign::Negative) { - return 0.0; - } else { - return -0.0; - } - } - // The largest normal is 2^+1024-eps. - // The smallest number in `digits` is 1. - // Therefore, if "base^exponent" is 2^+1024, the result is INF anyway. - // This threshold is roughly 1.7977 * 10^-308. - // So if the resulting exponent is +309 or higher, - // the result is INF anyway. - // We only need to avoid false positives, so we can ignore base 16. - if (exponent >= 309) { - errno = ERANGE; - // Definitely can't be represented more precisely. - // I lied, sometimes the result is +INF, and sometimes -INF. - if (sign != Sign::Negative) { - return __builtin_huge_val(); - } else { - return -__builtin_huge_val(); - } - } - - // TODO: If `exponent` is large, this could be made faster. - double value = digits.number(); - double scale = 1; - - if (exponent < 0) { - exponent = -exponent; - for (int i = 0; i < min(exponent, 300); ++i) { - scale *= base; - } - value /= scale; - for (int i = 300; i < exponent; i++) { - value /= base; - } - if (value == -0.0 || value == +0.0) { - errno = ERANGE; - } - } else if (exponent > 0) { - for (int i = 0; i < exponent; ++i) { - scale *= base; - } - value *= scale; - if (value == -__builtin_huge_val() || value == +__builtin_huge_val()) { - errno = ERANGE; - } - } - - return value; + return c_str_to_floating_point(str, endptr); } // https://pubs.opengroup.org/onlinepubs/9699919799/functions/strtold.html @@ -687,7 +573,7 @@ long double strtold(char const* str, char** endptr) // https://pubs.opengroup.org/onlinepubs/9699919799/functions/strtof.html float strtof(char const* str, char** endptr) { - return strtod(str, endptr); + return c_str_to_floating_point(str, endptr); } // https://pubs.opengroup.org/onlinepubs/9699919799/functions/atof.html