diff --git a/AK/GenericLexer.cpp b/AK/GenericLexer.cpp
index 5061e10b6a..6a93edbad5 100644
--- a/AK/GenericLexer.cpp
+++ b/AK/GenericLexer.cpp
@@ -7,6 +7,7 @@
 #include <AK/Assertions.h>
 #include <AK/CharacterTypes.h>
 #include <AK/GenericLexer.h>
+#include <AK/ScopeGuard.h>
 #include <AK/StringBuilder.h>
 
 #ifndef KERNEL
@@ -128,6 +129,68 @@ StringView GenericLexer::consume_quoted_string(char escape_char)
     return m_input.substring_view(start, length);
 }
 
+// Consumes an optional '+' or '-' sign followed by one or more ASCII digits and returns the
+// value as a T. If parsing fails, the lexer is rewound to where it started and the error is
+// EINVAL when no digits follow the optional sign, or ERANGE when the value does not fit in T.
+template<Integral T>
+ErrorOr<T> GenericLexer::consume_decimal_integer()
+{
+    using UnsignedT = MakeUnsigned<T>;
+
+    // Restores the starting position on every error return; the success paths disarm it.
+    ArmedScopeGuard rollback { [&, rollback_position = m_index] {
+        m_index = rollback_position;
+    } };
+
+    bool has_minus_sign = false;
+
+    if (next_is('+') || next_is('-'))
+        if (consume() == '-')
+            has_minus_sign = true;
+
+    StringView number_view = consume_while(is_ascii_digit);
+    if (number_view.is_empty())
+        return Error::from_errno(EINVAL);
+
+    auto maybe_number = StringUtils::convert_to_uint<UnsignedT>(number_view, TrimWhitespace::No);
+    if (!maybe_number.has_value())
+        return Error::from_errno(ERANGE);
+    auto number = maybe_number.value();
+
+    if (!has_minus_sign) {
+        if (NumericLimits<T>::max() < number) // This is only possible in a signed case.
+            return Error::from_errno(ERANGE);
+
+        rollback.disarm();
+        return number;
+    } else {
+        if constexpr (IsUnsigned<T>) {
+            if (number == 0) {
+                rollback.disarm();
+                return 0;
+            }
+            return Error::from_errno(ERANGE);
+        } else {
+            // The most negative T has a magnitude one larger than NumericLimits<T>::max().
+            static constexpr UnsignedT max_value = static_cast<UnsignedT>(NumericLimits<T>::max()) + 1;
+            if (number > max_value)
+                return Error::from_errno(ERANGE);
+            rollback.disarm();
+            return -number;
+        }
+    }
+}
+
+// The definition lives in this file, so each supported integer type is instantiated explicitly.
+template ErrorOr<u8> GenericLexer::consume_decimal_integer<u8>();
+template ErrorOr<u16> GenericLexer::consume_decimal_integer<u16>();
+template ErrorOr<u32> GenericLexer::consume_decimal_integer<u32>();
+template ErrorOr<u64> GenericLexer::consume_decimal_integer<u64>();
+template ErrorOr<i8> GenericLexer::consume_decimal_integer<i8>();
+template ErrorOr<i16> GenericLexer::consume_decimal_integer<i16>();
+template ErrorOr<i32> GenericLexer::consume_decimal_integer<i32>();
+template ErrorOr<i64> GenericLexer::consume_decimal_integer<i64>();
+
 #ifndef KERNEL
 Optional<DeprecatedString> GenericLexer::consume_and_unescape_string(char escape_char)
 {
diff --git a/AK/GenericLexer.h b/AK/GenericLexer.h
index f791f92c41..4f2f2bb11e 100644
--- a/AK/GenericLexer.h
+++ b/AK/GenericLexer.h
@@ -31,6 +31,20 @@ public:
         return (m_index + offset < m_input.length()) ? m_input[m_index + offset] : '\0';
     }
 
+    // Returns a view of the `length` characters that start `offset` characters past the current
+    // position, without consuming anything, or an empty Optional if the input is too short.
+    Optional<StringView> peek_string(size_t length, size_t offset = 0) const
+    {
+        // Check against the remaining length by subtraction: adding m_index, offset and length
+        // could wrap around for very large arguments and let an out-of-range request through.
+        if (m_index > m_input.length())
+            return {};
+        size_t remaining = m_input.length() - m_index;
+        if (offset > remaining || length > remaining - offset)
+            return {};
+        return m_input.substring_view(m_index + offset, length);
+    }
+
     constexpr bool next_is(char expected) const
     {
         return peek() == expected;
@@ -121,6 +135,11 @@ public:
 #ifndef KERNEL
     Optional<DeprecatedString> consume_and_unescape_string(char escape_char = '\\');
 #endif
+    // Parses an optionally signed decimal integer at the current position. On success the sign
+    // and digits are consumed; on failure nothing is consumed and the error is EINVAL (no
+    // digits) or ERANGE (the value does not fit in T).
+    template<Integral T>
+    ErrorOr<T> consume_decimal_integer();
 
     enum class UnicodeEscapeError {
         MalformedUnicodeEscape,
diff --git a/Tests/AK/TestGenericLexer.cpp b/Tests/AK/TestGenericLexer.cpp
index 39cb03d278..fb384fe850 100644
--- a/Tests/AK/TestGenericLexer.cpp
+++ b/Tests/AK/TestGenericLexer.cpp
@@ -204,3 +204,110 @@ TEST_CASE(consume_escaped_code_point)
     test("\\ud83d\\ude00"sv, 0x1f600);
     test("\\ud83d\\ude00"sv, 0xd83d, false);
 }
+
+TEST_CASE(consume_decimal_integer_correctly_parses)
+{
+    // Parses `test` as `type`, expecting `expected` and the whole input to be consumed.
+#define CHECK_PARSES_INTEGER(test, expected, type) \
+    do { \
+        GenericLexer lexer(test##sv); \
+        auto actual = lexer.consume_decimal_integer<type>(); \
+        VERIFY(!actual.is_error()); \
+        EXPECT_EQ(actual.value(), static_cast<type>(expected)); \
+        EXPECT_EQ(lexer.tell(), test##sv.length()); \
+    } while (false)
+    CHECK_PARSES_INTEGER("0", 0, u8);
+    CHECK_PARSES_INTEGER("-0", -0, u8);
+    CHECK_PARSES_INTEGER("10", 10, u8);
+    CHECK_PARSES_INTEGER("255", 255, u8);
+    CHECK_PARSES_INTEGER("0", 0, u16);
+    CHECK_PARSES_INTEGER("-0", -0, u16);
+    CHECK_PARSES_INTEGER("1234", 1234, u16);
+    CHECK_PARSES_INTEGER("65535", 65535, u16);
+    CHECK_PARSES_INTEGER("0", 0, u32);
+    CHECK_PARSES_INTEGER("-0", -0, u32);
+    CHECK_PARSES_INTEGER("1234", 1234, u32);
+    CHECK_PARSES_INTEGER("4294967295", 4294967295, u32);
+    CHECK_PARSES_INTEGER("0", 0, u64);
+    CHECK_PARSES_INTEGER("-0", -0, u64);
+    CHECK_PARSES_INTEGER("1234", 1234, u64);
+    CHECK_PARSES_INTEGER("18446744073709551615", 18446744073709551615ULL, u64);
+    CHECK_PARSES_INTEGER("0", 0, i8);
+    CHECK_PARSES_INTEGER("-0", -0, i8);
+    CHECK_PARSES_INTEGER("10", 10, i8);
+    CHECK_PARSES_INTEGER("-10", -10, i8);
+    CHECK_PARSES_INTEGER("127", 127, i8);
+    CHECK_PARSES_INTEGER("-128", -128, i8);
+    CHECK_PARSES_INTEGER("0", 0, i16);
+    CHECK_PARSES_INTEGER("-0", -0, i16);
+    CHECK_PARSES_INTEGER("1234", 1234, i16);
+    CHECK_PARSES_INTEGER("-1234", -1234, i16);
+    CHECK_PARSES_INTEGER("32767", 32767, i16);
+    CHECK_PARSES_INTEGER("-32768", -32768, i16);
+    CHECK_PARSES_INTEGER("0", 0, i32);
+    CHECK_PARSES_INTEGER("-0", -0, i32);
+    CHECK_PARSES_INTEGER("1234", 1234, i32);
+    CHECK_PARSES_INTEGER("-1234", -1234, i32);
+    CHECK_PARSES_INTEGER("2147483647", 2147483647, i32);
+    CHECK_PARSES_INTEGER("-2147483648", -2147483648, i32);
+    CHECK_PARSES_INTEGER("0", 0, i64);
+    CHECK_PARSES_INTEGER("-0", -0, i64);
+    CHECK_PARSES_INTEGER("1234", 1234, i64);
+    CHECK_PARSES_INTEGER("-1234", -1234, i64);
+    CHECK_PARSES_INTEGER("9223372036854775807", 9223372036854775807, i64);
+    CHECK_PARSES_INTEGER("-9223372036854775808", -9223372036854775808ULL, i64);
+#undef CHECK_PARSES_INTEGER
+}
+
+TEST_CASE(consume_decimal_integer_fails_with_correct_error)
+{
+    // Expects parsing `test` as `type` to fail with errno `err` and leave the lexer at index 0.
+#define CHECK_FAILS_WITH_ERROR(test, type, err) \
+    do { \
+        GenericLexer lexer(test##sv); \
+        auto actual = lexer.consume_decimal_integer<type>(); \
+        VERIFY(actual.is_error() && actual.error().is_errno()); \
+        EXPECT_EQ(actual.error().code(), err); \
+        EXPECT_EQ(lexer.tell(), static_cast<size_t>(0)); \
+    } while (false)
+    CHECK_FAILS_WITH_ERROR("Well hello GenericLexer!", u64, EINVAL);
+    CHECK_FAILS_WITH_ERROR("+", u64, EINVAL);
+    CHECK_FAILS_WITH_ERROR("+WHF", u64, EINVAL);
+    CHECK_FAILS_WITH_ERROR("-WHF", u64, EINVAL);
+    CHECK_FAILS_WITH_ERROR("-1", u8, ERANGE);
+    CHECK_FAILS_WITH_ERROR("-100", u8, ERANGE);
+    CHECK_FAILS_WITH_ERROR("-1", u16, ERANGE);
+    CHECK_FAILS_WITH_ERROR("-100", u16, ERANGE);
+    CHECK_FAILS_WITH_ERROR("-1", u32, ERANGE);
+    CHECK_FAILS_WITH_ERROR("-100", u32, ERANGE);
+    CHECK_FAILS_WITH_ERROR("-1", u64, ERANGE);
+    CHECK_FAILS_WITH_ERROR("-100", u64, ERANGE);
+
+    CHECK_FAILS_WITH_ERROR("-129", i8, ERANGE);
+    CHECK_FAILS_WITH_ERROR("128", i8, ERANGE);
+    CHECK_FAILS_WITH_ERROR("-32769", i16, ERANGE);
+    CHECK_FAILS_WITH_ERROR("32768", i16, ERANGE);
+    CHECK_FAILS_WITH_ERROR("-2147483649", i32, ERANGE);
+    CHECK_FAILS_WITH_ERROR("2147483648", i32, ERANGE);
+    CHECK_FAILS_WITH_ERROR("-9223372036854775809", i64, ERANGE);
+    CHECK_FAILS_WITH_ERROR("9223372036854775808", i64, ERANGE);
+#undef CHECK_FAILS_WITH_ERROR
+}
+
+TEST_CASE(peek_string)
+{
+    GenericLexer lexer("abcdef"sv);
+    EXPECT_EQ(lexer.peek_string(3).value(), "abc"sv);
+    EXPECT_EQ(lexer.peek_string(3, 2).value(), "cde"sv);
+    EXPECT_EQ(lexer.peek_string(6).value(), "abcdef"sv);
+    EXPECT_EQ(lexer.peek_string(7).has_value(), false);
+    EXPECT_EQ(lexer.peek_string(1, 6).has_value(), false);
+
+    // Arguments large enough to wrap size_t arithmetic must be rejected, not passed through.
+    constexpr size_t huge = ~static_cast<size_t>(0);
+    EXPECT_EQ(lexer.peek_string(huge, 1).has_value(), false);
+    EXPECT_EQ(lexer.peek_string(1, huge).has_value(), false);
+
+    // Peeking never consumes input.
+    EXPECT_EQ(lexer.tell(), static_cast<size_t>(0));
+}