mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 19:28:12 +00:00
AK: Add GenericLexer::{consume_decimal_integer,peek_string}
This commit is contained in:
parent
6b30847120
commit
b65d281bbb
3 changed files with 153 additions and 0 deletions
|
@ -7,6 +7,7 @@
|
||||||
#include <AK/Assertions.h>
|
#include <AK/Assertions.h>
|
||||||
#include <AK/CharacterTypes.h>
|
#include <AK/CharacterTypes.h>
|
||||||
#include <AK/GenericLexer.h>
|
#include <AK/GenericLexer.h>
|
||||||
|
#include <AK/ScopeGuard.h>
|
||||||
#include <AK/StringBuilder.h>
|
#include <AK/StringBuilder.h>
|
||||||
|
|
||||||
#ifndef KERNEL
|
#ifndef KERNEL
|
||||||
|
@ -128,6 +129,62 @@ StringView GenericLexer::consume_quoted_string(char escape_char)
|
||||||
return m_input.substring_view(start, length);
|
return m_input.substring_view(start, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<Integral T>
|
||||||
|
ErrorOr<T> GenericLexer::consume_decimal_integer()
|
||||||
|
{
|
||||||
|
using UnsignedT = MakeUnsigned<T>;
|
||||||
|
|
||||||
|
ArmedScopeGuard rollback { [&, rollback_position = m_index] {
|
||||||
|
m_index = rollback_position;
|
||||||
|
} };
|
||||||
|
|
||||||
|
bool has_minus_sign = false;
|
||||||
|
|
||||||
|
if (next_is('+') || next_is('-'))
|
||||||
|
if (consume() == '-')
|
||||||
|
has_minus_sign = true;
|
||||||
|
|
||||||
|
StringView number_view = consume_while(is_ascii_digit);
|
||||||
|
if (number_view.is_empty())
|
||||||
|
return Error::from_errno(EINVAL);
|
||||||
|
|
||||||
|
auto maybe_number = StringUtils::convert_to_uint<UnsignedT>(number_view, TrimWhitespace::No);
|
||||||
|
if (!maybe_number.has_value())
|
||||||
|
return Error::from_errno(ERANGE);
|
||||||
|
auto number = maybe_number.value();
|
||||||
|
|
||||||
|
if (!has_minus_sign) {
|
||||||
|
if (NumericLimits<T>::max() < number) // This is only possible in a signed case.
|
||||||
|
return Error::from_errno(ERANGE);
|
||||||
|
|
||||||
|
rollback.disarm();
|
||||||
|
return number;
|
||||||
|
} else {
|
||||||
|
if constexpr (IsUnsigned<T>) {
|
||||||
|
if (number == 0) {
|
||||||
|
rollback.disarm();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return Error::from_errno(ERANGE);
|
||||||
|
} else {
|
||||||
|
static constexpr UnsignedT max_value = static_cast<UnsignedT>(NumericLimits<T>::max()) + 1;
|
||||||
|
if (number > max_value)
|
||||||
|
return Error::from_errno(ERANGE);
|
||||||
|
rollback.disarm();
|
||||||
|
return -number;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template ErrorOr<u8> GenericLexer::consume_decimal_integer<u8>();
|
||||||
|
template ErrorOr<i8> GenericLexer::consume_decimal_integer<i8>();
|
||||||
|
template ErrorOr<u16> GenericLexer::consume_decimal_integer<u16>();
|
||||||
|
template ErrorOr<i16> GenericLexer::consume_decimal_integer<i16>();
|
||||||
|
template ErrorOr<u32> GenericLexer::consume_decimal_integer<u32>();
|
||||||
|
template ErrorOr<i32> GenericLexer::consume_decimal_integer<i32>();
|
||||||
|
template ErrorOr<u64> GenericLexer::consume_decimal_integer<u64>();
|
||||||
|
template ErrorOr<i64> GenericLexer::consume_decimal_integer<i64>();
|
||||||
|
|
||||||
#ifndef KERNEL
|
#ifndef KERNEL
|
||||||
Optional<DeprecatedString> GenericLexer::consume_and_unescape_string(char escape_char)
|
Optional<DeprecatedString> GenericLexer::consume_and_unescape_string(char escape_char)
|
||||||
{
|
{
|
||||||
|
|
|
@ -31,6 +31,13 @@ public:
|
||||||
return (m_index + offset < m_input.length()) ? m_input[m_index + offset] : '\0';
|
return (m_index + offset < m_input.length()) ? m_input[m_index + offset] : '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns a view of `length` characters that starts `offset` characters past
// the current position, without consuming anything. Returns an empty Optional
// when that range is not fully contained in the input.
Optional<StringView> peek_string(size_t length, size_t offset = 0) const
{
    // Compare against the number of remaining characters instead of summing
    // m_index + offset + length: for very large arguments that sum can wrap
    // around in size_t and would then slip past the bounds check.
    auto const input_length = m_input.length();
    if (m_index > input_length)
        return {};

    auto const remaining = input_length - m_index;
    if (offset > remaining || length > remaining - offset)
        return {};

    return m_input.substring_view(m_index + offset, length);
}
|
||||||
|
|
||||||
constexpr bool next_is(char expected) const
|
constexpr bool next_is(char expected) const
|
||||||
{
|
{
|
||||||
return peek() == expected;
|
return peek() == expected;
|
||||||
|
@ -121,6 +128,8 @@ public:
|
||||||
#ifndef KERNEL
|
#ifndef KERNEL
|
||||||
Optional<DeprecatedString> consume_and_unescape_string(char escape_char = '\\');
|
Optional<DeprecatedString> consume_and_unescape_string(char escape_char = '\\');
|
||||||
#endif
|
#endif
|
||||||
|
// Consumes an optionally signed ('+' or '-') decimal integer at the current
// position and returns it as a T. Fails with EINVAL when no digit follows the
// optional sign and with ERANGE when the value does not fit into T; on failure
// the lexer position is restored to where it was before the call.
template<Integral T>
|
||||||
|
ErrorOr<T> consume_decimal_integer();
|
||||||
|
|
||||||
enum class UnicodeEscapeError {
|
enum class UnicodeEscapeError {
|
||||||
MalformedUnicodeEscape,
|
MalformedUnicodeEscape,
|
||||||
|
|
|
@ -204,3 +204,90 @@ TEST_CASE(consume_escaped_code_point)
|
||||||
test("\\ud83d\\ude00"sv, 0x1f600);
|
test("\\ud83d\\ude00"sv, 0x1f600);
|
||||||
test("\\ud83d\\ude00"sv, 0xd83d, false);
|
test("\\ud83d\\ude00"sv, 0xd83d, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Every input below must parse successfully as the given type, produce the
// expected value and leave the lexer at the end of the input.
TEST_CASE(consume_decimal_integer_correctly_parses)
{
// `input` must be a string literal (it is pasted together with the `sv`
// suffix); `expected` is cast to `type` before the comparison.
#define CHECK_PARSES_INTEGER(input, expected, type)                 \
    do {                                                            \
        GenericLexer lexer(input##sv);                              \
        auto actual = lexer.consume_decimal_integer<type>();        \
        VERIFY(!actual.is_error());                                 \
        EXPECT_EQ(actual.value(), static_cast<type>(expected));     \
        EXPECT_EQ(lexer.tell(), input##sv.length());                \
    } while (false)

    // Unsigned types: zero, negative zero, an ordinary value, and the maximum.
    CHECK_PARSES_INTEGER("0", 0, u8);
    CHECK_PARSES_INTEGER("-0", -0, u8);
    CHECK_PARSES_INTEGER("10", 10, u8);
    CHECK_PARSES_INTEGER("255", 255, u8);

    CHECK_PARSES_INTEGER("0", 0, u16);
    CHECK_PARSES_INTEGER("-0", -0, u16);
    CHECK_PARSES_INTEGER("1234", 1234, u16);
    CHECK_PARSES_INTEGER("65535", 65535, u16);

    CHECK_PARSES_INTEGER("0", 0, u32);
    CHECK_PARSES_INTEGER("-0", -0, u32);
    CHECK_PARSES_INTEGER("1234", 1234, u32);
    CHECK_PARSES_INTEGER("4294967295", 4294967295, u32);

    CHECK_PARSES_INTEGER("0", 0, u64);
    CHECK_PARSES_INTEGER("-0", -0, u64);
    CHECK_PARSES_INTEGER("1234", 1234, u64);
    CHECK_PARSES_INTEGER("18446744073709551615", 18446744073709551615ULL, u64);

    // Signed types: additionally an ordinary negative value and both limits.
    CHECK_PARSES_INTEGER("0", 0, i8);
    CHECK_PARSES_INTEGER("-0", -0, i8);
    CHECK_PARSES_INTEGER("10", 10, i8);
    CHECK_PARSES_INTEGER("-10", -10, i8);
    CHECK_PARSES_INTEGER("127", 127, i8);
    CHECK_PARSES_INTEGER("-128", -128, i8);

    CHECK_PARSES_INTEGER("0", 0, i16);
    CHECK_PARSES_INTEGER("-0", -0, i16);
    CHECK_PARSES_INTEGER("1234", 1234, i16);
    CHECK_PARSES_INTEGER("-1234", -1234, i16);
    CHECK_PARSES_INTEGER("32767", 32767, i16);
    CHECK_PARSES_INTEGER("-32768", -32768, i16);

    CHECK_PARSES_INTEGER("0", 0, i32);
    CHECK_PARSES_INTEGER("-0", -0, i32);
    CHECK_PARSES_INTEGER("1234", 1234, i32);
    CHECK_PARSES_INTEGER("-1234", -1234, i32);
    CHECK_PARSES_INTEGER("2147483647", 2147483647, i32);
    CHECK_PARSES_INTEGER("-2147483648", -2147483648, i32);

    CHECK_PARSES_INTEGER("0", 0, i64);
    CHECK_PARSES_INTEGER("-0", -0, i64);
    CHECK_PARSES_INTEGER("1234", 1234, i64);
    CHECK_PARSES_INTEGER("-1234", -1234, i64);
    CHECK_PARSES_INTEGER("9223372036854775807", 9223372036854775807, i64);
    CHECK_PARSES_INTEGER("-9223372036854775808", -9223372036854775808ULL, i64);

#undef CHECK_PARSES_INTEGER
}
|
||||||
|
|
||||||
|
// Every input below must be rejected with the given errno code, and the lexer
// must be left at position 0 afterwards.
TEST_CASE(consume_decimal_integer_fails_with_correct_error)
{
// `input` must be a string literal (it is pasted together with the `sv`
// suffix); `err` is the errno code the failed parse has to carry.
#define CHECK_FAILS_WITH_ERROR(input, type, err)                    \
    do {                                                            \
        GenericLexer lexer(input##sv);                              \
        auto actual = lexer.consume_decimal_integer<type>();        \
        VERIFY(actual.is_error() && actual.error().is_errno());     \
        EXPECT_EQ(actual.error().code(), err);                      \
        EXPECT_EQ(lexer.tell(), static_cast<size_t>(0));            \
    } while (false)

    // No digits at all, with or without a leading sign.
    CHECK_FAILS_WITH_ERROR("Well hello GenericLexer!", u64, EINVAL);
    CHECK_FAILS_WITH_ERROR("+", u64, EINVAL);
    CHECK_FAILS_WITH_ERROR("+WHF", u64, EINVAL);
    CHECK_FAILS_WITH_ERROR("-WHF", u64, EINVAL);

    // Non-zero negative values requested as unsigned types.
    CHECK_FAILS_WITH_ERROR("-1", u8, ERANGE);
    CHECK_FAILS_WITH_ERROR("-100", u8, ERANGE);
    CHECK_FAILS_WITH_ERROR("-1", u16, ERANGE);
    CHECK_FAILS_WITH_ERROR("-100", u16, ERANGE);
    CHECK_FAILS_WITH_ERROR("-1", u32, ERANGE);
    CHECK_FAILS_WITH_ERROR("-100", u32, ERANGE);
    CHECK_FAILS_WITH_ERROR("-1", u64, ERANGE);
    CHECK_FAILS_WITH_ERROR("-100", u64, ERANGE);

    // One past the minimum and one past the maximum of each signed type.
    CHECK_FAILS_WITH_ERROR("-129", i8, ERANGE);
    CHECK_FAILS_WITH_ERROR("128", i8, ERANGE);
    CHECK_FAILS_WITH_ERROR("-32769", i16, ERANGE);
    CHECK_FAILS_WITH_ERROR("32768", i16, ERANGE);
    CHECK_FAILS_WITH_ERROR("-2147483649", i32, ERANGE);
    CHECK_FAILS_WITH_ERROR("2147483648", i32, ERANGE);
    CHECK_FAILS_WITH_ERROR("-9223372036854775809", i64, ERANGE);
    CHECK_FAILS_WITH_ERROR("9223372036854775808", i64, ERANGE);

#undef CHECK_FAILS_WITH_ERROR
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue