mirror of
https://github.com/RGBCube/serenity
synced 2025-07-26 11:47:45 +00:00
AK: Implement Utf8CodepointIterator::peek(size_t)
This adds a peek method for Utf8CodepointIterator, which enables it to be used in some parsing cases where peeking is necessary. peek(0) is equivalent to operator*, except that peek() does not contain any assertions and instead returns an empty Optional<u32> when the peeked position is at or past the end. This also implements a test case for iterating UTF-8.
This commit is contained in:
parent
31f6ba0952
commit
14506e8f5e
3 changed files with 55 additions and 0 deletions
|
@ -240,4 +240,21 @@ u32 Utf8CodepointIterator::operator*() const
|
||||||
return code_point_value_so_far;
|
return code_point_value_so_far;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the code point `offset` positions ahead of this iterator without
// advancing it; peek(0) is the code point operator*() would return.
// Returns an empty Optional when the requested position is at or past the end
// of the input, instead of asserting the way operator*() does.
Optional<u32> Utf8CodepointIterator::peek(size_t offset) const
{
    // Walk a copy so that this iterator is left untouched.
    auto lookahead = *this;

    for (size_t index = 0; index < offset; ++index) {
        // Check before stepping: stepping first would advance an iterator
        // that is already finished when peek(n > 0) is called at the end.
        if (lookahead.done())
            return {};
        ++lookahead;
    }

    // Dereferencing a finished iterator is an error, so guard it here.
    if (lookahead.done())
        return {};
    return *lookahead;
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,6 +25,8 @@ public:
|
||||||
bool operator!=(const Utf8CodepointIterator&) const;
|
bool operator!=(const Utf8CodepointIterator&) const;
|
||||||
Utf8CodepointIterator& operator++();
|
Utf8CodepointIterator& operator++();
|
||||||
u32 operator*() const;
|
u32 operator*() const;
|
||||||
|
// NOTE: This returns {} if the peek is at or past EOF.
|
||||||
|
Optional<u32> peek(size_t offset = 0) const;
|
||||||
|
|
||||||
ssize_t operator-(const Utf8CodepointIterator& other) const
|
ssize_t operator-(const Utf8CodepointIterator& other) const
|
||||||
{
|
{
|
||||||
|
|
|
@ -67,3 +67,39 @@ TEST_CASE(validate_invalid_ut8)
|
||||||
EXPECT(!utf8_4.validate(valid_bytes));
|
EXPECT(!utf8_4.validate(valid_bytes));
|
||||||
EXPECT(valid_bytes == 0);
|
EXPECT(valid_bytes == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Exercises Utf8CodepointIterator over a string of 22 ASCII characters followed
// by three non-ASCII code points (U+00A9, U+266A, U+A755), checking operator*,
// operator++, done() and peek() at the start, middle and end of the view.
TEST_CASE(iterate_utf8)
|
||||||
|
{
|
||||||
|
Utf8View view("Some weird characters \u00A9\u266A\uA755");
|
||||||
|
Utf8CodepointIterator iterator = view.begin();
|
||||||
|
|
||||||
|
// At the first code point: peek() with no argument and peek(0) must agree with operator*.
EXPECT(*iterator == 'S');
|
||||||
|
EXPECT(iterator.peek().has_value() && iterator.peek().value() == 'S');
|
||||||
|
EXPECT(iterator.peek(0).has_value() && iterator.peek(0).value() == 'S');
|
||||||
|
EXPECT(iterator.peek(1).has_value() && iterator.peek(1).value() == 'o');
|
||||||
|
// Offsets 22 and 24 land on the first and last non-ASCII code points.
EXPECT(iterator.peek(22).has_value() && iterator.peek(22).value() == 0x00A9);
|
||||||
|
EXPECT(iterator.peek(24).has_value() && iterator.peek(24).value() == 0xA755);
|
||||||
|
// Offset 25 is one past the last code point, so peek() must report no value.
EXPECT(!iterator.peek(25).has_value());
|
||||||
|
|
||||||
|
++iterator;
|
||||||
|
|
||||||
|
// After one step, every peek offset is relative to the new position.
EXPECT(*iterator == 'o');
|
||||||
|
EXPECT(iterator.peek(23).has_value() && iterator.peek(23).value() == 0xA755);
|
||||||
|
|
||||||
|
// Advance 23 more code points so the iterator sits on the last one.
for (size_t i = 0; i < 23; ++i)
|
||||||
|
++iterator;
|
||||||
|
|
||||||
|
EXPECT(!iterator.done());
|
||||||
|
EXPECT(*iterator == 0xA755);
|
||||||
|
EXPECT(iterator.peek().has_value() && iterator.peek().value() == 0xA755);
|
||||||
|
EXPECT(!iterator.peek(1).has_value());
|
||||||
|
|
||||||
|
++iterator;
|
||||||
|
|
||||||
|
// Past the last code point: done() is true and peek(0) yields no value.
EXPECT(iterator.done());
|
||||||
|
EXPECT(!iterator.peek(0).has_value());
|
||||||
|
// Unlike peek(0), operator* on a finished iterator is expected to crash;
// the lambda reports DidNotCrash only if the dereference returns normally.
EXPECT_CRASH("Dereferencing Utf8CodepointIterator which is already done.", [&iterator] {
|
||||||
|
*iterator;
|
||||||
|
return Test::Crash::Failure::DidNotCrash;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue