mirror of
https://github.com/RGBCube/serenity
synced 2025-05-14 07:54:58 +00:00
LibGUI: Lex INI files as UTF-8
Iterating byte by byte meant that the column positions assigned to INI tokens were off whenever the input contained multi-byte code points. Lexing through a Utf8View makes these positions count whole code points instead of bytes, so the column positions match what GUI::TextEditor expects. :^) Fixes #12706.
This commit is contained in:
parent
95df712c2e
commit
ae6a84c261
2 changed files with 14 additions and 16 deletions
|
@ -1,5 +1,6 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2020, Hüseyin Aslıtürk <asliturk@hotmail.com>
|
* Copyright (c) 2020, Hüseyin Aslıtürk <asliturk@hotmail.com>
|
||||||
|
* Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: BSD-2-Clause
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
*/
|
*/
|
||||||
|
@ -12,20 +13,20 @@ namespace GUI {
|
||||||
|
|
||||||
IniLexer::IniLexer(StringView input)
|
IniLexer::IniLexer(StringView input)
|
||||||
: m_input(input)
|
: m_input(input)
|
||||||
|
, m_iterator(m_input.begin())
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
char IniLexer::peek(size_t offset) const
|
u32 IniLexer::peek(size_t offset) const
|
||||||
{
|
{
|
||||||
if ((m_index + offset) >= m_input.length())
|
return m_iterator.peek(offset).value_or(0);
|
||||||
return 0;
|
|
||||||
return m_input[m_index + offset];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
char IniLexer::consume()
|
u32 IniLexer::consume()
|
||||||
{
|
{
|
||||||
VERIFY(m_index < m_input.length());
|
VERIFY(m_iterator != m_input.end());
|
||||||
char ch = m_input[m_index++];
|
u32 ch = *m_iterator;
|
||||||
|
++m_iterator;
|
||||||
if (ch == '\n') {
|
if (ch == '\n') {
|
||||||
m_position.line++;
|
m_position.line++;
|
||||||
m_position.column = 0;
|
m_position.column = 0;
|
||||||
|
@ -38,8 +39,6 @@ char IniLexer::consume()
|
||||||
Vector<IniToken> IniLexer::lex()
|
Vector<IniToken> IniLexer::lex()
|
||||||
{
|
{
|
||||||
Vector<IniToken> tokens;
|
Vector<IniToken> tokens;
|
||||||
|
|
||||||
size_t token_start_index = 0;
|
|
||||||
IniPosition token_start_position;
|
IniPosition token_start_position;
|
||||||
|
|
||||||
auto emit_token = [&](auto type) {
|
auto emit_token = [&](auto type) {
|
||||||
|
@ -52,7 +51,6 @@ Vector<IniToken> IniLexer::lex()
|
||||||
};
|
};
|
||||||
|
|
||||||
auto begin_token = [&] {
|
auto begin_token = [&] {
|
||||||
token_start_index = m_index;
|
|
||||||
token_start_position = m_position;
|
token_start_position = m_position;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -64,7 +62,7 @@ Vector<IniToken> IniLexer::lex()
|
||||||
tokens.append(token);
|
tokens.append(token);
|
||||||
};
|
};
|
||||||
|
|
||||||
while (m_index < m_input.length()) {
|
while (m_iterator != m_input.end()) {
|
||||||
auto ch = peek();
|
auto ch = peek();
|
||||||
|
|
||||||
if (is_ascii_space(ch)) {
|
if (is_ascii_space(ch)) {
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <AK/StringView.h>
|
#include <AK/Utf8View.h>
|
||||||
|
|
||||||
namespace GUI {
|
namespace GUI {
|
||||||
|
|
||||||
|
@ -57,11 +57,11 @@ public:
|
||||||
Vector<IniToken> lex();
|
Vector<IniToken> lex();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
char peek(size_t offset = 0) const;
|
u32 peek(size_t offset = 0) const;
|
||||||
char consume();
|
u32 consume();
|
||||||
|
|
||||||
StringView m_input;
|
Utf8View m_input;
|
||||||
size_t m_index { 0 };
|
Utf8CodePointIterator m_iterator;
|
||||||
IniPosition m_position { 0, 0 };
|
IniPosition m_position { 0, 0 };
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue