1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-14 07:54:58 +00:00

LibGUI: Lex INI files as Utf8

Iterating byte by byte meant that the column positions assigned to INI
tokens would be off if there were any multi-byte codepoints. Using a
Utf8View means these positions refer to whole codepoints instead, and
the column positions match what GUI::TextEditor expects. :^)

Fixes #12706.
This commit is contained in:
Sam Atkins 2023-01-10 22:57:32 +00:00 committed by Jelle Raaijmakers
parent 95df712c2e
commit ae6a84c261
2 changed files with 14 additions and 16 deletions

View file

@@ -1,5 +1,6 @@
/* /*
* Copyright (c) 2020, Hüseyin Aslıtürk <asliturk@hotmail.com> * Copyright (c) 2020, Hüseyin Aslıtürk <asliturk@hotmail.com>
* Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
* *
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
@@ -12,20 +13,20 @@ namespace GUI {
IniLexer::IniLexer(StringView input) IniLexer::IniLexer(StringView input)
: m_input(input) : m_input(input)
, m_iterator(m_input.begin())
{ {
} }
char IniLexer::peek(size_t offset) const u32 IniLexer::peek(size_t offset) const
{ {
if ((m_index + offset) >= m_input.length()) return m_iterator.peek(offset).value_or(0);
return 0;
return m_input[m_index + offset];
} }
char IniLexer::consume() u32 IniLexer::consume()
{ {
VERIFY(m_index < m_input.length()); VERIFY(m_iterator != m_input.end());
char ch = m_input[m_index++]; u32 ch = *m_iterator;
++m_iterator;
if (ch == '\n') { if (ch == '\n') {
m_position.line++; m_position.line++;
m_position.column = 0; m_position.column = 0;
@@ -38,8 +39,6 @@ char IniLexer::consume()
Vector<IniToken> IniLexer::lex() Vector<IniToken> IniLexer::lex()
{ {
Vector<IniToken> tokens; Vector<IniToken> tokens;
size_t token_start_index = 0;
IniPosition token_start_position; IniPosition token_start_position;
auto emit_token = [&](auto type) { auto emit_token = [&](auto type) {
@@ -52,7 +51,6 @@ Vector<IniToken> IniLexer::lex()
}; };
auto begin_token = [&] { auto begin_token = [&] {
token_start_index = m_index;
token_start_position = m_position; token_start_position = m_position;
}; };
@@ -64,7 +62,7 @@ Vector<IniToken> IniLexer::lex()
tokens.append(token); tokens.append(token);
}; };
while (m_index < m_input.length()) { while (m_iterator != m_input.end()) {
auto ch = peek(); auto ch = peek();
if (is_ascii_space(ch)) { if (is_ascii_space(ch)) {

View file

@@ -6,7 +6,7 @@
#pragma once #pragma once
#include <AK/StringView.h> #include <AK/Utf8View.h>
namespace GUI { namespace GUI {
@@ -57,11 +57,11 @@ public:
Vector<IniToken> lex(); Vector<IniToken> lex();
private: private:
char peek(size_t offset = 0) const; u32 peek(size_t offset = 0) const;
char consume(); u32 consume();
StringView m_input; Utf8View m_input;
size_t m_index { 0 }; Utf8CodePointIterator m_iterator;
IniPosition m_position { 0, 0 }; IniPosition m_position { 0, 0 };
}; };