LibWeb: Propagate errors from CSS Tokenizer construction

Instead of constructing a Tokenizer and then calling parse() on it, we now call `Tokenizer::tokenize(...)` directly. (Renamed from `parse()` because this is a Tokenizer, not a Parser.)
2025-09-14 15:38:00 +00:00 · 2023-03-06 14:19:39 +00:00 · 2023-03-06 14:19:39 +00:00 · 17618989a3
commit 17618989a3
parent 98ee2fcd1b
5 changed files with 47 additions and 44 deletions
--- a/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp
+++ b/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp
@@ -84,8 +84,7 @@ AK::URL ParsingContext::complete_url(StringView relative_url) const
 Parser::Parser(ParsingContext const& context, StringView input, StringView encoding)
    : m_context(context)
-    , m_tokenizer(input, encoding)
+    , m_tokens(Tokenizer::tokenize(input, encoding).release_value_but_fixme_should_propagate_errors())
-    , m_tokens(m_tokenizer.parse())
    , m_token_stream(TokenStream(m_tokens))
 {
 }
--- a/Userland/Libraries/LibWeb/CSS/Parser/Parser.h
+++ b/Userland/Libraries/LibWeb/CSS/Parser/Parser.h
@@ -360,7 +360,6 @@ private:
    ParsingContext m_context;
-    Tokenizer m_tokenizer;
    Vector<Token> m_tokens;
    TokenStream<Token> m_token_stream;
 };
--- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp
+++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp
@@ -195,7 +195,7 @@ static inline bool is_E(u32 code_point)
    return code_point == 0x45;
 }
-Tokenizer::Tokenizer(StringView input, StringView encoding)
+ErrorOr<Vector<Token>> Tokenizer::tokenize(StringView input, StringView encoding)
 {
    // https://www.w3.org/TR/css-syntax-3/#css-filter-code-points
    auto filter_code_points = [](StringView input, auto encoding) -> ErrorOr<String> {
@@ -206,7 +206,7 @@ Tokenizer::Tokenizer(StringView input, StringView encoding)
        bool last_was_carriage_return = false;
        // To filter code points from a stream of (unfiltered) code points input:
-        decoder->process(input, [&builder, &last_was_carriage_return](u32 code_point) -> ErrorOr<void> {
+        TRY(decoder->process(input, [&builder, &last_was_carriage_return](u32 code_point) -> ErrorOr<void> {
            // Replace any U+000D CARRIAGE RETURN (CR) code points,
            // U+000C FORM FEED (FF) code points,
            // or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF)
@@ -237,17 +237,22 @@ Tokenizer::Tokenizer(StringView input, StringView encoding)
                last_was_carriage_return = false;
            }
            return {};
-               })
+        }));
-            .release_value_but_fixme_should_propagate_errors();
        return builder.to_string();
    };
-    m_decoded_input = filter_code_points(input, encoding).release_value_but_fixme_should_propagate_errors();
+    Tokenizer tokenizer { TRY(filter_code_points(input, encoding)) };
-    m_utf8_view = Utf8View(m_decoded_input);
+    return tokenizer.tokenize();
-    m_utf8_iterator = m_utf8_view.begin();
 }
-Vector<Token> Tokenizer::parse()
+Tokenizer::Tokenizer(String decoded_input)
+    : m_decoded_input(move(decoded_input))
+    , m_utf8_view(m_decoded_input)
+    , m_utf8_iterator(m_utf8_view.begin())
+{
+}
+Vector<Token> Tokenizer::tokenize()
 {
    Vector<Token> tokens;
    for (;;) {
--- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h
+++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h
@@ -58,15 +58,16 @@ public:
 };
 class Tokenizer {
 public:
-    explicit Tokenizer(StringView input, StringView encoding);
+    static ErrorOr<Vector<Token>> tokenize(StringView input, StringView encoding);
-    [[nodiscard]] Vector<Token> parse();
    [[nodiscard]] static Token create_eof_token();
 private:
+    explicit Tokenizer(String decoded_input);
+    [[nodiscard]] Vector<Token> tokenize();
    [[nodiscard]] u32 next_code_point();
    [[nodiscard]] u32 peek_code_point(size_t offset = 0) const;
    [[nodiscard]] U32Twin peek_twin() const;
--- a/Userland/Libraries/LibWeb/CSS/SyntaxHighlighter/SyntaxHighlighter.cpp
+++ b/Userland/Libraries/LibWeb/CSS/SyntaxHighlighter/SyntaxHighlighter.cpp
@@ -45,8 +45,7 @@ void SyntaxHighlighter::rehighlight(Palette const& palette)
            false);
    };
-    CSS::Parser::Tokenizer tokenizer { text, "utf-8"sv };
+    auto tokens = CSS::Parser::Tokenizer::tokenize(text, "utf-8"sv).release_value_but_fixme_should_propagate_errors();
-    auto tokens = tokenizer.parse();
    for (auto const& token : tokens) {
        if (token.is(Parser::Token::Type::EndOfFile))
            break;