LibWeb: Propagate errors from CSS Tokenizer construction
Instead of constructing a Tokenizer and then calling parse() on it, we now call `Tokenizer::tokenize(...)` directly. (Renamed from `parse()` because this is a Tokenizer, not a Parser.)
parent 98ee2fcd1b
commit 17618989a3

5 changed files with 47 additions and 44 deletions
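At call sites, this replaces the two-step construct-then-parse pattern with a single fallible static call. A minimal sketch of the two shapes (variable names are illustrative, not from the commit):

    // Before: construction could not report failure; errors were
    // swallowed inside the constructor.
    Tokenizer tokenizer { input, "utf-8"sv };
    Vector<Token> tokens = tokenizer.parse();

    // After: the factory returns ErrorOr<Vector<Token>>, so a fallible
    // caller can TRY(...) it, and an infallible caller must unwrap it
    // explicitly.
    Vector<Token> tokens = TRY(Tokenizer::tokenize(input, "utf-8"sv));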
Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp

@@ -84,8 +84,7 @@ AK::URL ParsingContext::complete_url(StringView relative_url) const
 Parser::Parser(ParsingContext const& context, StringView input, StringView encoding)
     : m_context(context)
-    , m_tokenizer(input, encoding)
-    , m_tokens(m_tokenizer.parse())
+    , m_tokens(Tokenizer::tokenize(input, encoding).release_value_but_fixme_should_propagate_errors())
     , m_token_stream(TokenStream(m_tokens))
 {
 }
 
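Parser's constructor is not itself fallible, so it cannot use TRY() here; instead it unwraps with release_value_but_fixme_should_propagate_errors(), the accessor SerenityOS uses to mark call sites where an error still aborts rather than propagates. A rough sketch of what that unwrap means, assuming AK's ErrorOr semantics:

    ErrorOr<Vector<Token>> result = Tokenizer::tokenize(input, encoding);
    // Behaves like release_value(), but the long name doubles as a
    // grep-able FIXME: on error the process asserts instead of returning.
    Vector<Token> tokens = result.release_value_but_fixme_should_propagate_errors();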
Userland/Libraries/LibWeb/CSS/Parser/Parser.h

@@ -360,7 +360,6 @@ private:
 
     ParsingContext m_context;
 
-    Tokenizer m_tokenizer;
     Vector<Token> m_tokens;
     TokenStream<Token> m_token_stream;
 };
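Note that m_token_stream is constructed from m_tokens (and, in LibWeb, refers back to it), so the declaration order that remains in Parser.h is load-bearing: C++ initializes non-static members in declaration order, not in initializer-list order. A reduced illustration of the constraint (types simplified):

    struct Parser {
        Vector<Token> m_tokens;            // must be declared before...
        TokenStream<Token> m_token_stream; // ...this, which is built from m_tokens
    };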
Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp

@@ -195,7 +195,7 @@ static inline bool is_E(u32 code_point)
     return code_point == 0x45;
 }
 
-Tokenizer::Tokenizer(StringView input, StringView encoding)
+ErrorOr<Vector<Token>> Tokenizer::tokenize(StringView input, StringView encoding)
 {
     // https://www.w3.org/TR/css-syntax-3/#css-filter-code-points
     auto filter_code_points = [](StringView input, auto encoding) -> ErrorOr<String> {
@@ -206,7 +206,7 @@ Tokenizer::Tokenizer(StringView input, StringView encoding)
         bool last_was_carriage_return = false;
 
         // To filter code points from a stream of (unfiltered) code points input:
-        decoder->process(input, [&builder, &last_was_carriage_return](u32 code_point) -> ErrorOr<void> {
+        TRY(decoder->process(input, [&builder, &last_was_carriage_return](u32 code_point) -> ErrorOr<void> {
             // Replace any U+000D CARRIAGE RETURN (CR) code points,
             // U+000C FORM FEED (FF) code points,
             // or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF)
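Wrapping decoder->process(...) in TRY(...) is what lets the callback's ErrorOr<void> results escape: TRY evaluates the expression, early-returns the error from the enclosing function if there is one, and otherwise yields the value. A simplified model of the expansion (AK's real macro uses a GCC statement expression along these lines):

    // TRY(expr), approximately:
    ({
        auto _result = (expr);
        if (_result.is_error())
            return _result.release_error();
        _result.release_value();
    })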
@@ -237,17 +237,22 @@ Tokenizer::Tokenizer(StringView input, StringView encoding)
                 last_was_carriage_return = false;
             }
             return {};
-        })
-            .release_value_but_fixme_should_propagate_errors();
+        }));
         return builder.to_string();
     };
 
-    m_decoded_input = filter_code_points(input, encoding).release_value_but_fixme_should_propagate_errors();
-    m_utf8_view = Utf8View(m_decoded_input);
-    m_utf8_iterator = m_utf8_view.begin();
+    Tokenizer tokenizer { TRY(filter_code_points(input, encoding)) };
+    return tokenizer.tokenize();
 }
 
-Vector<Token> Tokenizer::parse()
+Tokenizer::Tokenizer(String decoded_input)
+    : m_decoded_input(move(decoded_input))
+    , m_utf8_view(m_decoded_input)
+    , m_utf8_iterator(m_utf8_view.begin())
+{
+}
+
+Vector<Token> Tokenizer::tokenize()
 {
     Vector<Token> tokens;
     for (;;) {
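The new shape is the fallible-factory pattern: everything that can fail (decoding and filtering the input) happens in the static tokenize(), and the private constructor merely adopts an already-decoded String, so a Tokenizer never exists in a half-initialized state. The same idea in miniature, independent of this commit (Widget and prepare() are hypothetical):

    class Widget {
    public:
        static ErrorOr<Widget> create(StringView raw)
        {
            // All fallible work happens before any Widget exists.
            auto prepared = TRY(prepare(raw));
            return Widget { move(prepared) };
        }

    private:
        explicit Widget(String prepared)
            : m_prepared(move(prepared))
        {
        }

        String m_prepared;
    };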
Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h

@@ -58,15 +58,16 @@ public:
 };
 
 class Tokenizer {
-
 public:
-    explicit Tokenizer(StringView input, StringView encoding);
+    static ErrorOr<Vector<Token>> tokenize(StringView input, StringView encoding);
 
-    [[nodiscard]] Vector<Token> parse();
-
     [[nodiscard]] static Token create_eof_token();
 
 private:
+    explicit Tokenizer(String decoded_input);
+
+    [[nodiscard]] Vector<Token> tokenize();
+
     [[nodiscard]] u32 next_code_point();
     [[nodiscard]] u32 peek_code_point(size_t offset = 0) const;
     [[nodiscard]] U32Twin peek_twin() const;
Userland/Libraries/LibWeb/CSS/SyntaxHighlighter/SyntaxHighlighter.cpp

@@ -45,8 +45,7 @@ void SyntaxHighlighter::rehighlight(Palette const& palette)
         false);
     };
 
-    CSS::Parser::Tokenizer tokenizer { text, "utf-8"sv };
-    auto tokens = tokenizer.parse();
+    auto tokens = CSS::Parser::Tokenizer::tokenize(text, "utf-8"sv).release_value_but_fixme_should_propagate_errors();
     for (auto const& token : tokens) {
         if (token.is(Parser::Token::Type::EndOfFile))
             break;