Mirror of https://github.com/RGBCube/serenity
LibWeb: Add TokenStream class to CSS Parser
The entry points for CSS parsing in the spec are defined as accepting any of a stream of Tokens, a stream of ComponentValues, or a String. TokenStream is an attempt to reduce the code duplication between those three paths.
Parent: 6c03123b2d
Commit: b7116711bf
4 changed files with 119 additions and 47 deletions
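To illustrate the motivation, here is a sketch (not code from this commit) of how a spec entry point could be written once as a template over TokenStream, with thin overloads adapting each input form; Thing and parse_a_thing() are hypothetical names:

// Hypothetical sketch: one templated worker, three thin entry points,
// instead of three duplicated implementations.
template<typename T>
Optional<Thing> parse_a_thing(TokenStream<T>&); // the single real implementation

Optional<Thing> parse_a_thing(Vector<Token> const& tokens)
{
    TokenStream<Token> stream { tokens };
    return parse_a_thing(stream);
}

Optional<Thing> parse_a_thing(Vector<StyleComponentValueRule> const& values)
{
    TokenStream<StyleComponentValueRule> stream { values };
    return parse_a_thing(stream);
}

Optional<Thing> parse_a_thing(StringView input)
{
    // Tokenize first, then reuse the Vector<Token> overload's machinery.
    auto tokens = Tokenizer(input, "utf-8").parse();
    TokenStream<Token> stream { tokens };
    return parse_a_thing(stream);
}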
Parser.cpp

@@ -53,44 +53,99 @@ URL ParsingContext::complete_url(String const& addr) const
     return m_document ? m_document->url().complete_url(addr) : URL::create_with_url_or_path(addr);
 }
 
+template<typename T>
+TokenStream<T>::TokenStream(Vector<T> const& tokens)
+    : m_tokens(tokens)
+    , m_eof(make_eof())
+{
+}
+
+template<typename T>
+TokenStream<T>::~TokenStream()
+{
+}
+
+template<typename T>
+bool TokenStream<T>::has_next_token()
+{
+    return (size_t)(m_iterator_offset + 1) < m_tokens.size();
+}
+
+template<typename T>
+T const& TokenStream<T>::peek_token()
+{
+    if (!has_next_token())
+        return m_eof;
+
+    return m_tokens.at(m_iterator_offset + 1);
+}
+
+template<typename T>
+T const& TokenStream<T>::next_token()
+{
+    if (!has_next_token())
+        return m_eof;
+
+    ++m_iterator_offset;
+
+    return m_tokens.at(m_iterator_offset);
+}
+
+template<typename T>
+T const& TokenStream<T>::current_token()
+{
+    if ((size_t)m_iterator_offset >= m_tokens.size())
+        return m_eof;
+
+    return m_tokens.at(m_iterator_offset);
+}
+
+template<typename T>
+void TokenStream<T>::reconsume_current_input_token()
+{
+    VERIFY(m_iterator_offset >= 0);
+    --m_iterator_offset;
+}
+
+template<typename T>
+void TokenStream<T>::skip_whitespace()
+{
+    while (peek_token().is(Token::Type::Whitespace))
+        next_token();
+}
+
+template<>
+Token TokenStream<Token>::make_eof()
+{
+    return Tokenizer::create_eof_token();
+}
+
+template<>
+StyleComponentValueRule TokenStream<StyleComponentValueRule>::make_eof()
+{
+    return StyleComponentValueRule(Tokenizer::create_eof_token());
+}
+
+template<typename T>
+void TokenStream<T>::dump_all_tokens()
+{
+    dbgln("Dumping all tokens:");
+    for (auto& token : m_tokens)
+        dbgln("{}", token.to_string());
+}
+
 Parser::Parser(ParsingContext const& context, StringView const& input, String const& encoding)
     : m_context(context)
     , m_tokenizer(input, encoding)
+    , m_tokens(m_tokenizer.parse())
+    , m_token_stream(TokenStream(m_tokens))
 {
-    m_tokens = m_tokenizer.parse();
 }
 
 Parser::~Parser()
 {
 }
 
-Token Parser::peek_token()
-{
-    size_t next_offset = m_iterator_offset + 1;
-
-    if (next_offset < m_tokens.size()) {
-        return m_tokens.at(next_offset);
-    }
-
-    return m_tokens.at(m_iterator_offset);
-}
-
-Token Parser::next_token()
-{
-    if (m_iterator_offset < (int)m_tokens.size() - 1) {
-        ++m_iterator_offset;
-    }
-
-    auto token = m_tokens.at(m_iterator_offset);
-
-    return token;
-}
-
-Token Parser::current_token()
-{
-    return m_tokens.at(m_iterator_offset);
-}
-
 NonnullRefPtr<CSSStyleSheet> Parser::parse_as_stylesheet()
 {
     auto parser_rules = consume_a_list_of_rules(true);
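A note on the semantics shown above: the cursor starts at -1, so the first next_token() returns the first token, peek_token() inspects one ahead without advancing, and past the last token both return the stored m_eof sentinel by reference rather than indexing out of bounds. A minimal usage sketch (assuming a Tokenizer instance named tokenizer):

// Sketch of TokenStream usage; "tokenizer" is an assumed Tokenizer instance.
Vector<Token> tokens = tokenizer.parse();
TokenStream<Token> stream { tokens };       // cursor starts before the first token

stream.skip_whitespace();                   // consumes only leading whitespace tokens
while (stream.has_next_token()) {
    auto& token = stream.next_token();      // advance, then return the new current token
    dbgln("{}", token.to_string());
}
// From here on, next_token() and peek_token() keep returning the EOF sentinel.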
@@ -440,18 +495,6 @@ Optional<Selector> Parser::parse_single_selector(Vector<StyleComponentValueRule>
     return Selector(move(selectors));
 }
 
-void Parser::dump_all_tokens()
-{
-    dbgln("Dumping all tokens:");
-    for (auto& token : m_tokens)
-        dbgln("{}", token.to_string());
-}
-
-void Parser::reconsume_current_input_token()
-{
-    --m_iterator_offset;
-}
-
 NonnullRefPtrVector<StyleRule> Parser::consume_a_list_of_rules(bool top_level)
 {
     NonnullRefPtrVector<StyleRule> rules;
Parser.h

@@ -39,6 +39,30 @@ private:
     const DOM::Document* m_document { nullptr };
 };
 
+template<typename T>
+class TokenStream {
+public:
+    explicit TokenStream(Vector<T> const&);
+    ~TokenStream();
+
+    bool has_next_token();
+    T const& next_token();
+    T const& peek_token();
+    T const& current_token();
+    void reconsume_current_input_token();
+
+    void skip_whitespace();
+
+    void dump_all_tokens();
+
+private:
+    Vector<T> const& m_tokens;
+    int m_iterator_offset { -1 };
+
+    T make_eof();
+    T m_eof;
+};
+
 class Parser {
 public:
     Parser(ParsingContext const&, StringView const& input, String const& encoding = "utf-8");
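A design note on m_eof in the declaration above: because peek_token() and friends return T const&, the stream must hand back an object that outlives the call even when the input is exhausted; a temporary EOF token would dangle, so each stream owns one, built by the per-type make_eof() specializations shown in Parser.cpp. Opting a further type in would follow the same pattern; HypotheticalValue below is invented for illustration:

// Invented example: a new stream element type only needs to teach
// TokenStream how to construct that type's EOF sentinel.
template<>
HypotheticalValue TokenStream<HypotheticalValue>::make_eof()
{
    return HypotheticalValue(Tokenizer::create_eof_token());
}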
@@ -82,13 +106,11 @@ public:
     static Optional<String> as_valid_border_style(String input) { return input; }
     static Optional<String> as_valid_border_image_repeat(String input) { return input; }
 
-    void dump_all_tokens();
-
 private:
-    Token next_token();
+    Token next_token() { return m_token_stream.next_token(); }
-    Token peek_token();
+    Token peek_token() { return m_token_stream.peek_token(); }
-    Token current_token();
+    Token current_token() { return m_token_stream.current_token(); }
-    void reconsume_current_input_token();
+    void reconsume_current_input_token() { m_token_stream.reconsume_current_input_token(); }
 
     NonnullRefPtrVector<StyleRule> consume_a_list_of_rules(bool top_level);
     NonnullRefPtr<StyleRule> consume_an_at_rule();

@@ -106,7 +128,7 @@ private:
 
     Tokenizer m_tokenizer;
     Vector<Token> m_tokens;
-    int m_iterator_offset { -1 };
+    TokenStream<Token> m_token_stream;
 };
 
 }
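Since the old Parser accessors survive as inline wrappers, existing call sites compile unchanged; a sketch of a typical caller (skip_leading_whitespace() is hypothetical, not part of the commit):

// Hypothetical caller, unaffected by the refactor: peek_token() and
// next_token() now forward to m_token_stream internally.
void Parser::skip_leading_whitespace()
{
    while (peek_token().is(Token::Type::Whitespace))
        next_token();
}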
Tokenizer.cpp

@@ -269,6 +269,11 @@ Token Tokenizer::create_new_token(Token::Type type)
     return token;
 }
 
+Token Tokenizer::create_eof_token()
+{
+    return create_new_token(Token::Type::EndOfFile);
+}
+
 Token Tokenizer::create_value_token(Token::Type type, String value)
 {
     Token token;
Tokenizer.h

@@ -70,6 +70,8 @@ public:
 
     [[nodiscard]] Vector<Token> parse();
 
+    [[nodiscard]] static Token create_eof_token();
+
 private:
     [[nodiscard]] u32 next_code_point();
     [[nodiscard]] u32 peek_code_point(size_t offset = 0) const;