1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 08:58:11 +00:00

LibWeb: More work on the HTML parser and tokenizer

The parser can now switch the state of the tokenizer! Very webby. :^)
This commit is contained in:
Andreas Kling 2020-05-24 20:24:43 +02:00
parent 31db3f21ae
commit 20911efd4d
8 changed files with 186 additions and 14 deletions

View file

@ -28,6 +28,7 @@
#include <AK/StringView.h>
#include <AK/Types.h>
#include <LibWeb/Forward.h>
#include <LibWeb/Parser/HTMLToken.h>
#define ENUMERATE_TOKENIZER_STATES \
@ -118,20 +119,23 @@ class HTMLTokenizer {
public:
explicit HTMLTokenizer(const StringView& input);
// Tokenizer states. The enumerators are generated by expanding the
// ENUMERATE_TOKENIZER_STATES list with __ENUMERATE_TOKENIZER_STATE defined to
// emit `state,` for each entry; the helper macro is undefined again right after.
// Declared in the public section; switch_to() takes a value of this type.
enum class State {
#define __ENUMERATE_TOKENIZER_STATE(state) state,
ENUMERATE_TOKENIZER_STATES
#undef __ENUMERATE_TOKENIZER_STATE
};
// Produces the next token. The Optional return type allows for "no token";
// the exact condition is in the implementation, which is not shown here.
Optional<HTMLToken> next_token();
// Lets code outside the tokenizer set its state (per the commit message, the
// HTML parser). NOTE(review): the Badge<HTMLDocumentParser> argument presumably
// restricts callers to HTMLDocumentParser — confirm against AK/Badge.h.
void switch_to(Badge<HTMLDocumentParser>, State new_state);
private:
Optional<u32> next_codepoint();
Optional<u32> peek_codepoint(size_t offset) const;
bool next_few_characters_are(const StringView&) const;
void consume(const StringView&);
void create_new_token(HTMLToken::Type);
// NOTE(review): `enum class State` is also declared earlier in this class, in
// the public section. Two definitions in one class scope would not compile, so
// this copy looks like the pre-change (removed) side of the diff hunk whose
// +/- markers were lost — confirm against the actual commit.
enum class State {
#define __ENUMERATE_TOKENIZER_STATE(state) state,
ENUMERATE_TOKENIZER_STATES
#undef __ENUMERATE_TOKENIZER_STATE
};
bool current_end_tag_token_is_appropriate() const;
static const char* state_name(State state)
{
@ -145,17 +149,22 @@ private:
ASSERT_NOT_REACHED();
}
// Private hooks taking the token about to be emitted / the state involved in a
// switch or reconsume. Their bodies are not visible in this hunk.
void will_emit(HTMLToken&);
void will_switch_to(State);
void will_reconsume_in(State);
// Current state and saved return state; both are initialized to State::Data.
State m_state { State::Data };
State m_return_state { State::Data };
StringBuilder m_temporary_buffer;
// Input text (a non-owning StringView) and a cursor initialized to 0.
// NOTE(review): m_cursor is presumably an offset into m_input — confirm.
StringView m_input;
size_t m_cursor { 0 };
// Token currently under construction (see create_new_token) and a stored
// start-tag token; current_end_tag_token_is_appropriate() presumably compares
// against the latter — confirm in the implementation.
HTMLToken m_current_token;
HTMLToken m_last_emitted_start_tag;
// Starts false. NOTE(review): presumably set once an EOF token has been emitted.
bool m_has_emitted_eof { false };
};
}