1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 14:18:12 +00:00

Unicode: s/codepoint/code_point/g

Unicode calls them "code points" so let's follow their style.
This commit is contained in:
Andreas Kling 2020-08-03 19:06:41 +02:00
parent b139fb9f38
commit ea9ac3155d
45 changed files with 449 additions and 449 deletions

View file

@ -31,12 +31,12 @@
namespace Web {
namespace HTML {
Optional<EntityMatch> codepoints_from_entity(const StringView& entity)
Optional<EntityMatch> code_points_from_entity(const StringView& entity)
{
constexpr struct {
StringView entity;
u32 codepoint;
} single_codepoint_entities[] = {
u32 code_points;
} single_code_points_entities[] = {
{ "AElig;", 0x000C6 },
{ "AElig", 0x000C6 },
{ "AMP;", 0x00026 },
@ -2179,9 +2179,9 @@ Optional<EntityMatch> codepoints_from_entity(const StringView& entity)
constexpr struct {
StringView entity;
u32 codepoint1;
u32 codepoint2;
} double_codepoint_entities[] = {
u32 code_points1;
u32 code_points2;
} double_code_points_entities[] = {
{ "NotEqualTilde;", 0x02242, 0x00338 },
{ "NotGreaterFullEqual;", 0x02267, 0x00338 },
{ "NotGreaterGreater;", 0x0226B, 0x00338 },
@ -2279,17 +2279,17 @@ Optional<EntityMatch> codepoints_from_entity(const StringView& entity)
EntityMatch match;
for (auto& single_codepoint_entity : single_codepoint_entities) {
if (entity.starts_with(single_codepoint_entity.entity)) {
if (match.entity.is_null() || single_codepoint_entity.entity.length() > match.entity.length())
match = { { single_codepoint_entity.codepoint }, single_codepoint_entity.entity };
for (auto& single_code_points_entity : single_code_points_entities) {
if (entity.starts_with(single_code_points_entity.entity)) {
if (match.entity.is_null() || single_code_points_entity.entity.length() > match.entity.length())
match = { { single_code_points_entity.code_points }, single_code_points_entity.entity };
}
}
for (auto& double_codepoint_entity : double_codepoint_entities) {
if (entity.starts_with(double_codepoint_entity.entity)) {
if (match.entity.is_null() || double_codepoint_entity.entity.length() > match.entity.length())
match = EntityMatch { { double_codepoint_entity.codepoint1, double_codepoint_entity.codepoint2 }, StringView(double_codepoint_entity.entity) };
for (auto& double_code_points_entity : double_code_points_entities) {
if (entity.starts_with(double_code_points_entity.entity)) {
if (match.entity.is_null() || double_code_points_entity.entity.length() > match.entity.length())
match = EntityMatch { { double_code_points_entity.code_points1, double_code_points_entity.code_points2 }, StringView(double_code_points_entity.entity) };
}
}

View file

@ -33,11 +33,11 @@ namespace Web {
namespace HTML {
struct EntityMatch {
Vector<u32, 2> codepoints;
Vector<u32, 2> code_points;
StringView entity;
};
Optional<EntityMatch> codepoints_from_entity(const StringView&);
Optional<EntityMatch> code_points_from_entity(const StringView&);
}
}

View file

@ -472,7 +472,7 @@ void HTMLDocumentParser::insert_comment(HTMLToken& token)
void HTMLDocumentParser::handle_in_head(HTMLToken& token)
{
if (token.is_parser_whitespace()) {
insert_character(token.codepoint());
insert_character(token.code_points());
return;
}
@ -671,7 +671,7 @@ void HTMLDocumentParser::insert_character(u32 data)
void HTMLDocumentParser::handle_after_head(HTMLToken& token)
{
if (token.is_character() && token.is_parser_whitespace()) {
insert_character(token.codepoint());
insert_character(token.code_points());
return;
}
@ -1004,17 +1004,17 @@ bool HTMLDocumentParser::is_special_tag(const FlyString& tag_name)
void HTMLDocumentParser::handle_in_body(HTMLToken& token)
{
if (token.is_character()) {
if (token.codepoint() == 0) {
if (token.code_points() == 0) {
PARSE_ERROR();
return;
}
if (token.is_parser_whitespace()) {
reconstruct_the_active_formatting_elements();
insert_character(token.codepoint());
insert_character(token.code_points());
return;
}
reconstruct_the_active_formatting_elements();
insert_character(token.codepoint());
insert_character(token.code_points());
m_frameset_ok = false;
return;
}
@ -1162,7 +1162,7 @@ void HTMLDocumentParser::handle_in_body(HTMLToken& token)
// then ignore that token and move on to the next one.
// (Newlines at the start of pre blocks are ignored as an authoring convenience.)
auto next_token = m_tokenizer.next_token();
if (next_token.has_value() && next_token.value().is_character() && next_token.value().codepoint() == '\n') {
if (next_token.has_value() && next_token.value().is_character() && next_token.value().code_points() == '\n') {
// Ignore it.
} else {
process_using_the_rules_for(m_insertion_mode, next_token.value());
@ -1503,7 +1503,7 @@ void HTMLDocumentParser::handle_in_body(HTMLToken& token)
m_frameset_ok = false;
m_insertion_mode = InsertionMode::Text;
if (next_token.has_value() && next_token.value().is_character() && next_token.value().codepoint() == '\n') {
if (next_token.has_value() && next_token.value().is_character() && next_token.value().code_points() == '\n') {
// Ignore it.
} else {
process_using_the_rules_for(m_insertion_mode, next_token.value());
@ -1750,7 +1750,7 @@ void HTMLDocumentParser::decrement_script_nesting_level()
void HTMLDocumentParser::handle_text(HTMLToken& token)
{
if (token.is_character()) {
insert_character(token.codepoint());
insert_character(token.code_points());
return;
}
if (token.is_end_of_file()) {
@ -1979,7 +1979,7 @@ void HTMLDocumentParser::handle_in_cell(HTMLToken& token)
void HTMLDocumentParser::handle_in_table_text(HTMLToken& token)
{
if (token.is_character()) {
if (token.codepoint() == 0) {
if (token.code_points() == 0) {
PARSE_ERROR();
return;
}
@ -2000,7 +2000,7 @@ void HTMLDocumentParser::handle_in_table_text(HTMLToken& token)
}
for (auto& pending_token : m_pending_table_character_tokens) {
insert_character(pending_token.codepoint());
insert_character(pending_token.code_points());
}
m_insertion_mode = m_original_insertion_mode;
@ -2210,11 +2210,11 @@ void HTMLDocumentParser::handle_in_select_in_table(HTMLToken& token)
void HTMLDocumentParser::handle_in_select(HTMLToken& token)
{
if (token.is_character()) {
if (token.codepoint() == 0) {
if (token.code_points() == 0) {
PARSE_ERROR();
return;
}
insert_character(token.codepoint());
insert_character(token.code_points());
return;
}
@ -2384,7 +2384,7 @@ void HTMLDocumentParser::handle_in_caption(HTMLToken& token)
void HTMLDocumentParser::handle_in_column_group(HTMLToken& token)
{
if (token.is_character() && token.is_parser_whitespace()) {
insert_character(token.codepoint());
insert_character(token.code_points());
return;
}
@ -2527,7 +2527,7 @@ void HTMLDocumentParser::handle_in_template(HTMLToken& token)
void HTMLDocumentParser::handle_in_frameset(HTMLToken& token)
{
if (token.is_character() && token.is_parser_whitespace()) {
insert_character(token.codepoint());
insert_character(token.code_points());
return;
}
@ -2587,7 +2587,7 @@ void HTMLDocumentParser::handle_in_frameset(HTMLToken& token)
void HTMLDocumentParser::handle_after_frameset(HTMLToken& token)
{
if (token.is_character() && token.is_parser_whitespace()) {
insert_character(token.codepoint());
insert_character(token.code_points());
return;
}

View file

@ -50,11 +50,11 @@ public:
EndOfFile,
};
static HTMLToken make_character(u32 codepoint)
static HTMLToken make_character(u32 code_points)
{
HTMLToken token;
token.m_type = Type::Character;
token.m_comment_or_character.data.append(codepoint);
token.m_comment_or_character.data.append(code_points);
return token;
}
@ -73,11 +73,11 @@ public:
bool is_character() const { return m_type == Type::Character; }
bool is_end_of_file() const { return m_type == Type::EndOfFile; }
u32 codepoint() const
u32 code_points() const
{
ASSERT(is_character());
Utf8View view(m_comment_or_character.data.string_view());
ASSERT(view.length_in_codepoints() == 1);
ASSERT(view.length_in_code_points() == 1);
return *view.begin();
}
@ -86,7 +86,7 @@ public:
// NOTE: The parser considers '\r' to be whitespace, while the tokenizer does not.
if (!is_character())
return false;
switch (codepoint()) {
switch (code_points()) {
case '\t':
case '\n':
case '\f':

View file

@ -46,7 +46,7 @@ namespace Web::HTML {
#endif
#define CONSUME_NEXT_INPUT_CHARACTER \
current_input_character = next_codepoint();
current_input_character = next_code_points();
#define SWITCH_TO(new_state) \
do { \
@ -86,22 +86,22 @@ namespace Web::HTML {
return m_queued_tokens.dequeue(); \
} while (0)
#define EMIT_CHARACTER_AND_RECONSUME_IN(codepoint, new_state) \
#define EMIT_CHARACTER_AND_RECONSUME_IN(code_points, new_state) \
do { \
m_queued_tokens.enqueue(HTMLToken::make_character(codepoint)); \
m_queued_tokens.enqueue(HTMLToken::make_character(code_points)); \
will_reconsume_in(State::new_state); \
m_state = State::new_state; \
goto new_state; \
} while (0)
#define FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE \
#define FLUSH_CODE_POINTS_CONSUMED_AS_A_CHARACTER_REFERENCE \
do { \
for (auto codepoint : m_temporary_buffer) { \
for (auto code_points : m_temporary_buffer) { \
if (consumed_as_part_of_an_attribute()) { \
m_current_token.m_tag.attributes.last().value_builder.append_codepoint(codepoint); \
m_current_token.m_tag.attributes.last().value_builder.append_code_points(code_points); \
} else { \
create_new_token(HTMLToken::Type::Character); \
m_current_token.m_comment_or_character.data.append_codepoint(codepoint); \
m_current_token.m_comment_or_character.data.append_code_points(code_points); \
m_queued_tokens.enqueue(m_current_token); \
} \
} \
@ -112,8 +112,8 @@ namespace Web::HTML {
m_utf8_iterator = m_prev_utf8_iterator; \
} while (0)
#define ON(codepoint) \
if (current_input_character.has_value() && current_input_character.value() == codepoint)
#define ON(code_points) \
if (current_input_character.has_value() && current_input_character.value() == code_points)
#define ON_EOF \
if (!current_input_character.has_value())
@ -159,10 +159,10 @@ namespace Web::HTML {
return m_queued_tokens.dequeue(); \
} while (0)
#define EMIT_CHARACTER(codepoint) \
#define EMIT_CHARACTER(code_points) \
do { \
create_new_token(HTMLToken::Type::Character); \
m_current_token.m_comment_or_character.data.append_codepoint(codepoint); \
m_current_token.m_comment_or_character.data.append_code_points(code_points); \
m_queued_tokens.enqueue(m_current_token); \
return m_queued_tokens.dequeue(); \
} while (0)
@ -170,11 +170,11 @@ namespace Web::HTML {
#define EMIT_CURRENT_CHARACTER \
EMIT_CHARACTER(current_input_character.value());
#define SWITCH_TO_AND_EMIT_CHARACTER(codepoint, new_state) \
#define SWITCH_TO_AND_EMIT_CHARACTER(code_points, new_state) \
do { \
will_switch_to(State::new_state); \
m_state = State::new_state; \
EMIT_CHARACTER(codepoint); \
EMIT_CHARACTER(code_points); \
} while (0)
#define SWITCH_TO_AND_EMIT_CURRENT_CHARACTER(new_state) \
@ -193,39 +193,39 @@ namespace Web::HTML {
} \
}
static inline bool is_surrogate(u32 codepoint)
static inline bool is_surrogate(u32 code_points)
{
return (codepoint & 0xfffff800) == 0xd800;
return (code_points & 0xfffff800) == 0xd800;
}
static inline bool is_noncharacter(u32 codepoint)
static inline bool is_noncharacter(u32 code_points)
{
return codepoint >= 0xfdd0 && (codepoint <= 0xfdef || (codepoint & 0xfffe) == 0xfffe) && codepoint <= 0x10ffff;
return code_points >= 0xfdd0 && (code_points <= 0xfdef || (code_points & 0xfffe) == 0xfffe) && code_points <= 0x10ffff;
}
static inline bool is_c0_control(u32 codepoint)
static inline bool is_c0_control(u32 code_points)
{
return codepoint <= 0x1f;
return code_points <= 0x1f;
}
static inline bool is_control(u32 codepoint)
static inline bool is_control(u32 code_points)
{
return is_c0_control(codepoint) || (codepoint >= 0x7f && codepoint <= 0x9f);
return is_c0_control(code_points) || (code_points >= 0x7f && code_points <= 0x9f);
}
Optional<u32> HTMLTokenizer::next_codepoint()
Optional<u32> HTMLTokenizer::next_code_points()
{
if (m_utf8_iterator == m_utf8_view.end())
return {};
m_prev_utf8_iterator = m_utf8_iterator;
++m_utf8_iterator;
#ifdef TOKENIZER_TRACE
dbg() << "(Tokenizer) Next codepoint: " << (char)*m_prev_utf8_iterator;
dbg() << "(Tokenizer) Next code_points: " << (char)*m_prev_utf8_iterator;
#endif
return *m_prev_utf8_iterator;
}
Optional<u32> HTMLTokenizer::peek_codepoint(size_t offset) const
Optional<u32> HTMLTokenizer::peek_code_points(size_t offset) const
{
auto it = m_utf8_iterator;
for (size_t i = 0; i < offset && it != m_utf8_view.end(); ++i)
@ -242,7 +242,7 @@ _StartOfFunction:
return m_queued_tokens.dequeue();
for (;;) {
auto current_input_character = next_codepoint();
auto current_input_character = next_code_points();
switch (m_state) {
BEGIN_STATE(Data)
{
@ -328,7 +328,7 @@ _StartOfFunction:
ON(0)
{
PARSE_ERROR();
m_current_token.m_tag.tag_name.append_codepoint(0xFFFD);
m_current_token.m_tag.tag_name.append_code_points(0xFFFD);
continue;
}
ON_EOF
@ -338,7 +338,7 @@ _StartOfFunction:
}
ANYTHING_ELSE
{
m_current_token.m_tag.tag_name.append_codepoint(current_input_character.value());
m_current_token.m_tag.tag_name.append_code_points(current_input_character.value());
continue;
}
}
@ -408,12 +408,12 @@ _StartOfFunction:
ON(0)
{
PARSE_ERROR();
m_current_token.m_comment_or_character.data.append_codepoint(0xFFFD);
m_current_token.m_comment_or_character.data.append_code_points(0xFFFD);
continue;
}
ANYTHING_ELSE
{
m_current_token.m_comment_or_character.data.append_codepoint(current_input_character.value());
m_current_token.m_comment_or_character.data.append_code_points(current_input_character.value());
continue;
}
}
@ -462,7 +462,7 @@ _StartOfFunction:
{
PARSE_ERROR();
create_new_token(HTMLToken::Type::DOCTYPE);
m_current_token.m_doctype.name.append_codepoint(0xFFFD);
m_current_token.m_doctype.name.append_code_points(0xFFFD);
m_current_token.m_doctype.missing_name = false;
SWITCH_TO(DOCTYPEName);
}
@ -484,7 +484,7 @@ _StartOfFunction:
ANYTHING_ELSE
{
create_new_token(HTMLToken::Type::DOCTYPE);
m_current_token.m_doctype.name.append_codepoint(current_input_character.value());
m_current_token.m_doctype.name.append_code_points(current_input_character.value());
m_current_token.m_doctype.missing_name = false;
SWITCH_TO(DOCTYPEName);
}
@ -509,7 +509,7 @@ _StartOfFunction:
ON(0)
{
PARSE_ERROR();
m_current_token.m_doctype.name.append_codepoint(0xFFFD);
m_current_token.m_doctype.name.append_code_points(0xFFFD);
continue;
}
ON_EOF
@ -521,7 +521,7 @@ _StartOfFunction:
}
ANYTHING_ELSE
{
m_current_token.m_doctype.name.append_codepoint(current_input_character.value());
m_current_token.m_doctype.name.append_code_points(current_input_character.value());
continue;
}
}
@ -732,7 +732,7 @@ _StartOfFunction:
ON(0)
{
PARSE_ERROR();
m_current_token.m_doctype.public_identifier.append_codepoint(0xFFFD);
m_current_token.m_doctype.public_identifier.append_code_points(0xFFFD);
continue;
}
ON('>')
@ -750,7 +750,7 @@ _StartOfFunction:
}
ANYTHING_ELSE
{
m_current_token.m_doctype.public_identifier.append_codepoint(current_input_character.value());
m_current_token.m_doctype.public_identifier.append_code_points(current_input_character.value());
continue;
}
}
@ -765,7 +765,7 @@ _StartOfFunction:
ON(0)
{
PARSE_ERROR();
m_current_token.m_doctype.public_identifier.append_codepoint(0xFFFD);
m_current_token.m_doctype.public_identifier.append_code_points(0xFFFD);
continue;
}
ON('>')
@ -783,7 +783,7 @@ _StartOfFunction:
}
ANYTHING_ELSE
{
m_current_token.m_doctype.public_identifier.append_codepoint(current_input_character.value());
m_current_token.m_doctype.public_identifier.append_code_points(current_input_character.value());
continue;
}
}
@ -798,7 +798,7 @@ _StartOfFunction:
ON(0)
{
PARSE_ERROR();
m_current_token.m_doctype.system_identifier.append_codepoint(0xFFFD);
m_current_token.m_doctype.system_identifier.append_code_points(0xFFFD);
continue;
}
ON('>')
@ -816,7 +816,7 @@ _StartOfFunction:
}
ANYTHING_ELSE
{
m_current_token.m_doctype.system_identifier.append_codepoint(current_input_character.value());
m_current_token.m_doctype.system_identifier.append_code_points(current_input_character.value());
continue;
}
}
@ -831,7 +831,7 @@ _StartOfFunction:
ON(0)
{
PARSE_ERROR();
m_current_token.m_doctype.system_identifier.append_codepoint(0xFFFD);
m_current_token.m_doctype.system_identifier.append_code_points(0xFFFD);
continue;
}
ON('>')
@ -849,7 +849,7 @@ _StartOfFunction:
}
ANYTHING_ELSE
{
m_current_token.m_doctype.system_identifier.append_codepoint(current_input_character.value());
m_current_token.m_doctype.system_identifier.append_code_points(current_input_character.value());
continue;
}
}
@ -1003,7 +1003,7 @@ _StartOfFunction:
{
PARSE_ERROR();
auto new_attribute = HTMLToken::AttributeBuilder();
new_attribute.local_name_builder.append_codepoint(current_input_character.value());
new_attribute.local_name_builder.append_code_points(current_input_character.value());
m_current_token.m_tag.attributes.append(new_attribute);
SWITCH_TO(AttributeName);
}
@ -1059,13 +1059,13 @@ _StartOfFunction:
}
ON_ASCII_UPPER_ALPHA
{
m_current_token.m_tag.attributes.last().local_name_builder.append_codepoint(tolower(current_input_character.value()));
m_current_token.m_tag.attributes.last().local_name_builder.append_code_points(tolower(current_input_character.value()));
continue;
}
ON(0)
{
PARSE_ERROR();
m_current_token.m_tag.attributes.last().local_name_builder.append_codepoint(0xFFFD);
m_current_token.m_tag.attributes.last().local_name_builder.append_code_points(0xFFFD);
continue;
}
ON('"')
@ -1086,7 +1086,7 @@ _StartOfFunction:
ANYTHING_ELSE
{
AnythingElseAttributeName:
m_current_token.m_tag.attributes.last().local_name_builder.append_codepoint(current_input_character.value());
m_current_token.m_tag.attributes.last().local_name_builder.append_code_points(current_input_character.value());
continue;
}
}
@ -1163,7 +1163,7 @@ _StartOfFunction:
ON(0)
{
PARSE_ERROR();
m_current_token.m_tag.attributes.last().value_builder.append_codepoint(0xFFFD);
m_current_token.m_tag.attributes.last().value_builder.append_code_points(0xFFFD);
continue;
}
ON_EOF
@ -1173,7 +1173,7 @@ _StartOfFunction:
}
ANYTHING_ELSE
{
m_current_token.m_tag.attributes.last().value_builder.append_codepoint(current_input_character.value());
m_current_token.m_tag.attributes.last().value_builder.append_code_points(current_input_character.value());
continue;
}
}
@ -1193,7 +1193,7 @@ _StartOfFunction:
ON(0)
{
PARSE_ERROR();
m_current_token.m_tag.attributes.last().value_builder.append_codepoint(0xFFFD);
m_current_token.m_tag.attributes.last().value_builder.append_code_points(0xFFFD);
continue;
}
ON_EOF
@ -1203,7 +1203,7 @@ _StartOfFunction:
}
ANYTHING_ELSE
{
m_current_token.m_tag.attributes.last().value_builder.append_codepoint(current_input_character.value());
m_current_token.m_tag.attributes.last().value_builder.append_code_points(current_input_character.value());
continue;
}
}
@ -1227,7 +1227,7 @@ _StartOfFunction:
ON(0)
{
PARSE_ERROR();
m_current_token.m_tag.attributes.last().value_builder.append_codepoint(0xFFFD);
m_current_token.m_tag.attributes.last().value_builder.append_code_points(0xFFFD);
continue;
}
ON('"')
@ -1263,7 +1263,7 @@ _StartOfFunction:
ANYTHING_ELSE
{
AnythingElseAttributeValueUnquoted:
m_current_token.m_tag.attributes.last().value_builder.append_codepoint(current_input_character.value());
m_current_token.m_tag.attributes.last().value_builder.append_code_points(current_input_character.value());
continue;
}
}
@ -1343,7 +1343,7 @@ _StartOfFunction:
{
ON('<')
{
m_current_token.m_comment_or_character.data.append_codepoint(current_input_character.value());
m_current_token.m_comment_or_character.data.append_code_points(current_input_character.value());
SWITCH_TO(CommentLessThanSign);
}
ON('-')
@ -1353,7 +1353,7 @@ _StartOfFunction:
ON(0)
{
PARSE_ERROR();
m_current_token.m_comment_or_character.data.append_codepoint(0xFFFD);
m_current_token.m_comment_or_character.data.append_code_points(0xFFFD);
continue;
}
ON_EOF
@ -1364,7 +1364,7 @@ _StartOfFunction:
}
ANYTHING_ELSE
{
m_current_token.m_comment_or_character.data.append_codepoint(current_input_character.value());
m_current_token.m_comment_or_character.data.append_code_points(current_input_character.value());
continue;
}
}
@ -1449,12 +1449,12 @@ _StartOfFunction:
{
ON('!')
{
m_current_token.m_comment_or_character.data.append_codepoint(current_input_character.value());
m_current_token.m_comment_or_character.data.append_code_points(current_input_character.value());
SWITCH_TO(CommentLessThanSignBang);
}
ON('<')
{
m_current_token.m_comment_or_character.data.append_codepoint(current_input_character.value());
m_current_token.m_comment_or_character.data.append_code_points(current_input_character.value());
continue;
}
ANYTHING_ELSE
@ -1533,7 +1533,7 @@ _StartOfFunction:
{
size_t byte_offset = m_utf8_view.byte_offset_of(m_prev_utf8_iterator);
auto match = HTML::codepoints_from_entity(m_decoded_input.substring_view(byte_offset, m_decoded_input.length() - byte_offset - 1));
auto match = HTML::code_points_from_entity(m_decoded_input.substring_view(byte_offset, m_decoded_input.length() - byte_offset - 1));
if (match.has_value()) {
for (size_t i = 0; i < match.value().entity.length() - 1; ++i) {
@ -1543,18 +1543,18 @@ _StartOfFunction:
for (auto ch : match.value().entity)
m_temporary_buffer.append(ch);
if (consumed_as_part_of_an_attribute() && match.value().codepoints.last() != ';') {
auto next = peek_codepoint(0);
if (consumed_as_part_of_an_attribute() && match.value().code_points.last() != ';') {
auto next = peek_code_points(0);
if (next.has_value() && (next.value() == '=' || isalnum(next.value()))) {
FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
FLUSH_CODE_POINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
SWITCH_TO_RETURN_STATE;
}
}
if (consumed_as_part_of_an_attribute() && match.value().entity.ends_with(';')) {
auto next_codepoint = peek_codepoint(0);
if (next_codepoint.has_value() && next_codepoint.value() == '=') {
FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
auto next_code_points = peek_code_points(0);
if (next_code_points.has_value() && next_code_points.value() == '=') {
FLUSH_CODE_POINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
SWITCH_TO_RETURN_STATE;
}
}
@ -1564,12 +1564,12 @@ _StartOfFunction:
}
m_temporary_buffer.clear();
m_temporary_buffer.append(match.value().codepoints);
m_temporary_buffer.append(match.value().code_points);
FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
FLUSH_CODE_POINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
SWITCH_TO_RETURN_STATE;
} else {
FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
FLUSH_CODE_POINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
SWITCH_TO(AmbiguousAmpersand);
}
}
@ -1580,7 +1580,7 @@ _StartOfFunction:
ON_ASCII_ALPHANUMERIC
{
if (consumed_as_part_of_an_attribute()) {
m_current_token.m_tag.attributes.last().value_builder.append_codepoint(current_input_character.value());
m_current_token.m_tag.attributes.last().value_builder.append_code_points(current_input_character.value());
continue;
} else {
EMIT_CURRENT_CHARACTER;
@ -1628,7 +1628,7 @@ _StartOfFunction:
ANYTHING_ELSE
{
PARSE_ERROR();
FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
FLUSH_CODE_POINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
RECONSUME_IN_RETURN_STATE;
}
}
@ -1643,7 +1643,7 @@ _StartOfFunction:
ANYTHING_ELSE
{
PARSE_ERROR();
FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
FLUSH_CODE_POINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
RECONSUME_IN_RETURN_STATE;
}
}
@ -1724,7 +1724,7 @@ _StartOfFunction:
PARSE_ERROR();
constexpr struct {
u32 number;
u32 codepoint;
u32 code_points;
} conversion_table[] = {
{ 0x80, 0x20AC },
{ 0x82, 0x201A },
@ -1756,7 +1756,7 @@ _StartOfFunction:
};
for (auto& entry : conversion_table) {
if (m_character_reference_code == entry.number) {
m_character_reference_code = entry.codepoint;
m_character_reference_code = entry.code_points;
break;
}
}
@ -1764,7 +1764,7 @@ _StartOfFunction:
m_temporary_buffer.clear();
m_temporary_buffer.append(m_character_reference_code);
FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
FLUSH_CODE_POINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
SWITCH_TO_RETURN_STATE;
}
END_STATE
@ -1833,8 +1833,8 @@ _StartOfFunction:
if (!current_end_tag_token_is_appropriate()) {
m_queued_tokens.enqueue(HTMLToken::make_character('<'));
m_queued_tokens.enqueue(HTMLToken::make_character('/'));
for (auto codepoint : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(codepoint));
for (auto code_points : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(code_points));
RECONSUME_IN(RCDATA);
}
SWITCH_TO(BeforeAttributeName);
@ -1844,8 +1844,8 @@ _StartOfFunction:
if (!current_end_tag_token_is_appropriate()) {
m_queued_tokens.enqueue(HTMLToken::make_character('<'));
m_queued_tokens.enqueue(HTMLToken::make_character('/'));
for (auto codepoint : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(codepoint));
for (auto code_points : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(code_points));
RECONSUME_IN(RCDATA);
}
SWITCH_TO(SelfClosingStartTag);
@ -1855,8 +1855,8 @@ _StartOfFunction:
if (!current_end_tag_token_is_appropriate()) {
m_queued_tokens.enqueue(HTMLToken::make_character('<'));
m_queued_tokens.enqueue(HTMLToken::make_character('/'));
for (auto codepoint : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(codepoint));
for (auto code_points : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(code_points));
RECONSUME_IN(RCDATA);
}
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
@ -1869,7 +1869,7 @@ _StartOfFunction:
}
ON_ASCII_LOWER_ALPHA
{
m_current_token.m_tag.tag_name.append_codepoint(current_input_character.value());
m_current_token.m_tag.tag_name.append_code_points(current_input_character.value());
m_temporary_buffer.append(current_input_character.value());
continue;
}
@ -1877,8 +1877,8 @@ _StartOfFunction:
{
m_queued_tokens.enqueue(HTMLToken::make_character('<'));
m_queued_tokens.enqueue(HTMLToken::make_character('/'));
for (auto codepoint : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(codepoint));
for (auto code_points : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(code_points));
RECONSUME_IN(RCDATA);
}
}
@ -1943,8 +1943,8 @@ _StartOfFunction:
if (!current_end_tag_token_is_appropriate()) {
m_queued_tokens.enqueue(HTMLToken::make_character('<'));
m_queued_tokens.enqueue(HTMLToken::make_character('/'));
for (auto codepoint : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(codepoint));
for (auto code_points : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(code_points));
RECONSUME_IN(RAWTEXT);
}
SWITCH_TO(BeforeAttributeName);
@ -1954,8 +1954,8 @@ _StartOfFunction:
if (!current_end_tag_token_is_appropriate()) {
m_queued_tokens.enqueue(HTMLToken::make_character('<'));
m_queued_tokens.enqueue(HTMLToken::make_character('/'));
for (auto codepoint : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(codepoint));
for (auto code_points : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(code_points));
RECONSUME_IN(RAWTEXT);
}
SWITCH_TO(SelfClosingStartTag);
@ -1965,8 +1965,8 @@ _StartOfFunction:
if (!current_end_tag_token_is_appropriate()) {
m_queued_tokens.enqueue(HTMLToken::make_character('<'));
m_queued_tokens.enqueue(HTMLToken::make_character('/'));
for (auto codepoint : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(codepoint));
for (auto code_points : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(code_points));
RECONSUME_IN(RAWTEXT);
}
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
@ -1987,8 +1987,8 @@ _StartOfFunction:
{
m_queued_tokens.enqueue(HTMLToken::make_character('<'));
m_queued_tokens.enqueue(HTMLToken::make_character('/'));
for (auto codepoint : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(codepoint));
for (auto code_points : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(code_points));
RECONSUME_IN(RAWTEXT);
}
}
@ -2155,8 +2155,8 @@ _StartOfFunction:
m_queued_tokens.enqueue(HTMLToken::make_character('<'));
m_queued_tokens.enqueue(HTMLToken::make_character('/'));
for (auto codepoint : m_temporary_buffer) {
m_queued_tokens.enqueue(HTMLToken::make_character(codepoint));
for (auto code_points : m_temporary_buffer) {
m_queued_tokens.enqueue(HTMLToken::make_character(code_points));
}
RECONSUME_IN(ScriptDataEscaped);
}
@ -2167,8 +2167,8 @@ _StartOfFunction:
m_queued_tokens.enqueue(HTMLToken::make_character('<'));
m_queued_tokens.enqueue(HTMLToken::make_character('/'));
for (auto codepoint : m_temporary_buffer) {
m_queued_tokens.enqueue(HTMLToken::make_character(codepoint));
for (auto code_points : m_temporary_buffer) {
m_queued_tokens.enqueue(HTMLToken::make_character(code_points));
}
RECONSUME_IN(ScriptDataEscaped);
}
@ -2179,8 +2179,8 @@ _StartOfFunction:
m_queued_tokens.enqueue(HTMLToken::make_character('<'));
m_queued_tokens.enqueue(HTMLToken::make_character('/'));
for (auto codepoint : m_temporary_buffer) {
m_queued_tokens.enqueue(HTMLToken::make_character(codepoint));
for (auto code_points : m_temporary_buffer) {
m_queued_tokens.enqueue(HTMLToken::make_character(code_points));
}
RECONSUME_IN(ScriptDataEscaped);
}
@ -2200,8 +2200,8 @@ _StartOfFunction:
{
m_queued_tokens.enqueue(HTMLToken::make_character('<'));
m_queued_tokens.enqueue(HTMLToken::make_character('/'));
for (auto codepoint : m_temporary_buffer) {
m_queued_tokens.enqueue(HTMLToken::make_character(codepoint));
for (auto code_points : m_temporary_buffer) {
m_queued_tokens.enqueue(HTMLToken::make_character(code_points));
}
RECONSUME_IN(ScriptDataEscaped);
}
@ -2479,8 +2479,8 @@ _StartOfFunction:
SWITCH_TO(BeforeAttributeName);
m_queued_tokens.enqueue(HTMLToken::make_character('<'));
m_queued_tokens.enqueue(HTMLToken::make_character('/'));
for (auto codepoint : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(codepoint));
for (auto code_points : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(code_points));
RECONSUME_IN(ScriptData);
}
ON('/')
@ -2489,8 +2489,8 @@ _StartOfFunction:
SWITCH_TO(SelfClosingStartTag);
m_queued_tokens.enqueue(HTMLToken::make_character('<'));
m_queued_tokens.enqueue(HTMLToken::make_character('/'));
for (auto codepoint : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(codepoint));
for (auto code_points : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(code_points));
RECONSUME_IN(ScriptData);
}
ON('>')
@ -2499,8 +2499,8 @@ _StartOfFunction:
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
m_queued_tokens.enqueue(HTMLToken::make_character('<'));
m_queued_tokens.enqueue(HTMLToken::make_character('/'));
for (auto codepoint : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(codepoint));
for (auto code_points : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(code_points));
RECONSUME_IN(ScriptData);
}
ON_ASCII_UPPER_ALPHA
@ -2519,8 +2519,8 @@ _StartOfFunction:
{
m_queued_tokens.enqueue(HTMLToken::make_character('<'));
m_queued_tokens.enqueue(HTMLToken::make_character('/'));
for (auto codepoint : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(codepoint));
for (auto code_points : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(code_points));
RECONSUME_IN(ScriptData);
}
}
@ -2585,18 +2585,18 @@ _StartOfFunction:
bool HTMLTokenizer::consume_next_if_match(const StringView& string, CaseSensitivity case_sensitivity)
{
for (size_t i = 0; i < string.length(); ++i) {
auto codepoint = peek_codepoint(i);
if (!codepoint.has_value())
auto code_points = peek_code_points(i);
if (!code_points.has_value())
return false;
// FIXME: This should be more Unicode-aware.
if (case_sensitivity == CaseSensitivity::CaseInsensitive) {
if (codepoint.value() < 0x80) {
if (tolower(codepoint.value()) != tolower(string[i]))
if (code_points.value() < 0x80) {
if (tolower(code_points.value()) != tolower(string[i]))
return false;
continue;
}
}
if (codepoint.value() != (u32)string[i])
if (code_points.value() != (u32)string[i])
return false;
}
for (size_t i = 0; i < string.length(); ++i) {

View file

@ -137,8 +137,8 @@ public:
String source() const { return m_decoded_input; }
private:
Optional<u32> next_codepoint();
Optional<u32> peek_codepoint(size_t offset) const;
Optional<u32> next_code_points();
Optional<u32> peek_code_points(size_t offset) const;
bool consume_next_if_match(const StringView&, CaseSensitivity = CaseSensitivity::CaseSensitive);
void create_new_token(HTMLToken::Type);
bool current_end_tag_token_is_appropriate() const;