1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 22:57:44 +00:00

LibWeb: Use start_of_input_stream_twin() for is_valid_escape_sequence()

This means we can get rid of the hacks where we were peeking a code
point instead of getting the next one so that we could peek_twin()
later. Now, we follow the spec more closely. :^)
This commit is contained in:
Sam Atkins 2021-12-24 16:35:19 +00:00 committed by Andreas Kling
parent 269a24d4ca
commit 5d0851cb0e

View file

@ -663,8 +663,7 @@ String Tokenizer::consume_a_name()
} }
// the stream starts with a valid escape // the stream starts with a valid escape
auto next = peek_code_point(); if (is_valid_escape_sequence(start_of_input_stream_twin())) {
if (!is_eof(next) && is_valid_escape_sequence({ input, next })) {
// Consume an escaped code point. Append the returned code point to result. // Consume an escaped code point. Append the returned code point to result.
result.append_code_point(consume_escaped_code_point()); result.append_code_point(consume_escaped_code_point());
continue; continue;
@ -705,14 +704,11 @@ Token Tokenizer::consume_a_url_token()
// 3. Repeatedly consume the next input code point from the stream: // 3. Repeatedly consume the next input code point from the stream:
for (;;) { for (;;) {
// NOTE: We peek here instead of consuming, so that we can peek a twin later auto input = next_code_point();
// to determine if it's a valid escape sequence.
auto input = peek_code_point();
// U+0029 RIGHT PARENTHESIS ()) // U+0029 RIGHT PARENTHESIS ())
if (is_right_paren(input)) { if (is_right_paren(input)) {
// Return the <url-token>. // Return the <url-token>.
(void)next_code_point(); // Not to spec, see NOTE above.
return make_token(); return make_token();
} }
@ -755,7 +751,6 @@ Token Tokenizer::consume_a_url_token()
if (is_quotation_mark(input) || is_apostrophe(input) || is_left_paren(input) || is_non_printable(input)) { if (is_quotation_mark(input) || is_apostrophe(input) || is_left_paren(input) || is_non_printable(input)) {
// This is a parse error. Consume the remnants of a bad url, create a <bad-url-token>, and return it. // This is a parse error. Consume the remnants of a bad url, create a <bad-url-token>, and return it.
log_parse_error(); log_parse_error();
(void)next_code_point(); // Not to spec, see NOTE above.
consume_the_remnants_of_a_bad_url(); consume_the_remnants_of_a_bad_url();
return create_new_token(Token::Type::BadUrl); return create_new_token(Token::Type::BadUrl);
} }
@ -763,13 +758,12 @@ Token Tokenizer::consume_a_url_token()
// U+005C REVERSE SOLIDUS (\) // U+005C REVERSE SOLIDUS (\)
if (is_reverse_solidus(input)) { if (is_reverse_solidus(input)) {
// If the stream starts with a valid escape, // If the stream starts with a valid escape,
if (is_valid_escape_sequence(peek_twin())) { if (is_valid_escape_sequence(start_of_input_stream_twin())) {
// consume an escaped code point and append the returned code point to the <url-token>'s value. // consume an escaped code point and append the returned code point to the <url-token>'s value.
builder.append_code_point(consume_escaped_code_point()); builder.append_code_point(consume_escaped_code_point());
} else { } else {
// Otherwise, this is a parse error. // Otherwise, this is a parse error.
log_parse_error(); log_parse_error();
(void)next_code_point(); // Not to spec, see NOTE above.
// Consume the remnants of a bad url, create a <bad-url-token>, and return it. // Consume the remnants of a bad url, create a <bad-url-token>, and return it.
consume_the_remnants_of_a_bad_url(); consume_the_remnants_of_a_bad_url();
return create_new_token(Token::Type::BadUrl); return create_new_token(Token::Type::BadUrl);
@ -779,7 +773,6 @@ Token Tokenizer::consume_a_url_token()
// anything else // anything else
// Append the current input code point to the <url-token>'s value. // Append the current input code point to the <url-token>'s value.
builder.append_code_point(input); builder.append_code_point(input);
(void)next_code_point(); // Not to spec, see NOTE above.
} }
} }
@ -793,31 +786,25 @@ void Tokenizer::consume_the_remnants_of_a_bad_url()
// Repeatedly consume the next input code point from the stream: // Repeatedly consume the next input code point from the stream:
for (;;) { for (;;) {
// NOTE: We peek instead of consuming so is_valid_escape_sequence() can peek a twin. auto input = next_code_point();
// So, we have to consume the code point later.
auto input = peek_code_point();
// U+0029 RIGHT PARENTHESIS ()) // U+0029 RIGHT PARENTHESIS ())
// EOF // EOF
if (is_eof(input) || is_right_paren(input)) { if (is_eof(input) || is_right_paren(input)) {
(void)next_code_point(); // Not to spec, see NOTE above.
// Return. // Return.
return; return;
} }
// the input stream starts with a valid escape // the input stream starts with a valid escape
if (is_valid_escape_sequence(peek_twin())) { if (is_valid_escape_sequence(start_of_input_stream_twin())) {
// Consume an escaped code point. // Consume an escaped code point.
// This allows an escaped right parenthesis ("\)") to be encountered without ending // This allows an escaped right parenthesis ("\)") to be encountered without ending
// the <bad-url-token>. This is otherwise identical to the "anything else" clause. // the <bad-url-token>. This is otherwise identical to the "anything else" clause.
(void)next_code_point(); // Not to spec, see NOTE above.
(void)consume_escaped_code_point(); (void)consume_escaped_code_point();
} }
// anything else // anything else
// Do nothing. // Do nothing.
(void)next_code_point(); // Not to spec, see NOTE above.
} }
} }
@ -1298,7 +1285,7 @@ Token Tokenizer::consume_a_token()
dbgln_if(CSS_TOKENIZER_DEBUG, "is reverse solidus"); dbgln_if(CSS_TOKENIZER_DEBUG, "is reverse solidus");
// If the input stream starts with a valid escape, reconsume the current input code point, // If the input stream starts with a valid escape, reconsume the current input code point,
// consume an ident-like token, and return it. // consume an ident-like token, and return it.
if (is_valid_escape_sequence({ input, peek_code_point() })) { if (is_valid_escape_sequence(start_of_input_stream_twin())) {
reconsume_current_input_code_point(); reconsume_current_input_code_point();
return consume_an_ident_like_token(); return consume_an_ident_like_token();
} }