mirror of
https://github.com/RGBCube/serenity
synced 2025-05-20 18:15:07 +00:00
LibWeb: Escape HTML text fragments with multi-byte code point awareness
The UTF-8 encoding of U+00A0 (NBSP) is the bytes 0xc2 0xa0. By looping over the string to escape byte-by-byte, we replace the second byte with "&nbsp;", but leave the first byte in the resulting text. This creates an invalid UTF-8 string, with a lone leading byte.
This commit is contained in:
parent
3219ecbdc0
commit
f5f1a5228e
1 changed files with 7 additions and 7 deletions
|
@@ -3586,23 +3586,23 @@ DeprecatedString HTMLParser::serialize_html_fragment(DOM::Node const& node)
|
|||
auto escape_string = [](StringView string, AttributeMode attribute_mode) -> DeprecatedString {
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#escapingString
|
||||
StringBuilder builder;
|
||||
for (auto& ch : string) {
|
||||
for (auto code_point : Utf8View { string }) {
|
||||
// 1. Replace any occurrence of the "&" character by the string "&amp;".
|
||||
if (ch == '&')
|
||||
if (code_point == '&')
|
||||
builder.append("&amp;"sv);
|
||||
// 2. Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the string "&nbsp;".
|
||||
else if (ch == '\xA0')
|
||||
else if (code_point == 0xA0)
|
||||
builder.append("&nbsp;"sv);
|
||||
// 3. If the algorithm was invoked in the attribute mode, replace any occurrences of the """ character by the string "&quot;".
|
||||
else if (ch == '"' && attribute_mode == AttributeMode::Yes)
|
||||
else if (code_point == '"' && attribute_mode == AttributeMode::Yes)
|
||||
builder.append("&quot;"sv);
|
||||
// 4. If the algorithm was not invoked in the attribute mode, replace any occurrences of the "<" character by the string "&lt;", and any occurrences of the ">" character by the string "&gt;".
|
||||
else if (ch == '<' && attribute_mode == AttributeMode::No)
|
||||
else if (code_point == '<' && attribute_mode == AttributeMode::No)
|
||||
builder.append("&lt;"sv);
|
||||
else if (ch == '>' && attribute_mode == AttributeMode::No)
|
||||
else if (code_point == '>' && attribute_mode == AttributeMode::No)
|
||||
builder.append("&gt;"sv);
|
||||
else
|
||||
builder.append(ch);
|
||||
builder.append_code_point(code_point);
|
||||
}
|
||||
return builder.to_deprecated_string();
|
||||
};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue