From 9ce267944cfbb219b48d4262dbad234465c10181 Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Thu, 28 Dec 2023 23:30:20 +0100 Subject: [PATCH] LibWeb: Fix crash in HTML encoding detection when handling non-ASCII The fix here was to stop using StringBuilder::append(char) when told to append a code point, and switch to StringBuilder::append_code_point(u32). There are probably a bunch more issues like this, and we should stop using append(char) in general since it allows building of garbage strings. --- .../expected/html-encoding-detection-crash.txt | 7 +++++++ .../Layout/input/html-encoding-detection-crash.html | 1 + .../LibWeb/HTML/Parser/HTMLEncodingDetection.cpp | 12 ++++++------ 3 files changed, 14 insertions(+), 6 deletions(-) create mode 100644 Tests/LibWeb/Layout/expected/html-encoding-detection-crash.txt create mode 100644 Tests/LibWeb/Layout/input/html-encoding-detection-crash.html diff --git a/Tests/LibWeb/Layout/expected/html-encoding-detection-crash.txt b/Tests/LibWeb/Layout/expected/html-encoding-detection-crash.txt new file mode 100644 index 0000000000..f5204a0f46 --- /dev/null +++ b/Tests/LibWeb/Layout/expected/html-encoding-detection-crash.txt @@ -0,0 +1,7 @@ +Viewport <#document> at (0,0) content-size 800x600 children: not-inline + BlockContainer at (0,0) content-size 800x600 [BFC] children: not-inline + BlockContainer at (8,8) content-size 784x0 children: not-inline + +ViewportPaintable (Viewport<#document>) [0,0 800x600] + PaintableWithLines (BlockContainer) [0,0 800x600] + PaintableWithLines (BlockContainer) [8,8 784x0] diff --git a/Tests/LibWeb/Layout/input/html-encoding-detection-crash.html b/Tests/LibWeb/Layout/input/html-encoding-detection-crash.html new file mode 100644 index 0000000000..90424c0f8c --- /dev/null +++ b/Tests/LibWeb/Layout/input/html-encoding-detection-crash.html @@ -0,0 +1 @@ + prescan_get_attribute(DOM::Document& document, ByteBuffer c // -> If it is in the range 0x41 (A) to 0x5A (Z) if (input[position] >= 'A' && 
input[position] <= 'Z') { // Append the code point b+0x20 to attribute name (where b is the value of the byte at position). (This converts the input to lowercase.) - attribute_name.append(input[position] + 0x20); + attribute_name.append_code_point(input[position] + 0x20); } // -> Anything else else { @@ -185,7 +185,7 @@ value: // 4. Otherwise, if the value of the byte at position is in the range 0x41 (A) to 0x5A (Z), // then append a code point to attribute value whose value is 0x20 more than the value of the byte at position. if (input[position] >= 'A' && input[position] <= 'Z') { - attribute_value.append(input[position] + 0x20); + attribute_value.append_code_point(input[position] + 0x20); } // 5. Otherwise, append a code point to attribute value whose value is the same as the value of the byte at position. else { @@ -206,14 +206,14 @@ value: // -> If it is in the range 0x41 (A) to 0x5A (Z) if (input[position] >= 'A' && input[position] <= 'Z') { // Append a code point b+0x20 to attribute value (where b is the value of the byte at position). - attribute_value.append(input[position] + 0x20); + attribute_value.append_code_point(input[position] + 0x20); // Advance position to the next byte. ++position; } // -> Anything else else { // Append a code point with the same value as the byte at position to attribute value. - attribute_value.append(input[position]); + attribute_value.append_code_point(input[position]); // Advance position to the next byte. ++position; } @@ -232,12 +232,12 @@ value: // -> If it is in the range 0x41 (A) to 0x5A (Z) if (input[position] >= 'A' && input[position] <= 'Z') { // Append a code point b+0x20 to attribute value (where b is the value of the byte at position). - attribute_value.append(input[position] + 0x20); + attribute_value.append_code_point(input[position] + 0x20); } // -> Anything else else { // Append a code point with the same value as the byte at position to attribute value. 
- attribute_value.append(input[position]); + attribute_value.append_code_point(input[position]); } // 12. Advance position to the next byte and return to the previous step.