mirror of
https://github.com/RGBCube/serenity
synced 2025-05-14 08:04:57 +00:00
LibXML+Tests: Consume `>` in the character data ending `]]>` and test it
For example, with this input:
```xml
<C>]]>
```
After seeing `<C>`, the parser will start parsing the content of the element. The content parser will then parse any character data it sees. The character parser would see the first two characters, `]]`, and consume them. Then, it would see the `>` and set the state machine to say we have seen this, but it did _not_ consume it and would instead tell GenericLexer that it should stop consuming characters. Therefore, we only consumed 2 characters. Then, it would see that we are in the state where we've seen the full `]]>` and try to take off three characters from the end of the consumed input when we only have 2 characters, causing an assertion failure as we are asking to take off more characters than there really are.
This commit is contained in:
parent
9a97ffe883
commit
adb5f7e485
4 changed files with 32 additions and 2 deletions
|
@ -23,6 +23,7 @@ add_subdirectory(LibTimeZone)
|
||||||
add_subdirectory(LibUnicode)
|
add_subdirectory(LibUnicode)
|
||||||
add_subdirectory(LibWasm)
|
add_subdirectory(LibWasm)
|
||||||
add_subdirectory(LibWeb)
|
add_subdirectory(LibWeb)
|
||||||
|
add_subdirectory(LibXML)
|
||||||
if (${SERENITY_ARCH} STREQUAL "i686")
|
if (${SERENITY_ARCH} STREQUAL "i686")
|
||||||
add_subdirectory(UserspaceEmulator)
|
add_subdirectory(UserspaceEmulator)
|
||||||
endif()
|
endif()
|
||||||
|
|
7
Tests/LibXML/CMakeLists.txt
Normal file
7
Tests/LibXML/CMakeLists.txt
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
# LibXML test sources; every entry listed here is handed to serenity_test() by the loop below.
set(TEST_SOURCES
|
||||||
|
TestParser.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
foreach(source IN LISTS TEST_SOURCES)
|
||||||
|
serenity_test("${source}" LibXML LIBS LibXML)
|
||||||
|
endforeach()
|
22
Tests/LibXML/TestParser.cpp
Normal file
22
Tests/LibXML/TestParser.cpp
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2022, Luke Wilde <lukew@serenityos.org>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <LibTest/TestCase.h>
|
||||||
|
#include <LibXML/Parser/Parser.h>
|
||||||
|
|
||||||
|
// Regression test: parsing an element whose content consists solely of the
// CDATA-section terminator `]]>` must not crash the parser.
TEST_CASE(char_data_ending)
|
||||||
|
{
|
||||||
|
EXPECT_NO_CRASH("parsing character data ending by itself should not crash", [] {
|
||||||
|
// After seeing `<C>`, the parser will start parsing the content of the element. The content parser will then parse any character data it sees.
|
||||||
|
// The character parser would see the first two characters, `]]`, and consume them. Then, it would see the `>` and set the state machine to say we have seen this,
|
||||||
|
// but it did _not_ consume it and would instead tell GenericLexer that it should stop consuming characters. Therefore, we only consumed 2 characters.
|
||||||
|
// Then, it would see that we are in the state where we've seen the full `]]>` and try to take off three characters from the end of the consumed
|
||||||
|
// input when we only have 2 characters, causing an assertion failure as we are asking to take off more characters than there really are.
|
||||||
|
XML::Parser parser("<C>]]>");
|
||||||
|
// The parse result is deliberately discarded; this test only checks that parsing does not crash.
(void)parser.parse();
|
||||||
|
return Test::Crash::Failure::DidNotCrash;
|
||||||
|
});
|
||||||
|
}
|
|
@ -891,7 +891,7 @@ ErrorOr<StringView, ParseError> Parser::parse_char_data()
|
||||||
// CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
|
// CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
|
||||||
auto cend_state = 0; // 1: ], 2: ], 3: >
|
auto cend_state = 0; // 1: ], 2: ], 3: >
|
||||||
auto text = m_lexer.consume_while([&](auto ch) {
|
auto text = m_lexer.consume_while([&](auto ch) {
|
||||||
if (ch == '<' || ch == '&')
|
if (ch == '<' || ch == '&' || cend_state == 3)
|
||||||
return false;
|
return false;
|
||||||
switch (cend_state) {
|
switch (cend_state) {
|
||||||
case 0:
|
case 0:
|
||||||
|
@ -904,7 +904,7 @@ ErrorOr<StringView, ParseError> Parser::parse_char_data()
|
||||||
case 2:
|
case 2:
|
||||||
if (ch == '>') {
|
if (ch == '>') {
|
||||||
cend_state++;
|
cend_state++;
|
||||||
return false;
|
return true;
|
||||||
}
|
}
|
||||||
cend_state = 0;
|
cend_state = 0;
|
||||||
return true;
|
return true;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue