diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h index 683ea61dda..f8803dc801 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h @@ -42,6 +42,8 @@ namespace Web::HTML { RefPtr parse_html_document(StringView, const AK::URL&, const String& encoding); class HTMLParser { + friend class HTMLTokenizer; + public: HTMLParser(DOM::Document&, StringView input, const String& encoding); ~HTMLParser(); diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp index 63c996a2d6..37be0a014d 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2020, Andreas Kling + * Copyright (c) 2022, Linus Groh * * SPDX-License-Identifier: BSD-2-Clause */ @@ -9,8 +10,10 @@ #include #include #include +#include #include #include +#include #include namespace Web::HTML { @@ -394,7 +397,15 @@ _StartOfFunction: SWITCH_TO(DOCTYPE); } if (consume_next_if_match("[CDATA[")) { - TODO(); + // We keep the parser optional so that syntax highlighting can be lexer-only. + // The parser registers itself with the lexer it creates. + if (m_parser != nullptr && m_parser->adjusted_current_node().namespace_() != Namespace::HTML) { + SWITCH_TO(CDATASection); + } else { + create_new_token(HTMLToken::Type::Comment); + m_current_builder.append("[CDATA["); + SWITCH_TO_WITH_UNCLEAN_BUILDER(BogusComment); + } } ANYTHING_ELSE {