1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-16 18:25:06 +00:00

LibWeb: Plumb content encoding into the new HTML parser

We still don't handle non-ASCII input correctly, but at least now we'll
convert e.g. ISO-8859-1 to UTF-8 before starting to tokenize.
This patch also makes "view source" work with the new parser. :^)
This commit is contained in:
Andreas Kling 2020-05-28 12:35:19 +02:00
parent 772b51038e
commit 5e53c45113
6 changed files with 18 additions and 9 deletions

View file

@@ -24,7 +24,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#define PARSER_DEBUG
+//#define PARSER_DEBUG
#include <AK/Utf32View.h>
#include <LibWeb/DOM/Comment.h>
@@ -51,8 +51,8 @@
namespace Web {
-HTMLDocumentParser::HTMLDocumentParser(const StringView& input)
-: m_tokenizer(input)
+HTMLDocumentParser::HTMLDocumentParser(const StringView& input, const String& encoding)
+: m_tokenizer(input, encoding)
{
}
@@ -64,6 +64,7 @@ void HTMLDocumentParser::run(const URL& url)
{
m_document = adopt(*new Document);
m_document->set_url(url);
+m_document->set_source(m_tokenizer.source());
for (;;) {
auto optional_token = m_tokenizer.next_token();