1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 19:27:44 +00:00

LibWeb: Support named character references (e.g. "&amp;")

This commit is contained in:
Andreas Kling 2020-05-28 11:44:19 +02:00
parent ca6e361279
commit 5c35f3c9ba
4 changed files with 2385 additions and 4 deletions

View file

@ -85,6 +85,7 @@ set(SOURCES
Layout/LineBox.cpp
Layout/LineBoxFragment.cpp
Parser/CSSParser.cpp
Parser/Entities.cpp
Parser/HTMLDocumentParser.cpp
Parser/HTMLParser.cpp
Parser/HTMLToken.cpp

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,43 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/StringView.h>
#include <AK/Vector.h>
// Declarations for looking up HTML named character references (entities).
namespace Web {
namespace HTML {
// Result of a successful entity lookup.
struct EntityMatch {
// Code point(s) the matched entity stands for. The tokenizer appends these to
// its temporary buffer in place of the reference text.
Vector<u32, 2> codepoints;
// Text of the entity that was matched. The tokenizer advances its cursor by
// this view's length and inspects whether it ends with ';'.
// NOTE(review): StringView is non-owning; what it points into (entity table
// vs. the caller's input) is decided in Entities.cpp -- confirm the lifetime.
StringView entity;
};
// Tries to match a named character reference against the given text.
// Callers check has_value() on the result; an empty Optional is treated as
// "no entity matched".
// NOTE(review): the exact matching rule (prefix match, longest match, whether
// the leading '&' is expected) lives in Entities.cpp -- confirm there.
Optional<EntityMatch> codepoints_from_entity(const StringView&);
}
}

View file

@ -24,6 +24,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <LibWeb/Parser/Entities.h>
#include <LibWeb/Parser/HTMLToken.h>
#include <LibWeb/Parser/HTMLTokenizer.h>
#include <ctype.h>
@ -1185,15 +1186,55 @@ _StartOfFunction:
BEGIN_STATE(NamedCharacterReference)
{
    // Tokenizer state: "Named character reference state" of the HTML
    // tokenization algorithm. Looks up an entity in the remaining input; on a
    // match the reference is replaced by its code point(s), otherwise the
    // consumed text is flushed and we move on to AmbiguousAmpersand.
    //
    // NOTE(review): the lookup window starts at m_cursor - 1 while the cursor
    // is advanced by the full entity length; this is unchanged from before,
    // but verify it against what codepoints_from_entity() expects and returns.
    auto match = HTML::codepoints_from_entity(m_input.substring_view(m_cursor - 1, m_input.length() - m_cursor + 1));
    if (match.has_value()) {
        m_cursor += match.value().entity.length();
        // Remember the raw entity text so it can be flushed verbatim if the
        // reference turns out not to be replaceable.
        for (auto ch : match.value().entity)
            m_temporary_buffer.append(ch);

        // Historical quirk required by the spec: inside an attribute value, a
        // match that is NOT terminated by ';' and is followed by '=' or an
        // ASCII alphanumeric is not a character reference at all; the raw
        // text is flushed unchanged (think of "?a=b&not=c" in a URL).
        if (consumed_as_part_of_an_attribute() && !match.value().entity.ends_with(';')) {
            auto next_codepoint = peek_codepoint(0);
            if (next_codepoint.has_value()) {
                auto next = next_codepoint.value();
                bool next_is_ascii_alphanumeric = (next >= '0' && next <= '9')
                    || (next >= 'A' && next <= 'Z')
                    || (next >= 'a' && next <= 'z');
                if (next == '=' || next_is_ascii_alphanumeric) {
                    FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
                    SWITCH_TO_RETURN_STATE;
                }
            }
        }

        // FIXME: A match without a trailing ';' is a
        //        missing-semicolon-after-character-reference parse error; the
        //        spec still replaces the reference afterwards. Parse error
        //        reporting is not implemented here yet, so this remains a TODO().
        if (!match.value().entity.ends_with(';')) {
            TODO();
        }

        // Replace the buffered entity text with the code point(s) it denotes.
        m_temporary_buffer.clear();
        m_temporary_buffer.append(match.value().codepoints);

        FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
        SWITCH_TO_RETURN_STATE;
    } else {
        // No entity matched: emit what was consumed so far and let the
        // AmbiguousAmpersand state deal with the rest.
        FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
        SWITCH_TO(AmbiguousAmpersand);
    }
}
END_STATE
BEGIN_STATE(AmbiguousAmpersand)
{
    // Tokenizer state: "Ambiguous ampersand state" of the HTML tokenization
    // algorithm. Entered after an '&' that did not start a known named
    // character reference; passes ASCII alphanumerics through unchanged and
    // hands every other character back to the return state.
    ON_ASCII_ALPHANUMERIC
    {
        if (consumed_as_part_of_an_attribute()) {
            // Inside an attribute the character belongs to the value being built.
            m_current_token.m_tag.attributes.last().value_builder.append(current_input_character.value());
            continue;
        } else {
            EMIT_CURRENT_CHARACTER;
        }
    }
    ON(';')
    {
        // FIXME: This is an unknown-named-character-reference parse error;
        //        it is not reported yet.
        // The ';' must be reconsumed (not dropped) so that the return state
        // processes it as ordinary input.
        RECONSUME_IN_RETURN_STATE;
    }
    ANYTHING_ELSE
    {
        // Any other input ends the ambiguous run; let the return state handle it.
        RECONSUME_IN_RETURN_STATE;
    }
}
END_STATE