mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 19:27:44 +00:00
LibWeb: Support named character references (e.g. "&amp;")
This commit is contained in:
parent
ca6e361279
commit
5c35f3c9ba
4 changed files with 2385 additions and 4 deletions
|
@ -85,6 +85,7 @@ set(SOURCES
|
|||
Layout/LineBox.cpp
|
||||
Layout/LineBoxFragment.cpp
|
||||
Parser/CSSParser.cpp
|
||||
Parser/Entities.cpp
|
||||
Parser/HTMLDocumentParser.cpp
|
||||
Parser/HTMLParser.cpp
|
||||
Parser/HTMLToken.cpp
|
||||
|
|
2296
Libraries/LibWeb/Parser/Entities.cpp
Normal file
2296
Libraries/LibWeb/Parser/Entities.cpp
Normal file
File diff suppressed because it is too large
Load diff
43
Libraries/LibWeb/Parser/Entities.h
Normal file
43
Libraries/LibWeb/Parser/Entities.h
Normal file
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Vector.h>
|
||||
|
||||
namespace Web {
|
||||
namespace HTML {
|
||||
|
||||
struct EntityMatch {
|
||||
Vector<u32, 2> codepoints;
|
||||
StringView entity;
|
||||
};
|
||||
|
||||
Optional<EntityMatch> codepoints_from_entity(const StringView&);
|
||||
|
||||
}
|
||||
}
|
|
@ -24,6 +24,7 @@
|
|||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <LibWeb/Parser/Entities.h>
|
||||
#include <LibWeb/Parser/HTMLToken.h>
|
||||
#include <LibWeb/Parser/HTMLTokenizer.h>
|
||||
#include <ctype.h>
|
||||
|
@ -1185,15 +1186,55 @@ _StartOfFunction:
|
|||
|
||||
BEGIN_STATE(NamedCharacterReference)
|
||||
{
|
||||
// FIXME:This is not the right way to implement this state!!
|
||||
auto match = HTML::codepoints_from_entity(m_input.substring_view(m_cursor - 1, m_input.length() - m_cursor + 1));
|
||||
|
||||
if (match.has_value()) {
|
||||
m_cursor += match.value().entity.length();
|
||||
for (auto ch : match.value().entity)
|
||||
m_temporary_buffer.append(ch);
|
||||
|
||||
if (consumed_as_part_of_an_attribute() && match.value().entity.ends_with(';')) {
|
||||
auto next_codepoint = peek_codepoint(0);
|
||||
if (next_codepoint.has_value() && next_codepoint.value() == '=') {
|
||||
FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
|
||||
SWITCH_TO_RETURN_STATE;
|
||||
}
|
||||
}
|
||||
|
||||
if (!match.value().entity.ends_with(';')) {
|
||||
TODO();
|
||||
}
|
||||
|
||||
m_temporary_buffer.clear();
|
||||
m_temporary_buffer.append(match.value().codepoints);
|
||||
|
||||
FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
|
||||
SWITCH_TO_RETURN_STATE;
|
||||
} else {
|
||||
FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
|
||||
SWITCH_TO(AmbiguousAmpersand);
|
||||
}
|
||||
}
|
||||
END_STATE
|
||||
|
||||
BEGIN_STATE(AmbiguousAmpersand)
|
||||
{
|
||||
ON_ASCII_ALPHANUMERIC
|
||||
{
|
||||
if (consumed_as_part_of_an_attribute()) {
|
||||
m_current_token.m_tag.attributes.last().value_builder.append(current_input_character.value());
|
||||
continue;
|
||||
} else {
|
||||
EMIT_CURRENT_CHARACTER;
|
||||
}
|
||||
}
|
||||
ON(';')
|
||||
{
|
||||
SWITCH_TO_RETURN_STATE;
|
||||
TODO();
|
||||
}
|
||||
ANYTHING_ELSE
|
||||
{
|
||||
dbg() << "NamedCharacterReference: '" << (char)current_input_character.value() << "'";
|
||||
continue;
|
||||
RECONSUME_IN_RETURN_STATE;
|
||||
}
|
||||
}
|
||||
END_STATE
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue