1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-16 23:05:08 +00:00

LibWeb: Fix parser interpreting "&quot;" as "&quot"

There was a logic mistake in the entity parser that chose the shorter
matching entity instead of the longer. Fix this and make the entity
lists constexpr while we're here.
This commit is contained in:
Andreas Kling 2020-06-10 10:34:28 +02:00
parent 43951f18e7
commit e836f09094

View file

@ -24,6 +24,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <AK/LogStream.h>
#include <AK/StringView.h> #include <AK/StringView.h>
#include <LibWeb/Parser/Entities.h> #include <LibWeb/Parser/Entities.h>
@ -32,8 +33,8 @@ namespace HTML {
Optional<EntityMatch> codepoints_from_entity(const StringView& entity) Optional<EntityMatch> codepoints_from_entity(const StringView& entity)
{ {
struct { constexpr struct {
const char* entity; StringView entity;
u32 codepoint; u32 codepoint;
} single_codepoint_entities[] = { } single_codepoint_entities[] = {
{ "AElig;", 0x000C6 }, { "AElig;", 0x000C6 },
@ -2176,8 +2177,8 @@ Optional<EntityMatch> codepoints_from_entity(const StringView& entity)
{ "zwnj;", 0x0200C } { "zwnj;", 0x0200C }
}; };
struct { constexpr struct {
const char* entity; StringView entity;
u32 codepoint1; u32 codepoint1;
u32 codepoint2; u32 codepoint2;
} double_codepoint_entities[] = { } double_codepoint_entities[] = {
@ -2280,14 +2281,14 @@ Optional<EntityMatch> codepoints_from_entity(const StringView& entity)
for (auto& single_codepoint_entity : single_codepoint_entities) { for (auto& single_codepoint_entity : single_codepoint_entities) {
if (entity.starts_with(single_codepoint_entity.entity)) { if (entity.starts_with(single_codepoint_entity.entity)) {
if (match.entity.is_null() || entity.length() > match.entity.length()) if (match.entity.is_null() || single_codepoint_entity.entity.length() > match.entity.length())
match = { { single_codepoint_entity.codepoint }, StringView(single_codepoint_entity.entity) }; match = { { single_codepoint_entity.codepoint }, single_codepoint_entity.entity };
} }
} }
for (auto& double_codepoint_entity : double_codepoint_entities) { for (auto& double_codepoint_entity : double_codepoint_entities) {
if (entity.starts_with(double_codepoint_entity.entity)) { if (entity.starts_with(double_codepoint_entity.entity)) {
if (match.entity.is_null() || entity.length() > match.entity.length()) if (match.entity.is_null() || double_codepoint_entity.entity.length() > match.entity.length())
match = EntityMatch { { double_codepoint_entity.codepoint1, double_codepoint_entity.codepoint2 }, StringView(double_codepoint_entity.entity) }; match = EntityMatch { { double_codepoint_entity.codepoint1, double_codepoint_entity.codepoint2 }, StringView(double_codepoint_entity.entity) };
} }
} }