mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 12:48:10 +00:00
LibHTML: Parse HTML escape sequences
This commit is contained in:
parent
6491493e26
commit
db8a1a6aba
1 changed file with 29 additions and 5 deletions
|
@@ -1,4 +1,5 @@
|
||||||
#include <AK/NonnullRefPtrVector.h>
|
#include <AK/NonnullRefPtrVector.h>
|
||||||
|
#include <AK/StringBuilder.h>
|
||||||
#include <LibHTML/DOM/Element.h>
|
#include <LibHTML/DOM/Element.h>
|
||||||
#include <LibHTML/DOM/Text.h>
|
#include <LibHTML/DOM/Text.h>
|
||||||
#include <LibHTML/Parser/HTMLParser.h>
|
#include <LibHTML/Parser/HTMLParser.h>
|
||||||
|
@@ -54,7 +55,7 @@ NonnullRefPtr<Document> parse_html(const String& html)
|
||||||
|
|
||||||
auto state = State::Free;
|
auto state = State::Free;
|
||||||
|
|
||||||
Vector<char, 256> text_buffer;
|
StringBuilder text_buffer;
|
||||||
|
|
||||||
Vector<char, 32> tag_name_buffer;
|
Vector<char, 32> tag_name_buffer;
|
||||||
|
|
||||||
|
@@ -74,9 +75,8 @@ NonnullRefPtr<Document> parse_html(const String& html)
|
||||||
attribute_name_buffer.clear();
|
attribute_name_buffer.clear();
|
||||||
if (new_state == State::BeforeAttributeValue)
|
if (new_state == State::BeforeAttributeValue)
|
||||||
attribute_value_buffer.clear();
|
attribute_value_buffer.clear();
|
||||||
if (state == State::Free && !text_buffer.is_empty()) {
|
if (state == State::Free && !text_buffer.string_view().is_empty()) {
|
||||||
auto text_node = adopt(*new Text(String::copy(text_buffer)));
|
auto text_node = adopt(*new Text(text_buffer.to_string()));
|
||||||
text_buffer.clear();
|
|
||||||
node_stack.last().append_child(text_node);
|
node_stack.last().append_child(text_node);
|
||||||
}
|
}
|
||||||
state = new_state;
|
state = new_state;
|
||||||
|
@@ -120,7 +120,31 @@ NonnullRefPtr<Document> parse_html(const String& html)
|
||||||
move_to_state(State::BeforeTagName);
|
move_to_state(State::BeforeTagName);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
text_buffer.append(ch);
|
if (ch != '&') {
|
||||||
|
text_buffer.append(ch);
|
||||||
|
} else {
|
||||||
|
struct Escape {
|
||||||
|
const char* code;
|
||||||
|
const char* value;
|
||||||
|
};
|
||||||
|
static Escape escapes[] = {
|
||||||
|
{ "&lt;", "<" },
|
||||||
|
{ "&gt;", ">" },
|
||||||
|
{ "&amp;", "&" }
|
||||||
|
};
|
||||||
|
auto rest_of_html = html.substring_view(i, html.length() - i);
|
||||||
|
bool found = false;
|
||||||
|
for (auto& escape : escapes) {
|
||||||
|
if (rest_of_html.starts_with(escape.code)) {
|
||||||
|
text_buffer.append(escape.value);
|
||||||
|
found = true;
|
||||||
|
i += strlen(escape.code) - 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!found)
|
||||||
|
dbg() << "Unhandled escape sequence";
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case State::BeforeTagName:
|
case State::BeforeTagName:
|
||||||
if (ch == '/') {
|
if (ch == '/') {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue