1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 02:38:11 +00:00

LibHTML: Preserve UTF-8 codepoints when collapsing whitespace

This is extremely awkward, and I'm sure there are many better ways to
achieve this.
This commit is contained in:
Andreas Kling 2019-10-18 22:50:44 +02:00
parent f4e6dae6fe
commit 07cbe2daa4

View file

@ -141,16 +141,19 @@ void LayoutText::split_into_lines(LayoutBlock& container)
} }
// Collapse whitespace into single spaces // Collapse whitespace into single spaces
auto& raw_text = node().data(); auto utf8_view = Utf8View(node().data());
StringBuilder builder(raw_text.length()); StringBuilder builder(node().data().length());
for (int i = 0; i < raw_text.length(); ++i) { for (auto it = utf8_view.begin(); it != utf8_view.end(); ++it) {
if (!isspace(raw_text[i])) { if (!isspace(*it)) {
builder.append(raw_text[i]); builder.append(utf8_view.as_string().characters_without_null_termination() + utf8_view.byte_offset_of(it), it.codepoint_length_in_bytes());
} else { } else {
builder.append(' '); builder.append(' ');
while (i < raw_text.length() && isspace(raw_text[i])) auto prev = it;
++i; while (it != utf8_view.end() && isspace(*it)) {
--i; prev = it;
++it;
}
it = prev;
} }
} }
m_text_for_rendering = builder.to_string(); m_text_for_rendering = builder.to_string();