mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 02:38:11 +00:00
LibHTML: Preserve UTF-8 codepoints when collapsing whitespace
This is extremely awkward and I'm sure there are many better ways to achieve this.
This commit is contained in:
parent
f4e6dae6fe
commit
07cbe2daa4
1 changed file with 11 additions and 8 deletions
|
@@ -141,16 +141,19 @@ void LayoutText::split_into_lines(LayoutBlock& container)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Collapse whitespace into single spaces
|
// Collapse whitespace into single spaces
|
||||||
auto& raw_text = node().data();
|
auto utf8_view = Utf8View(node().data());
|
||||||
StringBuilder builder(raw_text.length());
|
StringBuilder builder(node().data().length());
|
||||||
for (int i = 0; i < raw_text.length(); ++i) {
|
for (auto it = utf8_view.begin(); it != utf8_view.end(); ++it) {
|
||||||
if (!isspace(raw_text[i])) {
|
if (!isspace(*it)) {
|
||||||
builder.append(raw_text[i]);
|
builder.append(utf8_view.as_string().characters_without_null_termination() + utf8_view.byte_offset_of(it), it.codepoint_length_in_bytes());
|
||||||
} else {
|
} else {
|
||||||
builder.append(' ');
|
builder.append(' ');
|
||||||
while (i < raw_text.length() && isspace(raw_text[i]))
|
auto prev = it;
|
||||||
++i;
|
while (it != utf8_view.end() && isspace(*it)) {
|
||||||
--i;
|
prev = it;
|
||||||
|
++it;
|
||||||
|
}
|
||||||
|
it = prev;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m_text_for_rendering = builder.to_string();
|
m_text_for_rendering = builder.to_string();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue