1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 10:27:35 +00:00

Everywhere: Replace ctype.h to avoid narrowing conversions

This replaces ctype.h with AK/CharacterTypes.h everywhere I could find
issues with narrowing conversions. While using it will probably make
sense almost everywhere in the future, the most critical places should
have been addressed.
This commit is contained in:
Max Wipfli 2021-06-01 21:18:08 +02:00 committed by Andreas Kling
parent 1c9d87c455
commit bc8d16ad28
16 changed files with 153 additions and 266 deletions

View file

@ -4,11 +4,11 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/CharacterTypes.h>
#include <AK/SourceLocation.h>
#include <AK/Vector.h>
#include <LibTextCodec/Decoder.h>
#include <LibWeb/CSS/Parser/Tokenizer.h>
#include <ctype.h>
#define CSS_TOKENIZER_TRACE 0
@ -20,11 +20,6 @@ static inline void log_parse_error(const SourceLocation& location = SourceLocati
dbgln_if(CSS_TOKENIZER_TRACE, "Parse error (css tokenization) {} ", location);
}
// Returns true when code_point lies in the surrogate range U+D800..U+DFFF.
static inline bool is_surrogate(u32 code_point)
{
    constexpr u32 first_surrogate = 0xd800;
    constexpr u32 last_surrogate = 0xdfff;
    return first_surrogate <= code_point && code_point <= last_surrogate;
}
static inline bool is_quotation_mark(u32 code_point)
{
return code_point == 0x22;
@ -35,24 +30,14 @@ static inline bool is_greater_than_maximum_allowed_code_point(u32 code_point)
return code_point > 0x10FFFF;
}
// Returns true when code_point is an ASCII hexadecimal digit (0-9, A-F or a-f).
//
// This deliberately avoids isxdigit(): the <ctype.h> classifiers take an int and are
// only defined for EOF and values representable as unsigned char, so handing them an
// arbitrary u32 code point is undefined behaviour.
static inline bool is_hex_digit(u32 code_point)
{
    if (code_point >= '0' && code_point <= '9')
        return true;
    if (code_point >= 'A' && code_point <= 'F')
        return true;
    return code_point >= 'a' && code_point <= 'f';
}
// Returns true when code_point is U+005F LOW LINE ('_').
static inline bool is_low_line(u32 code_point)
{
    constexpr u32 low_line = U'_';
    return low_line == code_point;
}
// Returns true for every code point above the 7-bit range (i.e. greater than U+007F).
static inline bool is_non_ascii(u32 code_point)
{
    constexpr u32 last_seven_bit_value = 0x7F;
    return code_point > last_seven_bit_value;
}
// Returns true when code_point may start a name: it satisfies is_ascii_alpha(), or it
// is not ASCII at all (is_ascii() is false), or it is a low line.
//
// Only the AK/CharacterTypes.h classifiers are used here. The earlier isalpha()-based
// check took the u32 code point as an int, which is undefined for values that are not
// representable as unsigned char, and it made the check below it unreachable.
static inline bool is_name_start_code_point(u32 code_point)
{
    return is_ascii_alpha(code_point) || !is_ascii(code_point) || is_low_line(code_point);
}
static inline bool is_hyphen_minus(u32 code_point)
@ -62,7 +47,7 @@ static inline bool is_hyphen_minus(u32 code_point)
// Returns true when code_point may appear inside a name: it is a name-start code point,
// or it satisfies is_ascii_digit(), or it is a hyphen-minus.
//
// Only the AK/CharacterTypes.h digit check is used. The earlier isdigit()-based return
// passed the u32 code point as an int (undefined for values not representable as
// unsigned char) and left the check below it unreachable.
static inline bool is_name_code_point(u32 code_point)
{
    return is_name_start_code_point(code_point) || is_ascii_digit(code_point) || is_hyphen_minus(code_point);
}
static inline bool is_non_printable(u32 code_point)
@ -303,12 +288,12 @@ u32 Tokenizer::consume_escaped_code_point()
auto input = code_point.value();
if (is_hex_digit(input)) {
if (is_ascii_hex_digit(input)) {
StringBuilder builder;
builder.append_code_point(input);
size_t counter = 0;
while (is_hex_digit(peek_code_point().value()) && counter++ < 5) {
while (is_ascii_hex_digit(peek_code_point().value()) && counter++ < 5) {
builder.append_code_point(next_code_point().value());
}
@ -317,7 +302,7 @@ u32 Tokenizer::consume_escaped_code_point()
}
auto unhexed = strtoul(builder.to_string().characters(), nullptr, 16);
if (unhexed == 0 || is_surrogate(unhexed) || is_greater_than_maximum_allowed_code_point(unhexed)) {
if (unhexed == 0 || is_unicode_surrogate(unhexed) || is_greater_than_maximum_allowed_code_point(unhexed)) {
return REPLACEMENT_CHARACTER;
}
@ -378,14 +363,14 @@ CSSNumber Tokenizer::consume_a_number()
for (;;) {
auto digits = peek_code_point().value();
if (!isdigit(digits))
if (!is_ascii_digit(digits))
break;
repr.append_code_point(next_code_point().value());
}
auto maybe_number = peek_twin().value();
if (is_full_stop(maybe_number.first) && isdigit(maybe_number.second)) {
if (is_full_stop(maybe_number.first) && is_ascii_digit(maybe_number.second)) {
repr.append_code_point(next_code_point().value());
repr.append_code_point(next_code_point().value());
@ -393,7 +378,7 @@ CSSNumber Tokenizer::consume_a_number()
for (;;) {
auto digits = peek_code_point();
if (digits.has_value() && !isdigit(digits.value()))
if (digits.has_value() && !is_ascii_digit(digits.value()))
break;
repr.append_code_point(next_code_point().value());
@ -403,12 +388,12 @@ CSSNumber Tokenizer::consume_a_number()
auto maybe_exp = peek_triplet().value();
if (is_E(maybe_exp.first) || is_e(maybe_exp.first)) {
if (is_plus_sign(maybe_exp.second) || is_hyphen_minus(maybe_exp.second)) {
if (isdigit(maybe_exp.third)) {
if (is_ascii_digit(maybe_exp.third)) {
repr.append_code_point(next_code_point().value());
repr.append_code_point(next_code_point().value());
repr.append_code_point(next_code_point().value());
}
} else if (isdigit(maybe_exp.second)) {
} else if (is_ascii_digit(maybe_exp.second)) {
repr.append_code_point(next_code_point().value());
repr.append_code_point(next_code_point().value());
}
@ -417,7 +402,7 @@ CSSNumber Tokenizer::consume_a_number()
for (;;) {
auto digits = peek_code_point().value();
if (!isdigit(digits))
if (!is_ascii_digit(digits))
break;
repr.append_code_point(next_code_point().value());
@ -588,19 +573,19 @@ bool Tokenizer::starts_with_a_number() const
bool Tokenizer::starts_with_a_number(U32Triplet values)
{
if (is_plus_sign(values.first) || is_hyphen_minus(values.first)) {
if (isdigit(values.second))
if (is_ascii_digit(values.second))
return true;
if (is_full_stop(values.second) && isdigit(values.third))
if (is_full_stop(values.second) && is_ascii_digit(values.third))
return true;
return false;
}
if (is_full_stop(values.first))
return isdigit(values.second);
return is_ascii_digit(values.second);
if (isdigit(values.first))
if (is_ascii_digit(values.first))
return true;
return false;
@ -902,7 +887,7 @@ Token Tokenizer::consume_a_token()
return create_new_token(Token::TokenType::CloseCurly);
}
if (isdigit(input)) {
if (is_ascii_digit(input)) {
dbgln_if(CSS_TOKENIZER_TRACE, "is digit");
reconsume_current_input_code_point();
return consume_a_numeric_token();

View file

@ -6,6 +6,7 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/CharacterTypes.h>
#include <AK/StringBuilder.h>
#include <AK/Utf8View.h>
#include <LibCore/Timer.h>
@ -53,7 +54,6 @@
#include <LibWeb/Page/BrowsingContext.h>
#include <LibWeb/SVG/TagNames.h>
#include <LibWeb/UIEvents/MouseEvent.h>
#include <ctype.h>
namespace Web::DOM {
@ -253,7 +253,7 @@ String Document::title() const
StringBuilder builder;
bool last_was_space = false;
for (auto code_point : Utf8View(raw_title)) {
if (isspace(code_point)) {
if (is_ascii_space(code_point)) {
last_was_space = true;
} else {
if (last_was_space && !builder.is_empty())

View file

@ -4,13 +4,13 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/CharacterTypes.h>
#include <AK/Debug.h>
#include <AK/SourceLocation.h>
#include <LibTextCodec/Decoder.h>
#include <LibWeb/HTML/Parser/Entities.h>
#include <LibWeb/HTML/Parser/HTMLToken.h>
#include <LibWeb/HTML/Parser/HTMLTokenizer.h>
#include <ctype.h>
#include <string.h>
namespace Web::HTML {
@ -93,25 +93,25 @@ namespace Web::HTML {
if (!current_input_character.has_value())
// Each ON_* macro expands to an `if` header that guards the block written after it.
// Every condition first checks that current_input_character holds a value and then
// classifies that value with a helper from AK/CharacterTypes.h. The <ctype.h> functions
// are not used because they take an int and are undefined for code points that are not
// representable as unsigned char.

// Current input character satisfies is_ascii_alpha().
#define ON_ASCII_ALPHA \
    if (current_input_character.has_value() && is_ascii_alpha(current_input_character.value()))

// Current input character satisfies is_ascii_alphanumeric().
#define ON_ASCII_ALPHANUMERIC \
    if (current_input_character.has_value() && is_ascii_alphanumeric(current_input_character.value()))

// Current input character satisfies is_ascii_upper_alpha().
#define ON_ASCII_UPPER_ALPHA \
    if (current_input_character.has_value() && is_ascii_upper_alpha(current_input_character.value()))

// Current input character satisfies is_ascii_lower_alpha().
#define ON_ASCII_LOWER_ALPHA \
    if (current_input_character.has_value() && is_ascii_lower_alpha(current_input_character.value()))

// Current input character satisfies is_ascii_digit().
#define ON_ASCII_DIGIT \
    if (current_input_character.has_value() && is_ascii_digit(current_input_character.value()))

// Current input character satisfies is_ascii_hex_digit().
#define ON_ASCII_HEX_DIGIT \
    if (current_input_character.has_value() && is_ascii_hex_digit(current_input_character.value()))

// Current input character is a tab, line feed, form feed or space. The is_ascii() guard
// runs before the value is looked up in the character set, and the StringView literal
// does not include a terminating NUL, so a code point of 0 is not treated as whitespace.
#define ON_WHITESPACE \
    if (current_input_character.has_value() && is_ascii(current_input_character.value()) && "\t\n\f "sv.contains(current_input_character.value()))
#define ANYTHING_ELSE if (1)
@ -172,26 +172,6 @@ static inline void log_parse_error(const SourceLocation& location = SourceLocati
dbgln_if(TOKENIZER_TRACE_DEBUG, "Parse error (tokenization) {}", location);
}
// Returns true when code_point lies in the surrogate range U+D800..U+DFFF.
static inline bool is_surrogate(u32 code_point)
{
    if (code_point < 0xd800)
        return false;
    return code_point <= 0xdfff;
}
// Returns true when code_point is in U+FDD0..U+FDEF, or is at or below U+10FFFF with its
// low 16 bits equal to 0xFFFE or 0xFFFF.
static inline bool is_noncharacter(u32 code_point)
{
    // Everything below U+FDD0 or above U+10FFFF is rejected up front.
    if (code_point < 0xfdd0 || code_point > 0x10ffff)
        return false;

    // The contiguous block U+FDD0..U+FDEF.
    if (code_point <= 0xfdef)
        return true;

    // Masking off bit 0 leaves 0xFFFE for the last two code points of each 64K plane.
    return (code_point & 0xfffe) == 0xfffe;
}
// Returns true when code_point is in the range U+0000..U+001F.
static inline bool is_c0_control(u32 code_point)
{
    constexpr u32 first_code_point_after_c0 = 0x20;
    return code_point < first_code_point_after_c0;
}
// Returns true when code_point satisfies is_c0_control() or lies in U+007F..U+009F.
static inline bool is_control(u32 code_point)
{
    if (is_c0_control(code_point))
        return true;

    constexpr u32 range_start = 0x7f;
    constexpr u32 range_end = 0x9f;
    return range_start <= code_point && code_point <= range_end;
}
Optional<u32> HTMLTokenizer::next_code_point()
{
if (m_utf8_iterator == m_utf8_view.end())
@ -322,7 +302,7 @@ _StartOfFunction:
}
ON_ASCII_UPPER_ALPHA
{
m_current_token.m_tag.tag_name.append(tolower(current_input_character.value()));
m_current_token.m_tag.tag_name.append(to_ascii_lowercase(current_input_character.value()));
m_current_token.m_end_position = nth_last_position(0);
continue;
}
@ -458,7 +438,7 @@ _StartOfFunction:
ON_ASCII_UPPER_ALPHA
{
create_new_token(HTMLToken::Type::DOCTYPE);
m_current_token.m_doctype.name.append(tolower(current_input_character.value()));
m_current_token.m_doctype.name.append(to_ascii_lowercase(current_input_character.value()));
m_current_token.m_doctype.missing_name = false;
SWITCH_TO(DOCTYPEName);
}
@ -507,7 +487,7 @@ _StartOfFunction:
}
ON_ASCII_UPPER_ALPHA
{
m_current_token.m_doctype.name.append(tolower(current_input_character.value()));
m_current_token.m_doctype.name.append(to_ascii_lowercase(current_input_character.value()));
continue;
}
ON(0)
@ -550,10 +530,10 @@ _StartOfFunction:
}
ANYTHING_ELSE
{
if (toupper(current_input_character.value()) == 'P' && consume_next_if_match("UBLIC", CaseSensitivity::CaseInsensitive)) {
if (to_ascii_uppercase(current_input_character.value()) == 'P' && consume_next_if_match("UBLIC", CaseSensitivity::CaseInsensitive)) {
SWITCH_TO(AfterDOCTYPEPublicKeyword);
}
if (toupper(current_input_character.value()) == 'S' && consume_next_if_match("YSTEM", CaseSensitivity::CaseInsensitive)) {
if (to_ascii_uppercase(current_input_character.value()) == 'S' && consume_next_if_match("YSTEM", CaseSensitivity::CaseInsensitive)) {
SWITCH_TO(AfterDOCTYPESystemKeyword);
}
log_parse_error();
@ -1068,7 +1048,7 @@ _StartOfFunction:
}
ON_ASCII_UPPER_ALPHA
{
m_current_token.m_tag.attributes.last().local_name_builder.append_code_point(tolower(current_input_character.value()));
m_current_token.m_tag.attributes.last().local_name_builder.append_code_point(to_ascii_lowercase(current_input_character.value()));
continue;
}
ON(0)
@ -1558,7 +1538,7 @@ _StartOfFunction:
if (consumed_as_part_of_an_attribute() && !match.value().entity.ends_with(';')) {
auto next_code_point = peek_code_point(0);
if (next_code_point.has_value() && (next_code_point.value() == '=' || isalnum(next_code_point.value()))) {
if (next_code_point.has_value() && (next_code_point.value() == '=' || is_ascii_alphanumeric(next_code_point.value()))) {
FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
SWITCH_TO_RETURN_STATE;
}
@ -1720,14 +1700,14 @@ _StartOfFunction:
log_parse_error();
m_character_reference_code = 0xFFFD;
}
if (is_surrogate(m_character_reference_code)) {
if (is_unicode_surrogate(m_character_reference_code)) {
log_parse_error();
m_character_reference_code = 0xFFFD;
}
if (is_noncharacter(m_character_reference_code)) {
if (is_unicode_noncharacter(m_character_reference_code)) {
log_parse_error();
}
if (m_character_reference_code == 0xd || (is_control(m_character_reference_code) && !isspace(m_character_reference_code))) {
if (m_character_reference_code == 0xd || (is_unicode_control(m_character_reference_code) && !is_ascii_space(m_character_reference_code))) {
log_parse_error();
constexpr struct {
u32 number;
@ -1870,7 +1850,7 @@ _StartOfFunction:
}
ON_ASCII_UPPER_ALPHA
{
m_current_token.m_tag.tag_name.append(tolower(current_input_character.value()));
m_current_token.m_tag.tag_name.append(to_ascii_lowercase(current_input_character.value()));
m_temporary_buffer.append(current_input_character.value());
continue;
}
@ -1980,7 +1960,7 @@ _StartOfFunction:
}
ON_ASCII_UPPER_ALPHA
{
m_current_token.m_tag.tag_name.append(tolower(current_input_character.value()));
m_current_token.m_tag.tag_name.append(to_ascii_lowercase(current_input_character.value()));
m_temporary_buffer.append(current_input_character.value());
continue;
}
@ -2193,7 +2173,7 @@ _StartOfFunction:
}
ON_ASCII_UPPER_ALPHA
{
m_current_token.m_tag.tag_name.append(tolower(current_input_character.value()));
m_current_token.m_tag.tag_name.append(to_ascii_lowercase(current_input_character.value()));
m_temporary_buffer.append(current_input_character.value());
continue;
}
@ -2247,7 +2227,7 @@ _StartOfFunction:
}
ON_ASCII_UPPER_ALPHA
{
m_temporary_buffer.append(tolower(current_input_character.value()));
m_temporary_buffer.append(to_ascii_lowercase(current_input_character.value()));
EMIT_CURRENT_CHARACTER;
}
ON_ASCII_LOWER_ALPHA
@ -2393,7 +2373,7 @@ _StartOfFunction:
}
ON_ASCII_UPPER_ALPHA
{
m_temporary_buffer.append(tolower(current_input_character.value()));
m_temporary_buffer.append(to_ascii_lowercase(current_input_character.value()));
EMIT_CURRENT_CHARACTER;
}
ON_ASCII_LOWER_ALPHA
@ -2512,7 +2492,7 @@ _StartOfFunction:
}
ON_ASCII_UPPER_ALPHA
{
m_current_token.m_tag.tag_name.append(tolower(current_input_character.value()));
m_current_token.m_tag.tag_name.append(to_ascii_lowercase(current_input_character.value()));
m_temporary_buffer.append(current_input_character.value());
continue;
}
@ -2598,7 +2578,7 @@ bool HTMLTokenizer::consume_next_if_match(const StringView& string, CaseSensitiv
// FIXME: This should be more Unicode-aware.
if (case_sensitivity == CaseSensitivity::CaseInsensitive) {
if (code_point.value() < 0x80) {
if (tolower(code_point.value()) != tolower(string[i]))
if (to_ascii_lowercase(code_point.value()) != to_ascii_lowercase(string[i]))
return false;
continue;
}

View file

@ -4,6 +4,7 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/CharacterTypes.h>
#include <AK/ScopeGuard.h>
#include <AK/StringBuilder.h>
#include <LibGfx/Painter.h>
@ -13,7 +14,6 @@
#include <LibWeb/Layout/Label.h>
#include <LibWeb/Layout/TextNode.h>
#include <LibWeb/Page/BrowsingContext.h>
#include <ctype.h>
namespace Web::Layout {
@ -30,7 +30,7 @@ TextNode::~TextNode()
static bool is_all_whitespace(const StringView& string)
{
for (size_t i = 0; i < string.length(); ++i) {
if (!isspace(string[i]))
if (!is_ascii_space(string[i]))
return false;
}
return true;
@ -116,7 +116,7 @@ void TextNode::compute_text_for_rendering(bool collapse, bool previous_is_empty_
auto it = utf8_view.begin();
auto skip_over_whitespace = [&] {
auto prev = it;
while (it != utf8_view.end() && isspace(*it)) {
while (it != utf8_view.end() && is_ascii_space(*it)) {
prev = it;
++it;
}
@ -125,7 +125,7 @@ void TextNode::compute_text_for_rendering(bool collapse, bool previous_is_empty_
if (previous_is_empty_or_ends_in_whitespace)
skip_over_whitespace();
for (; it != utf8_view.end(); ++it) {
if (!isspace(*it)) {
if (!is_ascii_space(*it)) {
builder.append(utf8_view.as_string().characters_without_null_termination() + utf8_view.byte_offset_of(it), it.code_point_length_in_bytes());
} else {
builder.append(' ');
@ -160,7 +160,7 @@ void TextNode::split_into_lines_by_rules(InlineFormattingContext& context, Layou
float chunk_width;
if (do_wrap_lines) {
if (do_collapse && isspace(*chunk.view.begin()) && line_boxes.last().is_empty_or_ends_in_whitespace()) {
if (do_collapse && is_ascii_space(*chunk.view.begin()) && line_boxes.last().is_empty_or_ends_in_whitespace()) {
// This is a non-empty chunk that starts with collapsible whitespace.
// We are at either at the start of a new line, or after something that ended in whitespace,
// so we don't need to contribute our own whitespace to the line. Skip over it instead!
@ -264,7 +264,7 @@ TextNode::ChunkIterator::ChunkIterator(StringView const& text, LayoutMode layout
, m_start_of_chunk(m_utf8_view.begin())
, m_iterator(m_utf8_view.begin())
{
m_last_was_space = !text.is_empty() && isspace(*m_utf8_view.begin());
m_last_was_space = !text.is_empty() && is_ascii_space(*m_utf8_view.begin());
}
Optional<TextNode::Chunk> TextNode::ChunkIterator::next()
@ -286,7 +286,7 @@ Optional<TextNode::Chunk> TextNode::ChunkIterator::next()
return result.release_value();
}
if (m_wrap_lines) {
bool is_space = isspace(*m_iterator);
bool is_space = is_ascii_space(*m_iterator);
if (is_space != m_last_was_space) {
m_last_was_space = is_space;
if (auto result = try_commit_chunk(m_iterator, false); result.has_value())