diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp index 7a14f30eb9..d7630fb25d 100644 --- a/Tests/LibRegex/Regex.cpp +++ b/Tests/LibRegex/Regex.cpp @@ -687,6 +687,8 @@ TEST_CASE(ECMA262_unicode_match) ECMAScriptFlags options {}; }; _test tests[] { + { "\xf0\x9d\x8c\x86"sv, "abcdef"sv, false, ECMAScriptFlags::Unicode }, + { "[\xf0\x9d\x8c\x86]"sv, "abcdef"sv, false, ECMAScriptFlags::Unicode }, { "\\ud83d"sv, "😀"sv, true }, { "\\ud83d"sv, "😀"sv, false, ECMAScriptFlags::Unicode }, { "\\ude00"sv, "😀"sv, true }, diff --git a/Userland/Libraries/LibRegex/RegexParser.cpp b/Userland/Libraries/LibRegex/RegexParser.cpp index 02598f5da9..6e7f4eba8d 100644 --- a/Userland/Libraries/LibRegex/RegexParser.cpp +++ b/Userland/Libraries/LibRegex/RegexParser.cpp @@ -145,9 +145,9 @@ ALWAYS_INLINE bool Parser::lookahead_any(StringView str) return false; } -ALWAYS_INLINE char Parser::skip() +ALWAYS_INLINE unsigned char Parser::skip() { - char ch; + unsigned char ch; if (m_parser_state.current_token.value().length() == 1) { ch = m_parser_state.current_token.value()[0]; } else { @@ -1287,7 +1287,7 @@ bool ECMA262Parser::parse_atom(ByteCode& stack, size_t& match_length_minimum, bo // Also part of AtomEscape. auto token = consume(); match_length_minimum += 1; - stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)token.value()[1] } }); + stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (u8)token.value()[1] } }); return true; } if (try_skip("\\")) { @@ -1326,7 +1326,7 @@ bool ECMA262Parser::parse_atom(ByteCode& stack, size_t& match_length_minimum, bo if (m_should_use_browser_extended_grammar) { auto token = consume(); match_length_minimum += 1; - stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)token.value()[0] } }); + stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (u8)token.value()[0] } }); return true; } else { return false; @@ -1336,7 +1336,7 @@ bool ECMA262Parser::parse_atom(ByteCode& stack, size_t& match_length_minimum, bo if (match_ordinary_characters()) { auto token = consume().value(); match_length_minimum += 1; - stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)token[0] } }); + stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (u8)token[0] } }); return true; } @@ -1594,7 +1594,7 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini // Allow all SourceCharacter's as escapes here. auto token = consume(); match_length_minimum += 1; - stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)token.value()[0] } }); + stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (u8)token.value()[0] } }); return true; } diff --git a/Userland/Libraries/LibRegex/RegexParser.h b/Userland/Libraries/LibRegex/RegexParser.h index 480320037a..33bccca223 100644 --- a/Userland/Libraries/LibRegex/RegexParser.h +++ b/Userland/Libraries/LibRegex/RegexParser.h @@ -84,7 +84,7 @@ protected: ALWAYS_INLINE Optional consume_escaped_code_point(bool unicode); ALWAYS_INLINE bool try_skip(StringView); ALWAYS_INLINE bool lookahead_any(StringView); - ALWAYS_INLINE char skip(); + ALWAYS_INLINE unsigned char skip(); ALWAYS_INLINE void back(size_t = 1); ALWAYS_INLINE void reset(); ALWAYS_INLINE bool done() const;