From 2e4b6fd1ac0e768393b2d66fa1f404b059e08ca5 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Wed, 11 Aug 2021 09:58:08 -0400 Subject: [PATCH] LibRegex: Ensure escaped code points are exactly 4 digits in length --- Tests/LibRegex/Regex.cpp | 2 ++ Userland/Libraries/LibRegex/RegexParser.cpp | 13 ++++++++----- Userland/Libraries/LibRegex/RegexParser.h | 4 ++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp index fc72472d36..3a4f26a348 100644 --- a/Tests/LibRegex/Regex.cpp +++ b/Tests/LibRegex/Regex.cpp @@ -503,6 +503,8 @@ TEST_CASE(ECMA262_parse) { "(?", regex::Error::InvalidCaptureGroup }, { "\\u1234", regex::Error::NoError, regex::ECMAScriptFlags::Unicode }, { "[\\u1234]", regex::Error::NoError, regex::ECMAScriptFlags::Unicode }, + { "\\u1", regex::Error::InvalidPattern, regex::ECMAScriptFlags::Unicode }, + { "[\\u1]", regex::Error::InvalidPattern, regex::ECMAScriptFlags::Unicode }, { ",(?", regex::Error::InvalidCaptureGroup }, // #4583 { "{1}", regex::Error::InvalidPattern }, { "{1,2}", regex::Error::InvalidPattern }, diff --git a/Userland/Libraries/LibRegex/RegexParser.cpp b/Userland/Libraries/LibRegex/RegexParser.cpp index 26d0d31320..c60bb7d7e0 100644 --- a/Userland/Libraries/LibRegex/RegexParser.cpp +++ b/Userland/Libraries/LibRegex/RegexParser.cpp @@ -1083,7 +1083,7 @@ bool ECMA262Parser::parse_quantifiable_assertion(ByteCode& stack, size_t&, bool return false; } -StringView ECMA262Parser::read_digits_as_string(ReadDigitsInitialZeroState initial_zero, bool hex, int max_count) +StringView ECMA262Parser::read_digits_as_string(ReadDigitsInitialZeroState initial_zero, bool hex, int max_count, int min_count) { if (!match(TokenType::Char)) return {}; @@ -1109,12 +1109,15 @@ StringView ECMA262Parser::read_digits_as_string(ReadDigitsInitialZeroState initi ++count; } + if (count < min_count) + return {}; + return StringView { start_token.value().characters_without_null_termination(), offset }; } -Optional ECMA262Parser::read_digits(ECMA262Parser::ReadDigitsInitialZeroState initial_zero, bool hex, int max_count) +Optional ECMA262Parser::read_digits(ECMA262Parser::ReadDigitsInitialZeroState initial_zero, bool hex, int max_count, int min_count) { - auto str = read_digits_as_string(initial_zero, hex, max_count); + auto str = read_digits_as_string(initial_zero, hex, max_count, min_count); if (str.is_empty()) return {}; if (hex) @@ -1483,7 +1486,7 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini return false; } - if (auto code_point = read_digits(ReadDigitsInitialZeroState::Allow, true, 4); code_point.has_value()) { + if (auto code_point = read_digits(ReadDigitsInitialZeroState::Allow, true, 4, 4); code_point.has_value()) { // In Unicode mode, we need to combine surrogate pairs into a single code point. But we also need to be // rather forgiving if the surrogate pairs are invalid. So if a second code unit follows this code unit, // but doesn't form a valid surrogate pair, insert bytecode for both code units individually. @@ -1811,7 +1814,7 @@ bool ECMA262Parser::parse_nonempty_class_ranges(Vector& } if (try_skip("u")) { - if (auto code_point = read_digits(ReadDigitsInitialZeroState::Allow, true, 4); code_point.has_value()) { + if (auto code_point = read_digits(ReadDigitsInitialZeroState::Allow, true, 4, 4); code_point.has_value()) { // FIXME: While code point ranges are supported, code point matches as "Char" are not! return { CharClassRangeElement { .code_point = code_point.value(), .is_character_class = false } }; } else if (!unicode) { diff --git a/Userland/Libraries/LibRegex/RegexParser.h b/Userland/Libraries/LibRegex/RegexParser.h index 9f1a934406..25a9677189 100644 --- a/Userland/Libraries/LibRegex/RegexParser.h +++ b/Userland/Libraries/LibRegex/RegexParser.h @@ -210,8 +210,8 @@ private: Allow, Disallow, }; - StringView read_digits_as_string(ReadDigitsInitialZeroState initial_zero = ReadDigitsInitialZeroState::Allow, bool hex = false, int max_count = -1); - Optional read_digits(ReadDigitsInitialZeroState initial_zero = ReadDigitsInitialZeroState::Allow, bool hex = false, int max_count = -1); + StringView read_digits_as_string(ReadDigitsInitialZeroState initial_zero = ReadDigitsInitialZeroState::Allow, bool hex = false, int max_count = -1, int min_count = -1); + Optional read_digits(ReadDigitsInitialZeroState initial_zero = ReadDigitsInitialZeroState::Allow, bool hex = false, int max_count = -1, int min_count = -1); StringView read_capture_group_specifier(bool take_starting_angle_bracket = false); struct Script {