diff --git a/Userland/Libraries/LibDiff/Forward.h b/Userland/Libraries/LibDiff/Forward.h index 484bbf0dd9..c21cad11da 100644 --- a/Userland/Libraries/LibDiff/Forward.h +++ b/Userland/Libraries/LibDiff/Forward.h @@ -8,6 +8,8 @@ namespace Diff { +class Parser; + struct Hunk; struct HunkLocation; struct Line; diff --git a/Userland/Libraries/LibDiff/Hunks.cpp b/Userland/Libraries/LibDiff/Hunks.cpp index 10c17ad685..cecce7c9b3 100644 --- a/Userland/Libraries/LibDiff/Hunks.cpp +++ b/Userland/Libraries/LibDiff/Hunks.cpp @@ -10,45 +10,100 @@ namespace Diff { -ErrorOr> parse_hunks(StringView diff) +Optional Parser::consume_unified_location() { - Vector diff_lines = diff.split_view('\n'); - if (diff_lines.is_empty()) - return Vector {}; + auto consume_range = [this](Range& range) { + if (!consume_line_number(range.start_line)) + return false; + if (consume_specific(',')) { + if (!consume_line_number(range.number_of_lines)) + return false; + } else { + range.number_of_lines = 1; + } + return true; + }; + + if (!consume_specific("@@ -")) + return {}; + + HunkLocation location; + + if (!consume_range(location.old_range)) + return {}; + + if (!consume_specific(" +")) + return {}; + + if (!consume_range(location.new_range)) + return {}; + + if (!consume_specific(" @@")) + return {}; + + return location; +} + +bool Parser::consume_line_number(size_t& number) +{ + auto line = consume_while(is_ascii_digit); + + auto maybe_number = line.to_uint(); + if (!maybe_number.has_value()) + return false; + + number = maybe_number.value(); + return true; +} + +ErrorOr> Parser::parse_hunks() +{ Vector hunks; - size_t line_index = 0; - HunkLocation current_location {}; + while (!is_eof()) { + // Try an locate a hunk location in this hunk. It may be prefixed with information. + auto maybe_location = consume_unified_location(); + consume_line(); - // Skip to first hunk - while (diff_lines[line_index][0] != '@') { - ++line_index; - } - - while (line_index < diff_lines.size()) { - if (diff_lines[line_index][0] == '@') { - current_location = parse_hunk_location(diff_lines[line_index]); - ++line_index; + if (!maybe_location.has_value()) continue; - } - Hunk hunk {}; - hunk.location = current_location; + Hunk hunk { *maybe_location, {} }; - while (line_index < diff_lines.size()) { - auto const& line = diff_lines[line_index]; + auto old_lines_expected = hunk.location.old_range.number_of_lines; + auto new_lines_expected = hunk.location.new_range.number_of_lines; - char const operation = line[0]; - if (operation != ' ' && operation != '+' && operation != '-') - break; + // We've found a location. Now parse out all of the expected content lines. + while (old_lines_expected != 0 || new_lines_expected != 0) { + StringView line = consume_line(); + + if (line.is_empty()) + return Error::from_string_literal("Malformed empty content line in patch"); + + if (line[0] != ' ' && line[0] != '+' && line[0] != '-') + return Error::from_string_literal("Invaid operation in patch"); + + auto const operation = Line::operation_from_symbol(line[0]); + + if (operation != Line::Operation::Removal) { + if (new_lines_expected == 0) + return Error::from_string_literal("Found more removal and context lines in patch than expected"); + + --new_lines_expected; + } + + if (operation != Line::Operation::Addition) { + if (old_lines_expected == 0) + return Error::from_string_literal("Found more addition and context lines in patch than expected"); + + --old_lines_expected; + } auto const content = line.substring_view(1, line.length() - 1); - - TRY(hunk.lines.try_append(Line { Line::operation_from_symbol(operation), TRY(String::from_utf8(content)) })); - - ++line_index; + TRY(hunk.lines.try_append(Line { operation, TRY(String::from_utf8(content)) })); } + TRY(hunks.try_append(hunk)); } @@ -63,48 +118,9 @@ ErrorOr> parse_hunks(StringView diff) return hunks; } -HunkLocation parse_hunk_location(StringView location_line) +ErrorOr> parse_hunks(StringView diff) { - size_t char_index = 0; - auto parse_start_and_length_pair = [](StringView raw) { - auto maybe_index_of_separator = raw.find(','); - - size_t start = 0; - size_t length = 0; - if (maybe_index_of_separator.has_value()) { - auto index_of_separator = maybe_index_of_separator.value(); - start = raw.substring_view(0, index_of_separator).to_uint().value(); - length = raw.substring_view(index_of_separator + 1, raw.length() - index_of_separator - 1).to_uint().value(); - } else { - length = 1; - start = raw.to_uint().value(); - } - - return Range { start, length }; - }; - while (char_index < location_line.length() && location_line[char_index++] != '-') { - } - VERIFY(char_index < location_line.length()); - - size_t original_location_start_index = char_index; - - while (char_index < location_line.length() && location_line[char_index++] != ' ') { - } - VERIFY(char_index < location_line.length() && location_line[char_index] == '+'); - size_t original_location_end_index = char_index - 2; - - size_t target_location_start_index = char_index + 1; - - char_index += 1; - while (char_index < location_line.length() && location_line[char_index++] != ' ') { - } - VERIFY(char_index < location_line.length()); - - size_t target_location_end_index = char_index - 2; - - auto old_range = parse_start_and_length_pair(location_line.substring_view(original_location_start_index, original_location_end_index - original_location_start_index + 1)); - auto new_range = parse_start_and_length_pair(location_line.substring_view(target_location_start_index, target_location_end_index - target_location_start_index + 1)); - return { old_range, new_range }; + Parser lexer(diff); + return lexer.parse_hunks(); +} } - -}; diff --git a/Userland/Libraries/LibDiff/Hunks.h b/Userland/Libraries/LibDiff/Hunks.h index ca89d35e63..17b3d4a4bc 100644 --- a/Userland/Libraries/LibDiff/Hunks.h +++ b/Userland/Libraries/LibDiff/Hunks.h @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -58,10 +59,21 @@ struct Hunk { Vector lines; }; -ErrorOr> parse_hunks(StringView diff); -HunkLocation parse_hunk_location(StringView location_line); +class Parser : public GenericLexer { +public: + using GenericLexer::GenericLexer; + + ErrorOr> parse_hunks(); + +private: + Optional consume_unified_location(); + bool consume_line_number(size_t& number); }; +ErrorOr> parse_hunks(StringView diff); + +} + template<> struct AK::Formatter : Formatter { ErrorOr format(FormatBuilder& builder, Diff::Line::Operation operation)