From f690807c5a41a796361560a08c70699f97608ac7 Mon Sep 17 00:00:00 2001 From: Shannon Booth Date: Sat, 24 Jun 2023 17:33:04 +1200 Subject: [PATCH] LibDiff: Change underlying representation of Hunk to allow context The existing hunk data structure does not contain any way to easily store information about context surrounding the additions and removals in a hunk. While this does work fine for normal diffs (where there is never any surrounding context) this data structure is quite limiting for other use cases. Without support for surrounding context it is not possible to: * Add support for unified or context format to the diff utility to output surrounding context. * Be able to implement a patch utility that uses the surrounding context to reliably locate where to apply a patch when a hunk range does not apply perfectly. This patch changes Diff::Hunk such that its data structure more closely resembles a unified diff. Each line in a hunk is now either a change, removal, addition or context. Allowing hunks to have context inside of them exposes that HackStudio heavily relies on there being no context in the hunks that it uses for its' git gutter implementation. The fix here is simple - ask git to produce us a diff that has no context in it! --- Userland/DevTools/HackStudio/Editor.cpp | 13 +-- .../DevTools/HackStudio/Git/DiffViewer.cpp | 30 ++++--- Userland/DevTools/HackStudio/Git/GitRepo.cpp | 2 +- .../LibCpp/SemanticSyntaxHighlighter.cpp | 6 +- Userland/Libraries/LibDiff/Format.cpp | 49 +++++------ Userland/Libraries/LibDiff/Generator.cpp | 23 +++-- Userland/Libraries/LibDiff/Hunks.cpp | 87 +++++++------------ Userland/Libraries/LibDiff/Hunks.h | 86 +++++++++++++++--- 8 files changed, 169 insertions(+), 127 deletions(-) diff --git a/Userland/DevTools/HackStudio/Editor.cpp b/Userland/DevTools/HackStudio/Editor.cpp index d85061f706..a34d504acc 100644 --- a/Userland/DevTools/HackStudio/Editor.cpp +++ b/Userland/DevTools/HackStudio/Editor.cpp @@ -831,12 +831,15 @@ ErrorOr Editor::update_git_diff_indicators() for (auto i = 0u; i < document().line_count(); ++i) line_differences.unchecked_append(CodeDocument::DiffType::None); - for (auto& hunk : wrapper().hunks()) { - auto start_line = hunk.target_start_line; - auto finish_line = start_line + hunk.added_lines.size(); + for (auto const& hunk : wrapper().hunks()) { + auto start_line = hunk.location.new_range.start_line; + // Account for 1 indexed hunk location + if (start_line != 0) + start_line--; + auto finish_line = start_line + hunk.location.new_range.number_of_lines; - auto additions = hunk.added_lines.size(); - auto deletions = hunk.removed_lines.size(); + auto additions = hunk.location.new_range.number_of_lines; + auto deletions = hunk.location.old_range.number_of_lines; for (size_t line_offset = 0; line_offset < additions; line_offset++) { auto line = start_line + line_offset; diff --git a/Userland/DevTools/HackStudio/Git/DiffViewer.cpp b/Userland/DevTools/HackStudio/Git/DiffViewer.cpp index 06d95bc2b7..5fd11ef3fe 100644 --- a/Userland/DevTools/HackStudio/Git/DiffViewer.cpp +++ b/Userland/DevTools/HackStudio/Git/DiffViewer.cpp @@ -36,28 +36,32 @@ void DiffViewer::paint_event(GUI::PaintEvent& event) size_t y_offset = 10; size_t current_original_line_index = 0; for (auto const& hunk : m_hunks) { - for (size_t i = current_original_line_index; i < hunk.original_start_line; ++i) { + for (size_t i = current_original_line_index; i < hunk.location.old_range.start_line; ++i) { draw_line(painter, m_original_lines[i], y_offset, LinePosition::Both, LineType::Normal); y_offset += line_height(); } - current_original_line_index = hunk.original_start_line + hunk.removed_lines.size(); + current_original_line_index = hunk.location.old_range.start_line + hunk.location.old_range.number_of_lines; size_t left_y_offset = y_offset; - for (auto const& removed_line : hunk.removed_lines) { - draw_line(painter, removed_line, left_y_offset, LinePosition::Left, LineType::Diff); + for (auto const& line : hunk.lines) { + if (line.operation != Diff::Line::Operation::Removal) + continue; + draw_line(painter, line.content, left_y_offset, LinePosition::Left, LineType::Diff); left_y_offset += line_height(); } - for (int i = 0; i < (int)hunk.added_lines.size() - (int)hunk.removed_lines.size(); ++i) { + for (int i = 0; i < (int)hunk.location.new_range.number_of_lines - (int)hunk.location.old_range.number_of_lines; ++i) { draw_line(painter, ""sv, left_y_offset, LinePosition::Left, LineType::Missing); left_y_offset += line_height(); } size_t right_y_offset = y_offset; - for (auto const& added_line : hunk.added_lines) { - draw_line(painter, added_line, right_y_offset, LinePosition::Right, LineType::Diff); + for (auto const& line : hunk.lines) { + if (line.operation != Diff::Line::Operation::Addition) + continue; + draw_line(painter, line.content, right_y_offset, LinePosition::Right, LineType::Diff); right_y_offset += line_height(); } - for (int i = 0; i < (int)hunk.removed_lines.size() - (int)hunk.added_lines.size(); ++i) { + for (int i = 0; i < (int)hunk.location.old_range.number_of_lines - (int)hunk.location.new_range.number_of_lines; ++i) { draw_line(painter, ""sv, right_y_offset, LinePosition::Right, LineType::Missing); right_y_offset += line_height(); } @@ -205,13 +209,13 @@ void DiffViewer::update_content_size() size_t num_lines = 0; size_t current_original_line_index = 0; for (auto const& hunk : m_hunks) { - num_lines += ((int)hunk.original_start_line - (int)current_original_line_index); + num_lines += (hunk.location.old_range.start_line - (int)current_original_line_index); - num_lines += hunk.removed_lines.size(); - if (hunk.added_lines.size() > hunk.removed_lines.size()) { - num_lines += ((int)hunk.added_lines.size() - (int)hunk.removed_lines.size()); + num_lines += hunk.location.old_range.number_of_lines; + if (hunk.location.new_range.number_of_lines > hunk.location.old_range.number_of_lines) { + num_lines += ((int)hunk.location.new_range.number_of_lines - (int)hunk.location.old_range.number_of_lines); } - current_original_line_index = hunk.original_start_line + hunk.removed_lines.size(); + current_original_line_index = hunk.location.old_range.start_line + hunk.location.old_range.number_of_lines; } num_lines += ((int)m_original_lines.size() - (int)current_original_line_index); diff --git a/Userland/DevTools/HackStudio/Git/GitRepo.cpp b/Userland/DevTools/HackStudio/Git/GitRepo.cpp index c289434eaf..2fb11b2021 100644 --- a/Userland/DevTools/HackStudio/Git/GitRepo.cpp +++ b/Userland/DevTools/HackStudio/Git/GitRepo.cpp @@ -118,7 +118,7 @@ Optional GitRepo::original_file_content(DeprecatedString const Optional GitRepo::unstaged_diff(DeprecatedString const& file) const { - return command({ "diff", file.characters() }); + return command({ "diff", "-U0", file.characters() }); } bool GitRepo::is_tracked(DeprecatedString const& file) const diff --git a/Userland/Libraries/LibCpp/SemanticSyntaxHighlighter.cpp b/Userland/Libraries/LibCpp/SemanticSyntaxHighlighter.cpp index 08dcea3991..2f37178905 100644 --- a/Userland/Libraries/LibCpp/SemanticSyntaxHighlighter.cpp +++ b/Userland/Libraries/LibCpp/SemanticSyntaxHighlighter.cpp @@ -63,14 +63,14 @@ void SemanticSyntaxHighlighter::rehighlight(Palette const& palette) size_t previous_token_index = 0; size_t current_token_index = 0; for (auto const& hunk : diff_hunks) { - for (; previous_token_index < hunk.original_start_line; ++previous_token_index) { + for (; previous_token_index < hunk.location.old_range.start_line; ++previous_token_index) { new_tokens_info[current_token_index].type = m_tokens_info[previous_token_index].type; ++current_token_index; } - for (size_t i = 0; i < hunk.added_lines.size(); ++i) { + for (size_t i = 0; i < hunk.location.new_range.number_of_lines; ++i) { ++current_token_index; } - for (size_t i = 0; i < hunk.removed_lines.size(); ++i) { + for (size_t i = 0; i < hunk.location.old_range.number_of_lines; ++i) { ++previous_token_index; } } diff --git a/Userland/Libraries/LibDiff/Format.cpp b/Userland/Libraries/LibDiff/Format.cpp index 9efc6871b5..353b606017 100644 --- a/Userland/Libraries/LibDiff/Format.cpp +++ b/Userland/Libraries/LibDiff/Format.cpp @@ -26,47 +26,40 @@ DeprecatedString generate_only_additions(StringView text) ErrorOr write_normal(Hunk const& hunk, Stream& stream, ColorOutput color_output) { - auto original_start = hunk.original_start_line; - auto target_start = hunk.target_start_line; - auto num_added = hunk.added_lines.size(); - auto num_removed = hunk.removed_lines.size(); - // Source line(s) - TRY(stream.write_formatted("{}", original_start)); - - if (num_removed > 1) - TRY(stream.write_formatted(",{}", original_start + num_removed - 1)); + TRY(stream.write_formatted("{}", hunk.location.old_range.start_line)); + if (hunk.location.old_range.number_of_lines > 1) + TRY(stream.write_formatted(",{}", (hunk.location.old_range.start_line + hunk.location.old_range.number_of_lines - 1))); // Action - if (num_added > 0 && num_removed > 0) + if (hunk.location.old_range.number_of_lines > 0 && hunk.location.new_range.number_of_lines > 0) TRY(stream.write_formatted("c")); - else if (num_added > 0) + else if (hunk.location.new_range.number_of_lines > 0) TRY(stream.write_formatted("a")); else TRY(stream.write_formatted("d")); // Target line(s) - TRY(stream.write_formatted("{}", target_start)); - if (num_added > 1) - TRY(stream.write_formatted(",{}", target_start + num_added - 1)); + TRY(stream.write_formatted("{}", hunk.location.new_range.start_line)); + if (hunk.location.new_range.number_of_lines > 1) + TRY(stream.write_formatted(",{}", (hunk.location.new_range.start_line + hunk.location.new_range.number_of_lines - 1))); TRY(stream.write_formatted("\n")); - for (auto const& line : hunk.removed_lines) { - if (color_output == ColorOutput::Yes) - TRY(stream.write_formatted("\033[31;1m< {}\033[0m\n", line)); - else - TRY(stream.write_formatted("< {}\n", line)); - } + for (auto const& line : hunk.lines) { + VERIFY(line.operation == Line::Operation::Removal || line.operation == Line::Operation::Addition); - if (num_added > 0 && num_removed > 0) - TRY(stream.write_formatted("---\n")); - - for (auto const& line : hunk.added_lines) { - if (color_output == ColorOutput::Yes) - TRY(stream.write_formatted("\033[32;1m> {}\033[0m\n", line)); - else - TRY(stream.write_formatted("> {}\n", line)); + if (line.operation == Line::Operation::Addition) { + if (color_output == ColorOutput::Yes) + TRY(stream.write_formatted("\033[32;1m> {}\033[0m\n", line.content)); + else + TRY(stream.write_formatted("> {}\n", line.content)); + } else { + if (color_output == ColorOutput::Yes) + TRY(stream.write_formatted("\033[31;1m< {}\033[0m\n", line.content)); + else + TRY(stream.write_formatted("< {}\n", line.content)); + } } return {}; diff --git a/Userland/Libraries/LibDiff/Generator.cpp b/Userland/Libraries/LibDiff/Generator.cpp index c952ea8702..3be17fe6ba 100644 --- a/Userland/Libraries/LibDiff/Generator.cpp +++ b/Userland/Libraries/LibDiff/Generator.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2021, Mustafa Quraish + * Copyright (c) 2023, Shannon Booth * * SPDX-License-Identifier: BSD-2-Clause */ @@ -68,13 +69,19 @@ ErrorOr> from_text(StringView old_text, StringView new_text) auto update_hunk = [&](size_t i, size_t j, Direction direction) -> ErrorOr { if (!in_hunk) { + HunkLocation location; + location.old_range.start_line = i; + location.new_range.start_line = j; in_hunk = true; - cur_hunk = { i, j, {}, {} }; + cur_hunk = { location, {} }; } + if (direction == Direction::Down) { - TRY(cur_hunk.added_lines.try_append(TRY(String::from_utf8(new_lines[j])))); + TRY(cur_hunk.lines.try_append(Line { Line::Operation::Addition, TRY(String::from_utf8(new_lines[j])) })); + cur_hunk.location.new_range.number_of_lines++; } else if (direction == Direction::Right) { - TRY(cur_hunk.removed_lines.try_append(TRY(String::from_utf8(old_lines[i])))); + TRY(cur_hunk.lines.try_append(Line { Line::Operation::Removal, TRY(String::from_utf8(old_lines[i])) })); + cur_hunk.location.old_range.number_of_lines++; } return {}; @@ -82,10 +89,12 @@ ErrorOr> from_text(StringView old_text, StringView new_text) auto flush_hunk = [&]() -> ErrorOr { if (in_hunk) { - if (cur_hunk.added_lines.size() > 0) - cur_hunk.target_start_line++; - if (cur_hunk.removed_lines.size() > 0) - cur_hunk.original_start_line++; + // A file with no content has a zero indexed start line. + if (cur_hunk.location.new_range.start_line != 0 || cur_hunk.location.new_range.number_of_lines != 0) + cur_hunk.location.new_range.start_line++; + if (cur_hunk.location.old_range.start_line != 0 || cur_hunk.location.old_range.number_of_lines != 0) + cur_hunk.location.old_range.start_line++; + TRY(hunks.try_append(cur_hunk)); in_hunk = false; } diff --git a/Userland/Libraries/LibDiff/Hunks.cpp b/Userland/Libraries/LibDiff/Hunks.cpp index cd7e54cb6c..10c17ad685 100644 --- a/Userland/Libraries/LibDiff/Hunks.cpp +++ b/Userland/Libraries/LibDiff/Hunks.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2020, Itamar S. + * Copyright (c) 2023, Shannon Booth * * SPDX-License-Identifier: BSD-2-Clause */ @@ -31,28 +32,21 @@ ErrorOr> parse_hunks(StringView diff) ++line_index; continue; } - if (diff_lines[line_index][0] == ' ') { - current_location.apply_offset(1, HunkLocation::LocationType::Both); - ++line_index; - continue; - } + Hunk hunk {}; - hunk.original_start_line = current_location.original_start_line; - hunk.target_start_line = current_location.target_start_line; + hunk.location = current_location; - while (line_index < diff_lines.size() && diff_lines[line_index][0] == '-') { - TRY(hunk.removed_lines.try_append(TRY(String::from_utf8(diff_lines[line_index].substring_view(1, diff_lines[line_index].length() - 1))))); - current_location.apply_offset(1, HunkLocation::LocationType::Original); - ++line_index; - } - while (line_index < diff_lines.size() && diff_lines[line_index][0] == '+') { - TRY(hunk.added_lines.try_append(TRY(String::from_utf8(diff_lines[line_index].substring_view(1, diff_lines[line_index].length() - 1))))); - current_location.apply_offset(1, HunkLocation::LocationType::Target); - ++line_index; - } + while (line_index < diff_lines.size()) { + auto const& line = diff_lines[line_index]; + + char const operation = line[0]; + if (operation != ' ' && operation != '+' && operation != '-') + break; + + auto const content = line.substring_view(1, line.length() - 1); + + TRY(hunk.lines.try_append(Line { Line::operation_from_symbol(operation), TRY(String::from_utf8(content)) })); - while (line_index < diff_lines.size() && diff_lines[line_index][0] == ' ') { - current_location.apply_offset(1, HunkLocation::LocationType::Both); ++line_index; } TRY(hunks.try_append(hunk)); @@ -60,15 +54,9 @@ ErrorOr> parse_hunks(StringView diff) if constexpr (HUNKS_DEBUG) { for (auto const& hunk : hunks) { - dbgln("Hunk location:"); - dbgln(" orig: {}", hunk.original_start_line); - dbgln(" target: {}", hunk.target_start_line); - dbgln(" removed:"); - for (auto const& line : hunk.removed_lines) - dbgln("- {}", line); - dbgln(" added:"); - for (auto const& line : hunk.added_lines) - dbgln("+ {}", line); + dbgln("{}", hunk.location); + for (auto const& line : hunk.lines) + dbgln("{}", line); } } @@ -78,22 +66,21 @@ ErrorOr> parse_hunks(StringView diff) HunkLocation parse_hunk_location(StringView location_line) { size_t char_index = 0; - struct StartAndLength { - size_t start { 0 }; - size_t length { 0 }; - }; auto parse_start_and_length_pair = [](StringView raw) { - auto index_of_separator = raw.find(',').value(); - auto start = raw.substring_view(0, index_of_separator).to_uint().value(); - auto length = raw.substring_view(index_of_separator + 1, raw.length() - index_of_separator - 1).to_uint().value(); + auto maybe_index_of_separator = raw.find(','); - if (start != 0) - start--; + size_t start = 0; + size_t length = 0; + if (maybe_index_of_separator.has_value()) { + auto index_of_separator = maybe_index_of_separator.value(); + start = raw.substring_view(0, index_of_separator).to_uint().value(); + length = raw.substring_view(index_of_separator + 1, raw.length() - index_of_separator - 1).to_uint().value(); + } else { + length = 1; + start = raw.to_uint().value(); + } - if (length != 0) - length--; - - return StartAndLength { start, length }; + return Range { start, length }; }; while (char_index < location_line.length() && location_line[char_index++] != '-') { } @@ -115,21 +102,9 @@ HunkLocation parse_hunk_location(StringView location_line) size_t target_location_end_index = char_index - 2; - auto original_pair = parse_start_and_length_pair(location_line.substring_view(original_location_start_index, original_location_end_index - original_location_start_index + 1)); - auto target_pair = parse_start_and_length_pair(location_line.substring_view(target_location_start_index, target_location_end_index - target_location_start_index + 1)); - return { original_pair.start, original_pair.length, target_pair.start, target_pair.length }; -} - -void HunkLocation::apply_offset(size_t offset, HunkLocation::LocationType type) -{ - if (type == LocationType::Original || type == LocationType::Both) { - original_start_line += offset; - original_length -= offset; - } - if (type == LocationType::Target || type == LocationType::Both) { - target_start_line += offset; - target_length -= offset; - } + auto old_range = parse_start_and_length_pair(location_line.substring_view(original_location_start_index, original_location_end_index - original_location_start_index + 1)); + auto new_range = parse_start_and_length_pair(location_line.substring_view(target_location_start_index, target_location_end_index - target_location_start_index + 1)); + return { old_range, new_range }; } }; diff --git a/Userland/Libraries/LibDiff/Hunks.h b/Userland/Libraries/LibDiff/Hunks.h index 03ebd0542a..a67a0e00b6 100644 --- a/Userland/Libraries/LibDiff/Hunks.h +++ b/Userland/Libraries/LibDiff/Hunks.h @@ -1,38 +1,96 @@ /* * Copyright (c) 2020, Itamar S. + * Copyright (c) 2023, Shannon Booth * * SPDX-License-Identifier: BSD-2-Clause */ #pragma once +#include +#include #include #include #include namespace Diff { -struct HunkLocation { - size_t original_start_line { 0 }; - size_t original_length { 0 }; - size_t target_start_line { 0 }; - size_t target_length { 0 }; +struct Range { + size_t start_line { 0 }; + size_t number_of_lines { 0 }; +}; + +struct HunkLocation { + Range old_range; + Range new_range; +}; + +struct Line { + enum class Operation { + Addition = '+', + Removal = '-', + Context = ' ', - enum class LocationType { - Original, - Target, - Both }; - void apply_offset(size_t offset, LocationType); + + static constexpr Operation operation_from_symbol(char symbol) + { + switch (symbol) { + case '+': + return Operation::Addition; + case '-': + return Operation::Removal; + case ' ': + return Operation::Context; + default: + VERIFY_NOT_REACHED(); + } + } + + Operation operation; + String content; }; struct Hunk { - size_t original_start_line { 0 }; - size_t target_start_line { 0 }; - Vector removed_lines; - Vector added_lines; + HunkLocation location; + Vector lines; }; ErrorOr> parse_hunks(StringView diff); HunkLocation parse_hunk_location(StringView location_line); }; + +template<> +struct AK::Formatter : Formatter { + ErrorOr format(FormatBuilder& builder, Diff::Line::Operation operation) + { + return Formatter::format(builder, "{}"sv, static_cast(operation)); + } +}; + +template<> +struct AK::Formatter : Formatter { + ErrorOr format(FormatBuilder& builder, Diff::Line const& line) + { + return Formatter::format(builder, "{}{}"sv, line.operation, line.content); + } +}; + +template<> +struct AK::Formatter : Formatter { + static ErrorOr format(FormatBuilder& format_builder, Diff::HunkLocation const& location) + { + auto& builder = format_builder.builder(); + TRY(builder.try_appendff("@@ -{}"sv, location.old_range.start_line)); + + if (location.old_range.number_of_lines != 1) + TRY(builder.try_appendff(",{}", location.old_range.number_of_lines)); + + TRY(builder.try_appendff(" +{}", location.new_range.start_line)); + + if (location.new_range.number_of_lines != 1) + TRY(builder.try_appendff(",{}", location.new_range.number_of_lines)); + + return builder.try_appendff(" @@"); + } +};