From 828d791a4fe46dc23be303b42bb18b4305dcd92c Mon Sep 17 00:00:00 2001
From: Shannon Booth
Date: Fri, 7 Jul 2023 00:32:54 +1200
Subject: [PATCH] LibDiff: Add Diff::apply_patch

Given a set of lines from the file we are patching, and a patch itself,
this function will try and locate where in the file to apply that patch,
and write the result of patching that file (if successful) to the output
stream.
---
 Userland/Libraries/LibDiff/Applier.cpp    | 178 ++++++++++++++++++++++
 Userland/Libraries/LibDiff/Applier.h      |  16 ++
 Userland/Libraries/LibDiff/CMakeLists.txt |   1 +
 Userland/Libraries/LibDiff/Forward.h      |   1 +
 Userland/Libraries/LibDiff/Hunks.h        |   5 +
 5 files changed, 201 insertions(+)
 create mode 100644 Userland/Libraries/LibDiff/Applier.cpp
 create mode 100644 Userland/Libraries/LibDiff/Applier.h

diff --git a/Userland/Libraries/LibDiff/Applier.cpp b/Userland/Libraries/LibDiff/Applier.cpp
new file mode 100644
index 0000000000..7a093804fc
--- /dev/null
+++ b/Userland/Libraries/LibDiff/Applier.cpp
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2023, Shannon Booth
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <AK/Stream.h>
+#include <LibDiff/Applier.h>
+#include <LibDiff/Hunks.h>
+
+namespace Diff {
+
+// Returns the 1-based line number of the old file at which 'location' expects its hunk to start.
+static size_t expected_line_number(HunkLocation const& location)
+{
+    auto line = location.old_range.start_line;
+
+    // NOTE: This is to handle the case we are adding a file, e.g for a range such as:
+    //       '@@ -0,0 +1,3 @@'
+    if (location.old_range.start_line == 0)
+        ++line;
+
+    VERIFY(line != 0);
+
+    return line;
+}
+
+// Describes where in the old file a hunk was found.
+struct Location {
+    // 0-based index into the lines of the old file at which the hunk starts.
+    size_t line_number;
+    // Number of context lines which had to be ignored in order to find a match.
+    size_t fuzz { 0 };
+    // Signed distance from the initial guess to 'line_number'.
+    ssize_t offset { 0 };
+};
+
+// Tries to find the position in 'content' (the lines of the old file) that 'hunk' applies to. 'offset' is the
+// accumulated displacement of the hunks located so far and shifts the initial guess. Returns an empty Optional
+// if the hunk cannot be matched while ignoring at most 'max_fuzz' context lines.
+static Optional<Location> locate_hunk(Vector<StringView> const& content, Hunk const& hunk, ssize_t offset, size_t max_fuzz = 3)
+{
+    // Make a first best guess at where the from-file range is telling us where the hunk should be.
+    size_t offset_guess = expected_line_number(hunk.location) - 1 + offset;
+
+    // If there's no lines surrounding this hunk - it will always succeed, so there is no point in checking any further.
+    if (hunk.location.old_range.number_of_lines == 0)
+        return Location { offset_guess, 0, 0 };
+
+    size_t patch_prefix_context = 0;
+    for (auto const& line : hunk.lines) {
+        if (line.operation != Line::Operation::Context)
+            break;
+        ++patch_prefix_context;
+    }
+
+    size_t patch_suffix_context = 0;
+    for (auto const& line : hunk.lines.in_reverse()) {
+        if (line.operation != Line::Operation::Context)
+            break;
+        ++patch_suffix_context;
+    }
+
+    size_t context = max(patch_prefix_context, patch_suffix_context);
+
+    // Look through the file trying to match the hunk for it. If we can't find anything anywhere in the file, then try and
+    // match the hunk by ignoring an increasing amount of context lines. The number of context lines that are ignored is
+    // called the 'fuzz'.
+    for (size_t fuzz = 0; fuzz <= max_fuzz; ++fuzz) {
+        // NOTE: This is 'fuzz + side_context - context' clamped to zero. All of these values are unsigned, so the
+        //       comparison has to happen before the subtraction to avoid wrapping around.
+        auto fuzz_for_side = [&](size_t side_context) -> size_t {
+            return fuzz + side_context > context ? fuzz + side_context - context : 0;
+        };
+
+        auto suffix_fuzz = fuzz_for_side(patch_suffix_context);
+        auto prefix_fuzz = fuzz_for_side(patch_prefix_context);
+
+        // If the fuzz is greater than the total number of lines for a hunk, then it may be possible for the hunk to match anything.
+        if (suffix_fuzz + prefix_fuzz >= hunk.lines.size())
+            return {};
+
+        auto hunk_matches_starting_from_line = [&](size_t line) {
+            line += prefix_fuzz;
+
+            // Ensure that all of the lines in the hunk match starting from 'line', ignoring the specified number of context lines.
+            return all_of(hunk.lines.begin() + prefix_fuzz, hunk.lines.end() - suffix_fuzz, [&](Line const& hunk_line) {
+                // Ignore additions in our increment of line and comparison as they are not part of the 'original file'
+                if (hunk_line.operation == Line::Operation::Addition)
+                    return true;
+
+                if (line >= content.size())
+                    return false;
+
+                if (content[line] != hunk_line.content)
+                    return false;
+
+                ++line;
+                return true;
+            });
+        };
+
+        for (size_t line = offset_guess; line < content.size(); ++line) {
+            if (hunk_matches_starting_from_line(line))
+                return Location { line, fuzz, static_cast<ssize_t>(line - offset_guess) };
+        }
+
+        // NOTE: 'line' is one past the candidate here so that the loop can also visit index 0. Both the returned
+        //       line number and offset must therefore be based on 'line - 1'.
+        for (size_t line = offset_guess; line != 0; --line) {
+            if (hunk_matches_starting_from_line(line - 1))
+                return Location { line - 1, fuzz, static_cast<ssize_t>(line - 1 - offset_guess) };
+        }
+    }
+
+    // No bueno.
+    return {};
+}
+
+// Writes the result of applying 'hunk' at 'location' to 'out'. Context lines are taken from 'lines' (the old file)
+// and added lines from the hunk. Returns the index of the first line of the old file following the hunk.
+static ErrorOr<size_t> write_hunk(Stream& out, Hunk const& hunk, Location const& location, Vector<StringView> const& lines)
+{
+    auto line_number = location.line_number;
+
+    for (auto const& patch_line : hunk.lines) {
+        if (patch_line.operation == Line::Operation::Context) {
+            // NOTE: Context lines ignored due to fuzz are never compared against the file, so they may be past its end.
+            if (line_number >= lines.size())
+                return Error::from_string_literal("Hunk context extends past the end of the file");
+
+            TRY(out.write_formatted("{}\n", lines[line_number]));
+            ++line_number;
+        } else if (patch_line.operation == Line::Operation::Addition) {
+            TRY(out.write_formatted("{}\n", patch_line.content));
+        } else if (patch_line.operation == Line::Operation::Removal) {
+            ++line_number;
+        }
+    }
+
+    return line_number;
+}
+
+// Applies 'patch' to the old file given by 'lines' and writes the patched file to 'out'. Fails if a hunk cannot
+// be located, lies outside of the part of the file which is yet to be written, or if writing to 'out' fails.
+ErrorOr<void> apply_patch(Stream& out, Vector<StringView> const& lines, Patch const& patch)
+{
+    size_t line_number = 0; // NOTE: relative to 'old' file.
+    ssize_t offset_error = 0;
+
+    for (auto const& hunk : patch.hunks) {
+        auto maybe_location = locate_hunk(lines, hunk, offset_error);
+        if (!maybe_location.has_value())
+            return Error::from_string_literal("Failed to locate where to apply patch");
+
+        auto location = *maybe_location;
+        offset_error += location.offset;
+
+        // A hunk located before what has already been written, or past the end of the file, can not be applied.
+        if (location.line_number < line_number || location.line_number > lines.size())
+            return Error::from_string_literal("Hunk location is outside of the file being patched");
+
+        // Write up until where we have found this latest hunk from the old file.
+        for (; line_number < location.line_number; ++line_number)
+            TRY(out.write_formatted("{}\n", lines[line_number]));
+
+        // Then output the hunk to what we hope is the correct location in the file.
+        line_number = TRY(write_hunk(out, hunk, location, lines));
+    }
+
+    // We've finished applying all hunks, write out anything from the old file we haven't already.
+    for (; line_number < lines.size(); ++line_number)
+        TRY(out.write_formatted("{}\n", lines[line_number]));
+
+    return {};
+}
+
+}
diff --git a/Userland/Libraries/LibDiff/Applier.h b/Userland/Libraries/LibDiff/Applier.h
new file mode 100644
index 0000000000..e28d416675
--- /dev/null
+++ b/Userland/Libraries/LibDiff/Applier.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2023, Shannon Booth
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/Error.h>
+#include <LibDiff/Forward.h>
+
+namespace Diff {
+
+ErrorOr<void> apply_patch(Stream& out, Vector<StringView> const& lines, Patch const& patch);
+
+}
diff --git a/Userland/Libraries/LibDiff/CMakeLists.txt b/Userland/Libraries/LibDiff/CMakeLists.txt
index 46d97fe960..84581a1f1b 100644
--- a/Userland/Libraries/LibDiff/CMakeLists.txt
+++ b/Userland/Libraries/LibDiff/CMakeLists.txt
@@ -1,5 +1,6 @@
 
 set(SOURCES
+    Applier.cpp
     Format.cpp
     Generator.cpp
     Hunks.cpp
diff --git a/Userland/Libraries/LibDiff/Forward.h b/Userland/Libraries/LibDiff/Forward.h
index d4d2acb27a..b8cbca0e08 100644
--- a/Userland/Libraries/LibDiff/Forward.h
+++ b/Userland/Libraries/LibDiff/Forward.h
@@ -16,6 +16,7 @@ struct Header;
 struct Hunk;
 struct HunkLocation;
 struct Line;
+struct Patch;
 struct Range;
 
 }
diff --git a/Userland/Libraries/LibDiff/Hunks.h b/Userland/Libraries/LibDiff/Hunks.h
index ac7d42dd4e..50ca3337b4 100644
--- a/Userland/Libraries/LibDiff/Hunks.h
+++ b/Userland/Libraries/LibDiff/Hunks.h
@@ -71,6 +71,11 @@ struct Header {
     String new_file_path;
 };
 
+struct Patch {
+    Header header;
+    Vector<Hunk> hunks;
+};
+
 class Parser : public GenericLexer {
 public:
     using GenericLexer::GenericLexer;