1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-26 08:27:45 +00:00

pdf: Add a --json flag that currently affects only --debugging-stats

This commit is contained in:
Nico Weber 2023-11-09 21:35:01 +01:00 committed by Andreas Kling
parent cd9f4655ec
commit ed7549c64f

View file

@ -4,6 +4,7 @@
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
#include <AK/JsonObject.h>
#include <AK/LexicalPath.h> #include <AK/LexicalPath.h>
#include <LibCore/ArgsParser.h> #include <LibCore/ArgsParser.h>
#include <LibCore/File.h> #include <LibCore/File.h>
@ -79,10 +80,10 @@ static PDF::PDFErrorOr<void> save_rendered_page(PDF::Document& document, int pag
return {}; return {};
} }
// Takes a sorted non-empty vector of ints like `1 1 3 4 5 5 5` and returns a RLE-y summary string like " 1 (2x) 3 4 5 (3x)" (with a leading space). // Takes a sorted non-empty vector of ints like `1 1 3 4 5 5 5` and returns a RLE vector with alternating elements and counts like `1 2 3 1 4 1 5 3`.
static ErrorOr<String> summary_string(Vector<int> const& pages) static Vector<int> rle_vector(Vector<int> const& pages)
{ {
StringBuilder builder; Vector<int> rle;
int last_page = 0; int last_page = 0;
int page_count = 0; int page_count = 0;
for (int page : pages) { for (int page : pages) {
@ -92,25 +93,38 @@ static ErrorOr<String> summary_string(Vector<int> const& pages)
} }
if (last_page != 0) { if (last_page != 0) {
builder.appendff(" {}", last_page); rle.append(last_page);
if (page_count > 1) rle.append(page_count);
builder.appendff(" ({}x)", page_count);
} }
last_page = page; last_page = page;
page_count = 1; page_count = 1;
} }
builder.appendff(" {}", last_page); rle.append(last_page);
if (page_count > 1) rle.append(page_count);
builder.appendff(" ({}x)", page_count); return rle;
}
// Takes a sorted non-empty vector of ints like `1 1 3 4 5 5 5` and returns a RLE-y summary string like " 1 (2x) 3 4 5 (3x)" (with a leading space).
static ErrorOr<String> summary_string(Vector<int> const& pages)
{
StringBuilder builder;
auto rle = rle_vector(pages);
for (size_t i = 0; i < rle.size(); i += 2) {
builder.appendff(" {}", rle[i]);
if (rle[i + 1] > 1)
builder.appendff(" ({}x)", rle[i + 1]);
}
return builder.to_string(); return builder.to_string();
} }
static PDF::PDFErrorOr<void> print_debugging_stats(PDF::Document& document) static PDF::PDFErrorOr<void> print_debugging_stats(PDF::Document& document, bool json)
{ {
HashMap<DeprecatedString, Vector<int>> diags_to_pages; HashMap<DeprecatedString, Vector<int>> diags_to_pages;
for (u32 page_number = 1; page_number <= document.get_page_count(); ++page_number) { for (u32 page_number = 1; page_number <= document.get_page_count(); ++page_number) {
if (!json) {
out("page number {} / {}", page_number, document.get_page_count()); out("page number {} / {}", page_number, document.get_page_count());
fflush(stdout); fflush(stdout);
}
auto page = TRY(document.get_page(page_number - 1)); auto page = TRY(document.get_page(page_number - 1));
auto page_size = Gfx::IntSize { 200, round_to<int>(200 * page.media_box.height() / page.media_box.width()) }; auto page_size = Gfx::IntSize { 200, round_to<int>(200 * page.media_box.height() / page.media_box.width()) };
auto bitmap = TRY(Gfx::Bitmap::create(Gfx::BitmapFormat::BGRx8888, page_size)); auto bitmap = TRY(Gfx::Bitmap::create(Gfx::BitmapFormat::BGRx8888, page_size));
@ -119,22 +133,40 @@ static PDF::PDFErrorOr<void> print_debugging_stats(PDF::Document& document)
for (auto const& error : errors.error().errors()) for (auto const& error : errors.error().errors())
diags_to_pages.ensure(error.message()).append(page_number); diags_to_pages.ensure(error.message()).append(page_number);
} }
if (!json)
out("\r"); out("\r");
} }
if (!json)
outln(); outln();
if (diags_to_pages.is_empty()) { JsonObject json_output;
json_output.set("num_pages", document.get_page_count());
if (diags_to_pages.is_empty() && !json) {
outln("no issues found"); outln("no issues found");
return {}; return {};
} }
JsonObject issues;
auto keys = diags_to_pages.keys(); auto keys = diags_to_pages.keys();
quick_sort(keys, [&](auto& k1, auto& k2) { return diags_to_pages.get(k1)->size() < diags_to_pages.get(k2)->size(); }); quick_sort(keys, [&](auto& k1, auto& k2) { return diags_to_pages.get(k1)->size() < diags_to_pages.get(k2)->size(); });
for (auto const& key : keys.in_reverse()) { for (auto const& key : keys.in_reverse()) {
auto const& value = diags_to_pages.get(key).value(); auto const& value = diags_to_pages.get(key).value();
if (json) {
JsonArray page_counts;
auto rle = rle_vector(value);
for (size_t i = 0; i < rle.size(); i += 2)
page_counts.must_append(JsonArray { Vector { rle[i], rle[i + 1] } });
issues.set(key, page_counts);
} else {
outln("{} times: {}", value.size(), key); outln("{} times: {}", value.size(), key);
outln(" on pages:{}", TRY(summary_string(value))); outln(" on pages:{}", TRY(summary_string(value)));
} }
}
if (json) {
json_output.set("issues", issues);
outln("{}", json_output.to_deprecated_string());
}
return {}; return {};
} }
@ -157,6 +189,10 @@ static PDF::PDFErrorOr<int> pdf_main(Main::Arguments arguments)
bool dump_outline = false; bool dump_outline = false;
args_parser.add_option(dump_outline, "Dump document outline", "dump-outline", {}); args_parser.add_option(dump_outline, "Dump document outline", "dump-outline", {});
// FIXME: Currently only honored for --debugging-stats, should be honored for no-arg output too.
bool json = false;
args_parser.add_option(json, "Print output as json", "json", {});
u32 page_number = 1; u32 page_number = 1;
args_parser.add_option(page_number, "Page number (1-based)", "page", {}, "PAGE"); args_parser.add_option(page_number, "Page number (1-based)", "page", {}, "PAGE");
@ -194,7 +230,7 @@ static PDF::PDFErrorOr<int> pdf_main(Main::Arguments arguments)
#endif #endif
if (debugging_stats) { if (debugging_stats) {
TRY(print_debugging_stats(*document)); TRY(print_debugging_stats(*document, json));
return 0; return 0;
} }