1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 05:17:35 +00:00

LibPDF: Make pdf --dump-contents dump less binary data

For pages containing images or embedded fonts, --dump-contents
used to dump a ton of binary data. That isn't very useful, so
stop doing it.

Before:

    % time Build/lagom/bin/pdf --render out.png \
        ~/Downloads/0000/0000711.pdf --dump-contents | wc -l
      937972

Now:

    % time Build/lagom/bin/pdf --render out.png \
        ~/Downloads/0000/0000711.pdf --dump-contents | wc -l
        6566

Printing 7k lines is also much faster than printing 940k:
0.15s instead of 2s.
This commit is contained in:
Nico Weber 2024-02-02 19:37:11 -05:00 committed by Sam Atkins
parent 5a29440bdf
commit 955d73657e

View file

@@ -150,15 +150,21 @@ ByteString StreamObject::to_byte_string(int indent) const
}
}
} else {
auto string = encode_hex(bytes());
int const chars_per_line = 60;
int const bytes_per_line = chars_per_line / 2;
int const max_lines_to_print = 10;
int const max_bytes_to_print = max_lines_to_print * bytes_per_line;
auto string = encode_hex(bytes().trim(max_bytes_to_print));
StringView view { string };
while (view.length() > 60) {
builder.appendff("{}\n", view.substring_view(0, 60));
builder.appendff("{}\n", view.substring_view(0, chars_per_line));
append_indent(builder, indent);
view = view.substring_view(60);
}
builder.appendff("{}\n", view);
if (bytes().size() > max_bytes_to_print)
builder.appendff("... (and {} more bytes)\n", bytes().size() - max_bytes_to_print);
}
builder.append("endstream"sv);