mirror of
https://github.com/RGBCube/serenity
synced 2025-05-14 05:54:58 +00:00
LibPDF: Always treat /Subtype /Image as binary data when dumping
Sometimes, the "is mostly text" heuristic fails for images.

Before:

    Build/lagom/bin/pdf --render out.png ~/Downloads/0000/0000521.pdf \
        --page 10 --dump-contents 2>&1 | wc -l
    25709

After:

    Build/lagom/bin/pdf --render out.png ~/Downloads/0000/0000521.pdf \
        --page 10 --dump-contents 2>&1 | wc -l
    11376
This commit is contained in:
parent
a9df60ff1c
commit
92a628c07c
1 changed file with 4 additions and 0 deletions
|
@ -5,6 +5,7 @@
|
|||
*/
|
||||
|
||||
#include <AK/Hex.h>
|
||||
#include <LibPDF/CommonNames.h>
|
||||
#include <LibPDF/Document.h>
|
||||
#include <LibPDF/ObjectDerivatives.h>
|
||||
|
||||
|
@ -136,6 +137,9 @@ ByteString StreamObject::to_byte_string(int indent) const
|
|||
percentage_ascii = ascii_count * 100 / bytes().size();
|
||||
bool is_mostly_text = percentage_ascii > 95;
|
||||
|
||||
if (dict()->contains(CommonNames::Subtype) && dict()->get_name(CommonNames::Subtype)->name() == "Image")
|
||||
is_mostly_text = false;
|
||||
|
||||
if (is_mostly_text) {
|
||||
for (size_t i = 0; i < bytes().size(); ++i) {
|
||||
auto c = bytes()[i];
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue