diff --git a/Userland/Libraries/LibPDF/CommonNames.h b/Userland/Libraries/LibPDF/CommonNames.h
index 6f9b613935..9ef7481faf 100644
--- a/Userland/Libraries/LibPDF/CommonNames.h
+++ b/Userland/Libraries/LibPDF/CommonNames.h
@@ -18,6 +18,7 @@
     A(BM)                         \
     A(BaseEncoding)               \
     A(BaseFont)                   \
+    A(BitsPerComponent)           \
     A(BlackPoint)                 \
     A(C)                          \
     A(CA)                         \
@@ -25,7 +26,9 @@
     A(CalRGB)                     \
     A(CIDSystemInfo)              \
     A(CIDToGIDMap)                \
+    A(Colors)                     \
     A(ColorSpace)                 \
+    A(Columns)                    \
     A(Contents)                   \
     A(Count)                      \
     A(CropBox)                    \
@@ -33,6 +36,7 @@
     A(D)                          \
     A(DW)                         \
     A(DCTDecode)                  \
+    A(DecodeParms)                \
     A(DescendantFonts)            \
     A(Dest)                       \
     A(Dests)                      \
@@ -101,6 +105,7 @@
     A(Pages)                      \
     A(Parent)                     \
     A(Pattern)                    \
+    A(Predictor)                  \
     A(Prev)                       \
     A(R)                          \
     A(RI)                         \
diff --git a/Userland/Libraries/LibPDF/Filter.cpp b/Userland/Libraries/LibPDF/Filter.cpp
index c184952be0..021fe65006 100644
--- a/Userland/Libraries/LibPDF/Filter.cpp
+++ b/Userland/Libraries/LibPDF/Filter.cpp
@@ -12,8 +12,24 @@
 
 namespace PDF {
 
-ErrorOr<ByteBuffer> Filter::decode(ReadonlyBytes bytes, FlyString const& encoding_type)
+ErrorOr<ByteBuffer> Filter::decode(ReadonlyBytes bytes, FlyString const& encoding_type, RefPtr<DictObject> decode_parms)
 {
+    int predictor = 1;
+    int columns = 1;
+    int colors = 1;
+    int bits_per_component = 8;
+
+    if (decode_parms) {
+        if (decode_parms->contains(CommonNames::Predictor))
+            predictor = decode_parms->get_value(CommonNames::Predictor).get<int>();
+        if (decode_parms->contains(CommonNames::Columns))
+            columns = decode_parms->get_value(CommonNames::Columns).get<int>();
+        if (decode_parms->contains(CommonNames::Colors))
+            colors = decode_parms->get_value(CommonNames::Colors).get<int>();
+        if (decode_parms->contains(CommonNames::BitsPerComponent))
+            bits_per_component = decode_parms->get_value(CommonNames::BitsPerComponent).get<int>();
+    }
+
     if (encoding_type == CommonNames::ASCIIHexDecode)
         return decode_ascii_hex(bytes);
     if (encoding_type == CommonNames::ASCII85Decode)
@@ -21,7 +37,7 @@ ErrorOr<ByteBuffer> Filter::decode(ReadonlyBytes bytes, FlyString const& encodin
     if (encoding_type == CommonNames::LZWDecode)
         return decode_lzw(bytes);
     if (encoding_type == CommonNames::FlateDecode)
-        return decode_flate(bytes);
+        return decode_flate(bytes, predictor, columns, colors, bits_per_component);
     if (encoding_type == CommonNames::RunLengthDecode)
         return decode_run_length(bytes);
     if (encoding_type == CommonNames::CCITTFaxDecode)
@@ -35,7 +51,7 @@ ErrorOr<ByteBuffer> Filter::decode(ReadonlyBytes bytes, FlyString const& encodin
     if (encoding_type == CommonNames::Crypt)
         return decode_crypt(bytes);
 
-    return Error::from_string_literal("Unrecognized filter encoding");
+    return AK::Error::from_string_literal("Unrecognized filter encoding");
 }
 
 ErrorOr<ByteBuffer> Filter::decode_ascii_hex(ReadonlyBytes bytes)
@@ -50,11 +66,11 @@ ErrorOr<ByteBuffer> Filter::decode_ascii_hex(ReadonlyBytes bytes)
     for (size_t i = 0; i < bytes.size() / 2; ++i) {
         auto const c1 = decode_hex_digit(static_cast<char>(bytes[i * 2]));
         if (c1 >= 16)
-            return Error::from_string_literal("Hex string contains invalid digit");
+            return AK::Error::from_string_literal("Hex string contains invalid digit");
 
         auto const c2 = decode_hex_digit(static_cast<char>(bytes[i * 2 + 1]));
         if (c2 >= 16)
-            return Error::from_string_literal("Hex string contains invalid digit");
+            return AK::Error::from_string_literal("Hex string contains invalid digit");
 
         output[i] = (c1 << 4) + c2;
     }
@@ -120,20 +136,129 @@ ErrorOr<ByteBuffer> Filter::decode_ascii85(ReadonlyBytes bytes)
     return ByteBuffer::copy(buff.span());
 };
 
+// Undoes PNG row filtering in place on `bytes` and returns the rows with their
+// leading algorithm tag bytes removed. `bytes` must consist of whole rows of
+// `bytes_per_row` bytes (tag included); `bytes_per_pixel` is the distance, in
+// bytes, between a sample and its left neighbour.
+ErrorOr<ByteBuffer> Filter::decode_png_prediction(Bytes bytes, int bytes_per_row, int bytes_per_pixel)
+{
+    int number_of_rows = bytes.size() / bytes_per_row;
+
+    ByteBuffer decoded;
+    decoded.ensure_capacity(bytes.size() - number_of_rows);
+
+    auto empty_row = TRY(ByteBuffer::create_zeroed(bytes_per_row));
+    auto previous_row = empty_row.data();
+
+    for (int row_index = 0; row_index < number_of_rows; ++row_index) {
+        auto row = bytes.data() + row_index * bytes_per_row;
+
+        // row[0] is the algorithm tag, so sample bytes start at index 1. Neighbours that
+        // would fall before the first pixel of a row count as zero (PNG spec, "Filter types").
+        u8 algorithm_tag = row[0];
+        switch (algorithm_tag) {
+        case 0:
+            break;
+        case 1:
+            for (int i = 1 + bytes_per_pixel; i < bytes_per_row; ++i)
+                row[i] += row[i - bytes_per_pixel];
+            break;
+        case 2:
+            for (int i = 1; i < bytes_per_row; ++i)
+                row[i] += previous_row[i];
+            break;
+        case 3:
+            for (int i = 1; i < bytes_per_row; ++i) {
+                int left = 0;
+                if (i > bytes_per_pixel)
+                    left = row[i - bytes_per_pixel];
+                int above = previous_row[i];
+                row[i] += (left + above) / 2;
+            }
+            break;
+        case 4:
+            for (int i = 1; i < bytes_per_row; ++i) {
+                int left = 0;
+                int upper_left = 0;
+                if (i > bytes_per_pixel) {
+                    left = row[i - bytes_per_pixel];
+                    upper_left = previous_row[i - bytes_per_pixel];
+                }
+                int above = previous_row[i];
+
+                // The estimate must be computed without wrapping to a byte
+                int p = left + above - upper_left;
+                int left_distance = abs(p - left);
+                int above_distance = abs(p - above);
+                int upper_left_distance = abs(p - upper_left);
+
+                // Paeth adds the neighbour nearest to the estimate, not the distance itself;
+                // ties are broken in the order left, above, upper left
+                int paeth = upper_left;
+                if (left_distance <= above_distance && left_distance <= upper_left_distance)
+                    paeth = left;
+                else if (above_distance <= upper_left_distance)
+                    paeth = above;
+
+                row[i] += paeth;
+            }
+            break;
+        default:
+            return AK::Error::from_string_literal("Unknown PNG algorithm tag");
+        }
+
+        previous_row = row;
+        decoded.append(row + 1, bytes_per_row - 1);
+    }
+
+    return decoded;
+}
+
 ErrorOr<ByteBuffer> Filter::decode_lzw(ReadonlyBytes)
 {
     dbgln("LZW decoding is not supported");
     VERIFY_NOT_REACHED();
 };
 
-ErrorOr<ByteBuffer> Filter::decode_flate(ReadonlyBytes bytes)
+// Inflates `bytes` and, when `predictor` selects a PNG predictor (10-15), reverses the
+// row filtering described by `columns`, `colors` and `bits_per_component`.
+ErrorOr<ByteBuffer> Filter::decode_flate(ReadonlyBytes bytes, int predictor, int columns, int colors, int bits_per_component)
 {
-    // FIXME: The spec says Flate decoding is "based on" zlib, does that mean they
-    // aren't exactly the same?
+    // slice(2) below skips the two-byte zlib header, so shorter inputs cannot be valid
+    if (bytes.size() < 2)
+        return AK::Error::from_string_literal("Flate input data is too short");
 
-    auto buff = Compress::DeflateDecompressor::decompress_all(bytes.slice(2));
-    VERIFY(buff.has_value());
-    return buff.value();
+    auto maybe_buff = Compress::DeflateDecompressor::decompress_all(bytes.slice(2));
+    if (!maybe_buff.has_value())
+        return AK::Error::from_string_literal("Flate decompression failed");
+    auto buff = maybe_buff.release_value();
+
+    if (predictor == 1)
+        return buff;
+
+    // Check if we are dealing with a PNG prediction
+    if (predictor == 2)
+        return AK::Error::from_string_literal("The TIFF predictor is not supported");
+    if (predictor < 10 || predictor > 15)
+        return AK::Error::from_string_literal("Invalid predictor value");
+
+    // These values come straight from the document, so reject anything that would make
+    // the row size non-positive or overflow an int (with headroom for rounding up)
+    if (columns < 1 || colors < 1 || bits_per_component < 1)
+        return AK::Error::from_string_literal("Invalid predictor parameters");
+    constexpr int max_bits_per_row = 0x7fffffff - 7;
+    i64 const samples_per_row = static_cast<i64>(columns) * colors;
+    if (samples_per_row > max_bits_per_row / bits_per_component)
+        return AK::Error::from_string_literal("Invalid predictor parameters");
+
+    // Rows are always a whole number of bytes long, starting with an algorithm tag
+    int const bits_per_row = static_cast<int>(samples_per_row) * bits_per_component;
+    int const bytes_per_row = AK::ceil_div(bits_per_row, 8) + 1;
+    if (buff.size() % bytes_per_row)
+        return AK::Error::from_string_literal("Flate input data is not divisible into columns");
+
+    int const bytes_per_pixel = AK::ceil_div(colors * bits_per_component, 8);
+    return decode_png_prediction(buff, bytes_per_row, bytes_per_pixel);
 };
 
 ErrorOr<ByteBuffer> Filter::decode_run_length(ReadonlyBytes)
diff --git a/Userland/Libraries/LibPDF/Filter.h b/Userland/Libraries/LibPDF/Filter.h
index aaa389788f..dd68eb36e6 100644
--- a/Userland/Libraries/LibPDF/Filter.h
+++ b/Userland/Libraries/LibPDF/Filter.h
@@ -9,18 +9,20 @@
 #include <AK/ByteBuffer.h>
 #include <AK/Error.h>
 #include <AK/FlyString.h>
+#include <LibPDF/ObjectDerivatives.h>
 
 namespace PDF {
 
 class Filter {
 public:
-    static ErrorOr<ByteBuffer> decode(ReadonlyBytes bytes, FlyString const& encoding_type);
+    static ErrorOr<ByteBuffer> decode(ReadonlyBytes bytes, FlyString const& encoding_type, RefPtr<DictObject> decode_parms);
 
 private:
     static ErrorOr<ByteBuffer> decode_ascii_hex(ReadonlyBytes bytes);
     static ErrorOr<ByteBuffer> decode_ascii85(ReadonlyBytes bytes);
+    static ErrorOr<ByteBuffer> decode_png_prediction(Bytes bytes, int bytes_per_row, int bytes_per_pixel);
     static ErrorOr<ByteBuffer> decode_lzw(ReadonlyBytes bytes);
-    static ErrorOr<ByteBuffer> decode_flate(ReadonlyBytes bytes);
+    static ErrorOr<ByteBuffer> decode_flate(ReadonlyBytes bytes, int predictor, int columns, int colors, int bits_per_component);
     static ErrorOr<ByteBuffer> decode_run_length(ReadonlyBytes bytes);
     static ErrorOr<ByteBuffer> decode_ccitt(ReadonlyBytes bytes);
     static ErrorOr<ByteBuffer> decode_jbig2(ReadonlyBytes bytes);
diff --git a/Userland/Libraries/LibPDF/Parser.cpp b/Userland/Libraries/LibPDF/Parser.cpp
index 5c3095efc6..e888cbbf38 100644
--- a/Userland/Libraries/LibPDF/Parser.cpp
+++ b/Userland/Libraries/LibPDF/Parser.cpp
@@ -488,8 +488,32 @@ PDFErrorOr<NonnullRefPtr<StreamObject>> Parser::parse_stream(NonnullRefPtr<DictO
             filters.append(filter_object->cast<NameObject>()->name());
         }
 
-        for (auto const& filter_type : filters)
-            stream_object->buffer() = TRY(Filter::decode(stream_object->bytes(), filter_type));
+        // Every filter may get its own parameter dictionary
+        Vector<RefPtr<DictObject>> decode_parms_vector;
+        RefPtr<Object> decode_parms_object;
+        if (dict->contains(CommonNames::DecodeParms)) {
+            decode_parms_object = TRY(dict->get_object(m_document, CommonNames::DecodeParms));
+            if (decode_parms_object->is<ArrayObject>()) {
+                auto decode_parms_array = decode_parms_object->cast<ArrayObject>();
+                for (size_t i = 0; i < decode_parms_array->size(); ++i) {
+                    // FIXME: This entry may be the null object instead
+                    RefPtr<DictObject> decode_parms = decode_parms_array->at(i).get<NonnullRefPtr<Object>>()->cast<DictObject>();
+                    decode_parms_vector.append(decode_parms);
+                }
+            } else {
+                decode_parms_vector.append(decode_parms_object->cast<DictObject>());
+            }
+        }
+
+        // A malformed document may supply fewer parameter dictionaries than filters;
+        // filters without one fall back to their defaults instead of aborting
+        for (size_t i = 0; i < filters.size(); ++i) {
+            RefPtr<DictObject> decode_parms;
+            if (i < decode_parms_vector.size())
+                decode_parms = decode_parms_vector.at(i);
+
+            stream_object->buffer() = TRY(Filter::decode(stream_object->bytes(), filters.at(i), decode_parms));
+        }
     }
 
     return stream_object;