From 7e587a615e116a9a91ae647637bb1d939f39aea9 Mon Sep 17 00:00:00 2001 From: Idan Horowitz Date: Sun, 14 Mar 2021 11:43:18 +0200 Subject: [PATCH] LibCompress: Handle literal only lz77 streams in DeflateCompressor Very incompressible data could sometimes produce no backreferences which would result in no distance huffman code being created (as it was not needed), so VERIFY the code exists only if it is actually needed for writing the stream. --- Userland/Libraries/LibCompress/Deflate.cpp | 12 ++++++------ Userland/Libraries/LibCompress/Deflate.h | 4 ++-- Userland/Utilities/test-compress.cpp | 8 ++++++++ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/Userland/Libraries/LibCompress/Deflate.cpp b/Userland/Libraries/LibCompress/Deflate.cpp index cf6115956c..71cdec5ce9 100644 --- a/Userland/Libraries/LibCompress/Deflate.cpp +++ b/Userland/Libraries/LibCompress/Deflate.cpp @@ -792,21 +792,22 @@ size_t DeflateCompressor::dynamic_block_length(const Array& distance_code) { + auto has_distances = distance_code.has_value(); for (size_t i = 0; i < m_pending_symbol_size; i++) { if (m_symbol_buffer[i].distance == 0) { literal_code.write_symbol(m_output_stream, m_symbol_buffer[i].literal); continue; } - + VERIFY(has_distances); auto symbol = length_to_symbol[m_symbol_buffer[i].length]; literal_code.write_symbol(m_output_stream, symbol); // Emit extra bits if needed m_output_stream.write_bits(m_symbol_buffer[i].length - packed_length_symbols[symbol - 257].base_length, packed_length_symbols[symbol - 257].extra_bits); auto base_distance = distance_to_base(m_symbol_buffer[i].distance); - distance_code.write_symbol(m_output_stream, base_distance); + distance_code.value().write_symbol(m_output_stream, base_distance); // Emit extra bits if needed m_output_stream.write_bits(m_symbol_buffer[i].distance - packed_distances[base_distance].base_distance, packed_distances[base_distance].extra_bits); } @@ -880,7 +881,7 @@ size_t DeflateCompressor::encode_block_lengths(const Array& code_lengths_bit_lengths, size_t code_length_count, const Array& encoded_lengths, size_t encoded_lengths_count) +void DeflateCompressor::write_dynamic_huffman(const CanonicalCode& literal_code, size_t literal_code_count, const Optional& distance_code, size_t distance_code_count, const Array& code_lengths_bit_lengths, size_t code_length_count, const Array& encoded_lengths, size_t encoded_lengths_count) { m_output_stream.write_bits(literal_code_count - 257, 5); m_output_stream.write_bits(distance_code_count - 1, 5); @@ -991,8 +992,7 @@ void DeflateCompressor::flush() auto literal_code = CanonicalCode::from_bytes(dynamic_literal_bit_lengths); VERIFY(literal_code.has_value()); auto distance_code = CanonicalCode::from_bytes(dynamic_distance_bit_lengths); - VERIFY(distance_code.has_value()); - write_dynamic_huffman(literal_code.value(), literal_code_count, distance_code.value(), distance_code_count, code_lengths_bit_lengths, code_lengths_count, encoded_lengths, encoded_lengths_count); + write_dynamic_huffman(literal_code.value(), literal_code_count, distance_code, distance_code_count, code_lengths_bit_lengths, code_lengths_count, encoded_lengths, encoded_lengths_count); } if (m_finished) m_output_stream.align_to_byte_boundary(); diff --git a/Userland/Libraries/LibCompress/Deflate.h b/Userland/Libraries/LibCompress/Deflate.h index b933f60fcb..fb5e171b3c 100644 --- a/Userland/Libraries/LibCompress/Deflate.h +++ b/Userland/Libraries/LibCompress/Deflate.h @@ -190,10 +190,10 @@ private: template static void generate_huffman_lengths(Array& lengths, const Array& frequencies, size_t max_bit_length, u16 frequency_cap = UINT16_MAX); size_t huffman_block_length(const Array& literal_bit_lengths, const Array& distance_bit_lengths); - void write_huffman(const CanonicalCode& literal_code, const CanonicalCode& distance_code); + void write_huffman(const CanonicalCode& literal_code, const Optional& distance_code); static size_t encode_huffman_lengths(const Array& lengths, size_t lengths_count, Array& encoded_lengths); size_t encode_block_lengths(const Array& literal_bit_lengths, const Array& distance_bit_lengths, Array& encoded_lengths, size_t& literal_code_count, size_t& distance_code_count); - void write_dynamic_huffman(const CanonicalCode& literal_code, size_t literal_code_count, const CanonicalCode& distance_code, size_t distance_code_count, const Array& code_lengths_bit_lengths, size_t code_length_count, const Array& encoded_lengths, size_t encoded_lengths_count); + void write_dynamic_huffman(const CanonicalCode& literal_code, size_t literal_code_count, const Optional& distance_code, size_t distance_code_count, const Array& code_lengths_bit_lengths, size_t code_length_count, const Array& encoded_lengths, size_t encoded_lengths_count); size_t uncompressed_block_length(); size_t fixed_block_length(); diff --git a/Userland/Utilities/test-compress.cpp b/Userland/Utilities/test-compress.cpp index 4c7f4fc798..9752c2b7e9 100644 --- a/Userland/Utilities/test-compress.cpp +++ b/Userland/Utilities/test-compress.cpp @@ -168,6 +168,14 @@ TEST_CASE(deflate_round_trip_compress_large) EXPECT(uncompressed.value() == original); } +TEST_CASE(deflate_compress_literals) +{ + // This byte array is known to not produce any back references with our lz77 implementation even at the highest compression settings + Array test { 0, 0, 0, 0, 0x72, 0, 0, 0xee, 0, 0, 0, 0x26, 0, 0, 0, 0x28, 0, 0, 0x72 }; + auto compressed = Compress::DeflateCompressor::compress_all(test, Compress::DeflateCompressor::CompressionLevel::GOOD); + EXPECT(compressed.has_value()); +} + TEST_CASE(zlib_decompress_simple) { const Array compressed {