1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 06:27:45 +00:00

LibCompress: Handle literal-only LZ77 streams in DeflateCompressor

Very incompressible data can sometimes produce no back-references, in
which case no distance Huffman code is created (as it is not needed).
Therefore, VERIFY that the distance code exists only when it is actually
needed for writing the stream.
This commit is contained in:
Idan Horowitz 2021-03-14 11:43:18 +02:00 committed by Andreas Kling
parent 1db943e146
commit 7e587a615e
3 changed files with 16 additions and 8 deletions

View file

@ -792,21 +792,22 @@ size_t DeflateCompressor::dynamic_block_length(const Array<u8, max_huffman_liter
return length + huffman_block_length(literal_bit_lengths, distance_bit_lengths);
}
void DeflateCompressor::write_huffman(const CanonicalCode& literal_code, const CanonicalCode& distance_code)
void DeflateCompressor::write_huffman(const CanonicalCode& literal_code, const Optional<CanonicalCode>& distance_code)
{
auto has_distances = distance_code.has_value();
for (size_t i = 0; i < m_pending_symbol_size; i++) {
if (m_symbol_buffer[i].distance == 0) {
literal_code.write_symbol(m_output_stream, m_symbol_buffer[i].literal);
continue;
}
VERIFY(has_distances);
auto symbol = length_to_symbol[m_symbol_buffer[i].length];
literal_code.write_symbol(m_output_stream, symbol);
// Emit extra bits if needed
m_output_stream.write_bits(m_symbol_buffer[i].length - packed_length_symbols[symbol - 257].base_length, packed_length_symbols[symbol - 257].extra_bits);
auto base_distance = distance_to_base(m_symbol_buffer[i].distance);
distance_code.write_symbol(m_output_stream, base_distance);
distance_code.value().write_symbol(m_output_stream, base_distance);
// Emit extra bits if needed
m_output_stream.write_bits(m_symbol_buffer[i].distance - packed_distances[base_distance].base_distance, packed_distances[base_distance].extra_bits);
}
@ -880,7 +881,7 @@ size_t DeflateCompressor::encode_block_lengths(const Array<u8, max_huffman_liter
return encode_huffman_lengths(all_lengths, lengths_count, encoded_lengths);
}
void DeflateCompressor::write_dynamic_huffman(const CanonicalCode& literal_code, size_t literal_code_count, const CanonicalCode& distance_code, size_t distance_code_count, const Array<u8, 19>& code_lengths_bit_lengths, size_t code_length_count, const Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths, size_t encoded_lengths_count)
void DeflateCompressor::write_dynamic_huffman(const CanonicalCode& literal_code, size_t literal_code_count, const Optional<CanonicalCode>& distance_code, size_t distance_code_count, const Array<u8, 19>& code_lengths_bit_lengths, size_t code_length_count, const Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths, size_t encoded_lengths_count)
{
m_output_stream.write_bits(literal_code_count - 257, 5);
m_output_stream.write_bits(distance_code_count - 1, 5);
@ -991,8 +992,7 @@ void DeflateCompressor::flush()
auto literal_code = CanonicalCode::from_bytes(dynamic_literal_bit_lengths);
VERIFY(literal_code.has_value());
auto distance_code = CanonicalCode::from_bytes(dynamic_distance_bit_lengths);
VERIFY(distance_code.has_value());
write_dynamic_huffman(literal_code.value(), literal_code_count, distance_code.value(), distance_code_count, code_lengths_bit_lengths, code_lengths_count, encoded_lengths, encoded_lengths_count);
write_dynamic_huffman(literal_code.value(), literal_code_count, distance_code, distance_code_count, code_lengths_bit_lengths, code_lengths_count, encoded_lengths, encoded_lengths_count);
}
if (m_finished)
m_output_stream.align_to_byte_boundary();

View file

@ -190,10 +190,10 @@ private:
template<size_t Size>
static void generate_huffman_lengths(Array<u8, Size>& lengths, const Array<u16, Size>& frequencies, size_t max_bit_length, u16 frequency_cap = UINT16_MAX);
size_t huffman_block_length(const Array<u8, max_huffman_literals>& literal_bit_lengths, const Array<u8, max_huffman_distances>& distance_bit_lengths);
void write_huffman(const CanonicalCode& literal_code, const CanonicalCode& distance_code);
void write_huffman(const CanonicalCode& literal_code, const Optional<CanonicalCode>& distance_code);
static size_t encode_huffman_lengths(const Array<u8, max_huffman_literals + max_huffman_distances>& lengths, size_t lengths_count, Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths);
size_t encode_block_lengths(const Array<u8, max_huffman_literals>& literal_bit_lengths, const Array<u8, max_huffman_distances>& distance_bit_lengths, Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths, size_t& literal_code_count, size_t& distance_code_count);
void write_dynamic_huffman(const CanonicalCode& literal_code, size_t literal_code_count, const CanonicalCode& distance_code, size_t distance_code_count, const Array<u8, 19>& code_lengths_bit_lengths, size_t code_length_count, const Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths, size_t encoded_lengths_count);
void write_dynamic_huffman(const CanonicalCode& literal_code, size_t literal_code_count, const Optional<CanonicalCode>& distance_code, size_t distance_code_count, const Array<u8, 19>& code_lengths_bit_lengths, size_t code_length_count, const Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths, size_t encoded_lengths_count);
size_t uncompressed_block_length();
size_t fixed_block_length();

View file

@ -168,6 +168,14 @@ TEST_CASE(deflate_round_trip_compress_large)
EXPECT(uncompressed.value() == original);
}
TEST_CASE(deflate_compress_literals)
{
    // Regression input: with our LZ77 implementation this byte sequence is known to yield
    // no back references, even at the highest compression settings, so the compressor has
    // to emit a stream made up of literals only.
    Array<u8, 0x13> literal_only_input { 0, 0, 0, 0, 0x72, 0, 0, 0xee, 0, 0, 0, 0x26, 0, 0, 0, 0x28, 0, 0, 0x72 };
    auto level = Compress::DeflateCompressor::CompressionLevel::GOOD;
    auto result = Compress::DeflateCompressor::compress_all(literal_only_input, level);
    // Compression must succeed rather than fail for lack of a distance code.
    EXPECT(result.has_value());
}
TEST_CASE(zlib_decompress_simple)
{
const Array<u8, 40> compressed {