diff --git a/Userland/Libraries/LibCompress/Deflate.cpp b/Userland/Libraries/LibCompress/Deflate.cpp index 2340630033..b2bb4d0475 100644 --- a/Userland/Libraries/LibCompress/Deflate.cpp +++ b/Userland/Libraries/LibCompress/Deflate.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2020, the SerenityOS developers + * Copyright (c) 2021, Idan Horowitz * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,8 +27,10 @@ #include #include +#include #include #include +#include #include @@ -41,13 +44,7 @@ const CanonicalCode& CanonicalCode::fixed_literal_codes() if (initialized) return code; - Array data; - data.span().slice(0, 144 - 0).fill(8); - data.span().slice(144, 256 - 144).fill(9); - data.span().slice(256, 280 - 256).fill(7); - data.span().slice(280, 288 - 280).fill(8); - - code = CanonicalCode::from_bytes(data).value(); + code = CanonicalCode::from_bytes(fixed_literal_bit_lengths).value(); initialized = true; return code; @@ -61,10 +58,7 @@ const CanonicalCode& CanonicalCode::fixed_distance_codes() if (initialized) return code; - Array data; - data.span().fill(5); - - code = CanonicalCode::from_bytes(data).value(); + code = CanonicalCode::from_bytes(fixed_distance_bit_lengths).value(); initialized = true; return code; @@ -76,6 +70,22 @@ Optional CanonicalCode::from_bytes(ReadonlyBytes bytes) CanonicalCode code; + auto non_zero_symbols = 0; + auto last_non_zero = -1; + for (size_t i = 0; i < bytes.size(); i++) { + if (bytes[i] != 0) { + non_zero_symbols++; + last_non_zero = i; + } + } + if (non_zero_symbols == 1) { // special case - only 1 symbol + code.m_symbol_codes.append(0b10); + code.m_symbol_values.append(last_non_zero); + code.m_bit_codes[last_non_zero] = 0; + code.m_bit_code_lengths[last_non_zero] = 1; + return code; + } + auto next_code = 0; for (size_t code_length = 1; code_length <= 15; ++code_length) { next_code <<= 1; @@ -90,6 +100,8 @@ Optional CanonicalCode::from_bytes(ReadonlyBytes bytes) code.m_symbol_codes.append(start_bit | next_code); code.m_symbol_values.append(symbol); + code.m_bit_codes[symbol] = fast_reverse16(start_bit | next_code, code_length); // DEFLATE writes huffman encoded symbols as lsb-first + code.m_bit_code_lengths[symbol] = code_length; next_code++; } @@ -118,6 +130,11 @@ u32 CanonicalCode::read_symbol(InputBitStream& stream) const } } +void CanonicalCode::write_symbol(OutputBitStream& stream, u32 symbol) const +{ + stream.write_bits(m_bit_codes[symbol], m_bit_code_lengths[symbol]); +} + DeflateDecompressor::CompressedBlock::CompressedBlock(DeflateDecompressor& decompressor, CanonicalCode literal_codes, Optional distance_codes) : m_decompressor(decompressor) , m_literal_codes(literal_codes) @@ -366,8 +383,7 @@ void DeflateDecompressor::decode_codes(CanonicalCode& literal_code, Optional(symbol)); continue; - } else if (symbol == 17) { + } else if (symbol == DeflateSpecialCodeLengths::ZEROS) { auto nrepeat = 3 + m_input_stream.read_bits(3); for (size_t j = 0; j < nrepeat; ++j) code_lengths.append(0); continue; - } else if (symbol == 18) { + } else if (symbol == DeflateSpecialCodeLengths::LONG_ZEROS) { auto nrepeat = 11 + m_input_stream.read_bits(7); for (size_t j = 0; j < nrepeat; ++j) code_lengths.append(0); continue; } else { - VERIFY(symbol == 16); + VERIFY(symbol == DeflateSpecialCodeLengths::COPY); if (code_lengths.is_empty()) { set_fatal_error(); @@ -448,4 +464,571 @@ void DeflateDecompressor::decode_codes(CanonicalCode& literal_code, Optional(m_compression_level)]) + , m_output_stream(stream) +{ + m_symbol_frequencies.fill(0); + m_distance_frequencies.fill(0); +} + +DeflateCompressor::~DeflateCompressor() +{ + VERIFY(m_finished); +} + +size_t DeflateCompressor::write(ReadonlyBytes bytes) +{ + VERIFY(!m_finished); + + if (bytes.size() == 0) + return 0; // recursion base case + + auto n_written = bytes.copy_trimmed_to(pending_block().slice(m_pending_block_size)); + m_pending_block_size += n_written; + + if (m_pending_block_size == block_size) + flush(); + + return n_written + write(bytes.slice(n_written)); +} + +bool DeflateCompressor::write_or_error(ReadonlyBytes bytes) +{ + if (write(bytes) < bytes.size()) { + set_fatal_error(); + return false; + } + + return true; +} + +// Knuth's multiplicative hash on 4 bytes +u16 DeflateCompressor::hash_sequence(const u8* bytes) +{ + constexpr const u32 knuth_constant = 2654435761; // shares no common factors with 2^32 + return ((bytes[0] | bytes[1] << 8 | bytes[2] << 16 | bytes[3] << 24) * knuth_constant) >> (32 - hash_bits); +} + +size_t DeflateCompressor::compare_match_candidate(size_t start, size_t candidate, size_t previous_match_length, size_t maximum_match_length) +{ + VERIFY(previous_match_length < maximum_match_length); + + // We firstly check that the match is at least (prev_match_length + 1) long, we check backwards as theres a higher chance the end mismatches + for (ssize_t i = previous_match_length; i >= 0; i--) { + if (m_rolling_window[start + i] != m_rolling_window[candidate + i]) + return 0; + } + + // Find the actual length + auto match_length = previous_match_length + 1; + while (match_length < maximum_match_length && m_rolling_window[start + match_length] == m_rolling_window[candidate + match_length]) { + match_length++; + } + + VERIFY(match_length > previous_match_length); + VERIFY(match_length <= maximum_match_length); + return match_length; +} + +size_t DeflateCompressor::find_back_match(size_t start, u16 hash, size_t previous_match_length, size_t maximum_match_length, size_t& match_position) +{ + auto max_chain_length = m_compression_constants.max_chain; + if (previous_match_length == 0) + previous_match_length = min_match_length - 1; // we only care about matches that are at least min_match_length long + if (previous_match_length >= maximum_match_length) + return 0; // we cant improve a maximum length match + if (previous_match_length >= m_compression_constants.max_lazy_length) + return 0; // the previous match is already pretty, we shouldn't waste another full search + if (previous_match_length >= m_compression_constants.good_match_length) + max_chain_length /= 4; // we already have a pretty good much, so do a shorter search + + auto candidate = m_hash_head[hash]; + auto match_found = false; + while (max_chain_length--) { + if (candidate == empty_slot) + break; // no remaining candidates + + VERIFY(candidate < start); + if (start - candidate > window_size) + break; // outside the window + + auto match_length = compare_match_candidate(start, candidate, previous_match_length, maximum_match_length); + + if (match_length != 0) { + match_found = true; + match_position = candidate; + previous_match_length = match_length; + + if (match_length == maximum_match_length) + return match_length; // bail if we got the maximum possible length + } + + candidate = m_hash_prev[candidate % window_size]; + } + if (!match_found) + return 0; // we didnt find any matches + return previous_match_length; // we found matches, but they were at most previous_match_length long +} + +ALWAYS_INLINE u8 DeflateCompressor::distance_to_base(u16 distance) +{ + return (distance <= 256) ? distance_to_base_lo[distance - 1] : distance_to_base_hi[(distance - 1) >> 7]; +} + +template +void DeflateCompressor::generate_huffman_lengths(Array& lengths, const Array& frequencies, size_t max_bit_length) +{ + VERIFY((1u << max_bit_length) >= Size); + u16 heap_keys[Size]; // Used for O(n) heap construction + u16 heap_values[Size]; + + u16 huffman_links[Size * 2 + 1] = { 0 }; + u16 frequency_cap = UINT16_MAX; +try_again: + size_t non_zero_freqs = 0; + for (size_t i = 0; i < Size; i++) { + auto frequency = frequencies[i]; + if (frequency == 0) + continue; + + if (frequency > frequency_cap) { + frequency = frequency_cap; + } + + heap_keys[non_zero_freqs] = frequency; // sort symbols by frequency + heap_values[non_zero_freqs] = Size + non_zero_freqs; // huffman_links "links" + non_zero_freqs++; + } + + // special case for only 1 used symbol + if (non_zero_freqs < 2) { + for (size_t i = 0; i < Size; i++) + lengths[i] = (frequencies[i] == 0) ? 0 : 1; + return; + } + + BinaryHeap heap { heap_keys, heap_values, non_zero_freqs }; + + // build the huffman tree - binary heap is used for efficient frequency comparisons + while (heap.size() > 1) { + u16 lowest_frequency = heap.peek_min_key(); + u16 lowest_link = heap.pop_min(); + u16 second_lowest_frequency = heap.peek_min_key(); + u16 second_lowest_link = heap.pop_min(); + + u16 new_link = heap.size() + 2; + + heap.insert(lowest_frequency + second_lowest_frequency, new_link); + + huffman_links[lowest_link] = new_link; + huffman_links[second_lowest_link] = new_link; + } + + non_zero_freqs = 0; + for (size_t i = 0; i < Size; i++) { + if (frequencies[i] == 0) { + lengths[i] = 0; + continue; + } + + u16 link = huffman_links[Size + non_zero_freqs]; + non_zero_freqs++; + + size_t bit_length = 1; + while (link != 2) { + bit_length++; + link = huffman_links[link]; + } + + if (bit_length > max_bit_length) { + VERIFY(frequency_cap != 1); + frequency_cap /= 2; + goto try_again; // FIXME: gotos are ugly, but i cant think of a good way to flatten this + } + + lengths[i] = bit_length; + } +} + +void DeflateCompressor::lz77_compress_block() +{ + for (auto& slot : m_hash_head) { // initialize chained hash table + slot = empty_slot; + } + + auto insert_hash = [&](auto pos, auto hash) { + auto window_pos = pos % window_size; + m_hash_prev[window_pos] = m_hash_head[hash]; + m_hash_head[hash] = window_pos; + }; + + auto emit_literal = [&](auto literal) { + VERIFY(m_pending_symbol_size <= block_size + 1); + auto index = m_pending_symbol_size++; + m_symbol_buffer[index].distance = 0; + m_symbol_buffer[index].literal = literal; + m_symbol_frequencies[literal]++; + }; + + auto emit_back_reference = [&](auto distance, auto length) { + VERIFY(m_pending_symbol_size <= block_size + 1); + auto index = m_pending_symbol_size++; + m_symbol_buffer[index].distance = distance; + m_symbol_buffer[index].length = length; + m_symbol_frequencies[length_to_symbol[length]]++; + m_distance_frequencies[distance_to_base(distance)]++; + }; + + size_t previous_match_length = 0; + size_t previous_match_position = 0; + + VERIFY(m_compression_constants.great_match_length <= max_match_length); + + // our block starts at block_size and is m_pending_block_size in length + auto block_end = block_size + m_pending_block_size; + size_t current_position; + for (current_position = block_size; current_position < block_end - min_match_length + 1; current_position++) { + auto hash = hash_sequence(&m_rolling_window[current_position]); + size_t match_position; + auto match_length = find_back_match(current_position, hash, previous_match_length, + min(m_compression_constants.great_match_length, block_end - current_position), match_position); + + insert_hash(current_position, hash); + + // if the previous match is as good as the new match, just use it + if (previous_match_length != 0 && previous_match_length >= match_length) { + emit_back_reference((current_position - 1) - previous_match_position, previous_match_length); + + // skip all the bytes that are included in this match + for (size_t j = current_position + 1; j < min(current_position - 1 + previous_match_length, block_end - min_match_length + 1); j++) { + insert_hash(j, hash_sequence(&m_rolling_window[j])); + } + current_position = (current_position - 1) + previous_match_length - 1; + previous_match_length = 0; + continue; + } + + if (match_length == 0) { + VERIFY(previous_match_length == 0); + emit_literal(m_rolling_window[current_position]); + continue; + } + + // if this is a lazy match, and the new match is better than the old one, output previous as literal + if (previous_match_length != 0) { + emit_literal(m_rolling_window[current_position - 1]); + } + + previous_match_length = match_length; + previous_match_position = match_position; + } + + // clean up leftover lazy match + if (previous_match_length != 0) { + emit_back_reference((current_position - 1) - previous_match_position, previous_match_length); + current_position = (current_position - 1) + previous_match_length; + } + + // output remaining literals + while (current_position < block_end) { + emit_literal(m_rolling_window[current_position++]); + } +} + +size_t DeflateCompressor::huffman_block_length(const Array& literal_bit_lengths, const Array& distance_bit_lengths) +{ + size_t length = 0; + + for (size_t i = 0; i < 286; i++) { + auto frequency = m_symbol_frequencies[i]; + length += literal_bit_lengths[i] * frequency; + + if (i >= 257) // back reference length symbols + length += packed_length_symbols[i - 257].extra_bits * frequency; + } + + for (size_t i = 0; i < 30; i++) { + auto frequency = m_distance_frequencies[i]; + length += distance_bit_lengths[i] * frequency; + length += packed_distances[i].extra_bits * frequency; + } + + return length; +} + +size_t DeflateCompressor::uncompressed_block_length() +{ + auto padding = 8 - ((m_output_stream.bit_offset() + 3) % 8); + // 3 bit block header + align to byte + 2 * 16 bit length fields + block contents + return 3 + padding + (2 * 16) + m_pending_block_size * 8; +} + +size_t DeflateCompressor::fixed_block_length() +{ + // block header + fixed huffman encoded block contents + return 3 + huffman_block_length(fixed_literal_bit_lengths, fixed_distance_bit_lengths); +} + +size_t DeflateCompressor::dynamic_block_length(const Array& literal_bit_lengths, const Array& distance_bit_lengths, const Array& code_lengths_bit_lengths, const Array& code_lengths_frequencies, size_t code_lengths_count) +{ + // block header + literal code count + distance code count + code length count + auto length = 3 + 5 + 5 + 4; + + // 3 bits per code_length + length += 3 * code_lengths_count; + + for (size_t i = 0; i < code_lengths_frequencies.size(); i++) { + auto frequency = code_lengths_frequencies[i]; + length += code_lengths_bit_lengths[i] * frequency; + + if (i == DeflateSpecialCodeLengths::COPY) { + length += 2 * frequency; + } else if (i == DeflateSpecialCodeLengths::ZEROS) { + length += 3 * frequency; + } else if (i == DeflateSpecialCodeLengths::LONG_ZEROS) { + length += 7 * frequency; + } + } + + return length + huffman_block_length(literal_bit_lengths, distance_bit_lengths); +} + +void DeflateCompressor::write_huffman(const CanonicalCode& literal_code, const CanonicalCode& distance_code) +{ + for (size_t i = 0; i < m_pending_symbol_size; i++) { + if (m_symbol_buffer[i].distance == 0) { + literal_code.write_symbol(m_output_stream, m_symbol_buffer[i].literal); + continue; + } + + auto symbol = length_to_symbol[m_symbol_buffer[i].length]; + literal_code.write_symbol(m_output_stream, symbol); + // Emit extra bits if needed + m_output_stream.write_bits(m_symbol_buffer[i].length - packed_length_symbols[symbol - 257].base_length, packed_length_symbols[symbol - 257].extra_bits); + + auto base_distance = distance_to_base(m_symbol_buffer[i].distance); + distance_code.write_symbol(m_output_stream, base_distance); + // Emit extra bits if needed + m_output_stream.write_bits(m_symbol_buffer[i].distance - packed_distances[base_distance].base_distance, packed_distances[base_distance].extra_bits); + } +} + +size_t DeflateCompressor::encode_huffman_lengths(const Array& lengths, size_t lengths_count, Array& encoded_lengths) +{ + size_t encoded_count = 0; + size_t i = 0; + while (i < lengths_count) { + if (lengths[i] == 0) { + auto zero_count = 0; + for (size_t j = i; j < min(lengths_count, i + 138) && lengths[j] == 0; j++) + zero_count++; + + if (zero_count < 3) { // below minimum repeated zero count + encoded_lengths[encoded_count++].symbol = 0; + i++; + continue; + } + + if (zero_count <= 10) { + encoded_lengths[encoded_count].symbol = DeflateSpecialCodeLengths::ZEROS; + encoded_lengths[encoded_count++].count = zero_count; + } else { + encoded_lengths[encoded_count].symbol = DeflateSpecialCodeLengths::LONG_ZEROS; + encoded_lengths[encoded_count++].count = zero_count; + } + i += zero_count; + continue; + } + + encoded_lengths[encoded_count++].symbol = lengths[i++]; + + auto copy_count = 0; + for (size_t j = i; j < min(lengths_count, i + 6) && lengths[j] == lengths[i - 1]; j++) + copy_count++; + + if (copy_count >= 3) { + encoded_lengths[encoded_count].symbol = DeflateSpecialCodeLengths::COPY; + encoded_lengths[encoded_count++].count = copy_count; + i += copy_count; + continue; + } + } + return encoded_count; +} + +size_t DeflateCompressor::encode_block_lengths(const Array& literal_bit_lengths, const Array& distance_bit_lengths, Array& encoded_lengths, size_t& literal_code_count, size_t& distance_code_count) +{ + literal_code_count = max_huffman_literals; + distance_code_count = max_huffman_distances; + + VERIFY(literal_bit_lengths[256] != 0); // Make sure at least the EndOfBlock marker is present + while (literal_bit_lengths[literal_code_count - 1] == 0) + literal_code_count--; + + // Drop trailing zero lengths, keeping at least one + while (distance_bit_lengths[distance_code_count - 1] == 0 && distance_code_count > 1) + distance_code_count--; + + Array all_lengths {}; + size_t lengths_count = 0; + for (size_t i = 0; i < literal_code_count; i++) { + all_lengths[lengths_count++] = literal_bit_lengths[i]; + } + for (size_t i = 0; i < distance_code_count; i++) { + all_lengths[lengths_count++] = distance_bit_lengths[i]; + } + + return encode_huffman_lengths(all_lengths, lengths_count, encoded_lengths); +} + +void DeflateCompressor::write_dynamic_huffman(const CanonicalCode& literal_code, size_t literal_code_count, const CanonicalCode& distance_code, size_t distance_code_count, const Array& code_lengths_bit_lengths, size_t code_length_count, const Array& encoded_lengths, size_t encoded_lengths_count) +{ + m_output_stream.write_bits(literal_code_count - 257, 5); + m_output_stream.write_bits(distance_code_count - 1, 5); + m_output_stream.write_bits(code_length_count - 4, 4); + + for (size_t i = 0; i < code_length_count; i++) { + m_output_stream.write_bits(code_lengths_bit_lengths[code_lengths_code_lengths_order[i]], 3); + } + + auto code_lengths_code = CanonicalCode::from_bytes(code_lengths_bit_lengths); + VERIFY(code_lengths_code.has_value()); + for (size_t i = 0; i < encoded_lengths_count; i++) { + auto encoded_length = encoded_lengths[i]; + code_lengths_code->write_symbol(m_output_stream, encoded_length.symbol); + if (encoded_length.symbol == DeflateSpecialCodeLengths::COPY) { + m_output_stream.write_bits(encoded_length.count - 3, 2); + } else if (encoded_length.symbol == DeflateSpecialCodeLengths::ZEROS) { + m_output_stream.write_bits(encoded_length.count - 3, 3); + } else if (encoded_length.symbol == DeflateSpecialCodeLengths::LONG_ZEROS) { + m_output_stream.write_bits(encoded_length.count - 11, 7); + } + } + + write_huffman(literal_code, distance_code); +} + +void DeflateCompressor::flush() +{ + if (m_output_stream.handle_any_error()) { + set_fatal_error(); + return; + } + + m_output_stream.write_bit(m_finished); + + // if this is just an empty block to signify the end of the deflate stream use the smallest block possible (10 bits total) + if (m_pending_block_size == 0) { + VERIFY(m_finished); // we shouldn't be writing empty blocks unless this is the final one + m_output_stream.write_bits(0b01, 2); // fixed huffman codes + m_output_stream.write_bits(0b0000000, 7); // end of block symbol + m_output_stream.align_to_byte_boundary(); + return; + } + + auto write_uncompressed = [&]() { + m_output_stream.write_bits(0b00, 2); // no compression + m_output_stream.align_to_byte_boundary(); + LittleEndian len = m_pending_block_size; + m_output_stream << len; + LittleEndian nlen = ~m_pending_block_size; + m_output_stream << nlen; + m_output_stream.write_or_error(pending_block().slice(0, m_pending_block_size)); + }; + + if (m_compression_level == CompressionLevel::STORE) { // disabled compression fast path + write_uncompressed(); + m_pending_block_size = 0; + return; + } + + // The following implementation of lz77 compression and huffman encoding is based on the reference implementation by Hans Wennborg https://www.hanshq.net/zip.html + + // this reads from the pending block and writes to m_symbol_buffer + lz77_compress_block(); + + // insert EndOfBlock marker to the symbol buffer + m_symbol_buffer[m_pending_symbol_size].distance = 0; + m_symbol_buffer[m_pending_symbol_size++].literal = 256; + m_symbol_frequencies[256]++; + + // generate optimal dynamic huffman code lengths + Array dynamic_literal_bit_lengths {}; + Array dynamic_distance_bit_lengths {}; + generate_huffman_lengths(dynamic_literal_bit_lengths, m_symbol_frequencies, 15); // deflate data huffman can use up to 15 bits per symbol + generate_huffman_lengths(dynamic_distance_bit_lengths, m_distance_frequencies, 15); + + // encode literal and distance lengths together in deflate format + Array encoded_lengths {}; + size_t literal_code_count; + size_t distance_code_count; + auto encoded_lengths_count = encode_block_lengths(dynamic_literal_bit_lengths, dynamic_distance_bit_lengths, encoded_lengths, literal_code_count, distance_code_count); + + // count code length frequencies + Array code_lengths_frequencies { 0 }; + for (size_t i = 0; i < encoded_lengths_count; i++) { + code_lengths_frequencies[encoded_lengths[i].symbol]++; + } + // generate optimal huffman code lengths code lengths + Array code_lengths_bit_lengths {}; + generate_huffman_lengths(code_lengths_bit_lengths, code_lengths_frequencies, 7); // deflate code length huffman can use up to 7 bits per symbol + // calculate actual code length code lengths count (without trailing zeros) + auto code_lengths_count = code_lengths_bit_lengths.size(); + while (code_lengths_bit_lengths[code_lengths_code_lengths_order[code_lengths_count - 1]] == 0) + code_lengths_count--; + + auto uncompressed_size = uncompressed_block_length(); + auto fixed_huffman_size = fixed_block_length(); + auto dynamic_huffman_size = dynamic_block_length(dynamic_literal_bit_lengths, dynamic_distance_bit_lengths, code_lengths_bit_lengths, code_lengths_frequencies, code_lengths_count); + + // If the compression somehow didnt reduce the size enough, just write out the block uncompressed as it allows for much faster decompression + if (uncompressed_size <= min(fixed_huffman_size, dynamic_huffman_size)) { + write_uncompressed(); + } else if (fixed_huffman_size <= dynamic_huffman_size) { // If the fixed and dynamic huffman codes come out the same size, prefer the fixed version, as it takes less time to decode + m_output_stream.write_bits(0b01, 2); // fixed huffman codes + write_huffman(CanonicalCode::fixed_literal_codes(), CanonicalCode::fixed_distance_codes()); + } else { + m_output_stream.write_bits(0b10, 2); // dynamic huffman codes + auto literal_code = CanonicalCode::from_bytes(dynamic_literal_bit_lengths); + VERIFY(literal_code.has_value()); + auto distance_code = CanonicalCode::from_bytes(dynamic_distance_bit_lengths); + VERIFY(distance_code.has_value()); + write_dynamic_huffman(literal_code.value(), literal_code_count, distance_code.value(), distance_code_count, code_lengths_bit_lengths, code_lengths_count, encoded_lengths, encoded_lengths_count); + } + if (m_finished) + m_output_stream.align_to_byte_boundary(); + + // reset all block specific members + m_pending_block_size = 0; + m_pending_symbol_size = 0; + m_symbol_frequencies.fill(0); + m_distance_frequencies.fill(0); + // On the final block this copy will potentially produce an invalid search window, but since its the final block we dont care + pending_block().copy_trimmed_to({ m_rolling_window, block_size }); +} + +void DeflateCompressor::final_flush() +{ + VERIFY(!m_finished); + m_finished = true; + flush(); +} + +Optional DeflateCompressor::compress_all(const ReadonlyBytes& bytes, CompressionLevel compression_level) +{ + DuplexMemoryStream output_stream; + DeflateCompressor deflate_stream { output_stream, compression_level }; + + deflate_stream.write_or_error(bytes); + + deflate_stream.final_flush(); + + if (deflate_stream.handle_any_error()) + return {}; + + return output_stream.copy_into_contiguous_buffer(); +} + } diff --git a/Userland/Libraries/LibCompress/Deflate.h b/Userland/Libraries/LibCompress/Deflate.h index c069ba6951..05d0f3cfe0 100644 --- a/Userland/Libraries/LibCompress/Deflate.h +++ b/Userland/Libraries/LibCompress/Deflate.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2020, the SerenityOS developers + * Copyright (c) 2021, Idan Horowitz * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,6 +32,7 @@ #include #include #include +#include namespace Compress { @@ -38,6 +40,7 @@ class CanonicalCode { public: CanonicalCode() = default; u32 read_symbol(InputBitStream&) const; + void write_symbol(OutputBitStream&, u32) const; static const CanonicalCode& fixed_literal_codes(); static const CanonicalCode& fixed_distance_codes(); @@ -45,8 +48,13 @@ public: static Optional from_bytes(ReadonlyBytes); private: + // Decompression - indexed by code Vector m_symbol_codes; Vector m_symbol_values; + + // Compression - indexed by symbol + Array m_bit_codes {}; // deflate uses a maximum of 288 symbols (maximum of 32 for distances) + Array m_bit_code_lengths {}; }; class DeflateDecompressor final : public InputStream { @@ -111,7 +119,109 @@ private: }; InputBitStream m_input_stream; - CircularDuplexStream<32 * 1024> m_output_stream; + CircularDuplexStream<32 * KiB> m_output_stream; +}; + +enum DeflateSpecialCodeLengths : u32 { + COPY = 16, + ZEROS = 17, + LONG_ZEROS = 18 +}; + +class DeflateCompressor final : public OutputStream { +public: + static constexpr size_t block_size = 32 * KiB - 1; // TODO: this can theoretically be increased to 64 KiB - 2 + static constexpr size_t window_size = block_size * 2; + static constexpr size_t hash_bits = 15; + static constexpr size_t max_huffman_literals = 288; + static constexpr size_t max_huffman_distances = 32; + static constexpr size_t min_match_length = 4; // matches smaller than these are not worth the size of the back reference + static constexpr size_t max_match_length = 258; // matches longer than these cannot be encoded using huffman codes + static constexpr u16 empty_slot = UINT16_MAX; + + struct CompressionConstants { + size_t good_match_length; // Once we find a match of at least this length (a good enough match) we reduce max_chain to lower processing time + size_t max_lazy_length; // If the match is at least this long we dont defer matching to the next byte (which takes time) as its good enough + size_t great_match_length; // Once we find a match of at least this length (a great match) we can just stop searching for longer ones + size_t max_chain; // We only check the actual length of the max_chain closest matches + }; + + // These constants were shamelessly "borrowed" from zlib + static constexpr CompressionConstants compression_constants[] = { + { 0, 0, 0, 0 }, + { 4, 4, 8, 4 }, + { 8, 16, 128, 128 }, + { 32, 258, 258, 4096 }, + { max_match_length, max_match_length, max_match_length, 1 << hash_bits } // disable all limits + }; + + enum class CompressionLevel : int { + STORE = 0, + FAST, + GOOD, + GREAT, + BEST // WARNING: this one can take an unreasonable amount of time! + }; + + DeflateCompressor(OutputStream&, CompressionLevel = CompressionLevel::GOOD); + ~DeflateCompressor(); + + size_t write(ReadonlyBytes) override; + bool write_or_error(ReadonlyBytes) override; + void final_flush(); + + static Optional compress_all(const ReadonlyBytes& bytes, CompressionLevel = CompressionLevel::GOOD); + +private: + Bytes pending_block() { return { m_rolling_window + block_size, block_size }; } + + // LZ77 Compression + static u16 hash_sequence(const u8* bytes); + size_t compare_match_candidate(size_t start, size_t candidate, size_t prev_match_length, size_t max_match_length); + size_t find_back_match(size_t start, u16 hash, size_t previous_match_length, size_t max_match_length, size_t& match_position); + void lz77_compress_block(); + + // Huffman Coding + struct code_length_symbol { + u8 symbol; + u8 count; // used for special symbols 16-18 + }; + static u8 distance_to_base(u16 distance); + template + static void generate_huffman_lengths(Array& lengths, const Array& frequencies, size_t max_bit_length); + size_t huffman_block_length(const Array& literal_bit_lengths, const Array& distance_bit_lengths); + void write_huffman(const CanonicalCode& literal_code, const CanonicalCode& distance_code); + static size_t encode_huffman_lengths(const Array& lengths, size_t lengths_count, Array& encoded_lengths); + size_t encode_block_lengths(const Array& literal_bit_lengths, const Array& distance_bit_lengths, Array& encoded_lengths, size_t& literal_code_count, size_t& distance_code_count); + void write_dynamic_huffman(const CanonicalCode& literal_code, size_t literal_code_count, const CanonicalCode& distance_code, size_t distance_code_count, const Array& code_lengths_bit_lengths, size_t code_length_count, const Array& encoded_lengths, size_t encoded_lengths_count); + + size_t uncompressed_block_length(); + size_t fixed_block_length(); + size_t dynamic_block_length(const Array& literal_bit_lengths, const Array& distance_bit_lengths, const Array& code_lengths_bit_lengths, const Array& code_lengths_frequencies, size_t code_lengths_count); + void flush(); + + bool m_finished { false }; + CompressionLevel m_compression_level; + CompressionConstants m_compression_constants; + OutputBitStream m_output_stream; + + u8 m_rolling_window[window_size]; + size_t m_pending_block_size { 0 }; + + struct [[gnu::packed]] { + u16 distance; // back reference length + union { + u16 literal; // literal byte or on of block symbol + u16 length; // back reference length (if distance != 0) + }; + } m_symbol_buffer[block_size + 1]; + size_t m_pending_symbol_size { 0 }; + Array m_symbol_frequencies; // there are 286 valid symbol values (symbols 286-287 never occur) + Array m_distance_frequencies; // there are 30 valid distance values (distances 30-31 never occur) + + // LZ77 Chained hash table + u16 m_hash_head[1 << hash_bits]; + u16 m_hash_prev[window_size]; }; } diff --git a/Userland/Libraries/LibCompress/DeflateTables.h b/Userland/Libraries/LibCompress/DeflateTables.h new file mode 100644 index 0000000000..78997a18d4 --- /dev/null +++ b/Userland/Libraries/LibCompress/DeflateTables.h @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2021, Idan Horowitz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +namespace Compress { + +// RFC 1951 - 3.2.5 +static constexpr struct { + u16 symbol; + u16 base_length; + u16 extra_bits; +} packed_length_symbols[29] = { + { 257, 3, 0 }, + { 258, 4, 0 }, + { 259, 5, 0 }, + { 260, 6, 0 }, + { 261, 7, 0 }, + { 262, 8, 0 }, + { 263, 9, 0 }, + { 264, 10, 0 }, + { 265, 11, 1 }, + { 266, 13, 1 }, + { 267, 15, 1 }, + { 268, 17, 1 }, + { 269, 19, 2 }, + { 270, 23, 2 }, + { 271, 27, 2 }, + { 272, 31, 2 }, + { 273, 35, 3 }, + { 274, 43, 3 }, + { 275, 51, 3 }, + { 276, 59, 3 }, + { 277, 67, 4 }, + { 278, 83, 4 }, + { 279, 99, 4 }, + { 280, 115, 4 }, + { 281, 131, 5 }, + { 282, 163, 5 }, + { 283, 195, 5 }, + { 284, 227, 5 }, + { 285, 258, 0 } +}; + +// RFC 1951 - 3.2.5 +static constexpr struct { + u16 symbol; + u16 base_distance; + u16 extra_bits; +} packed_distances[31] = { + { 0, 1, 0 }, + { 1, 2, 0 }, + { 2, 3, 0 }, + { 3, 4, 0 }, + { 4, 5, 1 }, + { 5, 7, 1 }, + { 6, 9, 2 }, + { 7, 13, 2 }, + { 8, 17, 3 }, + { 9, 25, 3 }, + { 10, 33, 4 }, + { 11, 49, 4 }, + { 12, 65, 5 }, + { 13, 97, 5 }, + { 14, 129, 6 }, + { 15, 193, 6 }, + { 16, 257, 7 }, + { 17, 385, 7 }, + { 18, 513, 8 }, + { 19, 769, 8 }, + { 20, 1025, 9 }, + { 21, 1537, 9 }, + { 22, 2049, 10 }, + { 23, 3073, 10 }, + { 24, 4097, 11 }, + { 25, 6145, 11 }, + { 26, 8193, 12 }, + { 27, 12289, 12 }, + { 28, 16385, 13 }, + { 29, 24577, 13 }, + { 30, 32 * KiB + 1, 0 }, // signifies end +}; + +// RFC 1951 - 3.2.6 +static constexpr struct { + u16 base_value; + u16 bits; +} fixed_literal_bits[5] = { + { 0, 8 }, + { 144, 9 }, + { 256, 7 }, + { 280, 8 }, + { 288, 0 } // signifies end +}; + +// RFC 1951 - 3.2.7 +static constexpr size_t code_lengths_code_lengths_order[] { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + +static consteval Array generate_length_to_symbol() +{ + Array array = { UINT16_MAX, UINT16_MAX, UINT16_MAX }; // there are 256 valid lengths (3-258) + 3 invalid lengths (0-2) + size_t base_length = 0; + for (size_t len = 3; len < 259; len++) { + if (len == packed_length_symbols[base_length + 1].base_length) + base_length++; + array[len] = packed_length_symbols[base_length].symbol; + } + return array; +}; +static constexpr auto length_to_symbol = generate_length_to_symbol(); + +static consteval Array generate_distance_to_base_lo() +{ + Array array; + size_t base_distance = 0; + for (size_t dist = 1; dist <= 256; dist++) { + if (dist == packed_distances[base_distance + 1].base_distance) + base_distance++; + array[dist - 1] = packed_distances[base_distance].symbol; + } + return array; +}; +static constexpr auto distance_to_base_lo = generate_distance_to_base_lo(); +static consteval Array generate_distance_to_base_hi() +{ + Array array = { UINT16_MAX, UINT16_MAX }; + size_t base_distance = 16; + for (size_t dist = 257; dist <= 32 * KiB; dist++) { + if (dist == packed_distances[base_distance + 1].base_distance) + base_distance++; + array[(dist - 1) >> 7] = packed_distances[base_distance].symbol; + } + return array; +}; +static constexpr auto distance_to_base_hi = generate_distance_to_base_hi(); + +static consteval Array generate_fixed_literal_bit_lengths() +{ + Array array; + for (size_t i = 0; i < 4; i++) { + array.span().slice(fixed_literal_bits[i].base_value, fixed_literal_bits[i + 1].base_value - fixed_literal_bits[i].base_value).fill(fixed_literal_bits[i].bits); + } + return array; +}; +static constexpr auto fixed_literal_bit_lengths = generate_fixed_literal_bit_lengths(); + +static consteval Array generate_fixed_distance_bit_lengths() +{ + Array array; + array.fill(5); + return array; +}; +static constexpr auto fixed_distance_bit_lengths = generate_fixed_distance_bit_lengths(); + +static consteval u8 reverse8(u8 value) +{ + u8 result = 0; + for (size_t i = 0; i < 8; i++) { + if (value & (1 << i)) + result |= 1 << (7 - i); + } + return result; +} +static consteval Array generate_reverse8_lookup_table() +{ + Array array; + for (size_t i = 0; i <= UINT8_MAX; i++) { + array[i] = reverse8(i); + } + return array; +} +static constexpr auto reverse8_lookup_table = generate_reverse8_lookup_table(); + +// Lookup-table based bit swap +ALWAYS_INLINE static u16 fast_reverse16(u16 value, size_t bits) +{ + VERIFY(bits <= 16); + + u16 lo = value & 0xff; + u16 hi = value >> 8; + + u16 reversed = (u16)((reverse8_lookup_table[lo] << 8) | reverse8_lookup_table[hi]); + + return reversed >> (16 - bits); +} + +}