LibCompress: Use prefix tables to decode Huffman codes up to 8 bits long

Huffman codes have a useful property in that they are prefix codes. That is, the set of bits representing one Huffman-coded symbol is never a prefix of the code for another symbol. This allows us to create a table in which each index is an integer whose prefix is the corresponding entry's Huffman code. With Deflate, we can have codes up to 16 bits in length, which would require a prefix table with 2^16 entries. So instead of creating a table to fit all possible codes, we use a cutoff of 8-bit codes. Codes longer than 8 bits fall back to the binary search method. Using the "enwik8" file as a test (100MB uncompressed, commonly used in benchmarks: https://www.mattmahoney.net/dc/enwik8.zip), decompression time decreases from 3.527s to 2.585s on Linux.
2025-05-31 22:18:12 +00:00 · 2023-03-28 14:45:20 -04:00 · 2023-03-28 14:45:20 -04:00 · 5aaefe4e62
commit 5aaefe4e62
parent 8e834d4bb2
2 changed files with 63 additions and 11 deletions
--- a/Userland/Libraries/LibCompress/Deflate.cpp
+++ b/Userland/Libraries/LibCompress/Deflate.cpp
@ -63,14 +63,26 @@ Optional<CanonicalCode> CanonicalCode::from_bytes(ReadonlyBytes bytes)
            last_non_zero = i;
        }
    }
+
    if (non_zero_symbols == 1) { // special case - only 1 symbol
-        code.m_symbol_codes.append(0b10);
-        code.m_symbol_values.append(last_non_zero);
+        code.m_prefix_table[0] = PrefixTableEntry { static_cast<u16>(last_non_zero), 1u };
+        code.m_prefix_table[1] = code.m_prefix_table[0];
+        code.m_max_prefixed_code_length = 1;
+
        code.m_bit_codes[last_non_zero] = 0;
        code.m_bit_code_lengths[last_non_zero] = 1;
+
        return code;
    }

+    struct PrefixCode {
+        u16 symbol_code { 0 };
+        u16 symbol_value { 0 };
+        u16 code_length { 0 };
+    };
+    Array<PrefixCode, 1 << CanonicalCode::max_allowed_prefixed_code_length> prefix_codes;
+    size_t number_of_prefix_codes = 0;
+
    auto next_code = 0;
    for (size_t code_length = 1; code_length <= 15; ++code_length) {
        next_code <<= 1;
@ -83,8 +95,18 @@ Optional<CanonicalCode> CanonicalCode::from_bytes(ReadonlyBytes bytes)
            if (next_code > start_bit)
                return {};

-            code.m_symbol_codes.append(start_bit | next_code);
-            code.m_symbol_values.append(symbol);
+            if (code_length <= CanonicalCode::max_allowed_prefixed_code_length) {
+                auto& prefix_code = prefix_codes[number_of_prefix_codes++];
+                prefix_code.symbol_code = next_code;
+                prefix_code.symbol_value = symbol;
+                prefix_code.code_length = code_length;
+
+                code.m_max_prefixed_code_length = code_length;
+            } else {
+                code.m_symbol_codes.append(start_bit | next_code);
+                code.m_symbol_values.append(symbol);
+            }
+
            code.m_bit_codes[symbol] = fast_reverse16(start_bit | next_code, code_length); // DEFLATE writes huffman encoded symbols as lsb-first
            code.m_bit_code_lengths[symbol] = code_length;

@ -96,24 +118,44 @@ Optional<CanonicalCode> CanonicalCode::from_bytes(ReadonlyBytes bytes)
        return {};
    }

+    for (auto [symbol_code, symbol_value, code_length] : prefix_codes) {
+        if (code_length == 0 || code_length > CanonicalCode::max_allowed_prefixed_code_length)
+            break;
+
+        auto shift = code.m_max_prefixed_code_length - code_length;
+        symbol_code <<= shift;
+
+        for (size_t j = 0; j < (1u << shift); ++j) {
+            auto index = fast_reverse16(symbol_code + j, code.m_max_prefixed_code_length);
+            code.m_prefix_table[index] = PrefixTableEntry { symbol_value, code_length };
+        }
+    }
+
    return code;
 }

 ErrorOr<u32> CanonicalCode::read_symbol(LittleEndianInputBitStream& stream) const
 {
-    u32 code_bits = 1;
+    auto prefix = TRY(stream.peek_bits<size_t>(m_max_prefixed_code_length));

-    for (;;) {
-        code_bits = code_bits << 1 | TRY(stream.read_bit());
-        if (code_bits >= (1 << 16))
-            return Error::from_string_literal("Symbol exceeds maximum symbol number");
+    if (auto [symbol_value, code_length] = m_prefix_table[prefix]; code_length != 0) {
+        stream.discard_previously_peeked_bits(code_length);
+        return symbol_value;
+    }

-        // FIXME: This is very inefficient and could greatly be improved by implementing this
-        //        algorithm: https://www.hanshq.net/zip.html#huffdec
+    auto code_bits = TRY(stream.read_bits<u16>(m_max_prefixed_code_length));
+    code_bits = fast_reverse16(code_bits, m_max_prefixed_code_length);
+    code_bits |= 1 << m_max_prefixed_code_length;
+
+    for (size_t i = m_max_prefixed_code_length; i < 16; ++i) {
        size_t index;
        if (binary_search(m_symbol_codes.span(), code_bits, &index))
            return m_symbol_values[index];
+
+        code_bits = code_bits << 1 | TRY(stream.read_bit());
    }
+
+    return Error::from_string_literal("Symbol exceeds maximum symbol number");
 }

 ErrorOr<void> CanonicalCode::write_symbol(LittleEndianOutputBitStream& stream, u32 symbol) const
--- a/Userland/Libraries/LibCompress/Deflate.h
+++ b/Userland/Libraries/LibCompress/Deflate.h
@ -30,10 +30,20 @@ public:
    static Optional<CanonicalCode> from_bytes(ReadonlyBytes);

 private:
+    static constexpr size_t max_allowed_prefixed_code_length = 8;
+
+    struct PrefixTableEntry {
+        u16 symbol_value { 0 };
+        u16 code_length { 0 };
+    };
+
    // Decompression - indexed by code
    Vector<u16> m_symbol_codes;
    Vector<u16> m_symbol_values;

+    Array<PrefixTableEntry, 1 << max_allowed_prefixed_code_length> m_prefix_table {};
+    size_t m_max_prefixed_code_length { 0 };
+
    // Compression - indexed by symbol
    Array<u16, 288> m_bit_codes {}; // deflate uses a maximum of 288 symbols (maximum of 32 for distances)
    Array<u16, 288> m_bit_code_lengths {};