mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 19:57:44 +00:00
LibCompress: Implement Brotli decompressor
This implements the BrotliDecompressionStream, which is a Core::Stream that can decompress another Core::Stream.
This commit is contained in:
parent
68772463cb
commit
d6a5b11f04
28 changed files with 2725 additions and 0 deletions
906
Userland/Libraries/LibCompress/Brotli.cpp
Normal file
906
Userland/Libraries/LibCompress/Brotli.cpp
Normal file
|
@ -0,0 +1,906 @@
|
|||
/*
|
||||
* Copyright (c) 2022, Michiel Visser <opensource@webmichiel.nl>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/BinarySearch.h>
|
||||
#include <AK/QuickSort.h>
|
||||
#include <LibCompress/Brotli.h>
|
||||
#include <LibCompress/BrotliDictionary.h>
|
||||
|
||||
namespace Compress {
|
||||
|
||||
// Decodes a single symbol from input_stream.
//
// Bits are pulled one at a time and shifted into an accumulator that starts at 1, so every
// candidate pattern carries a leading marker bit. After each step the accumulator is looked up
// in m_symbol_codes with binary_search (the codes are therefore expected to be stored in
// ascending order); on a hit the entry of m_symbol_values at the same index is returned.
//
// Fails with "no matching code found" once the accumulator reaches 1 << 16 without a hit, and
// propagates any error reported while reading a bit.
ErrorOr<size_t> BrotliDecompressionStream::CanonicalCode::read_symbol(LittleEndianInputBitStream& input_stream)
{
    size_t const accumulator_limit = 1 << 16;
    size_t accumulator = 1;

    do {
        // FIXME: This is very inefficient and could greatly be improved by implementing this
        //        algorithm: https://www.hanshq.net/zip.html#huffdec
        size_t match_index;
        if (binary_search(m_symbol_codes.span(), accumulator, &match_index))
            return m_symbol_values[match_index];

        // No code matches yet: extend the candidate pattern by one more bit.
        auto next_bit = TRY(input_stream.read_bit());
        accumulator = (accumulator << 1) | next_bit;
    } while (accumulator < accumulator_limit);

    return Error::from_string_literal("no matching code found");
}
|
||||
|
||||
// Creates a decompressor that pulls its compressed input from the given stream.
// Nothing is read here; m_input_stream is merely initialized from `stream`.
BrotliDecompressionStream::BrotliDecompressionStream(Stream& stream)
    : m_input_stream(stream)
{
}
|
||||
|
||||
// Reads the variable-length window size field and returns the decoded value (10..24).
//
//   first bit 0                          -> 16
//   first bit 1, next 3 bits = 1..7      -> 18..24
//   first bit 1, next 3 bits = 0, then 3 more bits:
//       0      -> 17
//       1      -> error "invalid window length"
//       2..7   -> 10..15
//
// Any error from the underlying bit stream is propagated.
ErrorOr<size_t> BrotliDecompressionStream::read_window_length()
{
    bool const has_explicit_size = TRY(m_input_stream.read_bit());
    if (!has_explicit_size)
        return 16;

    size_t const large_selector = TRY(m_input_stream.read_bits(3));
    if (large_selector != 0)
        return 17 + large_selector;

    size_t const small_selector = TRY(m_input_stream.read_bits(3));
    if (small_selector == 0)
        return 17;
    if (small_selector == 1)
        return Error::from_string_literal("invalid window length");

    return 8 + small_selector;
}
|
||||
|
||||
// Reads a 2-bit selector and maps it to a nibble count: 0 -> 4, 1 -> 5, 2 -> 6, 3 -> 0.
// The caller (read) treats a result of 0 as "this block only contains meta-data".
ErrorOr<size_t> BrotliDecompressionStream::read_size_number_of_nibbles()
{
    size_t const selector = TRY(m_input_stream.read_bits(2));
    if (selector == 3)
        return 0;

    return 4 + selector;
}
|
||||
|
||||
// Reads a variable-length integer in the range 1..256.
//
//    Value    Bit Pattern
//    -----    -----------
//      1                0
//      2             0001
//   3..4            x0011
//   5..8           xx0101
//   9..16         xxx0111
//  17..32        xxxx1001
//  33..64       xxxxx1011
//  65..128     xxxxxx1101
// 129..256    xxxxxxx1111
//
// The first bit says whether anything follows; if so, a 3-bit field gives the number of extra
// bits n, and the result is (1 << n) + 1 plus the value of those n extra bits.
ErrorOr<size_t> BrotliDecompressionStream::read_variable_length()
{
    bool const has_more = TRY(m_input_stream.read_bit());
    if (!has_more)
        return 1;

    size_t const extra_bit_count = TRY(m_input_stream.read_bits(3));
    if (extra_bit_count == 0)
        return 2;

    size_t const range_start = (static_cast<size_t>(1) << extra_bit_count) + 1;
    size_t const extra_value = TRY(m_input_stream.read_bits(extra_bit_count));
    return range_start + extra_value;
}
|
||||
|
||||
// Reads one code length (0..5) using the fixed variable-length code below.
//
//   Symbol   Code
//   ------   ----
//   0          00
//   1        0111
//   2         011
//   3          10
//   4          01
//   5        1111
//
// The first two bits are read as one 2-bit value; only the value 3 needs one or two more bits.
ErrorOr<size_t> BrotliDecompressionStream::read_complex_prefix_code_length()
{
    size_t const leading_bits = TRY(m_input_stream.read_bits(2));
    if (leading_bits == 0)
        return 0;
    if (leading_bits == 1)
        return 4;
    if (leading_bits == 2)
        return 3;

    // leading_bits == 3: the third bit separates symbol 2 from symbols 1 and 5.
    bool const third_bit = TRY(m_input_stream.read_bit());
    if (!third_bit)
        return 2;

    bool const fourth_bit = TRY(m_input_stream.read_bit());
    if (!fourth_bit)
        return 1;
    return 5;
}
|
||||
|
||||
// Reads a prefix code description into `code`.
// A 2-bit HSKIP field selects the encoding: the value 1 means a simple prefix code follows,
// any other value (0, 2 or 3) means a complex prefix code follows and is passed on as hskip.
ErrorOr<void> BrotliDecompressionStream::read_prefix_code(CanonicalCode& code, size_t alphabet_size)
{
    size_t const hskip = TRY(m_input_stream.read_bits(2));

    if (hskip == 1)
        return read_simple_prefix_code(code, alphabet_size);

    return read_complex_prefix_code(code, alphabet_size, hskip);
}
|
||||
|
||||
// Reads a "simple" prefix code with 1 to 4 symbols into `code`, which must be empty.
//
// Layout: 2 bits give the number of symbols minus one, followed by that many symbols of
// symbol_size bits each (the smallest width with 1 << symbol_size >= alphabet_size). With four
// symbols one additional bit selects between two tree shapes.
//
// Codes are stored with a leading marker bit, matching what CanonicalCode::read_symbol expects.
//
// Errors:
//  - "symbol larger than alphabet" if a symbol is >= alphabet_size
//  - "duplicate symbol in simple prefix code" if a symbol repeats (RFC 7932 section 3.4 requires
//    such streams to be rejected)
//  - any error from the underlying bit stream
ErrorOr<void> BrotliDecompressionStream::read_simple_prefix_code(CanonicalCode& code, size_t alphabet_size)
{
    VERIFY(code.m_symbol_codes.is_empty());
    VERIFY(code.m_symbol_values.is_empty());

    size_t number_of_symbols = 1 + TRY(m_input_stream.read_bits(2));

    size_t symbol_size = 0;
    while ((1u << symbol_size) < alphabet_size)
        symbol_size++;

    Vector<size_t> symbols;
    for (size_t i = 0; i < number_of_symbols; i++) {
        size_t symbol = TRY(m_input_stream.read_bits(symbol_size));

        if (symbol >= alphabet_size)
            return Error::from_string_literal("symbol larger than alphabet");

        // At most three earlier symbols exist, so a linear scan is sufficient.
        for (auto previous_symbol : symbols) {
            if (previous_symbol == symbol)
                return Error::from_string_literal("duplicate symbol in simple prefix code");
        }

        symbols.append(symbol);
    }

    if (number_of_symbols == 1) {
        // A single symbol is coded with zero bits: the bare marker bit already matches.
        code.m_symbol_codes.append(0b1);
        code.m_symbol_values = move(symbols);
    } else if (number_of_symbols == 2) {
        // Code lengths 1, 1 with the symbols in sorted order.
        code.m_symbol_codes.extend({ 0b10, 0b11 });
        if (symbols[0] > symbols[1])
            swap(symbols[0], symbols[1]);
        code.m_symbol_values = move(symbols);
    } else if (number_of_symbols == 3) {
        // Code lengths 1, 2, 2; only the two length-2 symbols are sorted.
        code.m_symbol_codes.extend({ 0b10, 0b110, 0b111 });
        if (symbols[1] > symbols[2])
            swap(symbols[1], symbols[2]);
        code.m_symbol_values = move(symbols);
    } else if (number_of_symbols == 4) {
        bool tree_select = TRY(m_input_stream.read_bit());
        if (tree_select) {
            // Code lengths 1, 2, 3, 3; only the two length-3 symbols are sorted.
            code.m_symbol_codes.extend({ 0b10, 0b110, 0b1110, 0b1111 });
            if (symbols[2] > symbols[3])
                swap(symbols[2], symbols[3]);
            code.m_symbol_values = move(symbols);
        } else {
            // Code lengths 2, 2, 2, 2 with all symbols in sorted order.
            code.m_symbol_codes.extend({ 0b100, 0b101, 0b110, 0b111 });
            quick_sort(symbols);
            code.m_symbol_values = move(symbols);
        }
    }

    return {};
}
|
||||
|
||||
// Reads a "complex" prefix code into `code`.
//
// This happens in two stages:
//  1. Up to 18 - hskip code lengths (each 0..5) are read for the code length alphabet, in the
//     order given by symbol_mapping, and turned into a temporary canonical code.
//  2. That temporary code is used to read the code lengths of the actual alphabet. Symbols 0..15
//     are literal code lengths, 16 repeats the previous non-zero length (8 if there was none),
//     and 17 repeats a zero length. Consecutive 16s (or 17s) extend the previous repeat count.
//
// Finally canonical codes are assigned in order of increasing length and symbol value, each
// stored with a leading marker bit as expected by CanonicalCode::read_symbol.
//
// Returns "invalid prefix code" if (per RFC 7932 section 3.5)
//  - the code length code over-subscribes its code space, or has other than exactly one non-zero
//    length without filling the code space exactly,
//  - a repeat would produce more lengths than alphabet_size,
//  - the resulting code does not fill the 15-bit code space exactly.
// Errors from the underlying bit stream are propagated.
ErrorOr<void> BrotliDecompressionStream::read_complex_prefix_code(CanonicalCode& code, size_t alphabet_size, size_t hskip)
{
    // hskip should only be 0, 2 or 3
    VERIFY(hskip != 1);
    VERIFY(hskip <= 3);

    // Read the prefix code_value that is used to encode the actual prefix code_value
    size_t const symbol_mapping[18] = { 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
    size_t code_length[18] { 0 };
    size_t code_length_counts[6] { 0 };

    // `sum` tracks the used code space in units of 1/32; a complete code reaches exactly 32.
    size_t sum = 0;
    size_t number_of_non_zero_symbols = 0;
    for (size_t i = hskip; i < 18; i++) {
        size_t len = TRY(read_complex_prefix_code_length());
        code_length[symbol_mapping[i]] = len;

        if (len != 0) {
            code_length_counts[len]++;
            sum += (32 >> len);
            number_of_non_zero_symbols++;
        }

        if (sum == 32)
            break;
        else if (sum > 32)
            return Error::from_string_literal("invalid prefix code");
    }

    // Only a single non-zero length may leave the code space incomplete (it is then coded with
    // zero bits). Zero or several non-zero lengths that do not fill the code space are invalid.
    if (number_of_non_zero_symbols != 1 && sum != 32)
        return Error::from_string_literal("invalid prefix code");

    BrotliDecompressionStream::CanonicalCode temp_code;
    if (number_of_non_zero_symbols > 1) {
        size_t code_value = 0;
        for (size_t bits = 1; bits <= 5; bits++) {
            code_value = (code_value + code_length_counts[bits - 1]) << 1;
            size_t current_code_value = code_value;

            for (size_t i = 0; i < 18; i++) {
                size_t len = code_length[i];
                if (len == bits) {
                    temp_code.m_symbol_codes.append((1 << bits) | current_code_value);
                    temp_code.m_symbol_values.append(i);
                    current_code_value++;
                }
            }
        }
    } else {
        // Exactly one symbol: it is coded with zero bits (marker bit only).
        for (size_t i = 0; i < 18; i++) {
            size_t len = code_length[i];
            if (len != 0) {
                temp_code.m_symbol_codes.append(1);
                temp_code.m_symbol_values.append(i);
                break;
            }
        }
    }

    // Read the actual prefix code_value
    // `sum` now tracks the used code space in units of 1/32768.
    sum = 0;
    size_t i = 0;

    size_t previous_non_zero_code_length = 8;
    size_t last_symbol = 0;
    size_t last_repeat = 0;

    Vector<size_t> result_symbols;
    Vector<size_t> result_lengths;
    size_t result_lengths_count[16] { 0 };
    while (i < alphabet_size) {
        auto symbol = TRY(temp_code.read_symbol(m_input_stream));

        if (symbol < 16) {
            result_symbols.append(i);
            result_lengths.append(symbol);
            result_lengths_count[symbol]++;

            if (symbol != 0) {
                previous_non_zero_code_length = symbol;
                sum += (32768 >> symbol);
                if (sum == 32768)
                    break;
                else if (sum > 32768)
                    return Error::from_string_literal("invalid prefix code");
            }

            last_repeat = 0;
            i++;
        } else if (symbol == 16) {
            size_t repeat_count = 0;
            if (last_symbol == 16 && last_repeat != 0) {
                repeat_count = (4 * (last_repeat - 2));
            } else {
                last_repeat = 0;
            }
            repeat_count += 3 + TRY(m_input_stream.read_bits(2));

            // Only the lengths added on top of the previous repeat still have to be emitted.
            size_t repeat_delta = repeat_count - last_repeat;
            if (repeat_delta > alphabet_size - i)
                return Error::from_string_literal("invalid prefix code");

            for (size_t rep = 0; rep < repeat_delta; rep++) {
                result_symbols.append(i);
                result_lengths.append(previous_non_zero_code_length);
                result_lengths_count[previous_non_zero_code_length]++;

                if (previous_non_zero_code_length != 0) {
                    sum += (32768 >> previous_non_zero_code_length);
                    if (sum == 32768)
                        break;
                    else if (sum > 32768)
                        return Error::from_string_literal("invalid prefix code");
                }

                i++;
            }
            if (sum == 32768)
                break;
            VERIFY(sum < 32768);

            last_repeat = repeat_count;
        } else if (symbol == 17) {
            size_t repeat_count = 0;
            if (last_symbol == 17 && last_repeat != 0) {
                repeat_count = (8 * (last_repeat - 2));
            } else {
                last_repeat = 0;
            }
            repeat_count += 3 + TRY(m_input_stream.read_bits(3));

            size_t repeat_delta = repeat_count - last_repeat;
            if (repeat_delta > alphabet_size - i)
                return Error::from_string_literal("invalid prefix code");

            // Zero lengths are not recorded; the symbols are simply skipped.
            i += repeat_delta;
            last_repeat = repeat_count;
        }

        last_symbol = symbol;
    }

    // A complex prefix code has to use up the code space exactly.
    if (sum != 32768)
        return Error::from_string_literal("invalid prefix code");

    // Zero-length entries must not influence the canonical code assignment below.
    result_lengths_count[0] = 0;

    size_t code_value = 0;
    for (size_t bits = 1; bits < 16; bits++) {
        code_value = (code_value + result_lengths_count[bits - 1]) << 1;
        size_t current_code_value = code_value;

        for (size_t n = 0; n < result_symbols.size(); n++) {
            size_t len = result_lengths[n];
            if (len == bits) {
                code.m_symbol_codes.append((1 << bits) | current_code_value);
                code.m_symbol_values.append(result_symbols[n]);
                current_code_value++;
            }
        }
    }

    return {};
}
|
||||
|
||||
// Applies the inverse move-to-front transform to `v` in place (RFC 7932 section 7.3).
//
// A 256-entry table starts out as the identity mapping. Each input byte is treated as a
// position in that table: it is replaced by the value found there, and that value is then
// moved to the front of the table, shifting the entries before it back by one.
static void inverse_move_to_front_transform(Span<u8> v)
{
    u8 table[256];
    for (size_t slot = 0; slot < 256; ++slot)
        table[slot] = static_cast<u8>(slot);

    for (auto& entry : v) {
        u8 position = entry;
        u8 const decoded = table[position];
        entry = decoded;

        // Shift everything in front of `position` back by one slot...
        while (position != 0) {
            table[position] = table[position - 1];
            --position;
        }
        // ...and put the decoded value at the front.
        table[0] = decoded;
    }
}
|
||||
|
||||
// Reads a context map with context_map_size entries and appends them to `context_map`.
//
// Layout:
//  - 1 bit: whether run-length encoding of zeros is used; if so, 4 bits hold RLEMAX - 1
//  - a prefix code over number_of_codes + RLEMAX symbols
//  - the entries: a symbol <= RLEMAX stands for a run of (1 << symbol) + <symbol extra bits>
//    zeros (symbol 0 is a single zero), any other symbol stands for the value symbol - RLEMAX
//  - 1 bit: whether the inverse move-to-front transform is applied to the whole `context_map`
//
// Returns "invalid context map run length" if a run of zeros would produce more entries than
// context_map_size; RFC 7932 section 7.3 requires such streams to be rejected. Errors from
// reading the prefix code or the bit stream are propagated.
ErrorOr<void> BrotliDecompressionStream::read_context_map(size_t number_of_codes, Vector<u8>& context_map, size_t context_map_size)
{
    bool use_run_length_encoding = TRY(m_input_stream.read_bit());
    size_t run_length_encoding_max = 0;
    if (use_run_length_encoding) {
        run_length_encoding_max = 1 + TRY(m_input_stream.read_bits(4));
    }

    BrotliDecompressionStream::CanonicalCode code;
    TRY(read_prefix_code(code, number_of_codes + run_length_encoding_max));

    size_t i = 0;
    while (i < context_map_size) {
        size_t symbol = TRY(code.read_symbol(m_input_stream));

        if (symbol <= run_length_encoding_max) {
            size_t repeat_base = 1 << symbol;
            size_t repeat_additional = TRY(m_input_stream.read_bits(symbol));
            size_t repeat_count = repeat_base + repeat_additional;

            // i < context_map_size holds here, so the subtraction cannot wrap.
            if (repeat_count > context_map_size - i)
                return Error::from_string_literal("invalid context map run length");

            while (repeat_count--) {
                context_map.append(0);
                i++;
            }
        } else {
            size_t value = symbol - run_length_encoding_max;
            context_map.append(value);
            i++;
        }
    }

    bool inverse_move_to_front = TRY(m_input_stream.read_bit());
    if (inverse_move_to_front)
        inverse_move_to_front_transform(context_map.span());

    return {};
}
|
||||
|
||||
// Reads the block-switching configuration for one block category into `block`.
//
// The number of block types is read first. The block then starts out with type 0 and previous
// type 1, and both of its prefix codes are cleared. With a single block type no codes are
// stored in the stream and the block length is set to 16 MiB; otherwise the block type code
// (alphabet size: number of types + 2) and the block length code (alphabet size 26) are read,
// followed by the length of the first block.
ErrorOr<void> BrotliDecompressionStream::read_block_configuration(Block& block)
{
    size_t const type_count = TRY(read_variable_length());

    block.type_code.clear();
    block.length_code.clear();

    block.number_of_types = type_count;
    block.type_previous = 1;
    block.type = 0;

    if (type_count == 1) {
        block.length = 16 * MiB;
        return {};
    }

    TRY(read_prefix_code(block.type_code, 2 + type_count));
    TRY(read_prefix_code(block.length_code, 26));
    return block_update_length(block);
}
|
||||
|
||||
// Reads the next block length for `block` and stores it in block.length.
// A symbol is decoded with block.length_code; it selects a base value and a number of extra
// bits from the tables below, and the length is the base plus the value of those extra bits.
ErrorOr<void> BrotliDecompressionStream::block_update_length(Block& block)
{
    size_t const base_for_symbol[26] { 1, 5, 9, 13, 17, 25, 33, 41, 49, 65, 81, 97, 113, 145, 177, 209, 241, 305, 369, 497, 753, 1265, 2289, 4337, 8433, 16625 };
    size_t const extra_bits_for_symbol[26] { 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 11, 12, 13, 24 };

    size_t const symbol = TRY(block.length_code.read_symbol(m_input_stream));
    size_t const extra_value = TRY(m_input_stream.read_bits(extra_bits_for_symbol[symbol]));

    block.length = base_for_symbol[symbol] + extra_value;
    return {};
}
|
||||
|
||||
// Performs a block switch for `block`: reads the block type symbol, then the new block length,
// and finally updates block.type / block.type_previous.
//
//   symbol 0 -> switch back to the previous block type
//   symbol 1 -> current block type + 1, wrapping around at number_of_types
//   symbol n -> block type n - 2
//
// In every case the type that was current before the switch becomes the previous type.
ErrorOr<void> BrotliDecompressionStream::block_read_new_state(Block& block)
{
    size_t const type_symbol = TRY(block.type_code.read_symbol(m_input_stream));
    TRY(block_update_length(block));

    size_t next_type;
    if (type_symbol == 0)
        next_type = block.type_previous;
    else if (type_symbol == 1)
        next_type = (block.type + 1) % block.number_of_types;
    else
        next_type = type_symbol - 2;

    block.type_previous = block.type;
    block.type = next_type;

    return {};
}
|
||||
|
||||
// Returns the index of the literal prefix code to use for the next literal.
//
// A context id (0..63) is derived from the last one or two bytes in the lookback buffer
// (0 is used as the fallback value passed to lookback) according to the context mode of the
// current literal block type:
//   mode 0: the low 6 bits of the last byte
//   mode 1: the high 6 bits of the last byte
//   mode 2: context_id_lut0[last byte] | context_id_lut1[second-to-last byte]
//   mode 3: (context_id_lut2[last byte] << 3) | context_id_lut2[second-to-last byte]
// The result is looked up in m_context_mapping_literal at 64 * block type + context id.
//
// The lookup tables are static constexpr so that they are not rebuilt on the stack for every
// decoded literal.
size_t BrotliDecompressionStream::literal_code_index_from_context()
{
    static constexpr u8 context_id_lut0[256] = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
        44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
        12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
        52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
        12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
        60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
        0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
        0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
        0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
        0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
        2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
        2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
        2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
        2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3
    };
    static constexpr u8 context_id_lut1[256] = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
        1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
        1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
    };
    static constexpr u8 context_id_lut2[256] = {
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7
    };

    size_t context_mode = m_literal_context_modes[m_literal_block.type];
    size_t context_id;
    switch (context_mode) {
    case 0:
        context_id = m_lookback_buffer.value().lookback(1, 0) & 0x3f;
        break;
    case 1:
        context_id = m_lookback_buffer.value().lookback(1, 0) >> 2;
        break;
    case 2:
        context_id = context_id_lut0[m_lookback_buffer.value().lookback(1, 0)] | context_id_lut1[m_lookback_buffer.value().lookback(2, 0)];
        break;
    case 3:
        context_id = (context_id_lut2[m_lookback_buffer.value().lookback(1, 0)] << 3) | context_id_lut2[m_lookback_buffer.value().lookback(2, 0)];
        break;
    default:
        VERIFY_NOT_REACHED();
    }

    size_t literal_code_index = m_context_mapping_literal[64 * m_literal_block.type + context_id];
    return literal_code_index;
}
|
||||
|
||||
// Decompresses data into output_buffer and returns the slice of it that was filled.
//
// The decoder is a state machine (m_current_state) that is resumed on every call, so output may
// be requested in arbitrarily sized pieces:
//   WindowSize           reads the window size and allocates the lookback buffer
//   Idle                 reads the next meta-block header (or stops after the final meta-block)
//   UncompressedData     copies raw bytes of an uncompressed meta-block to the output
//   CompressedCommand    reads an insert-and-copy command
//   CompressedLiteral    emits one literal of the current command
//   CompressedDistance   reads the distance of the current command
//   CompressedCopy       emits one byte copied from the lookback buffer
//   CompressedDictionary emits one byte of the dictionary word selected by the distance
//
// The loop ends when output_buffer is full or the final meta-block has been consumed, so the
// returned slice may be shorter than output_buffer. Every emitted byte, including the bytes of
// uncompressed meta-blocks, is also recorded in the lookback buffer so that later
// back-references and literal contexts can see it.
//
// Errors from the input stream and from malformed data are propagated.
ErrorOr<Bytes> BrotliDecompressionStream::read(Bytes output_buffer)
{
    size_t bytes_read = 0;
    while (bytes_read < output_buffer.size()) {
        if (m_current_state == State::WindowSize) {
            size_t window_bits = TRY(read_window_length());
            m_window_size = (1 << window_bits) - 16;

            m_lookback_buffer = TRY(LookbackBuffer::try_create(m_window_size));

            m_current_state = State::Idle;
        } else if (m_current_state == State::Idle) {
            // If the final block was read, we are done decompressing
            if (m_read_final_block)
                break;

            m_read_final_block = TRY(m_input_stream.read_bit());
            if (m_read_final_block) {
                bool is_last_block_empty = TRY(m_input_stream.read_bit());
                // If the last block is empty we are done decompressing
                if (is_last_block_empty)
                    break;
            }

            size_t size_number_of_nibbles = TRY(read_size_number_of_nibbles());
            if (size_number_of_nibbles == 0) {
                // This block only contains meta-data
                bool reserved = TRY(m_input_stream.read_bit());
                if (reserved)
                    return Error::from_string_literal("invalid reserved bit");

                // The stream stores the number of bytes to skip minus one, and only if
                // skip_bytes is non-zero; otherwise there is nothing to skip.
                size_t skip_bytes = TRY(m_input_stream.read_bits(2));
                size_t skip_length = 0;
                if (skip_bytes > 0)
                    skip_length = 1 + TRY(m_input_stream.read_bits(8 * skip_bytes));

                u8 remainder = m_input_stream.align_to_byte_boundary();
                if (remainder != 0)
                    return Error::from_string_literal("remainder bits are non-zero");

                // Discard meta-data bytes
                u8 temp_buffer[4096];
                Bytes temp_bytes { temp_buffer, 4096 };
                while (skip_length > 0) {
                    Bytes temp_bytes_slice = temp_bytes.slice(0, min(temp_bytes.size(), skip_length));
                    auto metadata_bytes = TRY(m_input_stream.read(temp_bytes_slice));
                    if (metadata_bytes.is_empty())
                        return Error::from_string_literal("eof");
                    skip_length -= metadata_bytes.size();
                }

                continue;
            }

            size_t uncompressed_size = 1 + TRY(m_input_stream.read_bits(4 * size_number_of_nibbles));
            // The "uncompressed" flag is only present if this is not the final meta-block.
            bool is_uncompressed = false;
            if (!m_read_final_block)
                is_uncompressed = TRY(m_input_stream.read_bit());

            m_bytes_left = uncompressed_size;
            if (is_uncompressed) {
                u8 remainder = m_input_stream.align_to_byte_boundary();
                if (remainder != 0)
                    return Error::from_string_literal("remainder is non-zero");
                m_current_state = State::UncompressedData;
            } else {
                TRY(read_block_configuration(m_literal_block));
                TRY(read_block_configuration(m_insert_and_copy_block));
                TRY(read_block_configuration(m_distance_block));

                m_postfix_bits = TRY(m_input_stream.read_bits(2));
                m_direct_distances = TRY(m_input_stream.read_bits(4)) << m_postfix_bits;

                m_literal_context_modes.clear();
                for (size_t i = 0; i < m_literal_block.number_of_types; i++) {
                    size_t context_mode = TRY(m_input_stream.read_bits(2));
                    m_literal_context_modes.append(context_mode);
                }

                m_context_mapping_literal.clear();
                size_t number_of_literal_codes = TRY(read_variable_length());
                if (number_of_literal_codes == 1) {
                    for (size_t i = 0; i < 64 * m_literal_block.number_of_types; i++)
                        m_context_mapping_literal.append(0);
                } else {
                    TRY(read_context_map(number_of_literal_codes, m_context_mapping_literal, 64 * m_literal_block.number_of_types));
                }

                m_context_mapping_distance.clear();
                size_t number_of_distance_codes = TRY(read_variable_length());
                if (number_of_distance_codes == 1) {
                    for (size_t i = 0; i < 4 * m_distance_block.number_of_types; i++)
                        m_context_mapping_distance.append(0);
                } else {
                    TRY(read_context_map(number_of_distance_codes, m_context_mapping_distance, 4 * m_distance_block.number_of_types));
                }

                m_literal_codes.clear();
                for (size_t i = 0; i < number_of_literal_codes; i++) {
                    CanonicalCode code;
                    TRY(read_prefix_code(code, 256));
                    m_literal_codes.append(move(code));
                }

                m_insert_and_copy_codes.clear();
                for (size_t i = 0; i < m_insert_and_copy_block.number_of_types; i++) {
                    CanonicalCode code;
                    TRY(read_prefix_code(code, 704));
                    m_insert_and_copy_codes.append(move(code));
                }

                m_distance_codes.clear();
                for (size_t i = 0; i < number_of_distance_codes; i++) {
                    CanonicalCode code;
                    TRY(read_prefix_code(code, 16 + m_direct_distances + (48 << m_postfix_bits)));
                    m_distance_codes.append(move(code));
                }

                m_current_state = State::CompressedCommand;
            }
        } else if (m_current_state == State::UncompressedData) {
            size_t number_of_fitting_bytes = min(output_buffer.size() - bytes_read, m_bytes_left);
            VERIFY(number_of_fitting_bytes > 0);

            auto uncompressed_bytes = TRY(m_input_stream.read(output_buffer.slice(bytes_read, number_of_fitting_bytes)));
            if (uncompressed_bytes.is_empty())
                return Error::from_string_literal("eof");

            // Uncompressed bytes are part of the output history as well: later meta-blocks may
            // copy from them or use them as literal context.
            for (auto uncompressed_byte : uncompressed_bytes)
                m_lookback_buffer.value().write(uncompressed_byte);

            m_bytes_left -= uncompressed_bytes.size();
            bytes_read += uncompressed_bytes.size();

            // If all bytes were read, return to the idle state
            if (m_bytes_left == 0)
                m_current_state = State::Idle;
        } else if (m_current_state == State::CompressedCommand) {
            if (m_insert_and_copy_block.length == 0) {
                TRY(block_read_new_state(m_insert_and_copy_block));
            }
            m_insert_and_copy_block.length--;

            size_t insert_and_copy_symbol = TRY(m_insert_and_copy_codes[m_insert_and_copy_block.type].read_symbol(m_input_stream));

            // The upper bits of the symbol select a cell that fixes the base of the insert and
            // copy length codes; the low six bits hold the offsets within that cell.
            static constexpr size_t insert_length_code_base[11] { 0, 0, 0, 0, 8, 8, 0, 16, 8, 16, 16 };
            static constexpr size_t copy_length_code_base[11] { 0, 8, 0, 8, 0, 8, 16, 0, 16, 8, 16 };
            static constexpr bool implicit_zero_distance[11] { true, true, false, false, false, false, false, false, false, false, false };

            size_t insert_and_copy_index = insert_and_copy_symbol >> 6;
            size_t insert_length_code_offset = (insert_and_copy_symbol >> 3) & 0b111;
            size_t copy_length_code_offset = insert_and_copy_symbol & 0b111;

            size_t insert_length_code = insert_length_code_base[insert_and_copy_index] + insert_length_code_offset;
            size_t copy_length_code = copy_length_code_base[insert_and_copy_index] + copy_length_code_offset;

            m_implicit_zero_distance = implicit_zero_distance[insert_and_copy_index];

            static constexpr size_t insert_length_base[24] { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
            static constexpr size_t insert_length_extra[24] { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24 };
            static constexpr size_t copy_length_base[24] { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30, 38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118 };
            static constexpr size_t copy_length_extra[24] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24 };

            m_insert_length = insert_length_base[insert_length_code] + TRY(m_input_stream.read_bits(insert_length_extra[insert_length_code]));
            m_copy_length = copy_length_base[copy_length_code] + TRY(m_input_stream.read_bits(copy_length_extra[copy_length_code]));

            if (m_insert_length > 0) {
                m_current_state = State::CompressedLiteral;
            } else {
                m_current_state = State::CompressedDistance;
            }
        } else if (m_current_state == State::CompressedLiteral) {
            if (m_literal_block.length == 0) {
                TRY(block_read_new_state(m_literal_block));
            }
            m_literal_block.length--;

            size_t literal_code_index = literal_code_index_from_context();
            size_t literal_value = TRY(m_literal_codes[literal_code_index].read_symbol(m_input_stream));

            output_buffer[bytes_read] = literal_value;
            m_lookback_buffer.value().write(literal_value);
            bytes_read++;
            m_insert_length--;
            m_bytes_left--;

            if (m_bytes_left == 0)
                m_current_state = State::Idle;
            else if (m_insert_length == 0)
                m_current_state = State::CompressedDistance;
        } else if (m_current_state == State::CompressedDistance) {
            size_t distance_symbol;
            if (m_implicit_zero_distance) {
                distance_symbol = 0;
            } else {
                if (m_distance_block.length == 0) {
                    TRY(block_read_new_state(m_distance_block));
                }
                m_distance_block.length--;

                size_t context_id = clamp(m_copy_length - 2, 0, 3);
                size_t distance_code_index = m_context_mapping_distance[4 * m_distance_block.type + context_id];

                distance_symbol = TRY(m_distance_codes[distance_code_index].read_symbol(m_input_stream));
            }

            size_t distance;
            bool reuse_previous_distance = false;
            if (distance_symbol < 16) {
                // Symbols 0..15 refer to one of the recent distances in m_distances, optionally
                // adjusted by up to +/-3.
                static constexpr u8 recent_distance_index[16] { 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
                static constexpr int recent_distance_delta[16] { 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3 };

                size_t base_distance = m_distances[recent_distance_index[distance_symbol]];
                int delta = recent_distance_delta[distance_symbol];
                if (delta < 0) {
                    // A resulting distance of zero or less is invalid (RFC 7932 section 4); without
                    // this check the unsigned subtraction would wrap around.
                    size_t decrement = static_cast<size_t>(-delta);
                    if (base_distance <= decrement)
                        return Error::from_string_literal("invalid distance");
                    distance = base_distance - decrement;
                } else {
                    distance = base_distance + static_cast<size_t>(delta);
                }

                // Symbol 0 repeats the last distance and must not be pushed onto the history again.
                reuse_previous_distance = distance_symbol == 0;
            } else if (distance_symbol < 16 + m_direct_distances) {
                distance = distance_symbol - 15;
            } else {
                size_t POSTFIX_MASK = (1 << m_postfix_bits) - 1;

                size_t ndistbits = 1 + ((distance_symbol - m_direct_distances - 16) >> (m_postfix_bits + 1));
                size_t dextra = TRY(m_input_stream.read_bits(ndistbits));

                size_t hcode = (distance_symbol - m_direct_distances - 16) >> m_postfix_bits;
                size_t lcode = (distance_symbol - m_direct_distances - 16) & POSTFIX_MASK;
                size_t offset = ((2 + (hcode & 1)) << ndistbits) - 4;
                distance = ((offset + dextra) << m_postfix_bits) + lcode + m_direct_distances + 1;
            }
            m_distance = distance;

            size_t total_written = m_lookback_buffer.value().total_written();
            size_t max_lookback = min(total_written, m_window_size);

            if (distance > max_lookback) {
                // Distances beyond the available history refer to the static dictionary.
                size_t word_index = distance - (max_lookback + 1);
                m_dictionary_data = TRY(BrotliDictionary::lookup_word(word_index, m_copy_length));
                m_copy_length = m_dictionary_data.size();

                if (m_copy_length == 0)
                    m_current_state = State::CompressedCommand;
                else
                    m_current_state = State::CompressedDictionary;
            } else {
                if (!reuse_previous_distance) {
                    m_distances[3] = m_distances[2];
                    m_distances[2] = m_distances[1];
                    m_distances[1] = m_distances[0];
                    m_distances[0] = distance;
                }

                m_current_state = State::CompressedCopy;
            }
        } else if (m_current_state == State::CompressedCopy) {
            u8 copy_value = m_lookback_buffer.value().lookback(m_distance);

            output_buffer[bytes_read] = copy_value;
            m_lookback_buffer.value().write(copy_value);
            bytes_read++;
            m_copy_length--;
            m_bytes_left--;

            if (m_bytes_left == 0)
                m_current_state = State::Idle;
            else if (m_copy_length == 0)
                m_current_state = State::CompressedCommand;
        } else if (m_current_state == State::CompressedDictionary) {
            // m_copy_length counts down, so this walks the dictionary word front to back.
            size_t offset = m_dictionary_data.size() - m_copy_length;
            u8 dictionary_value = m_dictionary_data[offset];

            output_buffer[bytes_read] = dictionary_value;
            m_lookback_buffer.value().write(dictionary_value);
            bytes_read++;
            m_copy_length--;
            m_bytes_left--;

            if (m_bytes_left == 0)
                m_current_state = State::Idle;
            else if (m_copy_length == 0)
                m_current_state = State::CompressedCommand;
        }
    }

    return output_buffer.slice(0, bytes_read);
}
|
||||
|
||||
bool BrotliDecompressionStream::is_eof() const
|
||||
{
|
||||
return m_read_final_block && m_current_state == State::Idle;
|
||||
}
|
||||
|
||||
}
|
165
Userland/Libraries/LibCompress/Brotli.h
Normal file
165
Userland/Libraries/LibCompress/Brotli.h
Normal file
|
@ -0,0 +1,165 @@
|
|||
/*
|
||||
* Copyright (c) 2022, Michiel Visser <opensource@webmichiel.nl>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/CircularQueue.h>
|
||||
#include <AK/FixedArray.h>
|
||||
#include <LibCore/InputBitStream.h>
|
||||
#include <LibCore/Stream.h>
|
||||
|
||||
namespace Compress {
|
||||
|
||||
using Core::Stream::LittleEndianInputBitStream;
|
||||
using Core::Stream::Stream;
|
||||
|
||||
class BrotliDecompressionStream : public Stream {
|
||||
public:
|
||||
enum class State {
|
||||
WindowSize,
|
||||
Idle,
|
||||
UncompressedData,
|
||||
CompressedCommand,
|
||||
CompressedLiteral,
|
||||
CompressedDistance,
|
||||
CompressedCopy,
|
||||
CompressedDictionary,
|
||||
};
|
||||
|
||||
class CanonicalCode {
|
||||
friend class BrotliDecompressionStream;
|
||||
|
||||
public:
|
||||
CanonicalCode() = default;
|
||||
ErrorOr<size_t> read_symbol(LittleEndianInputBitStream&);
|
||||
void clear()
|
||||
{
|
||||
m_symbol_codes.clear();
|
||||
m_symbol_values.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
Vector<size_t> m_symbol_codes;
|
||||
Vector<size_t> m_symbol_values;
|
||||
};
|
||||
|
||||
struct Block {
|
||||
size_t type;
|
||||
size_t type_previous;
|
||||
size_t number_of_types;
|
||||
|
||||
size_t length;
|
||||
|
||||
CanonicalCode type_code;
|
||||
CanonicalCode length_code;
|
||||
};
|
||||
|
||||
class LookbackBuffer {
|
||||
private:
|
||||
LookbackBuffer(FixedArray<u8>& buffer)
|
||||
: m_buffer(move(buffer))
|
||||
{
|
||||
}
|
||||
|
||||
public:
|
||||
static ErrorOr<LookbackBuffer> try_create(size_t size)
|
||||
{
|
||||
auto buffer = TRY(FixedArray<u8>::try_create(size));
|
||||
return LookbackBuffer { buffer };
|
||||
}
|
||||
|
||||
void write(u8 value)
|
||||
{
|
||||
m_buffer[m_offset] = value;
|
||||
m_offset = (m_offset + 1) % m_buffer.size();
|
||||
m_total_written++;
|
||||
}
|
||||
|
||||
u8 lookback(size_t offset) const
|
||||
{
|
||||
VERIFY(offset <= m_total_written);
|
||||
VERIFY(offset <= m_buffer.size());
|
||||
size_t index = (m_offset + m_buffer.size() - offset) % m_buffer.size();
|
||||
return m_buffer[index];
|
||||
}
|
||||
|
||||
u8 lookback(size_t offset, u8 fallback) const
|
||||
{
|
||||
if (offset > m_total_written || offset > m_buffer.size())
|
||||
return fallback;
|
||||
VERIFY(offset <= m_total_written);
|
||||
VERIFY(offset <= m_buffer.size());
|
||||
size_t index = (m_offset + m_buffer.size() - offset) % m_buffer.size();
|
||||
return m_buffer[index];
|
||||
}
|
||||
|
||||
size_t total_written() { return m_total_written; }
|
||||
|
||||
private:
|
||||
FixedArray<u8> m_buffer;
|
||||
size_t m_offset { 0 };
|
||||
size_t m_total_written { 0 };
|
||||
};
|
||||
|
||||
public:
|
||||
BrotliDecompressionStream(Stream&);
|
||||
|
||||
bool is_readable() const override { return m_input_stream.is_readable(); }
|
||||
ErrorOr<Bytes> read(Bytes output_buffer) override;
|
||||
bool is_writable() const override { return m_input_stream.is_writable(); }
|
||||
ErrorOr<size_t> write(ReadonlyBytes bytes) override { return m_input_stream.write(bytes); }
|
||||
bool is_eof() const override;
|
||||
bool is_open() const override { return m_input_stream.is_open(); }
|
||||
void close() override { m_input_stream.close(); }
|
||||
|
||||
private:
|
||||
ErrorOr<size_t> read_window_length();
|
||||
ErrorOr<size_t> read_size_number_of_nibbles();
|
||||
ErrorOr<size_t> read_variable_length();
|
||||
ErrorOr<size_t> read_complex_prefix_code_length();
|
||||
|
||||
ErrorOr<void> read_prefix_code(CanonicalCode&, size_t alphabet_size);
|
||||
ErrorOr<void> read_simple_prefix_code(CanonicalCode&, size_t alphabet_size);
|
||||
ErrorOr<void> read_complex_prefix_code(CanonicalCode&, size_t alphabet_size, size_t hskip);
|
||||
ErrorOr<void> read_context_map(size_t number_of_codes, Vector<u8>& context_map, size_t context_map_size);
|
||||
ErrorOr<void> read_block_configuration(Block&);
|
||||
|
||||
ErrorOr<void> block_update_length(Block&);
|
||||
ErrorOr<void> block_read_new_state(Block&);
|
||||
|
||||
size_t literal_code_index_from_context();
|
||||
|
||||
LittleEndianInputBitStream m_input_stream;
|
||||
State m_current_state { State::WindowSize };
|
||||
Optional<LookbackBuffer> m_lookback_buffer;
|
||||
|
||||
size_t m_window_size { 0 };
|
||||
bool m_read_final_block { false };
|
||||
size_t m_postfix_bits { 0 };
|
||||
size_t m_direct_distances { 0 };
|
||||
size_t m_distances[4] { 4, 11, 15, 16 };
|
||||
|
||||
size_t m_bytes_left { 0 };
|
||||
size_t m_insert_length { 0 };
|
||||
size_t m_copy_length { 0 };
|
||||
bool m_implicit_zero_distance { false };
|
||||
size_t m_distance { 0 };
|
||||
ByteBuffer m_dictionary_data;
|
||||
|
||||
Block m_literal_block;
|
||||
Vector<u8> m_literal_context_modes;
|
||||
Block m_insert_and_copy_block;
|
||||
Block m_distance_block;
|
||||
|
||||
Vector<u8> m_context_mapping_literal;
|
||||
Vector<u8> m_context_mapping_distance;
|
||||
|
||||
Vector<CanonicalCode> m_literal_codes;
|
||||
Vector<CanonicalCode> m_insert_and_copy_codes;
|
||||
Vector<CanonicalCode> m_distance_codes;
|
||||
};
|
||||
|
||||
}
|
244
Userland/Libraries/LibCompress/BrotliDictionary.cpp
Normal file
244
Userland/Libraries/LibCompress/BrotliDictionary.cpp
Normal file
|
@ -0,0 +1,244 @@
|
|||
/*
|
||||
* Copyright (c) 2022, Michiel Visser <opensource@webmichiel.nl>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/Types.h>
|
||||
#include <LibCompress/BrotliDictionary.h>
|
||||
|
||||
// Include the 119.9 KiB of dictionary data from a binary file
|
||||
extern u8 const brotli_dictionary_data[];
|
||||
#if defined(__APPLE__)
|
||||
asm(".const_data\n"
|
||||
".globl _brotli_dictionary_data\n"
|
||||
"_brotli_dictionary_data:\n");
|
||||
#else
|
||||
asm(".section .rodata\n"
|
||||
".global brotli_dictionary_data\n"
|
||||
"brotli_dictionary_data:\n");
|
||||
#endif
|
||||
asm(".incbin \"LibCompress/BrotliDictionaryData.bin\"\n"
|
||||
".previous\n");
|
||||
|
||||
namespace Compress {
|
||||
|
||||
// Both tables are indexed by dictionary word length in bytes; lookup_word only
// accepts lengths 4..24.

// Number of low bits of a dictionary index that select the word within its
// length bucket (the bucket holds 1 << bits_by_length[length] words).
static constexpr size_t bits_by_length[25] {
    0, 0, 0, 0, 10, 10, 11, 11, 10, 10, 10, 10, 10, 9, 9, 8, 7, 7, 8, 7, 7, 6, 6, 5, 5
};

// Byte offset into brotli_dictionary_data at which the bucket for each word
// length starts.
static constexpr size_t offset_by_length[25] {
    0, 0, 0, 0, 0, 4096, 9216, 21504, 35840, 44032, 53248, 63488, 74752, 87040, 93696, 100864,
    104704, 106752, 108928, 113536, 115968, 118528, 119872, 121280, 122016
};
|
||||
|
||||
// Applies one "ferment" step to the character starting at word[pos] and returns
// how many bytes that character occupies (1, 2 or 3), judged from its lead byte.
//   lead <  192: flip bit 5 of the byte itself if it is a lowercase ASCII letter
//   lead <  224: XOR the following byte with 32, if it exists
//   otherwise  : XOR the byte two positions ahead with 5, if it exists
static int ferment(Bytes word, size_t pos)
{
    auto const lead = word[pos];

    if (lead >= 224) {
        if (pos + 2 < word.size())
            word[pos + 2] = word[pos + 2] ^ 5;
        return 3;
    }

    if (lead >= 192) {
        if (pos + 1 < word.size())
            word[pos + 1] = word[pos + 1] ^ 32;
        return 2;
    }

    // 97..122 is 'a'..'z'; XOR with 32 turns it into the uppercase letter.
    if (lead >= 97 && lead <= 122)
        word[pos] = lead ^ 32;
    return 1;
}
|
||||
|
||||
// Ferments only the first character of `word`; an empty word is left untouched.
static void ferment_first(Bytes word)
{
    if (word.size() == 0)
        return;

    ferment(word, 0);
}
|
||||
|
||||
// Ferments every character of `word`, stepping by the byte count that
// ferment() reports for each one.
[[maybe_unused]] static void ferment_all(Bytes word)
{
    for (size_t pos = 0; pos < word.size();)
        pos += ferment(word, pos);
}
|
||||
|
||||
using BrotliDictionary::TransformationOperation::FermentAll;
|
||||
using BrotliDictionary::TransformationOperation::FermentFirst;
|
||||
using BrotliDictionary::TransformationOperation::Identity;
|
||||
using BrotliDictionary::TransformationOperation::OmitFirst;
|
||||
using BrotliDictionary::TransformationOperation::OmitLast;
|
||||
// Word transformations, indexed by transformation ID (trailing comment).
// Each entry is { prefix, operation, operation argument, suffix }; the argument
// is the number of bytes to drop for OmitFirst/OmitLast and 0 otherwise.
static constexpr BrotliDictionary::Transformation transformations[121] {
    { ""sv, Identity, 0, ""sv },              // 0
    { ""sv, Identity, 0, " "sv },             // 1
    { " "sv, Identity, 0, " "sv },            // 2
    { ""sv, OmitFirst, 1, ""sv },             // 3
    { ""sv, FermentFirst, 0, " "sv },         // 4
    { ""sv, Identity, 0, " the "sv },         // 5
    { " "sv, Identity, 0, ""sv },             // 6
    { "s "sv, Identity, 0, " "sv },           // 7
    { ""sv, Identity, 0, " of "sv },          // 8
    { ""sv, FermentFirst, 0, ""sv },          // 9
    { ""sv, Identity, 0, " and "sv },         // 10
    { ""sv, OmitFirst, 2, ""sv },             // 11
    { ""sv, OmitLast, 1, ""sv },              // 12
    { ", "sv, Identity, 0, " "sv },           // 13
    { ""sv, Identity, 0, ", "sv },            // 14
    { " "sv, FermentFirst, 0, " "sv },        // 15
    { ""sv, Identity, 0, " in "sv },          // 16
    { ""sv, Identity, 0, " to "sv },          // 17
    { "e "sv, Identity, 0, " "sv },           // 18
    { ""sv, Identity, 0, "\""sv },            // 19
    { ""sv, Identity, 0, "."sv },             // 20
    { ""sv, Identity, 0, "\">"sv },           // 21
    { ""sv, Identity, 0, "\n"sv },            // 22
    { ""sv, OmitLast, 3, ""sv },              // 23
    { ""sv, Identity, 0, "]"sv },             // 24
    { ""sv, Identity, 0, " for "sv },         // 25
    { ""sv, OmitFirst, 3, ""sv },             // 26
    { ""sv, OmitLast, 2, ""sv },              // 27
    { ""sv, Identity, 0, " a "sv },           // 28
    { ""sv, Identity, 0, " that "sv },        // 29
    { " "sv, FermentFirst, 0, ""sv },         // 30
    { ""sv, Identity, 0, ". "sv },            // 31
    { "."sv, Identity, 0, ""sv },             // 32
    { " "sv, Identity, 0, ", "sv },           // 33
    { ""sv, OmitFirst, 4, ""sv },             // 34
    { ""sv, Identity, 0, " with "sv },        // 35
    { ""sv, Identity, 0, "'"sv },             // 36
    { ""sv, Identity, 0, " from "sv },        // 37
    { ""sv, Identity, 0, " by "sv },          // 38
    { ""sv, OmitFirst, 5, ""sv },             // 39
    { ""sv, OmitFirst, 6, ""sv },             // 40
    { " the "sv, Identity, 0, ""sv },         // 41
    { ""sv, OmitLast, 4, ""sv },              // 42
    { ""sv, Identity, 0, ". The "sv },        // 43
    { ""sv, FermentAll, 0, ""sv },            // 44
    { ""sv, Identity, 0, " on "sv },          // 45
    { ""sv, Identity, 0, " as "sv },          // 46
    { ""sv, Identity, 0, " is "sv },          // 47
    { ""sv, OmitLast, 7, ""sv },              // 48
    { ""sv, OmitLast, 1, "ing "sv },          // 49
    { ""sv, Identity, 0, "\n\t"sv },          // 50
    { ""sv, Identity, 0, ":"sv },             // 51
    { " "sv, Identity, 0, ". "sv },           // 52
    { ""sv, Identity, 0, "ed "sv },           // 53
    { ""sv, OmitFirst, 9, ""sv },             // 54
    { ""sv, OmitFirst, 7, ""sv },             // 55
    { ""sv, OmitLast, 6, ""sv },              // 56
    { ""sv, Identity, 0, "("sv },             // 57
    { ""sv, FermentFirst, 0, ", "sv },        // 58
    { ""sv, OmitLast, 8, ""sv },              // 59
    { ""sv, Identity, 0, " at "sv },          // 60
    { ""sv, Identity, 0, "ly "sv },           // 61
    { " the "sv, Identity, 0, " of "sv },     // 62
    { ""sv, OmitLast, 5, ""sv },              // 63
    { ""sv, OmitLast, 9, ""sv },              // 64
    { " "sv, FermentFirst, 0, ", "sv },       // 65
    { ""sv, FermentFirst, 0, "\""sv },        // 66
    { "."sv, Identity, 0, "("sv },            // 67
    { ""sv, FermentAll, 0, " "sv },           // 68
    { ""sv, FermentFirst, 0, "\">"sv },       // 69
    { ""sv, Identity, 0, "=\""sv },           // 70
    { " "sv, Identity, 0, "."sv },            // 71
    { ".com/"sv, Identity, 0, ""sv },         // 72
    { " the "sv, Identity, 0, " of the "sv }, // 73
    { ""sv, FermentFirst, 0, "'"sv },         // 74
    { ""sv, Identity, 0, ". This "sv },       // 75
    { ""sv, Identity, 0, ","sv },             // 76
    { "."sv, Identity, 0, " "sv },            // 77
    { ""sv, FermentFirst, 0, "("sv },         // 78
    { ""sv, FermentFirst, 0, "."sv },         // 79
    { ""sv, Identity, 0, " not "sv },         // 80
    { " "sv, Identity, 0, "=\""sv },          // 81
    { ""sv, Identity, 0, "er "sv },           // 82
    { " "sv, FermentAll, 0, " "sv },          // 83
    { ""sv, Identity, 0, "al "sv },           // 84
    { " "sv, FermentAll, 0, ""sv },           // 85
    { ""sv, Identity, 0, "='"sv },            // 86
    { ""sv, FermentAll, 0, "\""sv },          // 87
    { ""sv, FermentFirst, 0, ". "sv },        // 88
    { " "sv, Identity, 0, "("sv },            // 89
    { ""sv, Identity, 0, "ful "sv },          // 90
    { " "sv, FermentFirst, 0, ". "sv },       // 91
    { ""sv, Identity, 0, "ive "sv },          // 92
    { ""sv, Identity, 0, "less "sv },         // 93
    { ""sv, FermentAll, 0, "'"sv },           // 94
    { ""sv, Identity, 0, "est "sv },          // 95
    { " "sv, FermentFirst, 0, "."sv },        // 96
    { ""sv, FermentAll, 0, "\">"sv },         // 97
    { " "sv, Identity, 0, "='"sv },           // 98
    { ""sv, FermentFirst, 0, ","sv },         // 99
    { ""sv, Identity, 0, "ize "sv },          // 100
    { ""sv, FermentAll, 0, "."sv },           // 101
    { "\xc2\xa0"sv, Identity, 0, ""sv },      // 102
    { " "sv, Identity, 0, ","sv },            // 103
    { ""sv, FermentFirst, 0, "=\""sv },       // 104
    { ""sv, FermentAll, 0, "=\""sv },         // 105
    { ""sv, Identity, 0, "ous "sv },          // 106
    { ""sv, FermentAll, 0, ", "sv },          // 107
    { ""sv, FermentFirst, 0, "='"sv },        // 108
    { " "sv, FermentFirst, 0, ","sv },        // 109
    { " "sv, FermentAll, 0, "=\""sv },        // 110
    { " "sv, FermentAll, 0, ", "sv },         // 111
    { ""sv, FermentAll, 0, ","sv },           // 112
    { ""sv, FermentAll, 0, "("sv },           // 113
    { ""sv, FermentAll, 0, ". "sv },          // 114
    { " "sv, FermentAll, 0, "."sv },          // 115
    { ""sv, FermentAll, 0, "='"sv },          // 116
    { " "sv, FermentAll, 0, ". "sv },         // 117
    { " "sv, FermentFirst, 0, "=\""sv },      // 118
    { " "sv, FermentAll, 0, "='"sv },         // 119
    { " "sv, FermentFirst, 0, "='"sv },       // 120
};
|
||||
|
||||
// Expands a static-dictionary reference into the bytes it stands for.
//
// `length` selects the word-length bucket (only 4..24 are accepted). The low
// bits_by_length[length] bits of `index` select the base word inside that
// bucket; the remaining high bits select an entry of `transformations`.
// The result is: prefix + transformed base word + suffix.
//
// Returns an error for an unsupported length, an out-of-range transformation
// id, or when growing the output buffer fails.
ErrorOr<ByteBuffer> BrotliDictionary::lookup_word(size_t index, size_t length)
{
    if (length < 4 || length > 24)
        return Error::from_string_literal("invalid dictionary lookup length");

    size_t const index_bits = bits_by_length[length];
    size_t const word_index = index % (1 << index_bits);
    ReadonlyBytes base_word { brotli_dictionary_data + offset_by_length[length] + (word_index * length), length };
    size_t const transform_id = index >> index_bits;

    // Derive the bound from the table itself so the two cannot drift apart.
    constexpr size_t transformation_count = sizeof(transformations) / sizeof(transformations[0]);
    if (transform_id >= transformation_count)
        return Error::from_string_literal("invalid dictionary transformation");

    auto const& transformation = transformations[transform_id];

    // Use the fallible try_append() throughout so that an allocation failure
    // is reported to the caller rather than aborting.
    ByteBuffer bb;
    TRY(bb.try_append(transformation.prefix.bytes()));
    size_t prefix_length = bb.size();

    switch (transformation.operation) {
    case TransformationOperation::Identity:
        TRY(bb.try_append(base_word));
        break;
    case TransformationOperation::FermentFirst:
        TRY(bb.try_append(base_word));
        // Only the freshly appended word is fermented, never the prefix.
        ferment_first(bb.bytes().slice(prefix_length));
        break;
    case TransformationOperation::FermentAll:
        TRY(bb.try_append(base_word));
        ferment_all(bb.bytes().slice(prefix_length));
        break;
    case TransformationOperation::OmitFirst:
        // Dropping at least as many bytes as the word has leaves nothing to append.
        if (transformation.operation_data < base_word.size())
            TRY(bb.try_append(base_word.slice(transformation.operation_data)));
        break;
    case TransformationOperation::OmitLast:
        if (transformation.operation_data < base_word.size())
            TRY(bb.try_append(base_word.slice(0, base_word.size() - transformation.operation_data)));
        break;
    }

    TRY(bb.try_append(transformation.suffix.bytes()));
    return bb;
}
|
||||
|
||||
}
|
32
Userland/Libraries/LibCompress/BrotliDictionary.h
Normal file
32
Userland/Libraries/LibCompress/BrotliDictionary.h
Normal file
|
@ -0,0 +1,32 @@
|
|||
/*
|
||||
* Copyright (c) 2022, Michiel Visser <opensource@webmichiel.nl>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/ByteBuffer.h>
|
||||
|
||||
namespace Compress {
|
||||
|
||||
class BrotliDictionary {
|
||||
public:
|
||||
enum TransformationOperation {
|
||||
Identity,
|
||||
FermentFirst,
|
||||
FermentAll,
|
||||
OmitFirst,
|
||||
OmitLast,
|
||||
};
|
||||
struct Transformation {
|
||||
StringView prefix;
|
||||
TransformationOperation operation;
|
||||
u8 operation_data;
|
||||
StringView suffix;
|
||||
};
|
||||
|
||||
static ErrorOr<ByteBuffer> lookup_word(size_t index, size_t length);
|
||||
};
|
||||
|
||||
}
|
432
Userland/Libraries/LibCompress/BrotliDictionaryData.bin
Normal file
432
Userland/Libraries/LibCompress/BrotliDictionaryData.bin
Normal file
File diff suppressed because one or more lines are too long
|
@ -1,4 +1,6 @@
|
|||
set(SOURCES
|
||||
Brotli.cpp
|
||||
BrotliDictionary.cpp
|
||||
Deflate.cpp
|
||||
Zlib.cpp
|
||||
Gzip.cpp
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue