1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 11:57:35 +00:00

LibCompress: Decode non-self-referencing back-references in one shot

We currently decode back-references one byte at a time, writing each
byte back out to the output buffer as we go. This is only necessary when
the back-reference refers to itself, i.e. when the back-reference
distance is less than its length. In all other cases, we can read the
entire back-reference block in one shot.

Using the "enwik8" file as a test (100MB uncompressed, commonly used in
benchmarks: https://www.mattmahoney.net/dc/enwik8.zip), decompression
time decreases from:

    5.8s to 4.89s on Serenity (cold)
    2.3s to 1.72s on Serenity (warm)
    1.6s to 1.06s on Linux
This commit is contained in:
Timothy Flynn 2023-03-29 07:27:07 -04:00 committed by Linus Groh
parent e1f8443db0
commit 7447a91d7e
2 changed files with 13 additions and 5 deletions

View file

@ -199,12 +199,18 @@ ErrorOr<bool> DeflateDecompressor::CompressedBlock::try_read_more()
auto const distance = TRY(m_decompressor.decode_distance(distance_symbol)); auto const distance = TRY(m_decompressor.decode_distance(distance_symbol));
if (distance < length) {
for (size_t idx = 0; idx < length; ++idx) { for (size_t idx = 0; idx < length; ++idx) {
u8 byte = 0; u8 byte = 0;
TRY(m_decompressor.m_output_buffer.read_with_seekback({ &byte, sizeof(byte) }, distance)); TRY(m_decompressor.m_output_buffer.read_with_seekback({ &byte, sizeof(byte) }, distance));
m_decompressor.m_output_buffer.write({ &byte, sizeof(byte) }); m_decompressor.m_output_buffer.write({ &byte, sizeof(byte) });
} }
} else {
Array<u8, DeflateDecompressor::max_back_reference_length> buffer;
auto bytes = TRY(m_decompressor.m_output_buffer.read_with_seekback({ buffer.data(), length }, distance));
m_decompressor.m_output_buffer.write(bytes);
}
return true; return true;
} }
@ -386,7 +392,7 @@ ErrorOr<u32> DeflateDecompressor::decode_length(u32 symbol)
} }
if (symbol == 285) if (symbol == 285)
return 258; return DeflateDecompressor::max_back_reference_length;
VERIFY_NOT_REACHED(); VERIFY_NOT_REACHED();
} }

View file

@ -104,6 +104,8 @@ private:
ErrorOr<u32> decode_distance(u32); ErrorOr<u32> decode_distance(u32);
ErrorOr<void> decode_codes(CanonicalCode& literal_code, Optional<CanonicalCode>& distance_code); ErrorOr<void> decode_codes(CanonicalCode& literal_code, Optional<CanonicalCode>& distance_code);
static constexpr u16 max_back_reference_length = 258;
bool m_read_final_bock { false }; bool m_read_final_bock { false };
State m_state { State::Idle }; State m_state { State::Idle };