1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 04:07:45 +00:00

LibCompress: Decode non-self-referencing back-references in one shot

We currently decode back-references one byte at a time, writing each
byte back out to the output buffer before reading the next. This is only
necessary when the back-reference refers to itself, i.e. when the
back-reference distance is less than its length, so that later bytes of
the copy come from bytes written earlier in the same copy. In all other
cases, we can read the entire back-reference block in one shot.

Using the "enwik8" file as a test (100MB uncompressed, commonly used in
benchmarks: https://www.mattmahoney.net/dc/enwik8.zip), decompression
time decreases from:

    5.8s to 4.89s on Serenity (cold)
    2.3s to 1.72s on Serenity (warm)
    1.6s to 1.06s on Linux
This commit is contained in:
Timothy Flynn 2023-03-29 07:27:07 -04:00 committed by Linus Groh
parent e1f8443db0
commit 7447a91d7e
2 changed files with 13 additions and 5 deletions

View file

@ -199,11 +199,17 @@ ErrorOr<bool> DeflateDecompressor::CompressedBlock::try_read_more()
auto const distance = TRY(m_decompressor.decode_distance(distance_symbol));
for (size_t idx = 0; idx < length; ++idx) {
u8 byte = 0;
TRY(m_decompressor.m_output_buffer.read_with_seekback({ &byte, sizeof(byte) }, distance));
if (distance < length) {
for (size_t idx = 0; idx < length; ++idx) {
u8 byte = 0;
TRY(m_decompressor.m_output_buffer.read_with_seekback({ &byte, sizeof(byte) }, distance));
m_decompressor.m_output_buffer.write({ &byte, sizeof(byte) });
m_decompressor.m_output_buffer.write({ &byte, sizeof(byte) });
}
} else {
Array<u8, DeflateDecompressor::max_back_reference_length> buffer;
auto bytes = TRY(m_decompressor.m_output_buffer.read_with_seekback({ buffer.data(), length }, distance));
m_decompressor.m_output_buffer.write(bytes);
}
return true;
@ -386,7 +392,7 @@ ErrorOr<u32> DeflateDecompressor::decode_length(u32 symbol)
}
if (symbol == 285)
return 258;
return DeflateDecompressor::max_back_reference_length;
VERIFY_NOT_REACHED();
}