From 7447a91d7ed8646462aa64828cdff328f56d34d1 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Wed, 29 Mar 2023 07:27:07 -0400 Subject: [PATCH] LibCompress: Decode non-self-referencing back-references in one shot We currently decode back-references one byte at a time, while writing that byte back out to the output buffer. This is only necessary when the back-reference refers to itself, i.e. when the back-reference distance is less than its length. In other cases, we can read the entire back-reference block in one shot. Using the "enwik8" file as a test (100MB uncompressed, commonly used in benchmarks: https://www.mattmahoney.net/dc/enwik8.zip), decompression time decreases from: 5.8s to 4.89s on Serenity (cold) 2.3s to 1.72s on Serenity (warm) 1.6s to 1.06s on Linux --- Userland/Libraries/LibCompress/Deflate.cpp | 16 +++++++++++----- Userland/Libraries/LibCompress/Deflate.h | 2 ++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/Userland/Libraries/LibCompress/Deflate.cpp b/Userland/Libraries/LibCompress/Deflate.cpp index ae8b9ac2bb..bfdeb991a4 100644 --- a/Userland/Libraries/LibCompress/Deflate.cpp +++ b/Userland/Libraries/LibCompress/Deflate.cpp @@ -199,11 +199,17 @@ ErrorOr DeflateDecompressor::CompressedBlock::try_read_more() auto const distance = TRY(m_decompressor.decode_distance(distance_symbol)); - for (size_t idx = 0; idx < length; ++idx) { - u8 byte = 0; - TRY(m_decompressor.m_output_buffer.read_with_seekback({ &byte, sizeof(byte) }, distance)); + if (distance < length) { + for (size_t idx = 0; idx < length; ++idx) { + u8 byte = 0; + TRY(m_decompressor.m_output_buffer.read_with_seekback({ &byte, sizeof(byte) }, distance)); - m_decompressor.m_output_buffer.write({ &byte, sizeof(byte) }); + m_decompressor.m_output_buffer.write({ &byte, sizeof(byte) }); + } + } else { + Array buffer; + auto bytes = TRY(m_decompressor.m_output_buffer.read_with_seekback({ buffer.data(), length }, distance)); + m_decompressor.m_output_buffer.write(bytes); } 
return true; @@ -386,7 +392,7 @@ ErrorOr DeflateDecompressor::decode_length(u32 symbol) } if (symbol == 285) - return 258; + return DeflateDecompressor::max_back_reference_length; VERIFY_NOT_REACHED(); } diff --git a/Userland/Libraries/LibCompress/Deflate.h b/Userland/Libraries/LibCompress/Deflate.h index 4767032061..1ca761ad40 100644 --- a/Userland/Libraries/LibCompress/Deflate.h +++ b/Userland/Libraries/LibCompress/Deflate.h @@ -104,6 +104,8 @@ private: ErrorOr decode_distance(u32); ErrorOr decode_codes(CanonicalCode& literal_code, Optional& distance_code); + static constexpr u16 max_back_reference_length = 258; + bool m_read_final_bock { false }; State m_state { State::Idle };