diff --git a/AK/Stream.h b/AK/Stream.h index af71773b97..75830ca0f7 100644 --- a/AK/Stream.h +++ b/AK/Stream.h @@ -309,10 +309,28 @@ public: } m_read_offset += nread; + try_discard_chunks(); + return nread; } + size_t read(Bytes bytes, size_t offset) + { + const auto backup = this->roffset(); + + bool do_discard_chunks = false; + exchange(m_do_discard_chunks, do_discard_chunks); + + rseek(offset); + const auto count = read(bytes); + rseek(backup); + + exchange(m_do_discard_chunks, do_discard_chunks); + + return count; + } + bool read_or_error(Bytes bytes) override { if (m_write_offset - m_read_offset < bytes.size()) { @@ -344,20 +362,24 @@ public: return true; } - void seek(size_t offset) + size_t roffset() const { return m_read_offset; } + size_t woffset() const { return m_write_offset; } + + void rseek(size_t offset) { ASSERT(offset >= m_base_offset); ASSERT(offset <= m_write_offset); m_read_offset = offset; } - size_t offset() const { return m_read_offset; } - size_t remaining() const { return m_write_offset - m_read_offset; } private: void try_discard_chunks() { + if (!m_do_discard_chunks) + return; + while (m_read_offset - m_base_offset >= history_size + chunk_size) { m_chunks.take_first(); m_base_offset += chunk_size; @@ -368,7 +390,9 @@ private: size_t m_write_offset { 0 }; size_t m_read_offset { 0 }; size_t m_base_offset { 0 }; + bool m_do_discard_chunks { false }; }; + } using AK::DuplexMemoryStream; diff --git a/AK/Tests/TestStream.cpp b/AK/Tests/TestStream.cpp index a5162885c5..aa547d3caa 100644 --- a/AK/Tests/TestStream.cpp +++ b/AK/Tests/TestStream.cpp @@ -146,7 +146,7 @@ TEST_CASE(duplex_seek_into_history) // We now have 128KiB on the stream. Because the stream has a // history size of 64KiB, we should be able to seek to 64KiB. static_assert(DuplexMemoryStream::history_size == 64 * 1024); - stream.seek(64 * 1024); + stream.rseek(64 * 1024); EXPECT_EQ(stream.remaining(), 192 * 1024ul); @@ -166,10 +166,10 @@ TEST_CASE(duplex_wild_seeking) stream << input2; stream << input0 << input1; - stream.seek(0); + stream.rseek(0); stream << input2 << input0; - stream.seek(4); + stream.rseek(4); stream >> output0 >> output1 >> output2; EXPECT(!stream.eof()); diff --git a/Libraries/LibCompress/Deflate.cpp b/Libraries/LibCompress/Deflate.cpp index 9bd05f00da..c321ac775b 100644 --- a/Libraries/LibCompress/Deflate.cpp +++ b/Libraries/LibCompress/Deflate.cpp @@ -33,39 +33,38 @@ namespace Compress { -Vector Deflate::decompress() +bool DeflateStream::read_next_block() const { - bool is_final_block = false; + if (m_read_last_block) + return false; - do { - is_final_block = m_reader.read(); - auto block_type = m_reader.read_bits(2); + m_read_last_block = m_reader.read_bits(1); + auto block_type = m_reader.read_bits(2); - switch (block_type) { - case 0: - decompress_uncompressed_block(); - break; - case 1: - decompress_static_block(); - break; - case 2: - decompress_dynamic_block(); - break; - case 3: - dbg() << "Block contains reserved block type..."; - ASSERT_NOT_REACHED(); - break; - default: - dbg() << "Invalid block type was read..."; - ASSERT_NOT_REACHED(); - break; - } - } while (!is_final_block); + switch (block_type) { + case 0: + decompress_uncompressed_block(); + break; + case 1: + decompress_static_block(); + break; + case 2: + decompress_dynamic_block(); + break; + case 3: + dbg() << "Block contains reserved block type..."; + ASSERT_NOT_REACHED(); + break; + default: + dbg() << "Invalid block type was read..."; + ASSERT_NOT_REACHED(); + break; + } - return m_output_buffer; + return true; } -void Deflate::decompress_uncompressed_block() +void DeflateStream::decompress_uncompressed_block() const { // Align to the next byte boundary. while (m_reader.get_bit_byte_offset() != 0) { @@ -87,17 +86,16 @@ void Deflate::decompress_uncompressed_block() ASSERT_NOT_REACHED(); } - m_output_buffer.append(byte); - m_history_buffer.enqueue(byte); + m_intermediate_stream << byte; } } -void Deflate::decompress_static_block() +void DeflateStream::decompress_static_block() const { decompress_huffman_block(m_literal_length_codes, &m_fixed_distance_codes); } -void Deflate::decompress_dynamic_block() +void DeflateStream::decompress_dynamic_block() const { auto codes = decode_huffman_codes(); if (codes.size() == 2) { @@ -107,7 +105,7 @@ void Deflate::decompress_dynamic_block() } } -void Deflate::decompress_huffman_block(CanonicalCode& length_codes, CanonicalCode* distance_codes) +void DeflateStream::decompress_huffman_block(CanonicalCode& length_codes, CanonicalCode* distance_codes) const { for (;;) { u32 symbol = length_codes.next_symbol(m_reader); @@ -119,8 +117,7 @@ void Deflate::decompress_huffman_block(CanonicalCode& length_codes, CanonicalCod // literal byte. if (symbol < 256) { - m_history_buffer.enqueue(symbol); - m_output_buffer.append(symbol); + m_intermediate_stream << static_cast(symbol); continue; } @@ -144,7 +141,7 @@ void Deflate::decompress_huffman_block(CanonicalCode& length_codes, CanonicalCod } } -Vector Deflate::decode_huffman_codes() +Vector DeflateStream::decode_huffman_codes() const { // FIXME: This path is not tested. Vector result; @@ -244,7 +241,7 @@ Vector Deflate::decode_huffman_codes() return result; } -u32 Deflate::decode_run_length(u32 symbol) +u32 DeflateStream::decode_run_length(u32 symbol) const { if (symbol <= 264) { return symbol - 254; @@ -263,7 +260,7 @@ u32 Deflate::decode_run_length(u32 symbol) ASSERT_NOT_REACHED(); } -u32 Deflate::decode_distance(u32 symbol) +u32 DeflateStream::decode_distance(u32 symbol) const { if (symbol <= 3) { return symbol + 1; @@ -278,15 +275,19 @@ u32 Deflate::decode_distance(u32 symbol) ASSERT_NOT_REACHED(); } -void Deflate::copy_from_history(u32 distance, u32 run) +void DeflateStream::copy_from_history(u32 distance, u32 run) const { - auto head_index = (m_history_buffer.head_index() + m_history_buffer.size()) % m_history_buffer.capacity(); - auto read_index = (head_index - distance + m_history_buffer.capacity()) % m_history_buffer.capacity(); - for (size_t i = 0; i < run; i++) { - auto data = m_history_buffer.at(read_index++); - m_output_buffer.append(data); - m_history_buffer.enqueue(data); + u8 byte; + + // FIXME: In many cases we can read more than one byte at a time, this should + // be refactored into a while loop. Beware, edge case: + // + // // The first four bytes are on the stream already, the other four + // // are written by copy_from_history() itself. + // copy_from_history(4, 8); + m_intermediate_stream.read({ &byte, sizeof(byte) }, m_intermediate_stream.woffset() - distance); + m_intermediate_stream << byte; } } @@ -335,7 +336,7 @@ u32 BitStreamReader::read_bits(u8 count) return result; } -Vector Deflate::generate_literal_length_codes() +Vector DeflateStream::generate_literal_length_codes() const { Vector ll_codes; ll_codes.resize(288); @@ -346,7 +347,7 @@ Vector Deflate::generate_literal_length_codes() return ll_codes; } -Vector Deflate::generate_fixed_distance_codes() +Vector DeflateStream::generate_fixed_distance_codes() const { Vector fd_codes; fd_codes.resize(32); @@ -423,4 +424,5 @@ u32 CanonicalCode::next_symbol(BitStreamReader& reader) } } } + } diff --git a/Libraries/LibCompress/Deflate.h b/Libraries/LibCompress/Deflate.h index 54e053155b..fc138f52df 100644 --- a/Libraries/LibCompress/Deflate.h +++ b/Libraries/LibCompress/Deflate.h @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -65,34 +66,120 @@ private: Vector m_symbol_values; }; -class Deflate { +// Implements a DEFLATE decompressor according to RFC 1951. +class DeflateStream final : public InputStream { public: - Deflate(ReadonlyBytes data) + // FIXME: This should really return a ByteBuffer. + static Vector decompress_all(ReadonlyBytes bytes) + { + DeflateStream stream { bytes }; + while (stream.read_next_block()) { + } + + Vector vector; + vector.resize(stream.m_intermediate_stream.remaining()); + stream >> vector; + + return vector; + } + + DeflateStream(ReadonlyBytes data) : m_reader(data) , m_literal_length_codes(generate_literal_length_codes()) , m_fixed_distance_codes(generate_fixed_distance_codes()) { } - Vector decompress(); + // FIXME: Accept an InputStream. + + size_t read(Bytes bytes) override + { + if (m_intermediate_stream.remaining() >= bytes.size()) + return m_intermediate_stream.read_or_error(bytes); + + while (read_next_block()) { + if (m_intermediate_stream.remaining() >= bytes.size()) + return m_intermediate_stream.read_or_error(bytes); + } + + return m_intermediate_stream.read(bytes); + } + + bool read_or_error(Bytes bytes) override + { + if (m_intermediate_stream.remaining() >= bytes.size()) { + m_intermediate_stream.read_or_error(bytes); + return true; + } + + while (read_next_block()) { + if (m_intermediate_stream.remaining() >= bytes.size()) { + m_intermediate_stream.read_or_error(bytes); + return true; + } + } + + m_error = true; + return false; + } + + bool eof() const override + { + if (!m_intermediate_stream.eof()) + return false; + + while (read_next_block()) { + if (!m_intermediate_stream.eof()) + return false; + } + + return true; + } + + bool discard_or_error(size_t count) override + { + if (m_intermediate_stream.remaining() >= count) { + m_intermediate_stream.discard_or_error(count); + return true; + } + + while (read_next_block()) { + if (m_intermediate_stream.remaining() >= count) { + m_intermediate_stream.discard_or_error(count); + return true; + } + } + + m_error = true; + return false; + } private: - void decompress_uncompressed_block(); - void decompress_static_block(); - void decompress_dynamic_block(); - void decompress_huffman_block(CanonicalCode&, CanonicalCode*); - Vector decode_huffman_codes(); - void copy_from_history(u32, u32); - u32 decode_run_length(u32); - u32 decode_distance(u32); - Vector generate_literal_length_codes(); - Vector generate_fixed_distance_codes(); + void decompress_uncompressed_block() const; + void decompress_static_block() const; + void decompress_dynamic_block() const; + void decompress_huffman_block(CanonicalCode&, CanonicalCode*) const; - BitStreamReader m_reader; - CircularQueue m_history_buffer; - Vector m_output_buffer; + Vector decode_huffman_codes() const; + u32 decode_run_length(u32) const; + u32 decode_distance(u32) const; - CanonicalCode m_literal_length_codes; - CanonicalCode m_fixed_distance_codes; + void copy_from_history(u32, u32) const; + + Vector generate_literal_length_codes() const; + Vector generate_fixed_distance_codes() const; + + mutable BitStreamReader m_reader; + + mutable CanonicalCode m_literal_length_codes; + mutable CanonicalCode m_fixed_distance_codes; + + // FIXME: Theoretically, blocks can be extremly large, reading a single block could + // exhaust memory. Maybe wait for C++20 coroutines? + bool read_next_block() const; + + mutable bool m_read_last_block { false }; + mutable DuplexMemoryStream m_intermediate_stream; }; + } diff --git a/Libraries/LibCompress/Zlib.cpp b/Libraries/LibCompress/Zlib.cpp index 2ce9a415b6..15eb13f8bd 100644 --- a/Libraries/LibCompress/Zlib.cpp +++ b/Libraries/LibCompress/Zlib.cpp @@ -57,7 +57,7 @@ Zlib::Zlib(ReadonlyBytes data) Vector Zlib::decompress() { - return Deflate(m_data_bytes).decompress(); + return DeflateStream::decompress_all(m_data_bytes); } u32 Zlib::checksum() diff --git a/Libraries/LibCompress/Zlib.h b/Libraries/LibCompress/Zlib.h index cd99bbb0f0..8336527e39 100644 --- a/Libraries/LibCompress/Zlib.h +++ b/Libraries/LibCompress/Zlib.h @@ -49,4 +49,5 @@ private: ReadonlyBytes m_input_data; ReadonlyBytes m_data_bytes; }; + } diff --git a/Userland/test-compress.cpp b/Userland/test-compress.cpp index 5dfe1439fe..ce43de84b0 100644 --- a/Userland/test-compress.cpp +++ b/Userland/test-compress.cpp @@ -61,8 +61,7 @@ auto main(int argc, char** argv) -> int 0xCB, 0x4A, 0x13, 0x00 }; - auto deflater = Compress::Deflate({ data_bytes, 4 * 7 }); - auto deflated = deflater.decompress(); + auto deflated = Compress::DeflateStream::decompress_all({ data_bytes, 4 * 7 }); auto decompressed = String((const char*)deflated.data(), deflated.size()); if (decompressed.equals_ignoring_case("This is a simple text file :)")) { @@ -80,7 +79,7 @@ auto main(int argc, char** argv) -> int 0x78, 0x01, 0x01, 0x1D, 0x00, 0xE2, 0xFF, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x73, 0x69, 0x6D, 0x70, 0x6C, 0x65, 0x20, - 0x74, 0x65, 0x78, 0x74, 0x20, 0x66, 0x69, 0x6C, + 0x74, 0x65, 0x78, 0x74, 0x20, 0x66, 0x69, 0x6C, 0x65, 0x20, 0x3A, 0x29, 0x99, 0x5E, 0x09, 0xE8 };