mirror of
https://github.com/RGBCube/serenity
synced 2025-10-16 12:22:06 +00:00

Previously, the implementation would produce one Vector<u8> which would contain the whole decompressed data. That can be a lot and even exhaust memory. With these changes it is still necessary to store the whole input data in one piece (I am working on this next,) but the output can be read block by block. (That's not optimal either because blocks can be arbitrarily large, but it's good for now.)
185 lines
5.3 KiB
C++
185 lines
5.3 KiB
C++
/*
|
|
* Copyright (c) 2020, the SerenityOS developers
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright notice, this
|
|
* list of conditions and the following disclaimer.
|
|
*
|
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <AK/CircularQueue.h>
|
|
#include <AK/Span.h>
|
|
#include <AK/Stream.h>
|
|
#include <AK/Types.h>
|
|
#include <AK/Vector.h>
|
|
#include <cstring>
|
|
|
|
namespace Compress {
|
|
|
|
// Reads one bit at a time starting with the rightmost bit
|
|
class BitStreamReader {
|
|
public:
|
|
BitStreamReader(ReadonlyBytes data)
|
|
: m_data(data)
|
|
{
|
|
}
|
|
|
|
i8 read();
|
|
i8 read_byte();
|
|
u32 read_bits(u8);
|
|
u8 get_bit_byte_offset();
|
|
|
|
private:
|
|
ReadonlyBytes m_data;
|
|
size_t m_data_index { 0 };
|
|
|
|
i8 m_current_byte { 0 };
|
|
u8 m_remaining_bits { 0 };
|
|
};
|
|
|
|
class CanonicalCode {
|
|
public:
|
|
CanonicalCode(Vector<u8>);
|
|
u32 next_symbol(BitStreamReader&);
|
|
|
|
private:
|
|
Vector<u32> m_symbol_codes;
|
|
Vector<u32> m_symbol_values;
|
|
};
|
|
|
|
// Implements a DEFLATE decompressor according to RFC 1951.
|
|
class DeflateStream final : public InputStream {
|
|
public:
|
|
// FIXME: This should really return a ByteBuffer.
|
|
static Vector<u8> decompress_all(ReadonlyBytes bytes)
|
|
{
|
|
DeflateStream stream { bytes };
|
|
while (stream.read_next_block()) {
|
|
}
|
|
|
|
Vector<u8> vector;
|
|
vector.resize(stream.m_intermediate_stream.remaining());
|
|
stream >> vector;
|
|
|
|
return vector;
|
|
}
|
|
|
|
DeflateStream(ReadonlyBytes data)
|
|
: m_reader(data)
|
|
, m_literal_length_codes(generate_literal_length_codes())
|
|
, m_fixed_distance_codes(generate_fixed_distance_codes())
|
|
{
|
|
}
|
|
|
|
// FIXME: Accept an InputStream.
|
|
|
|
size_t read(Bytes bytes) override
|
|
{
|
|
if (m_intermediate_stream.remaining() >= bytes.size())
|
|
return m_intermediate_stream.read_or_error(bytes);
|
|
|
|
while (read_next_block()) {
|
|
if (m_intermediate_stream.remaining() >= bytes.size())
|
|
return m_intermediate_stream.read_or_error(bytes);
|
|
}
|
|
|
|
return m_intermediate_stream.read(bytes);
|
|
}
|
|
|
|
bool read_or_error(Bytes bytes) override
|
|
{
|
|
if (m_intermediate_stream.remaining() >= bytes.size()) {
|
|
m_intermediate_stream.read_or_error(bytes);
|
|
return true;
|
|
}
|
|
|
|
while (read_next_block()) {
|
|
if (m_intermediate_stream.remaining() >= bytes.size()) {
|
|
m_intermediate_stream.read_or_error(bytes);
|
|
return true;
|
|
}
|
|
}
|
|
|
|
m_error = true;
|
|
return false;
|
|
}
|
|
|
|
bool eof() const override
|
|
{
|
|
if (!m_intermediate_stream.eof())
|
|
return false;
|
|
|
|
while (read_next_block()) {
|
|
if (!m_intermediate_stream.eof())
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool discard_or_error(size_t count) override
|
|
{
|
|
if (m_intermediate_stream.remaining() >= count) {
|
|
m_intermediate_stream.discard_or_error(count);
|
|
return true;
|
|
}
|
|
|
|
while (read_next_block()) {
|
|
if (m_intermediate_stream.remaining() >= count) {
|
|
m_intermediate_stream.discard_or_error(count);
|
|
return true;
|
|
}
|
|
}
|
|
|
|
m_error = true;
|
|
return false;
|
|
}
|
|
|
|
private:
|
|
void decompress_uncompressed_block() const;
|
|
void decompress_static_block() const;
|
|
void decompress_dynamic_block() const;
|
|
void decompress_huffman_block(CanonicalCode&, CanonicalCode*) const;
|
|
|
|
Vector<CanonicalCode> decode_huffman_codes() const;
|
|
u32 decode_run_length(u32) const;
|
|
u32 decode_distance(u32) const;
|
|
|
|
void copy_from_history(u32, u32) const;
|
|
|
|
Vector<u8> generate_literal_length_codes() const;
|
|
Vector<u8> generate_fixed_distance_codes() const;
|
|
|
|
mutable BitStreamReader m_reader;
|
|
|
|
mutable CanonicalCode m_literal_length_codes;
|
|
mutable CanonicalCode m_fixed_distance_codes;
|
|
|
|
// FIXME: Theoretically, blocks can be extremly large, reading a single block could
|
|
// exhaust memory. Maybe wait for C++20 coroutines?
|
|
bool read_next_block() const;
|
|
|
|
mutable bool m_read_last_block { false };
|
|
mutable DuplexMemoryStream m_intermediate_stream;
|
|
};
|
|
|
|
}
|