From 726963edc7330e8e8862e54fcabbddf788852113 Mon Sep 17 00:00:00 2001
From: Tim Schumacher
Date: Thu, 30 Mar 2023 13:16:40 +0200
Subject: [PATCH] LibCompress: Implement support for multiple concatenated XZ streams

---
 Tests/LibCompress/TestXz.cpp          | 16 ++++----
 Userland/Libraries/LibCompress/Xz.cpp | 56 +++++++++++++++++++++++++--
 Userland/Libraries/LibCompress/Xz.h   |  3 +-
 3 files changed, 62 insertions(+), 13 deletions(-)

diff --git a/Tests/LibCompress/TestXz.cpp b/Tests/LibCompress/TestXz.cpp
index ec8dc31f1f..548511a3e9 100644
--- a/Tests/LibCompress/TestXz.cpp
+++ b/Tests/LibCompress/TestXz.cpp
@@ -39,8 +39,8 @@ TEST_CASE(xz_utils_bad_0cat_alone)
 
     auto stream = MUST(try_make<FixedMemoryStream>(compressed));
     auto decompressor = MUST(Compress::XzDecompressor::create(move(stream)));
-    // TODO: We currently don't support XZ files with multiple concatenated streams, so we don't check for trailing garbage either.
-    (void)decompressor->read_until_eof(PAGE_SIZE);
+    auto buffer_or_error = decompressor->read_until_eof(PAGE_SIZE);
+    EXPECT(buffer_or_error.is_error());
 }
 
 TEST_CASE(xz_utils_bad_0cat_header_magic)
@@ -56,8 +56,8 @@ TEST_CASE(xz_utils_bad_0cat_header_magic)
 
     auto stream = MUST(try_make<FixedMemoryStream>(compressed));
     auto decompressor = MUST(Compress::XzDecompressor::create(move(stream)));
-    // TODO: We currently don't support XZ files with multiple concatenated streams, so we don't check for the second header magic.
-    (void)decompressor->read_until_eof(PAGE_SIZE);
+    auto buffer_or_error = decompressor->read_until_eof(PAGE_SIZE);
+    EXPECT(buffer_or_error.is_error());
 }
 
 TEST_CASE(xz_utils_bad_0catpad_empty)
@@ -74,8 +74,8 @@ TEST_CASE(xz_utils_bad_0catpad_empty)
 
     auto stream = MUST(try_make<FixedMemoryStream>(compressed));
     auto decompressor = MUST(Compress::XzDecompressor::create(move(stream)));
-    // TODO: We currently don't support XZ files with multiple concatenated streams, so we don't check for the stream padding.
-    (void)decompressor->read_until_eof(PAGE_SIZE);
+    auto buffer_or_error = decompressor->read_until_eof(PAGE_SIZE);
+    EXPECT(buffer_or_error.is_error());
 }
 
 TEST_CASE(xz_utils_bad_0_empty_truncated)
@@ -151,8 +151,8 @@ TEST_CASE(xz_utils_bad_0pad_empty)
 
     auto stream = MUST(try_make<FixedMemoryStream>(compressed));
     auto decompressor = MUST(Compress::XzDecompressor::create(move(stream)));
-    // TODO: We currently don't support XZ files with multiple concatenated streams, so we don't check for the stream padding.
-    (void)decompressor->read_until_eof(PAGE_SIZE);
+    auto buffer_or_error = decompressor->read_until_eof(PAGE_SIZE);
+    EXPECT(buffer_or_error.is_error());
 }
 
 TEST_CASE(xz_utils_bad_1_block_header_1)
diff --git a/Userland/Libraries/LibCompress/Xz.cpp b/Userland/Libraries/LibCompress/Xz.cpp
index cf996111e4..fae595290b 100644
--- a/Userland/Libraries/LibCompress/Xz.cpp
+++ b/Userland/Libraries/LibCompress/Xz.cpp
@@ -177,14 +177,61 @@ XzDecompressor::XzDecompressor(NonnullOwnPtr<CountingStream> stream)
 
 ErrorOr<Bytes> XzDecompressor::read_some(Bytes bytes)
 {
-    if (m_found_stream_footer)
+    if (m_found_last_stream_footer)
         return bytes.trim(0);
 
     if (!m_stream_flags.has_value()) {
-        auto stream_header = TRY(m_stream->read_value<XzStreamHeader>());
+        // This assumes that we can just read the Stream Header into memory as-is. Check that this still holds up for good measure.
+        static_assert(AK::Traits<XzStreamHeader>::is_trivially_serializable());
+
+        XzStreamHeader stream_header {};
+        Bytes stream_header_bytes { &stream_header, sizeof(stream_header) };
+
+        if (m_found_first_stream_header) {
+            // 2.2. Stream Padding:
+            // "Stream Padding MUST contain only null bytes. To preserve the
+            //  four-byte alignment of consecutive Streams, the size of Stream
+            //  Padding MUST be a multiple of four bytes. Empty Stream Padding
+            //  is allowed. If these requirements are not met, the decoder MUST
+            //  indicate an error."
+
+            VERIFY(m_stream->read_bytes() % 4 == 0);
+
+            while (true) {
+                // Read the first byte until we either get a non-null byte or reach EOF.
+                auto byte_or_error = m_stream->read_value<u8>();
+
+                if (byte_or_error.is_error() && m_stream->is_eof())
+                    break;
+
+                auto byte = TRY(byte_or_error);
+
+                if (byte != 0) {
+                    stream_header_bytes[0] = byte;
+                    stream_header_bytes = stream_header_bytes.slice(1);
+                    break;
+                }
+            }
+
+            // If we aren't at EOF we already read the potential first byte of the header, so we need to subtract that.
+            auto end_of_padding_offset = m_stream->read_bytes();
+            if (!m_stream->is_eof())
+                end_of_padding_offset -= 1;
+
+            if (end_of_padding_offset % 4 != 0)
+                return Error::from_string_literal("XZ Stream Padding is not aligned to 4 bytes");
+
+            if (m_stream->is_eof()) {
+                m_found_last_stream_footer = true;
+                return bytes.trim(0);
+            }
+        }
+
+        TRY(m_stream->read_until_filled(stream_header_bytes));
         TRY(stream_header.validate());
 
         m_stream_flags = stream_header.flags;
+        m_found_first_stream_header = true;
     }
 
     if (!m_current_block_stream.has_value() || (*m_current_block_stream)->is_eof()) {
@@ -335,7 +382,8 @@ ErrorOr<Bytes> XzDecompressor::read_some(Bytes bytes)
             if (Bytes { &*m_stream_flags, sizeof(XzStreamFlags) } != Bytes { &stream_footer.flags, sizeof(stream_footer.flags) })
                 return Error::from_string_literal("XZ stream header flags don't match the stream footer");
 
-            m_found_stream_footer = true;
+            // Another XZ Stream might follow, so we just unset the current information and continue on the next read.
+            m_stream_flags.clear();
             return bytes.trim(0);
         }
 
@@ -462,7 +510,7 @@ ErrorOr<size_t> XzDecompressor::write_some(ReadonlyBytes)
 
 bool XzDecompressor::is_eof() const
 {
-    return m_found_stream_footer;
+    return m_found_last_stream_footer;
 }
 
 bool XzDecompressor::is_open() const
diff --git a/Userland/Libraries/LibCompress/Xz.h b/Userland/Libraries/LibCompress/Xz.h
index 8decf07799..2bfdf7c1dd 100644
--- a/Userland/Libraries/LibCompress/Xz.h
+++ b/Userland/Libraries/LibCompress/Xz.h
@@ -112,7 +112,8 @@ private:
 
     NonnullOwnPtr<CountingStream> m_stream;
     Optional<XzStreamFlags> m_stream_flags;
-    bool m_found_stream_footer { false };
+    bool m_found_first_stream_header { false };
+    bool m_found_last_stream_footer { false };
 
     Optional<MaybeOwned<Stream>> m_current_block_stream {};
     Optional<u64> m_current_block_uncompressed_size {};