mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 07:07:34 +00:00
LibCompress: Implement proper handling of LZMA end-of-stream markers
This commit is contained in:
parent
b6f3b2f116
commit
8ff36e5910
4 changed files with 34 additions and 12 deletions
|
@ -166,7 +166,6 @@ TEST_CASE(specification_bad_incorrect_size_lzma_decompress)
|
|||
|
||||
auto stream = MUST(try_make<FixedMemoryStream>(compressed));
|
||||
auto decompressor = MUST(Compress::LzmaDecompressor::create_from_container(move(stream)));
|
||||
// FIXME: This should detect that there is still remaining (non-'end of stream') data after the expected data and reject the last read.
|
||||
// As of now, this test accepts any result, except for crashes.
|
||||
(void)decompressor->read_until_eof(PAGE_SIZE);
|
||||
auto buffer_or_error = decompressor->read_until_eof(PAGE_SIZE);
|
||||
EXPECT(buffer_or_error.is_error());
|
||||
}
|
||||
|
|
|
@ -81,6 +81,7 @@ ErrorOr<LzmaDecompressorOptions> LzmaHeader::as_decompressor_options() const
|
|||
.position_bits = model_properties.position_bits,
|
||||
.dictionary_size = dictionary_size(),
|
||||
.uncompressed_size = uncompressed_size(),
|
||||
.reject_end_of_stream_marker = false,
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -457,17 +458,15 @@ ErrorOr<u32> LzmaDecompressor::decode_normalized_match_distance(u16 normalized_m
|
|||
ErrorOr<Bytes> LzmaDecompressor::read_some(Bytes bytes)
|
||||
{
|
||||
while (m_dictionary->used_space() < bytes.size() && m_dictionary->empty_space() != 0) {
|
||||
if (m_found_end_of_stream_marker) {
|
||||
if (m_options.uncompressed_size.has_value() && m_total_decoded_bytes < m_options.uncompressed_size.value())
|
||||
return Error::from_string_literal("Found end-of-stream marker earlier than expected");
|
||||
|
||||
if (m_found_end_of_stream_marker)
|
||||
break;
|
||||
}
|
||||
|
||||
if (has_reached_expected_data_size()) {
|
||||
// FIXME: This should validate that either EOF or the 'end of stream' marker follow immediately.
|
||||
// Both of those cases count as the 'end of stream' marker being found and should check for a clean decoder state.
|
||||
// If the decoder is in a clean state, we assume that this is fine.
|
||||
if (is_range_decoder_in_clean_state())
|
||||
break;
|
||||
|
||||
// Otherwise, we give it one last try to find the end marker in the remaining data.
|
||||
}
|
||||
|
||||
// "The decoder calculates "state2" variable value to select exact variable from
|
||||
|
@ -543,6 +542,10 @@ ErrorOr<Bytes> LzmaDecompressor::read_some(Bytes bytes)
|
|||
// IsMatch[state2] decode
|
||||
// 0 - the Literal"
|
||||
if (TRY(decode_bit_with_probability(m_is_match_probabilities[state2])) == 0) {
|
||||
// If we are already past the expected uncompressed size, we are already in "look for EOS only" mode.
|
||||
if (has_reached_expected_data_size())
|
||||
return Error::from_string_literal("Found literal after reaching expected uncompressed size");
|
||||
|
||||
// "At first the LZMA decoder must check that it doesn't exceed
|
||||
// specified uncompressed size."
|
||||
// This is already checked for at the beginning of the loop.
|
||||
|
@ -577,11 +580,20 @@ ErrorOr<Bytes> LzmaDecompressor::read_some(Bytes bytes)
|
|||
// "End of stream" marker, so we can stop decoding and check finishing
|
||||
// condition in Range Decoder"
|
||||
if (m_rep0 == 0xFFFFFFFF) {
|
||||
// If we should reject end-of-stream markers, do so now.
|
||||
// Note that this is not part of LZMA, as LZMA allows end-of-stream markers in all contexts, so pure LZMA should never set this option.
|
||||
if (m_options.reject_end_of_stream_marker)
|
||||
return Error::from_string_literal("An end-of-stream marker was found, but the LZMA stream is configured to reject them");
|
||||
|
||||
// The range decoder condition is checked after breaking out of the loop.
|
||||
m_found_end_of_stream_marker = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// If we are looking for EOS, but haven't found it here, the stream is corrupted.
|
||||
if (has_reached_expected_data_size())
|
||||
return Error::from_string_literal("First simple match after the expected uncompressed size is not the EOS marker");
|
||||
|
||||
// "If uncompressed size is defined, LZMA decoder must check that it doesn't
|
||||
// exceed that specified uncompressed size."
|
||||
// This is being checked for in the common "copy to buffer" implementation.
|
||||
|
@ -598,6 +610,10 @@ ErrorOr<Bytes> LzmaDecompressor::read_some(Bytes bytes)
|
|||
continue;
|
||||
}
|
||||
|
||||
// If we are looking for EOS, but find another match type, the stream is also corrupted.
|
||||
if (has_reached_expected_data_size())
|
||||
return Error::from_string_literal("First match type after the expected uncompressed size is not a simple match");
|
||||
|
||||
// " 1 - Rep Match
|
||||
// IsRepG0[state] decode
|
||||
// 0 - the distance is rep0"
|
||||
|
@ -666,7 +682,10 @@ ErrorOr<Bytes> LzmaDecompressor::read_some(Bytes bytes)
|
|||
TRY(copy_match_to_buffer(normalized_length + normalized_to_real_match_length_offset));
|
||||
}
|
||||
|
||||
if (m_found_end_of_stream_marker) {
|
||||
if (m_found_end_of_stream_marker || has_reached_expected_data_size()) {
|
||||
if (m_options.uncompressed_size.has_value() && m_total_decoded_bytes < m_options.uncompressed_size.value())
|
||||
return Error::from_string_literal("Found end-of-stream marker earlier than expected");
|
||||
|
||||
if (!is_range_decoder_in_clean_state())
|
||||
return Error::from_string_literal("LZMA stream ends in an unclean state");
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@ struct LzmaDecompressorOptions {
|
|||
u8 position_bits { 0 };
|
||||
u32 dictionary_size { 0 };
|
||||
Optional<u64> uncompressed_size;
|
||||
bool reject_end_of_stream_marker { false };
|
||||
};
|
||||
|
||||
// Described in section "lzma file format".
|
||||
|
|
|
@ -106,6 +106,9 @@ ErrorOr<Bytes> Lzma2Decompressor::read_some(Bytes bytes)
|
|||
.position_bits = properties.position_bits,
|
||||
.dictionary_size = static_cast<u32>(dictionary_size),
|
||||
.uncompressed_size = uncompressed_size,
|
||||
|
||||
// Note: This is not specified anywhere. However, it is apparently tested by bad-1-lzma2-7.xz from the XZ utils test files.
|
||||
.reject_end_of_stream_marker = true,
|
||||
};
|
||||
[[fallthrough]];
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue