mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 05:47:35 +00:00
AK: Rewrite the hint-based CircularBuffer::find_copy_in_seekback
The hint-based overload now compares the memory in blocks instead of byte by byte, which should be slightly more efficient. However, it doesn't make much difference (e.g. ~1% in LZMA compression) in most real-world applications, as the non-hint overload of find_copy_in_seekback is more expensive by orders of magnitude.
This commit is contained in:
parent
3526d67694
commit
42d01b21d8
4 changed files with 36 additions and 51 deletions
|
@ -375,7 +375,7 @@ ErrorOr<Vector<SearchableCircularBuffer::Match>> SearchableCircularBuffer::find_
|
||||||
return matches;
|
return matches;
|
||||||
}
|
}
|
||||||
|
|
||||||
ErrorOr<Vector<SearchableCircularBuffer::Match>> SearchableCircularBuffer::find_copy_in_seekback(Vector<size_t> const& distances, size_t maximum_length, size_t minimum_length) const
|
Optional<SearchableCircularBuffer::Match> SearchableCircularBuffer::find_copy_in_seekback(ReadonlySpan<size_t> distances, size_t maximum_length, size_t minimum_length) const
|
||||||
{
|
{
|
||||||
VERIFY(minimum_length > 0);
|
VERIFY(minimum_length > 0);
|
||||||
|
|
||||||
|
@ -384,55 +384,42 @@ ErrorOr<Vector<SearchableCircularBuffer::Match>> SearchableCircularBuffer::find_
|
||||||
maximum_length = m_used_space;
|
maximum_length = m_used_space;
|
||||||
|
|
||||||
if (maximum_length < minimum_length)
|
if (maximum_length < minimum_length)
|
||||||
return Vector<Match> {};
|
return Optional<Match> {};
|
||||||
|
|
||||||
Vector<Match> matches;
|
Optional<Match> best_match;
|
||||||
|
|
||||||
|
for (auto distance : distances) {
|
||||||
|
// Discard distances outside the valid range.
|
||||||
|
if (distance > search_limit() || distance <= 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
// Verify all hints that we have.
|
|
||||||
for (auto const& distance : distances) {
|
|
||||||
// TODO: This does not yet support looping repetitions.
|
// TODO: This does not yet support looping repetitions.
|
||||||
if (distance < minimum_length)
|
if (distance < minimum_length)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
auto needle_offset = (capacity() + m_reading_head) % capacity();
|
auto current_match_length = 0ul;
|
||||||
auto haystack_offset = (capacity() + m_reading_head - distance) % capacity();
|
|
||||||
|
|
||||||
for (size_t i = 0; i < minimum_length; i++) {
|
while (current_match_length < maximum_length) {
|
||||||
if (m_buffer[needle_offset] != m_buffer[haystack_offset])
|
auto haystack = next_search_span(distance - current_match_length).trim(maximum_length - current_match_length);
|
||||||
|
auto needle = next_read_span(current_match_length).trim(maximum_length - current_match_length);
|
||||||
|
|
||||||
|
auto submatch_length = haystack.matching_prefix_length(needle);
|
||||||
|
|
||||||
|
if (submatch_length == 0)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
needle_offset = (needle_offset + 1) % capacity();
|
current_match_length += submatch_length;
|
||||||
haystack_offset = (haystack_offset + 1) % capacity();
|
|
||||||
|
|
||||||
if (i + 1 == minimum_length)
|
|
||||||
TRY(matches.try_empend(distance, minimum_length));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// From now on, all matches that we have stored have at least a length of `minimum_length` and they all refer to the same value.
|
// Discard matches that don't reach the minimum length.
|
||||||
// For the remaining part, we will keep checking the next byte incrementally and keep eliminating matches until we eliminated all of them.
|
if (current_match_length < minimum_length)
|
||||||
Vector<Match> next_matches;
|
|
||||||
|
|
||||||
for (size_t offset = minimum_length; offset < maximum_length; offset++) {
|
|
||||||
auto needle_data = m_buffer[(capacity() + m_reading_head + offset) % capacity()];
|
|
||||||
|
|
||||||
for (auto const& match : matches) {
|
|
||||||
auto haystack_data = m_buffer[(capacity() + m_reading_head - match.distance + offset) % capacity()];
|
|
||||||
|
|
||||||
if (haystack_data != needle_data)
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
TRY(next_matches.try_empend(match.distance, match.length + 1));
|
if (!best_match.has_value() || best_match->length < current_match_length)
|
||||||
|
best_match = Match { distance, current_match_length };
|
||||||
}
|
}
|
||||||
|
|
||||||
if (next_matches.size() == 0)
|
return best_match;
|
||||||
return matches;
|
|
||||||
|
|
||||||
swap(matches, next_matches);
|
|
||||||
next_matches.clear_with_capacity();
|
|
||||||
}
|
|
||||||
|
|
||||||
return matches;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -75,7 +75,7 @@ public:
|
||||||
/// Supplying any hints will only consider those distances, in case existing offsets need to be validated.
|
/// Supplying any hints will only consider those distances, in case existing offsets need to be validated.
|
||||||
/// Note that, since we only start searching at the read head, the length between read head and write head is excluded from the distance.
|
/// Note that, since we only start searching at the read head, the length between read head and write head is excluded from the distance.
|
||||||
ErrorOr<Vector<Match>> find_copy_in_seekback(size_t maximum_length, size_t minimum_length = 2) const;
|
ErrorOr<Vector<Match>> find_copy_in_seekback(size_t maximum_length, size_t minimum_length = 2) const;
|
||||||
ErrorOr<Vector<Match>> find_copy_in_seekback(Vector<size_t> const& distances, size_t maximum_length, size_t minimum_length = 2) const;
|
Optional<Match> find_copy_in_seekback(ReadonlySpan<size_t> distances, size_t maximum_length, size_t minimum_length = 2) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Note: This function has a similar purpose as next_seekback_span, but they differ in their reference point.
|
// Note: This function has a similar purpose as next_seekback_span, but they differ in their reference point.
|
||||||
|
|
|
@ -411,25 +411,23 @@ TEST_CASE(find_copy_in_seekback)
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
// Find the largest matches with a length between 1 and 2 (selected "AB", everything smaller gets eliminated).
|
// Find the largest match with a length between 1 and 2 (selected "AB", everything smaller gets eliminated).
|
||||||
auto matches = MUST(buffer.find_copy_in_seekback(Vector<size_t> { 6ul, 9ul }, 2, 1));
|
// Since we have a tie, the first qualified match is preferred.
|
||||||
EXPECT_EQ(matches.size(), 2ul);
|
auto match = buffer.find_copy_in_seekback(Vector<size_t> { 6ul, 9ul }, 2, 1);
|
||||||
EXPECT_EQ(matches[0].distance, 6ul);
|
EXPECT_EQ(match.value().distance, 6ul);
|
||||||
EXPECT_EQ(matches[0].length, 2ul);
|
EXPECT_EQ(match.value().length, 2ul);
|
||||||
EXPECT_EQ(matches[1].distance, 9ul);
|
|
||||||
EXPECT_EQ(matches[1].length, 2ul);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
// Check that we don't find anything for hints before the valid range.
|
// Check that we don't find anything for hints before the valid range.
|
||||||
auto matches = MUST(buffer.find_copy_in_seekback(Vector<size_t> { 0ul }, 2, 1));
|
auto match = buffer.find_copy_in_seekback(Vector<size_t> { 0ul }, 2, 1);
|
||||||
EXPECT_EQ(matches.size(), 0ul);
|
EXPECT(!match.has_value());
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
// Check that we don't find anything for hints after the valid range.
|
// Check that we don't find anything for hints after the valid range.
|
||||||
auto matches = MUST(buffer.find_copy_in_seekback(Vector<size_t> { 12ul }, 2, 1));
|
auto match = buffer.find_copy_in_seekback(Vector<size_t> { 12ul }, 2, 1);
|
||||||
EXPECT_EQ(matches.size(), 0ul);
|
EXPECT(!match.has_value());
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
|
|
@ -998,10 +998,10 @@ ErrorOr<void> LzmaCompressor::encode_once()
|
||||||
m_rep2 + normalized_to_real_match_distance_offset,
|
m_rep2 + normalized_to_real_match_distance_offset,
|
||||||
m_rep3 + normalized_to_real_match_distance_offset,
|
m_rep3 + normalized_to_real_match_distance_offset,
|
||||||
};
|
};
|
||||||
auto existing_distance_results = TRY(m_dictionary->find_copy_in_seekback(existing_distances, m_dictionary->used_space(), normalized_to_real_match_length_offset));
|
auto existing_distance_result = m_dictionary->find_copy_in_seekback(existing_distances, m_dictionary->used_space(), normalized_to_real_match_length_offset);
|
||||||
|
|
||||||
if (existing_distance_results.size() > 0) {
|
if (existing_distance_result.has_value()) {
|
||||||
auto selected_match = existing_distance_results[0];
|
auto selected_match = existing_distance_result.release_value();
|
||||||
TRY(encode_existing_match(selected_match.distance, selected_match.length));
|
TRY(encode_existing_match(selected_match.distance, selected_match.length));
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue