1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 06:57:44 +00:00

AK: Add CircularBuffer::find_copy_in_seekback()

This is useful for compressors, which quite frequently need to find a
matching span of data within the seekback.
This commit is contained in:
Tim Schumacher 2023-05-03 11:14:36 +02:00 committed by Andreas Kling
parent d194011570
commit 221b91ff61
3 changed files with 223 additions and 0 deletions

View file

@ -249,4 +249,107 @@ ErrorOr<size_t> CircularBuffer::copy_from_seekback(size_t distance, size_t lengt
return length - remaining_length;
}
ErrorOr<Vector<CircularBuffer::Match>> CircularBuffer::find_copy_in_seekback(size_t maximum_length, size_t minimum_length, Optional<Vector<size_t> const&> distance_hints) const
{
VERIFY(minimum_length > 0);
// Clip the maximum length to the amount of data that we actually store.
if (maximum_length > m_used_space)
maximum_length = m_used_space;
if (maximum_length < minimum_length)
return Vector<Match> {};
Vector<Match> matches;
if (distance_hints.has_value()) {
// If we have any hints, verify and use those.
for (auto const& distance : distance_hints.value()) {
// TODO: This does not yet support looping repetitions.
if (distance < minimum_length)
continue;
auto needle_offset = (capacity() + m_reading_head) % capacity();
auto haystack_offset = (capacity() + m_reading_head - distance) % capacity();
for (size_t i = 0; i < minimum_length; i++) {
if (m_buffer[needle_offset] != m_buffer[haystack_offset])
break;
needle_offset = (needle_offset + 1) % capacity();
haystack_offset = (haystack_offset + 1) % capacity();
if (i + 1 == minimum_length)
TRY(matches.try_empend(distance, minimum_length));
}
}
} else {
// Otherwise, use memmem to find the initial matches.
// Note: We have the read head as our reference point, but `next_read_span_with_seekback` isn't aware of that and continues to use the write head.
// Therefore, we need to make sure to slice off the extraneous bytes from the end of the span and shift the returned distances by the correct amount.
size_t haystack_offset_from_start = 0;
Vector<ReadonlyBytes, 2> haystack;
haystack.append(next_read_span_with_seekback(m_seekback_limit));
if (haystack[0].size() < m_seekback_limit - used_space())
haystack.append(next_read_span_with_seekback(m_seekback_limit - haystack[0].size()));
haystack.last() = haystack.last().trim(haystack.last().size() - used_space());
auto needle = next_read_span().trim(minimum_length);
auto memmem_match = AK::memmem(haystack.begin(), haystack.end(), needle);
while (memmem_match.has_value()) {
auto match_offset = memmem_match.release_value();
// Add the match to the list of matches to work with.
TRY(matches.try_empend(m_seekback_limit - used_space() - haystack_offset_from_start - match_offset, minimum_length));
auto size_to_discard = match_offset + 1;
// Trim away the already processed bytes from the haystack.
haystack_offset_from_start += size_to_discard;
while (size_to_discard > 0) {
if (haystack[0].size() < size_to_discard) {
size_to_discard -= haystack[0].size();
haystack.remove(0);
} else {
haystack[0] = haystack[0].slice(size_to_discard);
break;
}
}
if (haystack.size() == 0)
break;
// Try and find the next match.
memmem_match = AK::memmem(haystack.begin(), haystack.end(), needle);
}
}
// From now on, all matches that we have stored have at least a length of `minimum_length` and they all refer to the same value.
// For the remaining part, we will keep checking the next byte incrementally and keep eliminating matches until we eliminated all of them.
Vector<Match> next_matches;
for (size_t offset = minimum_length; offset < maximum_length; offset++) {
auto needle_data = m_buffer[(capacity() + m_reading_head + offset) % capacity()];
for (auto const& match : matches) {
auto haystack_data = m_buffer[(capacity() + m_reading_head - match.distance + offset) % capacity()];
if (haystack_data != needle_data)
continue;
TRY(next_matches.try_empend(match.distance, match.length + 1));
}
if (next_matches.size() == 0)
return matches;
swap(matches, next_matches);
next_matches.clear_with_capacity();
}
return matches;
}
}