mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 01:07:36 +00:00
AK: Use hashing to accelerate searching a CircularBuffer
This commit is contained in:
parent
4a10cf1506
commit
60ac254df6
4 changed files with 269 additions and 101 deletions
|
@ -8,6 +8,7 @@
|
|||
|
||||
#include <AK/ByteBuffer.h>
|
||||
#include <AK/Error.h>
|
||||
#include <AK/HashMap.h>
|
||||
#include <AK/Noncopyable.h>
|
||||
#include <AK/Vector.h>
|
||||
|
||||
|
@ -67,6 +68,11 @@ public:
|
|||
|
||||
[[nodiscard]] size_t search_limit() const;
|
||||
|
||||
// These functions update the read pointer, so we need to hash any data that we have processed.
|
||||
ErrorOr<Bytes> read(Bytes bytes);
|
||||
ErrorOr<void> discard(size_t discarded_bytes);
|
||||
ErrorOr<size_t> flush_to_stream(Stream& stream);
|
||||
|
||||
struct Match {
|
||||
size_t distance;
|
||||
size_t length;
|
||||
|
@ -74,15 +80,26 @@ public:
|
|||
/// This searches the seekback buffer (between read head and limit) for occurrences where it matches the next `length` bytes from the read buffer.
|
||||
/// Supplying any hints will only consider those distances, in case existing offsets need to be validated.
|
||||
/// Note that, since we only start searching at the read head, the length between read head and write head is excluded from the distance.
|
||||
ErrorOr<Vector<Match>> find_copy_in_seekback(size_t maximum_length, size_t minimum_length = 2) const;
|
||||
Optional<Match> find_copy_in_seekback(size_t maximum_length, size_t minimum_length = 2);
|
||||
Optional<Match> find_copy_in_seekback(ReadonlySpan<size_t> distances, size_t maximum_length, size_t minimum_length = 2) const;
|
||||
|
||||
// The chunk size for which the hash table holds hashes.
|
||||
// This is nice for users to know, as picking a minimum match length that is
|
||||
// equal or greater than this allows us to completely skip a slow memory search.
|
||||
static constexpr size_t HASH_CHUNK_SIZE = 3;
|
||||
|
||||
private:
|
||||
// Note: This function has a similar purpose as next_seekback_span, but they differ in their reference point.
|
||||
// Seekback operations start counting their distance at the write head, while search operations start counting their distance at the read head.
|
||||
[[nodiscard]] ReadonlyBytes next_search_span(size_t distance) const;
|
||||
|
||||
SearchableCircularBuffer(ByteBuffer);
|
||||
|
||||
HashMap<unsigned, size_t> m_hash_location_map;
|
||||
HashMap<size_t, size_t> m_location_chain_map;
|
||||
|
||||
ErrorOr<void> insert_location_hash(ReadonlyBytes value, size_t raw_offset);
|
||||
ErrorOr<void> hash_last_bytes(size_t count);
|
||||
};
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue