mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 06:07:34 +00:00
LibCrypto: Implement little endian CRC using the slicing-by-8 algorithm
This implements Intel's slicing-by-8 algorithm for CRC checksums (only little endian CPUs for now, as I don't have a way to test big endian). The original paper for this algorithm seems to have disappeared, but Intel's source code is still available as a reference: https://sourceforge.net/projects/slicing-by-8/ As well as other implementations for reference: https://docs.rs/slice-by-8/latest/src/slice_by_8/algorithm.rs.html Using the "enwik8" file as a test (100MB uncompressed, commonly used in benchmarks: https://www.mattmahoney.net/dc/enwik8.zip), decompression time decreases from: 4.89s to 3.52s on Serenity (cold) 1.72s to 1.32s on Serenity (warm) 1.06s to 0.92s on Linux
This commit is contained in:
parent
83cb73a045
commit
62b575ad7c
1 changed files with 94 additions and 1 deletions
|
@ -5,6 +5,7 @@
|
|||
*/
|
||||
|
||||
#include <AK/Array.h>
|
||||
#include <AK/NumericLimits.h>
|
||||
#include <AK/Span.h>
|
||||
#include <AK/Types.h>
|
||||
#include <LibCrypto/Checksum/CRC32.h>
|
||||
|
@ -46,6 +47,97 @@ void CRC32::update(ReadonlyBytes span)
|
|||
|
||||
#else
|
||||
|
||||
static constexpr size_t ethernet_polynomial = 0xEDB88320;
|
||||
|
||||
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
|
||||
// This implements Intel's slicing-by-8 algorithm. Their original paper is no longer on their website,
|
||||
// but their source code is still available for reference:
|
||||
// https://sourceforge.net/projects/slicing-by-8/
|
||||
static constexpr auto generate_table()
|
||||
{
|
||||
Array<Array<u32, 256>, 8> data {};
|
||||
|
||||
for (u32 i = 0; i < 256; ++i) {
|
||||
auto value = i;
|
||||
|
||||
for (size_t j = 0; j < 8; ++j)
|
||||
value = (value >> 1) ^ ((value & 1) * ethernet_polynomial);
|
||||
|
||||
data[0][i] = value;
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < 256; ++i) {
|
||||
for (size_t j = 1; j < 8; ++j)
|
||||
data[j][i] = (data[j - 1][i] >> 8) ^ data[0][data[j - 1][i] & 0xff];
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
static constexpr auto table = generate_table();
|
||||
|
||||
struct AlignmentData {
|
||||
ReadonlyBytes misaligned;
|
||||
ReadonlyBytes aligned;
|
||||
};
|
||||
|
||||
static AlignmentData split_bytes_for_alignment(ReadonlyBytes data, size_t alignment)
|
||||
{
|
||||
auto address = reinterpret_cast<uintptr_t>(data.data());
|
||||
auto offset = alignment - address % alignment;
|
||||
|
||||
if (offset == alignment)
|
||||
return { {}, data };
|
||||
|
||||
if (data.size() < alignment)
|
||||
return { data, {} };
|
||||
|
||||
return { data.trim(offset), data.slice(offset) };
|
||||
}
|
||||
|
||||
static constexpr u32 single_byte_crc(u32 crc, u8 byte)
|
||||
{
|
||||
return (crc >> 8) ^ table[0][(crc & 0xff) ^ byte];
|
||||
}
|
||||
|
||||
void CRC32::update(ReadonlyBytes data)
|
||||
{
|
||||
// The provided data may not be aligned to a 4-byte boundary, required to reinterpret its address
|
||||
// into a u32 in the loop below. So we split the bytes into two segments: the misaligned bytes
|
||||
// (which undergo the standard 1-byte-at-a-time algorithm) and remaining aligned bytes.
|
||||
auto [misaligned_data, aligned_data] = split_bytes_for_alignment(data, alignof(u32));
|
||||
|
||||
for (auto byte : misaligned_data)
|
||||
m_state = single_byte_crc(m_state, byte);
|
||||
|
||||
while (aligned_data.size() >= 8) {
|
||||
auto const* segment = reinterpret_cast<u32 const*>(aligned_data.data());
|
||||
auto low = *segment ^ m_state;
|
||||
auto high = *(++segment);
|
||||
|
||||
// clang-format will put this all on one line, which is really hard to read.
|
||||
// clang-format off
|
||||
m_state = table[0][(high >> 24) & 0xff]
|
||||
^ table[1][(high >> 16) & 0xff]
|
||||
^ table[2][(high >> 8) & 0xff]
|
||||
^ table[3][high & 0xff]
|
||||
^ table[4][(low >> 24) & 0xff]
|
||||
^ table[5][(low >> 16) & 0xff]
|
||||
^ table[6][(low >> 8) & 0xff]
|
||||
^ table[7][low & 0xff];
|
||||
// clang-format on
|
||||
|
||||
aligned_data = aligned_data.slice(8);
|
||||
}
|
||||
|
||||
for (auto byte : aligned_data)
|
||||
m_state = single_byte_crc(m_state, byte);
|
||||
};
|
||||
|
||||
# else
|
||||
|
||||
// FIXME: Implement the slicing-by-8 algorithm for big endian CPUs.
|
||||
static constexpr auto generate_table()
|
||||
{
|
||||
Array<u32, 256> data {};
|
||||
|
@ -54,7 +146,7 @@ static constexpr auto generate_table()
|
|||
|
||||
for (auto j = 0; j < 8; j++) {
|
||||
if (value & 1) {
|
||||
value = 0xEDB88320 ^ (value >> 1);
|
||||
value = ethernet_polynomial ^ (value >> 1);
|
||||
} else {
|
||||
value = value >> 1;
|
||||
}
|
||||
|
@ -74,6 +166,7 @@ void CRC32::update(ReadonlyBytes data)
|
|||
}
|
||||
};
|
||||
|
||||
# endif
|
||||
#endif
|
||||
|
||||
u32 CRC32::digest()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue