From 29796f8f5e591597415636754363e7a0ca0d5ef8 Mon Sep 17 00:00:00 2001
From: Nico Weber
Date: Sat, 25 Mar 2023 21:31:23 +0100
Subject: [PATCH] LibCrypto: Use 8-byte crc32 instruction on arm too

Takes

    % time Build/lagom/gunzip -c \
          /Users/thakis/Downloads/trace_bug.json.gz > /dev/null

from 3.9s to 3.87s.
---
 .../Libraries/LibCrypto/Checksum/CRC32.cpp    | 27 ++++++++++++++++---
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/Userland/Libraries/LibCrypto/Checksum/CRC32.cpp b/Userland/Libraries/LibCrypto/Checksum/CRC32.cpp
index 805bff2546..7cdddfb274 100644
--- a/Userland/Libraries/LibCrypto/Checksum/CRC32.cpp
+++ b/Userland/Libraries/LibCrypto/Checksum/CRC32.cpp
@@ -13,14 +13,33 @@ namespace Crypto::Checksum {
 
 #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
 
-void CRC32::update(ReadonlyBytes data)
+void CRC32::update(ReadonlyBytes span)
 {
     // FIXME: Does this require runtime checking on rpi?
     //        (Maybe the instruction is present on the rpi4 but not on the rpi3?)
 
-    // FIXME: Use __builtin_arm_crc32d() for aligned middle part.
-    for (size_t i = 0; i < data.size(); i++)
-        m_state = __builtin_arm_crc32b(m_state, data.at(i));
+    u8 const* data = span.data();
+    size_t size = span.size();
+
+    while (size > 0 && (reinterpret_cast<FlatPtr>(data) & 7) != 0) {
+        m_state = __builtin_arm_crc32b(m_state, *data);
+        ++data;
+        --size;
+    }
+
+    auto* data64 = reinterpret_cast<u64 const*>(data);
+    while (size >= 8) {
+        m_state = __builtin_arm_crc32d(m_state, *data64);
+        ++data64;
+        size -= 8;
+    }
+
+    data = reinterpret_cast<u8 const*>(data64);
+    while (size > 0) {
+        m_state = __builtin_arm_crc32b(m_state, *data);
+        ++data;
+        --size;
+    }
 };
 
 // FIXME: On Intel, use _mm_crc32_u8 / _mm_crc32_u64 if available (SSE 4.2).