From 2983215fb18eb5d1fe54c9b854d33d8d7af38c59 Mon Sep 17 00:00:00 2001
From: Lenny Maiorani
Date: Tue, 13 Oct 2020 10:48:48 -0400
Subject: [PATCH] Base64: Pre-allocate size of input and output

Problem:
- The output of decode and encode grows as decoding and encoding
  proceed. This is inefficient because a large input requires many
  reallocations.
- `const` qualifiers are missing on variables which are not intended
  to change.

Solution:
- Since the size of the decoded or encoded message is known prior to
  starting, calculate the size and reserve that much output capacity
  up front, so that appends do not incur repeated reallocation
  overhead.
- Add `const` qualifiers to show intent.
---
Review notes (everything between the `---` line above and the first
`diff --git` line is ignored by `git am`):
- encode_base64() reserves calculate_base64_encoded_length(input).
  Reserving the decoded length there would under-allocate, because the
  encoded text is about 4/3 the size of the input bytes.
- This copy of the patch has lost its angle-bracketed text: the author
  e-mail, the #include targets in the AK/Base64.h hunk, and the element
  type of `Vector` in decode_base64(). `Vector<u8>` is restored below on
  the assumption that the buffer holds the decoded u8 values (TODO:
  confirm against the tree). The #include targets cannot be recovered
  from this copy and must be restored from the original commit before
  the patch will apply.

 AK/Base64.cpp           | 53 +++++++++++++++++++++++++----------------
 AK/Base64.h             |  8 ++++++-
 AK/Tests/TestBase64.cpp |  2 ++
 3 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/AK/Base64.cpp b/AK/Base64.cpp
index dcf4808ebc..4ed14e0130 100644
--- a/AK/Base64.cpp
+++ b/AK/Base64.cpp
@@ -62,9 +62,19 @@ static constexpr auto make_lookup_table()
     return table;
 }
 
+size_t calculate_base64_decoded_length(const StringView& input)
+{
+    return input.length() * 3 / 4;
+}
+
+size_t calculate_base64_encoded_length(ReadonlyBytes input)
+{
+    return ((4 * input.size() / 3) + 3) & ~3;
+}
+
 ByteBuffer decode_base64(const StringView& input)
 {
-    auto get = [&](size_t offset, bool* is_padding = nullptr) -> u8 {
+    auto get = [&](const size_t offset, bool* is_padding = nullptr) -> u8 {
         constexpr auto table = make_lookup_table();
         if (offset >= input.length())
             return 0;
@@ -77,19 +87,20 @@ ByteBuffer decode_base64(const StringView& input)
     };
 
     Vector<u8> output;
+    output.ensure_capacity(calculate_base64_decoded_length(input));
 
     for (size_t i = 0; i < input.length(); i += 4) {
         bool in2_is_padding = false;
         bool in3_is_padding = false;
 
-        u8 in0 = get(i);
-        u8 in1 = get(i + 1);
-        u8 in2 = get(i + 2, &in2_is_padding);
-        u8 in3 = get(i + 3, &in3_is_padding);
+        const u8 in0 = get(i);
+        const u8 in1 = get(i + 1);
+        const u8 in2 = get(i + 2, &in2_is_padding);
+        const u8 in3 = get(i + 3, &in3_is_padding);
 
-        u8 out0 = (in0 << 2) | ((in1 >> 4) & 3);
-        u8 out1 = ((in1 & 0xf) << 4) | ((in2 >> 2) & 0xf);
-        u8 out2 = ((in2 & 0x3) << 6) | in3;
+        const u8 out0 = (in0 << 2) | ((in1 >> 4) & 3);
+        const u8 out1 = ((in1 & 0xf) << 4) | ((in2 >> 2) & 0xf);
+        const u8 out2 = ((in2 & 0x3) << 6) | in3;
 
         output.append(out0);
         if (!in2_is_padding)
@@ -104,9 +115,9 @@ ByteBuffer decode_base64(const StringView& input)
 String encode_base64(ReadonlyBytes input)
 {
     constexpr auto alphabet = make_alphabet();
-    StringBuilder output;
+    StringBuilder output(calculate_base64_encoded_length(input));
 
-    auto get = [&](size_t offset, bool* need_padding = nullptr) -> u8 {
+    auto get = [&](const size_t offset, bool* need_padding = nullptr) -> u8 {
         if (offset >= input.size()) {
             if (need_padding)
                 *need_padding = true;
@@ -119,19 +130,19 @@ String encode_base64(ReadonlyBytes input)
         bool is_8bit = false;
         bool is_16bit = false;
 
-        u8 in0 = get(i);
-        u8 in1 = get(i + 1, &is_16bit);
-        u8 in2 = get(i + 2, &is_8bit);
+        const u8 in0 = get(i);
+        const u8 in1 = get(i + 1, &is_16bit);
+        const u8 in2 = get(i + 2, &is_8bit);
 
-        u8 index0 = (in0 >> 2) & 0x3f;
-        u8 index1 = ((in0 << 4) | (in1 >> 4)) & 0x3f;
-        u8 index2 = ((in1 << 2) | (in2 >> 6)) & 0x3f;
-        u8 index3 = in2 & 0x3f;
+        const u8 index0 = (in0 >> 2) & 0x3f;
+        const u8 index1 = ((in0 << 4) | (in1 >> 4)) & 0x3f;
+        const u8 index2 = ((in1 << 2) | (in2 >> 6)) & 0x3f;
+        const u8 index3 = in2 & 0x3f;
 
-        u8 out0 = alphabet[index0];
-        u8 out1 = alphabet[index1];
-        u8 out2 = is_16bit ? '=' : alphabet[index2];
-        u8 out3 = is_8bit ? '=' : alphabet[index3];
+        const u8 out0 = alphabet[index0];
+        const u8 out1 = alphabet[index1];
+        const u8 out2 = is_16bit ? '=' : alphabet[index2];
+        const u8 out3 = is_8bit ? '=' : alphabet[index3];
 
         output.append(out0);
         output.append(out1);
diff --git a/AK/Base64.h b/AK/Base64.h
index 758e78547d..eecc119cff 100644
--- a/AK/Base64.h
+++ b/AK/Base64.h
@@ -26,11 +26,17 @@
 
 #pragma once
 
-#include
+#include
 #include
+#include
+#include
 
 namespace AK {
 
+size_t calculate_base64_decoded_length(const StringView&);
+
+size_t calculate_base64_encoded_length(ReadonlyBytes);
+
 ByteBuffer decode_base64(const StringView&);
 
 String encode_base64(ReadonlyBytes);
diff --git a/AK/Tests/TestBase64.cpp b/AK/Tests/TestBase64.cpp
index 5b19bcd544..ef008d01f3 100644
--- a/AK/Tests/TestBase64.cpp
+++ b/AK/Tests/TestBase64.cpp
@@ -35,6 +35,7 @@ TEST_CASE(test_decode)
     auto decode_equal = [&](const char* input, const char* expected) {
         auto decoded = decode_base64(StringView(input));
         EXPECT(String::copy(decoded) == String(expected));
+        EXPECT(StringView(expected).length() <= calculate_base64_decoded_length(StringView(input).bytes()));
     };
 
     decode_equal("", "");
@@ -51,6 +52,7 @@ TEST_CASE(test_encode)
     auto encode_equal = [&](const char* input, const char* expected) {
         auto encoded = encode_base64({ input, strlen(input) });
         EXPECT(encoded == String(expected));
+        EXPECT_EQ(StringView(expected).length(), calculate_base64_encoded_length(StringView(input).bytes()));
     };
 
     encode_equal("", "");