diff --git a/Userland/Libraries/LibVideo/CMakeLists.txt b/Userland/Libraries/LibVideo/CMakeLists.txt
index 293a106d77..ec1a4f517b 100644
--- a/Userland/Libraries/LibVideo/CMakeLists.txt
+++ b/Userland/Libraries/LibVideo/CMakeLists.txt
@@ -7,8 +7,7 @@ set(SOURCES
     VP9/ProbabilityTables.cpp
     VP9/SyntaxElementCounter.cpp
     VP9/TreeParser.cpp
-    VP9/Utilities.cpp
 )
 
 serenity_lib(LibVideo video)
-target_link_libraries(LibVideo LibAudio LibCore LibIPC)
+target_link_libraries(LibVideo LibAudio LibCore LibIPC LibGfx)
diff --git a/Userland/Libraries/LibVideo/DecoderError.h b/Userland/Libraries/LibVideo/DecoderError.h
index bfde7e7edb..02d5160233 100644
--- a/Userland/Libraries/LibVideo/DecoderError.h
+++ b/Userland/Libraries/LibVideo/DecoderError.h
@@ -22,6 +22,7 @@ using DecoderErrorOr = ErrorOr<T, DecoderError>;
 enum class DecoderErrorCategory : u32 {
     Unknown,
     IO,
+    Memory,
     // The input is corrupted.
     Corrupted,
     // The input uses features that are not yet implemented.
@@ -79,4 +80,6 @@ private:
         _result.release_value();                                           \
     })
 
+#define DECODER_TRY_ALLOC(expression) DECODER_TRY(DecoderErrorCategory::Memory, expression) // DECODER_TRY with the category fixed to DecoderErrorCategory::Memory, for fallible allocations.
+
 }
diff --git a/Userland/Libraries/LibVideo/VP9/Decoder.cpp b/Userland/Libraries/LibVideo/VP9/Decoder.cpp
index e6b539010b..87fce2cddd 100644
--- a/Userland/Libraries/LibVideo/VP9/Decoder.cpp
+++ b/Userland/Libraries/LibVideo/VP9/Decoder.cpp
@@ -5,6 +5,9 @@
  * SPDX-License-Identifier: BSD-2-Clause
  */
 
+#include <AK/IntegralMath.h>
+#include <LibGfx/Size.h>
+
 #include "Decoder.h"
 #include "Utilities.h"
 
@@ -32,6 +35,66 @@ void Decoder::dump_frame_info()
     m_parser->dump_info();
 }
 
+inline size_t buffer_size(size_t width, size_t height) // Element count of a width x height grid.
+{
+    return height * width;
+}
+
+inline size_t buffer_size(Gfx::Size<size_t> size) // Same count, taken from a Gfx::Size.
+{
+    return size.width() * size.height();
+}
+
+DecoderErrorOr<void> Decoder::allocate_buffers()
+{
+    // FIXME: Confirm that we need to clear buffers between frames.
+    m_buffers = {};
+
+    // Planes 0..2 each get an intermediate and an output buffer of width * height entries.
+    for (size_t plane_index = 0; plane_index < 3; ++plane_index) {
+        auto const plane_size = m_parser->get_decoded_size_for_plane(plane_index);
+        auto const entry_count = buffer_size(plane_size);
+        dbgln("Allocating buffers for plane {} with size {}x{} ({})", plane_index, plane_size.width(), plane_size.height(), entry_count);
+        auto& intermediate = get_temp_buffer(plane_index);
+        intermediate.clear_with_capacity();
+        DECODER_TRY_ALLOC(intermediate.try_resize_and_keep_capacity(entry_count));
+        auto& output = get_output_buffer(plane_index);
+        output.clear_with_capacity();
+        DECODER_TRY_ALLOC(output.try_resize_and_keep_capacity(entry_count));
+    }
+    return {};
+}
+
+Vector<Decoder::Intermediate>& Decoder::get_temp_buffer(u8 plane)
+{
+    return m_buffers.intermediate[plane]; // Intermediate buffer for `plane`; allocate_buffers() sizes planes 0..2.
+}
+
+Vector<u16>& Decoder::get_output_buffer(u8 plane)
+{
+    return m_buffers.output[plane]; // Mutable output buffer for `plane`; predict_intra() writes its predicted samples here.
+}
+
+Vector<u16> const& Decoder::get_output_buffer_for_plane(u8 plane) const
+{
+    return m_buffers.output[plane]; // Read-only access to the same per-plane buffer that get_output_buffer() returns.
+}
+
+Gfx::Size<size_t> Decoder::get_y_plane_size()
+{
+    return m_parser->get_decoded_size_for_plane(0); // Plane 0 is treated as the Y plane.
+}
+
+bool Decoder::get_uv_subsampling_y()
+{
+    return m_parser->m_subsampling_y; // Parser flag; predict_intra() applies it as a right shift of the row count for planes > 0.
+}
+
+bool Decoder::get_uv_subsampling_x()
+{
+    return m_parser->m_subsampling_x; // Parser flag; predict_intra() applies it as a right shift of the column count for planes > 0.
+}
+
 u8 Decoder::merge_prob(u8 pre_prob, u8 count_0, u8 count_1, u8 count_sat, u8 max_update_factor)
 {
     auto total_decode_count = count_0 + count_1;
@@ -152,22 +215,1111 @@ u8 Decoder::adapt_prob(u8 prob, u8 counts[2])
     return merge_prob(prob, counts[0], counts[1], COUNT_SAT, MAX_UPDATE_FACTOR);
 }
 
-DecoderErrorOr<void> Decoder::predict_intra(size_t, u32, u32, bool, bool, bool, TXSize, u32)
+inline size_t index_from_row_and_column(u32 row, u32 column, u32 stride) // Flat index of (row, column) in a row-major array with `stride` entries per row.
+{
+    return static_cast<size_t>(row) * stride + column; // Widen first so the product is not computed (and wrapped) in 32 bits.
+}
+
+DecoderErrorOr<void> Decoder::predict_intra(u8 plane, u32 x, u32 y, bool have_left, bool have_above, bool not_on_right, TXSize tx_size, u32 block_index)
+{
+    auto& frame_buffer = get_output_buffer(plane);
+    // Summary: builds the size x size prediction for the transform block at (x, y) of `plane` from the row above and the column to the left, then stores it in frame_buffer.
+    // 8.5.1 Intra prediction process
+
+    // The intra prediction process is invoked for intra coded blocks to predict a part of the block corresponding to a
+    // transform block. When the transform size is smaller than the block size, this process can be invoked multiple
+    // times within a single block for the same plane, and the invocations are in raster order within the block.
+
+    // The variable mode is specified by:
+    //     1. If plane is greater than 0, mode is set equal to uv_mode.
+    //     2. Otherwise, if MiSize is greater than or equal to BLOCK_8X8, mode is set equal to y_mode.
+    //     3. Otherwise, mode is set equal to sub_modes[ blockIdx ].
+    IntraMode mode;
+    if (plane > 0)
+        mode = static_cast<IntraMode>(m_parser->m_uv_mode);
+    else if (m_parser->m_mi_size >= Block_8x8)
+        mode = static_cast<IntraMode>(m_parser->m_y_mode);
+    else
+        mode = static_cast<IntraMode>(m_parser->m_block_sub_modes[block_index]);
+
+    // The variable log2Size specifying the base 2 logarithm of the width of the transform block is set equal to txSz + 2.
+    u8 log2_of_block_size = tx_size + 2;
+    // The variable size is set equal to 1 << log2Size.
+    u8 block_size = 1 << log2_of_block_size;
+
+    // The variable maxX is set equal to (MiCols * 8) - 1.
+    // The variable maxY is set equal to (MiRows * 8) - 1.
+    // If plane is greater than 0, then:
+    //  − maxX is set equal to ((MiCols * 8) >> subsampling_x) - 1.
+    //  − maxY is set equal to ((MiRows * 8) >> subsampling_y) - 1.
+    auto subsampling_x = plane > 0 ? m_parser->m_subsampling_x : false;
+    auto subsampling_y = plane > 0 ? m_parser->m_subsampling_y : false;
+    auto max_x = ((m_parser->m_mi_cols * 8u) >> subsampling_x) - 1u;
+    auto max_y = ((m_parser->m_mi_rows * 8u) >> subsampling_y) - 1u;
+
+    auto const frame_buffer_at = [&](u32 row, u32 column) -> u16& {
+        const auto frame_stride = max_x + 1u; // NOTE(review): assumes each buffer row holds max_x + 1 samples — confirm against get_decoded_size_for_plane().
+        return frame_buffer[index_from_row_and_column(row, column, frame_stride)];
+    };
+
+    // The array aboveRow[ i ] for i = 0..size-1 is specified by:
+    //     ..
+    // The array aboveRow[ i ] for i = size..2*size-1 is specified by:
+    //     ..
+    // The array aboveRow[ i ] for i = -1 is specified by:
+    //     ..
+
+    // NOTE: above_row is an array ranging from 0 to (2*block_size).
+    //       There are three sections to the array:
+    //           - [0]
+    //           - [1 .. block_size]
+    //           - [block_size + 1 .. block_size * 2]
+    //       The array indices must be offset by 1 to accommodate index -1.
+    Vector<Intermediate>& above_row = m_buffers.above_row;
+    DECODER_TRY_ALLOC(above_row.try_resize_and_keep_capacity(block_size * 2 + 1));
+    auto above_row_at = [&](i32 index) -> Intermediate& {
+        return above_row[index + 1];
+    };
+
+    // NOTE: This value is pre-calculated since it is reused in spec below.
+    //       Use this to replace spec text "(1<<(BitDepth-1))".
+    auto half_sample_value = (1u << (m_parser->m_bit_depth - 1u));
+
+    // The array aboveRow[ i ] for i = 0..size-1 is specified by:
+    if (!have_above) {
+        // 1. If haveAbove is equal to 0, aboveRow[ i ] is set equal to (1<<(BitDepth-1)) - 1.
+        // FIXME: Use memset?
+        for (auto i = 0u; i < block_size; i++)
+            above_row_at(i) = half_sample_value - 1;
+    } else {
+        // 2. Otherwise, aboveRow[ i ] is set equal to CurrFrame[ plane ][ y-1 ][ Min(maxX, x+i) ].
+        for (auto i = 0u; i < block_size; i++)
+            above_row_at(i) = frame_buffer_at(y - 1, min(max_x, x + i));
+    }
+
+    // The array aboveRow[ i ] for i = size..2*size-1 is specified by:
+    if (have_above && not_on_right && tx_size == TXSize::TX_4x4) {
+        // 1. If haveAbove is equal to 1 and notOnRight is equal to 1 and txSz is equal to 0,
+        //    aboveRow[ i ] is set equal to CurrFrame[ plane ][ y-1 ][ Min(maxX, x+i) ].
+        for (auto i = block_size; i < block_size * 2; i++)
+            above_row_at(i) = frame_buffer_at(y - 1, min(max_x, x + i));
+    } else {
+        // 2. Otherwise, aboveRow[ i ] is set equal to aboveRow[ size-1 ].
+        for (auto i = block_size; i < block_size * 2; i++)
+            above_row_at(i) = above_row_at(block_size - 1);
+    }
+
+    // The array aboveRow[ i ] for i = -1 is specified by:
+    if (have_above && have_left) {
+        // 1. If haveAbove is equal to 1 and haveLeft is equal to 1, aboveRow[ -1 ] is set equal to
+        //    CurrFrame[ plane ][ y-1 ][ Min(maxX, x-1) ].
+        above_row_at(-1) = frame_buffer_at(y - 1, min(max_x, x - 1));
+    } else if (have_above) {
+        // 2. Otherwise if haveAbove is equal to 1, aboveRow[ -1] is set equal to (1<<(BitDepth-1)) + 1.
+        above_row_at(-1) = half_sample_value + 1;
+    } else {
+        // 3. Otherwise, aboveRow[ -1 ] is set equal to (1<<(BitDepth-1)) - 1
+        above_row_at(-1) = half_sample_value - 1;
+    }
+
+    // The array leftCol[ i ] for i = 0..size-1 is specified by:
+    Vector<Intermediate>& left_column = m_buffers.left_column;
+    DECODER_TRY_ALLOC(left_column.try_resize_and_keep_capacity(block_size));
+    if (have_left) {
+        // − If haveLeft is equal to 1, leftCol[ i ] is set equal to CurrFrame[ plane ][ Min(maxY, y+i) ][ x-1 ].
+        for (auto i = 0u; i < block_size; i++)
+            left_column[i] = frame_buffer_at(min(max_y, y + i), x - 1);
+    } else {
+        // − Otherwise, leftCol[ i ] is set equal to (1<<(BitDepth-1)) + 1.
+        for (auto i = 0u; i < block_size; i++)
+            left_column[i] = half_sample_value + 1;
+    }
+
+    // A 2D array named pred containing the intra predicted samples is constructed as follows:
+    Vector<Intermediate>& predicted_samples = m_buffers.predicted_samples;
+    DECODER_TRY_ALLOC(predicted_samples.try_resize_and_keep_capacity(block_size * block_size));
+    auto const predicted_sample_at = [&](u32 row, u32 column) -> Intermediate& {
+        return predicted_samples[index_from_row_and_column(row, column, block_size)];
+    };
+
+    // FIXME: One of the two below should be a simple memcpy of 1D arrays.
+    switch (mode) {
+    case IntraMode::VPred:
+        // − If mode is equal to V_PRED, pred[ i ][ j ] is set equal to aboveRow[ j ] with j = 0..size-1 and i = 0..size-1
+        // (each row of the block is filled with a copy of aboveRow).
+        for (auto j = 0u; j < block_size; j++) {
+            for (auto i = 0u; i < block_size; i++)
+                predicted_sample_at(i, j) = above_row_at(j);
+        }
+        break;
+    case IntraMode::HPred:
+        // − Otherwise if mode is equal to H_PRED, pred[ i ][ j ] is set equal to leftCol[ i ] with j = 0..size-1 and i =
+        // 0..size-1 (each column of the block is filled with a copy of leftCol).
+        for (auto j = 0u; j < block_size; j++) {
+            for (auto i = 0u; i < block_size; i++)
+                predicted_sample_at(i, j) = left_column[i];
+        }
+        break;
+    case IntraMode::D207Pred:
+        // − Otherwise if mode is equal to D207_PRED, the following applies:
+        // 1. pred[ size - 1 ][ j ] = leftCol[ size - 1] for j = 0..size-1
+        for (auto j = 0u; j < block_size; j++)
+            predicted_sample_at(block_size - 1, j) = left_column[block_size - 1];
+        // 2. pred[ i ][ 0 ] = Round2( leftCol[ i ] + leftCol[ i + 1 ], 1 ) for i = 0..size-2
+        for (auto i = 0u; i < block_size - 1u; i++)
+            predicted_sample_at(i, 0) = round_2(left_column[i] + left_column[i + 1], 1);
+        // 3. pred[ i ][ 1 ] = Round2( leftCol[ i ] + 2 * leftCol[ i + 1 ] + leftCol[ i + 2 ], 2 ) for i = 0..size-3
+        for (auto i = 0u; i < block_size - 2u; i++)
+            predicted_sample_at(i, 1) = round_2(left_column[i] + (2 * left_column[i + 1]) + left_column[i + 2], 2);
+        // 4. pred[ size - 2 ][ 1 ] = Round2( leftCol[ size - 2 ] + 3 * leftCol[ size - 1 ], 2 )
+        predicted_sample_at(block_size - 2, 1) = round_2(left_column[block_size - 2] + (3 * left_column[block_size - 1]), 2);
+        // 5. pred[ i ][ j ] = pred[ i + 1 ][ j - 2 ] for i = (size-2)..0, for j = 2..size-1
+        // NOTE – In the last step i iterates in reverse order.
+        for (auto i = block_size - 2u;;) { // No loop condition: i is unsigned, so the countdown ends via the explicit break at 0.
+            for (auto j = 2u; j < block_size; j++)
+                predicted_sample_at(i, j) = predicted_sample_at(i + 1, j - 2);
+            if (i == 0)
+                break;
+            i--;
+        }
+        break;
+    case IntraMode::D45Pred:
+        // Otherwise if mode is equal to D45_PRED,
+        // for i = 0..size-1, for j = 0..size-1.
+        for (auto i = 0u; i < block_size; i++) {
+            for (auto j = 0; j < block_size; j++) {
+                // pred[ i ][ j ] is set equal to (i + j + 2 < size * 2) ?
+                if (i + j + 2 < block_size * 2)
+                    // Round2( aboveRow[ i + j ] + aboveRow[ i + j + 1 ] * 2 + aboveRow[ i + j + 2 ], 2 ) :
+                    predicted_sample_at(i, j) = round_2(above_row_at(i + j) + above_row_at(i + j + 1) * 2 + above_row_at(i + j + 2), 2);
+                else
+                    // aboveRow[ 2 * size - 1 ]
+                    predicted_sample_at(i, j) = above_row_at(2 * block_size - 1);
+            }
+        }
+        break;
+    case IntraMode::D63Pred:
+        // Otherwise if mode is equal to D63_PRED,
+        for (auto i = 0u; i < block_size; i++) {
+            for (auto j = 0u; j < block_size; j++) {
+                // i/2 + j
+                auto row_index = (i / 2) + j;
+                // pred[ i ][ j ] is set equal to (i & 1) ?
+                if (i & 1)
+                    // Round2( aboveRow[ i/2 + j ] + aboveRow[ i/2 + j + 1 ] * 2 + aboveRow[ i/2 + j + 2 ], 2 ) :
+                    predicted_sample_at(i, j) = round_2(above_row_at(row_index) + above_row_at(row_index + 1) * 2 + above_row_at(row_index + 2), 2);
+                else
+                    // Round2( aboveRow[ i/2 + j ] + aboveRow[ i/2 + j + 1 ], 1 ) for i = 0..size-1, for j = 0..size-1.
+                    predicted_sample_at(i, j) = round_2(above_row_at(row_index) + above_row_at(row_index + 1), 1);
+            }
+        }
+        break;
+    case IntraMode::D117Pred:
+        // Otherwise if mode is equal to D117_PRED, the following applies:
+        // 1. pred[ 0 ][ j ] = Round2( aboveRow[ j - 1 ] + aboveRow[ j ], 1 ) for j = 0..size-1
+        for (auto j = 0; j < block_size; j++)
+            predicted_sample_at(0, j) = round_2(above_row_at(j - 1) + above_row_at(j), 1);
+        // 2. pred[ 1 ][ 0 ] = Round2( leftCol[ 0 ] + 2 * aboveRow[ -1 ] + aboveRow[ 0 ], 2 )
+        predicted_sample_at(1, 0) = round_2(left_column[0] + 2 * above_row_at(-1) + above_row_at(0), 2);
+        // 3. pred[ 1 ][ j ] = Round2( aboveRow[ j - 2 ] + 2 * aboveRow[ j - 1 ] + aboveRow[ j ], 2 ) for j = 1..size-1
+        for (auto j = 1; j < block_size; j++)
+            predicted_sample_at(1, j) = round_2(above_row_at(j - 2) + 2 * above_row_at(j - 1) + above_row_at(j), 2);
+        // 4. pred[ 2 ][ 0 ] = Round2( aboveRow[ -1 ] + 2 * leftCol[ 0 ] + leftCol[ 1 ], 2 )
+        predicted_sample_at(2, 0) = round_2(above_row_at(-1) + 2 * left_column[0] + left_column[1], 2);
+        // 5. pred[ i ][ 0 ] = Round2( leftCol[ i - 3 ] + 2 * leftCol[ i - 2 ] + leftCol[ i - 1 ], 2 ) for i = 3..size-1
+        for (auto i = 3u; i < block_size; i++)
+            predicted_sample_at(i, 0) = round_2(left_column[i - 3] + 2 * left_column[i - 2] + left_column[i - 1], 2);
+        // 6. pred[ i ][ j ] = pred[ i - 2 ][ j - 1 ] for i = 2..size-1, for j = 1..size-1
+        for (auto i = 2u; i < block_size; i++) {
+            for (auto j = 1u; j < block_size; j++)
+                predicted_sample_at(i, j) = predicted_sample_at(i - 2, j - 1);
+        }
+        break;
+    case IntraMode::D135Pred:
+        // Otherwise if mode is equal to D135_PRED, the following applies:
+        // 1. pred[ 0 ][ 0 ] = Round2( leftCol[ 0 ] + 2 * aboveRow[ -1 ] + aboveRow[ 0 ], 2 )
+        predicted_sample_at(0, 0) = round_2(left_column[0] + 2 * above_row_at(-1) + above_row_at(0), 2);
+        // 2. pred[ 0 ][ j ] = Round2( aboveRow[ j - 2 ] + 2 * aboveRow[ j - 1 ] + aboveRow[ j ], 2 ) for j = 1..size-1
+        for (auto j = 1; j < block_size; j++)
+            predicted_sample_at(0, j) = round_2(above_row_at(j - 2) + 2 * above_row_at(j - 1) + above_row_at(j), 2);
+        // 3. pred[ 1 ][ 0 ] = Round2( aboveRow [ -1 ] + 2 * leftCol[ 0 ] + leftCol[ 1 ], 2 ) for i = 1..size-1
+        predicted_sample_at(1, 0) = round_2(above_row_at(-1) + 2 * left_column[0] + left_column[1], 2);
+        // 4. pred[ i ][ 0 ] = Round2( leftCol[ i - 2 ] + 2 * leftCol[ i - 1 ] + leftCol[ i ], 2 ) for i = 2..size-1
+        for (auto i = 2u; i < block_size; i++)
+            predicted_sample_at(i, 0) = round_2(left_column[i - 2] + 2 * left_column[i - 1] + left_column[i], 2);
+        // 5. pred[ i ][ j ] = pred[ i - 1 ][ j - 1 ] for i = 1..size-1, for j = 1..size-1
+        for (auto i = 1u; i < block_size; i++) {
+            for (auto j = 1; j < block_size; j++)
+                predicted_sample_at(i, j) = predicted_sample_at(i - 1, j - 1);
+        }
+        break;
+    case IntraMode::D153Pred:
+        // Otherwise if mode is equal to D153_PRED, the following applies:
+        // 1. pred[ 0 ][ 0 ] = Round2( leftCol[ 0 ] + aboveRow[ -1 ], 1 )
+        predicted_sample_at(0, 0) = round_2(left_column[0] + above_row_at(-1), 1);
+        // 2. pred[ i ][ 0 ] = Round2( leftCol[ i - 1] + leftCol[ i ], 1 ) for i = 1..size-1
+        for (auto i = 1u; i < block_size; i++)
+            predicted_sample_at(i, 0) = round_2(left_column[i - 1] + left_column[i], 1);
+        // 3. pred[ 0 ][ 1 ] = Round2( leftCol[ 0 ] + 2 * aboveRow[ -1 ] + aboveRow[ 0 ], 2 )
+        predicted_sample_at(0, 1) = round_2(left_column[0] + 2 * above_row_at(-1) + above_row_at(0), 2);
+        // 4. pred[ 1 ][ 1 ] = Round2( aboveRow[ -1 ] + 2 * leftCol [ 0 ] + leftCol [ 1 ], 2 )
+        predicted_sample_at(1, 1) = round_2(above_row_at(-1) + 2 * left_column[0] + left_column[1], 2);
+        // 5. pred[ i ][ 1 ] = Round2( leftCol[ i - 2 ] + 2 * leftCol[ i - 1 ] + leftCol[ i ], 2 ) for i = 2..size-1
+        for (auto i = 2u; i < block_size; i++)
+            predicted_sample_at(i, 1) = round_2(left_column[i - 2] + 2 * left_column[i - 1] + left_column[i], 2);
+        // 6. pred[ 0 ][ j ] = Round2( aboveRow[ j - 3 ] + 2 * aboveRow[ j - 2 ] + aboveRow[ j - 1 ], 2 ) for j = 2..size-1
+        for (auto j = 2; j < block_size; j++)
+            predicted_sample_at(0, j) = round_2(above_row_at(j - 3) + 2 * above_row_at(j - 2) + above_row_at(j - 1), 2);
+        // 7. pred[ i ][ j ] = pred[ i - 1 ][ j - 2 ] for i = 1..size-1, for j = 2..size-1
+        for (auto i = 1u; i < block_size; i++) {
+            for (auto j = 2u; j < block_size; j++)
+                predicted_sample_at(i, j) = predicted_sample_at(i - 1, j - 2);
+        }
+        break;
+    case IntraMode::TmPred:
+        // Otherwise if mode is equal to TM_PRED,
+        // pred[ i ][ j ] is set equal to Clip1( aboveRow[ j ] + leftCol[ i ] - aboveRow[ -1 ] )
+        // for i = 0..size-1, for j = 0..size-1.
+        for (auto i = 0u; i < block_size; i++) {
+            for (auto j = 0u; j < block_size; j++)
+                predicted_sample_at(i, j) = clip_1(m_parser->m_bit_depth, above_row_at(j) + left_column[i] - above_row_at(-1));
+        }
+        break;
+    case IntraMode::DcPred: {
+        // FIXME: All indices are set equally below, use memset.
+        Intermediate average = 0;
+
+        if (have_left && have_above) {
+            // Otherwise if mode is equal to DC_PRED and haveLeft is equal to 1 and haveAbove is equal to 1,
+            // The variable avg (the average of the samples in union of aboveRow and leftCol)
+            // is specified as follows:
+            // sum = 0
+            // for ( k = 0; k < size; k++ ) {
+            //     sum += leftCol[ k ]
+            //     sum += aboveRow[ k ]
+            // }
+            // avg = (sum + size) >> (log2Size + 1)
+            Intermediate sum = 0;
+            for (auto k = 0u; k < block_size; k++) {
+                sum += left_column[k];
+                sum += above_row_at(k);
+            }
+            average = (sum + block_size) >> (log2_of_block_size + 1);
+        } else if (have_left && !have_above) {
+            // Otherwise if mode is equal to DC_PRED and haveLeft is equal to 1 and haveAbove is equal to 0,
+            // The variable leftAvg is specified as follows:
+            // sum = 0
+            // for ( k = 0; k < size; k++ ) {
+            //     sum += leftCol[ k ]
+            // }
+            // leftAvg = (sum + (1 << (log2Size - 1) ) ) >> log2Size
+            Intermediate sum = 0;
+            for (auto k = 0u; k < block_size; k++)
+                sum += left_column[k];
+            average = (sum + (1 << (log2_of_block_size - 1))) >> log2_of_block_size;
+        } else if (!have_left && have_above) {
+            // Otherwise if mode is equal to DC_PRED and haveLeft is equal to 0 and haveAbove is equal to 1,
+            // The variable aboveAvg is specified as follows:
+            // sum = 0
+            // for ( k = 0; k < size; k++ ) {
+            // sum += aboveRow[ k ]
+            // }
+            // aboveAvg = (sum + (1 << (log2Size - 1) ) ) >> log2Size
+            Intermediate sum = 0;
+            for (auto k = 0u; k < block_size; k++)
+                sum += above_row_at(k);
+            average = (sum + (1 << (log2_of_block_size - 1))) >> log2_of_block_size;
+        } else {
+            // Otherwise (mode is DC_PRED),
+            // pred[ i ][ j ] is set equal to 1<<(BitDepth - 1) with i = 0..size-1 and j = 0..size-1.
+            average = 1 << (m_parser->m_bit_depth - 1);
+        }
+
+        // pred[ i ][ j ] is set equal to avg with i = 0..size-1 and j = 0..size-1.
+        for (auto i = 0u; i < block_size; i++) {
+            for (auto j = 0u; j < block_size; j++)
+                predicted_sample_at(i, j) = average;
+        }
+        break;
+    }
+    default:
+        dbgln("Unknown prediction mode {}", static_cast<u8>(mode));
+        VERIFY_NOT_REACHED();
+    }
+
+    // The current frame is updated as follows:
+    // − CurrFrame[ plane ][ y + i ][ x + j ] is set equal to pred[ i ][ j ] for i = 0..size-1 and j = 0..size-1.
+    auto width_in_frame_buffer = min(static_cast<u32>(block_size), max_x - x + 1); // Clamped so no column beyond max_x is written.
+    auto height_in_frame_buffer = min(static_cast<u32>(block_size), max_y - y + 1); // Clamped so no row beyond max_y is written.
+
+    for (auto i = 0u; i < height_in_frame_buffer; i++) {
+        for (auto j = 0u; j < width_in_frame_buffer; j++)
+            frame_buffer_at(y + i, x + j) = predicted_sample_at(i, j);
+    }
+
+    return {};
+}
+
+DecoderErrorOr<void> Decoder::predict_inter(u8, u32, u32, u32, u32, u32) // Stub: every argument is ignored and a not-implemented error is returned.
 {
     // TODO: Implement
     return DecoderError::not_implemented();
 }
 
-DecoderErrorOr<void> Decoder::predict_inter(size_t, u32, u32, u32, u32, u32)
+u16 Decoder::dc_q(u8 b)
 {
-    // TODO: Implement
+    // The function dc_q( b ) is specified as dc_qlookup[ (BitDepth-8) >> 1 ][ Clip3( 0, 255, b ) ] where dc_qlookup is
+    // defined as follows:
+    static const u16 dc_qlookup[3][256] = {
+        { 4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18, 19, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32, 32, 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42, 43, 43, 44, 45, 46, 47, 48, 48, 49, 50, 51, 52, 53, 53, 54, 55, 56, 57, 57, 58, 59, 60, 61, 62, 62, 63, 64, 65, 66, 66, 67, 68, 69, 70, 70, 71, 72, 73, 74, 74, 75, 76, 77, 78, 78, 79, 80, 81, 81, 82, 83, 84, 85, 85, 87, 88, 90, 92, 93, 95, 96, 98, 99, 101, 102, 104, 105, 107, 108, 110, 111, 113, 114, 116, 117, 118, 120, 121, 123, 125, 127, 129, 131, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 161, 164, 166, 169, 172, 174, 177, 180, 182, 185, 187, 190, 192, 195, 199, 202, 205, 208, 211, 214, 217, 220, 223, 226, 230, 233, 237, 240, 243, 247, 250, 253, 257, 261, 265, 269, 272, 276, 280, 284, 288, 292, 296, 300, 304, 309, 313, 317, 322, 326, 330, 335, 340, 344, 349, 354, 359, 364, 369, 374, 379, 384, 389, 395, 400, 406, 411, 417, 423, 429, 435, 441, 447, 454, 461, 467, 475, 482, 489, 497, 505, 513, 522, 530, 539, 549, 559, 569, 579, 590, 602, 614, 626, 640, 654, 668, 684, 700, 717, 736, 755, 775, 796, 819, 843, 869, 896, 925, 955, 988, 1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336 },
+        { 4, 9, 10, 13, 15, 17, 20, 22, 25, 28, 31, 34, 37, 40, 43, 47, 50, 53, 57, 60, 64, 68, 71, 75, 78, 82, 86, 90, 93, 97, 101, 105, 109, 113, 116, 120, 124, 128, 132, 136, 140, 143, 147, 151, 155, 159, 163, 166, 170, 174, 178, 182, 185, 189, 193, 197, 200, 204, 208, 212, 215, 219, 223, 226, 230, 233, 237, 241, 244, 248, 251, 255, 259, 262, 266, 269, 273, 276, 280, 283, 287, 290, 293, 297, 300, 304, 307, 310, 314, 317, 321, 324, 327, 331, 334, 337, 343, 350, 356, 362, 369, 375, 381, 387, 394, 400, 406, 412, 418, 424, 430, 436, 442, 448, 454, 460, 466, 472, 478, 484, 490, 499, 507, 516, 525, 533, 542, 550, 559, 567, 576, 584, 592, 601, 609, 617, 625, 634, 644, 655, 666, 676, 687, 698, 708, 718, 729, 739, 749, 759, 770, 782, 795, 807, 819, 831, 844, 856, 868, 880, 891, 906, 920, 933, 947, 961, 975, 988, 1001, 1015, 1030, 1045, 1061, 1076, 1090, 1105, 1120, 1137, 1153, 1170, 1186, 1202, 1218, 1236, 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, 1398, 1416, 1436, 1456, 1476, 1496, 1516, 1537, 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717, 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, 1958, 1990, 2021, 2054, 2088, 2123, 2159, 2197, 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, 2616, 2675, 2737, 2802, 2871, 2944, 3020, 3102, 3188, 3280, 3375, 3478, 3586, 3702, 3823, 3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347 },
+        { 4, 12, 18, 25, 33, 41, 50, 60, 70, 80, 91, 103, 115, 127, 140, 153, 166, 180, 194, 208, 222, 237, 251, 266, 281, 296, 312, 327, 343, 358, 374, 390, 405, 421, 437, 453, 469, 484, 500, 516, 532, 548, 564, 580, 596, 611, 627, 643, 659, 674, 690, 706, 721, 737, 752, 768, 783, 798, 814, 829, 844, 859, 874, 889, 904, 919, 934, 949, 964, 978, 993, 1008, 1022, 1037, 1051, 1065, 1080, 1094, 1108, 1122, 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234, 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, 1368, 1393, 1419, 1444, 1469, 1494, 1519, 1544, 1569, 1594, 1618, 1643, 1668, 1692, 1717, 1741, 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, 1957, 1992, 2027, 2061, 2096, 2130, 2165, 2199, 2233, 2267, 2300, 2334, 2367, 2400, 2434, 2467, 2499, 2532, 2575, 2618, 2661, 2704, 2746, 2788, 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, 3177, 3226, 3275, 3324, 3373, 3421, 3469, 3517, 3565, 3621, 3677, 3733, 3788, 3843, 3897, 3951, 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420, 4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, 5013, 5083, 5153, 5222, 5291, 5367, 5442, 5517, 5591, 5665, 5745, 5825, 5905, 5984, 6063, 6149, 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, 6966, 7064, 7163, 7269, 7376, 7483, 7599, 7715, 7832, 7958, 8085, 8214, 8352, 8492, 8635, 8788, 8945, 9104, 9275, 9450, 9639, 9832, 10031, 10245, 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, 12750, 13118, 13501, 13913, 14343, 14807, 15290, 15812, 16356, 16943, 17575, 18237, 18949, 19718, 20521, 21387 }
+    };
+
+    return dc_qlookup[(m_parser->m_bit_depth - 8) >> 1][clip_3<u8>(0, 255, b)]; // Bit depth 8/10/12 selects row 0/1/2. b is already a u8, so this clip cannot change it; callers must clamp any wider value themselves.
+}
+
+u16 Decoder::ac_q(u8 b)
+{
+    // The function ac_q( b ) is specified as ac_qlookup[ (BitDepth-8) >> 1 ][ Clip3( 0, 255, b ) ] where ac_qlookup is
+    // defined as follows:
+    static const u16 ac_qlookup[3][256] = {
+        { 4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 155, 158, 161, 164, 167, 170, 173, 176, 179, 182, 185, 188, 191, 194, 197, 200, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 255, 260, 265, 270, 275, 280, 285, 290, 295, 300, 305, 311, 317, 323, 329, 335, 341, 347, 353, 359, 366, 373, 380, 387, 394, 401, 408, 416, 424, 432, 440, 448, 456, 465, 474, 483, 492, 501, 510, 520, 530, 540, 550, 560, 571, 582, 593, 604, 615, 627, 639, 651, 663, 676, 689, 702, 715, 729, 743, 757, 771, 786, 801, 816, 832, 848, 864, 881, 898, 915, 933, 951, 969, 988, 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, 1173, 1196, 1219, 1243, 1267, 1292, 1317, 1343, 1369, 1396, 1423, 1451, 1479, 1508, 1537, 1567, 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828 },
+        { 4, 9, 11, 13, 16, 18, 21, 24, 27, 30, 33, 37, 40, 44, 48, 51, 55, 59, 63, 67, 71, 75, 79, 83, 88, 92, 96, 100, 105, 109, 114, 118, 122, 127, 131, 136, 140, 145, 149, 154, 158, 163, 168, 172, 177, 181, 186, 190, 195, 199, 204, 208, 213, 217, 222, 226, 231, 235, 240, 244, 249, 253, 258, 262, 267, 271, 275, 280, 284, 289, 293, 297, 302, 306, 311, 315, 319, 324, 328, 332, 337, 341, 345, 349, 354, 358, 362, 367, 371, 375, 379, 384, 388, 392, 396, 401, 409, 417, 425, 433, 441, 449, 458, 466, 474, 482, 490, 498, 506, 514, 523, 531, 539, 547, 555, 563, 571, 579, 588, 596, 604, 616, 628, 640, 652, 664, 676, 688, 700, 713, 725, 737, 749, 761, 773, 785, 797, 809, 825, 841, 857, 873, 889, 905, 922, 938, 954, 970, 986, 1002, 1018, 1038, 1058, 1078, 1098, 1118, 1138, 1158, 1178, 1198, 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, 1411, 1435, 1463, 1491, 1519, 1547, 1575, 1603, 1631, 1663, 1695, 1727, 1759, 1791, 1823, 1859, 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, 2199, 2239, 2283, 2327, 2371, 2415, 2459, 2507, 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915, 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, 3455, 3523, 3591, 3659, 3731, 3803, 3876, 3952, 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, 4692, 4784, 4876, 4972, 5068, 5168, 5268, 5372, 5476, 5584, 5692, 5804, 5916, 6032, 6148, 6268, 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312 },
+        { 4, 13, 19, 27, 35, 44, 54, 64, 75, 87, 99, 112, 126, 139, 154, 168, 183, 199, 214, 230, 247, 263, 280, 297, 314, 331, 349, 366, 384, 402, 420, 438, 456, 475, 493, 511, 530, 548, 567, 586, 604, 623, 642, 660, 679, 698, 716, 735, 753, 772, 791, 809, 828, 846, 865, 884, 902, 920, 939, 957, 976, 994, 1012, 1030, 1049, 1067, 1085, 1103, 1121, 1139, 1157, 1175, 1193, 1211, 1229, 1246, 1264, 1282, 1299, 1317, 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457, 1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, 1627, 1660, 1693, 1725, 1758, 1791, 1824, 1856, 1889, 1922, 1954, 1987, 2020, 2052, 2085, 2118, 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378, 2411, 2459, 2508, 2556, 2605, 2653, 2701, 2750, 2798, 2847, 2895, 2943, 2992, 3040, 3088, 3137, 3185, 3234, 3298, 3362, 3426, 3491, 3555, 3619, 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, 4230, 4310, 4390, 4470, 4550, 4631, 4711, 4791, 4871, 4967, 5064, 5160, 5256, 5352, 5448, 5544, 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410, 6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, 7579, 7723, 7867, 8011, 8155, 8315, 8475, 8635, 8795, 8956, 9132, 9308, 9484, 9660, 9836, 10028, 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, 11885, 12109, 12333, 12573, 12813, 13053, 13309, 13565, 13821, 14093, 14365, 14637, 14925, 15213, 15502, 15806, 16110, 16414, 16734, 17054, 17390, 17726, 18062, 18414, 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, 21902, 22334, 22766, 23214, 23662, 24126, 24590, 25070, 25551, 26047, 26559, 27071, 27599, 28143, 28687, 29247 }
+    };
+
+    return ac_qlookup[(m_parser->m_bit_depth - 8) >> 1][clip_3<u8>(0, 255, b)]; // Bit depth 8/10/12 selects row 0/1/2. b is already a u8, so this clip cannot change it; callers must clamp any wider value themselves.
+}
+
+u8 Decoder::get_qindex()
+{
+    // get_qindex( ) yields the quantizer index that applies to the current block.
+
+    // Without an active SEG_LVL_ALT_Q segmentation feature, base_q_idx is returned unchanged.
+    if (!m_parser->seg_feature_active(SEG_LVL_ALT_Q))
+        return m_parser->m_base_q_idx;
+
+    // Otherwise the index is derived from FeatureData[ segment_id ][ SEG_LVL_ALT_Q ].
+    auto quantizer_index = m_parser->m_feature_data[m_parser->m_segment_id][SEG_LVL_ALT_Q];
+
+    // If segmentation_abs_or_delta_update is equal to 0, the feature data is added on top of
+    // base_q_idx instead of being used as is.
+    bool const is_relative_to_base = !m_parser->m_segmentation_abs_or_delta_update;
+    if (is_relative_to_base)
+        quantizer_index += m_parser->m_base_q_idx;
+
+    // The final value is Clip3( 0, 255, data ).
+    return clip_3<u8>(0, 255, quantizer_index);
+}
+
+u16 Decoder::get_dc_quant(u8 plane)
+{
+    // The function get_dc_quant( plane ) returns the quantizer value for the dc coefficient for a particular plane:
+    // − If plane is equal to 0, return dc_q( get_qindex( ) + delta_q_y_dc ).
+    // − Otherwise, return dc_q( get_qindex( ) + delta_q_uv_dc ).
+    // Instead of if { return }, select the value to add and return. The sum is clipped to [0, 255] before narrowing:
+    // dc_q() takes a u8, so casting an out-of-range sum first would wrap (e.g. 250 + 10 -> 4) rather than clip.
+    i8 offset = plane == 0 ? m_parser->m_delta_q_y_dc : m_parser->m_delta_q_uv_dc;
+    return dc_q(static_cast<u8>(clip_3<i32>(0, 255, get_qindex() + offset)));
+}
+
+u16 Decoder::get_ac_quant(u8 plane)
+{
+    // The function get_ac_quant( plane ) returns the quantizer value for the ac coefficient for a particular plane:
+    // − If plane is equal to 0, return ac_q( get_qindex( ) ).
+    // − Otherwise, return ac_q( get_qindex( ) + delta_q_uv_ac ).
+    // Instead of if { return }, select the value to add and return. The sum is clipped to [0, 255] before narrowing:
+    // ac_q() takes a u8, so casting an out-of-range sum first would wrap (e.g. 250 + 10 -> 4) rather than clip.
+    i8 offset = plane == 0 ? 0 : m_parser->m_delta_q_uv_ac;
+    return ac_q(static_cast<u8>(clip_3<i32>(0, 255, get_qindex() + offset)));
+}
+
+DecoderErrorOr<void> Decoder::reconstruct(u8 plane, u32 transform_block_x, u32 transform_block_y, TXSize transform_block_size)
+{
+    // 8.6.2 Reconstruct process
+    // Dequantizes the tokens of one transform block, inverse transforms them and adds the result to the plane's output buffer.
+    // The variable dqDenom is set equal to 2 if txSz is equal to TX_32X32, otherwise dqDenom is set equal to 1.
+    Intermediate dq_denominator = transform_block_size == TX_32x32 ? 2 : 1;
+    // The variable n (specifying the base 2 logarithm of the width of the transform block) is set equal to 2 + txSz.
+    u8 log2_of_block_size = 2u + transform_block_size;
+    // The variable n0 (specifying the width of the transform block) is set equal to 1 << n.
+    auto block_size = 1u << log2_of_block_size;
+
+    // 1. Dequant[ i ][ j ] is set equal to ( Tokens[ i * n0 + j ] * get_ac_quant( plane ) ) / dqDenom
+    //    for i = 0..(n0-1), for j = 0..(n0-1)
+    Vector<Intermediate>& dequantized = m_buffers.dequantized;
+    DECODER_TRY_ALLOC(dequantized.try_resize_and_keep_capacity(buffer_size(block_size, block_size)));
+    Intermediate ac_quant = get_ac_quant(plane);
+    for (auto i = 0u; i < block_size; i++) {
+        for (auto j = 0u; j < block_size; j++) {
+            auto index = index_from_row_and_column(i, j, block_size);
+            if (index == 0)
+                continue;
+            dequantized[index] = (m_parser->m_tokens[index] * ac_quant) / dq_denominator;
+        }
+    }
+
+    // 2. Dequant[ 0 ][ 0 ] is set equal to ( Tokens[ 0 ] * get_dc_quant( plane ) ) / dqDenom
+    dequantized[0] = (m_parser->m_tokens[0] * get_dc_quant(plane)) / dq_denominator;
+
+    // It is a requirement of bitstream conformance that the values written into the Dequant array in steps 1 and 2
+    // are representable by a signed integer with 8 + BitDepth bits. Violations are reported as corrupted input rather than by asserting.
+    for (auto i = 0u; i < block_size * block_size; i++) {
+        if (!check_intermediate_bounds(dequantized[i]))
+            return DecoderError::corrupted("Dequantized value out of range"sv);
+    }
+
+    // 3. Invoke the 2D inverse transform block process defined in section 8.7.2 with the variable n as input. The outputs are stored back to the Dequant buffer.
+    TRY(inverse_transform_2d(dequantized, log2_of_block_size));
+
+    // 4. CurrFrame[ plane ][ y + i ][ x + j ] is set equal to Clip1( CurrFrame[ plane ][ y + i ][ x + j ] + Dequant[ i ][ j ] ) for i = 0..(n0-1) and j = 0..(n0-1).
+    auto& current_buffer = get_output_buffer(plane);
+    auto subsampling_x = (plane > 0 ? m_parser->m_subsampling_x : 0);
+    auto subsampling_y = (plane > 0 ? m_parser->m_subsampling_y : 0);
+    auto frame_width = (m_parser->m_mi_cols * 8) >> subsampling_x;
+    auto frame_height = (m_parser->m_mi_rows * 8) >> subsampling_y;
+    auto width_in_frame_buffer = min(block_size, frame_width - transform_block_x);
+    auto height_in_frame_buffer = min(block_size, frame_height - transform_block_y);
+
+    for (auto i = 0u; i < height_in_frame_buffer; i++) {
+        for (auto j = 0u; j < width_in_frame_buffer; j++) {
+            auto index = index_from_row_and_column(transform_block_y + i, transform_block_x + j, frame_width);
+            auto dequantized_value = dequantized[index_from_row_and_column(i, j, block_size)];
+            current_buffer[index] = clip_1(m_parser->m_bit_depth, current_buffer[index] + dequantized_value);
+        }
+    }
+
+    return {};
+}
+
+inline DecoderErrorOr<void> Decoder::inverse_walsh_hadamard_transform(Vector<Intermediate>& data, u8 log2_of_block_size, u8 shift)
+{
+    (void)data; // Not read yet: the transform below is still a stub.
+    (void)shift; // Not read yet: the transform below is still a stub.
+    // The input to this process is a variable shift that specifies the amount of pre-scaling.
+    // This process does an in-place transform of the array T (of length 4), so any other block size is rejected:
+    if (1 << log2_of_block_size != 4)
+        return DecoderError::corrupted("Block size was not 4"sv);
+    // The transform steps themselves are not implemented, so a call that passes the size check still returns an error.
     return DecoderError::not_implemented();
 }
 
-DecoderErrorOr<void> Decoder::reconstruct(size_t, u32, u32, TXSize)
+inline i32 Decoder::cos64(u8 angle)
 {
-    // TODO: Implement
-    return DecoderError::not_implemented();
+    const i32 cos64_lookup[33] = { 16384, 16364, 16305, 16207, 16069, 15893, 15679, 15426, 15137, 14811, 14449, 14053, 13623, 13160, 12665, 12140, 11585, 11003, 10394, 9760, 9102, 8423, 7723, 7005, 6270, 5520, 4756, 3981, 3196, 2404, 1606, 804, 0 };
+    // The table only covers angles 0..32 and starts at 16384 ( 1 << 14 ); the branches below mirror and negate it for angles up to 127.
+    // 1. Set a variable angle2 equal to angle & 127 (the parameter itself is reused as angle2).
+    angle &= 127;
+    // 2. If angle2 is greater than or equal to 0 and less than or equal to 32, return cos64_lookup[ angle2 ].
+    if (angle <= 32)
+        return cos64_lookup[angle];
+    // 3. If angle2 is greater than 32 and less than or equal to 64, return cos64_lookup[ 64 - angle2 ] * -1.
+    if (angle <= 64)
+        return -cos64_lookup[64 - angle];
+    // 4. If angle2 is greater than 64 and less than or equal to 96, return cos64_lookup[ angle2 - 64 ] * -1.
+    if (angle <= 96)
+        return -cos64_lookup[angle - 64];
+    // 5. Otherwise (if angle2 is greater than 96 and less than 128), return cos64_lookup[ 128 - angle2 ].
+    return cos64_lookup[128 - angle];
+}
+
+inline i32 Decoder::sin64(u8 angle)
+{
+    // Evaluated as cos64( angle - 32 ); angles below 32 are advanced by 128 first so that the subtraction cannot go below zero.
+    u8 const shifted_angle = angle < 32 ? static_cast<u8>(angle + 128) : angle;
+    return cos64(shifted_angle - 32u);
+}
+
+template<typename T>
+inline i32 Decoder::round_2(T value, u8 bits)
+{
+    T const half = static_cast<T>(1u << (bits - 1u)); // Round2( x, n ) adds 1 << ( n - 1 ) before shifting right by n.
+    return static_cast<i32>(static_cast<T>((value + half) >> bits));
+}
+
+inline bool check_bounds(i64 value, u8 bits)
+{
+    const i64 maximum = (static_cast<i64>(1) << (bits - 1)) - 1; // Shift in 64 bits: callers pass 24 + BitDepth, and shifting a 32-bit 1u by 32 or more is undefined.
+    return value >= ~maximum && value <= maximum; // ~maximum is -( maximum + 1 ), the smallest value a signed `bits`-bit integer holds.
+}
+
+inline bool Decoder::check_intermediate_bounds(Intermediate value)
+{
+    i32 const upper = (1 << (7 + m_parser->m_bit_depth)) - 1; // Largest value of a signed integer with 8 + BitDepth bits.
+    return !(value < ~upper || value > upper);
+}
+
+// (8.7.1.1) The function B( a, b, angle, 0 ) performs a butterfly rotation.
+inline void Decoder::butterfly_rotation_in_place(Vector<Intermediate>& data, size_t index_a, size_t index_b, u8 angle, bool flip)
+{
+    i64 cos = cos64(angle); // Widened to 64 bits so that the products below are not computed (and overflowed) in 32 bits.
+    i64 sin = sin64(angle); // Widened to 64 bits for the same reason.
+    // 1. The variable x is set equal to T[ a ] * cos64( angle ) - T[ b ] * sin64( angle ).
+    i64 rotated_a = data[index_a] * cos - data[index_b] * sin;
+    // 2. The variable y is set equal to T[ a ] * sin64( angle ) + T[ b ] * cos64( angle ).
+    i64 rotated_b = data[index_a] * sin + data[index_b] * cos;
+    // 3. T[ a ] is set equal to Round2( x, 14 ).
+    data[index_a] = round_2(rotated_a, 14);
+    // 4. T[ b ] is set equal to Round2( y, 14 ).
+    data[index_b] = round_2(rotated_b, 14);
+
+    // The function B( a ,b, angle, 1 ) performs a butterfly rotation and flip specified by the following ordered steps:
+    // 1. The function B( a, b, angle, 0 ) is invoked.
+    // 2. The contents of T[ a ] and T[ b ] are exchanged.
+    if (flip)
+        swap(data[index_a], data[index_b]);
+
+    // It is a requirement of bitstream conformance that the values saved into the array T by this function are
+    // representable by a signed integer using 8 + BitDepth bits of precision.
+    VERIFY(check_intermediate_bounds(data[index_a]));
+    VERIFY(check_intermediate_bounds(data[index_b]));
+}
+
+// (8.7.1.1) The function H( a, b, 0 ) performs a Hadamard rotation; H( a, b, 1 ) is the same rotation
+// invoked with flipped indices, i.e. as H( b, a, 0 ).
+inline void Decoder::hadamard_rotation_in_place(Vector<Intermediate>& data, size_t index_a, size_t index_b, bool flip)
+{
+    // Resolve the flipped form up front by choosing which index receives the sum and which one
+    // receives the difference.
+    auto const sum_index = flip ? index_b : index_a;
+    auto const difference_index = flip ? index_a : index_b;
+
+    // The rotation is done in place, so both inputs are read before either output is written:
+    // x is T[ a ] and y is T[ b ].
+    auto const x = data[sum_index];
+    auto const y = data[difference_index];
+
+    // T[ a ] is set equal to x + y.
+    data[sum_index] = x + y;
+    // T[ b ] is set equal to x - y.
+    data[difference_index] = x - y;
+
+    // Bitstream conformance requires the values saved into the array T by this function to be
+    // representable by a signed integer using 8 + BitDepth bits of precision.
+    VERIFY(check_intermediate_bounds(data[sum_index]));
+    VERIFY(check_intermediate_bounds(data[difference_index]));
+}
+
+inline DecoderErrorOr<void> Decoder::inverse_discrete_cosine_transform_array_permutation(Vector<Intermediate>& data, u8 log2_of_block_size)
+{
+    u8 element_count = 1 << log2_of_block_size;
+
+    // In-place permutation of the array T of length 2^n, required before the inverse DCT process
+    // is executed. It is only defined for 2 ≤ n ≤ 5.
+    if (!(log2_of_block_size >= 2 && log2_of_block_size <= 5))
+        return DecoderError::corrupted("Block size was out of range"sv);
+
+    // copyT in the spec: a scratch copy of T, taken before any element of T is overwritten.
+    Vector<Intermediate>& scratch = m_buffers.transform_temp;
+    scratch.clear_with_capacity();
+    DECODER_TRY_ALLOC(scratch.try_resize_and_keep_capacity(buffer_size(element_count, element_count)));
+    scratch = data;
+
+    // T[ i ] is set equal to copyT[ brev( n, i ) ] for i = 0..((1<<n) - 1).
+    for (auto position = 0u; position < element_count; position++)
+        data[position] = scratch[brev(log2_of_block_size, position)];
+
+    return {};
+}
+
+inline DecoderErrorOr<void> Decoder::inverse_discrete_cosine_transform(Vector<Intermediate>& data, u8 log2_of_block_size)
+{
+    // 2.1. The variable n0 is set equal to 1<<n.
+    u8 block_size = 1 << log2_of_block_size;
+
+    // 8.7.1.3 Inverse DCT process
+    // Works in place on T: the transform of size n - 1 is applied recursively first (step 2.5), then the stages below run for size n.
+    // 2.2. The variable n1 is set equal to 1<<(n-1).
+    u8 half_block_size = block_size >> 1;
+    // 2.3 The variable n2 is set equal to 1<<(n-2).
+    u8 quarter_block_size = half_block_size >> 1;
+    // 2.4 The variable n3 is set equal to 1<<(n-3).
+    u8 eighth_block_size = quarter_block_size >> 1;
+
+    // 2.5 If n is equal to 2, invoke B( 0, 1, 16, 1 ), otherwise recursively invoke the inverse DCT defined in this
+    // section with the variable n set equal to n - 1.
+    if (log2_of_block_size == 2)
+        butterfly_rotation_in_place(data, 0, 1, 16, true);
+    else
+        TRY(inverse_discrete_cosine_transform(data, log2_of_block_size - 1));
+
+    // 2.6 Invoke B( n1+i, n0-1-i, 32-brev( 5, n1+i), 0 ) for i = 0..(n2-1).
+    for (auto i = 0u; i < quarter_block_size; i++) {
+        auto index = half_block_size + i;
+        butterfly_rotation_in_place(data, index, block_size - 1 - i, 32 - brev(5, index), false);
+    }
+
+    // 2.7 If n is greater than or equal to 3:
+    if (log2_of_block_size >= 3) {
+        // a. Invoke H( n1+4*i+2*j, n1+1+4*i+2*j, j ) for i = 0..(n3-1), j = 0..1.
+        for (auto i = 0u; i < eighth_block_size; i++) {
+            for (auto j = 0u; j < 2; j++) {
+                auto index = half_block_size + (4 * i) + (2 * j);
+                hadamard_rotation_in_place(data, index, index + 1, j);
+            }
+        }
+    }
+
+    // 4. If n is equal to 5:
+    if (log2_of_block_size == 5) {
+        // a. Invoke B( n0-n+3-n2*j-4*i, n1+n-4+n2*j+4*i, 28-16*i+56*j, 1 ) for i = 0..1, j = 0..1.
+        for (auto i = 0u; i < 2; i++) {
+            for (auto j = 0u; j < 2; j++) {
+                auto index_a = block_size - log2_of_block_size + 3 - (quarter_block_size * j) - (4 * i);
+                auto index_b = half_block_size + log2_of_block_size - 4 + (quarter_block_size * j) + (4 * i);
+                auto angle = 28 - (16 * i) + (56 * j);
+                butterfly_rotation_in_place(data, index_a, index_b, angle, true);
+            }
+        }
+
+        // b. Invoke H( n1+n3*j+i, n1+n2-5+n3*j-i, j&1 ) for i = 0..1, j = 0..3.
+        for (auto i = 0u; i < 2; i++) {
+            for (auto j = 0u; j < 4; j++) {
+                auto index_a = half_block_size + (eighth_block_size * j) + i;
+                auto index_b = half_block_size + quarter_block_size - 5 + (eighth_block_size * j) - i;
+                hadamard_rotation_in_place(data, index_a, index_b, (j & 1) != 0);
+            }
+        }
+    }
+
+    // 5. If n is greater than or equal to 4:
+    if (log2_of_block_size >= 4) {
+        // a. Invoke B( n0-n+2-i-n2*j, n1+n-3+i+n2*j, 24+48*j, 1 ) for i = 0..(n==5), j = 0..1.
+        for (auto i = 0u; i <= (log2_of_block_size == 5); i++) { // The comparison promotes to 0 or 1, so i covers 0..(n==5).
+            for (auto j = 0u; j < 2; j++) {
+                auto index_a = block_size - log2_of_block_size + 2 - i - (quarter_block_size * j);
+                auto index_b = half_block_size + log2_of_block_size - 3 + i + (quarter_block_size * j);
+                butterfly_rotation_in_place(data, index_a, index_b, 24 + (48 * j), true);
+            }
+        }
+
+        // b. Invoke H( n1+n2*j+i, n1+n2-1+n2*j-i, j&1 ) for i = 0..(2n-7), j = 0..1.
+        for (auto i = 0u; i < (2 * log2_of_block_size) - 6u; i++) { // i < 2n-6 is the inclusive range 0..(2n-7).
+            for (auto j = 0u; j < 2; j++) {
+                auto index_a = half_block_size + (quarter_block_size * j) + i;
+                auto index_b = half_block_size + quarter_block_size - 1 + (quarter_block_size * j) - i;
+                hadamard_rotation_in_place(data, index_a, index_b, (j & 1) != 0);
+            }
+        }
+    }
+
+    // 6. If n is greater than or equal to 3:
+    if (log2_of_block_size >= 3) {
+        // a. Invoke B( n0-n3-1-i, n1+n3+i, 16, 1 ) for i = 0..(n3-1).
+        for (auto i = 0u; i < eighth_block_size; i++) {
+            auto index_a = block_size - eighth_block_size - 1 - i;
+            auto index_b = half_block_size + eighth_block_size + i;
+            butterfly_rotation_in_place(data, index_a, index_b, 16, true);
+        }
+    }
+
+    // 7. Invoke H( i, n0-1-i, 0 ) for i = 0..(n1-1).
+    for (auto i = 0u; i < half_block_size; i++)
+        hadamard_rotation_in_place(data, i, block_size - 1 - i, false);
+
+    return {};
+}
+
+inline void Decoder::inverse_asymmetric_discrete_sine_transform_input_array_permutation(Vector<Intermediate>& data, Vector<Intermediate>& temp, u8 log2_of_block_size)
+{
+    // copyT in the spec: a snapshot of T to read from while T itself is overwritten.
+    temp = data;
+
+    // n0 = 1 << n is the length of T. The spec iterates i = 0..(n1-1) with n1 = 1<<(n-1) and touches
+    // T[ 2 * i ] and T[ 2 * i + 1 ]; stepping the even index by two visits the same pairs.
+    auto const length = 1u << log2_of_block_size;
+
+    for (auto even = 0u; even < length; even += 2) {
+        auto const odd = even + 1;
+        // Even locations: T[ 2 * i ] is set equal to copyT[ n0 - 1 - 2 * i ].
+        data[even] = temp[length - 1 - even];
+        // Odd locations: T[ 2 * i + 1 ] is set equal to copyT[ 2 * i ].
+        data[odd] = temp[even];
+    }
+}
+
+inline void Decoder::inverse_asymmetric_discrete_sine_transform_output_array_permutation(Vector<Intermediate>& data, Vector<Intermediate>& temp, u8 log2_of_block_size)
+{
+    // copyT in the spec: snapshot T so that the shuffle below only reads unmodified values.
+    temp = data;
+    if (log2_of_block_size != 4) {
+        // n equal to 3 is the only other size handled here.
+        VERIFY(log2_of_block_size == 3);
+        // T[ 4*a + 2*b + c ] = copyT[ 4*(c^b) + 2*(b^a) + a ] for a, b, c = 0..1; 4*a + 2*b + c counts 0..7, so a, b, c are its bits.
+        for (auto destination = 0u; destination < 8; destination++) {
+            auto a = (destination >> 2) & 1u;
+            auto b = (destination >> 1) & 1u;
+            auto c = destination & 1u;
+            data[destination] = temp[4 * (c ^ b) + 2 * (b ^ a) + a];
+        }
+        return;
+    }
+    // n equal to 4: T[ 8*a + 4*b + 2*c + d ] is set equal to copyT[ 8*(d^c) + 4*(c^b) + 2*(b^a) + a ] for
+    // a = 0..1, b = 0..1, c = 0..1 and d = 0..1; the destination index counts 0..15 in the same way.
+    for (auto destination = 0u; destination < 16; destination++) {
+        auto a = (destination >> 3) & 1u;
+        auto b = (destination >> 2) & 1u;
+        auto c = (destination >> 1) & 1u;
+        auto d = destination & 1u;
+        data[destination] = temp[8 * (d ^ c) + 4 * (c ^ b) + 2 * (b ^ a) + a];
+    }
+}
+
+inline void Decoder::inverse_asymmetric_discrete_sine_transform_4(Vector<Intermediate>& data)
+{
+    VERIFY(data.size() == 4);
+    const i64 sinpi_1_9 = 5283;
+    const i64 sinpi_2_9 = 9929;
+    const i64 sinpi_3_9 = 13377;
+    const i64 sinpi_4_9 = 15212;
+    // In-place inverse ADST of the 4-element array T. The constants above are i64, so every product below is formed in 64 bits.
+    // Steps are derived from pseudocode in (8.7.1.6):
+    // s0 = SINPI_1_9 * T[ 0 ]
+    i64 s0 = sinpi_1_9 * data[0];
+    // s1 = SINPI_2_9 * T[ 0 ]
+    i64 s1 = sinpi_2_9 * data[0];
+    // s2 = SINPI_3_9 * T[ 1 ]
+    i64 s2 = sinpi_3_9 * data[1];
+    // s3 = SINPI_4_9 * T[ 2 ]
+    i64 s3 = sinpi_4_9 * data[2];
+    // s4 = SINPI_1_9 * T[ 2 ]
+    i64 s4 = sinpi_1_9 * data[2];
+    // s5 = SINPI_2_9 * T[ 3 ]
+    i64 s5 = sinpi_2_9 * data[3];
+    // s6 = SINPI_4_9 * T[ 3 ]
+    i64 s6 = sinpi_4_9 * data[3];
+    // v = T[ 0 ] - T[ 2 ] + T[ 3 ]
+    // s7 = SINPI_3_9 * v
+    i64 s7 = sinpi_3_9 * (data[0] - data[2] + data[3]);
+
+    // x0 = s0 + s3 + s5
+    auto x0 = s0 + s3 + s5;
+    // x1 = s1 - s4 - s6
+    auto x1 = s1 - s4 - s6;
+    // x2 = s7
+    auto x2 = s7;
+    // x3 = s2
+    auto x3 = s2;
+
+    // s0 = x0 + x3
+    s0 = x0 + x3;
+    // s1 = x1 + x3
+    s1 = x1 + x3;
+    // s2 = x2
+    s2 = x2;
+    // s3 = x0 + x1 - x3
+    s3 = x0 + x1 - x3;
+
+    // T[ 0 ] = Round2( s0, 14 )
+    data[0] = round_2(s0, 14);
+    // T[ 1 ] = Round2( s1, 14 )
+    data[1] = round_2(s1, 14);
+    // T[ 2 ] = Round2( s2, 14 )
+    data[2] = round_2(s2, 14);
+    // T[ 3 ] = Round2( s3, 14 )
+    data[3] = round_2(s3, 14);
+    // (8.7.1.1) The inverse asymmetric discrete sine transforms also make use of an intermediate array named S.
+    // The values in this array require higher precision to avoid overflow. Using signed integers with 24 +
+    // BitDepth bits of precision is enough to avoid overflow.
+    // NOTE(review): the checks below test the rounded outputs in T, not the s0..s3 intermediates - confirm this is intended.
+    const u8 bits = 24 + m_parser->m_bit_depth;
+    VERIFY(check_bounds(data[0], bits));
+    VERIFY(check_bounds(data[1], bits));
+    VERIFY(check_bounds(data[2], bits));
+    VERIFY(check_bounds(data[3], bits));
+}
+
+// SB( a, b, angle, flip ): butterfly rotation that reads from `source` (array T in the spec) and writes to `destination` (array S).
+// With flip set, the two results are exchanged afterwards, which is the spec's SB( a, b, angle, 1 ).
+template<typename S, typename D>
+inline void Decoder::butterfly_rotation(Vector<S>& source, Vector<D>& destination, size_t index_a, size_t index_b, u8 angle, bool flip)
+{
+    // Lift both inputs to the destination's element type before multiplying.
+    D const first = source[index_a];
+    D const second = source[index_b];
+    auto const cosine = cos64(angle);
+    auto const sine = sin64(angle);
+
+    // S[ a ] is set equal to T[ a ] * cos64( angle ) - T[ b ] * sin64( angle ).
+    // S[ b ] is set equal to T[ a ] * sin64( angle ) + T[ b ] * cos64( angle ).
+    D const rotated_first = first * cosine - second * sine;
+    D const rotated_second = first * sine + second * cosine;
+    destination[index_a] = rotated_first;
+    destination[index_b] = rotated_second;
+
+    // SB( a, b, angle, 1 ): the contents of S[ a ] and S[ b ] are exchanged.
+    if (flip)
+        swap(destination[index_a], destination[index_b]);
+}
+
+// SH( a, b ): Hadamard rotation with rounding.
+// Reads from `source` (array S in the spec) and writes to `destination` (array T in the spec).
+template<typename S, typename D>
+inline void Decoder::hadamard_rotation(Vector<S>& source, Vector<D>& destination, size_t index_a, size_t index_b)
+{
+    S const first = source[index_a]; // The source's precision is kept until Round2 narrows the results.
+    S const second = source[index_b];
+    auto const sum = first + second;
+    auto const difference = first - second;
+    // T[ a ] is set equal to Round2( S[ a ] + S[ b ], 14 ) and T[ b ] to Round2( S[ a ] - S[ b ], 14 ).
+    destination[index_a] = round_2(sum, 14);
+    destination[index_b] = round_2(difference, 14);
+}
+
+inline DecoderErrorOr<void> Decoder::inverse_asymmetric_discrete_sine_transform_8(Vector<Intermediate>& data)
+{
+    VERIFY(data.size() == 8);
+    // Inverse ADST8 process: fails only if the high precision scratch array cannot be allocated.
+    // This process does an in-place transform of the array T using:
+
+    // A higher precision array S for intermediate results.
+    Vector<i64>& high_precision_temp = m_buffers.adst_temp;
+    high_precision_temp.clear_with_capacity();
+    DECODER_TRY_ALLOC(high_precision_temp.try_resize_and_keep_capacity(8));
+
+    // The following ordered steps apply:
+
+    // 1. Invoke the ADST input array permutation process specified in section 8.7.1.4 with the input variable n set
+    //    equal to 3. (m_buffers.transform_temp serves as the copyT scratch array of both permutation processes.)
+    inverse_asymmetric_discrete_sine_transform_input_array_permutation(data, m_buffers.transform_temp, 3);
+
+    // 2. Invoke SB( 2*i, 1+2*i, 30-8*i, 1 ) for i = 0..3.
+    for (auto i = 0u; i < 4; i++)
+        butterfly_rotation(data, high_precision_temp, 2 * i, 1 + (2 * i), 30 - (8 * i), true);
+    // (8.7.1.1) NOTE - The values in array S require higher precision to avoid overflow. Using signed integers with
+    // 24 + BitDepth bits of precision is enough to avoid overflow.
+    const u8 bits = 24 + m_parser->m_bit_depth;
+    for (auto i = 0u; i < 8; i++)
+        VERIFY(check_bounds(high_precision_temp[i], bits));
+    // 3. Invoke SH( i, 4+i ) for i = 0..3.
+    for (auto i = 0u; i < 4; i++)
+        hadamard_rotation(high_precision_temp, data, i, 4 + i);
+
+    // 4. Invoke SB( 4+3*i, 5+i, 24-16*i, 1 ) for i = 0..1.
+    for (auto i = 0u; i < 2; i++)
+        butterfly_rotation(data, high_precision_temp, 4 + (3 * i), 5 + i, 24 - (16 * i), true);
+    // Check again that we haven't exceeded the integer bounds.
+    for (auto i = 0u; i < 8; i++)
+        VERIFY(check_bounds(high_precision_temp[i], bits));
+    // 5. Invoke SH( 4+i, 6+i ) for i = 0..1.
+    for (auto i = 0u; i < 2; i++)
+        hadamard_rotation(high_precision_temp, data, 4 + i, 6 + i);
+
+    // 6. Invoke H( i, 2+i, 0 ) for i = 0..1.
+    for (auto i = 0u; i < 2; i++)
+        hadamard_rotation_in_place(data, i, 2 + i, false);
+
+    // 7. Invoke B( 2+4*i, 3+4*i, 16, 1 ) for i = 0..1.
+    for (auto i = 0u; i < 2; i++)
+        butterfly_rotation_in_place(data, 2 + (4 * i), 3 + (4 * i), 16, true);
+
+    // 8. Invoke the ADST output array permutation process specified in section 8.7.1.5 with the input variable n
+    //    set equal to 3.
+    inverse_asymmetric_discrete_sine_transform_output_array_permutation(data, m_buffers.transform_temp, 3);
+
+    // 9. Set T[ 1+2*i ] equal to -T[ 1+2*i ] for i = 0..3.
+    for (auto i = 0u; i < 4; i++) {
+        auto index = 1 + (2 * i);
+        data[index] = -data[index];
+    }
+    return {};
+}
+
+inline DecoderErrorOr<void> Decoder::inverse_asymmetric_discrete_sine_transform_16(Vector<Intermediate>& data)
+{
+    VERIFY(data.size() == 16);
+    // This process does an in-place transform of the array T using:
+
+    // A higher precision array S for intermediate results.
+    Vector<i64>& high_precision_temp = m_buffers.adst_temp;
+    high_precision_temp.clear_with_capacity();
+    DECODER_TRY_ALLOC(high_precision_temp.try_resize_and_keep_capacity(16));
+
+    // The following ordered steps apply:
+
+    // 1. Invoke the ADST input array permutation process specified in section 8.7.1.4 with the input variable n set
+    // equal to 4.
+    inverse_asymmetric_discrete_sine_transform_input_array_permutation(data, m_buffers.transform_temp, 4);
+
+    // 2. Invoke SB( 2*i, 1+2*i, 31-4*i, 1 ) for i = 0..7.
+    for (auto i = 0u; i < 8; i++)
+        butterfly_rotation(data, high_precision_temp, 2 * i, 1 + (2 * i), 31 - (4 * i), true);
+    // (8.7.1.1) The inverse asymmetric discrete sine transforms also make use of an intermediate array named S.
+    // The values in this array require higher precision to avoid overflow. Using signed integers with 24 +
+    // BitDepth bits of precision is enough to avoid overflow. The check therefore applies to S (high_precision_temp).
+    const u8 bits = 24 + m_parser->m_bit_depth;
+    for (auto i = 0u; i < 16; i++)
+        VERIFY(check_bounds(high_precision_temp[i], bits));
+    // 3. Invoke SH( i, 8+i ) for i = 0..7.
+    for (auto i = 0u; i < 8; i++)
+        hadamard_rotation(high_precision_temp, data, i, 8 + i);
+
+    // 4. Invoke SB( 8+2*i, 9+2*i, 28-16*i, 1 ) for i = 0..3. Adding 128 keeps the angle non-negative; cos64() masks it with 127.
+    for (auto i = 0u; i < 4; i++)
+        butterfly_rotation(data, high_precision_temp, 8 + (2 * i), 9 + (2 * i), 128 + 28 - (16 * i), true);
+    // Check again that the values in S haven't exceeded the integer bounds.
+    for (auto i = 0u; i < 16; i++)
+        VERIFY(check_bounds(high_precision_temp[i], bits));
+    // 5. Invoke SH( 8+i, 12+i ) for i = 0..3.
+    for (auto i = 0u; i < 4; i++)
+        hadamard_rotation(high_precision_temp, data, 8 + i, 12 + i);
+
+    // 6. Invoke H( i, 4+i, 0 ) for i = 0..3.
+    for (auto i = 0u; i < 4; i++)
+        hadamard_rotation_in_place(data, i, 4 + i, false);
+
+    // 7. Invoke SB( 4+8*i+3*j, 5+8*i+j, 24-16*j, 1 ) for i = 0..1, for j = 0..1.
+    for (auto i = 0u; i < 2; i++)
+        for (auto j = 0u; j < 2; j++)
+            butterfly_rotation(data, high_precision_temp, 4 + (8 * i) + (3 * j), 5 + (8 * i) + j, 24 - (16 * j), true);
+    // Check again that the values in S haven't exceeded the integer bounds.
+    for (auto i = 0u; i < 16; i++)
+        VERIFY(check_bounds(high_precision_temp[i], bits));
+    // 8. Invoke SH( 4+8*j+i, 6+8*j+i ) for i = 0..1, j = 0..1.
+    for (auto i = 0u; i < 2; i++)
+        for (auto j = 0u; j < 2; j++)
+            hadamard_rotation(high_precision_temp, data, 4 + (8 * j) + i, 6 + (8 * j) + i);
+
+    // 9. Invoke H( 8*j+i, 2+8*j+i, 0 ) for i = 0..1, for j = 0..1.
+    for (auto i = 0u; i < 2; i++)
+        for (auto j = 0u; j < 2; j++)
+            hadamard_rotation_in_place(data, (8 * j) + i, 2 + (8 * j) + i, false);
+    // 10. Invoke B( 2+4*j+8*i, 3+4*j+8*i, 48+64*(i^j), 0 ) for i = 0..1, for j = 0..1.
+    for (auto i = 0u; i < 2; i++)
+        for (auto j = 0u; j < 2; j++)
+            butterfly_rotation_in_place(data, 2 + (4 * j) + (8 * i), 3 + (4 * j) + (8 * i), 48 + (64 * (i ^ j)), false);
+
+    // 11. Invoke the ADST output array permutation process specified in section 8.7.1.5 with the input variable n
+    // set equal to 4.
+    inverse_asymmetric_discrete_sine_transform_output_array_permutation(data, m_buffers.transform_temp, 4);
+
+    // 12. Set T[ 1+12*j+2*i ] equal to -T[ 1+12*j+2*i ] for i = 0..1, for j = 0..1.
+    for (auto i = 0u; i < 2; i++) {
+        for (auto j = 0u; j < 2; j++) {
+            auto index = 1 + (12 * j) + (2 * i);
+            data[index] = -data[index];
+        }
+    }
+    return {};
+}
+
+inline DecoderErrorOr<void> Decoder::inverse_asymmetric_discrete_sine_transform(Vector<Intermediate>& data, u8 log2_of_block_size)
+{
+    // 8.7.1.9 Inverse ADST Process
+    // Performs an in-place inverse ADST on the array T of size 2^n. Only 2 ≤ n ≤ 4 is accepted, and
+    // each of those sizes has its own dedicated process.
+    switch (log2_of_block_size) {
+    case 2:
+        // Inverse ADST4 process (section 8.7.1.6). It returns nothing, so success is reported here.
+        inverse_asymmetric_discrete_sine_transform_4(data);
+        return {};
+    case 3:
+        // Inverse ADST8 process (section 8.7.1.7).
+        return inverse_asymmetric_discrete_sine_transform_8(data);
+    case 4:
+        // Inverse ADST16 process (section 8.7.1.8).
+        return inverse_asymmetric_discrete_sine_transform_16(data);
+    default:
+        return DecoderError::corrupted("Block size was out of range"sv);
+    }
+}
+
+DecoderErrorOr<void> Decoder::inverse_transform_2d(Vector<Intermediate>& dequantized, u8 log2_of_block_size)
+{
+    // This process performs a 2D inverse transform for an array of size 2^n by 2^n stored in the 2D array Dequant.
+    // The input to this process is a variable n (log2_of_block_size) that specifies the base 2 logarithm of the width of the transform.
+
+    // 1. Set the variable n0 (block_size) equal to 1 << n.
+    auto block_size = 1u << log2_of_block_size;
+
+    Vector<Intermediate>& row_or_column = m_buffers.row_or_column;
+    DECODER_TRY_ALLOC(row_or_column.try_resize_and_keep_capacity(block_size));
+
+    // 2. The row transforms with i = 0..(n0-1) are applied as follows:
+    for (auto i = 0u; i < block_size; i++) {
+        // 1. Set T[ j ] equal to Dequant[ i ][ j ] for j = 0..(n0-1).
+        for (auto j = 0u; j < block_size; j++)
+            row_or_column[j] = dequantized[index_from_row_and_column(i, j, block_size)];
+
+        // 2. If Lossless is equal to 1, invoke the Inverse WHT process as specified in section 8.7.1.10 with shift equal
+        //    to 2. Both branches continue to step 5, so that the transformed row is always stored back.
+        if (m_parser->m_lossless) {
+            TRY(inverse_walsh_hadamard_transform(row_or_column, log2_of_block_size, 2));
+        } else {
+            switch (m_parser->m_tx_type) {
+            case DCT_DCT:
+            case ADST_DCT:
+                // Otherwise, if TxType is equal to DCT_DCT or TxType is equal to ADST_DCT, apply an inverse DCT as
+                // follows:
+                // 1. Invoke the inverse DCT permutation process as specified in section 8.7.1.2 with the input variable n.
+                TRY(inverse_discrete_cosine_transform_array_permutation(row_or_column, log2_of_block_size));
+                // 2. Invoke the inverse DCT process as specified in section 8.7.1.3 with the input variable n.
+                TRY(inverse_discrete_cosine_transform(row_or_column, log2_of_block_size));
+                break;
+            case DCT_ADST:
+            case ADST_ADST:
+                // 4. Otherwise (TxType is equal to DCT_ADST or TxType is equal to ADST_ADST), invoke the inverse ADST
+                //    process as specified in section 8.7.1.9 with input variable n.
+                TRY(inverse_asymmetric_discrete_sine_transform(row_or_column, log2_of_block_size));
+                break;
+            default:
+                return DecoderError::corrupted("Unknown tx_type"sv);
+            }
+        }
+
+        // 5. Set Dequant[ i ][ j ] equal to T[ j ] for j = 0..(n0-1).
+        for (auto j = 0u; j < block_size; j++)
+            dequantized[index_from_row_and_column(i, j, block_size)] = row_or_column[j];
+    }
+
+    // 3. The column transforms with j = 0..(n0-1) are applied as follows:
+    for (auto j = 0u; j < block_size; j++) {
+        // 1. Set T[ i ] equal to Dequant[ i ][ j ] for i = 0..(n0-1).
+        for (auto i = 0u; i < block_size; i++)
+            row_or_column[i] = dequantized[index_from_row_and_column(i, j, block_size)];
+
+        // 2. If Lossless is equal to 1, invoke the Inverse WHT process as specified in section 8.7.1.10 with shift equal
+        //    to 0. Both branches continue to step 5, so that the transformed column is always stored back.
+        if (m_parser->m_lossless) {
+            TRY(inverse_walsh_hadamard_transform(row_or_column, log2_of_block_size, 0));
+        } else {
+            switch (m_parser->m_tx_type) {
+            case DCT_DCT:
+            case DCT_ADST:
+                // Otherwise, if TxType is equal to DCT_DCT or TxType is equal to DCT_ADST, apply an inverse DCT as
+                // follows:
+                // 1. Invoke the inverse DCT permutation process as specified in section 8.7.1.2 with the input variable n.
+                TRY(inverse_discrete_cosine_transform_array_permutation(row_or_column, log2_of_block_size));
+                // 2. Invoke the inverse DCT process as specified in section 8.7.1.3 with the input variable n.
+                TRY(inverse_discrete_cosine_transform(row_or_column, log2_of_block_size));
+                break;
+            case ADST_DCT:
+            case ADST_ADST:
+                // 4. Otherwise (TxType is equal to ADST_DCT or TxType is equal to ADST_ADST), invoke the inverse ADST
+                //    process as specified in section 8.7.1.9 with input variable n.
+                TRY(inverse_asymmetric_discrete_sine_transform(row_or_column, log2_of_block_size));
+                break;
+            default:
+                VERIFY_NOT_REACHED();
+            }
+        }
+
+        // 5. If Lossless is equal to 1, set Dequant[ i ][ j ] equal to T[ i ] for i = 0..(n0-1).
+        for (auto i = 0u; i < block_size; i++)
+            dequantized[index_from_row_and_column(i, j, block_size)] = row_or_column[i];
+
+        // 6. Otherwise (Lossless is equal to 0), set Dequant[ i ][ j ] equal to Round2( T[ i ], Min( 6, n + 2 ) )
+        //    for i = 0..(n0-1).
+        if (!m_parser->m_lossless) {
+            for (auto i = 0u; i < block_size; i++) {
+                auto index = index_from_row_and_column(i, j, block_size);
+                dequantized[index] = round_2(dequantized[index], min(6, log2_of_block_size + 2));
+            }
+        }
+    }
+
+    return {};
 }
 
 DecoderErrorOr<void> Decoder::update_reference_frames()
diff --git a/Userland/Libraries/LibVideo/VP9/Decoder.h b/Userland/Libraries/LibVideo/VP9/Decoder.h
index b73b6017aa..e32510edb0 100644
--- a/Userland/Libraries/LibVideo/VP9/Decoder.h
+++ b/Userland/Libraries/LibVideo/VP9/Decoder.h
@@ -23,7 +23,20 @@ public:
     DecoderErrorOr<void> decode_frame(ByteBuffer const&);
     void dump_frame_info();
 
+    // FIXME: These functions should be replaced by a struct that contains
+    //        all the information needed to display a frame.
+    Vector<u16> const& get_output_buffer_for_plane(u8 plane) const;
+    Gfx::Size<size_t> get_y_plane_size();
+    bool get_uv_subsampling_y();
+    bool get_uv_subsampling_x();
+
 private:
+    using Intermediate = i32; // Element type of the intermediate sample/coefficient buffers declared below.
+
+    DecoderErrorOr<void> allocate_buffers();
+    Vector<Intermediate>& get_temp_buffer(u8 plane);
+    Vector<u16>& get_output_buffer(u8 plane);
+
     /* (8.4) Probability Adaptation Process */
     u8 merge_prob(u8 pre_prob, u8 count_0, u8 count_1, u8 count_sat, u8 max_update_factor);
     u8 merge_probs(int const* tree, int index, u8* probs, u8* counts, u8 count_sat, u8 max_update_factor);
@@ -33,16 +46,101 @@ private:
     u8 adapt_prob(u8 prob, u8 counts[2]);
 
     /* (8.5) Prediction Processes */
-    DecoderErrorOr<void> predict_intra(size_t plane, u32 x, u32 y, bool have_left, bool have_above, bool not_on_right, TXSize tx_size, u32 block_index);
-    DecoderErrorOr<void> predict_inter(size_t plane, u32 x, u32 y, u32 w, u32 h, u32 block_index);
+    // (8.5.1) Intra prediction process
+    DecoderErrorOr<void> predict_intra(u8 plane, u32 x, u32 y, bool have_left, bool have_above, bool not_on_right, TXSize tx_size, u32 block_index);
+
+    // (8.5.2) Inter prediction process
+    DecoderErrorOr<void> predict_inter(u8 plane, u32 x, u32 y, u32 w, u32 h, u32 block_index);
 
     /* (8.6) Reconstruction and Dequantization */
-    DecoderErrorOr<void> reconstruct(size_t plane, u32 transform_block_x, u32 transform_block_y, TXSize transform_block_size);
+
+    u16 dc_q(u8 b);
+    u16 ac_q(u8 b);
+    // Returns the quantizer index for the current block
+    u8 get_qindex();
+    // Returns the quantizer value for the dc coefficient for a particular plane
+    u16 get_dc_quant(u8 plane);
+    // Returns the quantizer value for the ac coefficient for a particular plane
+    u16 get_ac_quant(u8 plane);
+
+    // (8.6.2) Reconstruct process
+    DecoderErrorOr<void> reconstruct(u8 plane, u32 transform_block_x, u32 transform_block_y, TXSize transform_block_size);
+
+    // (8.7) Inverse transform process
+    DecoderErrorOr<void> inverse_transform_2d(Vector<Intermediate>& dequantized, u8 log2_of_block_size);
+
+    // (8.7.1) 1D Transforms
+    // (8.7.1.1) Butterfly functions
+
+    inline i32 cos64(u8 angle);
+    inline i32 sin64(u8 angle);
+    // The function B( a, b, angle, 0 ) performs a butterfly rotation.
+    inline void butterfly_rotation_in_place(Vector<Intermediate>& data, size_t index_a, size_t index_b, u8 angle, bool flip);
+    // The function H( a, b, 0 ) performs a Hadamard rotation.
+    inline void hadamard_rotation_in_place(Vector<Intermediate>& data, size_t index_a, size_t index_b, bool flip);
+    // The function SB( a, b, angle, 0 ) performs a butterfly rotation.
+    // Spec defines the source as array T, and the destination array as S.
+    template<typename S, typename D>
+    inline void butterfly_rotation(Vector<S>& source, Vector<D>& destination, size_t index_a, size_t index_b, u8 angle, bool flip);
+    // The function SH( a, b ) performs a Hadamard rotation and rounding.
+    // Spec defines the source array as S, and the destination array as T.
+    template<typename S, typename D>
+    inline void hadamard_rotation(Vector<S>& source, Vector<D>& destination, size_t index_a, size_t index_b);
+
+    template<typename T>
+    inline i32 round_2(T value, u8 bits);
+
+    // Checks whether the value is representable by a signed integer with (8 + bit_depth) bits.
+    inline bool check_intermediate_bounds(Intermediate value);
+
+    // (8.7.1.10) This process does an in-place Walsh-Hadamard transform of the array T (of length 4).
+    inline DecoderErrorOr<void> inverse_walsh_hadamard_transform(Vector<Intermediate>& data, u8 log2_of_block_size, u8 shift);
+
+    // (8.7.1.2) Inverse DCT array permutation process
+    inline DecoderErrorOr<void> inverse_discrete_cosine_transform_array_permutation(Vector<Intermediate>& data, u8 log2_of_block_size);
+    // (8.7.1.3) Inverse DCT process
+    inline DecoderErrorOr<void> inverse_discrete_cosine_transform(Vector<Intermediate>& data, u8 log2_of_block_size);
+
+    // (8.7.1.4) This process performs the in-place permutation of the array T of length 2^n which is required as the first step of
+    // the inverse ADST.
+    inline void inverse_asymmetric_discrete_sine_transform_input_array_permutation(Vector<Intermediate>& data, Vector<Intermediate>& temp, u8 log2_of_block_size);
+    // (8.7.1.5) This process performs the in-place permutation of the array T of length 2^n which is required before the final
+    // step of the inverse ADST.
+    inline void inverse_asymmetric_discrete_sine_transform_output_array_permutation(Vector<Intermediate>& data, Vector<Intermediate>& temp, u8 log2_of_block_size);
+
+    // (8.7.1.6) This process does an in-place transform of the array T to perform an inverse ADST.
+    inline void inverse_asymmetric_discrete_sine_transform_4(Vector<Intermediate>& data);
+    // (8.7.1.7) This process does an in-place transform of the array T using a higher precision array S for intermediate
+    // results.
+    inline DecoderErrorOr<void> inverse_asymmetric_discrete_sine_transform_8(Vector<Intermediate>& data);
+    // (8.7.1.8) This process does an in-place transform of the array T using a higher precision array S for intermediate
+    // results.
+    inline DecoderErrorOr<void> inverse_asymmetric_discrete_sine_transform_16(Vector<Intermediate>& data);
+    // (8.7.1.9) This process performs an in-place inverse ADST process on the array T of size 2^n for 2 ≤ n ≤ 4.
+    inline DecoderErrorOr<void> inverse_asymmetric_discrete_sine_transform(Vector<Intermediate>& data, u8 log2_of_block_size);
 
     /* (8.10) Reference Frame Update Process */
     DecoderErrorOr<void> update_reference_frames();
 
     NonnullOwnPtr<Parser> m_parser;
+
+    struct {
+        // FIXME: We may be able to consolidate some of these to reduce memory consumption.
+        Vector<Intermediate> dequantized;
+        Vector<Intermediate> row_or_column;
+
+        // predict_intra
+        Vector<Intermediate> above_row;
+        Vector<Intermediate> left_column;
+        Vector<Intermediate> predicted_samples;
+
+        // transforms (dct, adst)
+        Vector<Intermediate> transform_temp;
+        Vector<i64> adst_temp;
+
+        Vector<Intermediate> intermediate[3];
+        Vector<u16> output[3];
+    } m_buffers;
 };
 
 }
diff --git a/Userland/Libraries/LibVideo/VP9/Parser.cpp b/Userland/Libraries/LibVideo/VP9/Parser.cpp
index 3de4ab0dba..64d68d8ff0 100644
--- a/Userland/Libraries/LibVideo/VP9/Parser.cpp
+++ b/Userland/Libraries/LibVideo/VP9/Parser.cpp
@@ -5,10 +5,13 @@
  * SPDX-License-Identifier: BSD-2-Clause
  */
 
-#include "Parser.h"
-#include "Decoder.h"
-#include "Utilities.h"
 #include <AK/String.h>
+#include <LibGfx/Point.h>
+#include <LibGfx/Size.h>
+
+#include "Decoder.h"
+#include "Parser.h"
+#include "Utilities.h"
 
 namespace Video::VP9 {
 
@@ -23,22 +26,22 @@ Parser::Parser(Decoder& decoder)
 
 Parser::~Parser()
 {
-    cleanup_tile_allocations();
-    free(m_prev_segment_ids);
 }
 
 void Parser::cleanup_tile_allocations()
 {
-    free(m_skips);
-    free(m_tx_sizes);
-    free(m_mi_sizes);
-    free(m_y_modes);
-    free(m_segment_ids);
-    free(m_ref_frames);
-    free(m_interp_filters);
-    free(m_mvs);
-    free(m_sub_mvs);
-    free(m_sub_modes);
+    // FIXME: Is this necessary? Data should be truncated and
+    //        overwritten by the next tile.
+    m_skips.clear_with_capacity();
+    m_tx_sizes.clear_with_capacity();
+    m_mi_sizes.clear_with_capacity();
+    m_y_modes.clear_with_capacity();
+    m_segment_ids.clear_with_capacity();
+    m_ref_frames.clear_with_capacity();
+    m_interp_filters.clear_with_capacity();
+    m_mvs.clear_with_capacity();
+    m_sub_mvs.clear_with_capacity();
+    m_sub_modes.clear_with_capacity();
 }
 
 /* (6.1) */
@@ -63,6 +66,8 @@ DecoderErrorOr<void> Parser::parse_frame(ByteBuffer const& frame_data)
     dbgln("Finished reading compressed header");
     TRY_READ(m_bit_stream->exit_bool());
 
+    TRY(m_decoder.allocate_buffers());
+
     TRY(decode_tiles());
     TRY(refresh_probs());
 
@@ -335,11 +340,11 @@ DecoderErrorOr<void> Parser::loop_filter_params()
 
 DecoderErrorOr<void> Parser::quantization_params()
 {
-    auto base_q_idx = TRY_READ(m_bit_stream->read_f8());
-    auto delta_q_y_dc = TRY(read_delta_q());
-    auto delta_q_uv_dc = TRY(read_delta_q());
-    auto delta_q_uv_ac = TRY(read_delta_q());
-    m_lossless = base_q_idx == 0 && delta_q_y_dc == 0 && delta_q_uv_dc == 0 && delta_q_uv_ac == 0;
+    m_base_q_idx = TRY_READ(m_bit_stream->read_f8());
+    m_delta_q_y_dc = TRY(read_delta_q());
+    m_delta_q_uv_dc = TRY(read_delta_q());
+    m_delta_q_uv_ac = TRY(read_delta_q());
+    m_lossless = m_base_q_idx == 0 && m_delta_q_y_dc == 0 && m_delta_q_uv_dc == 0 && m_delta_q_uv_ac == 0;
     return {};
 }
 
@@ -441,9 +446,8 @@ void Parser::setup_past_independence()
         }
     }
     m_segmentation_abs_or_delta_update = false;
-    if (m_prev_segment_ids)
-        free(m_prev_segment_ids);
-    m_prev_segment_ids = static_cast<u8*>(kmalloc_array(m_mi_rows, m_mi_cols));
+    m_prev_segment_ids.clear_with_capacity();
+    m_prev_segment_ids.resize_and_keep_capacity(m_mi_rows * m_mi_cols);
     m_loop_filter_delta_enabled = true;
     m_loop_filter_ref_deltas[IntraFrame] = 1;
     m_loop_filter_ref_deltas[LastFrame] = 0;
@@ -561,8 +565,8 @@ DecoderErrorOr<void> Parser::read_coef_probs()
                         auto max_l = (k == 0) ? 3 : 6;
                         for (auto l = 0; l < max_l; l++) {
                             for (auto m = 0; m < 3; m++) {
-                                auto& coef_probs = m_probability_tables->coef_probs()[tx_size];
-                                coef_probs[i][j][k][l][m] = TRY(diff_update_prob(coef_probs[i][j][k][l][m]));
+                                auto& prob = m_probability_tables->coef_probs()[tx_size][i][j][k][l][m];
+                                prob = TRY(diff_update_prob(prob));
                             }
                         }
                     }
@@ -748,30 +752,28 @@ void Parser::setup_compound_reference_mode()
     }
 }
 
-void Parser::allocate_tile_data()
+DecoderErrorOr<void> Parser::allocate_tile_data()
 {
     auto dimensions = m_mi_rows * m_mi_cols;
-    if (dimensions == m_allocated_dimensions)
-        return;
     cleanup_tile_allocations();
-    m_skips = static_cast<bool*>(kmalloc_array(dimensions, sizeof(bool)));
-    m_tx_sizes = static_cast<TXSize*>(kmalloc_array(dimensions, sizeof(TXSize)));
-    m_mi_sizes = static_cast<u32*>(kmalloc_array(dimensions, sizeof(u32)));
-    m_y_modes = static_cast<u8*>(kmalloc_array(dimensions, sizeof(u8)));
-    m_segment_ids = static_cast<u8*>(kmalloc_array(dimensions, sizeof(u8)));
-    m_ref_frames = static_cast<ReferenceFrame*>(kmalloc_array(dimensions, 2, sizeof(ReferenceFrame)));
-    m_interp_filters = static_cast<InterpolationFilter*>(kmalloc_array(dimensions, sizeof(InterpolationFilter)));
-    m_mvs = static_cast<MV*>(kmalloc_array(dimensions, 2, sizeof(MV)));
-    m_sub_mvs = static_cast<MV*>(kmalloc_array(dimensions, 8, sizeof(MV)));
-    m_sub_modes = static_cast<IntraMode*>(kmalloc_array(dimensions, 4, sizeof(IntraMode)));
-    m_allocated_dimensions = dimensions;
+    DECODER_TRY_ALLOC(m_skips.try_resize_and_keep_capacity(dimensions));
+    DECODER_TRY_ALLOC(m_tx_sizes.try_resize_and_keep_capacity(dimensions));
+    DECODER_TRY_ALLOC(m_mi_sizes.try_resize_and_keep_capacity(dimensions));
+    DECODER_TRY_ALLOC(m_y_modes.try_resize_and_keep_capacity(dimensions));
+    DECODER_TRY_ALLOC(m_segment_ids.try_resize_and_keep_capacity(dimensions));
+    DECODER_TRY_ALLOC(m_ref_frames.try_resize_and_keep_capacity(dimensions));
+    DECODER_TRY_ALLOC(m_interp_filters.try_resize_and_keep_capacity(dimensions));
+    DECODER_TRY_ALLOC(m_mvs.try_resize_and_keep_capacity(dimensions));
+    DECODER_TRY_ALLOC(m_sub_mvs.try_resize_and_keep_capacity(dimensions));
+    DECODER_TRY_ALLOC(m_sub_modes.try_resize_and_keep_capacity(dimensions));
+    return {};
 }
 
 DecoderErrorOr<void> Parser::decode_tiles()
 {
     auto tile_cols = 1 << m_tile_cols_log2;
     auto tile_rows = 1 << m_tile_rows_log2;
-    allocate_tile_data();
+    TRY(allocate_tile_data());
     clear_above_context();
     for (auto tile_row = 0; tile_row < tile_rows; tile_row++) {
         for (auto tile_col = 0; tile_col < tile_cols; tile_col++) {
@@ -826,9 +828,7 @@ DecoderErrorOr<void> Parser::decode_tile()
 {
     for (auto row = m_mi_row_start; row < m_mi_row_end; row += 8) {
         clear_left_context();
-        m_row = row;
         for (auto col = m_mi_col_start; col < m_mi_col_end; col += 8) {
-            m_col = col;
             TRY(decode_partition(row, col, Block_64x64));
         }
     }
@@ -845,14 +845,16 @@ void Parser::clear_left_context()
 DecoderErrorOr<void> Parser::decode_partition(u32 row, u32 col, u8 block_subsize)
 {
     if (row >= m_mi_rows || col >= m_mi_cols)
-        return DecoderError::corrupted("Row or column were outside valid ranges"sv);
+        return {};
     m_block_subsize = block_subsize;
     m_num_8x8 = num_8x8_blocks_wide_lookup[block_subsize];
     auto half_block_8x8 = m_num_8x8 >> 1;
     m_has_rows = (row + half_block_8x8) < m_mi_rows;
     m_has_cols = (col + half_block_8x8) < m_mi_cols;
-
+    m_row = row;
+    m_col = col;
     auto partition = TRY_READ(m_tree_parser->parse_tree(SyntaxElementType::Partition));
+
     auto subsize = subsize_lookup[partition][block_subsize];
     if (subsize < Block_8x8 || partition == PartitionNone) {
         TRY(decode_block(row, col, subsize));
@@ -871,15 +873,22 @@ DecoderErrorOr<void> Parser::decode_partition(u32 row, u32 col, u8 block_subsize
         TRY(decode_partition(row + half_block_8x8, col + half_block_8x8, subsize));
     }
     if (block_subsize == Block_8x8 || partition != PartitionSplit) {
+        auto above_context = 15 >> b_width_log2_lookup[subsize];
+        auto left_context = 15 >> b_height_log2_lookup[subsize];
         for (size_t i = 0; i < m_num_8x8; i++) {
-            m_above_partition_context[col + i] = 15 >> b_width_log2_lookup[subsize];
-            m_left_partition_context[row + i] = 15 >> b_width_log2_lookup[subsize];
+            m_above_partition_context[col + i] = above_context;
+            m_left_partition_context[row + i] = left_context;
         }
     }
     return {};
 }
 
-DecoderErrorOr<void> Parser::decode_block(u32 row, u32 col, u8 subsize)
+size_t Parser::get_image_index(u32 row, u32 column)
+{
+    return column + m_mi_cols * row; // Row-major position of block (row, column) in a frame that is m_mi_cols blocks wide.
+}
+
+DecoderErrorOr<void> Parser::decode_block(u32 row, u32 col, BlockSubsize subsize)
 {
     m_mi_row = row;
     m_mi_col = col;
@@ -893,25 +902,24 @@ DecoderErrorOr<void> Parser::decode_block(u32 row, u32 col, u8 subsize)
         m_skip = true;
     for (size_t y = 0; y < num_8x8_blocks_high_lookup[subsize]; y++) {
         for (size_t x = 0; x < num_8x8_blocks_wide_lookup[subsize]; x++) {
-            auto pos = (row + y) * m_mi_cols + (col + x);
+            auto pos = get_image_index(row + y, col + x);
             m_skips[pos] = m_skip;
             m_tx_sizes[pos] = m_tx_size;
             m_mi_sizes[pos] = m_mi_size;
             m_y_modes[pos] = m_y_mode;
             m_segment_ids[pos] = m_segment_id;
             for (size_t ref_list = 0; ref_list < 2; ref_list++)
-                m_ref_frames[(pos * 2) + ref_list] = m_ref_frame[ref_list];
+                m_ref_frames[pos][ref_list] = m_ref_frame[ref_list];
             if (m_is_inter) {
                 m_interp_filters[pos] = m_interp_filter;
                 for (size_t ref_list = 0; ref_list < 2; ref_list++) {
-                    auto pos_with_ref_list = (pos * 2 + ref_list) * sizeof(MV);
-                    m_mvs[pos_with_ref_list] = m_block_mvs[ref_list][3];
+                    m_mvs[pos][ref_list] = m_block_mvs[ref_list][3];
                     for (size_t b = 0; b < 4; b++)
-                        m_sub_mvs[pos_with_ref_list * 4 + b * sizeof(MV)] = m_block_mvs[ref_list][b];
+                        m_sub_mvs[pos][ref_list][b] = m_block_mvs[ref_list][b];
                 }
             } else {
                 for (size_t b = 0; b < 4; b++)
-                    m_sub_modes[pos * 4 + b] = static_cast<IntraMode>(m_block_sub_modes[b]);
+                    m_sub_modes[pos][b] = static_cast<IntraMode>(m_block_sub_modes[b]);
             }
         }
     }
@@ -998,10 +1006,10 @@ DecoderErrorOr<void> Parser::read_tx_size(bool allow_select)
 
 DecoderErrorOr<void> Parser::inter_frame_mode_info()
 {
-    m_left_ref_frame[0] = m_available_l ? m_ref_frames[m_mi_row * m_mi_cols + (m_mi_col - 1)] : IntraFrame;
-    m_above_ref_frame[0] = m_available_u ? m_ref_frames[(m_mi_row - 1) * m_mi_cols + m_mi_col] : IntraFrame;
-    m_left_ref_frame[1] = m_available_l ? m_ref_frames[m_mi_row * m_mi_cols + (m_mi_col - 1) + 1] : None;
-    m_above_ref_frame[1] = m_available_u ? m_ref_frames[(m_mi_row - 1) * m_mi_cols + m_mi_col + 1] : None;
+    m_left_ref_frame[0] = m_available_l ? m_ref_frames[get_image_index(m_mi_row, m_mi_col - 1)][0] : IntraFrame;
+    m_above_ref_frame[0] = m_available_u ? m_ref_frames[get_image_index(m_mi_row - 1, m_mi_col)][0] : IntraFrame;
+    m_left_ref_frame[1] = m_available_l ? m_ref_frames[get_image_index(m_mi_row, m_mi_col - 1)][1] : None;
+    m_above_ref_frame[1] = m_available_u ? m_ref_frames[get_image_index(m_mi_row - 1, m_mi_col)][1] : None;
     m_left_intra = m_left_ref_frame[0] <= IntraFrame;
     m_above_intra = m_above_ref_frame[0] <= IntraFrame;
     m_left_single = m_left_ref_frame[1] <= None;
@@ -1234,10 +1242,23 @@ DecoderErrorOr<i32> Parser::read_mv_component(u8)
     return (mv_sign ? -1 : 1) * static_cast<i32>(mag);
 }
 
+Gfx::Point<size_t> Parser::get_decoded_point_for_plane(u8 column, u8 row, u8 plane) // NOTE(review): u8 column/row cannot hold values above 255; confirm no caller passes more.
+{
+    if (plane == 0) // Plane 0 is returned at full resolution: column and row are only scaled by 8.
+        return { column * 8, row * 8 };
+    return { (column * 8) >> m_subsampling_x, (row * 8) >> m_subsampling_y }; // Other planes are additionally shifted down by m_subsampling_x / m_subsampling_y.
+}
+
+Gfx::Size<size_t> Parser::get_decoded_size_for_plane(u8 plane)
+{
+    // Computed in size_t: get_decoded_point_for_plane() takes u8 and would truncate m_mi_cols/m_mi_rows above 255.
+    return { (static_cast<size_t>(m_mi_cols) * 8) >> (plane == 0 ? 0 : m_subsampling_x), (static_cast<size_t>(m_mi_rows) * 8) >> (plane == 0 ? 0 : m_subsampling_y) };
+}
+
 DecoderErrorOr<void> Parser::residual()
 {
     auto block_size = m_mi_size < Block_8x8 ? Block_8x8 : static_cast<BlockSubsize>(m_mi_size);
-    for (size_t plane = 0; plane < 3; plane++) {
+    for (u8 plane = 0; plane < 3; plane++) {
         auto tx_size = (plane > 0) ? get_uv_tx_size() : m_tx_size;
         auto step = 1 << tx_size;
         auto plane_size = get_plane_block_size(block_size, plane);
@@ -1274,10 +1295,8 @@ DecoderErrorOr<void> Parser::residual()
                         TRY(m_decoder.reconstruct(plane, start_x, start_y, tx_size));
                     }
                 }
-                auto above_sub_context = m_above_nonzero_context[plane];
-                auto left_sub_context = m_left_nonzero_context[plane];
-                above_sub_context.resize_and_keep_capacity((start_x >> 2) + step);
-                left_sub_context.resize_and_keep_capacity((start_y >> 2) + step);
+                auto& above_sub_context = m_above_nonzero_context[plane];
+                auto& left_sub_context = m_left_nonzero_context[plane];
                 for (auto i = 0; i < step; i++) {
                     above_sub_context[(start_x >> 2) + i] = non_zero;
                     left_sub_context[(start_y >> 2) + i] = non_zero;
@@ -1378,7 +1397,7 @@ DecoderErrorOr<i32> Parser::read_coef(Token token)
 {
     auto cat = extra_bits[token][0];
     auto num_extra = extra_bits[token][1];
-    auto coef = extra_bits[token][2];
+    u32 coef = extra_bits[token][2];
     if (token == DctValCat6) {
         for (size_t e = 0; e < (u8)(m_bit_depth - 8); e++) {
             auto high_bit = TRY_READ(m_bit_stream->read_bool(255));
diff --git a/Userland/Libraries/LibVideo/VP9/Parser.h b/Userland/Libraries/LibVideo/VP9/Parser.h
index 50a6dd059f..dae569757f 100644
--- a/Userland/Libraries/LibVideo/VP9/Parser.h
+++ b/Userland/Libraries/LibVideo/VP9/Parser.h
@@ -40,7 +40,7 @@ private:
     /* Utilities */
     void clear_context(Vector<u8>& context, size_t size);
     void clear_context(Vector<Vector<u8>>& context, size_t outer_size, size_t inner_size);
-    void allocate_tile_data();
+    DecoderErrorOr<void> allocate_tile_data();
     void cleanup_tile_allocations();
 
     /* (6.1) Frame Syntax */
@@ -94,7 +94,7 @@ private:
     DecoderErrorOr<void> decode_tile();
     void clear_left_context();
     DecoderErrorOr<void> decode_partition(u32 row, u32 col, u8 block_subsize);
-    DecoderErrorOr<void> decode_block(u32 row, u32 col, u8 subsize);
+    DecoderErrorOr<void> decode_block(u32 row, u32 col, BlockSubsize subsize);
     DecoderErrorOr<void> mode_info();
     DecoderErrorOr<void> intra_frame_mode_info();
     DecoderErrorOr<void> intra_segment_id();
@@ -123,6 +123,10 @@ private:
     DecoderErrorOr<void> find_best_ref_mvs(int ref_list);
     DecoderErrorOr<void> append_sub8x8_mvs(u8 block, u8 ref_list);
     DecoderErrorOr<bool> use_mv_hp(MV const& delta_mv);
+    size_t get_image_index(u32 row, u32 column);
+
+    Gfx::Point<size_t> get_decoded_point_for_plane(u8 column, u8 row, u8 plane); // Takes the column (x) first, then the row (y).
+    Gfx::Size<size_t> get_decoded_size_for_plane(u8 plane);
 
     u8 m_profile { 0 };
     u8 m_frame_to_show_map_index { 0 };
@@ -131,8 +135,8 @@ private:
     u8 m_loop_filter_level { 0 };
     u8 m_loop_filter_sharpness { 0 };
     bool m_loop_filter_delta_enabled { false };
-    FrameType m_frame_type;
-    FrameType m_last_frame_type;
+    FrameType m_frame_type { FrameType::KeyFrame };
+    FrameType m_last_frame_type { FrameType::KeyFrame };
     bool m_show_frame { false };
     bool m_error_resilient_mode { false };
     bool m_frame_is_intra { false };
@@ -157,7 +161,11 @@ private:
     u32 m_mi_rows { 0 };
     u32 m_sb64_cols { 0 };
     u32 m_sb64_rows { 0 };
-    InterpolationFilter m_interpolation_filter;
+    InterpolationFilter m_interpolation_filter { 0xf };
+    u8 m_base_q_idx { 0 };
+    i8 m_delta_q_y_dc { 0 };
+    i8 m_delta_q_uv_dc { 0 };
+    i8 m_delta_q_uv_ac { 0 };
     bool m_lossless { false };
     u8 m_segmentation_tree_probs[7];
     u8 m_segmentation_pred_prob[3];
@@ -184,17 +192,24 @@ private:
     u32 m_mi_col_end { 0 };
     u32 m_mi_row { 0 };
     u32 m_mi_col { 0 };
-    u32 m_mi_size { 0 };
+    BlockSubsize m_mi_size { 0 };
     bool m_available_u { false };
     bool m_available_l { false };
     u8 m_segment_id { 0 };
+    // FIXME: Should this be an enum?
+    // skip equal to 0 indicates that there may be some transform coefficients to read for this block; skip equal to 1
+    // indicates that there are no transform coefficients.
+    //
+    // skip may be set to 0 even if transform blocks contain immediate end of block markers.
     bool m_skip { false };
     u8 m_num_8x8 { 0 };
     bool m_has_rows { false };
     bool m_has_cols { false };
     TXSize m_max_tx_size { TX_4x4 };
     u8 m_block_subsize { 0 };
+    // The row to use for getting partition tree probability lookups.
     u32 m_row { 0 };
+    // The column to use for getting partition tree probability lookups.
     u32 m_col { 0 };
     TXSize m_tx_size { TX_4x4 };
     ReferenceFrame m_ref_frame[2];
@@ -228,19 +243,18 @@ private:
     ReferenceFrame m_comp_fixed_ref;
     ReferenceFrame m_comp_var_ref[2];
     MV m_block_mvs[2][4];
-    u8* m_prev_segment_ids { nullptr };
+    Vector<u8> m_prev_segment_ids;
 
-    u32 m_allocated_dimensions { 0 };
-    bool* m_skips { nullptr };
-    TXSize* m_tx_sizes { nullptr };
-    u32* m_mi_sizes { nullptr };
-    u8* m_y_modes { nullptr };
-    u8* m_segment_ids { nullptr };
-    ReferenceFrame* m_ref_frames { nullptr };
-    InterpolationFilter* m_interp_filters { nullptr };
-    MV* m_mvs { nullptr };
-    MV* m_sub_mvs { nullptr };
-    IntraMode* m_sub_modes { nullptr };
+    Vector<bool> m_skips;
+    Vector<TXSize> m_tx_sizes;
+    Vector<u32> m_mi_sizes;
+    Vector<u8> m_y_modes;
+    Vector<u8> m_segment_ids;
+    Vector<Array<ReferenceFrame, 2>> m_ref_frames;
+    Vector<InterpolationFilter> m_interp_filters;
+    Vector<Array<MV, 2>> m_mvs;
+    Vector<Array<Array<MV, 4>, 2>> m_sub_mvs;
+    Vector<Array<IntraMode, 4>> m_sub_modes;
 
     OwnPtr<BitStream> m_bit_stream;
     OwnPtr<ProbabilityTables> m_probability_tables;
diff --git a/Userland/Libraries/LibVideo/VP9/TreeParser.cpp b/Userland/Libraries/LibVideo/VP9/TreeParser.cpp
index a568168c67..83c2a9f63a 100644
--- a/Userland/Libraries/LibVideo/VP9/TreeParser.cpp
+++ b/Userland/Libraries/LibVideo/VP9/TreeParser.cpp
@@ -212,17 +212,17 @@ u8 TreeParser::calculate_default_intra_mode_probability(u8 node)
     u32 above_mode, left_mode;
     if (m_decoder.m_mi_size >= Block_8x8) {
         above_mode = AVAIL_U
-            ? m_decoder.m_sub_modes[(m_decoder.m_mi_row - 1) * m_decoder.m_mi_cols * 4 + m_decoder.m_mi_col * 4 + 2]
+            ? m_decoder.m_sub_modes[m_decoder.get_image_index(m_decoder.m_mi_row - 1, m_decoder.m_mi_col)][2]
             : DcPred;
         left_mode = AVAIL_L
-            ? m_decoder.m_sub_modes[m_decoder.m_mi_row * m_decoder.m_mi_cols * 4 + (m_decoder.m_mi_col - 1) * 4 + 1]
+            ? m_decoder.m_sub_modes[m_decoder.get_image_index(m_decoder.m_mi_row, m_decoder.m_mi_col - 1)][1]
             : DcPred;
     } else {
         if (m_idy) {
             above_mode = m_decoder.m_block_sub_modes[m_idx];
         } else {
             above_mode = AVAIL_U
-                ? m_decoder.m_sub_modes[(m_decoder.m_mi_row - 1) * m_decoder.m_mi_cols * 4 + m_decoder.m_mi_col * 4 + 2 + m_idx]
+                ? m_decoder.m_sub_modes[m_decoder.get_image_index(m_decoder.m_mi_row - 1, m_decoder.m_mi_col)][2 + m_idx]
                 : DcPred;
         }
 
@@ -230,7 +230,7 @@ u8 TreeParser::calculate_default_intra_mode_probability(u8 node)
             left_mode = m_decoder.m_block_sub_modes[m_idy * 2];
         } else {
             left_mode = AVAIL_L
-                ? m_decoder.m_sub_modes[m_decoder.m_mi_row * m_decoder.m_mi_cols * 4 + (m_decoder.m_mi_col - 1) * 4 + 1 + m_idy * 2]
+                ? m_decoder.m_sub_modes[m_decoder.get_image_index(m_decoder.m_mi_row, m_decoder.m_mi_col - 1)][1 + m_idy * 2]
                 : DcPred;
         }
     }
@@ -544,12 +544,16 @@ u8 TreeParser::calculate_tx_size_probability(u8 node)
 {
     auto above = m_decoder.m_max_tx_size;
     auto left = m_decoder.m_max_tx_size;
-    auto u_pos = (m_decoder.m_mi_row - 1) * m_decoder.m_mi_cols + m_decoder.m_mi_col;
-    if (AVAIL_U && !m_decoder.m_skips[u_pos])
-        above = m_decoder.m_tx_sizes[u_pos];
-    auto l_pos = m_decoder.m_mi_row * m_decoder.m_mi_cols + m_decoder.m_mi_col - 1;
-    if (AVAIL_L && !m_decoder.m_skips[l_pos])
-        left = m_decoder.m_tx_sizes[l_pos];
+    if (AVAIL_U) {
+        auto u_pos = (m_decoder.m_mi_row - 1) * m_decoder.m_mi_cols + m_decoder.m_mi_col;
+        if (!m_decoder.m_skips[u_pos])
+            above = m_decoder.m_tx_sizes[u_pos];
+    }
+    if (AVAIL_L) {
+        auto l_pos = m_decoder.m_mi_row * m_decoder.m_mi_cols + m_decoder.m_mi_col - 1;
+        if (!m_decoder.m_skips[l_pos])
+            left = m_decoder.m_tx_sizes[l_pos];
+    }
     if (!AVAIL_L)
         left = above;
     if (!AVAIL_U)
@@ -582,20 +586,14 @@ u8 TreeParser::calculate_interp_filter_probability(u8 node)
     return m_decoder.m_probability_tables->interp_filter_probs()[m_ctx][node];
 }
 
-u8 TreeParser::calculate_token_probability(u8 node)
+void TreeParser::set_tokens_variables(u8 band, u32 c, u32 plane, TXSize tx_size, u32 pos)
 {
-    auto prob = m_decoder.m_probability_tables->coef_probs()[m_tx_size][m_plane > 0][m_decoder.m_is_inter][m_band][m_ctx][min(2, 1 + node)];
-    if (node < 2)
-        return prob;
-    auto x = (prob - 1) / 2;
-    auto& pareto_table = m_decoder.m_probability_tables->pareto_table();
-    if (prob & 1)
-        return pareto_table[x][node - 2];
-    return (pareto_table[x][node - 2] + pareto_table[x + 1][node - 2]) >> 1;
-}
+    m_band = band;
+    m_c = c;
+    m_plane = plane;
+    m_tx_size = tx_size;
+    m_pos = pos;
 
-u8 TreeParser::calculate_more_coefs_probability()
-{
     if (m_c == 0) {
         auto sx = m_plane > 0 ? m_decoder.m_subsampling_x : 0;
         auto sy = m_plane > 0 ? m_decoder.m_subsampling_y : 0;
@@ -618,7 +616,7 @@ u8 TreeParser::calculate_more_coefs_probability()
         auto n = 4 << m_tx_size;
         auto i = m_pos / n;
         auto j = m_pos % n;
-        auto a = (i - 1) * n + j;
+        auto a = i > 0 ? (i - 1) * n + j : 0;
         auto a2 = i * n + j - 1;
         if (i > 0 && j > 0) {
             if (m_decoder.m_tx_type == DCT_ADST) {
@@ -640,9 +638,25 @@ u8 TreeParser::calculate_more_coefs_probability()
         }
         m_ctx = (1 + m_decoder.m_token_cache[neighbor_0] + m_decoder.m_token_cache[neighbor_1]) >> 1;
     }
+}
+
+u8 TreeParser::calculate_more_coefs_probability()
+{
     return m_decoder.m_probability_tables->coef_probs()[m_tx_size][m_plane > 0][m_decoder.m_is_inter][m_band][m_ctx][0];
 }
 
+u8 TreeParser::calculate_token_probability(u8 node)
+{
+    // Nodes 0 and 1 return the stored coefficient probability as-is; deeper nodes are looked up in the Pareto table.
+    auto base_probability = m_decoder.m_probability_tables->coef_probs()[m_tx_size][m_plane > 0][m_decoder.m_is_inter][m_band][m_ctx][min(2, 1 + node)];
+    if (node < 2)
+        return base_probability;
+    auto const& pareto_table = m_decoder.m_probability_tables->pareto_table();
+    auto table_row = (base_probability - 1) / 2;
+    auto table_column = node - 2;
+    return (base_probability & 1) ? pareto_table[table_row][table_column] : (pareto_table[table_row][table_column] + pareto_table[table_row + 1][table_column]) >> 1;
+}
+
 void TreeParser::count_syntax_element(SyntaxElementType type, int value)
 {
     switch (type) {
diff --git a/Userland/Libraries/LibVideo/VP9/TreeParser.h b/Userland/Libraries/LibVideo/VP9/TreeParser.h
index 87df3a4501..ba0bf6f987 100644
--- a/Userland/Libraries/LibVideo/VP9/TreeParser.h
+++ b/Userland/Libraries/LibVideo/VP9/TreeParser.h
@@ -57,14 +57,7 @@ public:
         m_idy = idy;
     }
 
-    void set_tokens_variables(u8 band, u32 c, u32 plane, TXSize tx_size, u32 pos)
-    {
-        m_band = band;
-        m_c = c;
-        m_plane = plane;
-        m_tx_size = tx_size;
-        m_pos = pos;
-    }
+    void set_tokens_variables(u8 band, u32 c, u32 plane, TXSize tx_size, u32 pos);
 
     void set_start_x_and_y(u32 start_x, u32 start_y)
     {
diff --git a/Userland/Libraries/LibVideo/VP9/Utilities.cpp b/Userland/Libraries/LibVideo/VP9/Utilities.cpp
deleted file mode 100644
index 70336efc6d..0000000000
--- a/Userland/Libraries/LibVideo/VP9/Utilities.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (c) 2021, Hunter Salyer <thefalsehonesty@gmail.com>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
-
-#include "Utilities.h"
-
-namespace Video::VP9 {
-
-u8 clip_3(u8 x, u8 y, u8 z)
-{
-    return clamp(z, x, y);
-}
-
-u8 round_2(u8 x, u8 n)
-{
-    return (x + (1 << (n - 1))) >> n;
-}
-
-}
diff --git a/Userland/Libraries/LibVideo/VP9/Utilities.h b/Userland/Libraries/LibVideo/VP9/Utilities.h
index 0d64125fa6..08b073791d 100644
--- a/Userland/Libraries/LibVideo/VP9/Utilities.h
+++ b/Userland/Libraries/LibVideo/VP9/Utilities.h
@@ -11,7 +11,35 @@
 
 namespace Video::VP9 {
 
-u8 clip_3(u8 x, u8 y, u8 z);
-u8 round_2(u8 x, u8 n);
+// Clamps z into the range [x, y]; equivalent to clamp(z, x, y).
+// FIXME: Once everything is working, replace calls with plain clamp(), minding its different parameter order.
+template<typename T>
+T clip_3(T x, T y, T z)
+{
+    return clamp(z, x, y);
+}
+
+template<typename T>
+u16 clip_1(u8 bit_depth, T x)
+{
+    // Saturates x to the range [0, (1 << bit_depth) - 1] and returns it as a u16.
+    if (x < 0)
+        return 0u;
+
+    // Largest value allowed for the given bit depth, converted to T for the comparison.
+    T const upper_limit = (1u << bit_depth) - 1u;
+    return x > upper_limit ? upper_limit : x;
+}
+
+template<typename T, typename C>
+inline T brev(C bit_count, T value)
+{
+    // Reverses the lowest bit_count bits of value: input bit i lands on output bit (bit_count - 1 - i).
+    T reversed = 0;
+    for (C source_bit = 0; source_bit < bit_count; source_bit++)
+        reversed |= ((value >> source_bit) & 1) << (bit_count - 1 - source_bit);
+
+    return reversed;
+}
 
 }