mirror of
https://github.com/RGBCube/serenity
synced 2025-10-24 18:42:35 +00:00
LibGfx/JBIG2: Add arithmetic coding decoder
I think the context normally changes for every bit, but this is enough to correctly decode the test bitstream in Annex H.2 of the spec, which seems like a good checkpoint. The internals of the decoder use the spec's naming so that the code looks virtually identical to what's in the spec. (Even so, I managed to introduce several typos that took a while to track down.)
This commit is contained in:
parent
c4be9318a2
commit
df9dd8ec69
3 changed files with 294 additions and 0 deletions
|
@ -347,6 +347,39 @@ TEST_CASE(test_jbig2_white_47x23)
|
||||||
EXPECT_EQ(pixel, Gfx::Color(Gfx::Color::White).value());
|
EXPECT_EQ(pixel, Gfx::Color(Gfx::Color::White).value());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_CASE(test_jbig2_arithmetic_decoder)
{
    // Reference vectors from https://www.itu.int/rec/T-REC-T.88-201808-I,
    // H.2 Test sequence for arithmetic coder.
    // clang-format off
    constexpr auto encoded = to_array<u8>({
        0x84, 0xC7, 0x3B, 0xFC, 0xE1, 0xA1, 0x43, 0x04,
        0x02, 0x20, 0x00, 0x00, 0x41, 0x0D, 0xBB, 0x86,
        0xF4, 0x31, 0x7F, 0xFF, 0x88, 0xFF, 0x37, 0x47,
        0x1A, 0xDB, 0x6A, 0xDF, 0xFF, 0xAC
    });
    constexpr auto expected_bytes = to_array<u8>({
        0x00, 0x02, 0x00, 0x51, 0x00, 0x00, 0x00, 0xC0,
        0x03, 0x52, 0x87, 0x2A, 0xAA, 0xAA, 0xAA, 0xAA,
        0x82, 0xC0, 0x20, 0x00, 0xFC, 0xD7, 0x9E, 0xF6,
        0xBF, 0x7F, 0xED, 0x90, 0x4F, 0x46, 0xA3, 0xBF
    });
    // clang-format on

    // "For this entire test, a single value of CX is used. I(CX) is initially 0 and MPS(CX) is initially 0."
    Gfx::JBIG2::ArithmeticDecoder::Context context { 0, 0 };
    auto decoder = MUST(Gfx::JBIG2::ArithmeticDecoder::initialize(encoded, context));

    // Reassemble the decoded bit stream into bytes, most significant bit first,
    // and compare each byte with the reference output.
    for (auto expected : expected_bytes) {
        u8 actual = 0;
        for (size_t bit_index = 0; bit_index < 8; ++bit_index)
            actual = static_cast<u8>((actual << 1) | static_cast<u8>(decoder.get_next_bit()));
        EXPECT_EQ(actual, expected);
    }
}
|
||||||
|
|
||||||
TEST_CASE(test_jpeg_sof0_one_scan)
|
TEST_CASE(test_jpeg_sof0_one_scan)
|
||||||
{
|
{
|
||||||
auto file = TRY_OR_FAIL(Core::MappedFile::map(TEST_INPUT("jpg/rgb24.jpg"sv)));
|
auto file = TRY_OR_FAIL(Core::MappedFile::map(TEST_INPUT("jpg/rgb24.jpg"sv)));
|
||||||
|
|
|
@ -16,6 +16,207 @@
|
||||||
|
|
||||||
namespace Gfx {
|
namespace Gfx {
|
||||||
|
|
||||||
|
namespace JBIG2 {
|
||||||
|
|
||||||
|
// Table E.1 – Qe values and probability estimation process
|
||||||
|
// See also E.1.2 Coding conventions and approximations
|
||||||
|
// and E.2.5 Probability estimation.
|
||||||
|
struct QeEntry {
|
||||||
|
u16 qe; // Sub-interval for the less probable symbol.
|
||||||
|
u16 nmps; // Next index if the more probable symbol is decoded
|
||||||
|
u16 nlps; // Next index if the less probable symbol is decoded
|
||||||
|
u16 switch_flag; // See second-to-last paragraph in E.1.2.
|
||||||
|
};
|
||||||
|
// Rows of Table E.1 in index order; the position of a row is the index I that the
// NMPS / NLPS columns refer to.
// Columns:            Qe   NMPS NLPS SWITCH
constexpr auto qe_table = to_array<QeEntry>({
    /*  0 */ { 0x5601, 1, 1, 1 },
    /*  1 */ { 0x3401, 2, 6, 0 },
    /*  2 */ { 0x1801, 3, 9, 0 },
    /*  3 */ { 0x0AC1, 4, 12, 0 },
    /*  4 */ { 0x0521, 5, 29, 0 },
    /*  5 */ { 0x0221, 38, 33, 0 },
    /*  6 */ { 0x5601, 7, 6, 1 },
    /*  7 */ { 0x5401, 8, 14, 0 },
    /*  8 */ { 0x4801, 9, 14, 0 },
    /*  9 */ { 0x3801, 10, 14, 0 },
    /* 10 */ { 0x3001, 11, 17, 0 },
    /* 11 */ { 0x2401, 12, 18, 0 },
    /* 12 */ { 0x1C01, 13, 20, 0 },
    /* 13 */ { 0x1601, 29, 21, 0 },
    /* 14 */ { 0x5601, 15, 14, 1 },
    /* 15 */ { 0x5401, 16, 14, 0 },
    /* 16 */ { 0x5101, 17, 15, 0 },
    /* 17 */ { 0x4801, 18, 16, 0 },
    /* 18 */ { 0x3801, 19, 17, 0 },
    /* 19 */ { 0x3401, 20, 18, 0 },
    /* 20 */ { 0x3001, 21, 19, 0 },
    /* 21 */ { 0x2801, 22, 19, 0 },
    /* 22 */ { 0x2401, 23, 20, 0 },
    /* 23 */ { 0x2201, 24, 21, 0 },
    /* 24 */ { 0x1C01, 25, 22, 0 },
    /* 25 */ { 0x1801, 26, 23, 0 },
    /* 26 */ { 0x1601, 27, 24, 0 },
    /* 27 */ { 0x1401, 28, 25, 0 },
    /* 28 */ { 0x1201, 29, 26, 0 },
    /* 29 */ { 0x1101, 30, 27, 0 },
    /* 30 */ { 0x0AC1, 31, 28, 0 },
    /* 31 */ { 0x09C1, 32, 29, 0 },
    /* 32 */ { 0x08A1, 33, 30, 0 },
    /* 33 */ { 0x0521, 34, 31, 0 },
    /* 34 */ { 0x0441, 35, 32, 0 },
    /* 35 */ { 0x02A1, 36, 33, 0 },
    /* 36 */ { 0x0221, 37, 34, 0 },
    /* 37 */ { 0x0141, 38, 35, 0 },
    /* 38 */ { 0x0111, 39, 36, 0 },
    /* 39 */ { 0x0085, 40, 37, 0 },
    /* 40 */ { 0x0049, 41, 38, 0 },
    /* 41 */ { 0x0025, 42, 39, 0 },
    /* 42 */ { 0x0015, 43, 40, 0 },
    /* 43 */ { 0x0009, 44, 41, 0 },
    /* 44 */ { 0x0005, 45, 42, 0 },
    /* 45 */ { 0x0001, 45, 43, 0 },
    /* 46 */ { 0x5601, 46, 46, 0 },
});
|
||||||
|
|
||||||
|
// Creates a decoder over `data`, installs `context` as its CX state and runs
// INITDEC() so that the returned decoder is ready for get_next_bit().
ErrorOr<ArithmeticDecoder> ArithmeticDecoder::initialize(ReadonlyBytes data, Context context)
{
    ArithmeticDecoder arithmetic_decoder { data };

    arithmetic_decoder.CX = context;
    arithmetic_decoder.INITDEC();

    return arithmetic_decoder;
}
|
||||||
|
|
||||||
|
bool ArithmeticDecoder::get_next_bit()
|
||||||
|
{
|
||||||
|
// Useful for comparing to Table H.1 – Encoder and decoder trace data.
|
||||||
|
// dbg("I={} MPS={} A={:#x} C={:#x} CT={} B={:#x}", I(CX), MPS(CX), A, C, CT, B());
|
||||||
|
u8 D = DECODE();
|
||||||
|
// dbgln(" -> D={}", D);
|
||||||
|
return D;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Qe column of qe_table for row `index`.
u16 ArithmeticDecoder::Qe(u16 index)
{
    auto const& row = qe_table[index];
    return row.qe;
}
|
||||||
|
// NMPS column of qe_table for row `index`: the next index after decoding the more probable symbol.
u8 ArithmeticDecoder::NMPS(u16 index)
{
    auto const& row = qe_table[index];
    return row.nmps;
}
|
||||||
|
// NLPS column of qe_table for row `index`: the next index after decoding the less probable symbol.
u8 ArithmeticDecoder::NLPS(u16 index)
{
    auto const& row = qe_table[index];
    return row.nlps;
}
|
||||||
|
// SWITCH column of qe_table for row `index`.
u8 ArithmeticDecoder::SWITCH(u16 index)
{
    auto const& row = qe_table[index];
    return row.switch_flag;
}
|
||||||
|
|
||||||
|
// Returns the compressed byte at position BP + offset.
// Positions at or past the end of m_data read as 0xFF, so the decoder keeps
// receiving 1-bits once the input is exhausted. This follows the convention in
// E.2.10 Minimization of the compressed data:
// "the convention is used in the decoder that when a marker code is encountered,
// 1-bits (without bit stuffing) are supplied to the decoder until the coding interval is complete."
u8 ArithmeticDecoder::B(size_t offset) const
{
    auto const position = BP + offset;
    return position < m_data.size() ? m_data[position] : 0xFF;
}
|
||||||
|
|
||||||
|
void ArithmeticDecoder::INITDEC()
|
||||||
|
{
|
||||||
|
// E.3.5 Initialization of the decoder (INITDEC)
|
||||||
|
// Figure G.1 – Initialization of the software conventions decoder
|
||||||
|
|
||||||
|
// "BP, the pointer to the compressed data, is initialized to BPST (pointing to the first compressed byte)."
|
||||||
|
auto const BPST = 0;
|
||||||
|
BP = BPST;
|
||||||
|
C = (B() ^ 0xFF) << 16;
|
||||||
|
|
||||||
|
BYTEIN();
|
||||||
|
|
||||||
|
C = C << 7;
|
||||||
|
CT = CT - 7;
|
||||||
|
A = 0x8000;
|
||||||
|
}
|
||||||
|
|
||||||
|
// E.3.2 Decoding a decision (DECODE)
// Figure G.2 – Decoding an MPS or an LPS in the software-conventions decoder
// Returns the decoded decision D and updates A, C and the state in CX.
u8 ArithmeticDecoder::DECODE()
{
    A -= Qe(I(CX));

    // A aligned with the upper 16 bits of C, so that C can be compared and reduced directly.
    u32 const A_in_C_high = static_cast<u32>(A) << 16;

    if (C >= A_in_C_high) { // `!(C_high < A)` in spec
        C -= A_in_C_high;   // `C_high = C_high - A` in spec
        u8 const D = LPS_EXCHANGE();
        RENORMD();
        return D;
    }

    // No renormalization is needed while the top bit of A is still set.
    if ((A & 0x8000) != 0)
        return MPS(CX);

    u8 const D = MPS_EXCHANGE();
    RENORMD();
    return D;
}
|
||||||
|
|
||||||
|
// Figure E.16 – Decoder MPS path conditional exchange procedure
// Returns the decision D and advances I(CX) (and possibly flips MPS(CX)).
u8 ArithmeticDecoder::MPS_EXCHANGE()
{
    u16 const index = I(CX);

    // No exchange: the MPS is decoded and the state follows the NMPS column.
    if (A >= Qe(index)) {
        u8 const D = MPS(CX);
        I(CX) = NMPS(index);
        return D;
    }

    // Conditional exchange: the opposite of the current MPS is decoded.
    u8 const D = 1 - MPS(CX);
    if (SWITCH(index) == 1)
        MPS(CX) = D; // Same value as `1 - MPS(CX)`.
    I(CX) = NLPS(index);
    return D;
}
|
||||||
|
|
||||||
|
// Figure E.17 – Decoder LPS path conditional exchange procedure
// Returns the decision D, sets A to Qe(I(CX)) and advances I(CX)
// (and possibly flips MPS(CX)).
u8 ArithmeticDecoder::LPS_EXCHANGE()
{
    u16 const index = I(CX);
    u16 const qe = Qe(index);

    // The comparison must use the old A; both paths then set A to Qe.
    bool const exchange = A < qe;
    A = qe;

    if (exchange) {
        u8 const D = MPS(CX);
        I(CX) = NMPS(index);
        return D;
    }

    u8 const D = 1 - MPS(CX);
    if (SWITCH(index) == 1)
        MPS(CX) = D; // Same value as `1 - MPS(CX)`.
    I(CX) = NLPS(index);
    return D;
}
|
||||||
|
|
||||||
|
void ArithmeticDecoder::RENORMD()
|
||||||
|
{
|
||||||
|
// E.3.3 Renormalization in the decoder (RENORMD)
|
||||||
|
// Figure E.18 – Decoder renormalization procedure
|
||||||
|
do {
|
||||||
|
if (CT == 0)
|
||||||
|
BYTEIN();
|
||||||
|
A = A << 1;
|
||||||
|
C = C << 1;
|
||||||
|
CT = CT - 1;
|
||||||
|
} while ((A & 0x8000) == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ArithmeticDecoder::BYTEIN()
|
||||||
|
{
|
||||||
|
// E.3.4 Compressed data input (BYTEIN)
|
||||||
|
// Figure G.3 – Inserting a new byte into the C register in the software-conventions decoder
|
||||||
|
if (B() == 0xFF) {
|
||||||
|
if (B(1) > 0x8F) {
|
||||||
|
CT = 8;
|
||||||
|
} else {
|
||||||
|
BP = BP + 1;
|
||||||
|
C = C + 0xFE00 - (B() << 9);
|
||||||
|
CT = 7;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
BP = BP + 1;
|
||||||
|
C = C + 0xFF00 - (B() << 8);
|
||||||
|
CT = 8;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
// JBIG2 spec, Annex D, D.4.1 ID string
|
// JBIG2 spec, Annex D, D.4.1 ID string
|
||||||
static constexpr u8 id_string[] = { 0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A };
|
static constexpr u8 id_string[] = { 0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A };
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,66 @@ namespace Gfx {
|
||||||
|
|
||||||
struct JBIG2LoadingContext;
|
struct JBIG2LoadingContext;
|
||||||
|
|
||||||
|
namespace JBIG2 {
|
||||||
|
|
||||||
|
// E.3 Arithmetic decoding procedure, but with the changes described in
|
||||||
|
// Annex G Arithmetic decoding procedure (software conventions).
|
||||||
|
// Exposed for testing.
|
||||||
|
class ArithmeticDecoder {
|
||||||
|
public:
|
||||||
|
struct Context {
|
||||||
|
u16 I; // Index I stored for context CX (E.2.4)
|
||||||
|
u8 is_mps; // "More probable symbol" (E.1.1). 0 or 1.
|
||||||
|
};
|
||||||
|
|
||||||
|
static ErrorOr<ArithmeticDecoder> initialize(ReadonlyBytes data, Context context);
|
||||||
|
|
||||||
|
bool get_next_bit();
|
||||||
|
|
||||||
|
private:
|
||||||
|
ArithmeticDecoder(ReadonlyBytes data)
|
||||||
|
: m_data(data)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
ReadonlyBytes m_data;
|
||||||
|
|
||||||
|
// The code below uses names from the spec, so that the algorithms look exactly like the flowcharts in the spec.
|
||||||
|
|
||||||
|
// Abbreviations:
|
||||||
|
// "CX": "Context" (E.1)
|
||||||
|
// "D": "Decision" (as in "encoder input" / "decoder output") (E.1)
|
||||||
|
// "I(CX)": "Index I stored for context CX" (E.2.4)
|
||||||
|
// "MPS": "More probable symbol" (E.1.1)
|
||||||
|
// "LPS": "Less probable symbol" (E.1.1)
|
||||||
|
|
||||||
|
void INITDEC();
|
||||||
|
u8 DECODE(); // Returns a single decoded bit.
|
||||||
|
u8 MPS_EXCHANGE();
|
||||||
|
u8 LPS_EXCHANGE();
|
||||||
|
void RENORMD();
|
||||||
|
void BYTEIN();
|
||||||
|
|
||||||
|
u8 B(size_t offset = 0) const; // Byte pointed to by BP.
|
||||||
|
size_t BP; // Pointer into compressed data.
|
||||||
|
|
||||||
|
// E.3.1 Decoder code register conventions
|
||||||
|
u32 C; // Consists of u16 C_high, C_low.
|
||||||
|
u16 A; // Current value of the fraction. Fixed precision; 0x8000 is equivalent to 0.75.
|
||||||
|
|
||||||
|
u8 CT; // Count of the number of bits in C.
|
||||||
|
|
||||||
|
Context CX;
|
||||||
|
static u16& I(Context& cx) { return cx.I; }
|
||||||
|
static u8& MPS(Context& cx) { return cx.is_mps; }
|
||||||
|
static u16 Qe(u16);
|
||||||
|
static u8 NMPS(u16);
|
||||||
|
static u8 NLPS(u16);
|
||||||
|
static u8 SWITCH(u16);
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
class JBIG2ImageDecoderPlugin : public ImageDecoderPlugin {
|
class JBIG2ImageDecoderPlugin : public ImageDecoderPlugin {
|
||||||
public:
|
public:
|
||||||
static bool sniff(ReadonlyBytes);
|
static bool sniff(ReadonlyBytes);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue