1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-06-14 03:42:10 +00:00
serenity/Userland/Libraries/LibPDF/XRefTable.h
Julian Offenhäuser d1bc89e30b LibPDF: Try to repair XRef tables with broken indices
An XRef table usually starts with an object number of zero. While it
could technically start at any other number, this is a tell-tale sign
of a broken table.

For the "broken" documents I encountered, this always meant that some
objects had been removed from the start of the table without
updating the indices that follow. When this is the case, the document
cannot be read normally.

However, most other PDF parsers seem to know of this quirk and fix the
XRef table automatically.

Likewise, we now check for this exact case, and if it matches up with
what we expect, we update the XRef table such that all object numbers
match the actual objects found in the file again.
2022-11-25 22:44:47 +01:00

147 lines
4 KiB
C++

/*
* Copyright (c) 2021-2022, Matthew Olsson <mattco@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Format.h>
#include <AK/RefCounted.h>
#include <AK/String.h>
#include <AK/Vector.h>
#include <LibPDF/Error.h>
namespace PDF {
// Sentinel byte offset. It is the default value of XRefEntry::byte_offset, and
// XRefTable::merge() treats a slot holding it as "no entry here".
constexpr long invalid_byte_offset = NumericLimits<long>::max();
// One slot of a cross-reference table. A default-constructed entry acts as a
// placeholder: its byte_offset is invalid_byte_offset and all flags are false.
struct XRefEntry {
    // Stored offset value for the object; invalid_byte_offset until a real
    // entry is supplied. XRefTable::object_stream_for_object() returns this
    // same field.
    long byte_offset = invalid_byte_offset;
    // Stored generation number. XRefTable::object_stream_index_for_object()
    // returns this same field.
    u16 generation_number = 0;
    // Reported by XRefTable::is_object_in_use().
    bool in_use = false;
    // Reported by XRefTable::is_object_compressed().
    bool compressed = false;
};
// A contiguous run of cross-reference entries, as consumed by
// XRefTable::add_section().
//
// The integer members are zero-initialized so that a default-constructed
// section never carries indeterminate values into add_section(), which uses
// them for capacity and padding arithmetic. The struct remains an aggregate,
// so existing brace-initialization at call sites keeps working.
struct XRefSection {
    // Table index the first entry of this section is meant to occupy;
    // add_section() pads the table with placeholder entries up to this index.
    int starting_index { 0 };
    // Declared number of entries; add_section() uses it (together with
    // starting_index) only to reserve capacity.
    int count { 0 };
    // The entries themselves, in table order.
    Vector<XRefEntry> entries;
};
// Cross-reference table of a PDF document: a flat list of XRefEntry values
// addressed by index. Slots for which no entry has been supplied hold a
// default XRefEntry, whose byte_offset equals invalid_byte_offset.
class XRefTable final : public RefCounted<XRefTable> {
public:
    // Folds the entries of `other` into this table, index by index:
    //  - indices beyond the end of this table are appended;
    //  - a placeholder slot in this table (byte_offset == invalid_byte_offset)
    //    is replaced by the entry from `other`;
    //  - if both tables hold a non-placeholder entry at the same index, a
    //    parse error is returned.
    //
    // When an error is returned, entries merged before the conflict was found
    // remain in this table.
    PDFErrorOr<void> merge(XRefTable&& other)
    {
        auto this_size = m_entries.size();
        auto other_size = other.m_entries.size();
        // Reserve up front; the unchecked_append() calls below rely on it.
        m_entries.ensure_capacity(other_size);

        for (size_t i = 0; i < other_size; i++) {
            auto other_entry = other.m_entries[i];
            if (i >= this_size) {
                m_entries.unchecked_append(other_entry);
                continue;
            }

            auto this_entry = m_entries[i];
            if (this_entry.byte_offset == invalid_byte_offset) {
                m_entries[i] = other_entry;
            } else if (other_entry.byte_offset != invalid_byte_offset) {
                // Both xref tables have an entry for the same object index
                return Error { Error::Type::Parse, "Conflicting xref entry during merge" };
            }
        }

        return {};
    }

    // Appends a section to the table. The table is first padded with
    // placeholder entries until its size reaches section.starting_index, then
    // every entry of section.entries is appended in order.
    //
    // NOTE(review): if the table is already longer than
    // section.starting_index, no padding happens and the entries are still
    // appended at the end, so they do not land at starting_index + i.
    // Confirm that callers only add sections in ascending index order.
    void add_section(XRefSection const& section)
    {
        m_entries.ensure_capacity(section.starting_index + section.count);

        for (int i = static_cast<int>(m_entries.size()); i < section.starting_index; i++)
            m_entries.append(XRefEntry {});

        for (auto& entry : section.entries)
            m_entries.append(entry);
    }

    // Direct mutable access to the underlying entry list.
    ALWAYS_INLINE Vector<XRefEntry>& entries() { return m_entries; }

    // Returns true when `index` is inside the table and the slot holds a real
    // entry rather than a placeholder.
    //
    // Placeholders are identified by invalid_byte_offset, the same sentinel
    // that XRefEntry's default value and merge() use. (Comparing against -1
    // never matched a placeholder, so padding slots were reported as objects.)
    [[nodiscard]] ALWAYS_INLINE bool has_object(size_t index) const
    {
        return index < m_entries.size() && m_entries[index].byte_offset != invalid_byte_offset;
    }

    // Returns the stored byte_offset of the entry at `index`.
    // The entry must exist (see has_object()); this is enforced with VERIFY.
    [[nodiscard]] ALWAYS_INLINE long byte_offset_for_object(size_t index) const
    {
        VERIFY(has_object(index));
        return m_entries[index].byte_offset;
    }

    // Returns the same stored value as byte_offset_for_object().
    // NOTE(review): presumably for compressed entries this field holds the
    // number of the containing object stream -- confirm against the parser.
    [[nodiscard]] ALWAYS_INLINE long object_stream_for_object(size_t index) const
    {
        return byte_offset_for_object(index);
    }

    // Returns the stored generation_number of the entry at `index`.
    // The entry must exist (see has_object()); this is enforced with VERIFY.
    [[nodiscard]] ALWAYS_INLINE u16 generation_number_for_object(size_t index) const
    {
        VERIFY(has_object(index));
        return m_entries[index].generation_number;
    }

    // Returns the same stored value as generation_number_for_object().
    // NOTE(review): presumably for compressed entries this field holds the
    // object's index inside its object stream -- confirm against the parser.
    [[nodiscard]] ALWAYS_INLINE u16 object_stream_index_for_object(size_t index) const
    {
        return generation_number_for_object(index);
    }

    // Returns the in_use flag of the entry at `index`.
    // The entry must exist (see has_object()); this is enforced with VERIFY.
    [[nodiscard]] ALWAYS_INLINE bool is_object_in_use(size_t index) const
    {
        VERIFY(has_object(index));
        return m_entries[index].in_use;
    }

    // Returns the compressed flag of the entry at `index`.
    // The entry must exist (see has_object()); this is enforced with VERIFY.
    [[nodiscard]] ALWAYS_INLINE bool is_object_compressed(size_t index) const
    {
        VERIFY(has_object(index));
        return m_entries[index].compressed;
    }

private:
    // The formatter below reads m_entries directly to dump the table.
    friend struct AK::Formatter<PDF::XRefTable>;

    Vector<XRefEntry> m_entries;
};
}
namespace AK {
// Renders a PDF::XRefEntry as a single line showing its offset, generation
// number and in-use flag. The compressed flag is not part of the output.
template<>
struct Formatter<PDF::XRefEntry> : Formatter<StringView> {
    ErrorOr<void> format(FormatBuilder& builder, PDF::XRefEntry const& entry)
    {
        // Build the text first, then hand it to the StringView formatter so
        // that width/alignment handling comes from the base class.
        auto const description = String::formatted(
            "XRefEntry {{ offset={} generation={} used={} }}",
            entry.byte_offset,
            entry.generation_number,
            entry.in_use);
        return Formatter<StringView>::format(builder, description);
    }
};
// Renders a PDF::XRefTable as a brace-delimited block with one line per
// entry, each entry formatted through Formatter<PDF::XRefEntry>.
template<>
struct Formatter<PDF::XRefTable> : Formatter<StringView> {
    ErrorOr<void> format(FormatBuilder& format_builder, PDF::XRefTable const& table)
    {
        StringBuilder dump;
        dump.append("XRefTable {"sv);
        // m_entries is private; access is granted by the friend declaration
        // in XRefTable.
        for (auto const& xref_entry : table.m_entries)
            dump.appendff("\n {}", xref_entry);
        dump.append("\n}"sv);

        auto const rendered = dump.to_string();
        return Formatter<StringView>::format(format_builder, rendered);
    }
};
}