1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 01:17:35 +00:00

LibPDF: Parse linearized PDF files

This is a big step, as most PDFs which are downloaded online will be
linearized. Pretty much the only difference is that the xref structure
is slightly different.
This commit is contained in:
Matthew Olsson 2021-05-26 22:52:05 -07:00 committed by Ali Mohammad Pur
parent be1be47613
commit e23bfd7252
8 changed files with 270 additions and 45 deletions

View file

@ -10,8 +10,10 @@
namespace PDF {
constexpr long invalid_byte_offset = NumericLimits<long>::max();
struct XRefEntry {
long byte_offset { -1L };
long byte_offset { invalid_byte_offset };
u16 generation_number { 0 };
bool in_use { false };
};
@ -22,8 +24,34 @@ struct XRefSection {
Vector<XRefEntry> entries;
};
class XRefTable {
class XRefTable final : public RefCounted<XRefTable> {
public:
bool merge(XRefTable&& other)
{
auto this_size = m_entries.size();
auto other_size = other.m_entries.size();
m_entries.ensure_capacity(other_size);
for (size_t i = 0; i < other_size; i++) {
auto other_entry = other.m_entries[i];
if (i >= this_size) {
m_entries.unchecked_append(other_entry);
continue;
}
auto this_entry = m_entries[i];
if (this_entry.byte_offset == invalid_byte_offset) {
m_entries[i] = other_entry;
} else if (other_entry.byte_offset != invalid_byte_offset) {
// Both xref tables have an entry for the same object index
return false;
}
}
return true;
}
void add_section(const XRefSection& section)
{
m_entries.ensure_capacity(section.starting_index + section.count);