mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 06:47:35 +00:00
LibPDF: Parse page structures
This commit introduces the ability to parse the document catalog dict, as well as the page tree and individual pages. Pages obviously aren't fully parsed, as we won't care about most of the fields until we start actually rendering PDFs. One of the primary benefits of the PDF format is laziness. PDFs are not meant to be parsed all at once, and the same is true for pages. When a Document is constructed, it builds a map of page number to object index, but it does not fetch and parse any of the pages. A page is only parsed when a caller requests that particular page (and is cached from then on). This commit also adds an object_cast function which logs bad casts if DEBUG_PDF is set. In addition, utility functions were added to ArrayObject and DictObject to get all types of objects from the collections, avoiding the need to cast manually.
This commit is contained in:
parent
72f693e9ed
commit
8c745ad0d9
11 changed files with 320 additions and 6 deletions
|
@ -48,6 +48,13 @@ Parser::XRefTableAndTrailer Parser::parse_last_xref_table_and_trailer()
|
|||
return { xref_table, trailer };
|
||||
}
|
||||
|
||||
// Parses the indirect value found at `offset`.
//
// The reader is switched to forward reading and moved to `offset` before the
// parse starts, so the reader's previous position and direction are not
// preserved by this call.
// NOTE(review): `offset` is presumably a byte offset into the document
// (e.g. one taken from the xref table) - confirm against callers.
NonnullRefPtr<IndirectValue> Parser::parse_indirect_value_at_offset(size_t offset)
{
    // Set the direction first, then seek; the order matches the original code.
    m_reader.set_reading_forwards();
    m_reader.move_to(offset);

    auto indirect_value = parse_indirect_value();
    return indirect_value;
}
|
||||
|
||||
bool Parser::parse_header()
|
||||
{
|
||||
// FIXME: Do something with the version?
|
||||
|
@ -323,11 +330,18 @@ NonnullRefPtr<IndirectValue> Parser::parse_indirect_value(int index, int generat
|
|||
auto value = parse_value();
|
||||
VERIFY(value.is_object());
|
||||
VERIFY(m_reader.matches("endobj"));
|
||||
VERIFY(consume_whitespace());
|
||||
|
||||
return make_object<IndirectValue>(index, generation, value.as_object());
|
||||
}
|
||||
|
||||
// Parses an indirect value starting at the reader's current position.
//
// Two numbers are read first and passed on, in order, as the `index` and
// `generation` arguments of the two-argument overload, which parses the rest.
// Both numbers must be integers; anything else trips the VERIFY below.
NonnullRefPtr<IndirectValue> Parser::parse_indirect_value()
{
    auto first_number = parse_number();
    auto second_number = parse_number();

    // Check both values before extracting them as ints.
    VERIFY(first_number.is_int() && second_number.is_int());

    auto index = first_number.as_int();
    auto generation = second_number.as_int();
    return parse_indirect_value(index, generation);
}
|
||||
|
||||
Value Parser::parse_number()
|
||||
{
|
||||
size_t start_offset = m_reader.offset();
|
||||
|
@ -366,7 +380,7 @@ NonnullRefPtr<NameObject> Parser::parse_name()
|
|||
StringBuilder builder;
|
||||
|
||||
while (true) {
|
||||
if (matches_whitespace())
|
||||
if (!matches_regular_character())
|
||||
break;
|
||||
|
||||
if (m_reader.matches('#')) {
|
||||
|
@ -587,6 +601,16 @@ bool Parser::matches_number() const
|
|||
return isdigit(ch) || ch == '-' || ch == '+';
|
||||
}
|
||||
|
||||
bool Parser::matches_delimiter() const
|
||||
{
|
||||
return m_reader.matches_any('(', ')', '<', '>', '[', ']', '{', '}', '/', '%');
|
||||
}
|
||||
|
||||
bool Parser::matches_regular_character() const
|
||||
{
|
||||
return !matches_delimiter() && !matches_whitespace();
|
||||
}
|
||||
|
||||
void Parser::consume_eol()
|
||||
{
|
||||
if (m_reader.matches("\r\n")) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue