mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 20:28:11 +00:00
LibPDF: Implement loading compressed objects from object streams
Now, whenever the xref table points to a compressed object, parse_object_with_index will look it up in the corresponding object stream as if it were a regular object. With this, our parser gains the bare minimum support for xref streams.
This commit is contained in:
parent
f9beff7b5e
commit
563d91b6c4
3 changed files with 55 additions and 0 deletions
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 2021-2022, Matthew Olsson <mattco@serenityos.org>
|
||||
* Copyright (c) 2022, Julian Offenhäuser <offenhaeuser@protonmail.com>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -51,6 +52,11 @@ PDFErrorOr<void> DocumentParser::initialize()
|
|||
PDFErrorOr<Value> DocumentParser::parse_object_with_index(u32 index)
|
||||
{
|
||||
VERIFY(m_xref_table->has_object(index));
|
||||
|
||||
if (m_xref_table->is_object_compressed(index))
|
||||
// The object can be found in an object stream
|
||||
return parse_compressed_object_with_index(index);
|
||||
|
||||
auto byte_offset = m_xref_table->byte_offset_for_object(index);
|
||||
m_reader.move_to(byte_offset);
|
||||
auto indirect_value = TRY(parse_indirect_value());
|
||||
|
@ -440,6 +446,51 @@ PDFErrorOr<NonnullRefPtr<DictObject>> DocumentParser::parse_file_trailer()
|
|||
return dict;
|
||||
}
|
||||
|
||||
// Parses the object with number `index` out of the object stream that the
// xref table associates with it.
//
// Steps, in order:
//   1. Seek the document reader to the byte offset the xref table reports for
//      the containing object stream and validate its "<number> <generation> obj"
//      header (number must match the stream's index, generation must be 0).
//   2. Parse the stream dictionary, require its Type to be "ObjStm", and read
//      the "N" (entry count) and "First" (offset of the first object) values.
//   3. Parse the stream and run a separate Parser over stream->bytes().
//   4. Scan up to N (object number, offset) pairs at the start of the stream
//      data for `index`, then parse a value at First + offset.
//
// Returns the parsed value, or an error if the header or dictionary is not as
// expected, if any nested parse fails, or if the stream has no entry for `index`.
//
// NOTE(review): "N" and "First" are read without checking that they are present
// or integral; confirm how get_value()/get_u32() behave on a malformed dictionary.
PDFErrorOr<Value> DocumentParser::parse_compressed_object_with_index(u32 index)
{
    auto object_stream_index = m_xref_table->object_stream_for_object(index);
    auto stream_offset = m_xref_table->byte_offset_for_object(object_stream_index);

    m_reader.move_to(stream_offset);

    // Indirect object header: "<object number> <generation number> obj".
    auto first_number = TRY(parse_number());
    auto second_number = TRY(parse_number());

    if (first_number.get<int>() != object_stream_index)
        return error("Mismatching object stream index");
    if (second_number.get<int>() != 0)
        return error("Non-zero object stream generation number");

    if (!m_reader.matches("obj"))
        return error("Malformed object stream");
    m_reader.move_by(3); // Skip past the "obj" keyword.
    if (m_reader.matches_eol())
        m_reader.consume_eol();

    auto dict = TRY(parse_dict());
    auto type = TRY(dict->get_name(m_document, CommonNames::Type))->name();
    if (type != "ObjStm")
        return error("Invalid object stream type");

    auto object_count = dict->get_value("N").get_u32();
    auto first_object_offset = dict->get_value("First").get_u32();

    auto stream = TRY(parse_stream(dict));
    Parser stream_parser(m_document, stream->bytes());

    // The stream data begins with `object_count` pairs of integers:
    // (object number, offset relative to `first_object_offset`).
    bool found_object = false;
    for (u32 i = 0; i < object_count; ++i) {
        auto object_number = TRY(stream_parser.parse_number());
        auto object_offset = TRY(stream_parser.parse_number());

        if (object_number.get_u32() == index) {
            stream_parser.move_to(first_object_offset + object_offset.get_u32());
            found_object = true;
            break;
        }
    }

    // Without this check the parser would be left just past the last pair it
    // read and we would return whatever value happens to follow, silently
    // handing the caller the wrong object.
    if (!found_object)
        return error("Object not found in object stream");

    return TRY(stream_parser.parse_value());
}
|
||||
|
||||
PDFErrorOr<DocumentParser::PageOffsetHintTable> DocumentParser::parse_page_offset_hint_table(ReadonlyBytes hint_stream_bytes)
|
||||
{
|
||||
if (hint_stream_bytes.size() < sizeof(PageOffsetHintTable))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue