mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 19:37:35 +00:00
LibPDF: Port 59b50fa43f8c2 to xref and object streams
0000440.pdf contains an xref stream object (at offset 3643676) starting:
```
294 0 obj <<
/Type /XRef
/Index [0 295]
/Size 295
```
and an object stream object (at offset 3640121) starting:
```
230 0 obj <<
/Type /ObjStm
/N 73
/First 614
```
In both cases, the `obj` and the `<<` are separated by non-newline
whitespace.
633e1632d0
made parse_indirect_value() tolerate this, but it updated
neither parse_xref_stream() (which parses xref streams) nor
parse_compressed_object_with_index() (which parses object streams),
despite all three changes being part of #14873.
Make parse_xref_stream() and parse_compressed_object_with_index()
call parse_indirect_value() to pick up the fix over there. It's a bit
less code too.
(0000440.pdf is the only PDF in my 1000 test PDFs that this helps,
somewhat surprisingly.)
This commit is contained in:
parent
a545935997
commit
e16345555b
1 changed file with 18 additions and 26 deletions
|
@ -390,18 +390,23 @@ PDFErrorOr<void> DocumentParser::validate_xref_table_and_fix_if_necessary()
|
|||
return {};
|
||||
}
|
||||
|
||||
// Unwraps the value held by `indirect_value` and returns it as a StreamObject.
// Fails with a Parse error ("Expected indirect value to be a stream") when the
// held value is not an Object, or when that Object is not a StreamObject.
static PDFErrorOr<NonnullRefPtr<StreamObject>> indirect_value_as_stream(NonnullRefPtr<IndirectValue> indirect_value)
|
||||
{
|
||||
auto value = indirect_value->value();
|
||||
if (!value.has<NonnullRefPtr<Object>>())
|
||||
return Error { Error::Type::Parse, "Expected indirect value to be a stream" };
|
||||
auto value_object = value.get<NonnullRefPtr<Object>>();
|
||||
if (!value_object->is<StreamObject>())
|
||||
return Error { Error::Type::Parse, "Expected indirect value to be a stream" };
|
||||
return value_object->cast<StreamObject>();
|
||||
}
|
||||
|
||||
PDFErrorOr<NonnullRefPtr<XRefTable>> DocumentParser::parse_xref_stream()
|
||||
{
|
||||
auto first_number = TRY(parse_number());
|
||||
auto second_number = TRY(parse_number());
|
||||
auto xref_stream = TRY(parse_indirect_value());
|
||||
auto stream = TRY(indirect_value_as_stream(xref_stream));
|
||||
|
||||
if (!m_reader.matches("obj"))
|
||||
return error("Malformed xref object");
|
||||
m_reader.move_by(3);
|
||||
if (m_reader.matches_eol())
|
||||
m_reader.consume_eol();
|
||||
|
||||
auto dict = TRY(parse_dict());
|
||||
auto dict = stream->dict();
|
||||
auto type = TRY(dict->get_name(m_document, CommonNames::Type))->name();
|
||||
if (type != "XRef")
|
||||
return error("Malformed xref dictionary");
|
||||
|
@ -425,7 +430,6 @@ PDFErrorOr<NonnullRefPtr<XRefTable>> DocumentParser::parse_xref_stream()
|
|||
} else {
|
||||
subsections.append({ 0, number_of_object_entries });
|
||||
}
|
||||
auto stream = TRY(parse_stream(dict));
|
||||
auto table = adopt_ref(*new XRefTable());
|
||||
|
||||
auto field_to_long = [](ReadonlyBytes field) -> long {
|
||||
|
@ -562,22 +566,13 @@ PDFErrorOr<Value> DocumentParser::parse_compressed_object_with_index(u32 index)
|
|||
|
||||
m_reader.move_to(stream_offset);
|
||||
|
||||
auto first_number = TRY(parse_number());
|
||||
auto second_number = TRY(parse_number());
|
||||
auto obj_stream = TRY(parse_indirect_value());
|
||||
auto stream = TRY(indirect_value_as_stream(obj_stream));
|
||||
|
||||
if (first_number.get<int>() != object_stream_index)
|
||||
if (obj_stream->index() != object_stream_index)
|
||||
return error("Mismatching object stream index");
|
||||
if (second_number.get<int>() != 0)
|
||||
return error("Non-zero object stream generation number");
|
||||
|
||||
if (!m_reader.matches("obj"))
|
||||
return error("Malformed object stream");
|
||||
m_reader.move_by(3);
|
||||
if (m_reader.matches_eol())
|
||||
m_reader.consume_eol();
|
||||
|
||||
push_reference({ static_cast<u32>(first_number.get<int>()), static_cast<u32>(second_number.get<int>()) });
|
||||
auto dict = TRY(parse_dict());
|
||||
auto dict = stream->dict();
|
||||
|
||||
auto type = TRY(dict->get_name(m_document, CommonNames::Type))->name();
|
||||
if (type != "ObjStm")
|
||||
|
@ -586,9 +581,6 @@ PDFErrorOr<Value> DocumentParser::parse_compressed_object_with_index(u32 index)
|
|||
auto object_count = dict->get_value("N").get_u32();
|
||||
auto first_object_offset = dict->get_value("First").get_u32();
|
||||
|
||||
auto stream = TRY(parse_stream(dict));
|
||||
pop_reference();
|
||||
|
||||
Parser stream_parser(m_document, stream->bytes());
|
||||
|
||||
// The data was already decrypted when reading the outer compressed ObjStm.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue