mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 14:47:44 +00:00
LibPDF: Improve stream parsing
When parsing streams we rely on a /Length item being defined in the stream's dictionary to know how much data comprises the stream. Its value is usually a direct value, but it can be indirect. There was, however, a contradiction in the code: the condition that allowed it to read and use the /Length value required it to be a direct value, but the actual code using the value would have worked with indirect ones. This meant that indirect /Length values triggered the fallback, "manual" stream parsing code. On the other hand, this latter code was also buggy, because it relied on the "endstream" keyword appearing on a separate line, which isn't always the case. This commit fixes the bug in the manual stream parsing scenario, while also allowing for indirect /Length values to be used to parse streams more directly and avoid the manual approach. The main caveat to this second change is that for a brief period of time the Document is not able to resolve references (i.e., before the xref table itself has been parsed). Any parsing happening before that (e.g., the linearization dictionary) must therefore use the manual stream parsing approach.
This commit is contained in:
parent
8670526f2a
commit
a533ea7ae6
3 changed files with 14 additions and 11 deletions
|
@ -446,7 +446,7 @@ PDFErrorOr<NonnullRefPtr<StreamObject>> Parser::parse_stream(NonnullRefPtr<DictO
|
|||
ReadonlyBytes bytes;
|
||||
|
||||
auto maybe_length = dict->get(CommonNames::Length);
|
||||
if (maybe_length.has_value() && (!maybe_length->has<Reference>())) {
|
||||
if (maybe_length.has_value() && m_document->can_resolve_refefences()) {
|
||||
// The PDF writer has kindly provided us with the direct length of the stream
|
||||
m_reader.save();
|
||||
auto length = TRY(m_document->resolve_to<int>(maybe_length.value()));
|
||||
|
@ -457,17 +457,13 @@ PDFErrorOr<NonnullRefPtr<StreamObject>> Parser::parse_stream(NonnullRefPtr<DictO
|
|||
} else {
|
||||
// We have to look for the endstream keyword
|
||||
auto stream_start = m_reader.offset();
|
||||
|
||||
while (true) {
|
||||
m_reader.move_until([&](auto) { return m_reader.matches_eol(); });
|
||||
auto potential_stream_end = m_reader.offset();
|
||||
m_reader.consume_eol();
|
||||
if (!m_reader.matches("endstream"))
|
||||
continue;
|
||||
|
||||
bytes = m_reader.bytes().slice(stream_start, potential_stream_end - stream_start);
|
||||
break;
|
||||
while (!m_reader.matches("endstream")) {
|
||||
m_reader.consume();
|
||||
m_reader.move_until('e');
|
||||
}
|
||||
auto stream_end = m_reader.offset();
|
||||
m_reader.consume_eol();
|
||||
bytes = m_reader.bytes().slice(stream_start, stream_end - stream_start);
|
||||
}
|
||||
|
||||
m_reader.move_by(9);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue