mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 15:47:44 +00:00
LibPDF: Skip inline image data in operator stream
Inline images can contain arbitrary binary data in the operator stream, which greatly confuses the operator parser. Just skip them for now. They'll still produce a `Rendering of feature not supported: draw operation: inline_image_begin` diag as usual, so we won't forget about them. After #21536, this reduces the number of crashes on 300 random PDFs from the web (the first 300 from 0000.zip from https://pdfa.org/new-large-scale-pdf-corpus-now-publicly-available/) from 23 (7%) to 22 (7%). On a larger sample (`Meta/test_pdf.py -n 500 ~/Downloads/0000`), it reduces the number of crashes from 53 (10.6%) with 36 distinct crash stacks to 46 (9.2%) with 33 distinct stacks.
This commit is contained in:
parent
e108f394bf
commit
a1f17bd643
1 changed file with 26 additions and 1 deletion
|
@@ -531,10 +531,35 @@ PDFErrorOr<Vector<Operator>> Parser::parse_operators()
|
|||
}
|
||||
|
||||
auto operator_string = StringView(m_reader.bytes().slice(operator_start, m_reader.offset() - operator_start));
|
||||
m_reader.consume_whitespace();
|
||||
|
||||
auto operator_type = Operator::operator_type_from_symbol(operator_string);
|
||||
|
||||
// Inline images contain a dictionary containing arbitrary values between BI and ID,
|
||||
// and then arbitrary binary data between ID and EI.
|
||||
// This means they need a special code path in the parser, so that image data in there doesn't confuse the operator parser.
|
||||
if (operator_type == OperatorType::InlineImageBegin) {
|
||||
if (!operator_args.is_empty())
|
||||
return error("operator args not empty on start of inline image");
|
||||
|
||||
while (!m_reader.done()) {
|
||||
if (m_reader.matches("EI")) {
|
||||
break;
|
||||
}
|
||||
m_reader.consume();
|
||||
}
|
||||
|
||||
if (m_reader.done())
|
||||
return error("operator stream ended inside inline image");
|
||||
|
||||
m_reader.consume(2); // "EI"
|
||||
m_reader.consume_whitespace();
|
||||
|
||||
// FIXME: Do more with inline images than just skipping them.
|
||||
}
|
||||
|
||||
operators.append(Operator(operator_type, move(operator_args)));
|
||||
operator_args = Vector<Value>();
|
||||
m_reader.consume_whitespace();
|
||||
|
||||
continue;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue