From 1b45c3e127b1fd7cf8c199a8c71d5c8848f2d399 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 3 Jan 2024 17:28:46 -0500 Subject: [PATCH] LibPDF: Tolerate whitespace after `xref` and `startxref` The spec isn't super clear on whether this is allowed: """Each cross-reference section shall begin with a line containing the keyword xref. Following this line...""" """The two preceding lines shall contain, one per line and in order, the keyword startxref and...""" It kind of sounds like anything goes on both lines as long as they contain `xref` and `startxref`. In practice, both seem to always occur at the start of their line, but in 0000780.pdf (and nowhere else), there's one space after each keyword before the following line break, and this change makes that file load. --- Userland/Libraries/LibPDF/DocumentParser.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Userland/Libraries/LibPDF/DocumentParser.cpp b/Userland/Libraries/LibPDF/DocumentParser.cpp index d772f57f61..a856903c03 100644 --- a/Userland/Libraries/LibPDF/DocumentParser.cpp +++ b/Userland/Libraries/LibPDF/DocumentParser.cpp @@ -481,6 +481,7 @@ PDFErrorOr<NonnullRefPtr<XRefTable>> DocumentParser::parse_xref_table() } m_reader.move_by(4); + m_reader.consume_non_eol_whitespace(); if (!m_reader.consume_eol()) return error("Expected newline after \"xref\""); @@ -741,6 +742,8 @@ bool DocumentParser::navigate_to_after_startxref() auto offset = m_reader.offset() + 1; m_reader.consume_eol(); + m_reader.consume_whitespace(); + if (!m_reader.matches("startxref")) continue;