From 4675700057bc6761a8bb052bccbb209ed599be37 Mon Sep 17 00:00:00 2001
From: Nico Weber
Date: Wed, 25 Oct 2023 00:05:25 -0700
Subject: [PATCH] LibPDF: Reject unterminated literal strings with an error

0000459.pdf in 0000.zip in the pdfa dataset contains this as the very
first object:

```
1 0 obj
<<
/Creator (Developer 2000)
/CreatorDate (
/Author (Oracle Reports)
/Producer (Oracle PDF driver)
/Title (2021_06_29 Tutoritzacions APTES.PDF)
>>
endobj
```

The `/CreatorDate` value string is unterminated.

Before, we'd assert when trying to check if the first object is a
linearization dict.

Now, we never read the first object (an error during the linearization
dict reading is treated as "file is not linearized") unless we try to
print the document's metadata -- and there we now show an error instead
of asserting.
---
 Userland/Libraries/LibPDF/Parser.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Userland/Libraries/LibPDF/Parser.cpp b/Userland/Libraries/LibPDF/Parser.cpp
index 59682cf8f6..95f9c8dea6 100644
--- a/Userland/Libraries/LibPDF/Parser.cpp
+++ b/Userland/Libraries/LibPDF/Parser.cpp
@@ -282,6 +282,9 @@ PDFErrorOr Parser::parse_literal_string()
     auto opened_parens = 0;
 
     while (true) {
+        if (m_reader.done())
+            return error("unterminated string literal");
+
         if (m_reader.matches('(')) {
             opened_parens++;
             builder.append(m_reader.consume());