From cf26fc23931332daaa9989a74f530bce1ec290f7 Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Fri, 20 Oct 2023 22:36:10 -0400
Subject: [PATCH] LibPDF: Make parser skip whitespace after header

0000990.pdf from 0000.zip from
https://pdfa.org/new-large-scale-pdf-corpus-now-publicly-available/
starts like so:

```
%PDF-1.7

4 0 obj
```

parse_header() used to put the cursor at the start of the 2nd,
empty, line. initialize_linearization_dict() would then check
`m_reader.matches_number()` to see if there could possibly
be a linearization dict.

In this case, there isn't one, but we should detect linearization
dicts even if they're separated by whitespace from the first line.
---
 Userland/Libraries/LibPDF/DocumentParser.cpp | 1 +
 1 file changed, 1 insertion(+)
diff --git a/Userland/Libraries/LibPDF/DocumentParser.cpp b/Userland/Libraries/LibPDF/DocumentParser.cpp
index 1f427109dd..0731da6c6d 100644
--- a/Userland/Libraries/LibPDF/DocumentParser.cpp
+++ b/Userland/Libraries/LibPDF/DocumentParser.cpp
@@ -92,6 +92,7 @@ PDFErrorOr<Version> DocumentParser::parse_header()
         return error(DeprecatedString::formatted("Unknown minor version \"{}\"", minor_ver));
 
     m_reader.consume_eol();
+    m_reader.consume_whitespace();
 
     // Parse optional high-byte comment, which signifies a binary file
     // FIXME: Do something with this?