LibPDF: Extract Parser::parse_inline_image()

Pure code move, no intended behavior change. The motivation is just to make Parser::parse_operators() less nested and more focused.
2025-09-18 17:26:17 +00:00 · 2023-12-21 22:01:43 -05:00 · 2023-12-21 22:01:43 -05:00 · 3d07684891
commit 3d07684891
parent f972eda7ed
2 changed files with 45 additions and 40 deletions
--- a/Userland/Libraries/LibPDF/Parser.cpp
+++ b/Userland/Libraries/LibPDF/Parser.cpp
@@ -520,45 +520,11 @@ PDFErrorOr<NonnullRefPtr<StreamObject>> Parser::parse_stream(NonnullRefPtr<DictO
    return stream_object;
 }

-PDFErrorOr<Vector<Operator>> Parser::parse_operators()
+PDFErrorOr<NonnullRefPtr<StreamObject>> Parser::parse_inline_image()
 {
-    Vector<Operator> operators;
-    Vector<Value> operator_args;
-
-    constexpr static auto is_operator_char_start = [](char ch) {
-        return isalpha(ch) || ch == '*' || ch == '\'' || ch == '"';
-    };
-    constexpr static auto is_operator_char_continuation = [](char ch) {
-        return is_operator_char_start(ch) || ch == '0' || ch == '1';
-    };
-
-    m_reader.consume_whitespace();
-
-    while (!m_reader.done()) {
-        parse_comment();
-        if (m_reader.done())
-            break;
-        auto ch = m_reader.peek();
-        if (is_operator_char_start(ch)) {
-            auto operator_start = m_reader.offset();
-            while (is_operator_char_continuation(ch)) {
-                m_reader.consume();
-                if (m_reader.done())
-                    break;
-                ch = m_reader.peek();
-            }
-
-            auto operator_string = StringView(m_reader.bytes().slice(operator_start, m_reader.offset() - operator_start));
-            m_reader.consume_whitespace();
-
-            auto operator_type = Operator::operator_type_from_symbol(operator_string);
-
    // Inline images contain a dictionary containing arbitrary values between BI and ID,
    // and then arbitrary binary data between ID and EI.
    // This means they need a special code path in the parser, so that image data in there doesn't confuse the operator parser.
-            if (operator_type == OperatorType::InlineImageBegin) {
-                if (!operator_args.is_empty())
-                    return error("operator args not empty on start of inline image");

    HashMap<DeprecatedFlyString, Value> map = TRY(parse_dict_contents_until("ID"));
    m_reader.consume(2); // "ID"
@@ -592,10 +558,47 @@ PDFErrorOr<Vector<Operator>> Parser::parse_operators()

    auto stream_bytes = m_reader.bytes().slice(stream_start, stream_end - stream_start);

-                Vector<Value> inline_image_args;
    auto map_object = make_object<DictObject>(move(map));
-                inline_image_args.append(make_object<StreamObject>(move(map_object), MUST(ByteBuffer::copy(stream_bytes))));
-                operators.append(Operator(OperatorType::InlineImageEnd, move(inline_image_args)));
+    return make_object<StreamObject>(move(map_object), MUST(ByteBuffer::copy(stream_bytes)));
+}
+
+PDFErrorOr<Vector<Operator>> Parser::parse_operators()
+{
+    Vector<Operator> operators;
+    Vector<Value> operator_args;
+
+    constexpr static auto is_operator_char_start = [](char ch) {
+        return isalpha(ch) || ch == '*' || ch == '\'' || ch == '"';
+    };
+    constexpr static auto is_operator_char_continuation = [](char ch) {
+        return is_operator_char_start(ch) || ch == '0' || ch == '1';
+    };
+
+    m_reader.consume_whitespace();
+
+    while (!m_reader.done()) {
+        parse_comment();
+        if (m_reader.done())
+            break;
+        auto ch = m_reader.peek();
+        if (is_operator_char_start(ch)) {
+            auto operator_start = m_reader.offset();
+            while (is_operator_char_continuation(ch)) {
+                m_reader.consume();
+                if (m_reader.done())
+                    break;
+                ch = m_reader.peek();
+            }
+
+            auto operator_string = StringView(m_reader.bytes().slice(operator_start, m_reader.offset() - operator_start));
+            m_reader.consume_whitespace();
+
+            auto operator_type = Operator::operator_type_from_symbol(operator_string);
+
+            if (operator_type == OperatorType::InlineImageBegin) {
+                if (!operator_args.is_empty())
+                    return error("operator args not empty on start of inline image");
+                operators.append(Operator(OperatorType::InlineImageEnd, { TRY(parse_inline_image()) }));
                continue;
            }

--- a/Userland/Libraries/LibPDF/Parser.h
+++ b/Userland/Libraries/LibPDF/Parser.h
@@ -73,6 +73,8 @@ public:
    void pop_reference() { m_current_reference_stack.take_last(); }

 protected:
+    PDFErrorOr<NonnullRefPtr<StreamObject>> parse_inline_image();
+
    Error error(
        ByteString const& message
 #ifdef PDF_DEBUG