diff --git a/Userland/Libraries/LibPDF/CMakeLists.txt b/Userland/Libraries/LibPDF/CMakeLists.txt index 82cf57ae4f..c405f4fb0e 100644 --- a/Userland/Libraries/LibPDF/CMakeLists.txt +++ b/Userland/Libraries/LibPDF/CMakeLists.txt @@ -11,6 +11,7 @@ set(SOURCES Fonts/Type1Font.cpp ObjectDerivatives.cpp Parser.cpp + Reader.cpp Renderer.cpp Value.cpp ) diff --git a/Userland/Libraries/LibPDF/Parser.cpp b/Userland/Libraries/LibPDF/Parser.cpp index 58e412f86b..36b018d89c 100644 --- a/Userland/Libraries/LibPDF/Parser.cpp +++ b/Userland/Libraries/LibPDF/Parser.cpp @@ -108,7 +108,7 @@ PDFErrorOr Parser::parse_header() if (minor_ver < '0' || minor_ver > '7') return error(String::formatted("Unknown minor version \"{}\"", minor_ver)); - consume_eol(); + m_reader.consume_eol(); // Parse optional high-byte comment, which signifies a binary file // FIXME: Do something with this? @@ -300,7 +300,7 @@ PDFErrorOr> Parser::parse_xref_table() if (!m_reader.matches("xref")) return error("Expected \"xref\""); m_reader.move_by(4); - if (!consume_eol()) + if (!m_reader.consume_eol()) return error("Expected newline after \"xref\""); auto table = adopt_ref(*new XRefTable()); @@ -319,12 +319,12 @@ PDFErrorOr> Parser::parse_xref_table() for (int i = 0; i < object_count; i++) { auto offset_string = String(m_reader.bytes().slice(m_reader.offset(), 10)); m_reader.move_by(10); - if (!consume(' ')) + if (!m_reader.consume(' ')) return error("Malformed xref entry"); auto generation_string = String(m_reader.bytes().slice(m_reader.offset(), 5)); m_reader.move_by(5); - if (!consume(' ')) + if (!m_reader.consume(' ')) return error("Malformed xref entry"); auto letter = m_reader.read(); @@ -334,8 +334,8 @@ PDFErrorOr> Parser::parse_xref_table() // The line ending sequence can be one of the following: // SP CR, SP LF, or CR LF if (m_reader.matches(' ')) { - consume(); - auto ch = consume(); + m_reader.consume(); + auto ch = m_reader.consume(); if (ch != '\r' && ch != '\n') return error("Malformed xref entry"); } else { @@ -351,34 +351,34 @@ PDFErrorOr> Parser::parse_xref_table() } table->add_section({ starting_index, object_count, entries }); - } while (matches_number()); + } while (m_reader.matches_number()); return table; } PDFErrorOr> Parser::parse_file_trailer() { - while (matches_eol()) - consume_eol(); + while (m_reader.matches_eol()) + m_reader.consume_eol(); if (!m_reader.matches("trailer")) return error("Expected \"trailer\" keyword"); m_reader.move_by(7); - consume_whitespace(); + m_reader.consume_whitespace(); auto dict = TRY(parse_dict()); if (!m_reader.matches("startxref")) return error("Expected \"startxref\""); m_reader.move_by(9); - consume_whitespace(); + m_reader.consume_whitespace(); - m_reader.move_until([&](auto) { return matches_eol(); }); - VERIFY(consume_eol()); + m_reader.move_until([&](auto) { return m_reader.matches_eol(); }); + VERIFY(m_reader.consume_eol()); if (!m_reader.matches("%%EOF")) return error("Expected \"%%EOF\""); m_reader.move_by(5); - consume_whitespace(); + m_reader.consume_whitespace(); return dict; } @@ -492,18 +492,18 @@ bool Parser::navigate_to_before_eof_marker() m_reader.set_reading_backwards(); while (!m_reader.done()) { - m_reader.move_until([&](auto) { return matches_eol(); }); + m_reader.move_until([&](auto) { return m_reader.matches_eol(); }); if (m_reader.done()) return false; - consume_eol(); + m_reader.consume_eol(); if (!m_reader.matches("%%EOF")) continue; m_reader.move_by(5); - if (!matches_eol()) + if (!m_reader.matches_eol()) continue; - consume_eol(); + m_reader.consume_eol(); return true; } @@ -515,15 +515,15 @@ bool Parser::navigate_to_after_startxref() m_reader.set_reading_backwards(); while (!m_reader.done()) { - m_reader.move_until([&](auto) { return matches_eol(); }); + m_reader.move_until([&](auto) { return m_reader.matches_eol(); }); auto offset = m_reader.offset() + 1; - consume_eol(); + m_reader.consume_eol(); if (!m_reader.matches("startxref")) continue; m_reader.move_by(9); - if (!matches_eol()) + if (!m_reader.matches_eol()) continue; m_reader.move_to(offset); @@ -538,14 +538,14 @@ String Parser::parse_comment() if (!m_reader.matches('%')) return {}; - consume(); + m_reader.consume(); auto comment_start_offset = m_reader.offset(); m_reader.move_until([&](auto) { - return matches_eol(); + return m_reader.matches_eol(); }); String str = StringView(m_reader.bytes().slice(comment_start_offset, m_reader.offset() - comment_start_offset)); - consume_eol(); - consume_whitespace(); + m_reader.consume_eol(); + m_reader.consume_whitespace(); return str; } @@ -555,23 +555,23 @@ PDFErrorOr Parser::parse_value() if (m_reader.matches("null")) { m_reader.move_by(4); - consume_whitespace(); + m_reader.consume_whitespace(); return Value(nullptr); } if (m_reader.matches("true")) { m_reader.move_by(4); - consume_whitespace(); + m_reader.consume_whitespace(); return Value(true); } if (m_reader.matches("false")) { m_reader.move_by(5); - consume_whitespace(); + m_reader.consume_whitespace(); return Value(false); } - if (matches_number()) + if (m_reader.matches_number()) return parse_possible_indirect_value_or_ref(); if (m_reader.matches('/')) @@ -596,7 +596,7 @@ PDFErrorOr Parser::parse_value() PDFErrorOr Parser::parse_possible_indirect_value_or_ref() { auto first_number = TRY(parse_number()); - if (!matches_number()) + if (!m_reader.matches_number()) return first_number; m_reader.save(); @@ -608,8 +608,8 @@ PDFErrorOr Parser::parse_possible_indirect_value_or_ref() if (m_reader.matches('R')) { m_reader.discard(); - consume(); - consume_whitespace(); + m_reader.consume(); + m_reader.consume_whitespace(); return Value(Reference(first_number.get(), second_number.value().get())); } @@ -631,16 +631,16 @@ PDFErrorOr> Parser::parse_indirect_value(u32 index, if (!m_reader.matches("obj")) return error("Expected \"obj\" at beginning of indirect value"); m_reader.move_by(3); - if (matches_eol()) - consume_eol(); + if (m_reader.matches_eol()) + m_reader.consume_eol(); push_reference({ index, generation }); auto value = TRY(parse_value()); if (!m_reader.matches("endobj")) return error("Expected \"endobj\" at end of indirect value"); - consume(6); - consume_whitespace(); + m_reader.consume(6); + m_reader.consume_whitespace(); pop_reference(); @@ -665,16 +665,16 @@ PDFErrorOr Parser::parse_number() bool consumed_digit = false; if (m_reader.matches('+') || m_reader.matches('-')) - consume(); + m_reader.consume(); while (!m_reader.done()) { if (m_reader.matches('.')) { if (is_float) break; is_float = true; - consume(); + m_reader.consume(); } else if (isdigit(m_reader.peek())) { - consume(); + m_reader.consume(); consumed_digit = true; } else { break; @@ -684,7 +684,7 @@ PDFErrorOr Parser::parse_number() if (!consumed_digit) return error("Invalid number"); - consume_whitespace(); + m_reader.consume_whitespace(); auto string = String(m_reader.bytes().slice(start_offset, m_reader.offset() - start_offset)); float f = strtof(string.characters(), nullptr); @@ -697,19 +697,19 @@ PDFErrorOr Parser::parse_number() PDFErrorOr> Parser::parse_name() { - if (!consume('/')) + if (!m_reader.consume('/')) return error("Expected Name object to start with \"/\""); StringBuilder builder; while (true) { - if (!matches_regular_character()) + if (!m_reader.matches_regular_character()) break; if (m_reader.matches('#')) { int hex_value = 0; for (int i = 0; i < 2; i++) { - auto ch = consume(); + auto ch = m_reader.consume(); VERIFY(isxdigit(ch)); hex_value *= 16; if (ch <= '9') { @@ -722,17 +722,17 @@ PDFErrorOr> Parser::parse_name() continue; } - builder.append(consume()); + builder.append(m_reader.consume()); } - consume_whitespace(); + m_reader.consume_whitespace(); return make_object(builder.to_string()); } NonnullRefPtr Parser::parse_string() { - ScopeGuard guard([&] { consume_whitespace(); }); + ScopeGuard guard([&] { m_reader.consume_whitespace(); }); String string; bool is_binary_string; @@ -768,31 +768,31 @@ NonnullRefPtr Parser::parse_string() String Parser::parse_literal_string() { - VERIFY(consume('(')); + VERIFY(m_reader.consume('(')); StringBuilder builder; auto opened_parens = 0; while (true) { if (m_reader.matches('(')) { opened_parens++; - builder.append(consume()); + builder.append(m_reader.consume()); } else if (m_reader.matches(')')) { - consume(); + m_reader.consume(); if (opened_parens == 0) break; opened_parens--; builder.append(')'); } else if (m_reader.matches('\\')) { - consume(); - if (matches_eol()) { - consume_eol(); + m_reader.consume(); + if (m_reader.matches_eol()) { + m_reader.consume_eol(); continue; } if (m_reader.done()) return {}; - auto ch = consume(); + auto ch = m_reader.consume(); switch (ch) { case 'n': builder.append('\n'); @@ -822,7 +822,7 @@ String Parser::parse_literal_string() if (ch >= '0' && ch <= '7') { int octal_value = ch - '0'; for (int i = 0; i < 2; i++) { - auto octal_ch = consume(); + auto octal_ch = m_reader.consume(); if (octal_ch < '0' || octal_ch > '7') break; octal_value = octal_value * 8 + (octal_ch - '0'); @@ -833,11 +833,11 @@ String Parser::parse_literal_string() } } } - } else if (matches_eol()) { - consume_eol(); + } else if (m_reader.matches_eol()) { + m_reader.consume_eol(); builder.append('\n'); } else { - builder.append(consume()); + builder.append(m_reader.consume()); } } @@ -846,23 +846,23 @@ String Parser::parse_literal_string() String Parser::parse_hex_string() { - VERIFY(consume('<')); + VERIFY(m_reader.consume('<')); StringBuilder builder; while (true) { if (m_reader.matches('>')) { - consume(); + m_reader.consume(); return builder.to_string(); } else { int hex_value = 0; for (int i = 0; i < 2; i++) { - auto ch = consume(); + auto ch = m_reader.consume(); if (ch == '>') { // The hex string contains an odd number of characters, and the last character // is assumed to be '0' - consume(); + m_reader.consume(); hex_value *= 16; builder.append(static_cast(hex_value)); return builder.to_string(); @@ -887,26 +887,26 @@ String Parser::parse_hex_string() PDFErrorOr> Parser::parse_array() { - if (!consume('[')) + if (!m_reader.consume('[')) return error("Expected array to start with \"[\""); - consume_whitespace(); + m_reader.consume_whitespace(); Vector values; while (!m_reader.matches(']')) values.append(TRY(parse_value())); - VERIFY(consume(']')); - consume_whitespace(); + VERIFY(m_reader.consume(']')); + m_reader.consume_whitespace(); return make_object(values); } PDFErrorOr> Parser::parse_dict() { - if (!consume('<') || !consume('<')) + if (!m_reader.consume('<') || !m_reader.consume('<')) return error("Expected dict to start with \"<<\""); - consume_whitespace(); + m_reader.consume_whitespace(); HashMap map; while (!m_reader.done()) { @@ -917,9 +917,9 @@ PDFErrorOr> Parser::parse_dict() map.set(name, value); } - if (!consume('>') || !consume('>')) + if (!m_reader.consume('>') || !m_reader.consume('>')) return error("Expected dict to end with \">>\""); - consume_whitespace(); + m_reader.consume_whitespace(); return make_object(map); } @@ -936,11 +936,11 @@ PDFErrorOr> Parser::conditionally_parse_page_tree_node(u32 ob return error(String::formatted("Invalid page tree offset {}", object_index)); m_reader.move_by(3); - consume_whitespace(); + m_reader.consume_whitespace(); - VERIFY(consume('<') && consume('<')); + VERIFY(m_reader.consume('<') && m_reader.consume('<')); - consume_whitespace(); + m_reader.consume_whitespace(); HashMap map; while (true) { @@ -967,8 +967,8 @@ PDFErrorOr> Parser::conditionally_parse_page_tree_node(u32 ob map.set(name->name(), value); } - VERIFY(consume('>') && consume('>')); - consume_whitespace(); + VERIFY(m_reader.consume('>') && m_reader.consume('>')); + m_reader.consume_whitespace(); return make_object(map); } @@ -978,7 +978,7 @@ PDFErrorOr> Parser::parse_stream(NonnullRefPtr> Parser::parse_stream(NonnullRefPtr> Parser::parse_stream(NonnullRefPtr(dict, MUST(ByteBuffer::copy(bytes))); @@ -1043,7 +1043,7 @@ PDFErrorOr> Parser::parse_operators() if (is_operator_char(ch)) { auto operator_start = m_reader.offset(); while (is_operator_char(ch)) { - consume(); + m_reader.consume(); if (m_reader.done()) break; ch = m_reader.peek(); @@ -1053,7 +1053,7 @@ PDFErrorOr> Parser::parse_operators() auto operator_type = Operator::operator_type_from_symbol(operator_string); operators.append(Operator(operator_type, move(operator_args))); operator_args = Vector(); - consume_whitespace(); + m_reader.consume_whitespace(); continue; } @@ -1064,73 +1064,6 @@ PDFErrorOr> Parser::parse_operators() return operators; } -bool Parser::matches_eol() const -{ - return m_reader.matches_any(0xa, 0xd); -} - -bool Parser::matches_whitespace() const -{ - return matches_eol() || m_reader.matches_any(0, 0x9, 0xc, ' '); -} - -bool Parser::matches_number() const -{ - if (m_reader.done()) - return false; - auto ch = m_reader.peek(); - return isdigit(ch) || ch == '-' || ch == '+'; -} - -bool Parser::matches_delimiter() const -{ - return m_reader.matches_any('(', ')', '<', '>', '[', ']', '{', '}', '/', '%'); -} - -bool Parser::matches_regular_character() const -{ - return !matches_delimiter() && !matches_whitespace(); -} - -bool Parser::consume_eol() -{ - if (m_reader.done()) { - return false; - } - if (m_reader.matches("\r\n")) { - consume(2); - return true; - } - auto consumed = consume(); - return consumed == 0xd || consumed == 0xa; -} - -bool Parser::consume_whitespace() -{ - bool consumed = false; - while (matches_whitespace()) { - consumed = true; - consume(); - } - return consumed; -} - -char Parser::consume() -{ - return m_reader.read(); -} - -void Parser::consume(int amount) -{ - for (size_t i = 0; i < static_cast(amount); i++) - consume(); -} - -bool Parser::consume(char ch) -{ - return consume() == ch; -} - Error Parser::error( String const& message #ifdef PDF_DEBUG diff --git a/Userland/Libraries/LibPDF/Parser.h b/Userland/Libraries/LibPDF/Parser.h index f2d48e80b0..7fcf82478e 100644 --- a/Userland/Libraries/LibPDF/Parser.h +++ b/Userland/Libraries/LibPDF/Parser.h @@ -120,18 +120,6 @@ private: void push_reference(Reference const& ref) { m_current_reference_stack.append(ref); } void pop_reference() { m_current_reference_stack.take_last(); } - bool matches_eol() const; - bool matches_whitespace() const; - bool matches_number() const; - bool matches_delimiter() const; - bool matches_regular_character() const; - - bool consume_eol(); - bool consume_whitespace(); - char consume(); - void consume(int amount); - bool consume(char); - Error error( String const& message #ifdef PDF_DEBUG diff --git a/Userland/Libraries/LibPDF/Reader.cpp b/Userland/Libraries/LibPDF/Reader.cpp new file mode 100644 index 0000000000..27ec852510 --- /dev/null +++ b/Userland/Libraries/LibPDF/Reader.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2021, Matthew Olsson + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include + +namespace PDF { + +bool Reader::matches_eol() const +{ + return matches_any(0xa, 0xd); +} + +bool Reader::matches_whitespace() const +{ + return matches_eol() || matches_any(0, 0x9, 0xc, ' '); +} + +bool Reader::matches_number() const +{ + if (done()) + return false; + auto ch = peek(); + return isdigit(ch) || ch == '-' || ch == '+'; +} + +bool Reader::matches_delimiter() const +{ + return matches_any('(', ')', '<', '>', '[', ']', '{', '}', '/', '%'); +} + +bool Reader::matches_regular_character() const +{ + return !matches_delimiter() && !matches_whitespace(); +} + +bool Reader::consume_eol() +{ + if (done()) { + return false; + } + if (matches("\r\n")) { + consume(2); + return true; + } + auto consumed = consume(); + return consumed == 0xd || consumed == 0xa; +} + +bool Reader::consume_whitespace() +{ + bool consumed = false; + while (matches_whitespace()) { + consumed = true; + consume(); + } + return consumed; +} + +char Reader::consume() +{ + return read(); +} + +void Reader::consume(int amount) +{ + for (size_t i = 0; i < static_cast(amount); i++) + consume(); +} + +bool Reader::consume(char ch) +{ + return consume() == ch; +} + +} diff --git a/Userland/Libraries/LibPDF/Reader.h b/Userland/Libraries/LibPDF/Reader.h index 862decb8e1..37402862c5 100644 --- a/Userland/Libraries/LibPDF/Reader.h +++ b/Userland/Libraries/LibPDF/Reader.h @@ -120,6 +120,18 @@ public: move_until([&predicate](char t) { return !predicate(t); }); } + bool matches_eol() const; + bool matches_whitespace() const; + bool matches_number() const; + bool matches_delimiter() const; + bool matches_regular_character() const; + + bool consume_eol(); + bool consume_whitespace(); + char consume(); + void consume(int amount); + bool consume(char); + ALWAYS_INLINE void set_reading_forwards() { m_forwards = true; } ALWAYS_INLINE void set_reading_backwards() { m_forwards = false; }