1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 12:58:12 +00:00
serenity/Userland/Libraries/LibPDF/Reader.h
Nico Weber 02d2d12592 LibPDF: Allow moving Reader::move_to() to end of data stream
CFF::parse_index_data() calls move_to() to put the reader's
current position behind the index data.

In several PDFs, the PrivDictOperator::Subrs case in CFF::create()
sets up a span that contains exactly the Subrs data and nothing
after it, so that final move_to() call in parse_index_data()
would cause an assert.

This is similar to fe3612ebcb, where the caller was also in CFF.
So maybe CFF just has a different view of what valid values to pass
to Reader are, compared to the rest of the code? But having an iterator
point to one past the valid data in a container is common, so maybe
this is the Right Fix after all.

Fixes a crash opening 411_getting_started_with_instruments.pdf
(and a whole bunch of other WWDC slides). Rendering is pretty glitchy
and we still crash on page 14, but at least we can open the file now.

The file is currently available at:
411cbc60y12x68arcof/411/411_getting_started_with_instruments.pdf
2023-10-18 06:32:23 -04:00

181 lines
4.4 KiB
C++

/*
* Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Debug.h>
#include <AK/DeprecatedString.h>
#include <AK/Function.h>
#include <AK/ScopeGuard.h>
#include <AK/Span.h>
#include <AK/Vector.h>
#include <LibPDF/Error.h>
namespace PDF {
// A cursor over a read-only byte buffer.
//
// The reader stores a signed offset into the buffer together with a direction
// flag. Relative operations (move_by(), read(), peek(), matches(), ...) follow
// the current direction; move_to() always takes an absolute offset. The offset
// is signed so that stepping backwards from index 0 yields -1, which done()
// treats as "finished" while reading backwards.
class Reader {
public:
    // Wraps `bytes` without copying; the cursor starts at offset 0, reading forwards.
    explicit Reader(ReadonlyBytes bytes)
        : m_bytes(bytes)
    {
    }

    // The buffer this reader walks over.
    ALWAYS_INLINE ReadonlyBytes bytes() const { return m_bytes; }

    // Current position. The stored offset is signed, so a negative position
    // shows up here as a very large unsigned value.
    ALWAYS_INLINE size_t offset() const { return m_offset; }

    // True once the cursor has left the buffer in the current reading direction:
    // at or past the end when reading forwards, before index 0 when reading backwards.
    bool done() const
    {
        if (m_forwards)
            return offset() >= bytes().size();
        return m_offset < 0;
    }

    // Number of bytes that can still be consumed in the current direction.
    size_t remaining() const
    {
        if (done())
            return 0;
        if (m_forwards)
            return bytes().size() - offset();
        // Reading backwards, the bytes at indices 0..offset are still ahead of us.
        return offset() + 1;
    }

    // Advances the cursor by `count` bytes in the current direction. No bounds are enforced.
    void move_by(size_t count)
    {
        auto const delta = static_cast<ssize_t>(count);
        if (m_forwards) {
            m_offset += delta;
        } else {
            m_offset -= delta;
        }
    }

    // Reads a T located at the current offset and then moves by sizeof(T) in
    // the current direction. This function performs no bounds check of its own;
    // use try_read() to get an error instead.
    // NOTE(review): the value is loaded through a reinterpret_cast'ed pointer at
    // an arbitrary byte offset, so for multi-byte T the address may not be
    // suitably aligned - confirm that callers only use types where this is fine.
    template<typename T = char>
    T read()
    {
        T value = reinterpret_cast<T const*>(m_bytes.offset(m_offset))[0];
        move_by(sizeof(T));
        return value;
    }

    // Like read(), but returns a parse error instead of reading when fewer than
    // sizeof(T) bytes are available starting at the current offset.
    template<typename T = char>
    PDFErrorOr<T> try_read()
    {
        // m_offset is signed and can be negative after reading backwards past the
        // start of the buffer. Adding it to sizeof(T) would silently wrap around in
        // unsigned arithmetic and let the check pass, so validate the offset first
        // and then compare against the space that is left.
        bool const offset_is_valid = m_offset >= 0 && static_cast<size_t>(m_offset) <= m_bytes.size();
        if (!offset_is_valid || sizeof(T) > m_bytes.size() - static_cast<size_t>(m_offset)) {
            auto message = DeprecatedString::formatted("Cannot read {} bytes at offset {} of ReadonlyBytes of size {}", sizeof(T), m_offset, m_bytes.size());
            return Error { Error::Type::Parse, message };
        }
        return read<T>();
    }

    // Returns the byte `shift` positions ahead of the cursor (in the current
    // direction) without moving. Range checking is left to m_bytes.at().
    char peek(size_t shift = 0) const
    {
        auto const distance = static_cast<ssize_t>(shift);
        auto const target = m_forwards ? m_offset + distance : m_offset - distance;
        return static_cast<char>(m_bytes.at(static_cast<size_t>(target)));
    }

    // True if the reader is not done and the current byte equals any of `elements`.
    template<typename... T>
    bool matches_any(T... elements) const
    {
        if (done())
            return false;
        auto ch = peek();
        return ((ch == elements) || ...);
    }

    // True if the reader is not done and the current byte equals `ch`.
    bool matches(char ch) const
    {
        return !done() && peek() == ch;
    }

    // True if the upcoming bytes spell out `chars`. When reading backwards the
    // string is reversed first, so the caller always passes it in file order.
    bool matches(char const* chars) const
    {
        DeprecatedString string(chars);
        if (remaining() < string.length())
            return false;

        if (!m_forwards)
            string = string.reverse();

        for (size_t i = 0; i < string.length(); i++) {
            if (peek(i) != string[i])
                return false;
        }
        return true;
    }

    // Jumps to the absolute position `offset`. One past the last byte is accepted
    // (the reader is then done() when reading forwards); anything beyond trips VERIFY.
    // The template parameter is not used by the body.
    template<typename T = char>
    void move_to(size_t offset)
    {
        VERIFY(offset <= m_bytes.size());
        m_offset = static_cast<ssize_t>(offset);
    }

    // Moves in the current direction until the current byte equals `ch` or the reader is done.
    void move_until(char ch)
    {
        while (!done() && peek() != ch)
            move_by(1);
    }

    // Moves in the current direction until `predicate` accepts the current byte or the reader is done.
    void move_until(Function<bool(char)> predicate)
    {
        while (!done() && !predicate(peek()))
            move_by(1);
    }

    // Moves in the current direction for as long as `predicate` accepts the current byte.
    ALWAYS_INLINE void move_while(Function<bool(char)> predicate)
    {
        move_until([&predicate](char t) { return !predicate(t); });
    }

    // The following members are only declared here; their definitions live outside this header.
    bool matches_eol() const;
    bool matches_whitespace() const;
    bool matches_number() const;
    bool matches_delimiter() const;
    bool matches_regular_character() const;

    bool consume_eol();
    bool consume_whitespace();
    char consume();
    void consume(int amount);
    bool consume(char);

    // Select the direction used by all relative operations.
    ALWAYS_INLINE void set_reading_forwards() { m_forwards = true; }
    ALWAYS_INLINE void set_reading_backwards() { m_forwards = false; }

    // A stack of remembered positions: save() pushes the current offset, load()
    // pops the most recent one and jumps back to it, discard() pops it without moving.
    ALWAYS_INLINE void save() { m_saved_offsets.append(m_offset); }
    ALWAYS_INLINE void load() { m_offset = m_saved_offsets.take_last(); }
    ALWAYS_INLINE void discard() { m_saved_offsets.take_last(); }

#ifdef PDF_DEBUG
    // Logs the bytes in a window of up to 10 positions on either side of the cursor.
    void dump_state() const
    {
        dbgln("Reader State (offset={} size={})", offset(), bytes().size());

        // With an empty buffer `size() - 1` below would wrap around and the loop
        // would index out of range, so there is nothing more to print.
        if (bytes().size() == 0) {
            dbgln();
            return;
        }

        size_t from = max(0, static_cast<int>(offset()) - 10);
        size_t to = min(bytes().size() - 1, offset() + 10);

        for (auto i = from; i <= to; i++) {
            char value = static_cast<char>(bytes().at(i));
            auto line = DeprecatedString::formatted("  {}: '{}' (value={:3d}) ", i, value, static_cast<u8>(value));
            if (i == offset()) {
                dbgln("{} <<< current location, forwards={}", line, m_forwards);
            } else {
                dbgln("{}", line);
            }
        }
        dbgln();
    }
#endif

private:
    ReadonlyBytes m_bytes;
    // Signed on purpose: reading backwards past index 0 leaves this at a negative value.
    ssize_t m_offset { 0 };
    Vector<ssize_t> m_saved_offsets;
    bool m_forwards { true };
};
}