mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 19:27:44 +00:00
LibPDF: Handle string encodings
Strings can be encoded in either UTF-16BE or UTF-8. In either case, the string starts with a few marker bytes that identify the encoding (0xFE 0xFF for UTF-16BE; 0xEF 0xBB 0xBF for UTF-8); these bytes must be checked for and then removed from the final string.
This commit is contained in:
parent
a08922d2f6
commit
67b65dffa8
2 changed files with 23 additions and 4 deletions
|
@ -8,4 +8,4 @@ set(SOURCES
|
|||
)
|
||||
|
||||
serenity_lib(LibPDF pdf)
|
||||
target_link_libraries(LibPDF LibC LibCore LibIPC LibGfx)
|
||||
target_link_libraries(LibPDF LibC LibCore LibIPC LibGfx LibTextCodec)
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include <LibPDF/Document.h>
|
||||
#include <LibPDF/Filter.h>
|
||||
#include <LibPDF/Parser.h>
|
||||
#include <LibTextCodec/Decoder.h>
|
||||
#include <ctype.h>
|
||||
#include <math.h>
|
||||
|
||||
|
@ -422,9 +423,27 @@ NonnullRefPtr<StringObject> Parser::parse_string()
|
|||
{
|
||||
ScopeGuard guard([&] { consume_whitespace(); });
|
||||
|
||||
if (m_reader.matches('('))
|
||||
return make_object<StringObject>(parse_literal_string(), false);
|
||||
return make_object<StringObject>(parse_hex_string(), true);
|
||||
String string;
|
||||
bool is_binary_string;
|
||||
|
||||
if (m_reader.matches('(')) {
|
||||
string = parse_literal_string();
|
||||
is_binary_string = false;
|
||||
} else {
|
||||
string = parse_hex_string();
|
||||
is_binary_string = true;
|
||||
}
|
||||
|
||||
if (string.bytes().starts_with(Array<u8, 2> { 0xfe, 0xff })) {
|
||||
// The string is encoded in UTF16-BE
|
||||
string = TextCodec::decoder_for("utf-16be")->to_utf8(string.substring(2));
|
||||
} else if (string.bytes().starts_with(Array<u8, 3> { 239, 187, 191 })) {
|
||||
// The string is encoded in UTF-8. This is the default anyways, but if these bytes
|
||||
// are explicitly included, we have to trim them
|
||||
string = string.substring(3);
|
||||
}
|
||||
|
||||
return make_object<StringObject>(string, is_binary_string);
|
||||
}
|
||||
|
||||
String Parser::parse_literal_string()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue