1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 04:38:11 +00:00

LibTextCodec: Start fleshing out a simple text codec library

We're starting with a very basic decoding API and only ISO-8859-1 and
UTF-8 decoding (and UTF-8 decoding is really a no-op since String is
expected to be UTF-8.)
This commit is contained in:
Andreas Kling 2020-05-03 22:41:34 +02:00
parent f3676ebef5
commit e09b83c60c
10 changed files with 148 additions and 21 deletions

View file

@ -0,0 +1,73 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <AK/String.h>
#include <AK/StringBuilder.h>
#include <LibTextCodec/Decoder.h>
namespace TextCodec {
// Returns the shared Latin1Decoder, creating it on the first call.
// The instance is heap-allocated and never freed, so the returned reference
// stays valid for the rest of the program. No locking is performed here.
Latin1Decoder& latin1_decoder()
{
    static Latin1Decoder* s_instance = nullptr;
    if (s_instance == nullptr)
        s_instance = new Latin1Decoder;
    return *s_instance;
}
// Returns the shared UTF8Decoder, creating it on the first call.
// The instance is heap-allocated and never freed, so the returned reference
// stays valid for the rest of the program. No locking is performed here.
UTF8Decoder& utf8_decoder()
{
    static UTF8Decoder* s_instance = nullptr;
    if (s_instance == nullptr)
        s_instance = new UTF8Decoder;
    return *s_instance;
}
// Looks up the decoder for an encoding label, compared case-insensitively.
// Recognized labels are "iso-8859-1" and "utf-8"; any other label yields nullptr.
// The result points at the shared instance handed out by latin1_decoder() or
// utf8_decoder(), so callers do not own it.
Decoder* decoder_for(const String& encoding)
{
    Decoder* match = nullptr;
    if (encoding.equals_ignoring_case("iso-8859-1"))
        match = &latin1_decoder();
    else if (encoding.equals_ignoring_case("utf-8"))
        match = &utf8_decoder();
    return match;
}
// The input is taken to be UTF-8 already, so it is handed back as a String
// as-is; this function performs no decoding or validation step of its own.
String UTF8Decoder::to_utf8(const StringView& input)
{
    return String(input);
}
// Converts ISO-8859-1 (Latin-1) encoded bytes to UTF-8.
//
// Each Latin-1 byte value equals the Unicode code point of the same number
// (U+0000..U+00FF). Bytes below 0x80 are therefore copied through unchanged,
// and bytes at or above 0x80 are expanded into the matching two-byte UTF-8
// sequence instead of being replaced with a placeholder character.
String Latin1Decoder::to_utf8(const StringView& input)
{
    // input.length() is a lower bound on the output size: every non-ASCII
    // byte produces two output bytes.
    StringBuilder builder(input.length());
    for (size_t i = 0; i < input.length(); ++i) {
        u8 ch = input[i];
        if (ch < 0x80) {
            builder.append(static_cast<char>(ch));
            continue;
        }
        // U+0080..U+00FF encode as 110000xx 10xxxxxx: the top two bits of the
        // byte go into the lead byte, the low six bits into the continuation.
        builder.append(static_cast<char>(0xc0 | (ch >> 6)));
        builder.append(static_cast<char>(0x80 | (ch & 0x3f)));
    }
    return builder.to_string();
}
}

View file

@ -0,0 +1,50 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/Forward.h>
namespace TextCodec {
// Abstract interface for converting text from some source encoding to UTF-8.
class Decoder {
public:
    // Virtual so that destroying a concrete decoder through a Decoder* is
    // well-defined.
    virtual ~Decoder() = default;

    // Returns the given input re-encoded as a UTF-8 String. How the input
    // bytes are interpreted is defined by the concrete decoder.
    virtual String to_utf8(const StringView&) = 0;
};
// Decoder for input that is already UTF-8 encoded.
class UTF8Decoder final : public Decoder {
public:
    // Returns `input` as a String.
    virtual String to_utf8(const StringView& input) override;
};
// Decoder for ISO-8859-1 (Latin-1) encoded input.
class Latin1Decoder final : public Decoder {
public:
    // Returns a UTF-8 String built from the Latin-1 bytes in `input`.
    virtual String to_utf8(const StringView& input) override;
};
// Returns the decoder registered for the given encoding label, or nullptr if
// the label is not recognized. Labels are matched case-insensitively.
Decoder* decoder_for(const String& encoding);
}

View file

@ -0,0 +1,15 @@
# Object files that make up this library.
# NOTE(review): presumably consumed by the build rules in the included
# Makefile.common / Makefile.subdir -- confirm there.
OBJS = \
Decoder.o
# File name of the library; the install target copies it into Root/usr/lib.
LIBRARY = libtextcodec.a
# Copies the public headers (*.h in this directory) into
# Root/usr/include/LibTextCodec and the library into Root/usr/lib, both
# relative to $(SERENITY_BASE_DIR).
install:
for dir in .; do \
mkdir -p $(SERENITY_BASE_DIR)/Root/usr/include/LibTextCodec/$$dir; \
cp $$dir/*.h $(SERENITY_BASE_DIR)/Root/usr/include/LibTextCodec/$$dir/; \
done
cp $(LIBRARY) $(SERENITY_BASE_DIR)/Root/usr/lib/
# Shared build definitions and rules from the repository root.
include ../../Makefile.common
include ../../Makefile.subdir