1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-28 07:55:07 +00:00
serenity/Userland/Libraries/LibTextCodec/Decoder.h
Luke Wilde 94965ba28d LibTextCodec: Add BOM sniffer
This takes the input and sniffs it for a BOM. If it has the UTF-8 or
UTF-16BE BOM, it will return their respective decoder. Currently we
don't have a UTF-16LE decoder, so it will assert TODO if it detects
a UTF-16LE BOM. If there is no recognisable BOM, it will return no
decoder.
2022-02-12 12:53:28 +01:00

76 lines
2 KiB
C++

/*
* Copyright (c) 2020-2021, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Forward.h>
#include <AK/Function.h>
namespace TextCodec {
// Abstract interface shared by every text decoder declared in this header.
class Decoder {
public:
    // Decodes `input`, handing results to `on_code_point` as u32 values.
    // Pure virtual: every concrete decoder must supply its own implementation.
    // NOTE(review): presumably the callback fires once per decoded code point, in input order —
    // confirm against the implementations.
    virtual void process(StringView input, Function<void(u32)> on_code_point) = 0;

    // Produces a String from `input`. Not pure virtual, so a base implementation exists outside
    // this header; subclasses may override it.
    // NOTE(review): presumably the base version is built on top of process() — confirm.
    virtual String to_utf8(StringView input);

protected:
    // Protected so that code outside the class hierarchy cannot destroy a decoder through a Decoder*.
    virtual ~Decoder() = default;
};
// Decoder for UTF-8 input.
// Overrides both process() and to_utf8() rather than relying on the base to_utf8().
class UTF8Decoder final : public Decoder {
public:
    virtual void process(StringView input, Function<void(u32)> on_code_point) override;
    virtual String to_utf8(StringView input) override;
};
// Decoder for UTF-16BE (big-endian UTF-16) input.
// Overrides both process() and to_utf8() rather than relying on the base to_utf8().
class UTF16BEDecoder final : public Decoder {
public:
    virtual void process(StringView input, Function<void(u32)> on_code_point) override;
    virtual String to_utf8(StringView input) override;
};
// Decoder for Latin-1 input (presumably ISO-8859-1 — confirm against the implementation).
// Only process() is overridden; to_utf8() is inherited from Decoder.
class Latin1Decoder final : public Decoder {
public:
    virtual void process(StringView input, Function<void(u32)> on_code_point) override;
};
// Decoder for Latin-2 input (presumably ISO-8859-2 — confirm against the implementation).
// Only process() is overridden; to_utf8() is inherited from Decoder.
class Latin2Decoder final : public Decoder {
public:
    virtual void process(StringView input, Function<void(u32)> on_code_point) override;
};
// Decoder for a Hebrew single-byte encoding.
// NOTE(review): the exact code page is not visible in this header — confirm against the implementation.
// Only process() is overridden; to_utf8() is inherited from Decoder.
class HebrewDecoder final : public Decoder {
public:
    virtual void process(StringView input, Function<void(u32)> on_code_point) override;
};
// Decoder for a Cyrillic single-byte encoding.
// NOTE(review): the exact code page is not visible in this header — confirm against the implementation.
// Only process() is overridden; to_utf8() is inherited from Decoder.
class CyrillicDecoder final : public Decoder {
public:
    virtual void process(StringView input, Function<void(u32)> on_code_point) override;
};
// Decoder for KOI8-R input.
// Only process() is overridden; to_utf8() is inherited from Decoder.
class Koi8RDecoder final : public Decoder {
public:
    virtual void process(StringView input, Function<void(u32)> on_code_point) override;
};
// Decoder for Latin-9 input (presumably ISO-8859-15 — confirm against the implementation).
// Only process() is overridden; to_utf8() is inherited from Decoder.
class Latin9Decoder final : public Decoder {
public:
    virtual void process(StringView input, Function<void(u32)> on_code_point) override;
};
// Decoder for a Turkish single-byte encoding.
// NOTE(review): the exact code page is not visible in this header — confirm against the implementation.
// Only process() is overridden; to_utf8() is inherited from Decoder.
class TurkishDecoder final : public Decoder {
public:
    virtual void process(StringView input, Function<void(u32)> on_code_point) override;
};
// Looks up the decoder to use for the named encoding.
// NOTE(review): who owns the returned pointer, how long it stays valid, and whether nullptr is returned
// for an unknown encoding are not visible from this header — confirm against the implementation.
Decoder* decoder_for(String const& encoding);

// Maps an encoding name to its standardized name. The Optional return type allows for "no result",
// presumably for unrecognized names — confirm against the implementation.
Optional<String> get_standardized_encoding(String const& encoding);

// This returns the appropriate Unicode decoder for the sniffed BOM or nullptr if there is no appropriate decoder.
// NOTE(review): ownership/lifetime of the returned pointer is not visible from this header — confirm.
Decoder* bom_sniff_to_decoder(StringView input);
}