diff --git a/Meta/CMake/common_options.cmake b/Meta/CMake/common_options.cmake
index 7cd1aca6af..0896897502 100644
--- a/Meta/CMake/common_options.cmake
+++ b/Meta/CMake/common_options.cmake
@@ -17,6 +17,7 @@ serenity_option(ENABLE_COMPILETIME_HEADER_CHECK OFF CACHE BOOL "Enable compileti
 
 serenity_option(ENABLE_TIME_ZONE_DATABASE_DOWNLOAD ON CACHE BOOL "Enable download of the IANA Time Zone Database at build time")
 serenity_option(ENABLE_UNICODE_DATABASE_DOWNLOAD ON CACHE BOOL "Enable download of Unicode UCD and CLDR files at build time")
+serenity_option(ENABLE_PUBLIC_SUFFIX_DOWNLOAD ON CACHE BOOL "Enable download of the Public Suffix List at build time")
 serenity_option(INCLUDE_WASM_SPEC_TESTS OFF CACHE BOOL "Download and include the WebAssembly spec testsuite")
 serenity_option(INCLUDE_FLAC_SPEC_TESTS OFF CACHE BOOL "Download and include the FLAC spec testsuite")
 serenity_option(ENABLE_CACERT_DOWNLOAD ON CACHE BOOL "Enable download of cacert.pem at build time")
diff --git a/Meta/CMake/public_suffix.cmake b/Meta/CMake/public_suffix.cmake
new file mode 100644
index 0000000000..e37dee1f07
--- /dev/null
+++ b/Meta/CMake/public_suffix.cmake
@@ -0,0 +1,27 @@
+include(${CMAKE_CURRENT_LIST_DIR}/utils.cmake)
+
+# Fetch the Public Suffix List and generate PublicSuffixData.{h,cpp} from it.
+# PUBLIC_SUFFIX_SOURCES stays unset when the download is disabled.
+if (ENABLE_PUBLIC_SUFFIX_DOWNLOAD)
+    set(PUBLIC_SUFFIX_PATH "${SERENITY_CACHE_DIR}/PublicSuffix" CACHE PATH "Download location for PublicSuffix files")
+    set(PUBLIC_SUFFIX_DATA_URL "https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat")
+    set(PUBLIC_SUFFIX_DATA_PATH "${PUBLIC_SUFFIX_PATH}/public_suffix_list.dat")
+
+    set(PUBLIC_SUFFIX_DATA_HEADER PublicSuffixData.h)
+    set(PUBLIC_SUFFIX_DATA_IMPLEMENTATION PublicSuffixData.cpp)
+
+    download_file("${PUBLIC_SUFFIX_DATA_URL}" "${PUBLIC_SUFFIX_DATA_PATH}")
+    invoke_generator(
+        "PublicSuffixData"
+        Lagom::GeneratePublicSuffixData
+        "${PUBLIC_SUFFIX_PATH}/"
+        "${PUBLIC_SUFFIX_DATA_HEADER}"
+        "${PUBLIC_SUFFIX_DATA_IMPLEMENTATION}"
+        arguments -p "${PUBLIC_SUFFIX_DATA_PATH}"
+    )
+
+    set(PUBLIC_SUFFIX_SOURCES
+        ${PUBLIC_SUFFIX_DATA_HEADER}
+        ${PUBLIC_SUFFIX_DATA_IMPLEMENTATION}
+    )
+endif()
diff --git a/Meta/Lagom/CMakeLists.txt b/Meta/Lagom/CMakeLists.txt
index 452e38584a..cad8fbd991 100644
--- a/Meta/Lagom/CMakeLists.txt
+++ b/Meta/Lagom/CMakeLists.txt
@@ -404,6 +404,7 @@ if (BUILD_LAGOM)
         Markdown
         PDF
         Protocol
+        PublicSuffix
         Regex
         SoftGPU
         SQL
diff --git a/Meta/Lagom/Tools/CodeGenerators/CMakeLists.txt b/Meta/Lagom/Tools/CodeGenerators/CMakeLists.txt
index fff1d4276b..e4bacbd23c 100644
--- a/Meta/Lagom/Tools/CodeGenerators/CMakeLists.txt
+++ b/Meta/Lagom/Tools/CodeGenerators/CMakeLists.txt
@@ -3,6 +3,7 @@ add_subdirectory(IPCCompiler)
 add_subdirectory(LibEDID)
 add_subdirectory(LibGL)
 add_subdirectory(LibLocale)
+add_subdirectory(LibPublicSuffix)
 add_subdirectory(LibTimeZone)
 add_subdirectory(LibUnicode)
 add_subdirectory(LibWeb)
diff --git a/Meta/Lagom/Tools/CodeGenerators/LibPublicSuffix/CMakeLists.txt b/Meta/Lagom/Tools/CodeGenerators/LibPublicSuffix/CMakeLists.txt
new file mode 100644
index 0000000000..93caf7a0a4
--- /dev/null
+++ b/Meta/Lagom/Tools/CodeGenerators/LibPublicSuffix/CMakeLists.txt
@@ -0,0 +1 @@
+lagom_tool(GeneratePublicSuffixData SOURCES GeneratePublicSuffixData.cpp LIBS LibMain)
diff --git a/Meta/Lagom/Tools/CodeGenerators/LibPublicSuffix/GeneratePublicSuffixData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibPublicSuffix/GeneratePublicSuffixData.cpp
new file mode 100644
index 0000000000..1177bf8b2b
--- /dev/null
+++ b/Meta/Lagom/Tools/CodeGenerators/LibPublicSuffix/GeneratePublicSuffixData.cpp
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2023, Cameron Youell
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include "../LibUnicode/GeneratorUtil.h"
+#include
+#include
+#include
+#include
+#include
+
+ErrorOr generate_header_file(Core::InputBufferedFile&, Core::File&);
+ErrorOr generate_implementation_file(Core::InputBufferedFile&, Core::File&);
+
+// Entry point: reads the three paths from the command line, opens the public
+// suffix list and both output files, then emits the header and implementation.
+ErrorOr serenity_main(Main::Arguments arguments)
+{
+    StringView generated_header_path;
+    StringView generated_implementation_path;
+    StringView public_suffix_list_path;
+
+    Core::ArgsParser args_parser;
+    args_parser.add_option(generated_header_path, "Path to the header file to generate", "generated-header-path", 'h', "generated-header-path");
+    args_parser.add_option(generated_implementation_path, "Path to the implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
+    args_parser.add_option(public_suffix_list_path, "Path to the public suffix list", "public-suffix-list-path", 'p', "public-suffix-list-path");
+    args_parser.parse(arguments);
+
+    auto identifier_data = TRY(open_file(public_suffix_list_path, Core::File::OpenMode::Read));
+
+    auto generated_header_file = TRY(Core::File::open(generated_header_path, Core::File::OpenMode::Write));
+    auto generated_implementation_file = TRY(Core::File::open(generated_implementation_path, Core::File::OpenMode::Write));
+
+    TRY(generate_header_file(*identifier_data, *generated_header_file));
+    TRY(generate_implementation_file(*identifier_data, *generated_implementation_file));
+
+    return 0;
+}
+
+// Writes the declaration of the PublicSuffixData class to `file`. The suffix
+// list is not needed for the header, so that parameter is left unnamed.
+ErrorOr generate_header_file(Core::InputBufferedFile&, Core::File& file)
+{
+    StringBuilder builder;
+    SourceGenerator generator { builder };
+    generator.append(R"~~~(
+#pragma once
+
+#include
+#include
+#include
+
+namespace PublicSuffix {
+
+class PublicSuffixData {
+protected:
+    PublicSuffixData();
+
+public:
+    PublicSuffixData(PublicSuffixData const&) = delete;
+    PublicSuffixData& operator=(PublicSuffixData const&) = delete;
+
+    static PublicSuffixData* the()
+    {
+        static PublicSuffixData* s_the;
+        if (!s_the)
+            s_the = new PublicSuffixData;
+        return s_the;
+    }
+
+    ErrorOr> get_public_suffix(StringView string);
+
+private:
+    Trie m_dictionary;
+};
+
+} // namespace PublicSuffix
+
+)~~~");
+
+    TRY(file.write_until_depleted(generator.as_string_view().bytes()));
+    return {};
+}
+
+// Writes the PublicSuffixData implementation to `file`: a table holding every
+// rule read from `input` (labels reversed, so "co.uk" is stored as "uk.co"),
+// the constructor that loads the table into the trie, and the lookup routine.
+ErrorOr generate_implementation_file(Core::InputBufferedFile& input, Core::File& file)
+{
+    StringBuilder builder;
+    SourceGenerator generator { builder };
+    generator.append(R"~~~(
+#include
+#include
+#include
+
+namespace PublicSuffix {
+
+static Vector s_public_suffixes {)~~~");
+
+    Array buffer {};
+
+    while (TRY(input.can_read_line())) {
+        auto line = TRY(input.read_line(buffer));
+
+        // Skip comment lines and blank lines.
+        if (line.starts_with("//"sv) || line.is_empty())
+            continue;
+
+        // Reverse the label order; the generated lookup reverses the labels
+        // of its input the same way before searching.
+        auto view = line.split_view("."sv);
+        view.reverse();
+
+        StringBuilder reversed_line;
+        reversed_line.join("."sv, view);
+        auto val = reversed_line.string_view();
+
+        generator.set("line", val);
+        generator.append(R"~~~(
+    {"@line@"sv},)~~~");
+    }
+
+    generator.append(R"~~~(
+};
+
+PublicSuffixData::PublicSuffixData()
+    : m_dictionary('/', "")
+{
+    // FIXME: Reduce the depth of this trie
+    for (auto str : s_public_suffixes) {
+        MUST(m_dictionary.insert(str.begin(), str.end(), str, [](auto& parent, auto& it) -> Optional {
+            return DeprecatedString::formatted("{}{}", parent.metadata_value(), *it);
+        }));
+    }
+}
+
+ErrorOr> PublicSuffixData::get_public_suffix(StringView string)
+{
+    auto input = string.split_view("."sv);
+    input.reverse();
+
+    auto can_find = [&](StringView input) -> bool {
+        auto it = input.begin();
+        auto& node = m_dictionary.traverse_until_last_accessible_node(it, input.end());
+        return it.is_end() && node.metadata().has_value();
+    };
+
+    StringBuilder overall_search_string;
+    StringBuilder search_string;
+    for (auto part : input) {
+        search_string.clear();
+        TRY(search_string.try_append(TRY(overall_search_string.to_string())));
+        TRY(search_string.try_append(part));
+
+        if (can_find(search_string.string_view())) {
+            overall_search_string.append(TRY(String::from_utf8(part)));
+            overall_search_string.append("."sv);
+            continue;
+        }
+
+        search_string.clear();
+        TRY(search_string.try_append(TRY(overall_search_string.to_string())));
+        TRY(search_string.try_append("*"sv));
+
+        if (can_find(search_string.string_view())) {
+            overall_search_string.append(TRY(String::from_utf8(part)));
+            overall_search_string.append("."sv);
+            continue;
+        }
+
+        break;
+    }
+
+    auto view = overall_search_string.string_view().split_view("."sv);
+    view.reverse();
+
+    StringBuilder return_string_builder;
+    return_string_builder.join('.', view);
+    auto returnString = TRY(return_string_builder.to_string());
+    if (!returnString.is_empty())
+        return returnString;
+
+    return Optional {};
+}
+
+} // namespace PublicSuffix
+
+)~~~");
+
+    TRY(file.write_until_depleted(generator.as_string_view().bytes()));
+    return {};
+}
diff --git a/Userland/Libraries/CMakeLists.txt b/Userland/Libraries/CMakeLists.txt
index 83c0cec63e..b495263713 100644
--- a/Userland/Libraries/CMakeLists.txt
+++ b/Userland/Libraries/CMakeLists.txt
@@ -44,6 +44,7 @@ add_subdirectory(LibPartition)
 add_subdirectory(LibPCIDB)
 add_subdirectory(LibPDF)
 add_subdirectory(LibProtocol)
+add_subdirectory(LibPublicSuffix)
 add_subdirectory(LibRegex)
 add_subdirectory(LibSanitizer)
 add_subdirectory(LibSoftGPU)
diff --git a/Userland/Libraries/LibPublicSuffix/CMakeLists.txt b/Userland/Libraries/LibPublicSuffix/CMakeLists.txt
new file mode 100644
index 0000000000..71485ffc13
--- /dev/null
+++ b/Userland/Libraries/LibPublicSuffix/CMakeLists.txt
@@ -0,0 +1,14 @@
+include(${SerenityOS_SOURCE_DIR}/Meta/CMake/public_suffix.cmake)
+
+set(SOURCES
+    URL.cpp
+    ${PUBLIC_SUFFIX_SOURCES}
+)
+set(GENERATED_SOURCES ${CURRENT_LIB_GENERATED})
+
+serenity_lib(LibPublicSuffix publicsuffix)
+# URL.cpp tests this macro with `defined()`, so it must be absent (not merely
+# false) when the generated PublicSuffixData sources are not built.
+if (ENABLE_PUBLIC_SUFFIX_DOWNLOAD)
+    target_compile_definitions(LibPublicSuffix PRIVATE ENABLE_PUBLIC_SUFFIX_DOWNLOAD)
+endif()
diff --git a/Userland/Libraries/LibPublicSuffix/URL.cpp b/Userland/Libraries/LibPublicSuffix/URL.cpp
new file mode 100644
index 0000000000..5757facf4e
--- /dev/null
+++ b/Userland/Libraries/LibPublicSuffix/URL.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2023, Cameron Youell
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include
+#include
+#include
+#if defined(ENABLE_PUBLIC_SUFFIX_DOWNLOAD)
+#    include
+#endif
+
+namespace PublicSuffix {
+// Checks `url` for plausibility and returns it as a String.
+//
+// Input lacking "://" is first prefixed with "https://". Some host kinds (the
+// two `has` checks below) and every scheme other than http/https are accepted
+// without a suffix lookup; any other host must end in a known public suffix
+// or in ".local", or be "localhost" or a subdomain of it. Everything else is
+// an "Invalid URL" error. Without the suffix data the input is returned as-is.
+ErrorOr absolute_url(StringView url)
+{
+    String out = TRY(String::from_utf8(url));
+#if !defined(ENABLE_PUBLIC_SUFFIX_DOWNLOAD)
+    return out;
+#else
+    if (!out.contains("://"sv))
+        out = TRY(String::formatted("https://{}"sv, out));
+
+    auto final_url = URL::create_with_url_or_path(out.to_deprecated_string());
+    if (!final_url.is_valid())
+        return Error::from_string_view("Invalid URL"sv);
+
+    if (final_url.host().has() || final_url.host().has())
+        return out;
+
+    if (final_url.scheme() != "http"sv && final_url.scheme() != "https"sv)
+        return out;
+
+    if (final_url.host().has()) {
+        auto string_host = final_url.host().get();
+        auto maybe_public_suffix = TRY(PublicSuffixData::the()->get_public_suffix(string_host));
+        if (maybe_public_suffix.has_value())
+            return out;
+
+        // Match "localhost" as a whole label: a bare suffix test would also
+        // accept unrelated hosts such as "notlocalhost".
+        if (string_host.ends_with_bytes(".local"sv) || string_host.bytes_as_string_view() == "localhost"sv || string_host.ends_with_bytes(".localhost"sv))
+            return out;
+    }
+
+    return Error::from_string_view("Invalid URL"sv);
+#endif
+}
+}
diff --git a/Userland/Libraries/LibPublicSuffix/URL.h b/Userland/Libraries/LibPublicSuffix/URL.h
new file mode 100644
index 0000000000..c1d623d92c
--- /dev/null
+++ b/Userland/Libraries/LibPublicSuffix/URL.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2023, Cameron Youell
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include
+
+namespace PublicSuffix {
+
+// Returns `url` as a String if it looks like a valid location (see URL.cpp
+// for the exact rules), or an error otherwise.
+ErrorOr absolute_url(StringView url);
+
+}