From 191e20d639b578bb431b23318e7310b800b0013f Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Fri, 13 Oct 2023 09:43:16 -0400 Subject: [PATCH] LibWebView: Add a helper to sanitize a user-provided URL We currently implement several forms of this method across the Ladybird chromes. As such, we see commits to add special URL handling that only affects a single chrome. Instead, let's consolidate all special handling in a single location for all chromes to make use of. This method can handle resolving file:// URLs, falling back to a search engine query, and validation against the Public Suffix List. These cases were gathered from the various chromes. --- Userland/Libraries/LibWebView/CMakeLists.txt | 3 +- Userland/Libraries/LibWebView/URL.cpp | 48 ++++++++++++++++++++ Userland/Libraries/LibWebView/URL.h | 22 +++++++++ 3 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 Userland/Libraries/LibWebView/URL.cpp create mode 100644 Userland/Libraries/LibWebView/URL.h diff --git a/Userland/Libraries/LibWebView/CMakeLists.txt b/Userland/Libraries/LibWebView/CMakeLists.txt index cfe28cdf23..be33be92f3 100644 --- a/Userland/Libraries/LibWebView/CMakeLists.txt +++ b/Userland/Libraries/LibWebView/CMakeLists.txt @@ -9,6 +9,7 @@ set(SOURCES RequestServerAdapter.cpp SourceHighlighter.cpp StylePropertiesModel.cpp + URL.cpp UserAgent.cpp ViewImplementation.cpp WebContentClient.cpp @@ -40,7 +41,7 @@ set(GENERATED_SOURCES ) serenity_lib(LibWebView webview) -target_link_libraries(LibWebView PRIVATE LibCore LibGfx LibGUI LibIPC LibProtocol LibJS LibWeb LibSQL) +target_link_libraries(LibWebView PRIVATE LibCore LibFileSystem LibGfx LibGUI LibIPC LibProtocol LibPublicSuffix LibJS LibWeb LibSQL) if (SERENITYOS) target_link_libraries(LibWebView PRIVATE LibFileSystemAccessClient) diff --git a/Userland/Libraries/LibWebView/URL.cpp b/Userland/Libraries/LibWebView/URL.cpp new file mode 100644 index 0000000000..856e3cc2f4 --- /dev/null +++ b/Userland/Libraries/LibWebView/URL.cpp @@ 
-0,0 +1,59 @@
+/*
+ * Copyright (c) 2023, Tim Flynn
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <AK/String.h>
+#include <LibFileSystem/FileSystem.h>
+#include <LibPublicSuffix/URL.h>
+#include <LibWebView/URL.h>
+
+namespace WebView {
+
+// Turns text typed by a user into a URL to load. The steps, in order:
+//   1. If the text names an existing file system path, return a file-scheme URL for its real
+//      path, or an empty Optional if the real path cannot be resolved.
+//   2. If append_tld is AppendTLD::Yes and the text does not already end in ".com", ".net", or
+//      ".org", append ".com" to it.
+//   3. Pass the text to PublicSuffix::absolute_url() and return its result on success.
+//   4. Otherwise, return the text formatted into the search_engine format string, or an empty
+//      Optional if no search engine was provided.
+Optional<URL> sanitize_url(StringView url, Optional<StringView> search_engine, AppendTLD append_tld)
+{
+    if (FileSystem::exists(url)) {
+        auto path = FileSystem::real_path(url);
+        if (path.is_error())
+            return {};
+
+        return URL::create_with_file_scheme(path.value().to_deprecated_string());
+    }
+
+    // Captures url by reference, so a ".com" suffix appended below is part of the search query.
+    // NOTE(review): confirm that searching for the suffixed text is the intended fallback.
+    auto format_search_engine = [&]() -> Optional<URL> {
+        if (!search_engine.has_value())
+            return {};
+
+        return MUST(String::formatted(*search_engine, URL::percent_decode(url)));
+    };
+
+    // Owns the suffixed text, since url is only a view and must not outlive what it points at.
+    String url_buffer;
+
+    if (append_tld == AppendTLD::Yes) {
+        // FIXME: Expand the list of top level domains.
+        if (!url.ends_with(".com"sv) && !url.ends_with(".net"sv) && !url.ends_with(".org"sv)) {
+            url_buffer = MUST(String::formatted("{}.com", url));
+            url = url_buffer;
+        }
+    }
+
+    auto result = PublicSuffix::absolute_url(url);
+    if (result.is_error())
+        return format_search_engine();
+
+    return result.release_value();
+}
+
+}
diff --git a/Userland/Libraries/LibWebView/URL.h b/Userland/Libraries/LibWebView/URL.h
new file mode 100644
index 0000000000..ea7b004ca1
--- /dev/null
+++ b/Userland/Libraries/LibWebView/URL.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2023, Tim Flynn
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/Optional.h>
+#include <AK/StringView.h>
+#include <AK/URL.h>
+
+namespace WebView {
+
+// Whether sanitize_url() may append ".com" to text that lacks a recognized top level domain.
+enum class AppendTLD {
+    No,
+    Yes,
+};
+
+// Converts text typed by a user into a URL: an existing file system path becomes a file-scheme
+// URL, other text is resolved with PublicSuffix::absolute_url(), and text that fails to resolve is
+// formatted into the search_engine format string when one is provided. Returns an empty Optional
+// when no URL could be produced.
+Optional<URL> sanitize_url(StringView, Optional<StringView> search_engine = {}, AppendTLD = AppendTLD::No);
+
+}