From 7405536a1abcc23d6a54565687800b14bde3db53 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Tue, 29 Jun 2021 17:06:21 +0200 Subject: [PATCH] AK+Everywhere: Use mostly StringView in LexicalPath This changes the m_parts, m_dirname, m_basename, m_title and m_extension member variables to StringViews onto the m_string String. It also removes the m_is_absolute member in favour of computing if a path is absolute in the is_absolute() getter. Due to this, the canonicalize() method has been completely rewritten. The parts() getter still returns a Vector, although it is no longer a const reference as m_parts is no longer a Vector. Rather, it is constructed from the StringViews in m_parts upon request. The parts_view() getter has been added, which returns Vector const&. Most previous users of parts() have been changed to use parts_view(), except where Strings are required. Due to this change, it's is now no longer allow to create temporary LexicalPath objects to call the dirname, basename, title, or extension getters on them because the returned StringViews will point to possible freed memory. --- AK/LexicalPath.cpp | 119 ++++++++++-------- AK/LexicalPath.h | 24 ++-- Kernel/FileSystem/VirtualFileSystem.cpp | 2 +- Kernel/Syscalls/unveil.cpp | 6 +- Userland/Libraries/LibCore/File.cpp | 9 +- .../Libraries/LibGUI/EmojiInputDialog.cpp | 4 +- Userland/Libraries/LibGUI/FileSystemModel.cpp | 7 +- .../CodeGenerators/WrapperGenerator.cpp | 2 +- Userland/Utilities/basename.cpp | 2 +- Userland/Utilities/mkdir.cpp | 2 +- 10 files changed, 98 insertions(+), 79 deletions(-) diff --git a/AK/LexicalPath.cpp b/AK/LexicalPath.cpp index 803cfa5676..8a9ae25155 100644 --- a/AK/LexicalPath.cpp +++ b/AK/LexicalPath.cpp @@ -12,85 +12,102 @@ namespace AK { +char s_single_dot = '.'; + LexicalPath::LexicalPath(String s) : m_string(move(s)) { canonicalize(); } +Vector LexicalPath::parts() const +{ + Vector vector; + vector.ensure_capacity(m_parts.size()); + for (auto& part : m_parts) + vector.unchecked_append(part); + return vector; +} + void LexicalPath::canonicalize() { + // NOTE: We never allow an empty m_string, if it's empty, we just set it to '.'. if (m_string.is_empty()) { + m_string = "."; + m_dirname = m_string; + m_basename = {}; + m_title = {}; + m_extension = {}; m_parts.clear(); return; } - m_is_absolute = m_string[0] == '/'; - auto parts = m_string.split_view('/'); + // NOTE: If there are no dots, no '//' and the path doesn't end with a slash, it is already canonical. + if (m_string.contains("."sv) || m_string.contains("//"sv) || m_string.ends_with('/')) { + auto parts = m_string.split_view('/'); + size_t approximate_canonical_length = 0; + Vector canonical_parts; - size_t approximate_canonical_length = 0; - Vector canonical_parts; - - for (size_t i = 0; i < parts.size(); ++i) { - auto& part = parts[i]; - if (part == ".") - continue; - if (part == "..") { - if (canonical_parts.is_empty()) { - if (m_is_absolute) { - // At the root, .. does nothing. - continue; - } - } else { - if (canonical_parts.last() != "..") { - // A .. and a previous non-.. part cancel each other. - canonical_parts.take_last(); - continue; + for (auto& part : parts) { + if (part == ".") + continue; + if (part == "..") { + if (canonical_parts.is_empty()) { + if (is_absolute()) { + // At the root, .. does nothing. + continue; + } + } else { + if (canonical_parts.last() != "..") { + // A .. and a previous non-.. part cancel each other. + canonical_parts.take_last(); + continue; + } } } - } - if (!part.is_empty()) { approximate_canonical_length += part.length() + 1; canonical_parts.append(part); } - } - if (canonical_parts.is_empty()) { - m_string = m_basename = m_dirname = m_is_absolute ? "/" : "."; - return; + + if (canonical_parts.is_empty() && !is_absolute()) + canonical_parts.append("."); + + StringBuilder builder(approximate_canonical_length); + if (is_absolute()) + builder.append('/'); + builder.join('/', canonical_parts); + m_string = builder.to_string(); } - StringBuilder dirname_builder(approximate_canonical_length); - for (size_t i = 0; i < canonical_parts.size() - 1; ++i) { - auto& canonical_part = canonical_parts[i]; - if (m_is_absolute || i != 0) - dirname_builder.append('/'); - dirname_builder.append(canonical_part); - } - m_dirname = dirname_builder.to_string(); + m_parts = m_string.split_view('/'); - if (m_dirname.is_empty()) { - m_dirname = m_is_absolute ? "/" : "."; + auto last_slash_index = m_string.view().find_last_of('/'); + if (!last_slash_index.has_value()) { + // The path contains a single part and is not absolute. m_dirname = "."sv + m_dirname = { &s_single_dot, 1 }; + } else if (*last_slash_index == 0) { + // The path contains a single part and is absolute. m_dirname = "/"sv + m_dirname = m_string.substring_view(0, 1); + } else { + m_dirname = m_string.substring_view(0, *last_slash_index); } - m_basename = canonical_parts.last(); + if (m_string == "/") + m_basename = m_string; + else { + VERIFY(m_parts.size() > 0); + m_basename = m_parts.last(); + } - Optional last_dot = StringView(m_basename).find_last_of('.'); - if (last_dot.has_value()) { - m_title = m_basename.substring(0, last_dot.value()); - m_extension = m_basename.substring(last_dot.value() + 1, m_basename.length() - last_dot.value() - 1); + auto last_dot_index = m_basename.find_last_of('.'); + // NOTE: if the dot index is 0, this means we have ".foo", it's not an extension, as the title would then be "". + if (last_dot_index.has_value() && *last_dot_index != 0) { + m_title = m_basename.substring_view(0, *last_dot_index); + m_extension = m_basename.substring_view(*last_dot_index + 1); } else { m_title = m_basename; + m_extension = {}; } - - StringBuilder builder(approximate_canonical_length); - for (size_t i = 0; i < canonical_parts.size(); ++i) { - auto& canonical_part = canonical_parts[i]; - if (m_is_absolute || i != 0) - builder.append('/'); - builder.append(canonical_part); - } - m_parts = move(canonical_parts); - m_string = builder.to_string(); } bool LexicalPath::has_extension(StringView const& extension) const diff --git a/AK/LexicalPath.h b/AK/LexicalPath.h index 257b39963b..4c425c4ed3 100644 --- a/AK/LexicalPath.h +++ b/AK/LexicalPath.h @@ -17,15 +17,16 @@ public: LexicalPath() = default; explicit LexicalPath(String); - bool is_absolute() const { return m_is_absolute; } + bool is_absolute() const { return !m_string.is_empty() && m_string[0] == '/'; } String const& string() const { return m_string; } - String const& dirname() const { return m_dirname; } - String const& basename() const { return m_basename; } - String const& title() const { return m_title; } - String const& extension() const { return m_extension; } + StringView const& dirname() const { return m_dirname; } + StringView const& basename() const { return m_basename; } + StringView const& title() const { return m_title; } + StringView const& extension() const { return m_extension; } - Vector const& parts() const { return m_parts; } + Vector const& parts_view() const { return m_parts; } + Vector parts() const; bool has_extension(StringView const&) const; @@ -71,13 +72,12 @@ public: private: void canonicalize(); - Vector m_parts; + Vector m_parts; String m_string; - String m_dirname; - String m_basename; - String m_title; - String m_extension; - bool m_is_absolute { false }; + StringView m_dirname; + StringView m_basename; + StringView m_title; + StringView m_extension; }; template<> diff --git a/Kernel/FileSystem/VirtualFileSystem.cpp b/Kernel/FileSystem/VirtualFileSystem.cpp index 1002b9f67b..a6341df577 100644 --- a/Kernel/FileSystem/VirtualFileSystem.cpp +++ b/Kernel/FileSystem/VirtualFileSystem.cpp @@ -834,7 +834,7 @@ UnveilNode const& VFS::find_matching_unveiled_path(StringView path) auto& unveil_root = Process::current()->unveiled_paths(); LexicalPath lexical_path { path }; - auto& path_parts = lexical_path.parts(); + auto& path_parts = lexical_path.parts_view(); return unveil_root.traverse_until_last_accessible_node(path_parts.begin(), path_parts.end()); } diff --git a/Kernel/Syscalls/unveil.cpp b/Kernel/Syscalls/unveil.cpp index 409c0df71e..0a9248b823 100644 --- a/Kernel/Syscalls/unveil.cpp +++ b/Kernel/Syscalls/unveil.cpp @@ -99,8 +99,8 @@ KResultOr Process::sys$unveil(Userspace Process::sys$unveil(Userspace Optional { auto path = LexicalPath::join(parent.path(), *it).string(); diff --git a/Userland/Libraries/LibCore/File.cpp b/Userland/Libraries/LibCore/File.cpp index 0a08414396..a8e62d05a0 100644 --- a/Userland/Libraries/LibCore/File.cpp +++ b/Userland/Libraries/LibCore/File.cpp @@ -271,16 +271,17 @@ static String get_duplicate_name(const String& path, int duplicate_count) LexicalPath lexical_path(path); StringBuilder duplicated_name; duplicated_name.append('/'); - for (size_t i = 0; i < lexical_path.parts().size() - 1; ++i) { - duplicated_name.appendff("{}/", lexical_path.parts()[i]); + auto& parts = lexical_path.parts_view(); + for (size_t i = 0; i < parts.size() - 1; ++i) { + duplicated_name.appendff("{}/", parts[i]); } auto prev_duplicate_tag = String::formatted("({})", duplicate_count); auto title = lexical_path.title(); if (title.ends_with(prev_duplicate_tag)) { // remove the previous duplicate tag "(n)" so we can add a new tag. - title = title.substring(0, title.length() - prev_duplicate_tag.length()); + title = title.substring_view(0, title.length() - prev_duplicate_tag.length()); } - duplicated_name.appendff("{} ({})", lexical_path.title(), duplicate_count); + duplicated_name.appendff("{} ({})", title, duplicate_count); if (!lexical_path.extension().is_empty()) { duplicated_name.appendff(".{}", lexical_path.extension()); } diff --git a/Userland/Libraries/LibGUI/EmojiInputDialog.cpp b/Userland/Libraries/LibGUI/EmojiInputDialog.cpp index 098a2ce1ab..7a25e701c3 100644 --- a/Userland/Libraries/LibGUI/EmojiInputDialog.cpp +++ b/Userland/Libraries/LibGUI/EmojiInputDialog.cpp @@ -26,10 +26,10 @@ static Vector supported_emoji_code_points() auto lexical_path = LexicalPath(filename); if (lexical_path.extension() != "png") continue; - auto basename = lexical_path.basename(); + auto& basename = lexical_path.basename(); if (!basename.starts_with("U+")) continue; - u32 code_point = strtoul(basename.characters() + 2, nullptr, 16); + u32 code_point = strtoul(basename.to_string().characters() + 2, nullptr, 16); code_points.append(code_point); } return code_points; diff --git a/Userland/Libraries/LibGUI/FileSystemModel.cpp b/Userland/Libraries/LibGUI/FileSystemModel.cpp index 71e08653f7..fe5f31a4be 100644 --- a/Userland/Libraries/LibGUI/FileSystemModel.cpp +++ b/Userland/Libraries/LibGUI/FileSystemModel.cpp @@ -202,15 +202,16 @@ FileSystemModel::Node const* FileSystemModel::node_for_path(String const& path) if (lexical_path.string() == "/") return node; - for (size_t i = 0; i < lexical_path.parts().size(); ++i) { - auto& part = lexical_path.parts()[i]; + auto& parts = lexical_path.parts_view(); + for (size_t i = 0; i < parts.size(); ++i) { + auto& part = parts[i]; bool found = false; for (auto& child : node->children) { if (child.name == part) { const_cast(child).reify_if_needed(); node = &child; found = true; - if (i == lexical_path.parts().size() - 1) + if (i == parts.size() - 1) return node; break; } diff --git a/Userland/Libraries/LibWeb/CodeGenerators/WrapperGenerator.cpp b/Userland/Libraries/LibWeb/CodeGenerators/WrapperGenerator.cpp index 31626bdb6f..9b5371a801 100644 --- a/Userland/Libraries/LibWeb/CodeGenerators/WrapperGenerator.cpp +++ b/Userland/Libraries/LibWeb/CodeGenerators/WrapperGenerator.cpp @@ -402,7 +402,7 @@ int main(int argc, char** argv) } LexicalPath lexical_path(path); - auto namespace_ = lexical_path.parts().at(lexical_path.parts().size() - 2); + auto& namespace_ = lexical_path.parts_view().at(lexical_path.parts_view().size() - 2); auto data = file_or_error.value()->read_all(); auto interface = IDL::parse_interface(path, data); diff --git a/Userland/Utilities/basename.cpp b/Userland/Utilities/basename.cpp index 4b6031715e..fa2b480f1d 100644 --- a/Userland/Utilities/basename.cpp +++ b/Userland/Utilities/basename.cpp @@ -27,7 +27,7 @@ int main(int argc, char** argv) auto result = LexicalPath::basename(path); if (!suffix.is_null() && result.length() != suffix.length() && result.ends_with(suffix)) - result = result.substring(0, result.length() - suffix.length()); + result = result.substring_view(0, result.length() - suffix.length()); outln("{}", result); return 0; diff --git a/Userland/Utilities/mkdir.cpp b/Userland/Utilities/mkdir.cpp index 05752966b9..74a02e2246 100644 --- a/Userland/Utilities/mkdir.cpp +++ b/Userland/Utilities/mkdir.cpp @@ -44,7 +44,7 @@ int main(int argc, char** argv) StringBuilder path_builder; if (lexical_path.is_absolute()) path_builder.append("/"); - for (auto& part : lexical_path.parts()) { + for (auto& part : lexical_path.parts_view()) { path_builder.append(part); auto path = path_builder.build(); struct stat st;