From 5efcec308af2e3aa4f5e1deff45fb8019b7192f5 Mon Sep 17 00:00:00 2001
From: Peter Elliott <pelliott@serenityos.org>
Date: Wed, 26 Oct 2022 19:27:47 -0600
Subject: [PATCH] Utilities: Rewrite sort(1) to be more posixy

---
 Userland/Utilities/sort.cpp | 121 ++++++++++++++++++++++++++++++------
 1 file changed, 101 insertions(+), 20 deletions(-)

diff --git a/Userland/Utilities/sort.cpp b/Userland/Utilities/sort.cpp
index 8457f7ca5b..46d0a9ca80 100644
--- a/Userland/Utilities/sort.cpp
+++ b/Userland/Utilities/sort.cpp
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
+ * Copyright (c) 2022, Peter Elliott <pelliott@serenityos.org>
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
@@ -7,39 +8,119 @@
 #include <AK/DeprecatedString.h>
 #include <AK/QuickSort.h>
 #include <AK/Vector.h>
+#include <LibCore/ArgsParser.h>
+#include <LibCore/Stream.h>
 #include <LibCore/System.h>
 #include <LibMain/Main.h>
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
+#include <ctype.h>
+
+struct Line {
+    StringView key;
+    long int numeric_key;
+    DeprecatedString line;
+    bool numeric;
+
+    bool operator<(Line const& other) const
+    {
+        if (numeric)
+            return numeric_key < other.numeric_key;
+
+        return key < other.key;
+    }
+
+    bool operator==(Line const& other) const
+    {
+        if (numeric)
+            return numeric_key == other.numeric_key;
+
+        return key == other.key;
+    }
+
+private:
+};
+
+template<>
+struct AK::Traits<Line> : public GenericTraits<Line> {
+    static unsigned hash(Line l)
+    {
+        if (l.numeric)
+            return l.numeric_key;
+
+        return l.key.hash();
+    }
+};
+
+struct Options {
+    size_t key_field { 0 };
+    bool unique { false };
+    bool numeric { false };
+    StringView separator { "\0", 1 };
+    Vector<DeprecatedString> files;
+};
+
+static ErrorOr<void> load_file(Options options, StringView filename, Vector<Line>& lines, HashTable<Line>& seen)
+{
+    auto file = TRY(Core::Stream::BufferedFile::create(
+        TRY(Core::Stream::File::open_file_or_standard_stream(filename, Core::Stream::OpenMode::Read))));
+
+    // FIXME: Unlimited line length
+    auto buffer = TRY(ByteBuffer::create_uninitialized(4096));
+    while (TRY(file->can_read_line())) {
+        DeprecatedString line = TRY(file->read_line(buffer));
+
+        StringView key = line;
+        if (options.key_field != 0) {
+            auto split = (options.separator[0])
+                ? line.split_view(options.separator[0])
+                : line.split_view(isspace);
+            if (options.key_field - 1 >= split.size()) {
+                key = ""sv;
+            } else {
+                key = split[options.key_field - 1];
+            }
+        }
+
+        Line l = { key, key.to_int().value_or(0), line, options.numeric };
+
+        if (!options.unique || !seen.contains(l)) {
+            lines.append(l);
+            if (options.unique)
+                seen.set(l);
+        }
+    }
+
+    return {};
+}
 
 ErrorOr<int> serenity_main([[maybe_unused]] Main::Arguments arguments)
 {
-    TRY(Core::System::pledge("stdio"sv));
+    TRY(Core::System::pledge("stdio rpath"));
 
-    Vector<DeprecatedString> lines;
+    Options options;
 
-    for (;;) {
-        char* buffer = nullptr;
-        ssize_t buflen = 0;
-        size_t n;
-        errno = 0;
-        buflen = getline(&buffer, &n, stdin);
-        if (buflen == -1 && errno != 0) {
-            perror("getline");
-            exit(1);
+    Core::ArgsParser args_parser;
+    args_parser.add_option(options.key_field, "The field to sort by", "key-field", 'k', "keydef");
+    args_parser.add_option(options.unique, "Don't emit duplicate lines", "unique", 'u');
+    args_parser.add_option(options.numeric, "treat the key field as a number", "numeric", 'n');
+    args_parser.add_option(options.separator, "The separator to split fields by", "sep", 't', "char");
+    args_parser.add_positional_argument(options.files, "Files to sort", "file", Core::ArgsParser::Required::No);
+    args_parser.parse(arguments);
+
+    Vector<Line> lines;
+    HashTable<Line> seen;
+
+    if (options.files.size() == 0) {
+        TRY(load_file(options, "-"sv, lines, seen));
+    } else {
+        for (auto& file : options.files) {
+            TRY(load_file(options, file, lines, seen));
         }
-        if (buflen == -1)
-            break;
-        lines.append({ buffer, AK::ShouldChomp::Chomp });
     }
 
     quick_sort(lines);
 
     for (auto& line : lines) {
-        outln("{}", line);
+        outln("{}", line.line);
     }
 
     return 0;