diff --git a/Userland/Utilities/uniq.cpp b/Userland/Utilities/uniq.cpp index 54fb89ba0a..0ddf69aa68 100644 --- a/Userland/Utilities/uniq.cpp +++ b/Userland/Utilities/uniq.cpp @@ -4,76 +4,111 @@ * SPDX-License-Identifier: BSD-2-Clause */ +#include #include +#include #include +#include #include -#include -#include -#include -#include #include -struct linebuf { - char* buf = NULL; - size_t len = 0; -}; - -static FILE* get_stream(char const* filepath, char const* perms) +static ErrorOr write_line_content(StringView line, size_t count, bool duplicates_only, bool print_count, Core::Stream::File& outfile) { - FILE* ret; + if (duplicates_only && count <= 1) + return {}; - if (filepath == nullptr) { - if (perms[0] == 'r') - return stdin; - return stdout; + if (print_count) + TRY(outfile.write(String::formatted("{} {}\n", count, line).bytes())); + else + TRY(outfile.write(String::formatted("{}\n", line).bytes())); + return {}; +} + +static StringView skip(StringView line, unsigned char_skip_count, unsigned field_skip_count) +{ + line = line.trim("\n"sv); + if (field_skip_count) { + bool in_field = false; + int field_index = 0; + unsigned current_field = 0; + for (size_t i = 0; i < line.length(); i++) { + char c = line[i]; + if (is_ascii_space(c)) { + in_field = false; + field_index = i; + if (++current_field > field_skip_count) + break; + } else if (!in_field) { + in_field = true; + } + } + line = line.substring_view(field_index); } - - ret = fopen(filepath, perms); - if (ret == nullptr) { - perror("fopen"); - exit(1); - } - - return ret; + char_skip_count = min(char_skip_count, line.length()); + return line.substring_view(char_skip_count); } ErrorOr serenity_main(Main::Arguments arguments) { TRY(Core::System::pledge("stdio rpath wpath cpath")); - char const* inpath = nullptr; - char const* outpath = nullptr; + StringView inpath; + StringView outpath; + bool duplicates_only = false; + bool unique_only = false; + bool ignore_case = false; + bool print_count = false; + unsigned skip_chars = 0; + unsigned skip_fields = 0; + Core::ArgsParser args_parser; + args_parser.add_option(duplicates_only, "Only print duplicated lines", "repeated", 'd'); + args_parser.add_option(unique_only, "Only print unique lines (default)", "unique", 'u'); + args_parser.add_option(ignore_case, "Ignore case when comparing lines", "ignore-case", 'i'); + args_parser.add_option(print_count, "Prefix each line by its number of occurrences", "count", 'c'); + args_parser.add_option(skip_chars, "Skip N chars", "skip-chars", 's', "N"); + args_parser.add_option(skip_fields, "Skip N fields", "skip-fields", 'f', "N"); args_parser.add_positional_argument(inpath, "Input file", "input", Core::ArgsParser::Required::No); args_parser.add_positional_argument(outpath, "Output file", "output", Core::ArgsParser::Required::No); args_parser.parse(arguments); - FILE* infile = get_stream(inpath, "r"); - FILE* outfile = get_stream(outpath, "w"); - - struct linebuf buffers[2]; - struct linebuf* previous = &(buffers[0]); - struct linebuf* current = &(buffers[1]); - bool first_run = true; - for (;;) { - errno = 0; - ssize_t rc = getline(&(current->buf), &(current->len), infile); - if (rc < 0 && errno != 0) { - perror("getline"); - exit(1); - } - if (rc < 0) - break; - if (!first_run && strcmp(current->buf, previous->buf) == 0) - continue; - - fputs(current->buf, outfile); - swap(current, previous); - first_run = false; + if (!unique_only && !duplicates_only) { + unique_only = true; + } else if (unique_only && duplicates_only) { + // Printing duplicated and unique lines shouldn't print anything + return 0; } - fclose(infile); - fclose(outfile); + auto infile = TRY(Core::Stream::BufferedFile::create(TRY(Core::Stream::File::open_file_or_standard_stream(inpath, Core::Stream::OpenMode::Read)))); + auto outfile = TRY(Core::Stream::File::open_file_or_standard_stream(outpath, Core::Stream::OpenMode::Write)); + + size_t count = 0; + ByteBuffer previous_buf = TRY(ByteBuffer::create_uninitialized(1024)); + ByteBuffer current_buf = TRY(ByteBuffer::create_uninitialized(1024)); + + StringView previous = TRY(infile->read_line(previous_buf)); + StringView previous_to_compare = skip(previous, skip_chars, skip_fields); + + while (TRY(infile->can_read_line())) { + // FIXME: The buffer does not automatically resize, + // and this will return EMSGSIZE if the read line + // is more than 1024 bytes. + StringView current = TRY(infile->read_line(current_buf)); + + StringView current_to_compare = skip(current, skip_chars, skip_fields); + bool lines_equal = ignore_case ? current_to_compare.equals_ignoring_case(previous_to_compare) : current_to_compare == previous_to_compare; + if (!lines_equal) { + TRY(write_line_content(previous, count, duplicates_only, print_count, *outfile)); + count = 1; + } else { + count++; + } + swap(current_to_compare, previous_to_compare); + swap(current_buf, previous_buf); + swap(current, previous); + } + + TRY(write_line_content(previous, count, duplicates_only, print_count, *outfile)); return 0; }