From 72125827da493d056572ec264806ae26abb5681b Mon Sep 17 00:00:00 2001 From: Sergey Bugaev Date: Tue, 4 May 2021 18:41:38 +0300 Subject: [PATCH] Userland: Refactor head(1) to not use stdio This fixes extensive copying data around, and also makes head(1) in bytes mode read exactly as much data as it needs. Also, rename --characters to --bytes: that's exactly what it does (actual character counting is way more complicated), and that's what the option is called in GNU coreutils. Fixes https://github.com/SerenityOS/serenity/issues/6852 --- Userland/Utilities/head.cpp | 110 +++++++++++++++++++----------------- 1 file changed, 58 insertions(+), 52 deletions(-) diff --git a/Userland/Utilities/head.cpp b/Userland/Utilities/head.cpp index 8455dedc23..ad2a095980 100644 --- a/Userland/Utilities/head.cpp +++ b/Userland/Utilities/head.cpp @@ -4,14 +4,16 @@ * SPDX-License-Identifier: BSD-2-Clause */ +#include #include #include #include +#include #include #include #include -int head(const String& filename, bool print_filename, int line_count, int char_count); +int head(const String& filename, bool print_filename, ssize_t line_count, ssize_t byte_count); int main(int argc, char** argv) { @@ -20,8 +22,8 @@ int main(int argc, char** argv) return 1; } - int line_count = 0; - int char_count = 0; + int line_count = -1; + int byte_count = -1; bool never_print_filenames = false; bool always_print_filenames = false; Vector files; @@ -29,13 +31,13 @@ int main(int argc, char** argv) Core::ArgsParser args_parser; args_parser.set_general_help("Print the beginning ('head') of a file."); args_parser.add_option(line_count, "Number of lines to print (default 10)", "lines", 'n', "number"); - args_parser.add_option(char_count, "Number of characters to print", "characters", 'c', "number"); + args_parser.add_option(byte_count, "Number of bytes to print", "bytes", 'c', "number"); args_parser.add_option(never_print_filenames, "Never print filenames", "quiet", 'q'); args_parser.add_option(always_print_filenames, "Always print filenames", "verbose", 'v'); args_parser.add_positional_argument(files, "File to process", "file", Core::ArgsParser::Required::No); args_parser.parse(argc, argv); - if (line_count == 0 && char_count == 0) { + if (line_count == -1 && byte_count == -1) { line_count = 10; } @@ -46,13 +48,13 @@ int main(int argc, char** argv) print_filenames = false; if (files.is_empty()) { - return head("", print_filenames, line_count, char_count); + return head("", print_filenames, line_count, byte_count); } int rc = 0; for (auto& file : files) { - if (head(file, print_filenames, line_count, char_count) != 0) { + if (head(file, print_filenames, line_count, byte_count) != 0) { rc = 1; } } @@ -60,17 +62,22 @@ int main(int argc, char** argv) return rc; } -int head(const String& filename, bool print_filename, int line_count, int char_count) +int head(const String& filename, bool print_filename, ssize_t line_count, ssize_t byte_count) { bool is_stdin = false; - FILE* fp = nullptr; + int fd = -1; + + ScopeGuard fd_close_guard = [&fd] { + if (fd > 0) + close(fd); + }; if (filename == "" || filename == "-") { - fp = stdin; + fd = 0; is_stdin = true; } else { - fp = fopen(filename.characters(), "r"); - if (!fp) { + fd = open(filename.characters(), O_RDONLY); + if (fd < 0) { fprintf(stderr, "can't open %s for reading: %s\n", filename.characters(), strerror(errno)); return 1; } @@ -84,56 +91,55 @@ int head(const String& filename, bool print_filename, int line_count, int char_c } } - if (line_count) { - for (int line = 0; line < line_count; ++line) { - char buffer[BUFSIZ]; - auto* str = fgets(buffer, sizeof(buffer), fp); - if (!str) - break; + fflush(stdout); - // specifically use fputs rather than puts, because fputs doesn't add - // its own newline. - fputs(str, stdout); + size_t buffer_size = line_count != -1 ? BUFSIZ : min((size_t)BUFSIZ, (size_t)byte_count); + char buffer[buffer_size]; + + while (line_count > 0 || byte_count > 0) { + size_t ntoread = line_count != -1 ? buffer_size : min(buffer_size, (size_t)byte_count); + ssize_t nread = read(fd, buffer, ntoread); + if (nread < 0) { + perror("read"); + return 1; + } else if (nread == 0) { + break; } - } else if (char_count) { - char buffer[BUFSIZ]; - while (char_count) { - int nread = fread(buffer, 1, min(BUFSIZ, char_count), fp); - if (nread > 0) { - int ncomplete = 0; - - while (ncomplete < nread) { - int nwrote = fwrite(&buffer[ncomplete], 1, nread - ncomplete, stdout); - if (nwrote > 0) - ncomplete += nwrote; - - if (feof(stdout)) { - fprintf(stderr, "unexpected eof writing to stdout\n"); - return 1; - } - - if (ferror(stdout)) { - fprintf(stderr, "error writing to stdout\n"); - return 1; - } + size_t ntowrite; + if (byte_count != -1) { + // Write out everything we've read, since we have explicitly ensured + // that we wouldn't read more than we want to write. + ntowrite = nread; + byte_count -= nread; + } else { + // Count line breaks. + ntowrite = 0; + while (line_count) { + const char* newline = strchr(buffer + ntowrite, '\n'); + if (newline) { + // Found another line break, include this line. + ntowrite = newline - buffer + 1; + line_count--; + } else { + // No more line breaks, write the whole thing. + ntowrite = nread; + break; } } + } - char_count -= nread; - - if (feof(fp)) - break; - - if (ferror(fp)) { - fprintf(stderr, "error reading input\n"); - break; + size_t ncomplete = 0; + while (ncomplete < ntowrite) { + ssize_t nwritten = write(1, buffer + ncomplete, ntowrite - ncomplete); + if (nwritten < 0) { + perror("write"); + return 1; } + ncomplete += nwritten; } } - fclose(fp); - if (print_filename) { puts(""); }