From 41fa1a1461df98126e4b3e9071e26d321adedc96 Mon Sep 17 00:00:00 2001 From: Rodrigo Tobar Date: Sun, 24 Oct 2021 11:32:56 +0800 Subject: [PATCH] Utilities: Add more commands, addresses, spaces, cmdline parsing to sed The initial version of sed implements only the `s` command (given on the command line) applied to all lines of the input file. While this is probably the most common scenario that people use sed with in the wild, it's limited in several ways: * `s` is only one of the many commands that sed is meant to implement. * Commands may take one or two addresses that limit their applicability to input lines. * Commands in general operate over the "pattern" and "hold" spaces rather than blindly over input lines. * Command line parameters include specifying a script file, and optionally input file(s) and/or a script to execute. This commit implements a big portion of these missing features: * It adds support for parsing *almost* all commands and their arguments. * It also implements the execution of a big portion of the commands. * It adds support for parsing the optional addresses that prefix a command, and enables/disables commands based on these address ranges. * It implements the pattern and hold spaces, which are the source of input/output for most of the commands. * It improves the command line argument handling to receive a script file, potentially multiple execution scripts, and optional input files. Some known missing functionality: * The `{` and `}` commands are not supported yet. * Pattern-based addresses are not supported yet. * Labels and branches are parsed, but not supported at runtime. 
--- Userland/Utilities/CMakeLists.txt | 2 +- Userland/Utilities/sed.cpp | 960 ++++++++++++++++++++++++++---- 2 files changed, 856 insertions(+), 106 deletions(-) diff --git a/Userland/Utilities/CMakeLists.txt b/Userland/Utilities/CMakeLists.txt index 906863dfa0..f672eaead5 100644 --- a/Userland/Utilities/CMakeLists.txt +++ b/Userland/Utilities/CMakeLists.txt @@ -3,7 +3,7 @@ list(APPEND SPECIAL_TARGETS test install) list(APPEND REQUIRED_TARGETS arp base64 basename cat chmod chown clear comm cp cut date dd df diff dirname dmesg du echo env expr false file find grep groups head host hostname id ifconfig kill killall ln logout ls mkdir mount mv nproc - pgrep pidof ping pkill pmap ps readlink realpath reboot rm rmdir route seq shutdown sleep sort stat stty su tail test + pgrep pidof ping pkill pmap ps readlink realpath reboot rm rmdir sed route seq shutdown sleep sort stat stty su tail test touch tr true umount uname uniq uptime w wc which whoami xargs yes ) list(APPEND RECOMMENDED_TARGETS diff --git a/Userland/Utilities/sed.cpp b/Userland/Utilities/sed.cpp index 8ecf6540aa..57bd9e96bf 100644 --- a/Userland/Utilities/sed.cpp +++ b/Userland/Utilities/sed.cpp @@ -1,11 +1,17 @@ /* * Copyright (c) 2022, Eli Youngs + * Copyright (c) 2023, Rodrigo Tobar * * SPDX-License-Identifier: BSD-2-Clause */ #include +#include #include +#include +#include +#include +#include #include #include #include @@ -14,138 +20,882 @@ #include #include -struct SubstitutionCommand { +class SedError { +public: + SedError() = default; + SedError(String&& message) + : m_message(move(message)) + { + } + + SedError(Error const& error) + { + *this = formatted("Internal sed error: {}", error.string_literal()); + } + + String const& message() const { return m_message; } + + template + static SedError formatted(CheckedFormatString&& fmtstr, Parameters const&... 
parameters) + { + return maybe_with_string(String::formatted(move(fmtstr), parameters...)); + } + + static SedError parsing_error(GenericLexer const& lexer, StringView message) + { + return parsing_error(lexer, "{}", message); + } + + template + static SedError parsing_error(GenericLexer const& lexer, CheckedFormatString&& fmtstr, Parameters const&... parameters) + { + StringBuilder builder; + builder.appendff("Parsing error at position {}: ", lexer.tell()); + builder.appendff(move(fmtstr), parameters...); + return maybe_with_string(String::from_utf8(builder.string_view())); + } + + static SedError from_error(Error const& error) + { + return formatted("Internal sed error: {}", error.string_literal()); + } + +private: + String m_message; + + static SedError maybe_with_string(ErrorOr maybe_string) + { + if (maybe_string.is_error()) + return SedError {}; + return SedError { maybe_string.release_value() }; + } +}; + +template +using SedErrorOr = ErrorOr; + +// function, maximum addresses +#define ENUMERATE_FUNCTIONS(F) \ + F('a', 1) \ + F('b', 2) \ + F('c', 2) \ + F('d', 2) \ + F('D', 2) \ + F('g', 2) \ + F('G', 2) \ + F('h', 2) \ + F('H', 2) \ + F('i', 1) \ + F('l', 2) \ + F('n', 2) \ + F('N', 2) \ + F('p', 2) \ + F('P', 2) \ + F('q', 1) \ + F('r', 1) \ + F('s', 2) \ + F('t', 2) \ + F('w', 2) \ + F('x', 2) \ + F('y', 2) \ + F(':', 0) \ + F('=', 1) + +enum class AddressType { + Unset, + Line, + LastLine, + ContextAddress, +}; + +class Address { +public: + Address() = default; + + explicit Address(size_t line) + : m_line_number(line) + , m_address_type(AddressType::Line) + { + } + + explicit Address(AddressType address_type) + : m_address_type(address_type) + { + VERIFY(address_type == AddressType::LastLine || address_type == AddressType::ContextAddress); + } + + size_t line_number() const + { + VERIFY(m_address_type == AddressType::Line); + return m_line_number; + } + + AddressType address_type() const { return m_address_type; } + + bool matches([[maybe_unused]] 
StringView pattern_space, size_t line_number, bool is_last_line) const + { + switch (m_address_type) { + case AddressType::Line: + return line_number == m_line_number; + case AddressType::LastLine: + return is_last_line; + default: + warnln("Addressing type not implemented: {}", int(m_address_type)); + return false; + } + } + +private: + size_t m_line_number { 0 }; + AddressType m_address_type { AddressType::Unset }; +}; + +namespace AK { +template<> +class Formatter
: public StandardFormatter { +public: + AK::ErrorOr format(FormatBuilder& format_builder, Address address) + { + auto& builder = format_builder.builder(); + switch (address.address_type()) { + case AddressType::Line: + builder.appendff("{}", address.line_number()); + break; + case AddressType::LastLine: + builder.append('$'); + break; + case AddressType::ContextAddress: + VERIFY_NOT_REACHED(); + case AddressType::Unset: + break; + } + return {}; + } +}; +} + +static bool is_command_separator(char c) +{ + return c == '\n' || c == ';'; +} + +template +struct TextArgument { + String text; + + static SedErrorOr parse(GenericLexer& lexer) + { + auto original_text = lexer.consume_until([is_escape_sequence = false](char c) mutable { + if (c == '\n' && !is_escape_sequence) + return true; + is_escape_sequence = c == '\\'; + return false; + }); + if (!original_text.starts_with("\\\n"sv)) + return SedError::parsing_error(lexer, "Command should be followed by \\ + \\n"sv); + auto text = TRY(String::from_utf8(original_text.substring_view(2))); + return ArgsT { TRY(text.replace("\\\n"sv, "\n"sv, AK::ReplaceMode::All)) }; + } +}; + +template +struct OptionalLabelArgument { + Optional label; + + static SedErrorOr parse(GenericLexer& lexer) + { + auto blanks = lexer.consume_while(is_ascii_blank); + if (blanks.is_empty()) + return SedError::parsing_error(lexer, "expected one or more blank characeters"sv); + if (lexer.next_is(is_command_separator)) + return ArgsT {}; + return ArgsT { lexer.consume_until(is_command_separator) }; + } +}; + +template +struct FilepathArgument { + static SedErrorOr parse(GenericLexer& lexer) + { + auto blanks = lexer.consume_while(is_ascii_blank); + if (blanks.is_empty()) + return SedError::parsing_error(lexer, "expected one or more blank characeters"sv); + auto filepath = lexer.consume_until(is_command_separator); + if (filepath.is_empty()) + return SedError::parsing_error(lexer, "input filename expected, none found"); + return ArgsT { {}, filepath }; + 
} +}; + +struct AArguments : TextArgument { }; + +struct BArguments : OptionalLabelArgument { }; + +struct CArguments : TextArgument { }; + +struct IArguments : TextArgument { }; + +struct RArguments : FilepathArgument { + StringView input_filepath; +}; + +struct SArguments { Regex regex; StringView replacement; PosixOptions options; + bool print; Optional output_filepath; + + static SedErrorOr parse(GenericLexer& lexer) + { + auto generic_error_message = "Incomplete substitution command"sv; + + if (lexer.is_eof()) + return SedError::parsing_error(lexer, generic_error_message); + + auto delimiter = lexer.consume(); + if (delimiter == '\n' || delimiter == '\\') + return SedError::parsing_error(lexer, "\\n and \\ cannot be used as delimiters."sv); + + auto pattern = lexer.consume_until(delimiter); + if (pattern.is_empty()) + return SedError::parsing_error(lexer, "Substitution patterns cannot be empty."sv); + + if (!lexer.consume_specific(delimiter)) + return SedError::parsing_error(lexer, generic_error_message); + + auto replacement = lexer.consume_until(delimiter); + + // According to Posix, "s/x/y" is an invalid substitution command. 
+ // It must have a closing delimiter: "s/x/y/" + if (!lexer.consume_specific(delimiter)) + return SedError::parsing_error(lexer, "The substitution command was not properly terminated."sv); + + PosixOptions options = PosixOptions(PosixFlags::Global | PosixFlags::SingleMatch); + bool print = false; + Optional output_filepath; + + auto flags = split_flags(lexer); + for (auto const& flag : flags) { + if (flag.starts_with('w')) { + auto flag_filepath = flag.substring_view(1).trim_whitespace(); + if (flag_filepath.is_empty()) + return SedError::parsing_error(lexer, "No filepath was provided for the 'w' flag."sv); + output_filepath = flag_filepath; + } else if (flag == "g"sv) { + // Allow multiple matches per line by un-setting the SingleMatch flag + options &= ~PosixFlags::SingleMatch; + } else if (flag == "i"sv || flag == "I"sv) { + options |= PosixFlags::Insensitive; + } else if (flag == "p"sv) { + print = true; + } else { + return SedError::parsing_error(lexer, "Unsupported flag for s command: {}", flag); + } + } + + return SArguments { Regex { pattern }, replacement, options, print, output_filepath }; + } + +private: + static Vector split_flags(GenericLexer& lexer) + { + Vector flags; + + while (!lexer.is_eof() && !lexer.next_is(is_command_separator)) { + StringView flag; + + if (lexer.next_is(is_ascii_digit)) { + flag = lexer.consume_while(is_ascii_digit); + } else if (lexer.peek() == 'w') { + flag = lexer.consume_until(is_command_separator); + } else { + flag = lexer.consume(1); + } + + flags.append(flag); + } + + return flags; + } }; -static Vector split_flags(StringView const& input) -{ - Vector flags; +struct TArguments : OptionalLabelArgument { }; - auto lexer = GenericLexer(input); - while (!lexer.is_eof()) { - StringView flag; +struct WArguments : FilepathArgument { + StringView output_filepath; +}; - if (lexer.next_is(is_ascii_digit)) { - flag = lexer.consume_while(is_ascii_digit); - } else if (lexer.peek() == 'w') { - flag = lexer.consume_all(); - } else { 
- flag = lexer.consume(1); - } +struct YArguments { + StringView characters; + StringView replacements; - flags.append(flag); + static SedErrorOr parse(GenericLexer& lexer) + { + return SedError::parsing_error(lexer, "not implemented"sv); + } +}; + +struct ColonArguments { + StringView label; + + static SedErrorOr parse(GenericLexer& lexer) + { + ColonArguments args {}; + args.label = lexer.consume_until(is_command_separator); + if (args.label.is_empty()) + return SedError::parsing_error(lexer, "label expected, none found"); + return args; + } +}; + +struct Command { + Address address1; + Address address2; + char function = '\0'; + Optional> arguments; + StringView arguments_view; + + void enable_for(StringView pattern_space, size_t line_number, bool is_last_line) + { + m_is_enabled = selects(pattern_space, line_number, is_last_line); } - return flags; + bool is_enabled() const { return m_is_enabled; } + +private: + bool selects(StringView pattern_space, size_t line_number, bool is_last_line) + { + // no address set, all patterns match + if (address1.address_type() == AddressType::Unset) { + VERIFY(address2.address_type() == AddressType::Unset); + return true; + } + + // single address set + if (address2.address_type() == AddressType::Unset) + return address1.matches(pattern_space, line_number, is_last_line); + + // two addresses + if (!m_is_selection_active && address1.matches(pattern_space, line_number, is_last_line)) { + m_is_selection_active = true; + return true; + } + if (m_is_selection_active && address2.matches(pattern_space, line_number, is_last_line)) { + m_is_selection_active = false; + return true; + } + return false; + } + + bool m_is_enabled { false }; + bool m_is_selection_active { false }; +}; + +namespace AK { +template<> +class Formatter : public StandardFormatter { +public: + AK::ErrorOr format(FormatBuilder& format_builder, Command const& command) + { + auto& builder = format_builder.builder(); + builder.appendff("{}", command.address1); + if 
(command.address2.address_type() != AddressType::Unset) { + builder.appendff(",{}", command.address2); + } + builder.append(command.function); + builder.append(command.arguments_view); + return {}; + } +}; } -static ErrorOr parse_command(StringView command) +struct AddressParsingResult { + Optional
address; +}; + +static Optional
parse_address(GenericLexer& lexer) { - auto generic_error_message = "Incomplete substitution command"sv; - - auto lexer = GenericLexer(command); - - auto address = lexer.consume_until('s'); - if (!address.is_empty()) - warnln("sed: Addresses are currently ignored"); - - if (!lexer.consume_specific('s')) - return Error::from_string_view(generic_error_message); - if (lexer.is_eof()) - return Error::from_string_view(generic_error_message); + return {}; - auto delimiter = lexer.consume(); - if (delimiter == '\n' || delimiter == '\\') - return Error::from_string_literal("\\n and \\ cannot be used as delimiters."); - - auto pattern = lexer.consume_until(delimiter); - if (pattern.is_empty()) - return Error::from_string_literal("Substitution patterns cannot be empty."); - - if (!lexer.consume_specific(delimiter)) - return Error::from_string_view(generic_error_message); - - auto replacement = lexer.consume_until(delimiter); - - // According to Posix, "s/x/y" is an invalid substitution command. - // It must have a closing delimiter: "s/x/y/" - if (!lexer.consume_specific(delimiter)) - return Error::from_string_literal("The substitution command was not properly terminated."); - - PosixOptions options = PosixOptions(PosixFlags::Global | PosixFlags::SingleMatch); - Optional output_filepath; - - auto flags = split_flags(lexer.consume_all()); - for (auto const& flag : flags) { - if (flag.starts_with('w')) { - auto flag_filepath = flag.substring_view(1).trim_whitespace(); - if (flag_filepath.is_empty()) - return Error::from_string_literal("No filepath was provided for the 'w' flag."); - output_filepath = flag_filepath; - } else if (flag == "g"sv) { - // Allow multiple matches per line by un-setting the SingleMatch flag - options &= ~PosixFlags::SingleMatch; - } else if (flag == "i"sv || flag == "I"sv) { - options |= PosixFlags::Insensitive; - } else { - warnln("sed: Unsupported flag: {}", flag); - } + if (lexer.peek() == '$') { + lexer.consume(); + return Address { 
AddressType::LastLine }; } - return SubstitutionCommand { Regex { pattern }, replacement, options, output_filepath }; + auto lineno = lexer.consume_while(AK::is_ascii_digit); + if (lineno.is_empty()) + return {}; + return Address { AK::StringUtils::convert_to_uint(lineno).release_value() }; +} + +template +static SedErrorOr verify_number_of_addresses(Command const& command) +{ + if constexpr (max_addresses == 2) { + return {}; + } else { + static_assert(max_addresses == 0 || max_addresses == 1); + auto c = command.function; + if constexpr (max_addresses == 0) { + if (command.address1.address_type() != AddressType::Unset) { + return SedError::formatted("'{}' doesn't take any address, at least one given", c); + } + } else { + if (command.address2.address_type() != AddressType::Unset) { + return SedError::formatted("'{}' takes a single address, two given", c); + } + } + } + return {}; +} + +static SedErrorOr parse_command(GenericLexer& lexer) +{ + lexer.consume_while(is_ascii_blank); + + Command command; + command.address1 = parse_address(lexer).value_or({}); + if (lexer.is_eof()) + return SedError::parsing_error(lexer, "Incomplete command"sv); + if (lexer.peek() == ',') { + lexer.consume(); + command.address2 = parse_address(lexer).value_or({}); + } + if (lexer.is_eof()) + return SedError::parsing_error(lexer, "Incomplete command"sv); + + char command_char = lexer.consume(); + +#define HANDLE_FUNCTION_CASE(c, max_addresses) \ + case c: \ + command.function = c; \ + TRY(verify_number_of_addresses(command)); \ + break; + + switch (command_char) { + ENUMERATE_FUNCTIONS(HANDLE_FUNCTION_CASE) + default: + return SedError::parsing_error(lexer, "Unknown function command '{}'", command_char); + } +#undef HANDLE_FUNCTION_CASE + + auto args_start = lexer.tell(); + switch (command_char) { + case 'a': + command.arguments = TRY(AArguments::parse(lexer)); + break; + case 'b': + command.arguments = TRY(BArguments::parse(lexer)); + break; + case 'c': + command.arguments = 
TRY(CArguments::parse(lexer)); + break; + case 'i': + command.arguments = TRY(IArguments::parse(lexer)); + break; + case 'r': + command.arguments = TRY(RArguments::parse(lexer)); + break; + case 's': + command.arguments = TRY(SArguments::parse(lexer)); + break; + case 't': + command.arguments = TRY(TArguments::parse(lexer)); + break; + case 'w': + command.arguments = TRY(WArguments::parse(lexer)); + break; + case 'y': + command.arguments = TRY(YArguments::parse(lexer)); + break; + case ':': + command.arguments = TRY(ColonArguments::parse(lexer)); + break; + default: { + auto padding = lexer.consume_until(is_command_separator); + if (!padding.is_whitespace()) { + warnln("Command had arguments but none were expected, ignoring: '{}'", padding); + } + } + } + + auto args_end = lexer.tell(); + VERIFY(args_end >= args_start); + auto args_length = args_end - args_start; + lexer.retreat(args_length); + command.arguments_view = lexer.consume(args_length); + return command; +} + +class Script { +public: + [[nodiscard]] bool add_script_part(StringView data) + { + auto last_pos = m_script.length(); + m_script.append(data); + auto lexer = GenericLexer(m_script.string_view().substring_view(last_pos)); + while (!lexer.is_eof()) { + if (lexer.is_eof()) + break; + auto maybe_command = parse_command(lexer); + if (maybe_command.is_error()) { + warnln("Problem while parsing script part: {}", maybe_command.release_error().message()); + return false; + }; + m_commands.append(maybe_command.release_value()); + lexer.consume_until(is_command_separator); + if (lexer.is_eof()) + break; + lexer.consume(); + } + return true; + } + + Vector& commands() { return m_commands; } + + ErrorOr> output_filenames() const + { + Vector output_filenames; + for (auto const& command : m_commands) { + if (!command.arguments.has_value()) + continue; + if (command.arguments->has()) { + auto const& s_arguments = command.arguments->get(); + if (s_arguments.output_filepath.has_value()) { + 
TRY(add(output_filenames, s_arguments.output_filepath.value())); + } + } else if (command.arguments->has()) { + TRY(add(output_filenames, command.arguments->get().output_filepath)); + } + } + return output_filenames; + } + + ErrorOr> input_filenames() const + { + Vector input_filenames; + for (auto const& command : m_commands) { + if (!command.arguments.has_value()) { + continue; + } + if (command.arguments->has()) { + TRY(add(input_filenames, command.arguments->get().input_filepath)); + } + } + return input_filenames; + } + +private: + StringBuilder m_script; + Vector m_commands; + + ErrorOr add(Vector& container, StringView element_sv) const + { + auto element = TRY(String::from_utf8(element_sv)); + TRY(container.try_append(move(element))); + return {}; + }; +}; + +enum class CycleDecision { + None, + Next, + Quit +}; + +class InputFile { + AK_MAKE_NONCOPYABLE(InputFile); + + InputFile(NonnullOwnPtr&& file) + : m_file(move(file)) + { + } + +public: + static ErrorOr create(NonnullOwnPtr&& file) + { + auto buffered_file = TRY(Core::BufferedFile::create(move(file))); + return InputFile(move(buffered_file)); + } + + static ErrorOr create_from_stdin() + { + return create(TRY(Core::File::standard_input())); + } + + InputFile(InputFile&&) = default; + InputFile& operator=(InputFile&&) = default; + + ErrorOr has_next() const + { + return m_file->can_read_line(); + } + + ErrorOr next() + { + VERIFY(TRY(has_next())); + m_current_line = TRY(m_file->read_line(m_buffer)); + ++m_line_number; + return m_current_line; + } + + size_t line_number() const { return m_line_number; } + +private: + NonnullOwnPtr m_file; + size_t m_line_number { 0 }; + DeprecatedString m_current_line; + constexpr static size_t MAX_SUPPORTED_LINE_SIZE = 4096; + Array m_buffer; +}; + +static ErrorOr write_pattern_space(Core::File& output, StringBuilder& pattern_space) +{ + TRY(output.write_until_depleted(pattern_space.string_view().bytes())); + TRY(output.write_until_depleted("\n"sv.bytes())); + return 
{}; +} + +static void print_unambiguous(StringView pattern_space) +{ + // TODO: find out the terminal width, folding width should be less than that + // to make it clear that folding is happening + constexpr size_t fold_width = 70; + + AK::StringBuilder unambiguous_output; + auto folded_append = [&unambiguous_output, current_line_length = size_t { 0 }](auto const& value, size_t length) mutable { + if (current_line_length + length < fold_width) { + current_line_length += length; + } else { + unambiguous_output.append("\\\n"sv); + current_line_length = length; + } + unambiguous_output.append(value); + }; + for (auto const c : pattern_space) { + if (c == '\\') + folded_append("\\\\"sv, 2); + else if (c == '\a') + folded_append("\\a"sv, 2); + else if (c == '\b') + folded_append("\\b"sv, 2); + else if (c == '\f') + folded_append("\\f"sv, 2); + else if (c == '\r') + folded_append("\\r"sv, 2); + else if (c == '\t') + folded_append("\\t"sv, 2); + else if (c == '\v') + folded_append("\\v"sv, 2); + else if (c == '\n') + folded_append("$\n"sv, 1); + else if (AK::is_ascii_printable(c)) + folded_append(c, 1); + else + folded_append(DeprecatedString::formatted("\\{:3o}", (unsigned char)c), 4); + } + outln("{}$", unambiguous_output.string_view()); +} + +static ErrorOr apply(Command const& command, StringBuilder& pattern_space, StringBuilder& hold_space, InputFile& input, bool suppress_default_output) +{ + auto stdout = TRY(Core::File::standard_output()); + auto cycle_decision = CycleDecision::None; + + switch (command.function) { + case 'd': + pattern_space.clear(); + cycle_decision = CycleDecision::Next; + break; + case 'g': + pattern_space = hold_space; + break; + case 'G': + pattern_space.append('\n'); + pattern_space.append(hold_space.string_view()); + break; + case 'h': + hold_space = pattern_space; + break; + case 'H': + hold_space.append('\n'); + hold_space.append(pattern_space.string_view()); + break; + case 'i': + outln("{}", command.arguments->get().text); + break; + 
case 'l': + print_unambiguous(pattern_space.string_view()); + break; + case 'n': + if (!suppress_default_output) + TRY(write_pattern_space(*stdout, pattern_space)); + if (TRY(input.has_next())) { + pattern_space.clear(); + pattern_space.append(TRY(input.next())); + } + break; + case 'p': + TRY(write_pattern_space(*stdout, pattern_space)); + break; + case 'P': { + auto pattern_sv = pattern_space.string_view(); + auto newline_position = pattern_sv.find('\n').value_or(pattern_sv.length() - 1); + TRY(stdout->write_until_depleted(pattern_sv.substring_view(0, newline_position + 1).bytes())); + break; + } + case 'q': + cycle_decision = CycleDecision::Quit; + break; + case 's': { + auto pattern_space_sv = pattern_space.string_view(); + auto const& s_args = command.arguments->get(); + auto result = s_args.regex.replace(pattern_space_sv, s_args.replacement, s_args.options); + auto replacement_made = result != pattern_space_sv; + pattern_space.clear(); + pattern_space.append(result); + if (replacement_made && s_args.print) + TRY(write_pattern_space(*stdout, pattern_space)); + break; + } + case 'x': + swap(pattern_space, hold_space); + break; + case '=': + outln("{}", input.line_number()); + break; + case '#': + break; + default: + warnln("Command not implemented: {}", command.function); + break; + } + + return cycle_decision; +} + +static ErrorOr run(Vector& inputs, Script& script, bool suppress_default_output) +{ + // TODO: verify all commands are valid + + StringBuilder pattern_space; + StringBuilder hold_space; + auto stdout = TRY(Core::File::standard_output()); + + // TODO: extend to multiple input files + auto& input = inputs[0]; + // main loop + while (TRY(input.has_next())) { + + // Avoid potential last, empty line + auto line = TRY(input.next()); + auto is_last_line = !TRY(input.has_next()); + + // TODO: "Reading from input shall be skipped if a was in the pattern space prior to a D command ending the previous cycle" + pattern_space.append(line); + + // Turn commands 
on/off depending on selection. We need + for (auto& command : script.commands()) + command.enable_for(pattern_space.string_view(), input.line_number(), is_last_line); + + // Go, go, go! + CycleDecision cycle_decision = CycleDecision::None; + for (auto& command : script.commands()) { + if (!command.is_enabled()) + continue; + auto command_cycle_decision = TRY(apply(command, pattern_space, hold_space, input, suppress_default_output)); + if (command_cycle_decision == CycleDecision::Next || command_cycle_decision == CycleDecision::Quit) { + cycle_decision = command_cycle_decision; + break; + } + } + + if (cycle_decision == CycleDecision::Next) + continue; + if (cycle_decision == CycleDecision::Quit) + break; + + if (!suppress_default_output) + TRY(write_pattern_space(*stdout, pattern_space)); + pattern_space.clear(); + } + return {}; } ErrorOr serenity_main(Main::Arguments args) { TRY(Core::System::pledge("stdio cpath rpath wpath")); - Core::ArgsParser args_parser; - - StringView command_input; - Vector filepaths; - - args_parser.add_positional_argument(command_input, "Command", "command_input", Core::ArgsParser::Required::Yes); - args_parser.add_positional_argument(filepaths, "File", "file", Core::ArgsParser::Required::No); - - args_parser.parse(args); - - auto command = TRY(parse_command(command_input)); - - Optional> maybe_output_file; - if (command.output_filepath.has_value()) - maybe_output_file = TRY(Core::File::open_file_or_standard_stream(command.output_filepath.release_value(), Core::File::OpenMode::Write)); - - if (filepaths.is_empty()) - filepaths = { "-"sv }; - - Array buffer {}; - for (auto const& filepath : filepaths) { - auto file_unbuffered = TRY(Core::File::open_file_or_standard_stream(filepath, Core::File::OpenMode::Read)); - auto file = TRY(Core::BufferedFile::create(move(file_unbuffered))); - - while (!file->is_eof()) { - auto line = TRY(file->read_line(buffer)); - - // Substitutions can apply to blank lines in the middle of a file, - // but not to 
the trailing newline that marks the end of a file. - if (line.is_empty() && file->is_eof()) - break; - - auto result = command.regex.replace(line, command.replacement, command.options); - outln(result); - - if (maybe_output_file.has_value()) { - auto const& output_file = maybe_output_file.value(); - TRY(output_file->write_until_depleted(result.bytes())); - TRY(output_file->write_until_depleted("\n"sv.bytes())); + bool suppress_default_output = false; + Core::ArgsParser arg_parser; + Script script; + Vector pos_args; + arg_parser.set_general_help("The Stream EDitor"); + arg_parser.add_option(suppress_default_output, "suppress default output", nullptr, 'n'); + arg_parser.add_option(Core::ArgsParser::Option { + .argument_mode = Core::ArgsParser::OptionArgumentMode::Required, + .help_string = "A file containing script commands", + .short_name = 'f', + .value_name = "script-file", + .accept_value = [&script](StringView script_file) { + auto maybe_file = Core::File::open(script_file, Core::File::OpenMode::Read); + if (maybe_file.is_error()) { + warnln("Failed to open script file: {}", maybe_file.release_error()); + return false; } + auto maybe_file_contents = maybe_file.release_value()->read_until_eof(1); + if (maybe_file_contents.is_error()) { + warnln("Failed to read contents of script file {}: {}", script_file, maybe_file_contents.release_error()); + return false; + } + return script.add_script_part(StringView { maybe_file_contents.release_value().bytes() }); + }, + }); + arg_parser.add_option(Core::ArgsParser::Option { + .argument_mode = Core::ArgsParser::OptionArgumentMode::Required, + .help_string = "A script of commands", + .short_name = 'e', + .value_name = "script", + .accept_value = [&script](StringView script_argument) { + return script.add_script_part(script_argument); + }, + }); + arg_parser.add_positional_argument(pos_args, "script and/or file", "...", Core::ArgsParser::Required::No); + arg_parser.parse(args); + + if (script.commands().is_empty()) { + if 
(pos_args.is_empty()) { + warnln("No script specified, aborting"); + return 1; } + if (!script.add_script_part(pos_args[0])) { + return 1; + } + pos_args.remove(0); } + for (auto const& input_filename : TRY(script.input_filenames())) { + TRY(Core::System::unveil(input_filename, "r"sv)); + } + for (auto const& output_filename : TRY(script.output_filenames())) { + TRY(Core::System::unveil(output_filename, "w"sv)); + } + + Vector inputs; + for (auto const& filename : pos_args) { + if (filename == "-"sv) { + inputs.empend(TRY(InputFile::create_from_stdin())); + } else { + auto file = TRY(Core::File::open(filename, Core::File::OpenMode::Read)); + inputs.empend(TRY(InputFile::create(move(file)))); + } + } + if (inputs.is_empty()) { + inputs.empend(TRY(InputFile::create_from_stdin())); + } + + TRY(run(inputs, script, suppress_default_output)); return 0; }