
Shell: Add support for heredocs to the POSIX parser

Ali Mohammad Pur 2023-02-16 09:52:13 +03:30 committed by Ali Mohammad Pur
parent a5e4bc4faf
commit 2881bb4c3a
4 changed files with 333 additions and 20 deletions
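For context, heredocs are standard POSIX shell syntax: <<WORD reads the lines that follow as the command's standard input until a line consisting of exactly WORD, and <<-WORD additionally strips leading tab characters from each body line and from the delimiter line. A minimal sketch of the input this commit teaches the lexer to handle, assuming plain POSIX sh semantics:

cat <<EOF
Hello, $USER.
EOF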


@@ -23,8 +23,11 @@ static bool is_part_of_operator(StringView text, char ch)
namespace Shell::Posix {
-Vector<Token> Lexer::batch_next()
+Vector<Token> Lexer::batch_next(Optional<Reduction> starting_reduction)
{
if (starting_reduction.has_value())
m_next_reduction = *starting_reduction;
for (; m_next_reduction != Reduction::None;) {
auto result = reduce(m_next_reduction);
m_next_reduction = result.next_reduction;
@@ -55,6 +58,18 @@ char Lexer::consume()
return ch;
}
void Lexer::reconsume(StringView string)
{
for (auto byte : string.bytes()) {
if (byte == '\n') {
m_state.position.end_line.line_number++;
m_state.position.end_line.line_column = 0;
}
m_state.position.end_offset++;
}
}
bool Lexer::consume_specific(char ch)
{
if (m_lexer.peek() == ch) {
@@ -95,6 +110,8 @@ Lexer::ReductionResult Lexer::reduce(Reduction reduction)
return reduce_command_or_arithmetic_substitution_expansion();
case Reduction::ExtendedParameterExpansion:
return reduce_extended_parameter_expansion();
case Reduction::HeredocContents:
return reduce_heredoc_contents();
}
VERIFY_NOT_REACHED();
@@ -108,6 +125,91 @@ Lexer::ReductionResult Lexer::reduce_end()
};
}
Lexer::HeredocKeyResult Lexer::process_heredoc_key(Token const& token)
{
StringBuilder builder;
enum ParseState {
Free,
InDoubleQuotes,
InSingleQuotes,
};
Vector<ParseState, 4> parse_state;
parse_state.append(Free);
bool escaped = false;
bool had_a_single_quote_segment = false;
for (auto byte : token.value.bytes()) {
switch (parse_state.last()) {
case Free:
switch (byte) {
case '"':
if (escaped) {
builder.append(byte);
escaped = false;
} else {
parse_state.append(InDoubleQuotes);
}
break;
case '\'':
if (escaped) {
builder.append(byte);
escaped = false;
} else {
had_a_single_quote_segment = true;
parse_state.append(InSingleQuotes);
}
break;
case '\\':
if (escaped) {
builder.append(byte);
escaped = false;
} else {
escaped = true;
}
break;
default:
if (escaped) {
builder.append('\\');
escaped = false;
}
builder.append(byte);
break;
}
break;
case InDoubleQuotes:
if (!escaped && byte == '"') {
parse_state.take_last();
break;
}
if (escaped) {
if (byte != '"')
builder.append('\\');
builder.append(byte);
break;
}
if (byte == '\\')
escaped = true;
else
builder.append(byte);
break;
case InSingleQuotes:
if (byte == '\'') {
parse_state.take_last();
break;
}
builder.append(byte);
break;
}
}
// NOTE: Not checking the final state as any garbage that even partially parses is allowed to be used as a key :/
return {
.key = builder.to_deprecated_string(),
.allow_interpolation = !had_a_single_quote_segment,
};
}
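The observable effect of process_heredoc_key() is that the delimiter word is unquoted before it is matched, and any single-quoted segment in it turns interpolation off for the body. A sketch of the difference, assuming standard POSIX semantics:

cat <<KEY
$HOME
KEY
# prints your home directory; an unquoted key allows interpolation

cat <<'KEY'
$HOME
KEY
# prints the literal text $HOME; a quoted key disables interpolation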
Lexer::ReductionResult Lexer::reduce_operator()
{
if (m_lexer.is_eof()) {
@@ -142,8 +244,25 @@ Lexer::ReductionResult Lexer::reduce_operator()
m_state.position.start_line = m_state.position.end_line;
}
auto expect_heredoc_entry = !tokens.is_empty() && (tokens.last().type == Token::Type::DoubleLessDash || tokens.last().type == Token::Type::DoubleLess);
auto result = reduce(Reduction::Start);
tokens.extend(move(result.tokens));
while (expect_heredoc_entry && tokens.size() == 1) {
result = reduce(result.next_reduction);
tokens.extend(move(result.tokens));
}
if (expect_heredoc_entry && tokens.size() > 1) {
auto [key, interpolation] = process_heredoc_key(tokens[1]);
m_state.heredoc_entries.enqueue(HeredocEntry {
.key = key,
.allow_interpolation = interpolation,
.dedent = tokens[0].type == Token::Type::DoubleLessDash,
});
}
return {
.tokens = move(tokens),
.next_reduction = result.next_reduction,
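Since completed keys are enqueued rather than stored one at a time, several heredocs can be opened on a single line, and their bodies are then consumed in the order the redirections appeared. A sketch, assuming standard POSIX semantics:

cat <<ONE; cat <<TWO
first body
ONE
second body
TWO
# prints "first body", then "second body"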
@@ -160,6 +279,7 @@ Lexer::ReductionResult Lexer::reduce_comment()
}
if (consume() == '\n') {
m_state.on_new_line = true;
return {
.tokens = { Token::newline() },
.next_reduction = Reduction::Start,
@@ -352,7 +472,7 @@ Lexer::ReductionResult Lexer::reduce_command_expansion()
};
}
-Lexer::ReductionResult Lexer::reduce_start()
+Lexer::ReductionResult Lexer::reduce_heredoc_contents()
{
if (m_lexer.is_eof()) {
auto tokens = Token::maybe_from_state(m_state);
@@ -366,6 +486,107 @@ Lexer::ReductionResult Lexer::reduce_start()
};
}
if (!m_state.escaping && consume_specific('\\')) {
m_state.escaping = true;
m_state.buffer.append('\\');
return {
.tokens = {},
.next_reduction = Reduction::HeredocContents,
};
}
if (!m_state.escaping && consume_specific('$')) {
m_state.buffer.append('$');
if (m_lexer.next_is("("))
m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() });
else
m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range() });
return {
.tokens = {},
.next_reduction = Reduction::Expansion,
};
}
if (!m_state.escaping && consume_specific('`')) {
m_state.buffer.append('`');
m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() });
return {
.tokens = {},
.next_reduction = Reduction::CommandExpansion,
};
}
m_state.escaping = false;
m_state.buffer.append(consume());
return {
.tokens = {},
.next_reduction = Reduction::HeredocContents,
};
}
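Note that reduce_heredoc_contents() only treats backslash, $, and backquote specially, mirroring the POSIX rule that an interpolating heredoc body behaves roughly like a double-quoted string without the surrounding quotes. A sketch:

cat <<END
user: $USER
cwd: $(pwd)
literal dollar: \$USER
END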
Lexer::ReductionResult Lexer::reduce_start()
{
auto was_on_new_line = m_state.on_new_line;
m_state.on_new_line = false;
if (m_lexer.is_eof()) {
auto tokens = Token::maybe_from_state(m_state);
m_state.buffer.clear();
m_state.position.start_offset = m_state.position.end_offset;
m_state.position.start_line = m_state.position.end_line;
return {
.tokens = move(tokens),
.next_reduction = Reduction::End,
};
}
if (was_on_new_line && !m_state.heredoc_entries.is_empty()) {
auto const& entry = m_state.heredoc_entries.head();
auto start_index = m_lexer.tell();
Optional<size_t> end_index;
for (; !m_lexer.is_eof();) {
auto index = m_lexer.tell();
auto possible_end_index = m_lexer.tell();
if (m_lexer.consume_specific('\n')) {
if (entry.dedent)
m_lexer.ignore_while(is_any_of("\t"sv));
if (m_lexer.consume_specific(entry.key.view())) {
if (m_lexer.consume_specific('\n') || m_lexer.is_eof()) {
end_index = possible_end_index;
break;
}
}
}
if (m_lexer.tell() == index)
m_lexer.ignore();
}
auto contents = m_lexer.input().substring_view(start_index, end_index.value_or(m_lexer.tell()) - start_index);
reconsume(contents);
m_state.buffer.clear();
m_state.buffer.append(contents);
auto token = Token::maybe_from_state(m_state).first();
token.relevant_heredoc_key = entry.key;
token.type = Token::Type::HeredocContents;
m_state.heredoc_entries.dequeue();
m_state.on_new_line = true;
m_state.buffer.clear();
return {
.tokens = { move(token) },
.next_reduction = Reduction::Start,
};
}
if (m_state.escaping && consume_specific('\n')) {
m_state.escaping = false;
@@ -391,6 +612,8 @@ Lexer::ReductionResult Lexer::reduce_start()
auto tokens = Token::maybe_from_state(m_state);
tokens.append(Token::newline());
m_state.on_new_line = true;
m_state.buffer.clear();
m_state.position.start_offset = m_state.position.end_offset;
m_state.position.start_line = m_state.position.end_line;
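The body scan in reduce_start() above looks for the key only at the start of a line, and for dedenting (<<-) entries skips leading tabs before matching, which is what allows heredocs to be indented inside compound commands. A sketch, where the indentation must be tab characters rather than spaces:

if true; then
	cat <<-END
		indented body
	END
fi
# prints "indented body" with the leading tabs stripped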
@@ -678,6 +901,8 @@ StringView Token::type_name() const
return "Clobber"sv;
case Type::Semicolon:
return "Semicolon"sv;
case Type::HeredocContents:
return "HeredocContents"sv;
case Type::AssignmentWord:
return "AssignmentWord"sv;
case Type::Bang: