Shell: Add support for heredocs to the POSIX parser
This commit is contained in:
parent a5e4bc4faf
commit 2881bb4c3a

4 changed files with 333 additions and 20 deletions
@@ -23,8 +23,11 @@ static bool is_part_of_operator(StringView text, char ch)
 namespace Shell::Posix {
 
-Vector<Token> Lexer::batch_next()
+Vector<Token> Lexer::batch_next(Optional<Reduction> starting_reduction)
 {
+    if (starting_reduction.has_value())
+        m_next_reduction = *starting_reduction;
+
     for (; m_next_reduction != Reduction::None;) {
         auto result = reduce(m_next_reduction);
         m_next_reduction = result.next_reduction;
 
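The new `Optional<Reduction>` parameter lets a caller re-enter the lexer in a chosen state instead of wherever the previous batch left off. A minimal sketch of such a caller, assuming only what this diff shows (the function and include path here are hypothetical, not part of the commit):

    // Hypothetical caller; include path assumed.
    #include <Shell/PosixLexer.h>

    // Resume lexing in heredoc mode rather than at the default reduction.
    // batch_next(Optional<Reduction>) and Reduction::HeredocContents are the
    // additions from this commit.
    Vector<Shell::Posix::Token> resume_in_heredoc_mode(Shell::Posix::Lexer& lexer)
    {
        return lexer.batch_next(Shell::Posix::Reduction::HeredocContents);
    }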
@@ -55,6 +58,18 @@ char Lexer::consume()
     return ch;
 }
 
+void Lexer::reconsume(StringView string)
+{
+    for (auto byte : string.bytes()) {
+        if (byte == '\n') {
+            m_state.position.end_line.line_number++;
+            m_state.position.end_line.line_column = 0;
+        }
+
+        m_state.position.end_offset++;
+    }
+}
+
 bool Lexer::consume_specific(char ch)
 {
     if (m_lexer.peek() == ch) {
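Note that `reconsume()` does not push bytes back into the underlying stream; it only advances the end-position bookkeeping over bytes that were consumed behind the lexer's back (the heredoc scanner in `reduce_start()` below skips them with `m_lexer.ignore()`). A standalone analog of that accounting, with an assumed position struct mirroring the lexer's:

    #include <AK/StringView.h>

    struct EndPosition {
        size_t offset { 0 };
        size_t line_number { 0 };
        size_t line_column { 0 };
    };

    // Advance only the bookkeeping: the column resets on '\n' but is not
    // otherwise incremented, exactly like the loop in reconsume() above.
    static void account_for_consumed_bytes(EndPosition& position, StringView bytes)
    {
        for (auto byte : bytes.bytes()) {
            if (byte == '\n') {
                position.line_number++;
                position.line_column = 0;
            }
            position.offset++;
        }
    }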
@@ -95,6 +110,8 @@ Lexer::ReductionResult Lexer::reduce(Reduction reduction)
         return reduce_command_or_arithmetic_substitution_expansion();
     case Reduction::ExtendedParameterExpansion:
         return reduce_extended_parameter_expansion();
+    case Reduction::HeredocContents:
+        return reduce_heredoc_contents();
     }
 
     VERIFY_NOT_REACHED();
@@ -108,6 +125,91 @@ Lexer::ReductionResult Lexer::reduce_end()
     };
 }
 
+Lexer::HeredocKeyResult Lexer::process_heredoc_key(Token const& token)
+{
+    StringBuilder builder;
+    enum ParseState {
+        Free,
+        InDoubleQuotes,
+        InSingleQuotes,
+    };
+    Vector<ParseState, 4> parse_state;
+    parse_state.append(Free);
+    bool escaped = false;
+    bool had_a_single_quote_segment = false;
+
+    for (auto byte : token.value.bytes()) {
+        switch (parse_state.last()) {
+        case Free:
+            switch (byte) {
+            case '"':
+                if (escaped) {
+                    builder.append(byte);
+                    escaped = false;
+                } else {
+                    parse_state.append(InDoubleQuotes);
+                }
+                break;
+            case '\'':
+                if (escaped) {
+                    builder.append(byte);
+                    escaped = false;
+                } else {
+                    had_a_single_quote_segment = true;
+                    parse_state.append(InSingleQuotes);
+                }
+                break;
+            case '\\':
+                if (escaped) {
+                    builder.append(byte);
+                    escaped = false;
+                } else {
+                    escaped = true;
+                }
+                break;
+            default:
+                if (escaped) {
+                    builder.append('\\');
+                    escaped = false;
+                }
+                builder.append(byte);
+                break;
+            }
+            break;
+        case InDoubleQuotes:
+            if (!escaped && byte == '"') {
+                parse_state.take_last();
+                break;
+            }
+            if (escaped) {
+                if (byte != '"')
+                    builder.append('\\');
+                builder.append(byte);
+                break;
+            }
+            if (byte == '\\')
+                escaped = true;
+            else
+                builder.append(byte);
+            break;
+        case InSingleQuotes:
+            if (byte == '\'') {
+                parse_state.take_last();
+                break;
+            }
+            builder.append(byte);
+            break;
+        }
+    }
+
+    // NOTE: Not checking the final state as any garbage that even partially parses is allowed to be used as a key :/
+
+    return {
+        .key = builder.to_deprecated_string(),
+        .allow_interpolation = !had_a_single_quote_segment,
+    };
+}
+
 Lexer::ReductionResult Lexer::reduce_operator()
 {
     if (m_lexer.is_eof()) {
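Tracing the quote-handling state machine above: double quotes are stripped from the key without affecting interpolation, while any single-quoted segment, even a partial one, turns interpolation off for the heredoc body. A hedged sketch of unit checks (assuming LibTest, a hypothetical `token_for()` helper, and test access to `process_heredoc_key()`; none of these are from the commit):

    #include <LibTest/TestCase.h>

    TEST_CASE(heredoc_key_quoting)
    {
        Shell::Posix::Lexer lexer { ""sv }; // constructor shape assumed

        // token_for() is a hypothetical helper wrapping plain text in a Token.
        auto result = lexer.process_heredoc_key(token_for("EOF"sv));
        EXPECT_EQ(result.key, "EOF");
        EXPECT(result.allow_interpolation); // unquoted key

        result = lexer.process_heredoc_key(token_for("\"EOF\""sv));
        EXPECT_EQ(result.key, "EOF");
        EXPECT(result.allow_interpolation); // double quotes are stripped only

        result = lexer.process_heredoc_key(token_for("E'O'F"sv));
        EXPECT_EQ(result.key, "EOF");
        EXPECT(!result.allow_interpolation); // single-quote segment disables it
    }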
@@ -142,8 +244,25 @@ Lexer::ReductionResult Lexer::reduce_operator()
         m_state.position.start_line = m_state.position.end_line;
     }
 
+    auto expect_heredoc_entry = !tokens.is_empty() && (tokens.last().type == Token::Type::DoubleLessDash || tokens.last().type == Token::Type::DoubleLess);
+
     auto result = reduce(Reduction::Start);
     tokens.extend(move(result.tokens));
 
+    while (expect_heredoc_entry && tokens.size() == 1) {
+        result = reduce(result.next_reduction);
+        tokens.extend(move(result.tokens));
+    }
+
+    if (expect_heredoc_entry && tokens.size() > 1) {
+        auto [key, interpolation] = process_heredoc_key(tokens[1]);
+        m_state.heredoc_entries.enqueue(HeredocEntry {
+            .key = key,
+            .allow_interpolation = interpolation,
+            .dedent = tokens[0].type == Token::Type::DoubleLessDash,
+        });
+    }
+
     return {
         .tokens = move(tokens),
         .next_reduction = result.next_reduction,
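In effect, `reduce_operator()` peeks one token past a `<<` or `<<-` and records how the upcoming body must be treated before that body is ever seen. Traced by hand from the code above (illustrative values, not from the commit's tests):

    // "cat <<EOF" - unquoted key after '<<':
    Shell::Posix::HeredocEntry plain_entry {
        .key = "EOF",
        .allow_interpolation = true, // no single-quote segment in the key
        .dedent = false,             // '<<' keeps leading tabs
    };

    // "cat <<-'END'" - single-quoted key after '<<-':
    Shell::Posix::HeredocEntry dedented_entry {
        .key = "END",
        .allow_interpolation = false, // single-quoted key
        .dedent = true,               // '<<-' strips leading tabs
    };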
@@ -160,6 +279,7 @@ Lexer::ReductionResult Lexer::reduce_comment()
     }
 
     if (consume() == '\n') {
+        m_state.on_new_line = true;
         return {
             .tokens = { Token::newline() },
             .next_reduction = Reduction::Start,
@@ -352,7 +472,7 @@ Lexer::ReductionResult Lexer::reduce_command_expansion()
     };
 }
 
-Lexer::ReductionResult Lexer::reduce_start()
+Lexer::ReductionResult Lexer::reduce_heredoc_contents()
 {
     if (m_lexer.is_eof()) {
         auto tokens = Token::maybe_from_state(m_state);
@@ -366,6 +486,107 @@ Lexer::ReductionResult Lexer::reduce_start()
         };
     }
 
+    if (!m_state.escaping && consume_specific('\\')) {
+        m_state.escaping = true;
+        m_state.buffer.append('\\');
+        return {
+            .tokens = {},
+            .next_reduction = Reduction::HeredocContents,
+        };
+    }
+
+    if (!m_state.escaping && consume_specific('$')) {
+        m_state.buffer.append('$');
+        if (m_lexer.next_is("("))
+            m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() });
+        else
+            m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range() });
+
+        return {
+            .tokens = {},
+            .next_reduction = Reduction::Expansion,
+        };
+    }
+
+    if (!m_state.escaping && consume_specific('`')) {
+        m_state.buffer.append('`');
+        m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() });
+        return {
+            .tokens = {},
+            .next_reduction = Reduction::CommandExpansion,
+        };
+    }
+
+    m_state.escaping = false;
+    m_state.buffer.append(consume());
+    return {
+        .tokens = {},
+        .next_reduction = Reduction::HeredocContents,
+    };
+}
+
+Lexer::ReductionResult Lexer::reduce_start()
+{
+    auto was_on_new_line = m_state.on_new_line;
+    m_state.on_new_line = false;
+
+    if (m_lexer.is_eof()) {
+        auto tokens = Token::maybe_from_state(m_state);
+        m_state.buffer.clear();
+        m_state.position.start_offset = m_state.position.end_offset;
+        m_state.position.start_line = m_state.position.end_line;
+
+        return {
+            .tokens = move(tokens),
+            .next_reduction = Reduction::End,
+        };
+    }
+
+    if (was_on_new_line && !m_state.heredoc_entries.is_empty()) {
+        auto const& entry = m_state.heredoc_entries.head();
+
+        auto start_index = m_lexer.tell();
+        Optional<size_t> end_index;
+
+        for (; !m_lexer.is_eof();) {
+            auto index = m_lexer.tell();
+            auto possible_end_index = m_lexer.tell();
+            if (m_lexer.consume_specific('\n')) {
+                if (entry.dedent)
+                    m_lexer.ignore_while(is_any_of("\t"sv));
+                if (m_lexer.consume_specific(entry.key.view())) {
+                    if (m_lexer.consume_specific('\n') || m_lexer.is_eof()) {
+                        end_index = possible_end_index;
+                        break;
+                    }
+                }
+            }
+            if (m_lexer.tell() == index)
+                m_lexer.ignore();
+        }
+
+        auto contents = m_lexer.input().substring_view(start_index, end_index.value_or(m_lexer.tell()) - start_index);
+        reconsume(contents);
+
+        m_state.buffer.clear();
+        m_state.buffer.append(contents);
+
+        auto token = Token::maybe_from_state(m_state).first();
+        token.relevant_heredoc_key = entry.key;
+        token.type = Token::Type::HeredocContents;
+
+        m_state.heredoc_entries.dequeue();
+
+        m_state.on_new_line = true;
+
+        m_state.buffer.clear();
+
+        return {
+            .tokens = { move(token) },
+            .next_reduction = Reduction::Start,
+        };
+    }
+
     if (m_state.escaping && consume_specific('\n')) {
         m_state.escaping = false;
 
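Putting the pieces together: on the first `reduce_start()` after a newline, the queued entry is resolved by scanning ahead for a line consisting of the key (after optional tab-stripping for `<<-`), and the whole body is emitted as a single `HeredocContents` token whose `relevant_heredoc_key` lets the parser match it back to the right redirection. A rough driver sketch; the `Lexer` constructor shape and the loop's termination condition are assumptions, and only APIs visible in this diff are relied on otherwise:

    // Hypothetical: dump every token the POSIX lexer produces for `source`.
    void dump_tokens(StringView source)
    {
        Shell::Posix::Lexer lexer { source }; // constructor shape assumed
        // Assumed: an empty batch signals that the lexer has reached the end.
        for (auto batch = lexer.batch_next({}); !batch.is_empty(); batch = lexer.batch_next({}))
            for (auto& token : batch)
                dbgln("{}: '{}'", token.type_name(), token.value);
    }

    // dump_tokens("cat <<EOF\nhello $name\nEOF\n"sv);
    // Roughly expected: a word "cat", DoubleLess, a word "EOF", Newline,
    // then one HeredocContents token carrying "hello $name\n".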
@@ -391,6 +612,8 @@ Lexer::ReductionResult Lexer::reduce_start()
     auto tokens = Token::maybe_from_state(m_state);
     tokens.append(Token::newline());
 
+    m_state.on_new_line = true;
+
     m_state.buffer.clear();
     m_state.position.start_offset = m_state.position.end_offset;
     m_state.position.start_line = m_state.position.end_line;
@@ -678,6 +901,8 @@ StringView Token::type_name() const
         return "Clobber"sv;
     case Type::Semicolon:
         return "Semicolon"sv;
+    case Type::HeredocContents:
+        return "HeredocContents"sv;
     case Type::AssignmentWord:
         return "AssignmentWord"sv;
     case Type::Bang: