1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-14 11:34:59 +00:00

Shell: Add support for heredocs

Closes #4283.
Heredocs are implemented in a way that makes them feel more like a
string (and not a weird redirection, a la bash).
There are two tunables, whether the string is dedented (`<<-` vs `<<~`)
and whether it allows interpolation (quoted key vs not).
For those familiar with it, this is how Ruby handles heredocs, and I feel it is the
most elegant heredoc syntax.
Unlike the oddjob that is bash, heredocs are treated exactly as normal
strings, and can be used _anywhere_ where a string can be used.
Their contents are *required* to appear, in the same order as the heredocs were used,
after the first newline seen when parsing the sequence that the heredocs are used in.
For instance:
```sh
echo <<-doc1 <<-doc2 | blah blah
contents for doc1
doc1
contents for doc2
doc2
```
The typical nice errors are also implemented :^)
This commit is contained in:
Ali Mohammad Pur 2021-04-29 07:04:00 +04:30 committed by Andreas Kling
parent 7c8d39e002
commit 3048274f5e
7 changed files with 364 additions and 10 deletions

View file

@ -1307,6 +1307,78 @@ Glob::~Glob()
{ {
} }
// Dumps this heredoc for debugging: the generic node header first, then the
// end key, the interpolation flag, and the contents node (or "(null)" when no
// contents have been attached).
void Heredoc::dump(int level) const
{
    Node::dump(level);

    const int heading_level = level + 1;
    const int detail_level = level + 2;

    print_indented("(End Key)", heading_level);
    print_indented(m_end, detail_level);

    print_indented("(Allows Interpolation)", heading_level);
    print_indented(String::formatted("{}", m_allows_interpolation), detail_level);

    print_indented("(Contents)", heading_level);
    if (!m_contents) {
        print_indented("(null)", detail_level);
        return;
    }
    m_contents->dump(detail_level);
}
// Evaluates the heredoc to a value.
//
// Without deindentation the result is whatever the contents node evaluates to.
// With deindentation, the contents are resolved to a single string, split on
// '\n', every line has its leading whitespace removed, and the lines are
// re-joined with a trailing '\n' after each one.
//
// Returns nullptr when no contents have been attached, or when the contents
// evaluate to a null value.
RefPtr<Value> Heredoc::run(RefPtr<Shell> shell)
{
    // The contents are attached separately (see set_contents()) and every
    // other member function treats m_contents as nullable, so don't
    // dereference it blindly here.
    if (!m_contents)
        return nullptr;

    if (!m_deindent)
        return m_contents->run(shell);

    // To deindent, first split to lines...
    auto value = m_contents->run(shell);
    if (!value)
        return value;

    auto list = value->resolve_as_list(shell);
    // The list better have one entry, otherwise we've put the wrong kind of node inside this heredoc
    VERIFY(list.size() == 1);

    // NOTE(review): whether blank lines survive depends on how split_view()
    // treats empty segments - confirm against its declaration.
    auto lines = list.first().split_view('\n');

    // Now just trim each line and put them back in a string
    StringBuilder builder { list.first().length() };
    for (auto& line : lines) {
        builder.append(line.trim_whitespace(TrimMode::Left));
        builder.append('\n');
    }

    return create<StringValue>(builder.to_string());
}
// Styles the span covered by this node in the line editor: yellow foreground,
// bold when the node is first in its list, and red foreground when no contents
// are attached. Attached contents are then asked to highlight themselves.
void Heredoc::highlight_in_editor(Line::Editor& editor, Shell& shell, HighlightMetadata metadata)
{
    Line::Style style { Line::Style::Foreground(Line::Style::XtermColor::Yellow) };

    if (metadata.is_first_in_list)
        style.unify_with({ Line::Style::Bold });

    // A heredoc without contents gets a red foreground merged into its style.
    if (!m_contents)
        style.unify_with({ Line::Style::Foreground(Line::Style::XtermColor::Red) }, true);

    editor.stylize({ m_position.start_offset, m_position.end_offset }, style);

    if (!m_contents)
        return;

    m_contents->highlight_in_editor(editor, shell, metadata);
}
// Forwards the hit test to the contents node; yields an empty result when no
// contents are attached.
HitTestResult Heredoc::hit_test_position(size_t offset) const
{
    return m_contents ? m_contents->hit_test_position(offset) : HitTestResult {};
}
// Constructs a heredoc node at `position` with the given end key and the two
// tunables (interpolation allowed, deindentation requested).
// The contents are not set here; m_contents is left at its default.
Heredoc::Heredoc(Position position, String end, bool allow_interpolation, bool deindent)
: Node(move(position))
, m_end(move(end))
, m_allows_interpolation(allow_interpolation)
, m_deindent(deindent)
{
}
// Nothing to release explicitly; members clean up after themselves.
Heredoc::~Heredoc() = default;
void HistoryEvent::dump(int level) const void HistoryEvent::dump(int level) const
{ {
Node::dump(level); Node::dump(level);

View file

@ -474,6 +474,7 @@ public:
ForLoop, ForLoop,
FunctionDeclaration, FunctionDeclaration,
Glob, Glob,
Heredoc,
HistoryEvent, HistoryEvent,
IfCond, IfCond,
ImmediateExpression, ImmediateExpression,
@ -1313,6 +1314,39 @@ private:
NonnullRefPtr<Node> m_right; NonnullRefPtr<Node> m_right;
}; };
// AST node for a heredoc. It stores the end key, whether interpolation is
// allowed, whether the body should be deindented, and (once attached) the node
// holding the body.
class Heredoc final : public Node {
public:
Heredoc(Position, String end, bool allow_interpolation, bool deindent);
virtual ~Heredoc();
virtual void visit(NodeVisitor& visitor) override { visitor.visit(this); }
// The key that terminates the heredoc body.
const String& end() const { return m_end; }
bool allow_interpolation() const { return m_allows_interpolation; }
bool deindent() const { return m_deindent; }
// May be null until set_contents() has been called.
const RefPtr<AST::Node>& contents() const { return m_contents; }
// Attaches the body and mirrors its syntax-error state onto this node:
// an erroneous body marks this node as erroneous, otherwise any syntax error
// currently set on this node is cleared.
// `contents` must not be null; it is dereferenced unconditionally.
void set_contents(RefPtr<AST::Node> contents)
{
m_contents = move(contents);
if (m_contents->is_syntax_error())
set_is_syntax_error(m_contents->syntax_error_node());
else
clear_syntax_error();
}
private:
NODE(Heredoc);
virtual void dump(int level) const override;
virtual RefPtr<Value> run(RefPtr<Shell>) override;
virtual void highlight_in_editor(Line::Editor&, Shell&, HighlightMetadata = {}) override;
virtual HitTestResult hit_test_position(size_t) const override;
// The heredoc reports itself as its own leftmost trivial literal.
virtual RefPtr<Node> leftmost_trivial_literal() const override { return this; };
String m_end;
bool m_allows_interpolation { false };
bool m_deindent { false };
// Body of the heredoc; null until attached via set_contents().
RefPtr<AST::Node> m_contents;
};
class StringLiteral final : public Node { class StringLiteral final : public Node {
public: public:
StringLiteral(Position, String); StringLiteral(Position, String);

View file

@ -34,6 +34,7 @@ class Fd2FdRedirection;
class FunctionDeclaration; class FunctionDeclaration;
class ForLoop; class ForLoop;
class Glob; class Glob;
class Heredoc;
class HistoryEvent; class HistoryEvent;
class Execute; class Execute;
class IfCond; class IfCond;

View file

@ -101,6 +101,12 @@ void NodeVisitor::visit(const AST::Glob*)
{ {
} }
// Visits the heredoc's contents node, if one is attached.
void NodeVisitor::visit(const AST::Heredoc* node)
{
    auto& contents = node->contents();
    if (!contents)
        return;
    contents->visit(*this);
}
void NodeVisitor::visit(const AST::HistoryEvent*) void NodeVisitor::visit(const AST::HistoryEvent*)
{ {
} }

View file

@ -30,6 +30,7 @@ public:
virtual void visit(const AST::FunctionDeclaration*); virtual void visit(const AST::FunctionDeclaration*);
virtual void visit(const AST::ForLoop*); virtual void visit(const AST::ForLoop*);
virtual void visit(const AST::Glob*); virtual void visit(const AST::Glob*);
virtual void visit(const AST::Heredoc*);
virtual void visit(const AST::HistoryEvent*); virtual void visit(const AST::HistoryEvent*);
virtual void visit(const AST::Execute*); virtual void visit(const AST::Execute*);
virtual void visit(const AST::IfCond*); virtual void visit(const AST::IfCond*);

View file

@ -7,6 +7,7 @@
#include "Parser.h" #include "Parser.h"
#include "Shell.h" #include "Shell.h"
#include <AK/AllOf.h> #include <AK/AllOf.h>
#include <AK/ScopeGuard.h>
#include <AK/ScopedValueRollback.h> #include <AK/ScopedValueRollback.h>
#include <AK/TemporaryChange.h> #include <AK/TemporaryChange.h>
#include <ctype.h> #include <ctype.h>
@ -187,9 +188,47 @@ RefPtr<AST::Node> Parser::parse_toplevel()
Parser::SequenceParseResult Parser::parse_sequence() Parser::SequenceParseResult Parser::parse_sequence()
{ {
consume_while(is_any_of(" \t\n;")); // ignore whitespaces or terminators without effect.
NonnullRefPtrVector<AST::Node> left; NonnullRefPtrVector<AST::Node> left;
// Skips sequence terminators (';' and '\n', plus ' ' and '\t' when
// `consider_tabs_and_spaces` is set). While heredoc initiations are pending,
// each newline is followed by an attempt to read the heredoc bodies via
// parse_heredoc_entries().
auto read_terminators = [&](bool consider_tabs_and_spaces) {
if (m_heredoc_initiations.is_empty()) {
// Also the landing point for the error path below: plainly swallow the remaining terminators.
discard_terminators:;
consume_while(is_any_of(consider_tabs_and_spaces ? " \t\n;" : "\n;"));
} else {
for (;;) {
if (consider_tabs_and_spaces && (peek() == '\t' || peek() == ' ')) {
consume();
continue;
}
if (peek() == ';') {
consume();
continue;
}
if (peek() == '\n') {
// presumably scopes the start position used by create<>() below - confirm against push_start()
auto rule_start = push_start();
consume();
if (!parse_heredoc_entries()) {
// Report every pending heredoc whose body could not be found.
StringBuilder error_builder;
error_builder.append("Expected to find heredoc entries for ");
bool first = true;
for (auto& entry : m_heredoc_initiations) {
// NOTE(review): the arguments are passed as (line_column, line_number); confirm that
// "column:line" is the intended order for the "(at {}:{})" text.
if (first)
error_builder.appendff("{} (at {}:{})", entry.end, entry.node->position().start_line.line_column, entry.node->position().start_line.line_number);
else
error_builder.appendff(", {} (at {}:{})", entry.end, entry.node->position().start_line.line_column, entry.node->position().start_line.line_number);
first = false;
}
left.append(create<AST::SyntaxError>(error_builder.build(), true));
// Just read the rest of the newlines
goto discard_terminators;
}
continue;
}
// Anything else is not a terminator; stop here.
break;
}
}
};
read_terminators(true);
auto rule_start = push_start(); auto rule_start = push_start();
{ {
@ -203,8 +242,10 @@ Parser::SequenceParseResult Parser::parse_sequence()
switch (peek()) { switch (peek()) {
case '}': case '}':
return { move(left), {}, ShouldReadMoreSequences::No }; return { move(left), {}, ShouldReadMoreSequences::No };
case ';': case '\n':
case '\n': { read_terminators(false);
[[fallthrough]];
case ';': {
if (left.is_empty()) if (left.is_empty())
break; break;
@ -235,8 +276,10 @@ Parser::SequenceParseResult Parser::parse_sequence()
pos_before_seps = save_offset(); pos_before_seps = save_offset();
switch (peek()) { switch (peek()) {
case ';': case '\n':
case '\n': { read_terminators(false);
[[fallthrough]];
case ';': {
consume_while(is_any_of("\n;")); consume_while(is_any_of("\n;"));
auto pos_after_seps = save_offset(); auto pos_after_seps = save_offset();
separator_positions.empend(pos_before_seps.offset, pos_after_seps.offset, pos_before_seps.line, pos_after_seps.line); separator_positions.empend(pos_before_seps.offset, pos_after_seps.offset, pos_before_seps.line, pos_after_seps.line);
@ -960,6 +1003,11 @@ RefPtr<AST::Node> Parser::parse_match_pattern()
RefPtr<AST::Node> Parser::parse_redirection() RefPtr<AST::Node> Parser::parse_redirection()
{ {
auto rule_start = push_start(); auto rule_start = push_start();
// heredoc entry
if (next_is("<<-") || next_is("<<~"))
return nullptr;
auto pipe_fd = 0; auto pipe_fd = 0;
auto number = consume_while(is_digit); auto number = consume_while(is_digit);
if (number.is_empty()) { if (number.is_empty()) {
@ -1091,8 +1139,11 @@ RefPtr<AST::Node> Parser::parse_expression()
return move(expr); return move(expr);
}; };
// Heredocs are expressions, so allow them
if (!(next_is("<<-") || next_is("<<~"))) {
if (strchr("&|)} ;<>\n", starting_char) != nullptr) if (strchr("&|)} ;<>\n", starting_char) != nullptr)
return nullptr; return nullptr;
}
if (m_extra_chars_not_allowed_in_barewords.contains_slow(starting_char)) if (m_extra_chars_not_allowed_in_barewords.contains_slow(starting_char))
return nullptr; return nullptr;
@ -1188,6 +1239,13 @@ RefPtr<AST::Node> Parser::parse_string_composite()
return inline_command; return inline_command;
} }
if (auto heredoc = parse_heredoc_initiation_record()) {
if (auto next_part = parse_string_composite())
return create<AST::Juxtaposition>(heredoc.release_nonnull(), next_part.release_nonnull()); // Concatenate Heredoc StringComposite
return heredoc;
}
return nullptr; return nullptr;
} }
@ -1852,6 +1910,163 @@ RefPtr<AST::Node> Parser::parse_brace_expansion_spec()
return create<AST::BraceExpansion>(move(subexpressions)); return create<AST::BraceExpansion>(move(subexpressions));
} }
// Parses a heredoc initiator:
//     '<' '<' ('-' | '~') (bareword | "'" [^']* "'")
//
// '-' requests no deindentation, '~' requests deindentation. A bareword key
// allows interpolation, a single-quoted key does not.
//
// On a match, a record describing the heredoc is appended to
// m_heredoc_initiations and the new Heredoc node is returned. The node is
// always returned flagged as a syntax error: either the error found while
// reading the key, or a placeholder "expected heredoc contents" error.
//
// Returns nullptr when the upcoming input is not "<<-" or "<<~" (after
// rewinding to the rule start if "<<" was already consumed).
RefPtr<AST::Node> Parser::parse_heredoc_initiation_record()
{
    if (!next_is("<<"))
        return nullptr;

    auto rule_start = push_start();

    // '<' '<'
    consume();
    consume();

    HeredocInitiationRecord record;
    record.end = "<error>";

    RefPtr<AST::SyntaxError> syntax_error_node;

    // '-' | '~'
    switch (peek()) {
    case '-':
        record.deindent = false;
        consume();
        break;
    case '~':
        record.deindent = true;
        consume();
        break;
    default:
        restore_to(*rule_start);
        return nullptr;
    }

    // StringLiteral | bareword
    if (auto bareword = parse_bareword()) {
        if (bareword->is_syntax_error()) {
            syntax_error_node = bareword->syntax_error_node();
        } else if (!bareword->is_bareword()) {
            // parse_bareword() returned some other kind of node; downcasting
            // it to BarewordLiteral would be invalid, so report it instead.
            // Not continuable: further input cannot repair the key.
            syntax_error_node = create<AST::SyntaxError>("Expected a bareword or a single-quoted string literal for heredoc end key", false);
        } else {
            record.end = static_cast<AST::BarewordLiteral*>(bareword.ptr())->text();
        }
        record.interpolate = true;
    } else if (peek() == '\'') {
        consume();
        auto text = consume_while(is_not('\''));
        if (!expect('\''))
            syntax_error_node = create<AST::SyntaxError>("Expected a terminating single quote", true);
        record.end = text;
        record.interpolate = false;
    } else {
        syntax_error_node = create<AST::SyntaxError>("Expected a bareword or a single-quoted string literal for heredoc end key", true);
    }

    auto node = create<AST::Heredoc>(record.end, record.interpolate, record.deindent);
    if (syntax_error_node)
        node->set_is_syntax_error(*syntax_error_node);
    else
        node->set_is_syntax_error(*create<AST::SyntaxError>(String::formatted("Expected heredoc contents for heredoc with end key '{}'", node->end()), true));

    record.node = node;
    m_heredoc_initiations.append(move(record));

    return node;
}
// Reads the body of every pending heredoc (one per record in
// m_heredoc_initiations) from the current position and attaches it to the
// record's node via set_contents(). The pending list is cleared afterwards.
// NOTE(review): every path visible here returns true, so callers checking for
// a false result never take their failure branch - confirm this is intended.
bool Parser::parse_heredoc_entries()
{
// Try to parse heredoc entries, in the order their initiation records were appended
for (auto& record : m_heredoc_initiations) {
auto rule_start = push_start();
bool found_key = false;
if (!record.interpolate) {
// Since no interpolation is allowed, just read lines until we hit the key
// Offset of the start of the most recently read line; the body ends right before it.
Optional<Offset> last_line_offset;
for (;;) {
if (at_end())
break;
if (peek() == '\n')
consume();
last_line_offset = current_position();
auto line = consume_while(is_not('\n'));
if (peek() == '\n')
consume();
// The key line is matched after trimming whitespace on both sides.
if (line.trim_whitespace() == record.end) {
found_key = true;
break;
}
}
// No line was read at all (input ended immediately): the body is empty.
if (!last_line_offset.has_value())
last_line_offset = current_position();
// Now just wrap it in a StringLiteral and set it as the node's contents
auto node = create<AST::StringLiteral>(m_input.substring_view(rule_start->offset, last_line_offset->offset - rule_start->offset));
if (!found_key)
node->set_is_syntax_error(*create<AST::SyntaxError>(String::formatted("Expected to find the heredoc key '{}', but found Eof", record.end), true));
record.node->set_contents(move(node));
} else {
// Interpolation is allowed, so we're going to read doublequoted string innards
// until we find a line that contains the key
// Stash whatever end condition was active; it is reinstated after the body is parsed.
auto end_condition = move(m_end_condition);
found_key = false;
set_end_condition([this, end = record.end, &found_key] {
if (found_key)
return true;
auto offset = current_position();
// Take the end condition out of the parser while this lambda runs and put it back on exit;
// presumably this keeps the parsing helpers used below from re-entering this lambda
// through at_end() - confirm against peek()/consume().
auto cond = move(m_end_condition);
ScopeGuard guard {
[&] {
m_end_condition = move(cond);
}
};
// Look ahead: does the line after the upcoming newline consist of just the key?
if (peek() == '\n') {
consume();
auto line = consume_while(is_not('\n'));
if (peek() == '\n')
consume();
if (line.trim_whitespace() == end) {
restore_to(offset.offset, offset.line);
found_key = true;
return true;
}
}
// The lookahead never consumes input; rewind before answering.
restore_to(offset.offset, offset.line);
return false;
});
auto expr = parse_doublequoted_string_inner();
set_end_condition(move(end_condition));
if (found_key) {
// Consume the key line itself; rewind if it unexpectedly does not match.
auto offset = current_position();
if (peek() == '\n')
consume();
auto line = consume_while(is_not('\n'));
if (peek() == '\n')
consume();
if (line.trim_whitespace() != record.end)
restore_to(offset.offset, offset.line);
}
// No body but the key was found: empty heredoc. No body and no key: error node.
// A body without the key: keep the body but flag it as a syntax error.
if (!expr && found_key) {
expr = create<AST::StringLiteral>("");
} else if (!expr) {
expr = create<AST::SyntaxError>(String::formatted("Expected to find a valid string inside a heredoc (with end key '{}')", record.end), true);
} else if (!found_key) {
expr->set_is_syntax_error(*create<AST::SyntaxError>(String::formatted("Expected to find the heredoc key '{}'", record.end), true));
}
record.node->set_contents(create<AST::DoubleQuotedString>(move(expr)));
}
}
m_heredoc_initiations.clear();
return true;
}
StringView Parser::consume_while(Function<bool(char)> condition) StringView Parser::consume_while(Function<bool(char)> condition)
{ {
if (at_end()) if (at_end())

View file

@ -46,6 +46,13 @@ private:
ShouldReadMoreSequences decision; ShouldReadMoreSequences decision;
}; };
// Bookkeeping for a heredoc whose initiator has been parsed but whose body has
// not been read yet.
struct HeredocInitiationRecord {
// The key that terminates the heredoc body.
String end;
// The Heredoc node that receives the body once it has been read.
RefPtr<AST::Heredoc> node;
// Whether the body is parsed with interpolation (double-quoted string innards).
bool interpolate { false };
// Whether deindentation was requested by the initiator.
bool deindent { false };
};
constexpr static size_t max_allowed_nested_rule_depth = 2048; constexpr static size_t max_allowed_nested_rule_depth = 2048;
RefPtr<AST::Node> parse_toplevel(); RefPtr<AST::Node> parse_toplevel();
SequenceParseResult parse_sequence(); SequenceParseResult parse_sequence();
@ -81,11 +88,19 @@ private:
RefPtr<AST::Node> parse_brace_expansion(); RefPtr<AST::Node> parse_brace_expansion();
RefPtr<AST::Node> parse_brace_expansion_spec(); RefPtr<AST::Node> parse_brace_expansion_spec();
RefPtr<AST::Node> parse_immediate_expression(); RefPtr<AST::Node> parse_immediate_expression();
RefPtr<AST::Node> parse_heredoc_initiation_record();
bool parse_heredoc_entries();
template<typename A, typename... Args> template<typename A, typename... Args>
NonnullRefPtr<A> create(Args... args); NonnullRefPtr<A> create(Args... args);
bool at_end() const { return m_input.length() <= m_offset; } void set_end_condition(Function<bool()> condition) { m_end_condition = move(condition); }
// True when an installed end condition reports the end, or when the offset has
// reached the end of the input. The end condition, if any, is checked first.
bool at_end() const
{
    return (m_end_condition && m_end_condition()) || m_input.length() <= m_offset;
}
char peek(); char peek();
char consume(); char consume();
bool expect(char); bool expect(char);
@ -144,6 +159,8 @@ private:
Vector<size_t> m_rule_start_offsets; Vector<size_t> m_rule_start_offsets;
Vector<AST::Position::Line> m_rule_start_lines; Vector<AST::Position::Line> m_rule_start_lines;
Function<bool()> m_end_condition;
Vector<HeredocInitiationRecord> m_heredoc_initiations;
Vector<char> m_extra_chars_not_allowed_in_barewords; Vector<char> m_extra_chars_not_allowed_in_barewords;
bool m_is_in_brace_expansion_spec { false }; bool m_is_in_brace_expansion_spec { false };
bool m_continuation_controls_allowed { false }; bool m_continuation_controls_allowed { false };
@ -169,7 +186,9 @@ and_logical_sequence :: pipe_sequence '&' '&' and_logical_sequence
| pipe_sequence | pipe_sequence
terminator :: ';' terminator :: ';'
| '\n' | '\n' [?!heredoc_stack.is_empty] heredoc_entries
heredoc_entries :: { .*? (heredoc_entry) '\n' } [each heredoc_entries]
variable_decls :: identifier '=' expression (' '+ variable_decls)? ' '* variable_decls :: identifier '=' expression (' '+ variable_decls)? ' '*
| identifier '=' '(' pipe_sequence ')' (' '+ variable_decls)? ' '* | identifier '=' '(' pipe_sequence ')' (' '+ variable_decls)? ' '*
@ -233,6 +252,12 @@ string_composite :: string string_composite?
| bareword string_composite? | bareword string_composite?
| glob string_composite? | glob string_composite?
| brace_expansion string_composite? | brace_expansion string_composite?
| heredoc_initiator string_composite? {append to heredoc_entries}
heredoc_initiator :: '<' '<' '-' bareword {*bareword, interpolate, no deindent}
| '<' '<' '-' "'" [^']* "'" {*string, no interpolate, no deindent}
| '<' '<' '~' bareword {*bareword, interpolate, deindent}
| '<' '<' '~' "'" [^']* "'" {*string, no interpolate, deindent}
string :: '"' dquoted_string_inner '"' string :: '"' dquoted_string_inner '"'
| "'" [^']* "'" | "'" [^']* "'"