mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 21:47:46 +00:00
LibJS: Handle hex and unicode escape sequences in string literals
Introduces the following syntax: '\x55' '\u26a0' '\u{1f41e}'
This commit is contained in:
parent
b3090678a9
commit
e415dd4e9c
5 changed files with 118 additions and 9 deletions
|
@ -405,7 +405,7 @@ NonnullRefPtr<Expression> Parser::parse_primary_expression()
|
||||||
case TokenType::BoolLiteral:
|
case TokenType::BoolLiteral:
|
||||||
return create_ast_node<BooleanLiteral>(consume().bool_value());
|
return create_ast_node<BooleanLiteral>(consume().bool_value());
|
||||||
case TokenType::StringLiteral:
|
case TokenType::StringLiteral:
|
||||||
return create_ast_node<StringLiteral>(consume().string_value());
|
return parse_string_literal(consume());
|
||||||
case TokenType::NullLiteral:
|
case TokenType::NullLiteral:
|
||||||
consume();
|
consume();
|
||||||
return create_ast_node<NullLiteral>();
|
return create_ast_node<NullLiteral>();
|
||||||
|
@ -494,7 +494,7 @@ NonnullRefPtr<ObjectExpression> Parser::parse_object_expression()
|
||||||
property_value = create_ast_node<Identifier>(identifier);
|
property_value = create_ast_node<Identifier>(identifier);
|
||||||
need_colon = false;
|
need_colon = false;
|
||||||
} else if (match(TokenType::StringLiteral)) {
|
} else if (match(TokenType::StringLiteral)) {
|
||||||
property_key = create_ast_node<StringLiteral>(consume(TokenType::StringLiteral).string_value());
|
property_key = parse_string_literal(consume());
|
||||||
} else if (match(TokenType::NumericLiteral)) {
|
} else if (match(TokenType::NumericLiteral)) {
|
||||||
property_key = create_ast_node<StringLiteral>(consume(TokenType::NumericLiteral).value());
|
property_key = create_ast_node<StringLiteral>(consume(TokenType::NumericLiteral).value());
|
||||||
} else if (match(TokenType::BracketOpen)) {
|
} else if (match(TokenType::BracketOpen)) {
|
||||||
|
@ -559,6 +559,28 @@ NonnullRefPtr<ArrayExpression> Parser::parse_array_expression()
|
||||||
return create_ast_node<ArrayExpression>(move(elements));
|
return create_ast_node<ArrayExpression>(move(elements));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds a StringLiteral AST node from a StringLiteral or TemplateLiteralString
// token, decoding its escape sequences via Token::string_value().
//
// If decoding fails, a syntax error describing the malformed escape is recorded
// and parsing continues: a node is still returned, holding whatever string
// string_value() produced for the failed decode.
NonnullRefPtr<StringLiteral> Parser::parse_string_literal(Token token)
{
    auto status = Token::StringValueStatus::Ok;
    auto string = token.string_value(status);
    if (status != Token::StringValueStatus::Ok) {
        String message;
        if (status == Token::StringValueStatus::MalformedHexEscape || status == Token::StringValueStatus::MalformedUnicodeEscape) {
            auto type = status == Token::StringValueStatus::MalformedUnicodeEscape ? "unicode" : "hexadecimal";
            message = String::format("Malformed %s escape sequence", type);
        } else if (status == Token::StringValueStatus::UnicodeEscapeOverflow) {
            message = "Unicode codepoint must not be greater than 0x10ffff in escape sequence";
        }

        // Report the error at the literal itself. Every call site hands us a
        // token that has already been consumed, so the parser's current token
        // presumably refers to whatever follows the literal, not the literal.
        syntax_error(message, token.line_number(), token.line_column());
    }
    return create_ast_node<StringLiteral>(string);
}
|
||||||
|
|
||||||
NonnullRefPtr<TemplateLiteral> Parser::parse_template_literal(bool is_tagged)
|
NonnullRefPtr<TemplateLiteral> Parser::parse_template_literal(bool is_tagged)
|
||||||
{
|
{
|
||||||
consume(TokenType::TemplateLiteralStart);
|
consume(TokenType::TemplateLiteralStart);
|
||||||
|
@ -579,7 +601,7 @@ NonnullRefPtr<TemplateLiteral> Parser::parse_template_literal(bool is_tagged)
|
||||||
while (!match(TokenType::TemplateLiteralEnd) && !match(TokenType::UnterminatedTemplateLiteral)) {
|
while (!match(TokenType::TemplateLiteralEnd) && !match(TokenType::UnterminatedTemplateLiteral)) {
|
||||||
if (match(TokenType::TemplateLiteralString)) {
|
if (match(TokenType::TemplateLiteralString)) {
|
||||||
auto token = consume();
|
auto token = consume();
|
||||||
expressions.append(create_ast_node<StringLiteral>(token.string_value()));
|
expressions.append(parse_string_literal(token));
|
||||||
if (is_tagged)
|
if (is_tagged)
|
||||||
raw_strings.append(create_ast_node<StringLiteral>(token.value()));
|
raw_strings.append(create_ast_node<StringLiteral>(token.value()));
|
||||||
} else if (match(TokenType::TemplateLiteralExprStart)) {
|
} else if (match(TokenType::TemplateLiteralExprStart)) {
|
||||||
|
|
|
@ -70,6 +70,7 @@ public:
|
||||||
NonnullRefPtr<Expression> parse_unary_prefixed_expression();
|
NonnullRefPtr<Expression> parse_unary_prefixed_expression();
|
||||||
NonnullRefPtr<ObjectExpression> parse_object_expression();
|
NonnullRefPtr<ObjectExpression> parse_object_expression();
|
||||||
NonnullRefPtr<ArrayExpression> parse_array_expression();
|
NonnullRefPtr<ArrayExpression> parse_array_expression();
|
||||||
|
NonnullRefPtr<StringLiteral> parse_string_literal(Token token);
|
||||||
NonnullRefPtr<TemplateLiteral> parse_template_literal(bool is_tagged);
|
NonnullRefPtr<TemplateLiteral> parse_template_literal(bool is_tagged);
|
||||||
NonnullRefPtr<Expression> parse_secondary_expression(NonnullRefPtr<Expression>, int min_precedence, Associativity associate = Associativity::Right);
|
NonnullRefPtr<Expression> parse_secondary_expression(NonnullRefPtr<Expression>, int min_precedence, Associativity associate = Associativity::Right);
|
||||||
NonnullRefPtr<CallExpression> parse_call_expression(NonnullRefPtr<Expression>);
|
NonnullRefPtr<CallExpression> parse_call_expression(NonnullRefPtr<Expression>);
|
||||||
|
|
17
Libraries/LibJS/Tests/string-escapes.js
Normal file
17
Libraries/LibJS/Tests/string-escapes.js
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
load("test-common.js")

try {
    // Each entry pairs a literal containing an escape sequence with the text it
    // must decode to. Every escape form is written once as a quoted string and
    // once as a template literal. "\X55" checks that an upper-case X does not
    // start a hex escape.
    const cases = [
        ["\x55", "U"],
        ["\X55", "X55"],
        [`\x55`, "U"],
        [`\X55`, "X55"],

        ["\u26a0", "⚠"],
        [`\u26a0`, "⚠"],
        ["\u{1f41e}", "🐞"],
        [`\u{1f41e}`, "🐞"]
    ];

    for (let i = 0; i < cases.length; ++i) {
        assert(cases[i][0] === cases[i][1]);
    }

    console.log("PASS");
} catch (e) {
    console.log("FAIL: " + e);
}
|
|
@ -27,6 +27,7 @@
|
||||||
#include "Token.h"
|
#include "Token.h"
|
||||||
#include <AK/Assertions.h>
|
#include <AK/Assertions.h>
|
||||||
#include <AK/StringBuilder.h>
|
#include <AK/StringBuilder.h>
|
||||||
|
#include <AK/Utf32View.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
|
||||||
namespace JS {
|
namespace JS {
|
||||||
|
@ -72,13 +73,26 @@ double Token::double_value() const
|
||||||
return strtod(value_string.characters(), nullptr);
|
return strtod(value_string.characters(), nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
String Token::string_value() const
|
// Converts one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F') to its
// numeric value (0-15). Passing anything that is not a hex digit trips the
// assertion.
static u32 hex2int(char x)
{
    // The <ctype.h> classification functions are undefined for negative
    // arguments other than EOF, and plain char may be signed, so go through
    // unsigned char before calling them.
    auto digit = static_cast<unsigned char>(x);
    ASSERT(isxdigit(digit));
    if (digit >= '0' && digit <= '9')
        return digit - '0';
    return 10u + (tolower(digit) - 'a');
}
|
||||||
|
|
||||||
|
String Token::string_value(StringValueStatus& status) const
|
||||||
{
|
{
|
||||||
ASSERT(type() == TokenType::StringLiteral || type() == TokenType::TemplateLiteralString);
|
ASSERT(type() == TokenType::StringLiteral || type() == TokenType::TemplateLiteralString);
|
||||||
auto is_template = type() == TokenType::TemplateLiteralString;
|
auto is_template = type() == TokenType::TemplateLiteralString;
|
||||||
|
|
||||||
auto offset = type() == TokenType::TemplateLiteralString ? 0 : 1;
|
auto offset = type() == TokenType::TemplateLiteralString ? 0 : 1;
|
||||||
|
|
||||||
|
auto encoding_failure = [&status](StringValueStatus parse_status) -> String {
|
||||||
|
status = parse_status;
|
||||||
|
return {};
|
||||||
|
};
|
||||||
|
|
||||||
StringBuilder builder;
|
StringBuilder builder;
|
||||||
for (size_t i = offset; i < m_value.length() - offset; ++i) {
|
for (size_t i = offset; i < m_value.length() - offset; ++i) {
|
||||||
if (m_value[i] == '\\' && i + 1 < m_value.length() - offset) {
|
if (m_value[i] == '\\' && i + 1 < m_value.length() - offset) {
|
||||||
|
@ -114,14 +128,62 @@ String Token::string_value() const
|
||||||
case '\\':
|
case '\\':
|
||||||
builder.append('\\');
|
builder.append('\\');
|
||||||
break;
|
break;
|
||||||
|
case 'x': {
|
||||||
|
if (i + 2 >= m_value.length() - offset)
|
||||||
|
return encoding_failure(StringValueStatus::MalformedHexEscape);
|
||||||
|
|
||||||
|
auto digit1 = m_value[++i];
|
||||||
|
auto digit2 = m_value[++i];
|
||||||
|
if (!isxdigit(digit1) || !isxdigit(digit2))
|
||||||
|
return encoding_failure(StringValueStatus::MalformedHexEscape);
|
||||||
|
builder.append(static_cast<char>(hex2int(digit1) * 16 + hex2int(digit2)));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 'u': {
|
||||||
|
if (i + 1 >= m_value.length() - offset)
|
||||||
|
return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
|
||||||
|
u32 code_point = m_value[++i];
|
||||||
|
|
||||||
|
if (code_point == '{') {
|
||||||
|
code_point = 0;
|
||||||
|
do {
|
||||||
|
if (i + 1 >= m_value.length() - offset)
|
||||||
|
return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
|
||||||
|
|
||||||
|
auto ch = m_value[++i];
|
||||||
|
if (!isxdigit(ch))
|
||||||
|
return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
|
||||||
|
|
||||||
|
auto new_code_point = (code_point << 4u) | hex2int(ch);
|
||||||
|
if (new_code_point < code_point)
|
||||||
|
return encoding_failure(StringValueStatus::UnicodeEscapeOverflow);
|
||||||
|
code_point = new_code_point;
|
||||||
|
} while (m_value[i + 1] != '}');
|
||||||
|
++i;
|
||||||
|
} else {
|
||||||
|
if (i + 3 >= m_value.length() - offset || !isxdigit(code_point))
|
||||||
|
return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
|
||||||
|
|
||||||
|
code_point = hex2int(code_point);
|
||||||
|
for (int j = 0; j < 3; ++j) {
|
||||||
|
auto ch = m_value[++i];
|
||||||
|
if (!isxdigit(ch))
|
||||||
|
return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
|
||||||
|
code_point = (code_point << 4u) | hex2int(ch);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
builder.append({ &code_point, 1 });
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
if (is_template && (m_value[i] == '$' || m_value[i] == '`')) {
|
if (is_template && (m_value[i] == '$' || m_value[i] == '`')) {
|
||||||
builder.append(m_value[i]);
|
builder.append(m_value[i]);
|
||||||
} else {
|
break;
|
||||||
// FIXME: Also parse octal, hex and unicode sequences
|
|
||||||
// should anything else generate a syntax error?
|
|
||||||
builder.append(m_value[i]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: Also parse octal. Should anything else generate a syntax error?
|
||||||
|
builder.append(m_value[i]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
builder.append(m_value[i]);
|
builder.append(m_value[i]);
|
||||||
|
|
|
@ -172,9 +172,16 @@ public:
|
||||||
size_t line_number() const { return m_line_number; }
|
size_t line_number() const { return m_line_number; }
|
||||||
size_t line_column() const { return m_line_column; }
|
size_t line_column() const { return m_line_column; }
|
||||||
double double_value() const;
|
double double_value() const;
|
||||||
String string_value() const;
|
|
||||||
bool bool_value() const;
|
bool bool_value() const;
|
||||||
|
|
||||||
|
// Outcome of decoding the escape sequences of a string or template literal
// token; reported through the out-parameter of string_value().
enum class StringValueStatus {
    // Every escape sequence was decoded successfully.
    Ok,
    // A "\x" escape was not followed by two hexadecimal digits.
    MalformedHexEscape,
    // A "\u" escape was not followed by four hexadecimal digits or a
    // well-formed "{...}" group of hexadecimal digits.
    MalformedUnicodeEscape,
    // The value inside a "\u{...}" escape overflowed while being accumulated.
    UnicodeEscapeOverflow,
};
|
||||||
|
String string_value(StringValueStatus& status) const;
|
||||||
|
|
||||||
bool is_identifier_name() const;
|
bool is_identifier_name() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue