mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 06:48:12 +00:00
LibSQL: Make lexer and parser more standard SQL compliant
SQL was standardized before there was consensus on sane language syntax constructs had evolved. The language is mostly case-insensitive, with unquoted text converted to upper case. Identifiers can include lower case characters and other 'special' characters by enclosing the identifier with double quotes. A double quote is escaped by doubling it. Likewise, a single quote in a literal string is escaped by doubling it. All this means that the strategy used in the lexer, where a token's value is a StringView 'window' on the source string, does not work, because the value needs to be massaged before being handed to the parser. Therefore a token now has a String containing its value. Given the limited lifetime of a token, this is acceptable overhead. Not doing this means that for example quote removal and double quote escaping would need to be done in the parser or in AST node construction, which would spread lexing basically all over the place. Which would be suboptimal. There was some impact on the sql utility and SyntaxHighlighter component which was addressed by storing the token's end position together with the start position in order to properly highlight it. Finally, reviewing the tests for parsing numeric literals revealed an inconsistency in which tokens we accept or reject: `1a` is accepted but `1e` is rejected. Related to this is the fate of `0x`. Added a FIXME reminding us to address this.
This commit is contained in:
parent
4198f7e1af
commit
5c4890411b
9 changed files with 408 additions and 311 deletions
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
|
||||
* Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -48,6 +49,14 @@ ParseResult parse(StringView sql)
|
|||
|
||||
TEST_CASE(numeric_literal)
|
||||
{
|
||||
// FIXME Right now the "1a" test fails (meaning the parse succeeds).
|
||||
// This is obviously inconsistent.
|
||||
// See the FIXME in lexer.cpp, method consume_exponent() about
|
||||
// solutions.
|
||||
// EXPECT(parse("1e").is_error());
|
||||
// EXPECT(parse("1a").is_error());
|
||||
// EXPECT(parse("0x").is_error());
|
||||
|
||||
auto validate = [](StringView sql, double expected_value) {
|
||||
auto result = parse(sql);
|
||||
EXPECT(!result.is_error());
|
||||
|
@ -61,7 +70,9 @@ TEST_CASE(numeric_literal)
|
|||
|
||||
validate("123", 123);
|
||||
validate("3.14", 3.14);
|
||||
validate("0xA", 10);
|
||||
validate("0xff", 255);
|
||||
validate("0x100", 256);
|
||||
validate("1e3", 1000);
|
||||
}
|
||||
|
||||
|
@ -81,14 +92,16 @@ TEST_CASE(string_literal)
|
|||
EXPECT_EQ(literal.value(), expected_value);
|
||||
};
|
||||
|
||||
validate("''", "''");
|
||||
validate("'hello friends'", "'hello friends'");
|
||||
validate("''", "");
|
||||
validate("'hello friends'", "hello friends");
|
||||
validate("'hello ''friends'''", "hello 'friends'");
|
||||
}
|
||||
|
||||
TEST_CASE(blob_literal)
|
||||
{
|
||||
EXPECT(parse("x'").is_error());
|
||||
EXPECT(parse("x'unterminated").is_error());
|
||||
EXPECT(parse("x'NOTHEX'").is_error());
|
||||
|
||||
auto validate = [](StringView sql, StringView expected_value) {
|
||||
auto result = parse(sql);
|
||||
|
@ -101,8 +114,8 @@ TEST_CASE(blob_literal)
|
|||
EXPECT_EQ(literal.value(), expected_value);
|
||||
};
|
||||
|
||||
validate("x''", "x''");
|
||||
validate("x'hello friends'", "x'hello friends'");
|
||||
validate("x''", "");
|
||||
validate("x'DEADC0DE'", "DEADC0DE");
|
||||
}
|
||||
|
||||
TEST_CASE(null_literal)
|
||||
|
@ -120,9 +133,10 @@ TEST_CASE(null_literal)
|
|||
|
||||
TEST_CASE(column_name)
|
||||
{
|
||||
EXPECT(parse(".column").is_error());
|
||||
EXPECT(parse("table.").is_error());
|
||||
EXPECT(parse("schema.table.").is_error());
|
||||
EXPECT(parse(".column_name").is_error());
|
||||
EXPECT(parse("table_name.").is_error());
|
||||
EXPECT(parse("schema_name.table_name.").is_error());
|
||||
EXPECT(parse("\"unterminated").is_error());
|
||||
|
||||
auto validate = [](StringView sql, StringView expected_schema, StringView expected_table, StringView expected_column) {
|
||||
auto result = parse(sql);
|
||||
|
@ -137,9 +151,11 @@ TEST_CASE(column_name)
|
|||
EXPECT_EQ(column.column_name(), expected_column);
|
||||
};
|
||||
|
||||
validate("column", {}, {}, "column");
|
||||
validate("table.column", {}, "table", "column");
|
||||
validate("schema.table.column", "schema", "table", "column");
|
||||
validate("column_name", {}, {}, "COLUMN_NAME");
|
||||
validate("table_name.column_name", {}, "TABLE_NAME", "COLUMN_NAME");
|
||||
validate("schema_name.table_name.column_name", "SCHEMA_NAME", "TABLE_NAME", "COLUMN_NAME");
|
||||
validate("\"Column_Name\"", {}, {}, "Column_Name");
|
||||
validate("\"Column\n_Name\"", {}, {}, "Column\n_Name");
|
||||
}
|
||||
|
||||
TEST_CASE(unary_operator)
|
||||
|
@ -258,7 +274,7 @@ TEST_CASE(chained_expression)
|
|||
|
||||
validate("(15)", 1);
|
||||
validate("(15, 16)", 2);
|
||||
validate("(15, 16, column)", 3);
|
||||
validate("(15, 16, column_name)", 3);
|
||||
}
|
||||
|
||||
TEST_CASE(cast_expression)
|
||||
|
@ -273,6 +289,8 @@ TEST_CASE(cast_expression)
|
|||
|
||||
auto validate = [](StringView sql, StringView expected_type_name) {
|
||||
auto result = parse(sql);
|
||||
if (result.is_error())
|
||||
outln("{}: {}", sql, result.error());
|
||||
EXPECT(!result.is_error());
|
||||
|
||||
auto expression = result.release_value();
|
||||
|
@ -285,9 +303,12 @@ TEST_CASE(cast_expression)
|
|||
EXPECT_EQ(type_name->name(), expected_type_name);
|
||||
};
|
||||
|
||||
validate("CAST (15 AS int)", "int");
|
||||
validate("CAST ('NULL' AS null)", "null");
|
||||
validate("CAST (15 AS varchar(255))", "varchar");
|
||||
validate("CAST (15 AS int)", "INT");
|
||||
// FIXME The syntax in the test below fails on both sqlite3 and psql (PostgreSQL).
|
||||
// Also fails here because null is interpreted as the NULL keyword and not the
|
||||
// identifier null (which is not a type)
|
||||
// validate("CAST ('NULL' AS null)", "null");
|
||||
validate("CAST (15 AS varchar(255))", "VARCHAR");
|
||||
}
|
||||
|
||||
TEST_CASE(case_expression)
|
||||
|
@ -344,16 +365,16 @@ TEST_CASE(exists_expression)
|
|||
EXPECT(parse("EXISTS (").is_error());
|
||||
EXPECT(parse("EXISTS (SELECT").is_error());
|
||||
EXPECT(parse("EXISTS (SELECT)").is_error());
|
||||
EXPECT(parse("EXISTS (SELECT * FROM table").is_error());
|
||||
EXPECT(parse("EXISTS (SELECT * FROM table_name").is_error());
|
||||
EXPECT(parse("NOT EXISTS").is_error());
|
||||
EXPECT(parse("NOT EXISTS (").is_error());
|
||||
EXPECT(parse("NOT EXISTS (SELECT").is_error());
|
||||
EXPECT(parse("NOT EXISTS (SELECT)").is_error());
|
||||
EXPECT(parse("NOT EXISTS (SELECT * FROM table").is_error());
|
||||
EXPECT(parse("NOT EXISTS (SELECT * FROM table_name").is_error());
|
||||
EXPECT(parse("(").is_error());
|
||||
EXPECT(parse("(SELECT").is_error());
|
||||
EXPECT(parse("(SELECT)").is_error());
|
||||
EXPECT(parse("(SELECT * FROM table").is_error());
|
||||
EXPECT(parse("(SELECT * FROM table_name").is_error());
|
||||
|
||||
auto validate = [](StringView sql, bool expected_invert_expression) {
|
||||
auto result = parse(sql);
|
||||
|
@ -366,9 +387,9 @@ TEST_CASE(exists_expression)
|
|||
EXPECT_EQ(exists.invert_expression(), expected_invert_expression);
|
||||
};
|
||||
|
||||
validate("EXISTS (SELECT * FROM table)", false);
|
||||
validate("NOT EXISTS (SELECT * FROM table)", true);
|
||||
validate("(SELECT * FROM table)", false);
|
||||
validate("EXISTS (SELECT * FROM table_name)", false);
|
||||
validate("NOT EXISTS (SELECT * FROM table_name)", true);
|
||||
validate("(SELECT * FROM table_name)", false);
|
||||
}
|
||||
|
||||
TEST_CASE(collate_expression)
|
||||
|
@ -389,8 +410,8 @@ TEST_CASE(collate_expression)
|
|||
EXPECT_EQ(collate.collation_name(), expected_collation_name);
|
||||
};
|
||||
|
||||
validate("15 COLLATE fifteen", "fifteen");
|
||||
validate("(15, 16) COLLATE chain", "chain");
|
||||
validate("15 COLLATE fifteen", "FIFTEEN");
|
||||
validate("(15, 16) COLLATE \"chain\"", "chain");
|
||||
}
|
||||
|
||||
TEST_CASE(is_expression)
|
||||
|
@ -525,9 +546,9 @@ TEST_CASE(between_expression)
|
|||
TEST_CASE(in_table_expression)
|
||||
{
|
||||
EXPECT(parse("IN").is_error());
|
||||
EXPECT(parse("IN table").is_error());
|
||||
EXPECT(parse("IN table_name").is_error());
|
||||
EXPECT(parse("NOT IN").is_error());
|
||||
EXPECT(parse("NOT IN table").is_error());
|
||||
EXPECT(parse("NOT IN table_name").is_error());
|
||||
|
||||
auto validate = [](StringView sql, StringView expected_schema, StringView expected_table, bool expected_invert_expression) {
|
||||
auto result = parse(sql);
|
||||
|
@ -543,11 +564,11 @@ TEST_CASE(in_table_expression)
|
|||
EXPECT_EQ(in.invert_expression(), expected_invert_expression);
|
||||
};
|
||||
|
||||
validate("15 IN table", {}, "table", false);
|
||||
validate("15 IN schema.table", "schema", "table", false);
|
||||
validate("15 IN table_name", {}, "TABLE_NAME", false);
|
||||
validate("15 IN schema_name.table_name", "SCHEMA_NAME", "TABLE_NAME", false);
|
||||
|
||||
validate("15 NOT IN table", {}, "table", true);
|
||||
validate("15 NOT IN schema.table", "schema", "table", true);
|
||||
validate("15 NOT IN table_name", {}, "TABLE_NAME", true);
|
||||
validate("15 NOT IN schema_name.table_name", "SCHEMA_NAME", "TABLE_NAME", true);
|
||||
}
|
||||
|
||||
TEST_CASE(in_chained_expression)
|
||||
|
@ -583,9 +604,9 @@ TEST_CASE(in_chained_expression)
|
|||
TEST_CASE(in_selection_expression)
|
||||
{
|
||||
EXPECT(parse("IN (SELECT)").is_error());
|
||||
EXPECT(parse("IN (SELECT * FROM table, SELECT * FROM table);").is_error());
|
||||
EXPECT(parse("IN (SELECT * FROM table_name, SELECT * FROM table_name);").is_error());
|
||||
EXPECT(parse("NOT IN (SELECT)").is_error());
|
||||
EXPECT(parse("NOT IN (SELECT * FROM table, SELECT * FROM table);").is_error());
|
||||
EXPECT(parse("NOT IN (SELECT * FROM table_name, SELECT * FROM table_name);").is_error());
|
||||
|
||||
auto validate = [](StringView sql, bool expected_invert_expression) {
|
||||
auto result = parse(sql);
|
||||
|
@ -599,8 +620,8 @@ TEST_CASE(in_selection_expression)
|
|||
EXPECT_EQ(in.invert_expression(), expected_invert_expression);
|
||||
};
|
||||
|
||||
validate("15 IN (SELECT * FROM table)", false);
|
||||
validate("15 NOT IN (SELECT * FROM table)", true);
|
||||
validate("15 IN (SELECT * FROM table_name)", false);
|
||||
validate("15 NOT IN (SELECT * FROM table_name)", true);
|
||||
}
|
||||
|
||||
TEST_CASE(expression_tree_depth_limit)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue