diff --git a/Meta/Lagom/CMakeLists.txt b/Meta/Lagom/CMakeLists.txt index 3f2544f9ae..06dd9c2e36 100644 --- a/Meta/Lagom/CMakeLists.txt +++ b/Meta/Lagom/CMakeLists.txt @@ -409,6 +409,7 @@ if (BUILD_LAGOM) list(REMOVE_ITEM LIBSQL_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/../../Userland/Libraries/LibSQL/SQLClient.cpp") lagom_lib(SQL sql SOURCES ${LIBSQL_SOURCES} + LIBS LagomRegex ) # TextCodec diff --git a/Tests/LibSQL/TestSqlStatementExecution.cpp b/Tests/LibSQL/TestSqlStatementExecution.cpp index 2cb23b0554..caf97a9ec2 100644 --- a/Tests/LibSQL/TestSqlStatementExecution.cpp +++ b/Tests/LibSQL/TestSqlStatementExecution.cpp @@ -337,4 +337,77 @@ TEST_CASE(select_inner_join) EXPECT_EQ(row[2].to_string(), "Test_12"); } +TEST_CASE(select_with_like) +{ + ScopeGuard guard([]() { unlink(db_name); }); + auto database = SQL::Database::construct(db_name); + EXPECT(!database->open().is_error()); + create_table(database); + auto result = execute(database, + "INSERT INTO TestSchema.TestTable ( TextColumn, IntColumn ) VALUES " + "( 'Test+1', 42 ), " + "( 'Test+2', 43 ), " + "( 'Test+3', 44 ), " + "( 'Test+4', 45 ), " + "( 'Test+5', 46 ), " + "( 'Another+Test_6', 47 );"); + EXPECT(result->error().code == SQL::SQLErrorCode::NoError); + EXPECT(result->inserted() == 6); + + // Simple match + result = execute(database, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn LIKE 'Test+1';"); + EXPECT(result->error().code == SQL::SQLErrorCode::NoError); + EXPECT(result->has_results()); + EXPECT_EQ(result->results().size(), 1u); + + // Use % to match most rows + result = execute(database, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn LIKE 'T%';"); + EXPECT(result->error().code == SQL::SQLErrorCode::NoError); + EXPECT(result->has_results()); + EXPECT_EQ(result->results().size(), 5u); + + // Same as above but invert the match + result = execute(database, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn NOT LIKE 'T%';"); + EXPECT(result->error().code == SQL::SQLErrorCode::NoError); + EXPECT(result->has_results()); + EXPECT_EQ(result->results().size(), 1u); + + // Use _ and % to match all rows + result = execute(database, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn LIKE '%e_t%';"); + EXPECT(result->error().code == SQL::SQLErrorCode::NoError); + EXPECT(result->has_results()); + EXPECT_EQ(result->results().size(), 6u); + + // Use escape to match a single row. The escape character happens to be a + // Regex metacharacter, let's make sure we don't get confused by that. + result = execute(database, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn LIKE '%Test^_%' ESCAPE '^';"); + EXPECT(result->error().code == SQL::SQLErrorCode::NoError); + EXPECT(result->has_results()); + EXPECT_EQ(result->results().size(), 1u); + + // Same as above but escape the escape character happens to be a SQL + // metacharacter - we want to make sure it's treated as an escape. + result = execute(database, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn LIKE '%Test__%' ESCAPE '_';"); + EXPECT(result->error().code == SQL::SQLErrorCode::NoError); + EXPECT(result->has_results()); + EXPECT_EQ(result->results().size(), 1u); + + // (Unnecessarily) escaping a character that happens to be a Regex + // metacharacter should have no effect. + result = execute(database, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn LIKE 'Test:+_' ESCAPE ':';"); + EXPECT(result->error().code == SQL::SQLErrorCode::NoError); + EXPECT(result->has_results()); + EXPECT_EQ(result->results().size(), 5u); + + // Make sure we error out if the ESCAPE is empty + result = execute(database, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn LIKE '%' ESCAPE '';"); + EXPECT(result->error().code == SQL::SQLErrorCode::SyntaxError); + EXPECT(!result->has_results()); + + // Make sure we error out if the ESCAPE has more than a single character + result = execute(database, "SELECT TextColumn FROM TestSchema.TestTable WHERE TextColumn LIKE '%' ESCAPE 'whf';"); + EXPECT(result->error().code == SQL::SQLErrorCode::SyntaxError); + EXPECT(!result->has_results()); +} + } diff --git a/Userland/Libraries/LibSQL/AST/AST.h b/Userland/Libraries/LibSQL/AST/AST.h index 7dbcb7c389..20dc9d3184 100644 --- a/Userland/Libraries/LibSQL/AST/AST.h +++ b/Userland/Libraries/LibSQL/AST/AST.h @@ -636,6 +636,7 @@ public: MatchOperator type() const { return m_type; } const RefPtr& escape() const { return m_escape; } + virtual Value evaluate(ExecutionContext&) const override; private: MatchOperator m_type; diff --git a/Userland/Libraries/LibSQL/AST/Expression.cpp b/Userland/Libraries/LibSQL/AST/Expression.cpp index ce4c972e0c..ad189c5eb6 100644 --- a/Userland/Libraries/LibSQL/AST/Expression.cpp +++ b/Userland/Libraries/LibSQL/AST/Expression.cpp @@ -4,11 +4,14 @@ * SPDX-License-Identifier: BSD-2-Clause */ +#include #include #include namespace SQL::AST { +static const String s_posix_basic_metacharacters = ".^$*[]+\\"; + Value Expression::evaluate(ExecutionContext&) const { return Value::null(); @@ -194,4 +197,58 @@ Value ColumnNameExpression::evaluate(ExecutionContext& context) const return Value::null(); } +Value MatchExpression::evaluate(ExecutionContext& context) const +{ + if (context.result->has_error()) + return Value::null(); + switch (type()) { + case MatchOperator::Like: { + Value lhs_value = lhs()->evaluate(context); + Value rhs_value = rhs()->evaluate(context); + char escape_char = '\0'; + if (escape()) { + auto escape_str = escape()->evaluate(context).to_string(); + if (escape_str.length() != 1) { + context.result->set_error(SQLErrorCode::SyntaxError, "ESCAPE should be a single character"); + return Value::null(); + } + escape_char = escape_str[0]; + } + + // Compile the pattern into a simple regex. + // https://sqlite.org/lang_expr.html#the_like_glob_regexp_and_match_operators + bool escaped = false; + AK::StringBuilder builder; + builder.append('^'); + for (auto c : rhs_value.to_string()) { + if (escape() && c == escape_char && !escaped) { + escaped = true; + } else if (s_posix_basic_metacharacters.contains(c)) { + escaped = false; + builder.append('\\'); + builder.append(c); + } else if (c == '_' && !escaped) { + builder.append('.'); + } else if (c == '%' && !escaped) { + builder.append(".*"); + } else { + escaped = false; + builder.append(c); + } + } + builder.append('$'); + // FIXME: We should probably cache this regex. + auto regex = Regex(builder.build()); + auto result = regex.match(lhs_value.to_string(), PosixFlags::Insensitive | PosixFlags::Unicode); + return Value(invert_expression() ? !result.success : result.success); + } + case MatchOperator::Glob: + case MatchOperator::Match: + case MatchOperator::Regexp: + default: + VERIFY_NOT_REACHED(); + } + return Value::null(); +} + } diff --git a/Userland/Libraries/LibSQL/CMakeLists.txt b/Userland/Libraries/LibSQL/CMakeLists.txt index 1bfa86baa1..395890f602 100644 --- a/Userland/Libraries/LibSQL/CMakeLists.txt +++ b/Userland/Libraries/LibSQL/CMakeLists.txt @@ -31,4 +31,4 @@ set(GENERATED_SOURCES ) serenity_lib(LibSQL sql) -target_link_libraries(LibSQL LibCore LibSyntax) +target_link_libraries(LibSQL LibCore LibSyntax LibRegex)