mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 08:38:12 +00:00
AK: Enhance GenericLexer's string consumption
`consume_quoted_string()` can now take an escape character. This allows the quoted string to contain (for example) its own enclosing quote character, provided it is escaped. The escape character is optional and defaults to 0; note that escape characters are left in the returned StringView. You can also consume and unescape a quoted string with the eponymous method `consume_and_unescape_string()`. It takes an escape character as a parameter (backslash by default) and builds a String in which common escape sequences get... unescaped :^) (e.g. \n, \r, \t...).
This commit is contained in:
parent
1ab6dd67e9
commit
8f34b493e4
2 changed files with 44 additions and 8 deletions
|
@ -26,6 +26,7 @@
|
||||||
|
|
||||||
#include <AK/Assertions.h>
|
#include <AK/Assertions.h>
|
||||||
#include <AK/GenericLexer.h>
|
#include <AK/GenericLexer.h>
|
||||||
|
#include <AK/StringBuilder.h>
|
||||||
|
|
||||||
namespace AK {
|
namespace AK {
|
||||||
|
|
||||||
|
@ -211,29 +212,63 @@ StringView GenericLexer::consume_until(Condition condition)
|
||||||
return m_input.substring_view(start, length);
|
return m_input.substring_view(start, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Consume a string surrounded by single or double quotes
|
/*
|
||||||
// The returned StringView does not include the quotes
|
* Consume a string surrounded by single or double quotes. The returned
|
||||||
StringView GenericLexer::consume_quoted_string()
|
* StringView does not include the quotes. An escape character can be provided
|
||||||
|
* to capture the enclosing quotes. Please note that the escape character will
|
||||||
|
* still be in the resulting StringView
|
||||||
|
*/
|
||||||
|
StringView GenericLexer::consume_quoted_string(char escape_char)
|
||||||
{
|
{
|
||||||
if (!is_quote(peek()))
|
if (!is_quote(peek()))
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
char quote_char = consume();
|
char quote_char = consume();
|
||||||
size_t start = m_index;
|
size_t start = m_index;
|
||||||
while (!is_eof() && peek() != quote_char)
|
while (!is_eof()) {
|
||||||
|
if (next_is(escape_char))
|
||||||
|
m_index++;
|
||||||
|
else if (next_is(quote_char))
|
||||||
|
break;
|
||||||
m_index++;
|
m_index++;
|
||||||
|
}
|
||||||
size_t length = m_index - start;
|
size_t length = m_index - start;
|
||||||
|
|
||||||
if (peek() != quote_char) {
|
if (peek() != quote_char) {
|
||||||
m_index = start - 1; // Restore the index in case the string is unterminated
|
// Restore the index in case the string is unterminated
|
||||||
|
m_index = start - 1;
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ignore closing quote
|
||||||
ignore();
|
ignore();
|
||||||
|
|
||||||
return m_input.substring_view(start, length);
|
return m_input.substring_view(start, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
String GenericLexer::consume_and_unescape_string(char escape_char)
|
||||||
|
{
|
||||||
|
auto view = consume_quoted_string(escape_char);
|
||||||
|
if (view.is_null())
|
||||||
|
return {};
|
||||||
|
|
||||||
|
// Transform common escape sequences
|
||||||
|
auto unescape_character = [](char c) {
|
||||||
|
static const char* escape_map = "n\nr\rt\tb\bf\f";
|
||||||
|
for (size_t i = 0; escape_map[i] != '\0'; i += 2)
|
||||||
|
if (c == escape_map[i])
|
||||||
|
return escape_map[i + 1];
|
||||||
|
return c;
|
||||||
|
};
|
||||||
|
|
||||||
|
StringBuilder builder;
|
||||||
|
for (size_t i = 0; i < view.length(); ++i) {
|
||||||
|
char c = (view[i] == escape_char) ? unescape_character(view[++i]) : view[i];
|
||||||
|
builder.append(c);
|
||||||
|
}
|
||||||
|
return builder.to_string();
|
||||||
|
}
|
||||||
|
|
||||||
// Ignore a number of characters (1 by default)
|
// Ignore a number of characters (1 by default)
|
||||||
void GenericLexer::ignore(size_t count)
|
void GenericLexer::ignore(size_t count)
|
||||||
{
|
{
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <AK/Function.h>
|
#include <AK/Function.h>
|
||||||
|
#include <AK/String.h>
|
||||||
#include <AK/StringView.h>
|
#include <AK/StringView.h>
|
||||||
|
|
||||||
namespace AK {
|
namespace AK {
|
||||||
|
@ -36,7 +37,7 @@ public:
|
||||||
explicit GenericLexer(const StringView& input);
|
explicit GenericLexer(const StringView& input);
|
||||||
virtual ~GenericLexer();
|
virtual ~GenericLexer();
|
||||||
|
|
||||||
// A lambda/function can be used to match characters as the user pleases
|
// A lambda/function can be used to match characters as the user pleases
|
||||||
using Condition = Function<bool(char)>;
|
using Condition = Function<bool(char)>;
|
||||||
|
|
||||||
size_t tell() const { return m_index; }
|
size_t tell() const { return m_index; }
|
||||||
|
@ -64,8 +65,8 @@ public:
|
||||||
StringView consume_until(char);
|
StringView consume_until(char);
|
||||||
StringView consume_until(const char*);
|
StringView consume_until(const char*);
|
||||||
StringView consume_until(Condition);
|
StringView consume_until(Condition);
|
||||||
// FIXME: provide an escape character
|
StringView consume_quoted_string(char escape_char = 0);
|
||||||
StringView consume_quoted_string();
|
String consume_and_unescape_string(char escape_char = '\\');
|
||||||
|
|
||||||
void ignore(size_t count = 1);
|
void ignore(size_t count = 1);
|
||||||
void ignore_while(Condition);
|
void ignore_while(Condition);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue