1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 17:27:35 +00:00

LibJS: Start fleshing out an ISO 8601 parser for Temporal

This is the start of a parser for the ISO 8601 grammar used in the
Temporal spec:
https://tc39.es/proposal-temporal/#sec-temporal-iso8601grammar

We will, on purpose, not use a generic ISO 8601 parser from AK or
similar for two reasons:

- Many AOs make specific assumptions about which productions exist and
  access them directly, even when they're part of a larger production.
- The spec says "The grammar deviates from the standard given in ISO
  8601 in the following ways:" and then lists 17 such deviations.
  Making that work with a general-purpose parser is not worth it.

The public API is not being used anywhere yet, but will be in the next
couple of commits. Likewise, the Production enum will be populated with
all the productions accessed directly (e.g. TemporalDateString).

Many thanks to Ali for showing me how to improve my initial approach
full of macros with a nice RAII helper - it's much nicer :^)

Co-Authored-By: Ali Mohammad Pur <mpfard@serenityos.org>
This commit is contained in:
Linus Groh 2021-11-19 18:04:50 +00:00
parent dd76ba2fe1
commit de23f0b68c
5 changed files with 634 additions and 26 deletions

View file

@ -0,0 +1,117 @@
/*
* Copyright (c) 2021, Linus Groh <linusg@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/GenericLexer.h>
#include <AK/Optional.h>
#include <AK/StringView.h>
#include <AK/Vector.h>
namespace JS::Temporal {
// Collects the individual pieces of an ISO 8601 string that parsing may record.
// Every member is an Optional<StringView>; a member without a value means nothing
// was stored for that piece.
// NOTE(review): which grammar production fills which member is decided by the
// parser implementation, which is not part of this header — confirm there.
struct ParseResult {
Optional<StringView> sign;
Optional<StringView> date_year;
Optional<StringView> date_month;
Optional<StringView> date_day;
Optional<StringView> time_hour;
Optional<StringView> time_minute;
Optional<StringView> time_second;
Optional<StringView> time_fractional_part;
Optional<StringView> calendar_name;
};
// Identifies a grammar production for parse_iso8601().
// Currently has no enumerators: it is meant to be populated with the productions
// that callers need to parse directly (e.g. TemporalDateString) as they are added.
enum class Production {
};
// Public entry point: takes the production to parse and the string to parse it from,
// and yields an optional ParseResult.
// NOTE(review): presumably the Optional is empty when the input does not match the
// requested production — the definition is not visible in this header; confirm.
Optional<ParseResult> parse_iso8601(Production, StringView);
namespace Detail {
// Parser over a single input string, with one parse_*() member per grammar piece.
// The parse_*() members are only declared here; each returns a bool that must not be
// ignored ([[nodiscard]]).
// NOTE(review): presumably `true` means the production matched at the current lexer
// position — confirm against the implementation file.
class ISO8601Parser {
public:
    // Creates a parser whose lexer starts at the beginning of `input` and whose
    // ParseResult is empty. The view itself is also stored (m_input) so that
    // StateTransaction::parsed_string_view() can slice it later.
    explicit ISO8601Parser(StringView input)
        : m_input(input)
        , m_state({
              .lexer = GenericLexer { input },
              .parse_result = {},
          })
    {
    }

    // Read-only access to the current lexer and to the result accumulated so far.
    [[nodiscard]] GenericLexer const& lexer() const { return m_state.lexer; }
    [[nodiscard]] ParseResult const& parse_result() const { return m_state.parse_result; }

    [[nodiscard]] bool parse_decimal_digit();
    [[nodiscard]] bool parse_non_zero_digit();
    [[nodiscard]] bool parse_ascii_sign();
    [[nodiscard]] bool parse_sign();
    [[nodiscard]] bool parse_hour();
    [[nodiscard]] bool parse_minute_second();
    [[nodiscard]] bool parse_decimal_separator();
    [[nodiscard]] bool parse_date_time_separator();
    [[nodiscard]] bool parse_date_year();
    [[nodiscard]] bool parse_date_month();
    [[nodiscard]] bool parse_date_day();
    [[nodiscard]] bool parse_date();
    [[nodiscard]] bool parse_time_hour();
    [[nodiscard]] bool parse_time_minute();
    [[nodiscard]] bool parse_time_second();
    [[nodiscard]] bool parse_fractional_part();
    [[nodiscard]] bool parse_time_fractional_part();
    [[nodiscard]] bool parse_fraction();
    [[nodiscard]] bool parse_time_fraction();
    [[nodiscard]] bool parse_time_zone_offset_required();
    [[nodiscard]] bool parse_time_zone_name_required();
    [[nodiscard]] bool parse_time_zone();
    [[nodiscard]] bool parse_calendar_name();
    [[nodiscard]] bool parse_calendar();
    [[nodiscard]] bool parse_time_spec();
    [[nodiscard]] bool parse_time_spec_separator();
    [[nodiscard]] bool parse_date_time();
    [[nodiscard]] bool parse_calendar_date_time();

private:
    // Everything that has to be rolled back together when a parse attempt fails:
    // the lexer (and thus its position) and the result gathered so far.
    struct State {
        GenericLexer lexer;
        ParseResult parse_result;
    };

    // Scope guard around a parse attempt. Construction snapshots the parser's State and
    // remembers the lexer position; unless commit() has been called, destruction puts
    // the snapshot back, discarding whatever the attempt consumed or recorded.
    struct StateTransaction {
        explicit StateTransaction(ISO8601Parser& parser)
            : m_parser(parser)
            , m_saved_state(parser.m_state)
            , m_start_index(parser.m_state.lexer.tell())
        {
        }

        // A copy (or moved-to object) would carry its own snapshot and commit flag and
        // restore that snapshot from its own destructor, potentially overwriting state
        // the original transaction committed. Forbid copying and moving outright.
        StateTransaction(StateTransaction const&) = delete;
        StateTransaction(StateTransaction&&) = delete;
        StateTransaction& operator=(StateTransaction const&) = delete;
        StateTransaction& operator=(StateTransaction&&) = delete;

        ~StateTransaction()
        {
            // Not committed: restore the parser to the state captured at construction.
            if (!m_commit)
                m_parser.m_state = move(m_saved_state);
        }

        // Keeps the parser's current state when this transaction goes out of scope.
        void commit() { m_commit = true; }

        // The part of the original input between the lexer position at construction
        // and the lexer's current position.
        StringView parsed_string_view() const
        {
            return m_parser.m_input.substring_view(m_start_index, m_parser.m_state.lexer.tell() - m_start_index);
        }

    private:
        ISO8601Parser& m_parser;
        State m_saved_state;
        size_t m_start_index { 0 };
        bool m_commit { false };
    };

    StringView m_input;
    State m_state;
};
}
}