mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 12:12:45 +00:00 
			
		
		
		
	DevTools: Add StateMachineGenerator utility
This program turns a description of a state machine that takes its input
byte-by-byte into C++ code. The state machine is described in a custom
format as specified below:
```
// Comments are started by two slashes, and cause the rest of the line
// to be ignored
@name ExampleStateMachine // sets the name of the generated class
@namespace Test           // sets the namespace (optional)
@begin Begin              // sets the state the parser will start in
// The rest of the file contains one or more states and an optional
// @anywhere directive. Each of these is a curly bracket delimited set
// of state transitions. State transitions contain a selector, the
// literal "=>" and a (new_state, action) tuple. Examples:
//     0x0a => (Begin, PrintLine)
//     [0x00..0x1f] => (_, Warn)      // '_' means no change
//     [0x41..0x5a] => (BeginWord, _) // '_' means no action
// Rules common to all states. These take precedence over rules in the
// specific states.
@anywhere {
    0x0a         => (Begin, PrintLine)
    [0x00..0x1f] => (_, Warn)
}
Begin {
    [0x41..0x5a] => (Word, _)
    [0x61..0x7a] => (Word, _)
    // For missing values, the transition (_, _) is implied
}
Word {
    // The entry action is run when we transition to this state from a
    // *different* state. @anywhere can't have this
    @entry IncreaseWordCount
    0x09 => (Begin, _)
    0x20 => (Begin, _)
    // The exit action is run before we transition to any *other* state
    // from here. @anywhere can't have this
    @exit EndOfWord
}
```
The generated code consists of a single class which takes a
`Function<Action, u8>` as a parameter in its constructor. This gets
called whenever an action is to be done. This is because some input
might not produce an action, but others might produce up to 3 (exit,
state transition, entry). The actions allow us to build a more
advanced parser over the simple state machine.
The sole public method, `void advance(u8)`, handles the input
byte-by-byte, managing the state changes and requesting the appropriate
Action from the handler.
Internally, the state transitions are resolved via a lookup table. This
is a bit wasteful for more complex state machines, therefore the
generator is designed to be easily extendable with a switch-based
resolver; only the private `lookup_state_transition` method needs to be
re-implemented.
My goal for this tool is to use it for implementing a standard-compliant
ANSI escape sequence parser for LibVT, as described on
<https://vt100.net/emu/dec_ansi_parser>
			
			
This commit is contained in:
		
							parent
							
								
									aa4d41fe2c
								
							
						
					
					
						commit
						22195d965f
					
				
					 3 changed files with 502 additions and 0 deletions
				
			
		|  | @ -103,6 +103,7 @@ include_directories(${CMAKE_BINARY_DIR}) | |||
| 
 | ||||
| add_subdirectory(Meta/Lagom) | ||||
| add_subdirectory(Userland/DevTools/IPCCompiler) | ||||
| add_subdirectory(Userland/DevTools/StateMachineGenerator) | ||||
| add_subdirectory(Userland/Libraries/LibWeb/CodeGenerators) | ||||
| 
 | ||||
| set(write_if_different ${CMAKE_SOURCE_DIR}/Meta/write-only-on-difference.sh) | ||||
|  |  | |||
							
								
								
									
										6
									
								
								Userland/DevTools/StateMachineGenerator/CMakeLists.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								Userland/DevTools/StateMachineGenerator/CMakeLists.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,6 @@ | |||
| set(SOURCES | ||||
|     main.cpp | ||||
| ) | ||||
| 
 | ||||
| add_executable(StateMachineGenerator ${SOURCES}) | ||||
| target_link_libraries(StateMachineGenerator LagomCore) | ||||
							
								
								
									
										495
									
								
								Userland/DevTools/StateMachineGenerator/main.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										495
									
								
								Userland/DevTools/StateMachineGenerator/main.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,495 @@ | |||
| /*
 | ||||
|  * Copyright (c) 2021, Daniel Bertalan <dani@danielbertalan.dev> | ||||
|  * | ||||
|  * SPDX-License-Identifier: BSD-2-Clause | ||||
|  */ | ||||
| 
 | ||||
| #include <AK/GenericLexer.h> | ||||
| #include <AK/HashTable.h> | ||||
| #include <AK/NonnullOwnPtr.h> | ||||
| #include <AK/SourceGenerator.h> | ||||
| #include <AK/String.h> | ||||
| #include <AK/StringBuilder.h> | ||||
| #include <AK/Types.h> | ||||
| #include <LibCore/ArgsParser.h> | ||||
| #include <LibCore/File.h> | ||||
| #include <ctype.h> | ||||
| #include <string.h> | ||||
| 
 | ||||
| struct Range { | ||||
|     int begin; | ||||
|     int end; | ||||
| }; | ||||
| 
 | ||||
| struct StateTransition { | ||||
|     Optional<String> new_state; | ||||
|     Optional<String> action; | ||||
| }; | ||||
| 
 | ||||
| struct MatchedAction { | ||||
|     Range range; | ||||
|     StateTransition action; | ||||
| }; | ||||
| 
 | ||||
| struct State { | ||||
|     String name; | ||||
|     Vector<MatchedAction> actions; | ||||
|     Optional<String> entry_action; | ||||
|     Optional<String> exit_action; | ||||
| }; | ||||
| 
 | ||||
| struct StateMachine { | ||||
|     String name; | ||||
|     String initial_state; | ||||
|     Vector<State> states; | ||||
|     Optional<State> anywhere; | ||||
|     Optional<String> namespaces; | ||||
| }; | ||||
| 
 | ||||
| static OwnPtr<StateMachine> | ||||
| parse_state_machine(StringView input) | ||||
| { | ||||
|     auto state_machine = make<StateMachine>(); | ||||
|     GenericLexer lexer(input); | ||||
| 
 | ||||
|     auto consume_whitespace = [&] { | ||||
|         bool consumed = true; | ||||
|         while (consumed) { | ||||
|             consumed = lexer.consume_while(isspace).length() > 0; | ||||
|             if (lexer.consume_specific("//")) { | ||||
|                 lexer.consume_line(); | ||||
|                 consumed = true; | ||||
|             } | ||||
|         } | ||||
|     }; | ||||
| 
 | ||||
|     auto consume_identifier = [&] { | ||||
|         consume_whitespace(); | ||||
|         return lexer.consume_while([](char c) { return isalnum(c) || c == '_'; }); | ||||
|     }; | ||||
| 
 | ||||
|     auto get_hex_value = [&](char c) { | ||||
|         if (isdigit(c)) | ||||
|             return c - '0'; | ||||
|         else | ||||
|             return c - 'a' + 10; | ||||
|     }; | ||||
| 
 | ||||
|     auto consume_number = [&] { | ||||
|         int num = 0; | ||||
|         consume_whitespace(); | ||||
|         if (lexer.consume_specific("0x")) { | ||||
|             auto hex_digits = lexer.consume_while([](char c) { | ||||
|                 if (isdigit(c)) return true; | ||||
|             else { | ||||
|                 c = tolower(c); | ||||
|                 return (c >= 'a' && c <= 'f'); | ||||
|               } }); | ||||
|             for (auto c : hex_digits) | ||||
|                 num = 16 * num + get_hex_value(c); | ||||
|         } else { | ||||
|             lexer.consume_specific('\''); | ||||
|             if (lexer.next_is('\\')) | ||||
|                 num = (int)lexer.consume_escaped_character('\\'); | ||||
|             else | ||||
|                 num = lexer.consume_until('\'').to_int().value(); | ||||
|             lexer.consume_specific('\''); | ||||
|         } | ||||
|         return num; | ||||
|     }; | ||||
| 
 | ||||
|     auto consume_condition = [&] { | ||||
|         Range condition; | ||||
|         consume_whitespace(); | ||||
|         if (lexer.consume_specific('[')) { | ||||
|             consume_whitespace(); | ||||
|             condition.begin = consume_number(); | ||||
|             consume_whitespace(); | ||||
|             lexer.consume_specific(".."); | ||||
|             consume_whitespace(); | ||||
|             condition.end = consume_number(); | ||||
|             consume_whitespace(); | ||||
|             lexer.consume_specific(']'); | ||||
|         } else { | ||||
|             auto num = consume_number(); | ||||
|             condition.begin = num; | ||||
|             condition.end = num; | ||||
|         } | ||||
|         return condition; | ||||
|     }; | ||||
| 
 | ||||
|     auto consume_action = [&]() { | ||||
|         StateTransition action; | ||||
|         consume_whitespace(); | ||||
|         lexer.consume_specific("=>"); | ||||
|         consume_whitespace(); | ||||
|         lexer.consume_specific('('); | ||||
|         consume_whitespace(); | ||||
|         if (!lexer.consume_specific("_")) | ||||
|             action.new_state = consume_identifier(); | ||||
|         consume_whitespace(); | ||||
|         lexer.consume_specific(','); | ||||
|         consume_whitespace(); | ||||
|         if (!lexer.consume_specific("_")) | ||||
|             action.action = consume_identifier(); | ||||
|         consume_whitespace(); | ||||
|         lexer.consume_specific(')'); | ||||
|         return action; | ||||
|     }; | ||||
| 
 | ||||
|     auto consume_state_description | ||||
|         = [&] { | ||||
|               State state; | ||||
|               consume_whitespace(); | ||||
|               state.name = consume_identifier(); | ||||
|               consume_whitespace(); | ||||
|               consume_whitespace(); | ||||
|               lexer.consume_specific('{'); | ||||
|               for (;;) { | ||||
|                   consume_whitespace(); | ||||
|                   if (lexer.consume_specific('}')) { | ||||
|                       break; | ||||
|                   } | ||||
|                   if (lexer.consume_specific("@entry")) { | ||||
|                       consume_whitespace(); | ||||
|                       state.entry_action = consume_identifier(); | ||||
|                   } else if (lexer.consume_specific("@exit")) { | ||||
|                       consume_whitespace(); | ||||
|                       state.exit_action = consume_identifier(); | ||||
|                   } else if (lexer.next_is('@')) { | ||||
|                       auto directive = consume_identifier().to_string(); | ||||
|                       fprintf(stderr, "Unimplemented @ directive %s\n", directive.characters()); | ||||
|                       exit(1); | ||||
|                   } else { | ||||
|                       MatchedAction matched_action; | ||||
|                       matched_action.range = consume_condition(); | ||||
|                       matched_action.action = consume_action(); | ||||
|                       state.actions.append(matched_action); | ||||
|                   } | ||||
|               } | ||||
|               return state; | ||||
|           }; | ||||
| 
 | ||||
|     while (!lexer.is_eof()) { | ||||
|         consume_whitespace(); | ||||
|         if (lexer.is_eof()) | ||||
|             break; | ||||
|         if (lexer.consume_specific("@namespace")) { | ||||
|             consume_whitespace(); | ||||
|             state_machine->namespaces = lexer.consume_while([](char c) { return isalpha(c) || c == ':'; }); | ||||
|         } else if (lexer.consume_specific("@begin")) { | ||||
|             consume_whitespace(); | ||||
|             state_machine->initial_state = consume_identifier(); | ||||
|         } else if (lexer.consume_specific("@name")) { | ||||
|             consume_whitespace(); | ||||
|             state_machine->name = consume_identifier(); | ||||
|         } else if (lexer.next_is("@anywhere")) { | ||||
|             lexer.consume_specific('@'); | ||||
|             state_machine->anywhere = consume_state_description(); | ||||
|         } else if (lexer.consume_specific('@')) { | ||||
|             auto directive = consume_identifier().to_string(); | ||||
|             fprintf(stderr, "Unimplemented @ directive %s\n", directive.characters()); | ||||
|             exit(1); | ||||
|         } else { | ||||
|             auto description = consume_state_description(); | ||||
|             state_machine->states.append(description); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (state_machine->initial_state.is_empty()) { | ||||
|         fprintf(stderr, "Missing @begin directive\n"); | ||||
|         exit(1); | ||||
|     } else if (state_machine->name.is_empty()) { | ||||
|         fprintf(stderr, "Missing @name directive\n"); | ||||
|         exit(1); | ||||
|     } | ||||
| 
 | ||||
|     if (state_machine->anywhere.has_value()) { | ||||
|         state_machine->anywhere.value().name = "_Anywhere"; | ||||
|     } | ||||
|     return state_machine; | ||||
| } | ||||
| 
 | ||||
| void output_header(const StateMachine&, SourceGenerator&); | ||||
| void output_cpp(const StateMachine&, SourceGenerator&); | ||||
| 
 | ||||
| int main(int argc, char** argv) | ||||
| { | ||||
|     Core::ArgsParser args_parser; | ||||
|     const char* path = nullptr; | ||||
|     bool header_mode = false; | ||||
|     args_parser.add_option(header_mode, "Generate .h file", "header", 'H'); | ||||
|     args_parser.add_positional_argument(path, "Path to parser description", "input", Core::ArgsParser::Required::Yes); | ||||
|     args_parser.parse(argc, argv); | ||||
| 
 | ||||
|     auto file_or_error = Core::File::open(path, Core::IODevice::ReadOnly); | ||||
|     if (file_or_error.is_error()) { | ||||
|         fprintf(stderr, "Cannot open %s\n", path); | ||||
|     } | ||||
| 
 | ||||
|     auto content = file_or_error.value()->read_all(); | ||||
|     auto state_machine = parse_state_machine(content); | ||||
| 
 | ||||
|     StringBuilder builder; | ||||
|     SourceGenerator generator { builder }; | ||||
|     if (header_mode) | ||||
|         output_header(*state_machine, generator); | ||||
|     else | ||||
|         output_cpp(*state_machine, generator); | ||||
|     outln("{}", generator.as_string_view()); | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| HashTable<String> actions(const StateMachine& machine) | ||||
| { | ||||
|     HashTable<String> table; | ||||
| 
 | ||||
|     auto do_state = [&](const State& state) { | ||||
|         if (state.entry_action.has_value()) | ||||
|             table.set(state.entry_action.value()); | ||||
|         if (state.exit_action.has_value()) | ||||
|             table.set(state.exit_action.value()); | ||||
|         for (auto action : state.actions) { | ||||
|             if (action.action.action.has_value()) | ||||
|                 table.set(action.action.action.value()); | ||||
|         } | ||||
|     }; | ||||
|     for (auto state : machine.states) { | ||||
|         do_state(state); | ||||
|     } | ||||
|     if (machine.anywhere.has_value()) | ||||
|         do_state(machine.anywhere.value()); | ||||
|     return move(table); | ||||
| } | ||||
| 
 | ||||
| void generate_lookup_table(const StateMachine& machine, SourceGenerator& generator) | ||||
| { | ||||
|     generator.append(R"~~~( | ||||
|     static constexpr StateTransition STATE_TRANSITION_TABLE[][256] = { | ||||
| )~~~"); | ||||
| 
 | ||||
|     auto generate_for_state = [&](const State& s) { | ||||
|         auto table_generator = generator.fork(); | ||||
|         table_generator.set("active_state", s.name); | ||||
|         table_generator.append("/* @active_state@ */ { "); | ||||
|         VERIFY(!s.name.is_empty()); | ||||
|         Vector<StateTransition> row; | ||||
|         for (int i = 0; i < 256; i++) | ||||
|             row.append({ s.name, "_Ignore" }); | ||||
|         for (auto action : s.actions) { | ||||
|             for (int range_element = action.range.begin; range_element <= action.range.end; range_element++) { | ||||
|                 row[range_element] = { action.action.new_state, action.action.action }; | ||||
|             } | ||||
|         } | ||||
|         for (int i = 0; i < 256; ++i) { | ||||
|             auto cell_generator = table_generator.fork(); | ||||
|             cell_generator.set("cell_new_state", row[i].new_state.value_or(s.name)); | ||||
|             cell_generator.set("cell_action", row[i].action.value_or("_Ignore")); | ||||
|             cell_generator.append(" {State::@cell_new_state@, Action::@cell_action@}, "); | ||||
|         } | ||||
|         table_generator.append("},\n"); | ||||
|     }; | ||||
|     if (machine.anywhere.has_value()) { | ||||
|         generate_for_state(machine.anywhere.value()); | ||||
|     } | ||||
|     for (auto s : machine.states) { | ||||
|         generate_for_state(s); | ||||
|     } | ||||
|     generator.append(R"~~~( | ||||
|     }; | ||||
| )~~~"); | ||||
| } | ||||
| 
 | ||||
| void output_header(const StateMachine& machine, SourceGenerator& generator) | ||||
| { | ||||
|     generator.set("class_name", machine.name); | ||||
|     generator.set("initial_state", machine.initial_state); | ||||
|     generator.set("state_count", String::number(machine.states.size() + 1)); | ||||
| 
 | ||||
|     generator.append(R"~~~( | ||||
| #pragma once | ||||
| 
 | ||||
| #include <AK/Function.h> | ||||
| #include <AK/Platform.h> | ||||
| #include <AK/Types.h> | ||||
|                      )~~~"); | ||||
|     if (machine.namespaces.has_value()) { | ||||
|         generator.set("namespace", machine.namespaces.value()); | ||||
|         generator.append(R"~~~( | ||||
| namespace @namespace@ { | ||||
| )~~~"); | ||||
|     } | ||||
|     generator.append(R"~~~( | ||||
| class @class_name@ { | ||||
| public: | ||||
|     enum class Action : u8 { | ||||
|         _Ignore, | ||||
| )~~~"); | ||||
|     for (auto a : actions(machine)) { | ||||
|         if (a.is_empty()) | ||||
|             continue; | ||||
|         auto action_generator = generator.fork(); | ||||
|         action_generator.set("action.name", a); | ||||
|         action_generator.append(R"~~~( | ||||
|         @action.name@, | ||||
|     )~~~"); | ||||
|     } | ||||
| 
 | ||||
|     generator.append(R"~~~( | ||||
|     }; // end Action
 | ||||
| 
 | ||||
|     typedef Function<void(Action, u8)> Handler; | ||||
| 
 | ||||
|     @class_name@(Handler); | ||||
| 
 | ||||
|     void advance(u8); | ||||
| 
 | ||||
| private: | ||||
|     enum class State : u8 { | ||||
|         _Anywhere, | ||||
| )~~~"); | ||||
| 
 | ||||
|     int largest_state_value = 0; | ||||
|     for (auto s : machine.states) { | ||||
|         auto state_generator = generator.fork(); | ||||
|         state_generator.set("state.name", s.name); | ||||
|         largest_state_value++; | ||||
|         state_generator.append(R"~~~( | ||||
|         @state.name@, | ||||
| )~~~"); | ||||
|     } | ||||
|     generator.append(R"~~~( | ||||
|     }; // end State
 | ||||
| 
 | ||||
|     struct StateTransition { | ||||
|         State new_state; | ||||
|         Action action; | ||||
|     }; | ||||
| 
 | ||||
|     State m_state { State::@initial_state@ }; | ||||
| 
 | ||||
|     Handler m_handler; | ||||
| 
 | ||||
|     StateTransition lookup_state_transition(u8); | ||||
| )~~~"); | ||||
| 
 | ||||
|     auto table_generator = generator.fork(); | ||||
|     generate_lookup_table(machine, table_generator); | ||||
|     generator.append(R"~~~( | ||||
| }; // end @class_name@
 | ||||
| )~~~"); | ||||
| 
 | ||||
|     if (machine.namespaces.has_value()) { | ||||
|         generator.append(R"~~~( | ||||
| } // end namespace
 | ||||
| )~~~"); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void output_cpp(const StateMachine& machine, SourceGenerator& generator) | ||||
| { | ||||
|     VERIFY(!machine.name.is_empty()); | ||||
|     generator.set("class_name", machine.name); | ||||
|     generator.set("state_count", String::number(machine.states.size() + 1)); | ||||
| 
 | ||||
|     generator.append(R"~~~( | ||||
| #include "@class_name@.h" | ||||
| #include <AK/Function.h> | ||||
| #include <AK/Types.h> | ||||
| )~~~"); | ||||
|     if (machine.namespaces.has_value()) { | ||||
|         generator.set("namespace", machine.namespaces.value()); | ||||
|         generator.append(R"~~~( | ||||
| namespace @namespace@ { | ||||
| )~~~"); | ||||
|     } | ||||
|     generator.append(R"~~~( | ||||
| @class_name@::@class_name@(Handler handler) | ||||
|     : m_handler(move(handler)) | ||||
| { | ||||
| } | ||||
| 
 | ||||
| ALWAYS_INLINE @class_name@::StateTransition @class_name@::lookup_state_transition(u8 byte) | ||||
| { | ||||
|     VERIFY((u8)m_state < @state_count@); | ||||
|     )~~~"); | ||||
|     if (machine.anywhere.has_value()) { | ||||
|         generator.append(R"~~~( | ||||
|     auto anywhere_state = STATE_TRANSITION_TABLE[0][byte]; | ||||
|     if (anywhere_state.new_state != @class_name@::State::_Anywhere || anywhere_state.action != @class_name@::Action::_Ignore) | ||||
|         return anywhere_state; | ||||
|     else | ||||
| )~~~"); | ||||
|     } | ||||
|     generator.append(R"~~~( | ||||
|         return STATE_TRANSITION_TABLE[(u8)m_state][byte]; | ||||
| } | ||||
| )~~~"); | ||||
| 
 | ||||
|     generator.append(R"~~~( | ||||
| 
 | ||||
| void @class_name@::advance(u8 byte) | ||||
| { | ||||
|     auto next_state = lookup_state_transition(byte); | ||||
|     bool state_will_change = next_state.new_state != m_state && next_state.new_state != @class_name@::State::_Anywhere; | ||||
| 
 | ||||
|     // only run exit directive if state is being changed
 | ||||
|     if (state_will_change) | ||||
|     { | ||||
|         switch (m_state) | ||||
|         { | ||||
| )~~~"); | ||||
|     for (auto s : machine.states) { | ||||
|         auto state_generator = generator.fork(); | ||||
|         if (s.exit_action.has_value()) { | ||||
|             state_generator.set("state_name", s.name); | ||||
|             state_generator.set("action", s.exit_action.value()); | ||||
|             state_generator.append(R"~~~( | ||||
|         case @class_name@::State::@state_name@: | ||||
|             m_handler(Action::@action@, byte); | ||||
|             break; | ||||
| )~~~"); | ||||
|         } | ||||
|     } | ||||
|     generator.append(R"~~~( | ||||
|         default: | ||||
|             break; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (next_state.action != @class_name@::Action::_Ignore) | ||||
|         m_handler(next_state.action, byte); | ||||
|     m_state = next_state.new_state; | ||||
| 
 | ||||
|     // only run entry directive if state is being changed
 | ||||
|     if (state_will_change) | ||||
|     { | ||||
|         switch (next_state.new_state) | ||||
|         { | ||||
| )~~~"); | ||||
|     for (auto state : machine.states) { | ||||
|         auto state_generator = generator.fork(); | ||||
|         if (state.entry_action.has_value()) { | ||||
|             state_generator.set("state_name", state.name); | ||||
|             state_generator.set("action", state.entry_action.value()); | ||||
|             state_generator.append(R"~~~( | ||||
|         case @class_name@::State::@state_name@: | ||||
|             m_handler(Action::@action@, byte); | ||||
|             break; | ||||
| )~~~"); | ||||
|         } | ||||
|     } | ||||
|     generator.append(R"~~~( | ||||
|         default: | ||||
|             break; | ||||
|         } | ||||
|     } | ||||
| } | ||||
| )~~~"); | ||||
| 
 | ||||
|     if (machine.namespaces.has_value()) { | ||||
|         generator.append(R"~~~( | ||||
| } // end namespace
 | ||||
| )~~~"); | ||||
|     } | ||||
| } | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Daniel Bertalan
						Daniel Bertalan