fix: Rewrite 2 times

2025-07-29 16:07:46 +00:00 · 2023-03-12 10:23:15 +03:00 · 2023-03-12 10:23:15 +03:00 · 0b4b3c39c5
commit 0b4b3c39c5
parent 5b478a07aa
3 changed files with 238 additions and 69 deletions
--- a/samples/test.properties
+++ b/samples/test.properties
@ -1,2 +1,15 @@
-asdasd\\def = asdlnj\
-  asdasd\\n
+foo=bar
+baz = qux
+asd: def
+properties : is a very bad format
+i\ hate\ writing\ tests ==
+colon::
+escaped = \\
+real.new\ line= \n
+fake.new.line=\\n
+   # Is a comment.
+# This too.
+! And this!
+  ! Last one i swear.
+not.a.comment#=#
+is\ it\ over!? = finally!!!
--- a/src/properties.v
+++ b/src/properties.v
@ -3,6 +3,208 @@ module properties
 import os
 import strings

+// ParseState identifies which half of a `key = value` entry the parser is
+// currently accumulating characters into.
+enum ParseState {
+	// We are parsing a key.
+	key
+	// We are parsing a value.
+	value
+}
+
+// PropertiesParser is a parser for properties files.
+// It walks `raw` one character at a time, accumulating the current key and
+// value in builders and storing every finished pair in `properties`.
+[noinit]
+pub struct PropertiesParser {
+	// The string we are parsing.
+	raw string
+mut:
+	// The character index we are currently parsing.
+	current_index usize
+	// The thing we are currently parsing (a key or a value). Starts as a key.
+	currently_parsing ParseState = .key
+	// The current identifier (key) we are parsing.
+	current_identifier strings.Builder
+	// The current value we are parsing.
+	current_value strings.Builder
+	// Whether the current line is all whitespace so far.
+	// Used to skip leading whitespace and to recognise comment lines.
+	current_line_is_all_whitespace bool = true
+	// The number of backslashes we have seen in a row.
+	// NOTE(review): in the code visible here this counter is only ever reset
+	// to 0 and never incremented - confirm whether that is intended.
+	current_backslash_count usize
+	// Whether we have encountered an unescaped backslash.
+	// This is used for escaping newlines.
+	encountered_unescaped_backslash bool
+pub mut:
+	// The parsed properties, keyed by identifier.
+	properties map[string]string
+}
+
+// is_at_end reports whether `current_index` is on or beyond the last
+// character of `raw`, i.e. there is no further character to advance to.
+// Note that this is already true while the last character is still the
+// current one.
+fn (p &PropertiesParser) is_at_end() bool {
+	return p.current_index >= p.raw.len - 1
+}
+
+// current_rune returns the character of `raw` located at `current_index`.
+fn (p &PropertiesParser) current_rune() rune {
+	index := p.current_index
+	return p.raw[index]
+}
+
+// next_rune moves `current_index` forward by one and returns the character
+// found at the new position.
+fn (mut p PropertiesParser) next_rune() rune {
+	p.current_index += 1
+	return p.raw[p.current_index]
+}
+
+// peek_rune returns the character right after the current one without
+// advancing, or `\0` when there is no such character.
+fn (p &PropertiesParser) peek_rune() rune {
+	lookahead := p.current_index + 1
+	return p.raw[lookahead] or { `\0` }
+}
+
+// write appends `value` to whichever builder matches the current parse
+// state: the identifier while parsing a key, the value otherwise.
+fn (mut p PropertiesParser) write(value rune) {
+	if p.currently_parsing == .key {
+		p.current_identifier.write_rune(value)
+		return
+	}
+	p.current_value.write_rune(value)
+}
+
+// save_ident_and_value stores the accumulated key/value pair in `properties`
+// and leaves both builders empty for the next entry.
+// Trailing whitespace (space, tab, form feed) is stripped from the key.
+// Entries with an empty key are dropped.
+fn (mut p PropertiesParser) save_ident_and_value() {
+	if p.current_identifier.len == 0 {
+		// No key was collected (e.g. a line such as `=foo`). Discard whatever
+		// value text was gathered so it cannot leak into the next property.
+		p.current_value.clear()
+		return
+	}
+	// Builder.str() hands back the content and empties the builder.
+	key := p.current_identifier.str().trim_right(' \t\f')
+	p.properties[key] = p.current_value.str()
+}
+
+// loop_inner processes the character at `current_index` and then, unless the
+// parser is already at the end of the input, advances by one character.
+// Depending on the character it either appends to the current key/value,
+// switches from key to value, skips a comment, or finishes the current entry.
+fn (mut p PropertiesParser) loop_inner() {
+	current_rune := p.current_rune()
+	// Advance after the character has been handled, whichever branch returns.
+	defer {
+		if !p.is_at_end() {
+			p.next_rune()
+		}
+	}
+	match current_rune {
+		// Unescaped comment character.
+		`#`, `!` {
+			if !p.current_line_is_all_whitespace {
+				// We are in the middle of a key/value. Write the character.
+				p.write(current_rune)
+				return
+			}
+
+			// We are at the start of a comment. Skip to the end of the line.
+			for !p.is_at_end() && p.peek_rune() !in [`\n`, `\r`] {
+				p.next_rune()
+			}
+		}
+		// Unescaped whitespace.
+		` `, `\t`, `\f` {
+			// Ignore whitespace at the start of a line.
+			if p.current_line_is_all_whitespace {
+				return
+			}
+
+			// Whitespace inside a value that has already started is kept as is.
+			if p.currently_parsing == .value && p.current_value.len > 0 {
+				p.current_value.write_rune(current_rune)
+				return
+			}
+
+			// Look ahead on the current logical line for an unescaped separator.
+			mut index := usize(1)
+			mut backslashes_in_a_row := usize(0)
+			mut there_is_an_unescaped_separator := false
+
+			for p.current_index + index < p.raw.len {
+				peek := p.raw[p.current_index + index]
+				match peek {
+					`\\` {
+						backslashes_in_a_row++
+					}
+					`\n`, `\r` {
+						// Not escaped. New logical line starts.
+						if backslashes_in_a_row % 2 == 0 {
+							break
+						}
+						backslashes_in_a_row = 0
+					}
+					`=`, `:` {
+						if backslashes_in_a_row % 2 == 0 {
+							there_is_an_unescaped_separator = true
+							// Nothing further on the line can change the answer.
+							break
+						}
+						// Escaped separator: it does not count, keep scanning.
+						// (Falling through to index++ is required here; skipping
+						// it would re-read the same character forever.)
+						backslashes_in_a_row = 0
+					}
+					else {
+						backslashes_in_a_row = 0
+					}
+				}
+				index++
+			}
+
+			if there_is_an_unescaped_separator {
+				// We are parsing a key and there is an unescaped separator in the rest of the line.
+				// Write the whitespace.
+				p.write(current_rune)
+			}
+		}
+		// Unescaped separator.
+		`=`, `:` {
+			// Inside a value a separator is ordinary text.
+			if p.currently_parsing == .value {
+				p.write(current_rune)
+			}
+			p.currently_parsing = .value
+			p.current_line_is_all_whitespace = false
+			p.current_backslash_count = 0
+		}
+		// Newline. May or may not be escaped.
+		`\n`, `\r` {
+			// We are at the end of a line. Not escaped.
+			if p.current_backslash_count % 2 == 0 || p.encountered_unescaped_backslash {
+				p.save_ident_and_value()
+				p.currently_parsing = .key
+				p.encountered_unescaped_backslash = false
+			}
+			p.current_line_is_all_whitespace = true
+			p.current_backslash_count = 0
+		}
+		`\\` {
+			p.current_line_is_all_whitespace = false
+			peek := p.peek_rune()
+
+			// Escaped whitespace/comment character/separator.
+			// \\\! -> ! is escaped as there is an odd number of backslashes.
+			// NOTE(review): current_backslash_count is never incremented in the
+			// code visible here, so this branch looks unreachable - confirm.
+			if p.current_backslash_count % 2 == 1 {
+				if peek in [` `, `\t`, `\f`, `#`, `!`, `=`, `:`] {
+					p.current_backslash_count = 0
+					p.write(p.next_rune())
+				}
+				return
+			}
+
+			// \\\x -> The last \ is not escaped as there is
+			// an even number of backslashes before it.
+			if peek != `\\` {
+				p.encountered_unescaped_backslash = true
+				return
+			}
+
+			// Escaped backslash.
+			// Just reset the backslash count as the number doesn't matter.
+			// It only needs to be even here.
+			p.current_backslash_count = 0
+			p.write(p.next_rune())
+		}
+		else {
+			p.current_line_is_all_whitespace = false
+			p.write(current_rune)
+		}
+	}
+}
+
+// parse parses the properties file.
+// The parsed properties are stored in the properties field.
+// Empty input yields no properties.
+pub fn (mut p PropertiesParser) parse() {
+	// Nothing to do for empty input. Without this guard the unconditional
+	// loop_inner call below would index into an empty string.
+	if p.raw.len == 0 {
+		return
+	}
+	for !p.is_at_end() {
+		p.loop_inner()
+	}
+	// is_at_end is already true while standing on the last character, so that
+	// character still has to be processed here.
+	p.loop_inner()
+	// Handle files not ending in a newline.
+	p.save_ident_and_value()
+}
+
 // parse_file parses a properties file after reading it and returns a map of the key-value pairs.
 pub fn parse_file(path string) !map[string]string {
 	return parse(os.read_file(path)!)
@ -10,72 +212,10 @@ pub fn parse_file(path string) !map[string]string {

 // parse parses a properties string and returns a map of the key-value pairs.
 pub fn parse(raw string) map[string]string {
-	mut properties := map[string]string{}
-
-	mut current_raw_index := 0
-
-	mut current_ident := strings.new_builder(30)
-	mut current_value := strings.new_builder(30)
-
-	mut parsing_ident := true
-
-	for current_raw_index < raw.len {
-		match raw[current_raw_index] {
-			`#`, `!` {
-				// Ignore until we find a newline.
-				for raw[current_raw_index] !in [`\n`, `\r`] {
-					current_raw_index++
-				}
-			}
-			` `, `\t` {
-				if !parsing_ident {
-					current_value.write_rune(raw[current_raw_index])
-				}
-			}
-			`=`, `:` {
-				if parsing_ident {
-					parsing_ident = false
-				}
-			}
-			`\\` {
-				// if raw[current_raw_index + 1] or { `\0` } == `\\`
-				// 	|| raw[current_raw_index - 1] == `\\` {
-				// 	if parsing_ident {
-				// 		current_ident.write_rune(`\\`)
-				// 	} else {
-				// 		current_value.write_rune(`\\`)
-				// 	}
-				// }
-				// Ignore until we find a newline.
-				for raw[current_raw_index] !in [`\n`, `\r`] {
-					current_raw_index++
-				}
-			}
-			`\n`, `\r` {
-				parsing_ident = true
-
-				if current_ident.len > 0 {
-					properties[current_ident.str()] = current_value.str().trim_space()
+	mut parser := PropertiesParser{
+		raw: raw
 	}

-				current_ident.clear()
-				current_value.clear()
-			}
-			else {
-				if !parsing_ident {
-					current_value.write_rune(raw[current_raw_index])
-				} else {
-					current_ident.write_rune(raw[current_raw_index])
-				}
-			}
-		}
-		current_raw_index++
-	}
-
-	// Handle cases where the file doesn't end with a newline.
-	if current_ident.len > 0 {
-		properties[current_ident.str()] = current_value.str().trim_space()
-	}
-
-	return properties
+	parser.parse()
+	return parser.properties
 }
--- a/src/properties_test.v
+++ b/src/properties_test.v
@ -2,6 +2,22 @@ module properties

 fn test_properties_parse() ! {
 	props := parse_file('./samples/test.properties')!
+	// println(props.str().replace('\n', '\\n'))
+	//
+	assert props['foo'] == 'bar'
+	assert props['baz'] == 'qux'
+	assert props['asd'] == 'def'
+	assert props['properties'] == 'is a very bad format'
+	assert props['i hate writing tests'] == '='
+	assert props['colon'] == ':'
+	assert props['escaped'] == '\\'
+	// TODO: \n, \r, \u etc support.
+	// assert props['real.new line'] == '\n'
+	assert props['fake.new.line'] == '\\n'
+	assert props['not.a.comment#'] == '#'
+	assert props['is it over!?'] == 'finally!!!'

-	println(props)
+	props2 := parse('foo=bar')
+	// Checks if it handles ones not ending with a newline.
+	assert props2['foo'] == 'bar'
 }