From ca309b988e7908f4d8a92f4a7bc6a4b9970a8867 Mon Sep 17 00:00:00 2001
From: NotTheDr01ds <32344964+NotTheDr01ds@users.noreply.github.com>
Date: Tue, 16 Jul 2024 13:20:18 -0400
Subject: [PATCH] New std-rfc command `str dedent` (a.k.a. unindent) to remove
 common indentation from a multiline string (#894)

Finally got around to polishing up and contributing the concept from
[Nushell #11477](https://github.com/nushell/nushell/issues/11477).

This is based on a (very slightly) modified version of the [Swift
approach](https://docs.swift.org/swift-book/documentation/the-swift-programming-language/stringsandcharacters/#Multiline-String-Literals)
suggested by @jameschensmith:

```nu
> use std-rfc str
> let s = "
    Totally unindented
      Indented by two spaces
        Indented by four spaces
      Indented by two spaces
    Unindented

    "
> $s | str dedent
Totally unindented
  Indented by two spaces
    Indented by four spaces
  Indented by two spaces
Unindented

```

Specifically:

* Requires a multi-line string where the first and last lines are blank (empty or contain only whitespace)
* Will error if the first and/or last line is not blank
* The amount of indentation to remove is based solely on the *number of spaces* on the final line of the string
* Tabs are not considered for indentation calculations
* The first and last line of the original string are not returned as part of the dedented string. Those lines are for "control"/formatting purposes only.
* If you want the string to end in a linebreak, simply add an extra blank line before the last "control" line (as in the example above).
* Unlike the Swift format, whitespace is allowed on the first line of the string. It may not be visible to the user in their IDE, and there's just no reason to create an error in this case since the line is intended to be stripped anyway.
* *Only* indentation whitespace is removed. Any other whitespace is left untouched, including that on otherwise empty lines.

A good (I hope) set of examples for these rules can be found in the test cases.
---
 stdlib-candidate/std-rfc/str/dedent/mod.nu | 109 ++++++++++++++++++
 stdlib-candidate/std-rfc/str/mod.nu        |   1 +
 stdlib-candidate/tests/mod.nu              |   1 +
 stdlib-candidate/tests/str_dedent.nu       | 151 +++++++++++++++++++++++++
 4 files changed, 262 insertions(+)
 create mode 100644 stdlib-candidate/std-rfc/str/dedent/mod.nu
 create mode 100644 stdlib-candidate/tests/str_dedent.nu

diff --git a/stdlib-candidate/std-rfc/str/dedent/mod.nu b/stdlib-candidate/std-rfc/str/dedent/mod.nu
new file mode 100644
index 0000000..fabc50f
--- /dev/null
+++ b/stdlib-candidate/std-rfc/str/dedent/mod.nu
@@ -0,0 +1,109 @@
+# Removes common indent from a multi-line string based on the number of spaces on the last line.
+#
+# A.k.a. Unindent
+#
+# The first and last lines of the input are "control" lines: both must be
+# blank (empty or whitespace-only) and neither is included in the result.
+# The number of leading spaces on the last line is the indentation that is
+# removed from every other line; tabs never count as indentation. Lines
+# containing only whitespace are returned untouched.
+#
+# Example - Two leading spaces are removed from all lines:
+#
+# > let s = "
+#   Heading
+#     Indented Line
+#     Another Indented Line
+#
+#   Another Heading
+#   "
+# > $s | str dedent
+#
+# Heading
+#   Indented Line
+#   Another Indented Line
+#
+# Another Heading
+export def main []: string -> string {
+    let string = $in
+
+    if ($string | describe) != "string" {
+        let span = (view files | last)
+        error make {
+            msg: 'Requires multi-line string as pipeline input'
+            label: {
+                text: "err::pipeline_input"
+                span: {
+                    start: $span.start
+                    end: $span.end
+                }
+            }
+        }
+    }
+
+    # Work on individual lines so that every check below is anchored to
+    # one specific line. Use `split row` instead of `lines`, since `lines`
+    # will drop legitimate trailing empty lines.
+    let rows = ($string | split row "\n")
+
+    # A string without any line-break has no separate first line at all
+    if (($rows | length) < 2) or (($rows | first) !~ '^\s*$') {
+        error make {
+            msg: 'First line must be empty'
+        }
+    }
+
+    let last_row = ($rows | last)
+
+    if ($last_row !~ '^\s*$') {
+        error make {
+            msg: 'Last line must contain only whitespace indicating the dedent'
+        }
+    }
+
+    # Get number of leading spaces on the last line (tabs are not counted)
+    let indent = (
+        $last_row
+        | str replace -r '^( *).*$' '$1'
+        | str length
+    )
+
+    let spaces = ('' | fill -c ' ' -w $indent)
+
+    # Skip the first and last lines
+    let lines = (
+        $rows
+        | skip 1
+        | drop 1
+        | enumerate
+    )
+
+    # Has to be done outside the replacement block or the error
+    # is converted to text. This is probably a Nushell bug, and
+    # this code can be recombined with the next iterator when
+    # the Nushell behavior is fixed.
+    for line in $lines {
+        let is_blank = ($line.item =~ '^\s*$')
+        let is_indented = ($line.item | str starts-with $spaces)
+
+        if (not $is_blank) and (not $is_indented) {
+            error make {
+                msg: $"Line ($line.index + 1) must be indented by ($indent) or more spaces."
+            }
+        }
+    }
+
+    $lines
+    | each {|line|
+        # Don't operate on lines containing only whitespace
+        if ($line.item !~ '^\s*$') {
+            # Anchored so that only the leading indent is removed
+            $line.item | str replace -r ('^' + $spaces) ''
+        } else {
+            $line.item
+        }
+    }
+    # `str join` adds line-breaks only between lines, so a trailing
+    # line-break exists only when the final content line is empty
+    | str join "\n"
+}
\ No newline at end of file
diff --git a/stdlib-candidate/std-rfc/str/mod.nu b/stdlib-candidate/std-rfc/str/mod.nu
index fbff9d9..9dbbcb2 100644
--- a/stdlib-candidate/std-rfc/str/mod.nu
+++ b/stdlib-candidate/std-rfc/str/mod.nu
@@ -1 +1,2 @@
 export use xpend.nu *
+export use dedent *
diff --git a/stdlib-candidate/tests/mod.nu b/stdlib-candidate/tests/mod.nu
index 34dc62f..42fc3a4 100644
--- a/stdlib-candidate/tests/mod.nu
+++ b/stdlib-candidate/tests/mod.nu
@@ -3,3 +3,4 @@ export module record.nu
 export module str_xpend.nu
 export module math.nu
 export module bench.nu
+export module str_dedent.nu
diff --git a/stdlib-candidate/tests/str_dedent.nu b/stdlib-candidate/tests/str_dedent.nu
new file mode 100644
index 0000000..0f6c8ef
--- /dev/null
+++ b/stdlib-candidate/tests/str_dedent.nu
@@ -0,0 +1,151 @@
+use std assert
+use ../std-rfc str
+
+export def "test str dedent" [] {
+
+    # Test 1:
+    # Should start with "Heading" in the first character position
+    # Should not end with a line-break
+    # The blank line has no extra spaces
+    assert equal (
+        do {
+            let s = "
+                Heading
+
+                  one
+                  two
+                "
+            $s | str dedent
+        }
+    ) "Heading\n\n  one\n  two"
+
+    # Test 2:
+    # Same as #1, but the blank line has leftover whitespace
+    # indentation (16 spaces) which is left in the result
+    assert equal (
+        do {
+            let s = "
+                Heading
+                
+                  one
+                  two
+                "
+            $s | str dedent
+        }
+    ) "Heading\n                \n  one\n  two"
+
+    # Test 3:
+    # Same, but with a single tab character on the "blank" line
+    assert equal (
+        do {
+            let s = "
+                Heading
+\t
+                  one
+                  two
+                "
+            $s | str dedent
+        }
+    ) "Heading\n\t\n  one\n  two"
+
+    # Test 4:
+    # Ends with line-break
+    assert equal (
+        do {
+            let s = "
+                Heading
+
+                  one
+                  two
+
+                "
+            $s | str dedent
+        }
+    ) "Heading\n\n  one\n  two\n"
+
+    # Test 5:
+    # Identity - Returns the original string sans first and last empty lines
+    # No other whitespace should be removed
+    assert equal (
+        do {
+            let s = "\n  Identity  \n"
+            $s | str dedent
+        }
+    ) "  Identity  "
+
+    # Test 6:
+    # Error - Does not contain an empty first line
+    assert error {||
+        let s = "Error"
+        $s | str dedent
+    }
+
+    # Test 7:
+    # Error - Does not contain an empty last line
+    assert error {||
+        let s = "
+            Error"
+        $s | str dedent
+    }
+
+    # Test 8:
+    # Error - Line 1 does not have enough indentation
+    assert error {||
+        let s = "
+          Line 1
+            Line 2
+            "
+        $s | str dedent
+    }
+
+    # Test 9:
+    # Error - Line 2 does not have enough indentation
+    assert error {||
+        let s = "
+            Line 1
+          Line 2
+            "
+        $s | str dedent
+    }
+
+    # Test 10:
+    # Error - Line does not have enough indentation
+    assert error {||
+        let s = "
+          Line
+            "
+        $s | str dedent
+    }
+
+    # Test 11:
+    # "Hidden" whitespace on the first line is allowed
+    assert equal (
+        do {
+            let s = "  \t  \n  Identity  \n"
+            $s | str dedent
+        }
+    ) "  Identity  "
+
+    # Test 12:
+    # Error - First line is not blank, even though a later line is
+    assert error {||
+        let s = "Error\n\n  one\n  "
+        $s | str dedent
+    }
+
+    # Test 13:
+    # Error - The required spaces are not at the start of line 2
+    assert error {||
+        let s = "\n  one\nx  two\n  "
+        $s | str dedent
+    }
+
+    # Test 14:
+    # Only the two control lines - Returns an empty string
+    assert equal (
+        do {
+            let s = "\n  "
+            $s | str dedent
+        }
+    ) ""
+}
\ No newline at end of file