Merge pull request #3180 from tertsdiepraam/long_help_file

Create `help_section` macro and use it for `numfmt`
2025-07-28 11:37:44 +00:00 · 2022-08-20 13:45:54 +02:00 · 2022-08-20 13:45:54 +02:00 · 26b7099f83
commit 26b7099f83
parent 5694de7a68 2130b3ef69
7 changed files with 302 additions and 64 deletions
--- a/src/uu/base32/base32.md
+++ b/src/uu/base32/base32.md
@ -0,0 +1,17 @@
+# base32
+
+## Usage
+```
+base32 [OPTION]... [FILE]
+```
+
+## About
+
+encode/decode data and print to standard output
+With no FILE, or when FILE is -, read standard input.
+
+The data are encoded as described for the base32 alphabet in RFC
+4648. When decoding, the input may contain newlines in addition
+to the bytes of the formal base32 alphabet. Use --ignore-garbage
+to attempt to recover from any other non-alphabet bytes in the
+encoded stream.
--- a/src/uu/base32/src/base32.rs
+++ b/src/uu/base32/src/base32.rs
@ -8,22 +8,12 @@
 use std::io::{stdin, Read};

 use clap::Command;
-use uucore::{encoding::Format, error::UResult};
+use uucore::{encoding::Format, error::UResult, help_section, help_usage};

 pub mod base_common;

-static ABOUT: &str = "\
-encode/decode data and print to standard output
-With no FILE, or when FILE is -, read standard input.
-
-The data are encoded as described for the base32 alphabet in RFC
-4648. When decoding, the input may contain newlines in addition
-to the bytes of the formal base32 alphabet. Use --ignore-garbage
-to attempt to recover from any other non-alphabet bytes in the
-encoded stream.
-";
-
-const USAGE: &str = "{} [OPTION]... [FILE]";
+const ABOUT: &str = help_section!("about", "base32.md");
+const USAGE: &str = help_usage!("base32.md");

 #[uucore::main]
 pub fn uumain(args: impl uucore::Args) -> UResult<()> {
--- a/src/uu/base64/base64.md
+++ b/src/uu/base64/base64.md
@ -0,0 +1,17 @@
+# base64
+
+## Usage
+```
+base64 [OPTION]... [FILE]
+```
+
+## About
+
+encode/decode data and print to standard output
+With no FILE, or when FILE is -, read standard input.
+
+The data are encoded as described for the base64 alphabet in RFC
+3548. When decoding, the input may contain newlines in addition
+to the bytes of the formal base64 alphabet. Use --ignore-garbage
+to attempt to recover from any other non-alphabet bytes in the
+encoded stream.
--- a/src/uu/base64/src/base64.rs
+++ b/src/uu/base64/src/base64.rs
@ -9,22 +9,12 @@
 use uu_base32::base_common;
 pub use uu_base32::uu_app;

-use uucore::{encoding::Format, error::UResult};
+use uucore::{encoding::Format, error::UResult, help_section, help_usage};

 use std::io::{stdin, Read};

-static ABOUT: &str = "\
-encode/decode data and print to standard output
-With no FILE, or when FILE is -, read standard input.
-
-The data are encoded as described for the base64 alphabet in RFC
-3548. When decoding, the input may contain newlines in addition
-to the bytes of the formal base64 alphabet. Use --ignore-garbage
-to attempt to recover from any other non-alphabet bytes in the
-encoded stream.
-";
-
-const USAGE: &str = "{0} [OPTION]... [FILE]";
+const ABOUT: &str = help_section!("about", "base64.md");
+const USAGE: &str = help_usage!("base64.md");

 #[uucore::main]
 pub fn uumain(args: impl uucore::Args) -> UResult<()> {
--- a/src/uu/numfmt/numfmt.md
+++ b/src/uu/numfmt/numfmt.md
@ -0,0 +1,47 @@
+<!-- spell-checker:ignore N'th M'th -->
+# numfmt
+
+## Usage
+```
+numfmt [OPTION]... [NUMBER]...
+```
+
+## About
+
+Convert numbers from/to human-readable strings
+
+## Long Help
+
+UNIT options:
+   none   no auto-scaling is done; suffixes will trigger an error
+
+   auto   accept optional single/two letter suffix:
+
+          1K = 1000, 1Ki = 1024, 1M = 1000000, 1Mi = 1048576,
+
+   si     accept optional single letter suffix:
+
+          1K = 1000, 1M = 1000000, ...
+
+   iec    accept optional single letter suffix:
+
+          1K = 1024, 1M = 1048576, ...
+
+   iec-i  accept optional two-letter suffix:
+
+          1Ki = 1024, 1Mi = 1048576, ...
+
+FIELDS supports cut(1) style field ranges:
+  N    N'th field, counted from 1
+  N-   from N'th field, to end of line
+  N-M  from N'th to M'th field (inclusive)
+  -M   from first to M'th field (inclusive)
+  -    all fields
+Multiple fields/ranges can be separated with commas
+
+FORMAT must be suitable for printing one floating-point argument '%f'.
+Optional quote (%'f) will enable --grouping (if supported by current locale).
+Optional width value (%10f) will pad output. Optional zero (%010f) width
+will zero pad the number. Optional negative values (%-10f) will left align.
+Optional precision (%.1f) will override the input determined precision.
+
--- a/src/uu/numfmt/src/numfmt.rs
+++ b/src/uu/numfmt/src/numfmt.rs
@ -5,8 +5,6 @@
 //  * For the full copyright and license information, please view the LICENSE
 //  * file that was distributed with this source code.

-// spell-checker:ignore N'th M'th
-
 use crate::errors::*;
 use crate::format::format_and_print;
 use crate::options::*;
@ -18,47 +16,16 @@ use uucore::display::Quotable;
 use uucore::error::UResult;
 use uucore::format_usage;
 use uucore::ranges::Range;
+use uucore::{help_section, help_usage};

 pub mod errors;
 pub mod format;
 pub mod options;
 mod units;

-static ABOUT: &str = "Convert numbers from/to human-readable strings";
-static LONG_HELP: &str = "UNIT options:
-   none   no auto-scaling is done; suffixes will trigger an error
-
-   auto   accept optional single/two letter suffix:
-
-          1K = 1000, 1Ki = 1024, 1M = 1000000, 1Mi = 1048576,
-
-   si     accept optional single letter suffix:
-
-          1K = 1000, 1M = 1000000, ...
-
-   iec    accept optional single letter suffix:
-
-          1K = 1024, 1M = 1048576, ...
-
-   iec-i  accept optional two-letter suffix:
-
-          1Ki = 1024, 1Mi = 1048576, ...
-
-FIELDS supports cut(1) style field ranges:
-  N    N'th field, counted from 1
-  N-   from N'th field, to end of line
-  N-M  from N'th to M'th field (inclusive)
-  -M   from first to M'th field (inclusive)
-  -    all fields
-Multiple fields/ranges can be separated with commas
-
-FORMAT must be suitable for printing one floating-point argument '%f'.
-Optional quote (%'f) will enable --grouping (if supported by current locale).
-Optional width value (%10f) will pad output. Optional zero (%010f) width
-will zero pad the number. Optional negative values (%-10f) will left align.
-Optional precision (%.1f) will override the input determined precision.
-";
-const USAGE: &str = "{} [OPTION]... [NUMBER]...";
+const ABOUT: &str = help_section!("about", "numfmt.md");
+const LONG_HELP: &str = help_section!("long help", "numfmt.md");
+const USAGE: &str = help_usage!("numfmt.md");

 fn handle_args<'a>(args: impl Iterator<Item = &'a str>, options: &NumfmtOptions) -> UResult<()> {
    for l in args {
--- a/src/uucore_procs/src/lib.rs
+++ b/src/uucore_procs/src/lib.rs
@ -1,7 +1,9 @@
 // Copyright (C) ~ Roy Ivy III <rivy.dev@gmail.com>; MIT license

 extern crate proc_macro;
-use proc_macro::TokenStream;
+use std::{fs::File, io::Read, path::PathBuf};
+
+use proc_macro::{Literal, TokenStream, TokenTree};
 use quote::quote;

 //## rust proc-macro background info
@ -34,3 +36,211 @@ pub fn main(_args: TokenStream, stream: TokenStream) -> TokenStream {

    TokenStream::from(new)
 }
+
+/// Expands to a string literal containing the usage text of a util.
+///
+/// Takes a single string-literal argument: the name of the help file, which
+/// is looked up relative to `CARGO_MANIFEST_DIR`. The text is taken from the
+/// "Usage" section of that file and is expected to be wrapped in markdown
+/// code fences. The first word of every usage line is treated as the name of
+/// the util and replaced by "{}", so that the resulting string can be passed
+/// to `uucore::format_usage`.
+#[proc_macro]
+pub fn help_usage(input: TokenStream) -> TokenStream {
+    let tokens = input.into_iter().collect::<Vec<TokenTree>>();
+    let filename = get_argument(&tokens, 0, "filename");
+    let usage_section = parse_help("usage", &filename);
+    let usage = parse_usage(&usage_section);
+    TokenStream::from(TokenTree::Literal(Literal::string(&usage)))
+}
+
+/// Expands to a `str` literal holding one section of a util's help file.
+///
+/// The first argument is the name of the section, the second argument is the
+/// name of the help file. The file is looked up relative to the crate root
+/// (`CARGO_MANIFEST_DIR`). Only the requested section is returned; its text
+/// is not escaped or interpreted, but leading and trailing whitespace is
+/// removed.
+///
+/// The name of the help file should match the name of the util, i.e. numfmt
+/// should have a file called `numfmt.md`. By convention, the file starts with
+/// a top-level section carrying the name of the util. All other sections must
+/// start with 2 `#` characters. Section names are matched case-insensitively.
+///
+/// Example:
+/// ```md
+/// # numfmt
+/// ## About
+/// Convert numbers from/to human-readable strings
+///
+/// ## Long help
+/// This text will be the long help
+/// ```
+///
+/// ```rust,ignore
+/// help_section!("about", "numfmt.md");
+/// ```
+#[proc_macro]
+pub fn help_section(input: TokenStream) -> TokenStream {
+    let tokens = input.into_iter().collect::<Vec<TokenTree>>();
+    let section_name = get_argument(&tokens, 0, "section");
+    let file_name = get_argument(&tokens, 1, "filename");
+    let section_text = parse_help(&section_name, &file_name);
+    TokenStream::from(TokenTree::Literal(Literal::string(&section_text)))
+}
+
+/// Get an argument from the input vector of `TokenTree`.
+///
+/// `index` is the zero-based position of the argument in the macro call and
+/// `name` is only used in the error message for a missing argument.
+///
+/// Returns the text between the surrounding double quotes of the literal,
+/// exactly as written in the source (escape sequences are not interpreted).
+///
+/// # Panics
+///
+/// Panics if the argument is missing or if it is not a plain string literal
+/// (for example a number or a raw string).
+fn get_argument(input: &[TokenTree], index: usize, name: &str) -> String {
+    // Multiply by two to ignore the `','` in between the arguments
+    let literal = match input.get(index * 2) {
+        Some(TokenTree::Literal(lit)) => lit.to_string(),
+        Some(_) => panic!("Argument {} should be a string literal.", index),
+        None => panic!("Missing argument at index {} for {}", index, name),
+    };
+
+    // Any literal reaches this point, so make sure it really is quoted like
+    // a string before stripping the quotes, instead of unwrapping blindly.
+    literal
+        .strip_prefix('"')
+        .and_then(|rest| rest.strip_suffix('"'))
+        .map(str::to_string)
+        .unwrap_or_else(|| panic!("Argument {} should be a string literal.", index))
+}
+
+/// Read the help file and extract a section
+///
+/// `filename` is resolved relative to the directory in `CARGO_MANIFEST_DIR`.
+/// The section name is lowercased and stripped of surrounding `"` characters
+/// before it is looked up with `parse_help_section`.
+///
+/// # Panics
+///
+/// Panics if `CARGO_MANIFEST_DIR` is not set, if the help file cannot be
+/// read, or if the section does not exist in the file.
+fn parse_help(section: &str, filename: &str) -> String {
+    let section = section.to_lowercase();
+    let section = section.trim_matches('"');
+
+    let manifest_dir = std::env::var("CARGO_MANIFEST_DIR")
+        .expect("CARGO_MANIFEST_DIR must be set; it is provided by cargo during compilation");
+    let mut path = PathBuf::from(manifest_dir);
+    path.push(filename);
+
+    // This runs at compile time of the util, so a failure here becomes a
+    // compiler error: name the offending file to make it actionable.
+    let mut content = String::new();
+    File::open(&path)
+        .and_then(|mut file| file.read_to_string(&mut content))
+        .unwrap_or_else(|err| panic!("Could not read help file '{}': {}", path.display(), err));
+
+    parse_help_section(section, &content)
+}
+
+/// Extract the body of one section from the text of a help file.
+///
+/// `section` must already be lowercase; it is compared against the lowercased
+/// and trimmed title of every line starting with `##`. The returned text is
+/// everything between the matching header and the next line starting with
+/// `##` (or the end of the content), with surrounding whitespace trimmed.
+///
+/// Panics if no header matches `section`.
+fn parse_help_section(section: &str, content: &str) -> String {
+    let matches_header = |line: &str| match line.strip_prefix("##") {
+        Some(title) => title.trim().to_lowercase() == section,
+        None => false,
+    };
+
+    let mut lines = content.lines();
+
+    // `find` consumes the iterator up to and including the header, so what
+    // remains afterwards is the body. An empty body and a missing section
+    // would look the same later on, hence the explicit check here to give
+    // a nice error message.
+    if lines.find(|&line| matches_header(line)).is_none() {
+        panic!(
+            "The section '{}' could not be found in the help file. Maybe it is spelled wrong?",
+            section
+        )
+    }
+
+    let mut body = String::new();
+    for line in lines {
+        if line.starts_with("##") {
+            break;
+        }
+        body.push_str(line);
+        body.push('\n');
+    }
+
+    body.trim().to_string()
+}
+
+/// Parses a markdown code block into a usage string
+///
+/// The code fences are removed and the name of the util (assumed to be the
+/// first word of each line) is replaced with `{}` so that it can be replaced
+/// with the appropriate name at runtime. A usage that spans multiple lines
+/// keeps its line breaks: the resulting lines are joined with `\n`.
+///
+/// # Panics
+///
+/// Panics if `content` does not end with a closing code fence.
+fn parse_usage(content: &str) -> String {
+    content
+        .trim()
+        .strip_suffix("```")
+        .expect("The usage section should end with a closing code fence (```)")
+        .lines()
+        .skip(1) // Skip the "```" of markdown syntax
+        .map(|line| match line.split_once(' ') {
+            // Replace the util name (assumed to be the first word) with "{}"
+            // to be replaced with the runtime value later.
+            Some((_util, args)) => format!("{{}} {}", args),
+            None => "{}".to_string(),
+        })
+        .collect::<Vec<_>>()
+        // Collecting straight into a `String` would glue the lines together
+        // without any separator, so join them explicitly.
+        .join("\n")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{parse_help_section, parse_usage};
+
+    // Every line of the fixtures below ends in `\n\`: an explicit newline
+    // followed by a line continuation, which also strips the indentation of
+    // the next source line from the string.
+
+    /// Sections are found regardless of capitalization and are trimmed.
+    #[test]
+    fn section_parsing() {
+        let input = "\
+            # ls\n\
+            ## some section\n\
+            This is some section\n\
+            \n\
+            ## ANOTHER SECTION\n\
+            This is the other section\n\
+            with multiple lines\n";
+
+        assert_eq!(
+            parse_help_section("some section", input),
+            "This is some section"
+        );
+        assert_eq!(
+            parse_help_section("another section", input),
+            "This is the other section\nwith multiple lines"
+        );
+    }
+
+    /// Asking for a section that does not exist must panic.
+    #[test]
+    #[should_panic]
+    fn section_parsing_panic() {
+        let input = "\
+            # ls\n\
+            ## some section\n\
+            This is some section\n\
+            \n\
+            ## ANOTHER SECTION\n\
+            This is the other section\n\
+            with multiple lines\n";
+        parse_help_section("non-existent section", input);
+    }
+
+    /// The code fences are dropped and the util name becomes `{}`.
+    #[test]
+    fn usage_parsing() {
+        let input = "\
+            # ls\n\
+            ## Usage\n\
+            ```\n\
+            ls -l\n\
+            ```\n\
+            ## some section\n\
+            This is some section\n\
+            \n\
+            ## ANOTHER SECTION\n\
+            This is the other section\n\
+            with multiple lines\n";
+
+        assert_eq!(parse_usage(&parse_help_section("usage", input)), "{} -l");
+
+        assert_eq!(
+            parse_usage(
+                "\
+                ```\n\
+                util [some] [options]\n\
+                ```\
+                "
+            ),
+            "{} [some] [options]"
+        );
+    }
+}