mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-31 13:07:46 +00:00
cut: switch to using new coreopts syntax
This commit is contained in:
parent
f8509240df
commit
e019b2657e
1 changed files with 82 additions and 93 deletions
175
src/cut/cut.rs
175
src/cut/cut.rs
|
@ -25,8 +25,87 @@ mod buffer;
|
||||||
mod ranges;
|
mod ranges;
|
||||||
mod searcher;
|
mod searcher;
|
||||||
|
|
||||||
static NAME: &'static str = "cut";
|
static SYNTAX: &'static str = "[-d] [-s] [-z] [--output-delimiter] ((-f|-b|-c) {{sequence}}) {{sourcefile}}+";
|
||||||
static VERSION: &'static str = env!("CARGO_PKG_VERSION");
|
static SUMMARY: &'static str = "Prints specified byte or field columns from each line of stdin or the input files";
|
||||||
|
static LONG_HELP: &'static str = "
|
||||||
|
Each call must specify a mode (what to use for columns),
|
||||||
|
a sequence (which columns to print), and provide a data source
|
||||||
|
|
||||||
|
Specifying a mode
|
||||||
|
|
||||||
|
Use --bytes (-b) or --characters (-c) to specify byte mode
|
||||||
|
|
||||||
|
Use --fields (-f) to specify field mode, where each line is broken into
|
||||||
|
fields identified by a delimiter character. For example for a typical CSV
|
||||||
|
you could use this in combination with setting comma as the delimiter
|
||||||
|
|
||||||
|
Specifying a sequence
|
||||||
|
|
||||||
|
A sequence is a group of 1 or more numbers or inclusive ranges separated
|
||||||
|
by commas.
|
||||||
|
|
||||||
|
cut -f 2,5-7 some_file.txt
|
||||||
|
will display the 2nd, 5th, 6th, and 7th field for each source line
|
||||||
|
|
||||||
|
Ranges can extend to the end of the row by excluding the second number
|
||||||
|
|
||||||
|
cut -f 3- some_file.txt
|
||||||
|
will display the 3rd field and all fields after for each source line
|
||||||
|
|
||||||
|
The first number of a range can be excluded, and this is effectively the
|
||||||
|
same as using 1 as the first number: it causes the range to begin at the
|
||||||
|
first column. Ranges can also display a single column
|
||||||
|
|
||||||
|
cut -f 1,3-5 some_file.txt
|
||||||
|
will display the 1st, 3rd, 4th, and 5th field for each source line
|
||||||
|
|
||||||
|
The --complement option, when used, inverts the effect of the sequence
|
||||||
|
|
||||||
|
cut --complement -f 4-6 some_file.txt
|
||||||
|
will display every field but the 4th, 5th, and 6th
|
||||||
|
|
||||||
|
Specifying a data source
|
||||||
|
|
||||||
|
If no sourcefile arguments are specified, stdin is used as the source of
|
||||||
|
lines to print
|
||||||
|
|
||||||
|
If sourcefile arguments are specified, stdin is ignored and all files are
|
||||||
|
read in consecutively. If a sourcefile is not successfully read, a warning
|
||||||
|
will print to stderr, and the eventual status code will be 1, but cut
|
||||||
|
will continue to read through the remaining sourcefiles
|
||||||
|
|
||||||
|
To print columns from both STDIN and a file argument, use - (dash) as a
|
||||||
|
sourcefile argument to represent stdin.
|
||||||
|
|
||||||
|
Field Mode options
|
||||||
|
|
||||||
|
The fields in each line are identified by a delimiter (separator)
|
||||||
|
|
||||||
|
Set the delimiter
|
||||||
|
Set the delimiter which separates fields in the file using the
|
||||||
|
--delimiter (-d) option. Setting the delimiter is optional.
|
||||||
|
If not set, a default delimiter of Tab will be used.
|
||||||
|
|
||||||
|
Optionally Filter based on delimiter
|
||||||
|
If the --only-delimited (-s) flag is provided, only lines which
|
||||||
|
contain the delimiter will be printed
|
||||||
|
|
||||||
|
Replace the delimiter
|
||||||
|
If the --output-delimiter option is provided, the argument used for
|
||||||
|
it will replace the delimiter character in each line printed. This is
|
||||||
|
useful for transforming tabular data - e.g. to convert a CSV to a
|
||||||
|
TSV (tab-separated file)
|
||||||
|
|
||||||
|
Line endings
|
||||||
|
|
||||||
|
When the --zero-terminated (-z) option is used, cut sees \\0 (null) as the
|
||||||
|
'line ending' character (both for the purposes of reading lines and
|
||||||
|
separating printed lines) instead of \\n (newline). This is useful for
|
||||||
|
tabular data where some of the cells may contain newlines
|
||||||
|
|
||||||
|
echo 'ab\\0cd' | cut -z -c 1
|
||||||
|
will result in 'a\\0c\\0'
|
||||||
|
";
|
||||||
|
|
||||||
struct Options {
|
struct Options {
|
||||||
out_delim: Option<String>,
|
out_delim: Option<String>,
|
||||||
|
@ -408,8 +487,7 @@ fn cut_files(mut filenames: Vec<String>, mode: Mode) -> i32 {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn uumain(args: Vec<String>) -> i32 {
|
pub fn uumain(args: Vec<String>) -> i32 {
|
||||||
let mut opts = uucore::coreopts::CoreOptions::new(NAME);
|
let mut opts = new_coreopts!(SYNTAX, SUMMARY, LONG_HELP);
|
||||||
|
|
||||||
opts.optopt("b", "bytes", "filter byte columns from the input source", "sequence");
|
opts.optopt("b", "bytes", "filter byte columns from the input source", "sequence");
|
||||||
opts.optopt("c", "characters", "alias for character mode", "sequence");
|
opts.optopt("c", "characters", "alias for character mode", "sequence");
|
||||||
opts.optopt("d", "delimiter", "specify the delimiter character that separates fields in the input source. Defaults to Tab.", "delimiter");
|
opts.optopt("d", "delimiter", "specify the delimiter character that separates fields in the input source. Defaults to Tab.", "delimiter");
|
||||||
|
@ -419,95 +497,6 @@ pub fn uumain(args: Vec<String>) -> i32 {
|
||||||
opts.optflag("s", "only-delimited", "in field mode, only print lines which contain the delimiter");
|
opts.optflag("s", "only-delimited", "in field mode, only print lines which contain the delimiter");
|
||||||
opts.optflag("z", "zero-terminated", "instead of filtering columns based on line, filter columns based on \\0 (NULL character)");
|
opts.optflag("z", "zero-terminated", "instead of filtering columns based on line, filter columns based on \\0 (NULL character)");
|
||||||
opts.optopt("", "output-delimiter", "in field mode, replace the delimiter in output lines with this option's argument", "new delimiter");
|
opts.optopt("", "output-delimiter", "in field mode, replace the delimiter in output lines with this option's argument", "new delimiter");
|
||||||
let usage = opts.usage("Prints specified byte or field columns from each line of stdin or the input files");
|
|
||||||
opts.help(format!("
|
|
||||||
{0} {1}
|
|
||||||
|
|
||||||
{0} [-d] [-s] [-z] [--output-delimiter] ((-f|-b|-c) {{sequence}}) {{sourcefile}}+
|
|
||||||
|
|
||||||
{2}
|
|
||||||
|
|
||||||
Reference
|
|
||||||
|
|
||||||
Each call must specify a mode (what to use for columns),
|
|
||||||
a sequence (which columns to print), and provide a data source
|
|
||||||
|
|
||||||
Specifying a mode
|
|
||||||
|
|
||||||
Use --bytes (-b) or --characters (-c) to specify byte mode
|
|
||||||
|
|
||||||
Use --fields (-f) to specify field mode, where each line is broken into
|
|
||||||
fields identified by a delimiter character. For example for a typical CSV
|
|
||||||
you could use this in combination with setting comma as the delimiter
|
|
||||||
|
|
||||||
Specifying a sequence
|
|
||||||
|
|
||||||
A sequence is a group of 1 or more numbers or inclusive ranges separated
|
|
||||||
by a commas.
|
|
||||||
|
|
||||||
cut -f 2,5-7 some_file.txt
|
|
||||||
will display the 2nd, 5th, 6th, and 7th field for each source line
|
|
||||||
|
|
||||||
Ranges can extend to the end of the row by excluding the the second number
|
|
||||||
|
|
||||||
cut -f 3- some_file.txt
|
|
||||||
will display the 3rd field and all fields after for each source line
|
|
||||||
|
|
||||||
The first number of a range can be excluded, and this is effectively the
|
|
||||||
same as using 1 as the first number: it causes the range to begin at the
|
|
||||||
first column. Ranges can also display a single column
|
|
||||||
|
|
||||||
cut -f 1,3-5 some_file.txt
|
|
||||||
will display the 1st, 3rd, 4th, and 5th field for each source line
|
|
||||||
|
|
||||||
The --complement option, when used, inverts the effect of the sequence
|
|
||||||
|
|
||||||
cut --complement -f 4-6 some_file.txt
|
|
||||||
will display the every field but the 4th, 5th, and 6th
|
|
||||||
|
|
||||||
Specifying a data source
|
|
||||||
|
|
||||||
If no sourcefile arguments are specified, stdin is used as the source of
|
|
||||||
lines to print
|
|
||||||
|
|
||||||
If sourcefile arguments are specified, stdin is ignored and all files are
|
|
||||||
read in consecutively if a sourcefile is not successfully read, a warning
|
|
||||||
will print to stderr, and the eventual status code will be 1, but cut
|
|
||||||
will continue to read through proceeding sourcefiles
|
|
||||||
|
|
||||||
To print columns from both STDIN and a file argument, use - (dash) as a
|
|
||||||
sourcefile argument to represent stdin.
|
|
||||||
|
|
||||||
Field Mode options
|
|
||||||
|
|
||||||
The fields in each line are identified by a delimiter (separator)
|
|
||||||
|
|
||||||
Set the delimiter
|
|
||||||
Set the delimiter which separates fields in the file using the
|
|
||||||
--delimiter (-d) option. Setting the delimiter is optional.
|
|
||||||
If not set, a default delimiter of Tab will be used.
|
|
||||||
|
|
||||||
Optionally Filter based on delimiter
|
|
||||||
If the --only-delimited (-s) flag is provided, only lines which
|
|
||||||
contain the delimiter will be printed
|
|
||||||
|
|
||||||
Replace the delimiter
|
|
||||||
If the --output-delimiter option is provided, the argument used for
|
|
||||||
it will replace the delimiter character in each line printed. This is
|
|
||||||
useful for transforming tabular data - e.g. to convert a CSV to a
|
|
||||||
TSV (tab-separated file)
|
|
||||||
|
|
||||||
Line endings
|
|
||||||
|
|
||||||
When the --zero-terminated (-z) option is used, cut sees \\0 (null) as the
|
|
||||||
'line ending' character (both for the purposes of reading lines and
|
|
||||||
separating printed lines) instead of \\n (newline). This is useful for
|
|
||||||
tabular data where some of the cells may contain newlines
|
|
||||||
|
|
||||||
echo 'ab\\0cd' | cut -z -c 1
|
|
||||||
will result in 'a\\0c\\0'
|
|
||||||
|
|
||||||
", NAME, VERSION, usage));
|
|
||||||
let matches = opts.parse(args);
|
let matches = opts.parse(args);
|
||||||
|
|
||||||
let complement = matches.opt_present("complement");
|
let complement = matches.opt_present("complement");
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue