1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

Merge pull request #6719 from andrewliebenow/basenc-faster-encoding

basenc: perform faster, streaming encoding
This commit is contained in:
Sylvestre Ledru 2024-10-05 16:33:55 +02:00 committed by GitHub
commit d41d51a0be
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 1604 additions and 284 deletions

106
Cargo.lock generated
View file

@ -185,6 +185,21 @@ dependencies = [
"syn 2.0.79",
]
[[package]]
name = "bit-set"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
dependencies = [
"bit-vec",
]
[[package]]
name = "bit-vec"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]]
name = "bitflags"
version = "1.3.2"
@ -850,16 +865,6 @@ version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "env_logger"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3"
dependencies = [
"log",
"regex",
]
[[package]]
name = "equivalent"
version = "1.0.1"
@ -1548,6 +1553,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
"libm",
]
[[package]]
@ -1791,23 +1797,38 @@ dependencies = [
"hex",
]
[[package]]
name = "proptest"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4c2511913b88df1637da85cc8d96ec8e43a3f8bb8ccb71ee1ac240d6f3df58d"
dependencies = [
"bit-set",
"bit-vec",
"bitflags 2.6.0",
"lazy_static",
"num-traits",
"rand",
"rand_chacha",
"rand_xorshift",
"regex-syntax",
"rusty-fork",
"tempfile",
"unarray",
]
[[package]]
name = "quick-error"
version = "1.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
[[package]]
name = "quick-error"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
[[package]]
name = "quickcheck"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
dependencies = [
"env_logger",
"log",
"rand",
]
[[package]]
name = "quote"
version = "1.0.37"
@ -1862,6 +1883,15 @@ dependencies = [
"rand_core",
]
[[package]]
name = "rand_xorshift"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f"
dependencies = [
"rand_core",
]
[[package]]
name = "rayon"
version = "1.10.0"
@ -2030,6 +2060,18 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "rusty-fork"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f"
dependencies = [
"fnv",
"quick-error 1.2.3",
"tempfile",
"wait-timeout",
]
[[package]]
name = "same-file"
version = "1.0.6"
@ -2396,6 +2438,12 @@ version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
[[package]]
name = "unarray"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
[[package]]
name = "unicode-ident"
version = "1.0.13"
@ -2476,6 +2524,7 @@ name = "uu_base32"
version = "0.0.27"
dependencies = [
"clap",
"proptest",
"uucore",
]
@ -2483,6 +2532,7 @@ dependencies = [
name = "uu_base64"
version = "0.0.27"
dependencies = [
"clap",
"uu_base32",
"uucore",
]
@ -2586,7 +2636,7 @@ dependencies = [
"filetime",
"indicatif",
"libc",
"quick-error",
"quick-error 2.0.1",
"selinux",
"uucore",
"walkdir",
@ -2730,7 +2780,6 @@ dependencies = [
"num-bigint",
"num-prime",
"num-traits",
"quickcheck",
"rand",
"smallvec",
"uucore",
@ -3035,7 +3084,7 @@ dependencies = [
"chrono",
"clap",
"itertools",
"quick-error",
"quick-error 2.0.1",
"regex",
"uucore",
]
@ -3533,6 +3582,15 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wait-timeout"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6"
dependencies = [
"libc",
]
[[package]]
name = "walkdir"
version = "2.5.0"

View file

@ -1,3 +1,5 @@
# spell-checker:ignore proptest
[package]
name = "uu_base32"
version = "0.0.27"
@ -20,6 +22,9 @@ path = "src/base32.rs"
clap = { workspace = true }
uucore = { workspace = true, features = ["encoding"] }
[dev-dependencies]
proptest = "1.5.0"
[[bin]]
name = "base32"
path = "src/main.rs"

View file

@ -3,13 +3,11 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
use std::io::{stdin, Read};
pub mod base_common;
use clap::Command;
use uucore::{encoding::Format, error::UResult, help_about, help_usage};
pub mod base_common;
const ABOUT: &str = help_about!("base32.md");
const USAGE: &str = help_usage!("base32.md");
@ -17,20 +15,11 @@ const USAGE: &str = help_usage!("base32.md");
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let format = Format::Base32;
let config: base_common::Config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;
let config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;
// Create a reference to stdin so we can return a locked stdin from
// parse_base_cmd_args
let stdin_raw = stdin();
let mut input: Box<dyn Read> = base_common::get_input(&config, &stdin_raw)?;
let mut input = base_common::get_input(&config)?;
base_common::handle_input(
&mut input,
format,
config.wrap_cols,
config.ignore_garbage,
config.decode,
)
base_common::handle_input(&mut input, format, config)
}
pub fn uu_app() -> Command {

View file

@ -3,27 +3,35 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
use std::io::{stdout, Read, Write};
// spell-checker:ignore hexupper lsbf msbf unpadded
use clap::{crate_version, Arg, ArgAction, Command};
use std::fs::File;
use std::io::{self, ErrorKind, Read};
use std::path::{Path, PathBuf};
use uucore::display::Quotable;
use uucore::encoding::{wrap_print, Data, EncodeError, Format};
use uucore::encoding::{
for_base_common::{BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER},
Format, Z85Wrapper, BASE2LSBF, BASE2MSBF,
};
use uucore::encoding::{EncodingWrapper, SupportsFastDecodeAndEncode};
use uucore::error::{FromIo, UResult, USimpleError, UUsageError};
use uucore::format_usage;
use std::fs::File;
use std::io::{BufReader, Stdin};
use std::path::Path;
pub const BASE_CMD_PARSE_ERROR: i32 = 1;
use clap::{crate_version, Arg, ArgAction, Command};
/// Encoded output will be formatted in lines of this length (the last line can be shorter)
///
/// Other implementations default to 76
///
/// This default is only used if no "-w"/"--wrap" argument is passed
pub const WRAP_DEFAULT: usize = 76;
pub static BASE_CMD_PARSE_ERROR: i32 = 1;
// Config.
pub struct Config {
pub decode: bool,
pub ignore_garbage: bool,
pub wrap_cols: Option<usize>,
pub to_read: Option<String>,
pub to_read: Option<PathBuf>,
}
pub mod options {
@ -35,9 +43,10 @@ pub mod options {
impl Config {
pub fn from(options: &clap::ArgMatches) -> UResult<Self> {
let file: Option<String> = match options.get_many::<String>(options::FILE) {
let to_read = match options.get_many::<String>(options::FILE) {
Some(mut values) => {
let name = values.next().unwrap();
if let Some(extra_op) = values.next() {
return Err(UUsageError::new(
BASE_CMD_PARSE_ERROR,
@ -48,19 +57,22 @@ impl Config {
if name == "-" {
None
} else {
if !Path::exists(Path::new(name)) {
let path = Path::new(name);
if !path.exists() {
return Err(USimpleError::new(
BASE_CMD_PARSE_ERROR,
format!("{}: No such file or directory", name.maybe_quote()),
format!("{}: No such file or directory", path.maybe_quote()),
));
}
Some(name.clone())
Some(path.to_owned())
}
}
None => None,
};
let cols = options
let wrap_cols = options
.get_one::<String>(options::WRAP)
.map(|num| {
num.parse::<usize>().map_err(|_| {
@ -75,8 +87,8 @@ impl Config {
Ok(Self {
decode: options.get_flag(options::DECODE),
ignore_garbage: options.get_flag(options::IGNORE_GARBAGE),
wrap_cols: cols,
to_read: file,
wrap_cols,
to_read,
})
}
}
@ -118,7 +130,7 @@ pub fn base_app(about: &'static str, usage: &str) -> Command {
.short('w')
.long(options::WRAP)
.value_name("COLS")
.help("wrap encoded lines after COLS character (default 76, 0 to disable wrapping)")
.help(format!("wrap encoded lines after COLS character (default {WRAP_DEFAULT}, 0 to disable wrapping)"))
.overrides_with(options::WRAP),
)
// "multiple" arguments are used to check whether there is more than one
@ -131,55 +143,619 @@ pub fn base_app(about: &'static str, usage: &str) -> Command {
)
}
pub fn get_input<'a>(config: &Config, stdin_ref: &'a Stdin) -> UResult<Box<dyn Read + 'a>> {
pub fn get_input(config: &Config) -> UResult<Box<dyn Read>> {
match &config.to_read {
Some(name) => {
let file_buf =
File::open(Path::new(name)).map_err_context(|| name.maybe_quote().to_string())?;
Ok(Box::new(BufReader::new(file_buf))) // as Box<dyn Read>
Some(path_buf) => {
// Do not buffer input, because buffering is handled by `fast_decode` and `fast_encode`
let file =
File::open(path_buf).map_err_context(|| path_buf.maybe_quote().to_string())?;
Ok(Box::new(file))
}
None => {
Ok(Box::new(stdin_ref.lock())) // as Box<dyn Read>
let stdin_lock = io::stdin().lock();
Ok(Box::new(stdin_lock))
}
}
}
pub fn handle_input<R: Read>(
input: &mut R,
format: Format,
line_wrap: Option<usize>,
ignore_garbage: bool,
decode: bool,
) -> UResult<()> {
let mut data = Data::new(input, format).ignore_garbage(ignore_garbage);
if let Some(wrap) = line_wrap {
data = data.line_wrap(wrap);
}
pub fn handle_input<R: Read>(input: &mut R, format: Format, config: Config) -> UResult<()> {
let supports_fast_decode_and_encode = get_supports_fast_decode_and_encode(format);
if decode {
match data.decode() {
Ok(s) => {
// Silent the warning as we want to the error message
#[allow(clippy::question_mark)]
if stdout().write_all(&s).is_err() {
// on windows console, writing invalid utf8 returns an error
return Err(USimpleError::new(1, "error: cannot write non-utf8 data"));
}
Ok(())
}
Err(_) => Err(USimpleError::new(1, "error: invalid input")),
}
let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode.as_ref();
let mut stdout_lock = io::stdout().lock();
if config.decode {
fast_decode::fast_decode(
input,
&mut stdout_lock,
supports_fast_decode_and_encode_ref,
config.ignore_garbage,
)
} else {
match data.encode() {
Ok(s) => {
wrap_print(&data, &s);
Ok(())
}
Err(EncodeError::InvalidInput) => Err(USimpleError::new(1, "error: invalid input")),
Err(_) => Err(USimpleError::new(
1,
"error: invalid input (length must be multiple of 4 characters)",
)),
}
fast_encode::fast_encode(
input,
&mut stdout_lock,
supports_fast_decode_and_encode_ref,
config.wrap_cols,
)
}
}
/// Builds the streaming encoder/decoder object that corresponds to `format`.
///
/// Every format except Z85 is backed by an `EncodingWrapper`, which is constructed from four
/// values, in this order:
///
/// 1. the encoding constant itself,
/// 2. the "valid decoding multiple" (`fast_decode` multiplies the value reported by
///    `valid_decoding_multiple()` by 1,024 to size its decode chunks),
/// 3. the "unpadded multiple" (`fast_encode` multiplies the value reported by
///    `unpadded_multiple()` by 1,024 to size its encode chunks),
/// 4. the alphabet, i.e. every byte that may appear in encoded data, padding included
///    (`fast_decode` uses it to build its garbage filter).
///
/// Z85 is handled by its own `Z85Wrapper`.
pub fn get_supports_fast_decode_and_encode(format: Format) -> Box<dyn SupportsFastDecodeAndEncode> {
    // Values passed as the "valid decoding multiple"
    const HEX_DECODING_MULTIPLE: usize = 2;
    const BINARY_DECODING_MULTIPLE: usize = 8;
    const B32_DECODING_MULTIPLE: usize = 8;
    const B64_DECODING_MULTIPLE: usize = 4;

    // Values passed as the "unpadded multiple"
    const HEX_UNPADDED_MULTIPLE: usize = 1;
    const BINARY_UNPADDED_MULTIPLE: usize = 1;
    const B32_UNPADDED_MULTIPLE: usize = 5;
    const B64_UNPADDED_MULTIPLE: usize = 3;

    let wrapper: Box<dyn SupportsFastDecodeAndEncode> = match format {
        Format::Base16 => Box::new(EncodingWrapper::new(
            HEXUPPER,
            HEX_DECODING_MULTIPLE,
            HEX_UNPADDED_MULTIPLE,
            // spell-checker:disable-next-line
            b"0123456789ABCDEF",
        )),
        Format::Base2Lsbf => Box::new(EncodingWrapper::new(
            BASE2LSBF,
            BINARY_DECODING_MULTIPLE,
            BINARY_UNPADDED_MULTIPLE,
            // spell-checker:disable-next-line
            b"01",
        )),
        Format::Base2Msbf => Box::new(EncodingWrapper::new(
            BASE2MSBF,
            BINARY_DECODING_MULTIPLE,
            BINARY_UNPADDED_MULTIPLE,
            // spell-checker:disable-next-line
            b"01",
        )),
        Format::Base32 => Box::new(EncodingWrapper::new(
            BASE32,
            B32_DECODING_MULTIPLE,
            B32_UNPADDED_MULTIPLE,
            // spell-checker:disable-next-line
            b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567=",
        )),
        Format::Base32Hex => Box::new(EncodingWrapper::new(
            BASE32HEX,
            B32_DECODING_MULTIPLE,
            B32_UNPADDED_MULTIPLE,
            // spell-checker:disable-next-line
            b"0123456789ABCDEFGHIJKLMNOPQRSTUV=",
        )),
        Format::Base64 => Box::new(EncodingWrapper::new(
            BASE64,
            B64_DECODING_MULTIPLE,
            B64_UNPADDED_MULTIPLE,
            // spell-checker:disable-next-line
            b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=+/",
        )),
        Format::Base64Url => Box::new(EncodingWrapper::new(
            BASE64URL,
            B64_DECODING_MULTIPLE,
            B64_UNPADDED_MULTIPLE,
            // spell-checker:disable-next-line
            b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=_-",
        )),
        Format::Z85 => Box::new(Z85Wrapper {}),
    };

    wrapper
}
pub mod fast_encode {
use crate::base_common::{format_read_error, WRAP_DEFAULT};
use std::{
collections::VecDeque,
io::{self, ErrorKind, Read, Write},
num::NonZeroUsize,
};
use uucore::{
encoding::SupportsFastDecodeAndEncode,
error::{UResult, USimpleError},
};
// State needed to emit encoded output as fixed-width lines
struct LineWrapping {
// Number of encoded bytes written on each line before a '\n' is inserted (never zero)
line_length: NonZeroUsize,
// Scratch buffer in which complete, newline-terminated lines are staged before being written
print_buffer: Vec<u8>,
}
// Start of helper functions
/// Encodes every complete chunk that can be formed from `leftover_buffer` plus `read_buffer`.
///
/// When `bytes_to_steal` is non-zero, that many bytes are moved from the front of `read_buffer`
/// onto `leftover_buffer`, which must then be exactly `encode_in_chunks_of_size` bytes long; that
/// completed chunk is encoded first and `leftover_buffer` is emptied. The rest of `read_buffer`
/// is encoded in chunks of `encode_in_chunks_of_size` bytes. Bytes that do not fill a final chunk
/// are appended to `leftover_buffer` for the next call.
///
/// All encoded bytes are appended to `encoded_buffer`. Errors reported by the encoder are
/// returned unchanged.
fn encode_in_chunks_to_buffer(
    supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
    encode_in_chunks_of_size: usize,
    bytes_to_steal: usize,
    read_buffer: &[u8],
    encoded_buffer: &mut VecDeque<u8>,
    leftover_buffer: &mut VecDeque<u8>,
) -> UResult<()> {
    let mut unchunked = read_buffer;

    if bytes_to_steal > 0 {
        let (head, tail) = unchunked.split_at(bytes_to_steal);

        // Top up the carried-over bytes; the caller computed `bytes_to_steal` so that this
        // produces exactly one chunk
        leftover_buffer.extend(head);
        assert!(leftover_buffer.len() == encode_in_chunks_of_size);

        supports_fast_decode_and_encode
            .encode_to_vec_deque(leftover_buffer.make_contiguous(), encoded_buffer)?;

        leftover_buffer.clear();

        unchunked = tail;
    }

    let mut whole_chunks = unchunked.chunks_exact(encode_in_chunks_of_size);

    for chunk in whole_chunks.by_ref() {
        assert!(chunk.len() == encode_in_chunks_of_size);

        supports_fast_decode_and_encode.encode_to_vec_deque(chunk, encoded_buffer)?;
    }

    // Whatever did not fill a chunk waits for more input (or for cleanup)
    leftover_buffer.extend(whole_chunks.remainder());

    Ok(())
}
/// Writes everything in `encoded_buffer` to `output` without inserting any line breaks.
///
/// During normal streaming (`is_cleanup == false`) the buffer is emptied after it has been
/// written. During cleanup a single trailing `'\n'` is written instead, and the buffer is left
/// as it is.
fn write_without_line_breaks(
    encoded_buffer: &mut VecDeque<u8>,
    output: &mut dyn Write,
    is_cleanup: bool,
) -> io::Result<()> {
    // TODO
    // `encoded_buffer` only has to be a VecDeque if line wrapping is enabled
    // (`make_contiguous` should be a no-op here)
    // Refactoring could avoid this call
    let pending: &[u8] = encoded_buffer.make_contiguous();

    output.write_all(pending)?;

    if !is_cleanup {
        encoded_buffer.clear();

        return Ok(());
    }

    output.write_all(b"\n")
}
fn write_with_line_breaks(
&mut LineWrapping {
ref line_length,
ref mut print_buffer,
}: &mut LineWrapping,
encoded_buffer: &mut VecDeque<u8>,
output: &mut dyn Write,
is_cleanup: bool,
) -> io::Result<()> {
let line_length = line_length.get();
let make_contiguous_result = encoded_buffer.make_contiguous();
let chunks_exact = make_contiguous_result.chunks_exact(line_length);
let mut bytes_added_to_print_buffer = 0;
for sl in chunks_exact {
bytes_added_to_print_buffer += sl.len();
print_buffer.extend_from_slice(sl);
print_buffer.push(b'\n');
}
output.write_all(print_buffer)?;
// Remove the bytes that were just printed from `encoded_buffer`
drop(encoded_buffer.drain(..bytes_added_to_print_buffer));
if is_cleanup {
if encoded_buffer.is_empty() {
// Do not write a newline in this case, because two trailing newlines should never be printed
} else {
// Print the partial line, since this is cleanup and no more data is coming
output.write_all(encoded_buffer.make_contiguous())?;
output.write_all(b"\n")?;
}
} else {
print_buffer.clear();
}
Ok(())
}
/// Writes the contents of `encoded_buffer` to `output`, with or without line wrapping.
///
/// Dispatches to `write_with_line_breaks` when `line_wrapping_option` holds a wrapping
/// configuration, and to `write_without_line_breaks` otherwise. `is_cleanup` is forwarded
/// unchanged.
fn write_to_output(
    line_wrapping_option: &mut Option<LineWrapping>,
    encoded_buffer: &mut VecDeque<u8>,
    output: &mut dyn Write,
    is_cleanup: bool,
) -> io::Result<()> {
    match line_wrapping_option.as_mut() {
        Some(line_wrapping) => {
            write_with_line_breaks(line_wrapping, encoded_buffer, output, is_cleanup)
        }
        None => write_without_line_breaks(encoded_buffer, output, is_cleanup),
    }
}
// End of helper functions
/// Reads all of `input`, encodes it, and writes the encoded text to `output` in a streaming
/// fashion.
///
/// * `supports_fast_decode_and_encode` - the encoder to use; its `unpadded_multiple()` decides
///   the chunk size, so that every chunk except the final one is encoded without padding
/// * `wrap` - `Some(0)` disables line wrapping, `Some(n)` wraps encoded output after `n` bytes,
///   and `None` wraps at `WRAP_DEFAULT`
///
/// # Errors
///
/// Returns a `USimpleError` with exit code 1 and a "read error: ..." message if reading from
/// `input` fails. A read that fails with `ErrorKind::Interrupted` is not treated as a failure;
/// it is simply retried. Errors from the encoder and from writing to `output` are propagated.
pub fn fast_encode<R: Read, W: Write>(
    input: &mut R,
    mut output: W,
    supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
    wrap: Option<usize>,
) -> UResult<()> {
    // Based on performance testing
    const INPUT_BUFFER_SIZE: usize = 32 * 1_024;

    const ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE: usize = 1_024;

    let encode_in_chunks_of_size =
        supports_fast_decode_and_encode.unpadded_multiple() * ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE;

    assert!(encode_in_chunks_of_size > 0);

    // The "data-encoding" crate supports line wrapping, but not arbitrary line wrapping, only certain widths, so
    // line wrapping must be handled here.
    // https://github.com/ia0/data-encoding/blob/4f42ad7ef242f6d243e4de90cd1b46a57690d00e/lib/src/lib.rs#L1710
    //
    // A requested width of 0 means "do not wrap": `NonZeroUsize::new` yields `None` for it, which
    // disables wrapping. When "-w"/"--wrap" was not passed, `WRAP_DEFAULT` is used.
    let mut line_wrapping =
        NonZeroUsize::new(wrap.unwrap_or(WRAP_DEFAULT)).map(|line_length| LineWrapping {
            line_length,
            print_buffer: Vec::<u8>::new(),
        });

    // Start of buffers
    // Data that was read from `input`
    let mut input_buffer = vec![0; INPUT_BUFFER_SIZE];

    assert!(!input_buffer.is_empty());

    // Data that was read from `input` but has not been encoded yet
    let mut leftover_buffer = VecDeque::<u8>::new();

    // Encoded data that needs to be written to `output`
    let mut encoded_buffer = VecDeque::<u8>::new();
    // End of buffers

    loop {
        let bytes_read_from_input = match input.read(&mut input_buffer) {
            // End of input
            Ok(0) => break,
            Ok(bytes_read) => bytes_read,
            // An interrupted read transferred no data and is not a real failure, so try again
            Err(er) if er.kind() == ErrorKind::Interrupted => continue,
            Err(er) => return Err(USimpleError::new(1, format_read_error(er.kind()))),
        };

        // The part of `input_buffer` that was actually filled by the call to `read`
        let read_buffer = &input_buffer[..bytes_read_from_input];

        // How many bytes to steal from `read_buffer` to get `leftover_buffer` to the right size
        let bytes_to_steal = encode_in_chunks_of_size - leftover_buffer.len();

        if bytes_to_steal > bytes_read_from_input {
            // Do not have enough data to encode a chunk, so copy data to `leftover_buffer` and read more
            leftover_buffer.extend(read_buffer);

            assert!(leftover_buffer.len() < encode_in_chunks_of_size);

            continue;
        }

        // Encode data in chunks, then place it in `encoded_buffer`
        encode_in_chunks_to_buffer(
            supports_fast_decode_and_encode,
            encode_in_chunks_of_size,
            bytes_to_steal,
            read_buffer,
            &mut encoded_buffer,
            &mut leftover_buffer,
        )?;

        assert!(leftover_buffer.len() < encode_in_chunks_of_size);

        // Write all data in `encoded_buffer` to `output`
        write_to_output(&mut line_wrapping, &mut encoded_buffer, &mut output, false)?;
    }

    // Cleanup
    // `input` has finished producing data, so the data remaining in the buffers needs to be encoded and printed
    {
        // Encode all remaining unencoded bytes, placing them in `encoded_buffer`
        supports_fast_decode_and_encode
            .encode_to_vec_deque(leftover_buffer.make_contiguous(), &mut encoded_buffer)?;

        // Write all data in `encoded_buffer` to output
        // `is_cleanup` triggers special cleanup-only logic
        write_to_output(&mut line_wrapping, &mut encoded_buffer, &mut output, true)?;
    }

    Ok(())
}
}
pub mod fast_decode {
use crate::base_common::format_read_error;
use std::io::{self, ErrorKind, Read, Write};
use uucore::{
encoding::SupportsFastDecodeAndEncode,
error::{UResult, USimpleError},
};
// Start of helper functions
/// Builds a 256-entry lookup table telling, for every possible byte value, whether that byte is
/// passed on to the decoder (`true`) or dropped (`false`).
///
/// * With `ignore_garbage`, only the bytes listed in `alphabet` are passed on.
/// * Without it, every byte is passed on except `'\n'` and `'\r'`.
///
/// # Panics
///
/// Panics if a byte would be marked twice, e.g. if `alphabet` contains a duplicate while
/// `ignore_garbage` is set.
fn alphabet_to_table(alphabet: &[u8], ignore_garbage: bool) -> [bool; 256] {
    let line_endings = [b'\n', b'\r'];

    // The table starts out uniformly filled with `initial`; the listed bytes are then flipped
    let (initial, flipped): (bool, &[u8]) = if ignore_garbage {
        // Pass through no characters except those in the alphabet
        (false, alphabet)
    } else {
        // Pass through all characters except '\n' and '\r'
        (true, &line_endings[..])
    };

    let mut table = [initial; 256];

    for &byte in flipped {
        let entry = &mut table[usize::from(byte)];

        // Should not have been set yet
        assert!(*entry == initial);

        *entry = !initial;
    }

    table
}
/// Decodes every complete chunk that can be formed from `leftover_buffer` plus
/// `read_buffer_filtered`.
///
/// When `bytes_to_steal` is non-zero, that many bytes are moved from the front of
/// `read_buffer_filtered` onto `leftover_buffer`, which must then be exactly
/// `decode_in_chunks_of_size` bytes long; that completed chunk is decoded first and
/// `leftover_buffer` is emptied. The rest of `read_buffer_filtered` is decoded in chunks of
/// `decode_in_chunks_of_size` bytes. Bytes that do not fill a final chunk are appended to
/// `leftover_buffer` for the next call.
///
/// All decoded bytes are appended to `decoded_buffer`. Errors reported by the decoder are
/// returned unchanged.
fn decode_in_chunks_to_buffer(
    supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
    decode_in_chunks_of_size: usize,
    bytes_to_steal: usize,
    read_buffer_filtered: &[u8],
    decoded_buffer: &mut Vec<u8>,
    leftover_buffer: &mut Vec<u8>,
) -> UResult<()> {
    let mut unchunked = read_buffer_filtered;

    if bytes_to_steal > 0 {
        let (head, tail) = unchunked.split_at(bytes_to_steal);

        // Top up the carried-over bytes; the caller computed `bytes_to_steal` so that this
        // produces exactly one chunk
        leftover_buffer.extend(head);
        assert!(leftover_buffer.len() == decode_in_chunks_of_size);

        supports_fast_decode_and_encode.decode_into_vec(leftover_buffer, decoded_buffer)?;

        leftover_buffer.clear();

        unchunked = tail;
    }

    let mut whole_chunks = unchunked.chunks_exact(decode_in_chunks_of_size);

    for chunk in whole_chunks.by_ref() {
        assert!(chunk.len() == decode_in_chunks_of_size);

        supports_fast_decode_and_encode.decode_into_vec(chunk, decoded_buffer)?;
    }

    // Whatever did not fill a chunk waits for more input (or for cleanup)
    leftover_buffer.extend(whole_chunks.remainder());

    Ok(())
}
/// Writes all of `decoded_buffer` to `output`, then empties the buffer so it can be reused.
///
/// If the write fails, the error is returned and the buffer is left untouched.
fn write_to_output(decoded_buffer: &mut Vec<u8>, output: &mut dyn Write) -> io::Result<()> {
    output
        .write_all(&decoded_buffer[..])
        .map(|()| decoded_buffer.clear())
}
// End of helper functions
/// Reads all of `input`, decodes it, and writes the decoded bytes to `output` in a streaming
/// fashion.
///
/// * `supports_fast_decode_and_encode` - the decoder to use; its `valid_decoding_multiple()`
///   decides the chunk size and its `alphabet()` decides what counts as garbage
/// * `ignore_garbage` - when `true`, every byte outside the alphabet is dropped before decoding;
///   when `false`, only `'\n'` and `'\r'` are dropped
///
/// # Errors
///
/// Returns a `USimpleError` with exit code 1 and a "read error: ..." message if reading from
/// `input` fails. A read that fails with `ErrorKind::Interrupted` is not treated as a failure;
/// it is simply retried. Errors from the decoder and from writing to `output` are propagated.
pub fn fast_decode<R: Read, W: Write>(
    input: &mut R,
    mut output: &mut W,
    supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
    ignore_garbage: bool,
) -> UResult<()> {
    // Based on performance testing
    const INPUT_BUFFER_SIZE: usize = 32 * 1_024;

    const DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE: usize = 1_024;

    let alphabet = supports_fast_decode_and_encode.alphabet();

    let decode_in_chunks_of_size = supports_fast_decode_and_encode.valid_decoding_multiple()
        * DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE;

    assert!(decode_in_chunks_of_size > 0);

    // Note that it's not worth using "data-encoding"'s ignore functionality if `ignore_garbage` is true, because
    // "data-encoding"'s ignore functionality cannot discard non-ASCII bytes. The data has to be filtered before
    // passing it to "data-encoding", so there is no point in doing any filtering in "data-encoding". This also
    // allows execution to stay on the happy path in "data-encoding":
    // https://github.com/ia0/data-encoding/blob/4f42ad7ef242f6d243e4de90cd1b46a57690d00e/lib/src/lib.rs#L754-L756
    // It is also not worth using "data-encoding"'s ignore functionality when `ignore_garbage` is
    // false.
    // Note that the alphabet constants above already include the padding characters
    // TODO
    // Precompute this
    let table = alphabet_to_table(alphabet, ignore_garbage);

    // Start of buffers
    // Data that was read from `input`
    let mut input_buffer = vec![0; INPUT_BUFFER_SIZE];

    assert!(!input_buffer.is_empty());

    // Data that was read from `input` but has not been decoded yet
    let mut leftover_buffer = Vec::<u8>::new();

    // Decoded data that needs to be written to `output`
    let mut decoded_buffer = Vec::<u8>::new();

    // Buffer that holds the filtered copy of a chunk read from `input`, used only when that chunk
    // contains at least one byte that the table rejects
    let mut non_garbage_buffer = Vec::<u8>::new();
    // End of buffers

    loop {
        let bytes_read_from_input = match input.read(&mut input_buffer) {
            // End of input
            Ok(0) => break,
            Ok(bytes_read) => bytes_read,
            // An interrupted read transferred no data and is not a real failure, so try again
            Err(er) if er.kind() == ErrorKind::Interrupted => continue,
            Err(er) => return Err(USimpleError::new(1, format_read_error(er.kind()))),
        };

        // The part of `input_buffer` that was actually filled by the call to `read`
        let read_buffer = &input_buffer[..bytes_read_from_input];

        // First just scan the data for the happy path
        // Yields significant speedup when the input does not contain line endings
        let found_garbage = read_buffer.iter().any(|ue| !table[usize::from(*ue)]);

        let read_buffer_filtered = if found_garbage {
            // Copy only the bytes that the table lets through
            non_garbage_buffer.clear();
            non_garbage_buffer.extend(
                read_buffer
                    .iter()
                    .copied()
                    .filter(|ue| table[usize::from(*ue)]),
            );

            non_garbage_buffer.as_slice()
        } else {
            read_buffer
        };

        // How many bytes to steal from `read_buffer` to get `leftover_buffer` to the right size
        let bytes_to_steal = decode_in_chunks_of_size - leftover_buffer.len();

        if bytes_to_steal > read_buffer_filtered.len() {
            // Do not have enough data to decode a chunk, so copy data to `leftover_buffer` and read more
            leftover_buffer.extend(read_buffer_filtered);

            assert!(leftover_buffer.len() < decode_in_chunks_of_size);

            continue;
        }

        // Decode data in chunks, then place it in `decoded_buffer`
        decode_in_chunks_to_buffer(
            supports_fast_decode_and_encode,
            decode_in_chunks_of_size,
            bytes_to_steal,
            read_buffer_filtered,
            &mut decoded_buffer,
            &mut leftover_buffer,
        )?;

        assert!(leftover_buffer.len() < decode_in_chunks_of_size);

        // Write all data in `decoded_buffer` to `output`
        write_to_output(&mut decoded_buffer, &mut output)?;
    }

    // Cleanup
    // `input` has finished producing data, so the data remaining in the buffers needs to be decoded and printed
    {
        // Decode all remaining encoded bytes, placing them in `decoded_buffer`
        supports_fast_decode_and_encode
            .decode_into_vec(&leftover_buffer, &mut decoded_buffer)?;

        // Write all data in `decoded_buffer` to `output`
        write_to_output(&mut decoded_buffer, &mut output)?;
    }

    Ok(())
}
}
/// Formats the message reported when reading the input fails.
///
/// The textual description of `kind` is used with its first character upper-cased, prefixed
/// with "read error: ".
fn format_read_error(kind: ErrorKind) -> String {
    let description = kind.to_string();

    // e.g. "is a directory" -> "Is a directory"
    let mut letters = description.chars();

    let capitalized: String = match letters.next() {
        // Upper-casing one character can yield several, hence the chained iterator
        Some(first) => first.to_uppercase().chain(letters).collect(),
        None => String::new(),
    };

    format!("read error: {capitalized}")
}

View file

@ -0,0 +1,430 @@
// spell-checker:ignore lsbf msbf proptest
use proptest::{prelude::TestCaseError, prop_assert, prop_assert_eq, test_runner::TestRunner};
use std::io::Cursor;
use uu_base32::base_common::{fast_decode, fast_encode, get_supports_fast_decode_and_encode};
use uucore::encoding::{Format, SupportsFastDecodeAndEncode};
// Number of cases each proptest run generates: fewer in builds with debug assertions enabled
const CASES: u32 = if cfg!(debug_assertions) { 32 } else { 128 };
// Exclusive upper bound on the length of ordinary generated inputs:
// 256 kibibytes in builds with debug assertions enabled, 4 mebibytes otherwise
const NORMAL_INPUT_SIZE_LIMIT: usize = if cfg!(debug_assertions) {
    256 * 1024
} else {
    4 * 1024 * 1024
};

// Exclusive upper bound used by the "large inputs" run
const LARGE_INPUT_SIZE_LIMIT: usize = NORMAL_INPUT_SIZE_LIMIT * 4;
// Creates a fresh runner that generates `CASES` cases and has failure persistence turned off
// Note that `TestRunner`s cannot be reused
fn get_test_runner() -> TestRunner {
    let config = proptest::test_runner::Config {
        cases: CASES,
        failure_persistence: None,
        ..Default::default()
    };

    TestRunner::new(config)
}
// Property-tests `format` by running seven separate proptest runs, each through
// `configurable_round_trip` and each with a fresh `TestRunner`.
//
// Every run generates `ignore_garbage`, `line_wrap_zero` and `line_wrap`; the runs differ in the
// range of `line_wrap`, in the size of the generated input, and in whether garbage bytes are
// generated for insertion. The result of each run is unwrapped, so a failed run panics.
fn generic_round_trip(format: Format) {
let supports_fast_decode_and_encode = get_supports_fast_decode_and_encode(format);
// Borrow the boxed trait object once; the reference is shared by every closure below
let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode.as_ref();
// Make sure empty inputs round trip
{
get_test_runner()
.run(
&(
proptest::bool::ANY,
proptest::bool::ANY,
proptest::option::of(0_usize..512_usize),
),
|(ignore_garbage, line_wrap_zero, line_wrap)| {
configurable_round_trip(
format,
supports_fast_decode_and_encode_ref,
ignore_garbage,
line_wrap_zero,
line_wrap,
// Do not add garbage
Vec::<(usize, u8)>::new(),
// Empty input
Vec::<u8>::new(),
)
},
)
.unwrap();
}
// Unusually large line wrapping settings
{
get_test_runner()
.run(
&(
proptest::bool::ANY,
proptest::bool::ANY,
// Wrap widths from 512 up to (but not including) 65,535
proptest::option::of(512_usize..65_535_usize),
proptest::collection::vec(proptest::num::u8::ANY, 0..NORMAL_INPUT_SIZE_LIMIT),
),
|(ignore_garbage, line_wrap_zero, line_wrap, input)| {
configurable_round_trip(
format,
supports_fast_decode_and_encode_ref,
ignore_garbage,
line_wrap_zero,
line_wrap,
// Do not add garbage
Vec::<(usize, u8)>::new(),
input,
)
},
)
.unwrap();
}
// Spend more time on sane line wrapping settings
{
get_test_runner()
.run(
&(
proptest::bool::ANY,
proptest::bool::ANY,
proptest::option::of(0_usize..512_usize),
proptest::collection::vec(proptest::num::u8::ANY, 0..NORMAL_INPUT_SIZE_LIMIT),
),
|(ignore_garbage, line_wrap_zero, line_wrap, input)| {
configurable_round_trip(
format,
supports_fast_decode_and_encode_ref,
ignore_garbage,
line_wrap_zero,
line_wrap,
// Do not add garbage
Vec::<(usize, u8)>::new(),
input,
)
},
)
.unwrap();
}
// Test with garbage data
{
get_test_runner()
.run(
&(
proptest::bool::ANY,
proptest::bool::ANY,
proptest::option::of(0_usize..512_usize),
// Garbage data to insert
proptest::collection::vec(
(
// Random index
proptest::num::usize::ANY,
// In all of the encodings being tested, non-ASCII bytes are garbage
128_u8..=u8::MAX,
),
0..4_096,
),
proptest::collection::vec(proptest::num::u8::ANY, 0..NORMAL_INPUT_SIZE_LIMIT),
),
|(ignore_garbage, line_wrap_zero, line_wrap, garbage_data, input)| {
configurable_round_trip(
format,
supports_fast_decode_and_encode_ref,
ignore_garbage,
line_wrap_zero,
line_wrap,
garbage_data,
input,
)
},
)
.unwrap();
}
// Test small inputs
{
get_test_runner()
.run(
&(
proptest::bool::ANY,
proptest::bool::ANY,
proptest::option::of(0_usize..512_usize),
// Inputs shorter than 1,024 bytes
proptest::collection::vec(proptest::num::u8::ANY, 0..1_024),
),
|(ignore_garbage, line_wrap_zero, line_wrap, input)| {
configurable_round_trip(
format,
supports_fast_decode_and_encode_ref,
ignore_garbage,
line_wrap_zero,
line_wrap,
// Do not add garbage
Vec::<(usize, u8)>::new(),
input,
)
},
)
.unwrap();
}
// Test small inputs with garbage data
{
get_test_runner()
.run(
&(
proptest::bool::ANY,
proptest::bool::ANY,
proptest::option::of(0_usize..512_usize),
// Garbage data to insert
proptest::collection::vec(
(
// Random index
proptest::num::usize::ANY,
// In all of the encodings being tested, non-ASCII bytes are garbage
128_u8..=u8::MAX,
),
0..1_024,
),
proptest::collection::vec(proptest::num::u8::ANY, 0..1_024),
),
|(ignore_garbage, line_wrap_zero, line_wrap, garbage_data, input)| {
configurable_round_trip(
format,
supports_fast_decode_and_encode_ref,
ignore_garbage,
line_wrap_zero,
line_wrap,
garbage_data,
input,
)
},
)
.unwrap();
}
// Test large inputs
{
get_test_runner()
.run(
&(
proptest::bool::ANY,
proptest::bool::ANY,
proptest::option::of(0_usize..512_usize),
// Input lengths bounded by `LARGE_INPUT_SIZE_LIMIT` rather than `NORMAL_INPUT_SIZE_LIMIT`
proptest::collection::vec(proptest::num::u8::ANY, 0..LARGE_INPUT_SIZE_LIMIT),
),
|(ignore_garbage, line_wrap_zero, line_wrap, input)| {
configurable_round_trip(
format,
supports_fast_decode_and_encode_ref,
ignore_garbage,
line_wrap_zero,
line_wrap,
// Do not add garbage
Vec::<(usize, u8)>::new(),
input,
)
},
)
.unwrap();
}
}
/// Encodes `input`, optionally splices garbage bytes into the encoded data, decodes the
/// result, and checks that the outcome matches what the settings call for.
///
/// * `format` - only inspected to special-case Z85, whose input is first truncated to a
///   length divisible by 4.
/// * `supports_fast_decode_and_encode` - the encoder/decoder under test.
/// * `ignore_garbage` - forwarded to `fast_decode`.
/// * `line_wrap_zero` - when `true`, `Some(0)` is used as the line wrap and `line_wrap` is ignored.
/// * `line_wrap` - line wrap setting used when `line_wrap_zero` is `false`.
/// * `garbage_data` - `(index, byte)` pairs; each byte is inserted after the encoded byte at
///   `index % (encoded_len - 1)`, or pushed in front of the data when the encoded output has
///   fewer than two bytes. Later pairs mapping to the same position replace earlier ones.
/// * `input` - the bytes to encode.
///
/// The round trip is expected to succeed when no garbage was supplied or when
/// `ignore_garbage` is set, and to fail otherwise.
fn configurable_round_trip(
    format: Format,
    supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
    ignore_garbage: bool,
    line_wrap_zero: bool,
    line_wrap: Option<usize>,
    garbage_data: Vec<(usize, u8)>,
    mut input: Vec<u8>,
) -> Result<(), TestCaseError> {
    // Z85 can only encode inputs whose length is divisible by 4, so drop the excess bytes
    if matches!(format, Format::Z85) {
        let usable_len = input.len() - (input.len() % 4);

        input.truncate(usable_len);

        assert!((input.len() % 4) == 0);
    }

    let line_wrap_to_use = match line_wrap_zero {
        true => Some(0),
        false => line_wrap,
    };

    let input_len = input.len();
    let garbage_data_len = garbage_data.len();
    let garbage_data_is_empty = garbage_data.is_empty();

    // Encode, then take the original input back out of the cursor for the final comparison
    let mut encode_cursor = Cursor::new(input);
    let mut encoded = Vec::with_capacity(input_len * 8);

    fast_encode::fast_encode(
        &mut encode_cursor,
        &mut encoded,
        supports_fast_decode_and_encode,
        line_wrap_to_use,
    )
    .unwrap();

    let input = encode_cursor.into_inner();

    let encoded_or_encoded_with_garbage = if garbage_data_is_empty {
        encoded
    } else {
        let encoded_len = encoded.len();

        // Modulus used to map the random indices onto positions in the encoded data.
        // `None` when the encoded data has fewer than two bytes.
        let modulus = encoded_len
            .checked_sub(1)
            .filter(|&highest_index| highest_index != 0);

        // Slot `i` holds the garbage byte (if any) to emit right after encoded byte `i`
        let mut garbage_after = vec![Option::<u8>::None; encoded_len];

        let mut with_garbage = Vec::<u8>::with_capacity(encoded_len + garbage_data_len);

        for (index, garbage_byte) in garbage_data {
            match modulus {
                Some(m) => garbage_after[index % m] = Some(garbage_byte),
                // Nowhere to interleave, so the garbage goes in front of the encoded data
                None => with_garbage.push(garbage_byte),
            }
        }

        for (encoded_byte, trailing_garbage) in encoded.into_iter().zip(garbage_after) {
            with_garbage.push(encoded_byte);
            with_garbage.extend(trailing_garbage);
        }

        with_garbage
    };

    if line_wrap_to_use == Some(0) {
        let line_endings_count = encoded_or_encoded_with_garbage
            .iter()
            .filter(|&&byte| byte == b'\n')
            .count();

        // If line wrapping is disabled, there should only be one '\n' character (at the very end of the output)
        prop_assert_eq!(line_endings_count, 1);
    }
    // TODO
    // Validate other line wrapping settings

    let mut decoded = Vec::with_capacity(input_len);
    let mut decode_cursor = Cursor::new(encoded_or_encoded_with_garbage);

    let decode_succeeded = fast_decode::fast_decode(
        &mut decode_cursor,
        &mut decoded,
        supports_fast_decode_and_encode,
        ignore_garbage,
    )
    .is_ok();

    // A decoding error counts as a failed round trip
    let made_round_trip = decode_succeeded && input.as_slice() == decoded.as_slice();

    // Success is expected without garbage or with "ignore_garbage"; failure is expected otherwise
    let round_trip_should_succeed = garbage_data_is_empty || ignore_garbage;

    let result_was_correct = made_round_trip == round_trip_should_succeed;

    if !result_was_correct {
        eprintln!(
            "\
(configurable_round_trip) FAILURE
format: {format:?}
ignore_garbage: {ignore_garbage}
line_wrap_to_use: {line_wrap_to_use:?}
garbage_data_len: {garbage_data_len}
input_len: {input_len}
",
        );
    }

    prop_assert!(result_was_correct);

    Ok(())
}
// Runs the `generic_round_trip` property tests for `Format::Base16`.
#[test]
fn base16_round_trip() {
generic_round_trip(Format::Base16);
}
// Runs the `generic_round_trip` property tests for `Format::Base2Lsbf`.
#[test]
fn base2lsbf_round_trip() {
generic_round_trip(Format::Base2Lsbf);
}
// Runs the `generic_round_trip` property tests for `Format::Base2Msbf`.
#[test]
fn base2msbf_round_trip() {
generic_round_trip(Format::Base2Msbf);
}
// Runs the `generic_round_trip` property tests for `Format::Base32`.
#[test]
fn base32_round_trip() {
generic_round_trip(Format::Base32);
}
// Runs the `generic_round_trip` property tests for `Format::Base32Hex`.
#[test]
fn base32hex_round_trip() {
generic_round_trip(Format::Base32Hex);
}
// Runs the `generic_round_trip` property tests for `Format::Base64`.
#[test]
fn base64_round_trip() {
generic_round_trip(Format::Base64);
}
// Runs the `generic_round_trip` property tests for `Format::Base64Url`.
#[test]
fn base64url_round_trip() {
generic_round_trip(Format::Base64Url);
}
// Runs the `generic_round_trip` property tests for `Format::Z85`.
#[test]
fn z85_round_trip() {
generic_round_trip(Format::Z85);
}

View file

@ -17,6 +17,7 @@ readme.workspace = true
path = "src/base64.rs"
[dependencies]
clap = { workspace = true }
uucore = { workspace = true, features = ["encoding"] }
uu_base32 = { workspace = true }

View file

@ -3,13 +3,10 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
use clap::Command;
use uu_base32::base_common;
pub use uu_base32::uu_app;
use uucore::{encoding::Format, error::UResult, help_about, help_usage};
use std::io::{stdin, Read};
const ABOUT: &str = help_about!("base64.md");
const USAGE: &str = help_usage!("base64.md");
@ -17,18 +14,13 @@ const USAGE: &str = help_usage!("base64.md");
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let format = Format::Base64;
let config: base_common::Config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;
let config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;
// Create a reference to stdin so we can return a locked stdin from
// parse_base_cmd_args
let stdin_raw = stdin();
let mut input: Box<dyn Read> = base_common::get_input(&config, &stdin_raw)?;
let mut input = base_common::get_input(&config)?;
base_common::handle_input(
&mut input,
format,
config.wrap_cols,
config.ignore_garbage,
config.decode,
)
base_common::handle_input(&mut input, format, config)
}
/// Builds the `clap` [`Command`] for `base64` by passing this crate's `ABOUT` and `USAGE`
/// text to `base_common::base_app`.
pub fn uu_app() -> Command {
base_common::base_app(ABOUT, USAGE)
}

View file

@ -0,0 +1,177 @@
<!--
spell-checker:ignore gibibyte toybox
-->
# Benchmarking base32, base64, and basenc
Note that the functionality of the `base32` and `base64` programs is identical to that of the `basenc` program, using
the "--base32" and "--base64" options, respectively. For that reason, it is only necessary to benchmark `basenc`.
To compare the runtime performance of the uutils implementation with the GNU Core Utilities implementation, you can
use a benchmarking tool like [hyperfine][0].
hyperfine currently does not measure maximum memory usage. Memory usage can be benchmarked using [poop][2], or
[toybox][3]'s "time" subcommand (both are Linux only).
Build the `basenc` binary using the release profile:
```Shell
cargo build --package uu_basenc --profile release
```
## Expected performance
uutils' `basenc` performs streaming decoding and encoding, and therefore should perform all operations with a constant
maximum memory usage, regardless of the size of the input. Release builds currently use less than 3 mebibytes of
memory, and memory usage greater than 10 mebibytes should be considered a bug.
As of September 2024, uutils' `basenc` has runtime performance equal to or superior to GNU Core Utilities' `basenc` in
most scenarios. uutils' `basenc` uses slightly more memory, but given how small these quantities are in absolute
terms (see above), this is highly unlikely to be practically relevant to users.
## Benchmark results (2024-09-27)
### Setup
```Shell
# Use uutils' dd to create a 1 gibibyte in-memory file filled with random bytes (Linux only).
# On other platforms, you can use /tmp instead of /dev/shm, but note that /tmp is not guaranteed to be in-memory.
coreutils dd if=/dev/urandom of=/dev/shm/one-random-gibibyte bs=1024 count=1048576
# Encode this file for use in decoding performance testing
/usr/bin/basenc --base32hex -- /dev/shm/one-random-gibibyte 1>/dev/shm/one-random-gibibyte-base32hex-encoded
/usr/bin/basenc --z85 -- /dev/shm/one-random-gibibyte 1>/dev/shm/one-random-gibibyte-z85-encoded
```
### Programs being tested
uutils' `basenc`:
```
git rev-list HEAD | coreutils head -n 1 -- -
a0718ef0ffd50539a2e2bc0095c9fadcd70ab857
```
GNU Core Utilities' `basenc`:
```
/usr/bin/basenc --version | coreutils head -n 1 -- -
basenc (GNU coreutils) 9.4
```
### Encoding performance
#### "--base64", default line wrapping (76 characters)
Faster than GNU Core Utilities
```
hyperfine \
--sort \
command \
-- \
'/usr/bin/basenc --base64 -- /dev/shm/one-random-gibibyte 1>/dev/null' \
'./target/release/basenc --base64 -- /dev/shm/one-random-gibibyte 1>/dev/null'
Benchmark 1: /usr/bin/basenc --base64 -- /dev/shm/one-random-gibibyte 1>/dev/null
Time (mean ± σ): 965.1 ms ± 7.9 ms [User: 766.2 ms, System: 193.4 ms]
Range (min … max): 950.2 ms … 976.9 ms 10 runs
Benchmark 2: ./target/release/basenc --base64 -- /dev/shm/one-random-gibibyte 1>/dev/null
Time (mean ± σ): 696.6 ms ± 9.1 ms [User: 574.9 ms, System: 117.3 ms]
Range (min … max): 683.1 ms … 713.5 ms 10 runs
Relative speed comparison
1.39 ± 0.02 /usr/bin/basenc --base64 -- /dev/shm/one-random-gibibyte 1>/dev/null
1.00 ./target/release/basenc --base64 -- /dev/shm/one-random-gibibyte 1>/dev/null
```
#### "--base16", no line wrapping
Slower than GNU Core Utilities
```
poop \
'/usr/bin/basenc --base16 --wrap 0 -- /dev/shm/one-random-gibibyte' \
'./target/release/basenc --base16 --wrap 0 -- /dev/shm/one-random-gibibyte'
Benchmark 1 (6 runs): /usr/bin/basenc --base16 --wrap 0 -- /dev/shm/one-random-gibibyte
measurement mean ± σ min … max outliers delta
wall_time 836ms ± 13.3ms 822ms … 855ms 0 ( 0%) 0%
peak_rss 2.05MB ± 73.0KB 1.94MB … 2.12MB 0 ( 0%) 0%
cpu_cycles 2.85G ± 32.8M 2.82G … 2.91G 0 ( 0%) 0%
instructions 14.0G ± 58.7 14.0G … 14.0G 0 ( 0%) 0%
cache_references 70.0M ± 6.48M 63.7M … 78.8M 0 ( 0%) 0%
cache_misses 582K ± 172K 354K … 771K 0 ( 0%) 0%
branch_misses 667K ± 4.55K 662K … 674K 0 ( 0%) 0%
Benchmark 2 (6 runs): ./target/release/basenc --base16 --wrap 0 -- /dev/shm/one-random-gibibyte
measurement mean ± σ min … max outliers delta
wall_time 884ms ± 6.38ms 878ms … 895ms 0 ( 0%) 💩+ 5.7% ± 1.6%
peak_rss 2.65MB ± 66.8KB 2.55MB … 2.74MB 0 ( 0%) 💩+ 29.3% ± 4.4%
cpu_cycles 3.15G ± 8.61M 3.14G … 3.16G 0 ( 0%) 💩+ 10.6% ± 1.1%
instructions 10.5G ± 275 10.5G … 10.5G 0 ( 0%) ⚡- 24.9% ± 0.0%
cache_references 93.5M ± 6.10M 87.2M … 104M 0 ( 0%) 💩+ 33.7% ± 11.6%
cache_misses 415K ± 52.3K 363K … 474K 0 ( 0%) - 28.8% ± 28.0%
branch_misses 1.43M ± 4.82K 1.42M … 1.43M 0 ( 0%) 💩+113.9% ± 0.9%
```
### Decoding performance
#### "--base32hex"
Faster than GNU Core Utilities
```
hyperfine \
--sort \
command \
-- \
'/usr/bin/basenc --base32hex --decode -- /dev/shm/one-random-gibibyte-base32hex-encoded 1>/dev/null' \
'./target/release/basenc --base32hex --decode -- /dev/shm/one-random-gibibyte-base32hex-encoded 1>/dev/null'
Benchmark 1: /usr/bin/basenc --base32hex --decode -- /dev/shm/one-random-gibibyte-base32hex-encoded 1>/dev/null
Time (mean ± σ): 7.154 s ± 0.082 s [User: 6.802 s, System: 0.323 s]
Range (min … max): 7.051 s … 7.297 s 10 runs
Benchmark 2: ./target/release/basenc --base32hex --decode -- /dev/shm/one-random-gibibyte-base32hex-encoded 1>/dev/null
Time (mean ± σ): 2.679 s ± 0.025 s [User: 2.446 s, System: 0.221 s]
Range (min … max): 2.649 s … 2.718 s 10 runs
Relative speed comparison
2.67 ± 0.04 /usr/bin/basenc --base32hex --decode -- /dev/shm/one-random-gibibyte-base32hex-encoded 1>/dev/null
1.00 ./target/release/basenc --base32hex --decode -- /dev/shm/one-random-gibibyte-base32hex-encoded 1>/dev/null
```
#### "--z85", with "--ignore-garbage"
Faster than GNU Core Utilities
```
poop \
'/usr/bin/basenc --decode --ignore-garbage --z85 -- /dev/shm/one-random-gibibyte-z85-encoded' \
'./target/release/basenc --decode --ignore-garbage --z85 -- /dev/shm/one-random-gibibyte-z85-encoded'
Benchmark 1 (3 runs): /usr/bin/basenc --decode --ignore-garbage --z85 -- /dev/shm/one-random-gibibyte-z85-encoded
measurement mean ± σ min … max outliers delta
wall_time 14.4s ± 68.4ms 14.3s … 14.4s 0 ( 0%) 0%
peak_rss 1.98MB ± 10.8KB 1.97MB … 1.99MB 0 ( 0%) 0%
cpu_cycles 58.4G ± 211M 58.3G … 58.7G 0 ( 0%) 0%
instructions 74.7G ± 64.0 74.7G … 74.7G 0 ( 0%) 0%
cache_references 41.8M ± 624K 41.2M … 42.4M 0 ( 0%) 0%
cache_misses 693K ± 118K 567K … 802K 0 ( 0%) 0%
branch_misses 1.24G ± 183K 1.24G … 1.24G 0 ( 0%) 0%
Benchmark 2 (3 runs): ./target/release/basenc --decode --ignore-garbage --z85 -- /dev/shm/one-random-gibibyte-z85-encoded
measurement mean ± σ min … max outliers delta
wall_time 2.80s ± 17.9ms 2.79s … 2.82s 0 ( 0%) ⚡- 80.5% ± 0.8%
peak_rss 2.61MB ± 67.4KB 2.57MB … 2.69MB 0 ( 0%) 💩+ 31.9% ± 5.5%
cpu_cycles 10.8G ± 27.9M 10.8G … 10.9G 0 ( 0%) ⚡- 81.5% ± 0.6%
instructions 39.0G ± 353 39.0G … 39.0G 0 ( 0%) ⚡- 47.7% ± 0.0%
cache_references 114M ± 2.43M 112M … 116M 0 ( 0%) 💩+173.3% ± 9.6%
cache_misses 1.06M ± 288K 805K … 1.37M 0 ( 0%) + 52.6% ± 72.0%
branch_misses 1.18M ± 14.7K 1.16M … 1.19M 0 ( 0%) ⚡- 99.9% ± 0.0%
```
[0]: https://github.com/sharkdp/hyperfine
[1]: https://github.com/sharkdp/hyperfine?tab=readme-ov-file#installation
[2]: https://github.com/andrewrk/poop
[3]: https://landley.net/toybox/

View file

@ -3,19 +3,15 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//spell-checker:ignore (args) lsbf msbf
// spell-checker:ignore lsbf msbf
use clap::{Arg, ArgAction, Command};
use uu_base32::base_common::{self, Config, BASE_CMD_PARSE_ERROR};
use uucore::error::UClapError;
use uucore::{
encoding::Format,
error::{UResult, UUsageError},
};
use std::io::{stdin, Read};
use uucore::error::UClapError;
use uucore::{help_about, help_usage};
const ABOUT: &str = help_about!("basenc.md");
@ -81,16 +77,8 @@ fn parse_cmd_args(args: impl uucore::Args) -> UResult<(Config, Format)> {
#[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let (config, format) = parse_cmd_args(args)?;
// Create a reference to stdin so we can return a locked stdin from
// parse_base_cmd_args
let stdin_raw = stdin();
let mut input: Box<dyn Read> = base_common::get_input(&config, &stdin_raw)?;
base_common::handle_input(
&mut input,
format,
config.wrap_cols,
config.ignore_garbage,
config.decode,
)
let mut input = base_common::get_input(&config)?;
base_common::handle_input(&mut input, format, config)
}

View file

@ -111,8 +111,7 @@ where
OutputFormat::Hexadecimal => sum_hex,
OutputFormat::Base64 => match options.algo_name {
ALGORITHM_OPTIONS_CRC | ALGORITHM_OPTIONS_SYSV | ALGORITHM_OPTIONS_BSD => sum_hex,
_ => encoding::encode(encoding::Format::Base64, &hex::decode(sum_hex).unwrap())
.unwrap(),
_ => encoding::for_cksum::BASE64.encode(&hex::decode(sum_hex).unwrap()),
},
};
// The BSD checksum output is 5 digit integer

View file

@ -26,9 +26,6 @@ uucore = { workspace = true }
num-bigint = { workspace = true }
num-prime = { workspace = true }
[dev-dependencies]
quickcheck = "1.0.3"
[[bin]]
name = "factor"
path = "src/main.rs"

View file

@ -77,7 +77,7 @@ default = []
backup-control = []
colors = []
checksum = ["data-encoding", "thiserror", "regex", "sum"]
encoding = ["data-encoding", "data-encoding-macro", "z85", "thiserror"]
encoding = ["data-encoding", "data-encoding-macro", "z85"]
entries = ["libc"]
fs = ["dunce", "libc", "winapi-util", "windows-sys"]
fsext = ["libc", "windows-sys"]

View file

@ -3,35 +3,24 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (strings) ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUV
// spell-checker:ignore (encodings) lsbf msbf hexupper
// spell-checker:ignore (encodings) lsbf msbf
// spell-checker:ignore unpadded
use std::io::{self, Read, Write};
use data_encoding::{Encoding, BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER};
use crate::error::{UResult, USimpleError};
use data_encoding::Encoding;
use data_encoding_macro::new_encoding;
#[cfg(feature = "thiserror")]
use thiserror::Error;
use std::collections::VecDeque;
#[derive(Debug, Error)]
pub enum DecodeError {
#[error("{}", _0)]
Decode(#[from] data_encoding::DecodeError),
#[error("{}", _0)]
DecodeZ85(#[from] z85::DecodeError),
#[error("{}", _0)]
Io(#[from] io::Error),
// Re-export for the faster decoding/encoding logic
pub mod for_base_common {
pub use data_encoding::*;
}
#[derive(Debug)]
pub enum EncodeError {
Z85InputLenNotMultipleOf4,
InvalidInput,
pub mod for_cksum {
pub use data_encoding::BASE64;
}
pub type DecodeResult = Result<Vec<u8>, DecodeError>;
#[derive(Clone, Copy)]
#[derive(Clone, Copy, Debug)]
pub enum Format {
Base64,
Base64Url,
@ -42,138 +31,182 @@ pub enum Format {
Base2Msbf,
Z85,
}
use self::Format::*;
const BASE2LSBF: Encoding = new_encoding! {
pub const BASE2LSBF: Encoding = new_encoding! {
symbols: "01",
bit_order: LeastSignificantFirst,
};
const BASE2MSBF: Encoding = new_encoding! {
pub const BASE2MSBF: Encoding = new_encoding! {
symbols: "01",
bit_order: MostSignificantFirst,
};
pub fn encode(f: Format, input: &[u8]) -> Result<String, EncodeError> {
Ok(match f {
Base32 => BASE32.encode(input),
Base64 => BASE64.encode(input),
Base64Url => BASE64URL.encode(input),
Base32Hex => BASE32HEX.encode(input),
Base16 => HEXUPPER.encode(input),
Base2Lsbf => BASE2LSBF.encode(input),
Base2Msbf => BASE2MSBF.encode(input),
Z85 => {
// According to the spec we should not accept inputs whose len is not a multiple of 4.
// However, the z85 crate implements a padded encoding and accepts such inputs. We have to manually check for them.
if input.len() % 4 == 0 {
z85::encode(input)
} else {
return Err(EncodeError::Z85InputLenNotMultipleOf4);
}
}
})
/// Field-less type whose `SupportsFastDecodeAndEncode` implementation encodes and decodes
/// Z85 through the `z85` crate.
pub struct Z85Wrapper {}
/// Bundles a `data_encoding::Encoding` with the extra metadata that the
/// `SupportsFastDecodeAndEncode` implementation for this type returns.
pub struct EncodingWrapper {
// Characters used by the encoding (returned by `alphabet()`)
pub alphabet: &'static [u8],
// Encoding that performs the actual decoding and encoding
pub encoding: Encoding,
// Value returned by `unpadded_multiple()`
pub unpadded_multiple: usize,
// Value returned by `valid_decoding_multiple()`
pub valid_decoding_multiple: usize,
}
pub fn decode(f: Format, input: &[u8]) -> DecodeResult {
Ok(match f {
Base32 => BASE32.decode(input)?,
Base64 => BASE64.decode(input)?,
Base64Url => BASE64URL.decode(input)?,
Base32Hex => BASE32HEX.decode(input)?,
Base16 => HEXUPPER.decode(input)?,
Base2Lsbf => BASE2LSBF.decode(input)?,
Base2Msbf => BASE2MSBF.decode(input)?,
Z85 => {
// The z85 crate implements a padded encoding by using a leading '#' which is otherwise not allowed.
// We manually check for a leading '#' and return an error ourselves.
if input.starts_with(b"#") {
return Err(z85::DecodeError::InvalidByte(0, b'#').into());
} else {
z85::decode(input)?
}
}
})
}
impl EncodingWrapper {
pub fn new(
encoding: Encoding,
valid_decoding_multiple: usize,
unpadded_multiple: usize,
alphabet: &'static [u8],
) -> Self {
assert!(valid_decoding_multiple > 0);
pub struct Data<R: Read> {
line_wrap: usize,
ignore_garbage: bool,
input: R,
format: Format,
alphabet: &'static [u8],
}
assert!(unpadded_multiple > 0);
assert!(!alphabet.is_empty());
impl<R: Read> Data<R> {
pub fn new(input: R, format: Format) -> Self {
Self {
line_wrap: 76,
ignore_garbage: false,
input,
format,
alphabet: match format {
Base32 => b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567=",
Base64 => b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=+/",
Base64Url => b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=_-",
Base32Hex => b"0123456789ABCDEFGHIJKLMNOPQRSTUV=",
Base16 => b"0123456789ABCDEF",
Base2Lsbf => b"01",
Base2Msbf => b"01",
Z85 => b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#",
},
alphabet,
encoding,
unpadded_multiple,
valid_decoding_multiple,
}
}
}
#[must_use]
pub fn line_wrap(mut self, wrap: usize) -> Self {
self.line_wrap = wrap;
self
/// Interface implemented by every encoding that `fast_decode` and `fast_encode` can work with.
pub trait SupportsFastDecodeAndEncode {
/// Returns the list of characters used by this encoding
fn alphabet(&self) -> &'static [u8];
/// Decodes `input` and appends the decoded bytes to `output`.
///
/// The implementations in this file return an "invalid input" error when `input` cannot be decoded.
fn decode_into_vec(&self, input: &[u8], output: &mut Vec<u8>) -> UResult<()>;
/// Encodes `input` and appends the encoded bytes to the back of `output`.
fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque<u8>) -> UResult<()>;
/// Inputs with a length that is a multiple of this number do not have padding when encoded. For instance:
///
/// "The quick brown"
///
/// is 15 characters (divisible by 3), so it is encoded in Base64 without padding:
///
/// "VGhlIHF1aWNrIGJyb3du"
///
/// While:
///
/// "The quick brown fox"
///
/// is 19 characters, which is not divisible by 3, so its Base64 representation has padding:
///
/// "VGhlIHF1aWNrIGJyb3duIGZveA=="
///
/// The encoding performed by `fast_encode` depends on this number being correct.
fn unpadded_multiple(&self) -> usize;
/// Data to decode must have a length that is a multiple of this number
///
/// The decoding performed by `fast_decode` depends on this number being correct.
fn valid_decoding_multiple(&self) -> usize;
}
impl SupportsFastDecodeAndEncode for Z85Wrapper {
/// Returns the characters of the Z85 alphabet as a static byte string.
fn alphabet(&self) -> &'static [u8] {
// Z85 alphabet
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#"
}
#[must_use]
pub fn ignore_garbage(mut self, ignore: bool) -> Self {
self.ignore_garbage = ignore;
self
}
fn decode_into_vec(&self, input: &[u8], output: &mut Vec<u8>) -> UResult<()> {
if input.first() == Some(&b'#') {
return Err(USimpleError::new(1, "error: invalid input".to_owned()));
}
pub fn decode(&mut self) -> DecodeResult {
let mut buf = vec![];
self.input.read_to_end(&mut buf)?;
if self.ignore_garbage {
buf.retain(|c| self.alphabet.contains(c));
} else {
buf.retain(|&c| c != b'\r' && c != b'\n');
let decode_result = match z85::decode(input) {
Ok(ve) => ve,
Err(_de) => {
return Err(USimpleError::new(1, "error: invalid input".to_owned()));
}
};
decode(self.format, &buf)
output.extend_from_slice(&decode_result);
Ok(())
}
pub fn encode(&mut self) -> Result<String, EncodeError> {
let mut buf: Vec<u8> = vec![];
match self.input.read_to_end(&mut buf) {
Ok(_) => encode(self.format, buf.as_slice()),
Err(_) => Err(EncodeError::InvalidInput),
/// Always returns 5.
// NOTE(review): presumably because Z85 represents each 4-byte group as 5 characters; confirm against the Z85 spec
fn valid_decoding_multiple(&self) -> usize {
5
}
/// Z85-encodes `input` and appends the encoded bytes to the back of `output`.
///
/// Returns an error, leaving `output` untouched, when the length of `input` is not a multiple of 4.
fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque<u8>) -> UResult<()> {
    match input.len() % 4 {
        0 => {
            let encoded = z85::encode(input);

            output.extend(encoded.into_bytes());

            Ok(())
        }
        // The spec does not allow inputs whose length is not a multiple of 4, but the z85 crate
        // implements a padded encoding and would accept them, so they are rejected here instead.
        _ => Err(USimpleError::new(
            1,
            "error: invalid input (length must be multiple of 4 characters)".to_owned(),
        )),
    }
}
/// Always returns 4, the same multiple that `encode_to_vec_deque` requires of its input length.
fn unpadded_multiple(&self) -> usize {
4
}
}
// NOTE: this will likely be phased out at some point
pub fn wrap_print<R: Read>(data: &Data<R>, res: &str) {
let stdout = io::stdout();
wrap_write(stdout.lock(), data.line_wrap, res).unwrap();
}
pub fn wrap_write<W: Write>(mut writer: W, line_wrap: usize, res: &str) -> io::Result<()> {
use std::cmp::min;
if line_wrap == 0 {
return write!(writer, "{res}");
impl SupportsFastDecodeAndEncode for EncodingWrapper {
/// Returns the alphabet stored in this wrapper.
fn alphabet(&self) -> &'static [u8] {
self.alphabet
}
let mut start = 0;
while start < res.len() {
let end = min(start + line_wrap, res.len());
writeln!(writer, "{}", &res[start..end])?;
start = end;
// Adapted from `decode` in the "data-encoding" crate
fn decode_into_vec(&self, input: &[u8], output: &mut Vec<u8>) -> UResult<()> {
let decode_len_result = match self.encoding.decode_len(input.len()) {
Ok(us) => us,
Err(_de) => {
return Err(USimpleError::new(1, "error: invalid input".to_owned()));
}
};
let output_len = output.len();
output.resize(output_len + decode_len_result, 0);
match self.encoding.decode_mut(input, &mut (output[output_len..])) {
Ok(us) => {
// See:
// https://docs.rs/data-encoding/latest/data_encoding/struct.Encoding.html#method.decode_mut
// "Returns the length of the decoded output. This length may be smaller than the output length if the input contained padding or ignored characters. The output bytes after the returned length are not initialized and should not be read."
output.truncate(output_len + us);
}
Err(_de) => {
return Err(USimpleError::new(1, "error: invalid input".to_owned()));
}
}
Ok(())
}
Ok(())
/// Returns the `valid_decoding_multiple` stored in this wrapper.
fn valid_decoding_multiple(&self) -> usize {
self.valid_decoding_multiple
}
// Adapted from `encode_append` in the "data-encoding" crate
/// Encodes `input` with `self.encoding` and appends the encoded bytes to the back of `output`.
fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque<u8>) -> UResult<()> {
    let start = output.len();
    let encoded_len = self.encoding.encode_len(input.len());

    // Grow the deque by exactly the number of bytes the encoder is going to write
    output.resize(start + encoded_len, 0);

    // `encode_mut` needs one contiguous slice to write into
    let contiguous = output.make_contiguous();
    let destination = &mut contiguous[start..];

    self.encoding.encode_mut(input, destination);

    Ok(())
}
/// Returns the `unpadded_multiple` stored in this wrapper.
fn unpadded_multiple(&self) -> usize {
self.unpadded_multiple
}
}

View file

@ -146,3 +146,77 @@ fn test_base64_file_not_found() {
.fails()
.stderr_only("base64: a.txt: No such file or directory\n");
}
// Encodes a sentence with `--wrap 10`. The encoded data fills the last line completely
// (10 characters), and the expected output still ends with exactly one newline.
#[test]
fn test_no_repeated_trailing_newline() {
new_ucmd!()
.args(&["--wrap", "10", "--", "-"])
.pipe_in("The quick brown fox jumps over the lazy dog.")
.succeeds()
.stdout_only(
// cSpell:disable
"\
VGhlIHF1aW
NrIGJyb3du
IGZveCBqdW
1wcyBvdmVy
IHRoZSBsYX
p5IGRvZy4=
",
// cSpell:enable
);
}
// Encodes input from stdin without passing `--wrap`. The expected output is broken into
// lines of 76 characters, followed by a shorter final line.
#[test]
fn test_wrap_default() {
const PIPE_IN: &str = "The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.";
new_ucmd!()
.args(&["--", "-"])
.pipe_in(PIPE_IN)
.succeeds()
.stdout_only(
// cSpell:disable
"\
VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZy4gVGhlIHF1aWNrIGJy
b3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZy4gVGhlIHF1aWNrIGJyb3duIGZveCBqdW1w
cyBvdmVyIHRoZSBsYXp5IGRvZy4=
",
// cSpell:enable
);
}
// Prevent regression to:
//
// coreutils manpage base64 | rg --fixed-strings -- 'base32'
// The data are encoded as described for the base32 alphabet in RFC 4648.
// to the bytes of the formal base32 alphabet. Use \-\-ignore\-garbage
// The data are encoded as described for the base32 alphabet in RFC 4648.
// to the bytes of the formal base32 alphabet. Use \-\-ignore\-garbage
// Runs the test binary with the arguments "manpage base64" and checks that the generated
// page mentions the base64 alphabet and does not mention base32 anywhere.
#[test]
fn test_manpage() {
use std::process::{Command, Stdio};
let test_scenario = TestScenario::new("");
// Invoke the binary directly with the "manpage" argument followed by the utility name
let child = Command::new(test_scenario.bin_path)
.arg("manpage")
.arg("base64")
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.unwrap();
let output = child.wait_with_output().unwrap();
// The command must exit with status 0 and print nothing to stderr
assert_eq!(output.status.code().unwrap(), 0);
assert!(output.stderr.is_empty());
let stdout_str = std::str::from_utf8(&output.stdout).unwrap();
assert!(stdout_str.contains("base64 alphabet"));
// Case-insensitive check that "base32" does not appear in the output
assert!(!stdout_str.to_ascii_lowercase().contains("base32"));
}

View file

@ -3,17 +3,23 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//spell-checker: ignore (encodings) lsbf msbf
// spell-checker: ignore (encodings) lsbf msbf
use crate::common::util::TestScenario;
#[test]
fn test_z85_not_padded() {
fn test_z85_not_padded_decode() {
// The z85 crate deviates from the standard in some cases; we have to catch those
new_ucmd!()
.args(&["--z85", "-d"])
.pipe_in("##########")
.fails()
.stderr_only("basenc: error: invalid input\n");
}
#[test]
fn test_z85_not_padded_encode() {
// The z85 crate deviates from the standard in some cases; we have to catch those
new_ucmd!()
.args(&["--z85"])
.pipe_in("123")
@ -26,7 +32,7 @@ fn test_invalid_input() {
let error_message = if cfg!(windows) {
"basenc: .: Permission denied\n"
} else {
"basenc: error: invalid input\n"
"basenc: read error: Is a directory\n"
};
new_ucmd!()
.args(&["--base32", "."])
@ -40,7 +46,6 @@ fn test_base64() {
.arg("--base64")
.pipe_in("to>be?")
.succeeds()
.no_stderr()
.stdout_only("dG8+YmU/\n");
}
@ -50,7 +55,6 @@ fn test_base64_decode() {
.args(&["--base64", "-d"])
.pipe_in("dG8+YmU/")
.succeeds()
.no_stderr()
.stdout_only("to>be?");
}
@ -60,7 +64,6 @@ fn test_base64url() {
.arg("--base64url")
.pipe_in("to>be?")
.succeeds()
.no_stderr()
.stdout_only("dG8-YmU_\n");
}
@ -70,7 +73,6 @@ fn test_base64url_decode() {
.args(&["--base64url", "-d"])
.pipe_in("dG8-YmU_")
.succeeds()
.no_stderr()
.stdout_only("to>be?");
}
@ -80,7 +82,6 @@ fn test_base32() {
.arg("--base32")
.pipe_in("nice>base?")
.succeeds()
.no_stderr()
.stdout_only("NZUWGZJ6MJQXGZJ7\n"); // spell-checker:disable-line
}
@ -90,7 +91,6 @@ fn test_base32_decode() {
.args(&["--base32", "-d"])
.pipe_in("NZUWGZJ6MJQXGZJ7") // spell-checker:disable-line
.succeeds()
.no_stderr()
.stdout_only("nice>base?");
}
@ -100,7 +100,6 @@ fn test_base32hex() {
.arg("--base32hex")
.pipe_in("nice>base?")
.succeeds()
.no_stderr()
.stdout_only("DPKM6P9UC9GN6P9V\n"); // spell-checker:disable-line
}
@ -110,7 +109,6 @@ fn test_base32hex_decode() {
.args(&["--base32hex", "-d"])
.pipe_in("DPKM6P9UC9GN6P9V") // spell-checker:disable-line
.succeeds()
.no_stderr()
.stdout_only("nice>base?");
}
@ -120,7 +118,6 @@ fn test_base16() {
.arg("--base16")
.pipe_in("Hello, World!")
.succeeds()
.no_stderr()
.stdout_only("48656C6C6F2C20576F726C6421\n");
}
@ -130,7 +127,6 @@ fn test_base16_decode() {
.args(&["--base16", "-d"])
.pipe_in("48656C6C6F2C20576F726C6421")
.succeeds()
.no_stderr()
.stdout_only("Hello, World!");
}
@ -140,7 +136,6 @@ fn test_base2msbf() {
.arg("--base2msbf")
.pipe_in("msbf")
.succeeds()
.no_stderr()
.stdout_only("01101101011100110110001001100110\n");
}
@ -150,7 +145,6 @@ fn test_base2msbf_decode() {
.args(&["--base2msbf", "-d"])
.pipe_in("01101101011100110110001001100110")
.succeeds()
.no_stderr()
.stdout_only("msbf");
}
@ -160,7 +154,6 @@ fn test_base2lsbf() {
.arg("--base2lsbf")
.pipe_in("lsbf")
.succeeds()
.no_stderr()
.stdout_only("00110110110011100100011001100110\n");
}
@ -170,7 +163,6 @@ fn test_base2lsbf_decode() {
.args(&["--base2lsbf", "-d"])
.pipe_in("00110110110011100100011001100110")
.succeeds()
.no_stderr()
.stdout_only("lsbf");
}
@ -189,7 +181,6 @@ fn test_choose_last_encoding_z85() {
])
.pipe_in("Hello, World")
.succeeds()
.no_stderr()
.stdout_only("nm=QNz.92jz/PV8\n");
}
@ -208,7 +199,6 @@ fn test_choose_last_encoding_base64() {
])
.pipe_in("Hello, World!")
.succeeds()
.no_stderr()
.stdout_only("SGVsbG8sIFdvcmxkIQ==\n"); // spell-checker:disable-line
}
@ -227,7 +217,6 @@ fn test_choose_last_encoding_base2lsbf() {
])
.pipe_in("lsbf")
.succeeds()
.no_stderr()
.stdout_only("00110110110011100100011001100110\n");
}
@ -248,6 +237,18 @@ fn test_base32_decode_repeated() {
])
.pipe_in("NZUWGZJ6MJQXGZJ7") // spell-checker:disable-line
.succeeds()
.no_stderr()
.stdout_only("nice>base?");
}
// The restriction that input length has to be divisible by 4 only applies to data being encoded with Z85, not to the
// decoding of Z85-encoded data
//
// Decodes a 10-character Z85 string and expects the 8-byte output "12345678".
#[test]
fn test_z85_length_check() {
new_ucmd!()
.args(&["--decode", "--z85"])
// Input has length 10, not divisible by 4
// spell-checker:disable-next-line
.pipe_in("f!$Kwh8WxM")
.succeeds()
.stdout_only("12345678");
}