mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
Merge pull request #6719 from andrewliebenow/basenc-faster-encoding
basenc: perform faster, streaming encoding
This commit is contained in:
commit
d41d51a0be
15 changed files with 1604 additions and 284 deletions
106
Cargo.lock
generated
106
Cargo.lock
generated
|
@ -185,6 +185,21 @@ dependencies = [
|
|||
"syn 2.0.79",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bit-set"
|
||||
version = "0.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
|
||||
dependencies = [
|
||||
"bit-vec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bit-vec"
|
||||
version = "0.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
|
@ -850,16 +865,6 @@ version = "0.3.6"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3"
|
||||
dependencies = [
|
||||
"log",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.1"
|
||||
|
@ -1548,6 +1553,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"libm",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1791,23 +1797,38 @@ dependencies = [
|
|||
"hex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proptest"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4c2511913b88df1637da85cc8d96ec8e43a3f8bb8ccb71ee1ac240d6f3df58d"
|
||||
dependencies = [
|
||||
"bit-set",
|
||||
"bit-vec",
|
||||
"bitflags 2.6.0",
|
||||
"lazy_static",
|
||||
"num-traits",
|
||||
"rand",
|
||||
"rand_chacha",
|
||||
"rand_xorshift",
|
||||
"regex-syntax",
|
||||
"rusty-fork",
|
||||
"tempfile",
|
||||
"unarray",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-error"
|
||||
version = "1.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
|
||||
|
||||
[[package]]
|
||||
name = "quick-error"
|
||||
version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
|
||||
|
||||
[[package]]
|
||||
name = "quickcheck"
|
||||
version = "1.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
|
||||
dependencies = [
|
||||
"env_logger",
|
||||
"log",
|
||||
"rand",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.37"
|
||||
|
@ -1862,6 +1883,15 @@ dependencies = [
|
|||
"rand_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_xorshift"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f"
|
||||
dependencies = [
|
||||
"rand_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.10.0"
|
||||
|
@ -2030,6 +2060,18 @@ dependencies = [
|
|||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rusty-fork"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f"
|
||||
dependencies = [
|
||||
"fnv",
|
||||
"quick-error 1.2.3",
|
||||
"tempfile",
|
||||
"wait-timeout",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "same-file"
|
||||
version = "1.0.6"
|
||||
|
@ -2396,6 +2438,12 @@ version = "1.15.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
|
||||
|
||||
[[package]]
|
||||
name = "unarray"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.13"
|
||||
|
@ -2476,6 +2524,7 @@ name = "uu_base32"
|
|||
version = "0.0.27"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"proptest",
|
||||
"uucore",
|
||||
]
|
||||
|
||||
|
@ -2483,6 +2532,7 @@ dependencies = [
|
|||
name = "uu_base64"
|
||||
version = "0.0.27"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"uu_base32",
|
||||
"uucore",
|
||||
]
|
||||
|
@ -2586,7 +2636,7 @@ dependencies = [
|
|||
"filetime",
|
||||
"indicatif",
|
||||
"libc",
|
||||
"quick-error",
|
||||
"quick-error 2.0.1",
|
||||
"selinux",
|
||||
"uucore",
|
||||
"walkdir",
|
||||
|
@ -2730,7 +2780,6 @@ dependencies = [
|
|||
"num-bigint",
|
||||
"num-prime",
|
||||
"num-traits",
|
||||
"quickcheck",
|
||||
"rand",
|
||||
"smallvec",
|
||||
"uucore",
|
||||
|
@ -3035,7 +3084,7 @@ dependencies = [
|
|||
"chrono",
|
||||
"clap",
|
||||
"itertools",
|
||||
"quick-error",
|
||||
"quick-error 2.0.1",
|
||||
"regex",
|
||||
"uucore",
|
||||
]
|
||||
|
@ -3533,6 +3582,15 @@ version = "0.9.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "wait-timeout"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.5.0"
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
# spell-checker:ignore proptest
|
||||
|
||||
[package]
|
||||
name = "uu_base32"
|
||||
version = "0.0.27"
|
||||
|
@ -20,6 +22,9 @@ path = "src/base32.rs"
|
|||
clap = { workspace = true }
|
||||
uucore = { workspace = true, features = ["encoding"] }
|
||||
|
||||
[dev-dependencies]
|
||||
proptest = "1.5.0"
|
||||
|
||||
[[bin]]
|
||||
name = "base32"
|
||||
path = "src/main.rs"
|
||||
|
|
|
@ -3,13 +3,11 @@
|
|||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
use std::io::{stdin, Read};
|
||||
pub mod base_common;
|
||||
|
||||
use clap::Command;
|
||||
use uucore::{encoding::Format, error::UResult, help_about, help_usage};
|
||||
|
||||
pub mod base_common;
|
||||
|
||||
const ABOUT: &str = help_about!("base32.md");
|
||||
const USAGE: &str = help_usage!("base32.md");
|
||||
|
||||
|
@ -17,20 +15,11 @@ const USAGE: &str = help_usage!("base32.md");
|
|||
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||
let format = Format::Base32;
|
||||
|
||||
let config: base_common::Config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;
|
||||
let config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;
|
||||
|
||||
// Create a reference to stdin so we can return a locked stdin from
|
||||
// parse_base_cmd_args
|
||||
let stdin_raw = stdin();
|
||||
let mut input: Box<dyn Read> = base_common::get_input(&config, &stdin_raw)?;
|
||||
let mut input = base_common::get_input(&config)?;
|
||||
|
||||
base_common::handle_input(
|
||||
&mut input,
|
||||
format,
|
||||
config.wrap_cols,
|
||||
config.ignore_garbage,
|
||||
config.decode,
|
||||
)
|
||||
base_common::handle_input(&mut input, format, config)
|
||||
}
|
||||
|
||||
pub fn uu_app() -> Command {
|
||||
|
|
|
@ -3,27 +3,35 @@
|
|||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
use std::io::{stdout, Read, Write};
|
||||
// spell-checker:ignore hexupper lsbf msbf unpadded
|
||||
|
||||
use clap::{crate_version, Arg, ArgAction, Command};
|
||||
use std::fs::File;
|
||||
use std::io::{self, ErrorKind, Read};
|
||||
use std::path::{Path, PathBuf};
|
||||
use uucore::display::Quotable;
|
||||
use uucore::encoding::{wrap_print, Data, EncodeError, Format};
|
||||
use uucore::encoding::{
|
||||
for_base_common::{BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER},
|
||||
Format, Z85Wrapper, BASE2LSBF, BASE2MSBF,
|
||||
};
|
||||
use uucore::encoding::{EncodingWrapper, SupportsFastDecodeAndEncode};
|
||||
use uucore::error::{FromIo, UResult, USimpleError, UUsageError};
|
||||
use uucore::format_usage;
|
||||
|
||||
use std::fs::File;
|
||||
use std::io::{BufReader, Stdin};
|
||||
use std::path::Path;
|
||||
pub const BASE_CMD_PARSE_ERROR: i32 = 1;
|
||||
|
||||
use clap::{crate_version, Arg, ArgAction, Command};
|
||||
/// Encoded output will be formatted in lines of this length (the last line can be shorter)
|
||||
///
|
||||
/// Other implementations default to 76
|
||||
///
|
||||
/// This default is only used if no "-w"/"--wrap" argument is passed
|
||||
pub const WRAP_DEFAULT: usize = 76;
|
||||
|
||||
pub static BASE_CMD_PARSE_ERROR: i32 = 1;
|
||||
|
||||
// Config.
|
||||
pub struct Config {
|
||||
pub decode: bool,
|
||||
pub ignore_garbage: bool,
|
||||
pub wrap_cols: Option<usize>,
|
||||
pub to_read: Option<String>,
|
||||
pub to_read: Option<PathBuf>,
|
||||
}
|
||||
|
||||
pub mod options {
|
||||
|
@ -35,9 +43,10 @@ pub mod options {
|
|||
|
||||
impl Config {
|
||||
pub fn from(options: &clap::ArgMatches) -> UResult<Self> {
|
||||
let file: Option<String> = match options.get_many::<String>(options::FILE) {
|
||||
let to_read = match options.get_many::<String>(options::FILE) {
|
||||
Some(mut values) => {
|
||||
let name = values.next().unwrap();
|
||||
|
||||
if let Some(extra_op) = values.next() {
|
||||
return Err(UUsageError::new(
|
||||
BASE_CMD_PARSE_ERROR,
|
||||
|
@ -48,19 +57,22 @@ impl Config {
|
|||
if name == "-" {
|
||||
None
|
||||
} else {
|
||||
if !Path::exists(Path::new(name)) {
|
||||
let path = Path::new(name);
|
||||
|
||||
if !path.exists() {
|
||||
return Err(USimpleError::new(
|
||||
BASE_CMD_PARSE_ERROR,
|
||||
format!("{}: No such file or directory", name.maybe_quote()),
|
||||
format!("{}: No such file or directory", path.maybe_quote()),
|
||||
));
|
||||
}
|
||||
Some(name.clone())
|
||||
|
||||
Some(path.to_owned())
|
||||
}
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
let cols = options
|
||||
let wrap_cols = options
|
||||
.get_one::<String>(options::WRAP)
|
||||
.map(|num| {
|
||||
num.parse::<usize>().map_err(|_| {
|
||||
|
@ -75,8 +87,8 @@ impl Config {
|
|||
Ok(Self {
|
||||
decode: options.get_flag(options::DECODE),
|
||||
ignore_garbage: options.get_flag(options::IGNORE_GARBAGE),
|
||||
wrap_cols: cols,
|
||||
to_read: file,
|
||||
wrap_cols,
|
||||
to_read,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -118,7 +130,7 @@ pub fn base_app(about: &'static str, usage: &str) -> Command {
|
|||
.short('w')
|
||||
.long(options::WRAP)
|
||||
.value_name("COLS")
|
||||
.help("wrap encoded lines after COLS character (default 76, 0 to disable wrapping)")
|
||||
.help(format!("wrap encoded lines after COLS character (default {WRAP_DEFAULT}, 0 to disable wrapping)"))
|
||||
.overrides_with(options::WRAP),
|
||||
)
|
||||
// "multiple" arguments are used to check whether there is more than one
|
||||
|
@ -131,55 +143,619 @@ pub fn base_app(about: &'static str, usage: &str) -> Command {
|
|||
)
|
||||
}
|
||||
|
||||
pub fn get_input<'a>(config: &Config, stdin_ref: &'a Stdin) -> UResult<Box<dyn Read + 'a>> {
|
||||
pub fn get_input(config: &Config) -> UResult<Box<dyn Read>> {
|
||||
match &config.to_read {
|
||||
Some(name) => {
|
||||
let file_buf =
|
||||
File::open(Path::new(name)).map_err_context(|| name.maybe_quote().to_string())?;
|
||||
Ok(Box::new(BufReader::new(file_buf))) // as Box<dyn Read>
|
||||
Some(path_buf) => {
|
||||
// Do not buffer input, because buffering is handled by `fast_decode` and `fast_encode`
|
||||
let file =
|
||||
File::open(path_buf).map_err_context(|| path_buf.maybe_quote().to_string())?;
|
||||
|
||||
Ok(Box::new(file))
|
||||
}
|
||||
None => {
|
||||
Ok(Box::new(stdin_ref.lock())) // as Box<dyn Read>
|
||||
let stdin_lock = io::stdin().lock();
|
||||
|
||||
Ok(Box::new(stdin_lock))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn handle_input<R: Read>(
|
||||
input: &mut R,
|
||||
format: Format,
|
||||
line_wrap: Option<usize>,
|
||||
ignore_garbage: bool,
|
||||
decode: bool,
|
||||
) -> UResult<()> {
|
||||
let mut data = Data::new(input, format).ignore_garbage(ignore_garbage);
|
||||
if let Some(wrap) = line_wrap {
|
||||
data = data.line_wrap(wrap);
|
||||
}
|
||||
pub fn handle_input<R: Read>(input: &mut R, format: Format, config: Config) -> UResult<()> {
|
||||
let supports_fast_decode_and_encode = get_supports_fast_decode_and_encode(format);
|
||||
|
||||
if decode {
|
||||
match data.decode() {
|
||||
Ok(s) => {
|
||||
// Silent the warning as we want to the error message
|
||||
#[allow(clippy::question_mark)]
|
||||
if stdout().write_all(&s).is_err() {
|
||||
// on windows console, writing invalid utf8 returns an error
|
||||
return Err(USimpleError::new(1, "error: cannot write non-utf8 data"));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
Err(_) => Err(USimpleError::new(1, "error: invalid input")),
|
||||
}
|
||||
let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode.as_ref();
|
||||
|
||||
let mut stdout_lock = io::stdout().lock();
|
||||
|
||||
if config.decode {
|
||||
fast_decode::fast_decode(
|
||||
input,
|
||||
&mut stdout_lock,
|
||||
supports_fast_decode_and_encode_ref,
|
||||
config.ignore_garbage,
|
||||
)
|
||||
} else {
|
||||
match data.encode() {
|
||||
Ok(s) => {
|
||||
wrap_print(&data, &s);
|
||||
Ok(())
|
||||
}
|
||||
Err(EncodeError::InvalidInput) => Err(USimpleError::new(1, "error: invalid input")),
|
||||
Err(_) => Err(USimpleError::new(
|
||||
1,
|
||||
"error: invalid input (length must be multiple of 4 characters)",
|
||||
)),
|
||||
}
|
||||
fast_encode::fast_encode(
|
||||
input,
|
||||
&mut stdout_lock,
|
||||
supports_fast_decode_and_encode_ref,
|
||||
config.wrap_cols,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_supports_fast_decode_and_encode(format: Format) -> Box<dyn SupportsFastDecodeAndEncode> {
|
||||
const BASE16_VALID_DECODING_MULTIPLE: usize = 2;
|
||||
const BASE2_VALID_DECODING_MULTIPLE: usize = 8;
|
||||
const BASE32_VALID_DECODING_MULTIPLE: usize = 8;
|
||||
const BASE64_VALID_DECODING_MULTIPLE: usize = 4;
|
||||
|
||||
const BASE16_UNPADDED_MULTIPLE: usize = 1;
|
||||
const BASE2_UNPADDED_MULTIPLE: usize = 1;
|
||||
const BASE32_UNPADDED_MULTIPLE: usize = 5;
|
||||
const BASE64_UNPADDED_MULTIPLE: usize = 3;
|
||||
|
||||
match format {
|
||||
Format::Base16 => Box::from(EncodingWrapper::new(
|
||||
HEXUPPER,
|
||||
BASE16_VALID_DECODING_MULTIPLE,
|
||||
BASE16_UNPADDED_MULTIPLE,
|
||||
// spell-checker:disable-next-line
|
||||
b"0123456789ABCDEF",
|
||||
)),
|
||||
Format::Base2Lsbf => Box::from(EncodingWrapper::new(
|
||||
BASE2LSBF,
|
||||
BASE2_VALID_DECODING_MULTIPLE,
|
||||
BASE2_UNPADDED_MULTIPLE,
|
||||
// spell-checker:disable-next-line
|
||||
b"01",
|
||||
)),
|
||||
Format::Base2Msbf => Box::from(EncodingWrapper::new(
|
||||
BASE2MSBF,
|
||||
BASE2_VALID_DECODING_MULTIPLE,
|
||||
BASE2_UNPADDED_MULTIPLE,
|
||||
// spell-checker:disable-next-line
|
||||
b"01",
|
||||
)),
|
||||
Format::Base32 => Box::from(EncodingWrapper::new(
|
||||
BASE32,
|
||||
BASE32_VALID_DECODING_MULTIPLE,
|
||||
BASE32_UNPADDED_MULTIPLE,
|
||||
// spell-checker:disable-next-line
|
||||
b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567=",
|
||||
)),
|
||||
Format::Base32Hex => Box::from(EncodingWrapper::new(
|
||||
BASE32HEX,
|
||||
BASE32_VALID_DECODING_MULTIPLE,
|
||||
BASE32_UNPADDED_MULTIPLE,
|
||||
// spell-checker:disable-next-line
|
||||
b"0123456789ABCDEFGHIJKLMNOPQRSTUV=",
|
||||
)),
|
||||
Format::Base64 => Box::from(EncodingWrapper::new(
|
||||
BASE64,
|
||||
BASE64_VALID_DECODING_MULTIPLE,
|
||||
BASE64_UNPADDED_MULTIPLE,
|
||||
// spell-checker:disable-next-line
|
||||
b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=+/",
|
||||
)),
|
||||
Format::Base64Url => Box::from(EncodingWrapper::new(
|
||||
BASE64URL,
|
||||
BASE64_VALID_DECODING_MULTIPLE,
|
||||
BASE64_UNPADDED_MULTIPLE,
|
||||
// spell-checker:disable-next-line
|
||||
b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=_-",
|
||||
)),
|
||||
Format::Z85 => Box::from(Z85Wrapper {}),
|
||||
}
|
||||
}
|
||||
|
||||
pub mod fast_encode {
|
||||
use crate::base_common::{format_read_error, WRAP_DEFAULT};
|
||||
use std::{
|
||||
collections::VecDeque,
|
||||
io::{self, ErrorKind, Read, Write},
|
||||
num::NonZeroUsize,
|
||||
};
|
||||
use uucore::{
|
||||
encoding::SupportsFastDecodeAndEncode,
|
||||
error::{UResult, USimpleError},
|
||||
};
|
||||
|
||||
/// State needed to emit encoded output as fixed-width lines.
struct LineWrapping {
    /// Scratch buffer in which complete lines (each followed by `\n`) are assembled before
    /// being written out in a single call
    print_buffer: Vec<u8>,
    /// Number of encoded bytes per output line; non-zero by construction
    line_length: NonZeroUsize,
}
|
||||
|
||||
// Start of helper functions
|
||||
fn encode_in_chunks_to_buffer(
|
||||
supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
|
||||
encode_in_chunks_of_size: usize,
|
||||
bytes_to_steal: usize,
|
||||
read_buffer: &[u8],
|
||||
encoded_buffer: &mut VecDeque<u8>,
|
||||
leftover_buffer: &mut VecDeque<u8>,
|
||||
) -> UResult<()> {
|
||||
let bytes_to_chunk = if bytes_to_steal > 0 {
|
||||
let (stolen_bytes, rest_of_read_buffer) = read_buffer.split_at(bytes_to_steal);
|
||||
|
||||
leftover_buffer.extend(stolen_bytes);
|
||||
|
||||
// After appending the stolen bytes to `leftover_buffer`, it should be the right size
|
||||
assert!(leftover_buffer.len() == encode_in_chunks_of_size);
|
||||
|
||||
// Encode the old unencoded data and the stolen bytes, and add the result to
|
||||
// `encoded_buffer`
|
||||
supports_fast_decode_and_encode
|
||||
.encode_to_vec_deque(leftover_buffer.make_contiguous(), encoded_buffer)?;
|
||||
|
||||
// Reset `leftover_buffer`
|
||||
leftover_buffer.clear();
|
||||
|
||||
rest_of_read_buffer
|
||||
} else {
|
||||
// Do not need to steal bytes from `read_buffer`
|
||||
read_buffer
|
||||
};
|
||||
|
||||
let chunks_exact = bytes_to_chunk.chunks_exact(encode_in_chunks_of_size);
|
||||
|
||||
let remainder = chunks_exact.remainder();
|
||||
|
||||
for sl in chunks_exact {
|
||||
assert!(sl.len() == encode_in_chunks_of_size);
|
||||
|
||||
supports_fast_decode_and_encode.encode_to_vec_deque(sl, encoded_buffer)?;
|
||||
}
|
||||
|
||||
leftover_buffer.extend(remainder);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Writes the whole of `encoded_buffer` to `output` without inserting any line breaks.
///
/// When `is_cleanup` is false, `encoded_buffer` is emptied after the write. When it is true, a
/// single `\n` is written after the data and `encoded_buffer` is left untouched.
///
/// NOTE(review): the newline is written on cleanup even though wrapping is disabled, and even
/// when `encoded_buffer` is empty; confirm this matches the expected `-w 0` output.
fn write_without_line_breaks(
    encoded_buffer: &mut VecDeque<u8>,
    output: &mut dyn Write,
    is_cleanup: bool,
) -> io::Result<()> {
    // TODO
    // `encoded_buffer` only has to be a VecDeque if line wrapping is enabled
    // (`make_contiguous` should be a no-op here)
    // Refactoring could avoid this call
    let pending: &[u8] = encoded_buffer.make_contiguous();

    output.write_all(pending)?;

    if !is_cleanup {
        encoded_buffer.clear();

        return Ok(());
    }

    output.write_all(b"\n")
}
|
||||
|
||||
fn write_with_line_breaks(
|
||||
&mut LineWrapping {
|
||||
ref line_length,
|
||||
ref mut print_buffer,
|
||||
}: &mut LineWrapping,
|
||||
encoded_buffer: &mut VecDeque<u8>,
|
||||
output: &mut dyn Write,
|
||||
is_cleanup: bool,
|
||||
) -> io::Result<()> {
|
||||
let line_length = line_length.get();
|
||||
|
||||
let make_contiguous_result = encoded_buffer.make_contiguous();
|
||||
|
||||
let chunks_exact = make_contiguous_result.chunks_exact(line_length);
|
||||
|
||||
let mut bytes_added_to_print_buffer = 0;
|
||||
|
||||
for sl in chunks_exact {
|
||||
bytes_added_to_print_buffer += sl.len();
|
||||
|
||||
print_buffer.extend_from_slice(sl);
|
||||
print_buffer.push(b'\n');
|
||||
}
|
||||
|
||||
output.write_all(print_buffer)?;
|
||||
|
||||
// Remove the bytes that were just printed from `encoded_buffer`
|
||||
drop(encoded_buffer.drain(..bytes_added_to_print_buffer));
|
||||
|
||||
if is_cleanup {
|
||||
if encoded_buffer.is_empty() {
|
||||
// Do not write a newline in this case, because two trailing newlines should never be printed
|
||||
} else {
|
||||
// Print the partial line, since this is cleanup and no more data is coming
|
||||
output.write_all(encoded_buffer.make_contiguous())?;
|
||||
output.write_all(b"\n")?;
|
||||
}
|
||||
} else {
|
||||
print_buffer.clear();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_to_output(
|
||||
line_wrapping_option: &mut Option<LineWrapping>,
|
||||
encoded_buffer: &mut VecDeque<u8>,
|
||||
output: &mut dyn Write,
|
||||
is_cleanup: bool,
|
||||
) -> io::Result<()> {
|
||||
// Write all data in `encoded_buffer` to `output`
|
||||
if let &mut Some(ref mut li) = line_wrapping_option {
|
||||
write_with_line_breaks(li, encoded_buffer, output, is_cleanup)?;
|
||||
} else {
|
||||
write_without_line_breaks(encoded_buffer, output, is_cleanup)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
// End of helper functions
|
||||
|
||||
pub fn fast_encode<R: Read, W: Write>(
|
||||
input: &mut R,
|
||||
mut output: W,
|
||||
supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
|
||||
wrap: Option<usize>,
|
||||
) -> UResult<()> {
|
||||
// Based on performance testing
|
||||
const INPUT_BUFFER_SIZE: usize = 32 * 1_024;
|
||||
|
||||
const ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE: usize = 1_024;
|
||||
|
||||
let encode_in_chunks_of_size =
|
||||
supports_fast_decode_and_encode.unpadded_multiple() * ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE;
|
||||
|
||||
assert!(encode_in_chunks_of_size > 0);
|
||||
|
||||
// The "data-encoding" crate supports line wrapping, but not arbitrary line wrapping, only certain widths, so
|
||||
// line wrapping must be handled here.
|
||||
// https://github.com/ia0/data-encoding/blob/4f42ad7ef242f6d243e4de90cd1b46a57690d00e/lib/src/lib.rs#L1710
|
||||
let mut line_wrapping = match wrap {
|
||||
// Line wrapping is disabled because "-w"/"--wrap" was passed with "0"
|
||||
Some(0) => None,
|
||||
// A custom line wrapping value was passed
|
||||
Some(an) => Some(LineWrapping {
|
||||
line_length: NonZeroUsize::new(an).unwrap(),
|
||||
print_buffer: Vec::<u8>::new(),
|
||||
}),
|
||||
// Line wrapping was not set, so the default is used
|
||||
None => Some(LineWrapping {
|
||||
line_length: NonZeroUsize::new(WRAP_DEFAULT).unwrap(),
|
||||
print_buffer: Vec::<u8>::new(),
|
||||
}),
|
||||
};
|
||||
|
||||
// Start of buffers
|
||||
// Data that was read from `input`
|
||||
let mut input_buffer = vec![0; INPUT_BUFFER_SIZE];
|
||||
|
||||
assert!(!input_buffer.is_empty());
|
||||
|
||||
// Data that was read from `input` but has not been encoded yet
|
||||
let mut leftover_buffer = VecDeque::<u8>::new();
|
||||
|
||||
// Encoded data that needs to be written to `output`
|
||||
let mut encoded_buffer = VecDeque::<u8>::new();
|
||||
// End of buffers
|
||||
|
||||
loop {
|
||||
match input.read(&mut input_buffer) {
|
||||
Ok(bytes_read_from_input) => {
|
||||
if bytes_read_from_input == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
// The part of `input_buffer` that was actually filled by the call to `read`
|
||||
let read_buffer = &input_buffer[..bytes_read_from_input];
|
||||
|
||||
// How many bytes to steal from `read_buffer` to get `leftover_buffer` to the right size
|
||||
let bytes_to_steal = encode_in_chunks_of_size - leftover_buffer.len();
|
||||
|
||||
if bytes_to_steal > bytes_read_from_input {
|
||||
// Do not have enough data to encode a chunk, so copy data to `leftover_buffer` and read more
|
||||
leftover_buffer.extend(read_buffer);
|
||||
|
||||
assert!(leftover_buffer.len() < encode_in_chunks_of_size);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// Encode data in chunks, then place it in `encoded_buffer`
|
||||
encode_in_chunks_to_buffer(
|
||||
supports_fast_decode_and_encode,
|
||||
encode_in_chunks_of_size,
|
||||
bytes_to_steal,
|
||||
read_buffer,
|
||||
&mut encoded_buffer,
|
||||
&mut leftover_buffer,
|
||||
)?;
|
||||
|
||||
assert!(leftover_buffer.len() < encode_in_chunks_of_size);
|
||||
|
||||
// Write all data in `encoded_buffer` to `output`
|
||||
write_to_output(&mut line_wrapping, &mut encoded_buffer, &mut output, false)?;
|
||||
}
|
||||
Err(er) => {
|
||||
let kind = er.kind();
|
||||
|
||||
if kind == ErrorKind::Interrupted {
|
||||
// TODO
|
||||
// Retry reading?
|
||||
}
|
||||
|
||||
return Err(USimpleError::new(1, format_read_error(kind)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
// `input` has finished producing data, so the data remaining in the buffers needs to be encoded and printed
|
||||
{
|
||||
// Encode all remaining unencoded bytes, placing them in `encoded_buffer`
|
||||
supports_fast_decode_and_encode
|
||||
.encode_to_vec_deque(leftover_buffer.make_contiguous(), &mut encoded_buffer)?;
|
||||
|
||||
// Write all data in `encoded_buffer` to output
|
||||
// `is_cleanup` triggers special cleanup-only logic
|
||||
write_to_output(&mut line_wrapping, &mut encoded_buffer, &mut output, true)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub mod fast_decode {
|
||||
use crate::base_common::format_read_error;
|
||||
use std::io::{self, ErrorKind, Read, Write};
|
||||
use uucore::{
|
||||
encoding::SupportsFastDecodeAndEncode,
|
||||
error::{UResult, USimpleError},
|
||||
};
|
||||
|
||||
// Start of helper functions
|
||||
/// Builds a lookup table, indexed by byte value, that tells whether a byte is passed on to the
/// decoder (`true`) or dropped before decoding (`false`).
///
/// If `ignore_garbage` is true, only the bytes in `alphabet` are passed on. If it is false, every
/// byte except `\n` and `\r` is passed on, and `alphabet` is not consulted.
///
/// # Panics
///
/// Panics if `ignore_garbage` is true and `alphabet` contains the same byte more than once.
fn alphabet_to_table(alphabet: &[u8], ignore_garbage: bool) -> [bool; 256] {
    // Every entry starts out as `default_entry`; the entries of the `exceptions` are then flipped
    let (default_entry, exceptions): (bool, &[u8]) = if ignore_garbage {
        // Pass through no characters except those in the alphabet
        (false, alphabet)
    } else {
        // Pass through all characters except '\n' and '\r'
        (true, b"\n\r")
    };

    let mut table = [default_entry; 256];

    for &byte in exceptions {
        let index = usize::from(byte);

        // Should not have been flipped yet
        assert!(table[index] == default_entry);

        table[index] = !default_entry;
    }

    table
}
|
||||
|
||||
fn decode_in_chunks_to_buffer(
|
||||
supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
|
||||
decode_in_chunks_of_size: usize,
|
||||
bytes_to_steal: usize,
|
||||
read_buffer_filtered: &[u8],
|
||||
decoded_buffer: &mut Vec<u8>,
|
||||
leftover_buffer: &mut Vec<u8>,
|
||||
) -> UResult<()> {
|
||||
let bytes_to_chunk = if bytes_to_steal > 0 {
|
||||
let (stolen_bytes, rest_of_read_buffer_filtered) =
|
||||
read_buffer_filtered.split_at(bytes_to_steal);
|
||||
|
||||
leftover_buffer.extend(stolen_bytes);
|
||||
|
||||
// After appending the stolen bytes to `leftover_buffer`, it should be the right size
|
||||
assert!(leftover_buffer.len() == decode_in_chunks_of_size);
|
||||
|
||||
// Decode the old un-decoded data and the stolen bytes, and add the result to
|
||||
// `decoded_buffer`
|
||||
supports_fast_decode_and_encode.decode_into_vec(leftover_buffer, decoded_buffer)?;
|
||||
|
||||
// Reset `leftover_buffer`
|
||||
leftover_buffer.clear();
|
||||
|
||||
rest_of_read_buffer_filtered
|
||||
} else {
|
||||
// Do not need to steal bytes from `read_buffer`
|
||||
read_buffer_filtered
|
||||
};
|
||||
|
||||
let chunks_exact = bytes_to_chunk.chunks_exact(decode_in_chunks_of_size);
|
||||
|
||||
let remainder = chunks_exact.remainder();
|
||||
|
||||
for sl in chunks_exact {
|
||||
assert!(sl.len() == decode_in_chunks_of_size);
|
||||
|
||||
supports_fast_decode_and_encode.decode_into_vec(sl, decoded_buffer)?;
|
||||
}
|
||||
|
||||
leftover_buffer.extend(remainder);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Writes all data in `decoded_buffer` to `output`, then empties `decoded_buffer`.
///
/// If the write fails, the error is returned and `decoded_buffer` keeps its contents.
fn write_to_output(decoded_buffer: &mut Vec<u8>, output: &mut dyn Write) -> io::Result<()> {
    let write_result = output.write_all(&decoded_buffer[..]);

    if write_result.is_ok() {
        decoded_buffer.clear();
    }

    write_result
}
|
||||
// End of helper functions
|
||||
|
||||
pub fn fast_decode<R: Read, W: Write>(
|
||||
input: &mut R,
|
||||
mut output: &mut W,
|
||||
supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
|
||||
ignore_garbage: bool,
|
||||
) -> UResult<()> {
|
||||
// Based on performance testing
|
||||
const INPUT_BUFFER_SIZE: usize = 32 * 1_024;
|
||||
|
||||
const DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE: usize = 1_024;
|
||||
|
||||
let alphabet = supports_fast_decode_and_encode.alphabet();
|
||||
let decode_in_chunks_of_size = supports_fast_decode_and_encode.valid_decoding_multiple()
|
||||
* DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE;
|
||||
|
||||
assert!(decode_in_chunks_of_size > 0);
|
||||
|
||||
// Note that it's not worth using "data-encoding"'s ignore functionality if `ignore_garbage` is true, because
|
||||
// "data-encoding"'s ignore functionality cannot discard non-ASCII bytes. The data has to be filtered before
|
||||
// passing it to "data-encoding", so there is no point in doing any filtering in "data-encoding". This also
|
||||
// allows execution to stay on the happy path in "data-encoding":
|
||||
// https://github.com/ia0/data-encoding/blob/4f42ad7ef242f6d243e4de90cd1b46a57690d00e/lib/src/lib.rs#L754-L756
|
||||
// It is also not worth using "data-encoding"'s ignore functionality when `ignore_garbage` is
|
||||
// false.
|
||||
// Note that the alphabet constants above already include the padding characters
|
||||
// TODO
|
||||
// Precompute this
|
||||
let table = alphabet_to_table(alphabet, ignore_garbage);
|
||||
|
||||
// Start of buffers
|
||||
// Data that was read from `input`
|
||||
let mut input_buffer = vec![0; INPUT_BUFFER_SIZE];
|
||||
|
||||
assert!(!input_buffer.is_empty());
|
||||
|
||||
// Data that was read from `input` but has not been decoded yet
|
||||
let mut leftover_buffer = Vec::<u8>::new();
|
||||
|
||||
// Decoded data that needs to be written to `output`
|
||||
let mut decoded_buffer = Vec::<u8>::new();
|
||||
|
||||
// Buffer that will be used when `ignore_garbage` is true, and the chunk read from `input` contains garbage
|
||||
// data
|
||||
let mut non_garbage_buffer = Vec::<u8>::new();
|
||||
// End of buffers
|
||||
|
||||
loop {
|
||||
match input.read(&mut input_buffer) {
|
||||
Ok(bytes_read_from_input) => {
|
||||
if bytes_read_from_input == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
let read_buffer_filtered = {
|
||||
// The part of `input_buffer` that was actually filled by the call to `read`
|
||||
let read_buffer = &input_buffer[..bytes_read_from_input];
|
||||
|
||||
// First just scan the data for the happy path
|
||||
// Yields significant speedup when the input does not contain line endings
|
||||
let found_garbage = read_buffer.iter().any(|ue| {
|
||||
// Garbage, since it was not found in the table
|
||||
!table[usize::from(*ue)]
|
||||
});
|
||||
|
||||
if found_garbage {
|
||||
non_garbage_buffer.clear();
|
||||
|
||||
for ue in read_buffer {
|
||||
if table[usize::from(*ue)] {
|
||||
// Not garbage, since it was found in the table
|
||||
non_garbage_buffer.push(*ue);
|
||||
}
|
||||
}
|
||||
|
||||
non_garbage_buffer.as_slice()
|
||||
} else {
|
||||
read_buffer
|
||||
}
|
||||
};
|
||||
|
||||
// How many bytes to steal from `read_buffer` to get `leftover_buffer` to the right size
|
||||
let bytes_to_steal = decode_in_chunks_of_size - leftover_buffer.len();
|
||||
|
||||
if bytes_to_steal > read_buffer_filtered.len() {
|
||||
// Do not have enough data to decode a chunk, so copy data to `leftover_buffer` and read more
|
||||
leftover_buffer.extend(read_buffer_filtered);
|
||||
|
||||
assert!(leftover_buffer.len() < decode_in_chunks_of_size);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// Decode data in chunks, then place it in `decoded_buffer`
|
||||
decode_in_chunks_to_buffer(
|
||||
supports_fast_decode_and_encode,
|
||||
decode_in_chunks_of_size,
|
||||
bytes_to_steal,
|
||||
read_buffer_filtered,
|
||||
&mut decoded_buffer,
|
||||
&mut leftover_buffer,
|
||||
)?;
|
||||
|
||||
assert!(leftover_buffer.len() < decode_in_chunks_of_size);
|
||||
|
||||
// Write all data in `decoded_buffer` to `output`
|
||||
write_to_output(&mut decoded_buffer, &mut output)?;
|
||||
}
|
||||
Err(er) => {
|
||||
let kind = er.kind();
|
||||
|
||||
if kind == ErrorKind::Interrupted {
|
||||
// TODO
|
||||
// Retry reading?
|
||||
}
|
||||
|
||||
return Err(USimpleError::new(1, format_read_error(kind)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
// `input` has finished producing data, so the data remaining in the buffers needs to be decoded and printed
|
||||
{
|
||||
// Decode all remaining encoded bytes, placing them in `decoded_buffer`
|
||||
supports_fast_decode_and_encode
|
||||
.decode_into_vec(&leftover_buffer, &mut decoded_buffer)?;
|
||||
|
||||
// Write all data in `decoded_buffer` to `output`
|
||||
write_to_output(&mut decoded_buffer, &mut output)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the user-facing message for a failed read.
///
/// The textual description of `kind` is taken from its `Display` implementation, its first
/// character is uppercased (e.g. "is a directory" -> "Is a directory"), and the result is
/// prefixed with "read error: ".
fn format_read_error(kind: ErrorKind) -> String {
    let kind_string = kind.to_string();

    let mut remaining_chars = kind_string.chars();

    // Uppercasing a single `char` may yield more than one `char`, so chain the uppercased
    // expansion of the first character with the untouched remainder
    let kind_string_capitalized: String = match remaining_chars.next() {
        Some(first) => first.to_uppercase().chain(remaining_chars).collect(),
        None => String::new(),
    };

    format!("read error: {kind_string_capitalized}")
}
|
||||
|
|
430
src/uu/base32/tests/property_tests.rs
Normal file
430
src/uu/base32/tests/property_tests.rs
Normal file
|
@ -0,0 +1,430 @@
|
|||
// spell-checker:ignore lsbf msbf proptest
|
||||
|
||||
use proptest::{prelude::TestCaseError, prop_assert, prop_assert_eq, test_runner::TestRunner};
|
||||
use std::io::Cursor;
|
||||
use uu_base32::base_common::{fast_decode, fast_encode, get_supports_fast_decode_and_encode};
|
||||
use uucore::encoding::{Format, SupportsFastDecodeAndEncode};
|
||||
|
||||
/// Number of cases each proptest runner executes: 32 when debug assertions are enabled (debug
/// builds), 128 otherwise.
const CASES: u32 = if cfg!(debug_assertions) { 32 } else { 128 };
|
||||
|
||||
/// Exclusive upper bound on the length of generated inputs for the regular scenarios:
/// 256 kibibytes when debug assertions are enabled, 4 mebibytes otherwise.
const NORMAL_INPUT_SIZE_LIMIT: usize = if cfg!(debug_assertions) {
    // 256 kibibytes
    256 * 1024
} else {
    // 4 mebibytes
    4 * 1024 * 1024
};

/// Exclusive upper bound on the length of generated inputs for the "large inputs" scenario.
const LARGE_INPUT_SIZE_LIMIT: usize = 4 * NORMAL_INPUT_SIZE_LIMIT;
|
||||
|
||||
// Note that `TestRunner`s cannot be reused
|
||||
fn get_test_runner() -> TestRunner {
|
||||
TestRunner::new(proptest::test_runner::Config {
|
||||
cases: CASES,
|
||||
failure_persistence: None,
|
||||
|
||||
..proptest::test_runner::Config::default()
|
||||
})
|
||||
}
|
||||
|
||||
fn generic_round_trip(format: Format) {
|
||||
let supports_fast_decode_and_encode = get_supports_fast_decode_and_encode(format);
|
||||
|
||||
let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode.as_ref();
|
||||
|
||||
// Make sure empty inputs round trip
|
||||
{
|
||||
get_test_runner()
|
||||
.run(
|
||||
&(
|
||||
proptest::bool::ANY,
|
||||
proptest::bool::ANY,
|
||||
proptest::option::of(0_usize..512_usize),
|
||||
),
|
||||
|(ignore_garbage, line_wrap_zero, line_wrap)| {
|
||||
configurable_round_trip(
|
||||
format,
|
||||
supports_fast_decode_and_encode_ref,
|
||||
ignore_garbage,
|
||||
line_wrap_zero,
|
||||
line_wrap,
|
||||
// Do not add garbage
|
||||
Vec::<(usize, u8)>::new(),
|
||||
// Empty input
|
||||
Vec::<u8>::new(),
|
||||
)
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
// Unusually large line wrapping settings
|
||||
{
|
||||
get_test_runner()
|
||||
.run(
|
||||
&(
|
||||
proptest::bool::ANY,
|
||||
proptest::bool::ANY,
|
||||
proptest::option::of(512_usize..65_535_usize),
|
||||
proptest::collection::vec(proptest::num::u8::ANY, 0..NORMAL_INPUT_SIZE_LIMIT),
|
||||
),
|
||||
|(ignore_garbage, line_wrap_zero, line_wrap, input)| {
|
||||
configurable_round_trip(
|
||||
format,
|
||||
supports_fast_decode_and_encode_ref,
|
||||
ignore_garbage,
|
||||
line_wrap_zero,
|
||||
line_wrap,
|
||||
// Do not add garbage
|
||||
Vec::<(usize, u8)>::new(),
|
||||
input,
|
||||
)
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
// Spend more time on sane line wrapping settings
|
||||
{
|
||||
get_test_runner()
|
||||
.run(
|
||||
&(
|
||||
proptest::bool::ANY,
|
||||
proptest::bool::ANY,
|
||||
proptest::option::of(0_usize..512_usize),
|
||||
proptest::collection::vec(proptest::num::u8::ANY, 0..NORMAL_INPUT_SIZE_LIMIT),
|
||||
),
|
||||
|(ignore_garbage, line_wrap_zero, line_wrap, input)| {
|
||||
configurable_round_trip(
|
||||
format,
|
||||
supports_fast_decode_and_encode_ref,
|
||||
ignore_garbage,
|
||||
line_wrap_zero,
|
||||
line_wrap,
|
||||
// Do not add garbage
|
||||
Vec::<(usize, u8)>::new(),
|
||||
input,
|
||||
)
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
// Test with garbage data
|
||||
{
|
||||
get_test_runner()
|
||||
.run(
|
||||
&(
|
||||
proptest::bool::ANY,
|
||||
proptest::bool::ANY,
|
||||
proptest::option::of(0_usize..512_usize),
|
||||
// Garbage data to insert
|
||||
proptest::collection::vec(
|
||||
(
|
||||
// Random index
|
||||
proptest::num::usize::ANY,
|
||||
// In all of the encodings being tested, non-ASCII bytes are garbage
|
||||
128_u8..=u8::MAX,
|
||||
),
|
||||
0..4_096,
|
||||
),
|
||||
proptest::collection::vec(proptest::num::u8::ANY, 0..NORMAL_INPUT_SIZE_LIMIT),
|
||||
),
|
||||
|(ignore_garbage, line_wrap_zero, line_wrap, garbage_data, input)| {
|
||||
configurable_round_trip(
|
||||
format,
|
||||
supports_fast_decode_and_encode_ref,
|
||||
ignore_garbage,
|
||||
line_wrap_zero,
|
||||
line_wrap,
|
||||
garbage_data,
|
||||
input,
|
||||
)
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
// Test small inputs
|
||||
{
|
||||
get_test_runner()
|
||||
.run(
|
||||
&(
|
||||
proptest::bool::ANY,
|
||||
proptest::bool::ANY,
|
||||
proptest::option::of(0_usize..512_usize),
|
||||
proptest::collection::vec(proptest::num::u8::ANY, 0..1_024),
|
||||
),
|
||||
|(ignore_garbage, line_wrap_zero, line_wrap, input)| {
|
||||
configurable_round_trip(
|
||||
format,
|
||||
supports_fast_decode_and_encode_ref,
|
||||
ignore_garbage,
|
||||
line_wrap_zero,
|
||||
line_wrap,
|
||||
// Do not add garbage
|
||||
Vec::<(usize, u8)>::new(),
|
||||
input,
|
||||
)
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
// Test small inputs with garbage data
|
||||
{
|
||||
get_test_runner()
|
||||
.run(
|
||||
&(
|
||||
proptest::bool::ANY,
|
||||
proptest::bool::ANY,
|
||||
proptest::option::of(0_usize..512_usize),
|
||||
// Garbage data to insert
|
||||
proptest::collection::vec(
|
||||
(
|
||||
// Random index
|
||||
proptest::num::usize::ANY,
|
||||
// In all of the encodings being tested, non-ASCII bytes are garbage
|
||||
128_u8..=u8::MAX,
|
||||
),
|
||||
0..1_024,
|
||||
),
|
||||
proptest::collection::vec(proptest::num::u8::ANY, 0..1_024),
|
||||
),
|
||||
|(ignore_garbage, line_wrap_zero, line_wrap, garbage_data, input)| {
|
||||
configurable_round_trip(
|
||||
format,
|
||||
supports_fast_decode_and_encode_ref,
|
||||
ignore_garbage,
|
||||
line_wrap_zero,
|
||||
line_wrap,
|
||||
garbage_data,
|
||||
input,
|
||||
)
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
// Test large inputs
|
||||
{
|
||||
get_test_runner()
|
||||
.run(
|
||||
&(
|
||||
proptest::bool::ANY,
|
||||
proptest::bool::ANY,
|
||||
proptest::option::of(0_usize..512_usize),
|
||||
proptest::collection::vec(proptest::num::u8::ANY, 0..LARGE_INPUT_SIZE_LIMIT),
|
||||
),
|
||||
|(ignore_garbage, line_wrap_zero, line_wrap, input)| {
|
||||
configurable_round_trip(
|
||||
format,
|
||||
supports_fast_decode_and_encode_ref,
|
||||
ignore_garbage,
|
||||
line_wrap_zero,
|
||||
line_wrap,
|
||||
// Do not add garbage
|
||||
Vec::<(usize, u8)>::new(),
|
||||
input,
|
||||
)
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn configurable_round_trip(
|
||||
format: Format,
|
||||
supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
|
||||
ignore_garbage: bool,
|
||||
line_wrap_zero: bool,
|
||||
line_wrap: Option<usize>,
|
||||
garbage_data: Vec<(usize, u8)>,
|
||||
mut input: Vec<u8>,
|
||||
) -> Result<(), TestCaseError> {
|
||||
// Z85 only accepts inputs with lengths divisible by 4
|
||||
if let Format::Z85 = format {
|
||||
// Reduce length of "input" until it is divisible by 4
|
||||
input.truncate((input.len() / 4) * 4);
|
||||
|
||||
assert!((input.len() % 4) == 0);
|
||||
}
|
||||
|
||||
let line_wrap_to_use = if line_wrap_zero { Some(0) } else { line_wrap };
|
||||
|
||||
let input_len = input.len();
|
||||
|
||||
let garbage_data_len = garbage_data.len();
|
||||
|
||||
let garbage_data_is_empty = garbage_data_len == 0;
|
||||
|
||||
let (input, encoded) = {
|
||||
let mut output = Vec::with_capacity(input_len * 8);
|
||||
|
||||
let mut cursor = Cursor::new(input);
|
||||
|
||||
fast_encode::fast_encode(
|
||||
&mut cursor,
|
||||
&mut output,
|
||||
supports_fast_decode_and_encode,
|
||||
line_wrap_to_use,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
(cursor.into_inner(), output)
|
||||
};
|
||||
|
||||
let encoded_or_encoded_with_garbage = if garbage_data_is_empty {
|
||||
encoded
|
||||
} else {
|
||||
let encoded_len = encoded.len();
|
||||
|
||||
let encoded_highest_index = match encoded_len.checked_sub(1) {
|
||||
Some(0) | None => None,
|
||||
Some(x) => Some(x),
|
||||
};
|
||||
|
||||
let mut garbage_data_indexed = vec![Option::<u8>::None; encoded_len];
|
||||
|
||||
let mut encoded_with_garbage = Vec::<u8>::with_capacity(encoded_len + garbage_data_len);
|
||||
|
||||
for (index, garbage_byte) in garbage_data {
|
||||
if let Some(x) = encoded_highest_index {
|
||||
let index_to_use = index % x;
|
||||
|
||||
garbage_data_indexed[index_to_use] = Some(garbage_byte);
|
||||
} else {
|
||||
encoded_with_garbage.push(garbage_byte);
|
||||
}
|
||||
}
|
||||
|
||||
for (index, encoded_byte) in encoded.into_iter().enumerate() {
|
||||
encoded_with_garbage.push(encoded_byte);
|
||||
|
||||
if let Some(garbage_byte) = garbage_data_indexed[index] {
|
||||
encoded_with_garbage.push(garbage_byte);
|
||||
}
|
||||
}
|
||||
|
||||
encoded_with_garbage
|
||||
};
|
||||
|
||||
match line_wrap_to_use {
|
||||
Some(0) => {
|
||||
let line_endings_count = encoded_or_encoded_with_garbage
|
||||
.iter()
|
||||
.filter(|byte| **byte == b'\n')
|
||||
.count();
|
||||
|
||||
// If line wrapping is disabled, there should only be one '\n' character (at the very end of the output)
|
||||
prop_assert_eq!(line_endings_count, 1);
|
||||
}
|
||||
_ => {
|
||||
// TODO
|
||||
// Validate other line wrapping settings
|
||||
}
|
||||
}
|
||||
|
||||
let decoded_or_error = {
|
||||
let mut output = Vec::with_capacity(input_len);
|
||||
|
||||
let mut cursor = Cursor::new(encoded_or_encoded_with_garbage);
|
||||
|
||||
match fast_decode::fast_decode(
|
||||
&mut cursor,
|
||||
&mut output,
|
||||
supports_fast_decode_and_encode,
|
||||
ignore_garbage,
|
||||
) {
|
||||
Ok(()) => Ok(output),
|
||||
Err(er) => Err(er),
|
||||
}
|
||||
};
|
||||
|
||||
let made_round_trip = match decoded_or_error {
|
||||
Ok(ve) => input.as_slice() == ve.as_slice(),
|
||||
Err(_) => false,
|
||||
};
|
||||
|
||||
let result_was_correct = if garbage_data_is_empty || ignore_garbage {
|
||||
// If there was no garbage data added, or if "ignore_garbage" was enabled, expect the round trip to succeed
|
||||
made_round_trip
|
||||
} else {
|
||||
// If garbage data was added, and "ignore_garbage" was disabled, expect the round trip to fail
|
||||
|
||||
!made_round_trip
|
||||
};
|
||||
|
||||
if !result_was_correct {
|
||||
eprintln!(
|
||||
"\
|
||||
(configurable_round_trip) FAILURE
|
||||
format: {format:?}
|
||||
ignore_garbage: {ignore_garbage}
|
||||
line_wrap_to_use: {line_wrap_to_use:?}
|
||||
garbage_data_len: {garbage_data_len}
|
||||
input_len: {input_len}
|
||||
",
|
||||
);
|
||||
}
|
||||
|
||||
prop_assert!(result_was_correct);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Generates one `#[test]` function per `name => Format variant` pair; each test runs
// `generic_round_trip` for that format.
macro_rules! round_trip_tests {
    ($($test_name:ident => $variant:ident),* $(,)?) => {
        $(
            #[test]
            fn $test_name() {
                generic_round_trip(Format::$variant);
            }
        )*
    };
}

round_trip_tests! {
    base16_round_trip => Base16,
    base2lsbf_round_trip => Base2Lsbf,
    base2msbf_round_trip => Base2Msbf,
    base32_round_trip => Base32,
    base32hex_round_trip => Base32Hex,
    base64_round_trip => Base64,
    base64url_round_trip => Base64Url,
    z85_round_trip => Z85,
}
|
|
@ -17,6 +17,7 @@ readme.workspace = true
|
|||
path = "src/base64.rs"
|
||||
|
||||
[dependencies]
|
||||
clap = { workspace = true }
|
||||
uucore = { workspace = true, features = ["encoding"] }
|
||||
uu_base32 = { workspace = true }
|
||||
|
||||
|
|
|
@ -3,13 +3,10 @@
|
|||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
use clap::Command;
|
||||
use uu_base32::base_common;
|
||||
pub use uu_base32::uu_app;
|
||||
|
||||
use uucore::{encoding::Format, error::UResult, help_about, help_usage};
|
||||
|
||||
use std::io::{stdin, Read};
|
||||
|
||||
const ABOUT: &str = help_about!("base64.md");
|
||||
const USAGE: &str = help_usage!("base64.md");
|
||||
|
||||
|
@ -17,18 +14,13 @@ const USAGE: &str = help_usage!("base64.md");
|
|||
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||
let format = Format::Base64;
|
||||
|
||||
let config: base_common::Config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;
|
||||
let config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;
|
||||
|
||||
// Create a reference to stdin so we can return a locked stdin from
|
||||
// parse_base_cmd_args
|
||||
let stdin_raw = stdin();
|
||||
let mut input: Box<dyn Read> = base_common::get_input(&config, &stdin_raw)?;
|
||||
let mut input = base_common::get_input(&config)?;
|
||||
|
||||
base_common::handle_input(
|
||||
&mut input,
|
||||
format,
|
||||
config.wrap_cols,
|
||||
config.ignore_garbage,
|
||||
config.decode,
|
||||
)
|
||||
base_common::handle_input(&mut input, format, config)
|
||||
}
|
||||
|
||||
pub fn uu_app() -> Command {
|
||||
base_common::base_app(ABOUT, USAGE)
|
||||
}
|
||||
|
|
177
src/uu/basenc/BENCHMARKING.md
Normal file
177
src/uu/basenc/BENCHMARKING.md
Normal file
|
@ -0,0 +1,177 @@
|
|||
<!--
|
||||
spell-checker:ignore gibibyte toybox
|
||||
-->
|
||||
|
||||
# Benchmarking base32, base64, and basenc
|
||||
|
||||
Note that the functionality of the `base32` and `base64` programs is identical to that of the `basenc` program, using
|
||||
the "--base32" and "--base64" options, respectively. For that reason, it is only necessary to benchmark `basenc`.
|
||||
|
||||
To compare the runtime performance of the uutils implementation with the GNU Core Utilities implementation, you can
|
||||
use a benchmarking tool like [hyperfine][0] (see its [installation instructions][1]).
|
||||
|
||||
hyperfine currently does not measure maximum memory usage. Memory usage can be benchmarked using [poop][2], or
|
||||
[toybox][3]'s "time" subcommand (both are Linux only).
|
||||
|
||||
Build the `basenc` binary using the release profile:
|
||||
|
||||
```Shell
|
||||
cargo build --package uu_basenc --profile release
|
||||
```
|
||||
|
||||
## Expected performance
|
||||
|
||||
uutils' `basenc` performs streaming decoding and encoding, and therefore should perform all operations with a constant
|
||||
maximum memory usage, regardless of the size of the input. Release builds currently use less than 3 mebibytes of
|
||||
memory, and memory usage greater than 10 mebibytes should be considered a bug.
|
||||
|
||||
As of September 2024, uutils' `basenc` has runtime performance equal to or superior to GNU Core Utilities' `basenc` in
|
||||
most scenarios. uutils' `basenc` uses slightly more memory, but given how small these quantities are in absolute
|
||||
terms (see above), this is highly unlikely to be practically relevant to users.
|
||||
|
||||
## Benchmark results (2024-09-27)
|
||||
|
||||
### Setup
|
||||
|
||||
```Shell
|
||||
# Use uutils' dd to create a 1 gibibyte in-memory file filled with random bytes (Linux only).
|
||||
# On other platforms, you can use /tmp instead of /dev/shm, but note that /tmp is not guaranteed to be in-memory.
|
||||
coreutils dd if=/dev/urandom of=/dev/shm/one-random-gibibyte bs=1024 count=1048576
|
||||
|
||||
# Encode this file for use in decoding performance testing
|
||||
/usr/bin/basenc --base32hex -- /dev/shm/one-random-gibibyte 1>/dev/shm/one-random-gibibyte-base32hex-encoded
|
||||
/usr/bin/basenc --z85 -- /dev/shm/one-random-gibibyte 1>/dev/shm/one-random-gibibyte-z85-encoded
|
||||
```
|
||||
|
||||
### Programs being tested
|
||||
|
||||
uutils' `basenc`:
|
||||
|
||||
```
|
||||
❯ git rev-list HEAD | coreutils head -n 1 -- -
|
||||
a0718ef0ffd50539a2e2bc0095c9fadcd70ab857
|
||||
```
|
||||
|
||||
GNU Core Utilities' `basenc`:
|
||||
|
||||
```
|
||||
❯ /usr/bin/basenc --version | coreutils head -n 1 -- -
|
||||
basenc (GNU coreutils) 9.4
|
||||
```
|
||||
|
||||
### Encoding performance
|
||||
|
||||
#### "--base64", default line wrapping (76 characters)
|
||||
|
||||
➕ Faster than GNU Core Utilities
|
||||
|
||||
```
|
||||
❯ hyperfine \
|
||||
--sort \
|
||||
command \
|
||||
-- \
|
||||
'/usr/bin/basenc --base64 -- /dev/shm/one-random-gibibyte 1>/dev/null' \
|
||||
'./target/release/basenc --base64 -- /dev/shm/one-random-gibibyte 1>/dev/null'
|
||||
|
||||
Benchmark 1: /usr/bin/basenc --base64 -- /dev/shm/one-random-gibibyte 1>/dev/null
|
||||
Time (mean ± σ): 965.1 ms ± 7.9 ms [User: 766.2 ms, System: 193.4 ms]
|
||||
Range (min … max): 950.2 ms … 976.9 ms 10 runs
|
||||
|
||||
Benchmark 2: ./target/release/basenc --base64 -- /dev/shm/one-random-gibibyte 1>/dev/null
|
||||
Time (mean ± σ): 696.6 ms ± 9.1 ms [User: 574.9 ms, System: 117.3 ms]
|
||||
Range (min … max): 683.1 ms … 713.5 ms 10 runs
|
||||
|
||||
Relative speed comparison
|
||||
1.39 ± 0.02 /usr/bin/basenc --base64 -- /dev/shm/one-random-gibibyte 1>/dev/null
|
||||
1.00 ./target/release/basenc --base64 -- /dev/shm/one-random-gibibyte 1>/dev/null
|
||||
```
|
||||
|
||||
#### "--base16", no line wrapping
|
||||
|
||||
➖ Slower than GNU Core Utilities
|
||||
|
||||
```
|
||||
❯ poop \
|
||||
'/usr/bin/basenc --base16 --wrap 0 -- /dev/shm/one-random-gibibyte' \
|
||||
'./target/release/basenc --base16 --wrap 0 -- /dev/shm/one-random-gibibyte'
|
||||
|
||||
Benchmark 1 (6 runs): /usr/bin/basenc --base16 --wrap 0 -- /dev/shm/one-random-gibibyte
|
||||
measurement mean ± σ min … max outliers delta
|
||||
wall_time 836ms ± 13.3ms 822ms … 855ms 0 ( 0%) 0%
|
||||
peak_rss 2.05MB ± 73.0KB 1.94MB … 2.12MB 0 ( 0%) 0%
|
||||
cpu_cycles 2.85G ± 32.8M 2.82G … 2.91G 0 ( 0%) 0%
|
||||
instructions 14.0G ± 58.7 14.0G … 14.0G 0 ( 0%) 0%
|
||||
cache_references 70.0M ± 6.48M 63.7M … 78.8M 0 ( 0%) 0%
|
||||
cache_misses 582K ± 172K 354K … 771K 0 ( 0%) 0%
|
||||
branch_misses 667K ± 4.55K 662K … 674K 0 ( 0%) 0%
|
||||
Benchmark 2 (6 runs): ./target/release/basenc --base16 --wrap 0 -- /dev/shm/one-random-gibibyte
|
||||
measurement mean ± σ min … max outliers delta
|
||||
wall_time 884ms ± 6.38ms 878ms … 895ms 0 ( 0%) 💩+ 5.7% ± 1.6%
|
||||
peak_rss 2.65MB ± 66.8KB 2.55MB … 2.74MB 0 ( 0%) 💩+ 29.3% ± 4.4%
|
||||
cpu_cycles 3.15G ± 8.61M 3.14G … 3.16G 0 ( 0%) 💩+ 10.6% ± 1.1%
|
||||
instructions 10.5G ± 275 10.5G … 10.5G 0 ( 0%) ⚡- 24.9% ± 0.0%
|
||||
cache_references 93.5M ± 6.10M 87.2M … 104M 0 ( 0%) 💩+ 33.7% ± 11.6%
|
||||
cache_misses 415K ± 52.3K 363K … 474K 0 ( 0%) - 28.8% ± 28.0%
|
||||
branch_misses 1.43M ± 4.82K 1.42M … 1.43M 0 ( 0%) 💩+113.9% ± 0.9%
|
||||
```
|
||||
|
||||
### Decoding performance
|
||||
|
||||
#### "--base32hex"
|
||||
|
||||
➕ Faster than GNU Core Utilities
|
||||
|
||||
```
|
||||
❯ hyperfine \
|
||||
--sort \
|
||||
command \
|
||||
-- \
|
||||
'/usr/bin/basenc --base32hex --decode -- /dev/shm/one-random-gibibyte-base32hex-encoded 1>/dev/null' \
|
||||
'./target/release/basenc --base32hex --decode -- /dev/shm/one-random-gibibyte-base32hex-encoded 1>/dev/null'
|
||||
|
||||
Benchmark 1: /usr/bin/basenc --base32hex --decode -- /dev/shm/one-random-gibibyte-base32hex-encoded 1>/dev/null
|
||||
Time (mean ± σ): 7.154 s ± 0.082 s [User: 6.802 s, System: 0.323 s]
|
||||
Range (min … max): 7.051 s … 7.297 s 10 runs
|
||||
|
||||
Benchmark 2: ./target/release/basenc --base32hex --decode -- /dev/shm/one-random-gibibyte-base32hex-encoded 1>/dev/null
|
||||
Time (mean ± σ): 2.679 s ± 0.025 s [User: 2.446 s, System: 0.221 s]
|
||||
Range (min … max): 2.649 s … 2.718 s 10 runs
|
||||
|
||||
Relative speed comparison
|
||||
2.67 ± 0.04 /usr/bin/basenc --base32hex --decode -- /dev/shm/one-random-gibibyte-base32hex-encoded 1>/dev/null
|
||||
1.00 ./target/release/basenc --base32hex --decode -- /dev/shm/one-random-gibibyte-base32hex-encoded 1>/dev/null
|
||||
```
|
||||
|
||||
#### "--z85", with "--ignore-garbage"
|
||||
|
||||
➕ Faster than GNU Core Utilities
|
||||
|
||||
```
|
||||
❯ poop \
|
||||
'/usr/bin/basenc --decode --ignore-garbage --z85 -- /dev/shm/one-random-gibibyte-z85-encoded' \
|
||||
'./target/release/basenc --decode --ignore-garbage --z85 -- /dev/shm/one-random-gibibyte-z85-encoded'
|
||||
|
||||
Benchmark 1 (3 runs): /usr/bin/basenc --decode --ignore-garbage --z85 -- /dev/shm/one-random-gibibyte-z85-encoded
|
||||
measurement mean ± σ min … max outliers delta
|
||||
wall_time 14.4s ± 68.4ms 14.3s … 14.4s 0 ( 0%) 0%
|
||||
peak_rss 1.98MB ± 10.8KB 1.97MB … 1.99MB 0 ( 0%) 0%
|
||||
cpu_cycles 58.4G ± 211M 58.3G … 58.7G 0 ( 0%) 0%
|
||||
instructions 74.7G ± 64.0 74.7G … 74.7G 0 ( 0%) 0%
|
||||
cache_references 41.8M ± 624K 41.2M … 42.4M 0 ( 0%) 0%
|
||||
cache_misses 693K ± 118K 567K … 802K 0 ( 0%) 0%
|
||||
branch_misses 1.24G ± 183K 1.24G … 1.24G 0 ( 0%) 0%
|
||||
Benchmark 2 (3 runs): ./target/release/basenc --decode --ignore-garbage --z85 -- /dev/shm/one-random-gibibyte-z85-encoded
|
||||
measurement mean ± σ min … max outliers delta
|
||||
wall_time 2.80s ± 17.9ms 2.79s … 2.82s 0 ( 0%) ⚡- 80.5% ± 0.8%
|
||||
peak_rss 2.61MB ± 67.4KB 2.57MB … 2.69MB 0 ( 0%) 💩+ 31.9% ± 5.5%
|
||||
cpu_cycles 10.8G ± 27.9M 10.8G … 10.9G 0 ( 0%) ⚡- 81.5% ± 0.6%
|
||||
instructions 39.0G ± 353 39.0G … 39.0G 0 ( 0%) ⚡- 47.7% ± 0.0%
|
||||
cache_references 114M ± 2.43M 112M … 116M 0 ( 0%) 💩+173.3% ± 9.6%
|
||||
cache_misses 1.06M ± 288K 805K … 1.37M 0 ( 0%) + 52.6% ± 72.0%
|
||||
branch_misses 1.18M ± 14.7K 1.16M … 1.19M 0 ( 0%) ⚡- 99.9% ± 0.0%
|
||||
```
|
||||
|
||||
[0]: https://github.com/sharkdp/hyperfine
|
||||
[1]: https://github.com/sharkdp/hyperfine?tab=readme-ov-file#installation
|
||||
[2]: https://github.com/andrewrk/poop
|
||||
[3]: https://landley.net/toybox/
|
|
@ -3,19 +3,15 @@
|
|||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
//spell-checker:ignore (args) lsbf msbf
|
||||
// spell-checker:ignore lsbf msbf
|
||||
|
||||
use clap::{Arg, ArgAction, Command};
|
||||
use uu_base32::base_common::{self, Config, BASE_CMD_PARSE_ERROR};
|
||||
|
||||
use uucore::error::UClapError;
|
||||
use uucore::{
|
||||
encoding::Format,
|
||||
error::{UResult, UUsageError},
|
||||
};
|
||||
|
||||
use std::io::{stdin, Read};
|
||||
use uucore::error::UClapError;
|
||||
|
||||
use uucore::{help_about, help_usage};
|
||||
|
||||
const ABOUT: &str = help_about!("basenc.md");
|
||||
|
@ -81,16 +77,8 @@ fn parse_cmd_args(args: impl uucore::Args) -> UResult<(Config, Format)> {
|
|||
#[uucore::main]
|
||||
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||
let (config, format) = parse_cmd_args(args)?;
|
||||
// Create a reference to stdin so we can return a locked stdin from
|
||||
// parse_base_cmd_args
|
||||
let stdin_raw = stdin();
|
||||
let mut input: Box<dyn Read> = base_common::get_input(&config, &stdin_raw)?;
|
||||
|
||||
base_common::handle_input(
|
||||
&mut input,
|
||||
format,
|
||||
config.wrap_cols,
|
||||
config.ignore_garbage,
|
||||
config.decode,
|
||||
)
|
||||
let mut input = base_common::get_input(&config)?;
|
||||
|
||||
base_common::handle_input(&mut input, format, config)
|
||||
}
|
||||
|
|
|
@ -111,8 +111,7 @@ where
|
|||
OutputFormat::Hexadecimal => sum_hex,
|
||||
OutputFormat::Base64 => match options.algo_name {
|
||||
ALGORITHM_OPTIONS_CRC | ALGORITHM_OPTIONS_SYSV | ALGORITHM_OPTIONS_BSD => sum_hex,
|
||||
_ => encoding::encode(encoding::Format::Base64, &hex::decode(sum_hex).unwrap())
|
||||
.unwrap(),
|
||||
_ => encoding::for_cksum::BASE64.encode(&hex::decode(sum_hex).unwrap()),
|
||||
},
|
||||
};
|
||||
// The BSD checksum output is 5 digit integer
|
||||
|
|
|
@ -26,9 +26,6 @@ uucore = { workspace = true }
|
|||
num-bigint = { workspace = true }
|
||||
num-prime = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
quickcheck = "1.0.3"
|
||||
|
||||
[[bin]]
|
||||
name = "factor"
|
||||
path = "src/main.rs"
|
||||
|
|
|
@ -77,7 +77,7 @@ default = []
|
|||
backup-control = []
|
||||
colors = []
|
||||
checksum = ["data-encoding", "thiserror", "regex", "sum"]
|
||||
encoding = ["data-encoding", "data-encoding-macro", "z85", "thiserror"]
|
||||
encoding = ["data-encoding", "data-encoding-macro", "z85"]
|
||||
entries = ["libc"]
|
||||
fs = ["dunce", "libc", "winapi-util", "windows-sys"]
|
||||
fsext = ["libc", "windows-sys"]
|
||||
|
|
|
@ -3,35 +3,24 @@
|
|||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
// spell-checker:ignore (strings) ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUV
|
||||
// spell-checker:ignore (encodings) lsbf msbf hexupper
|
||||
// spell-checker:ignore (encodings) lsbf msbf
|
||||
// spell-checker:ignore unpadded
|
||||
|
||||
use std::io::{self, Read, Write};
|
||||
|
||||
use data_encoding::{Encoding, BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER};
|
||||
use crate::error::{UResult, USimpleError};
|
||||
use data_encoding::Encoding;
|
||||
use data_encoding_macro::new_encoding;
|
||||
#[cfg(feature = "thiserror")]
|
||||
use thiserror::Error;
|
||||
use std::collections::VecDeque;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum DecodeError {
|
||||
#[error("{}", _0)]
|
||||
Decode(#[from] data_encoding::DecodeError),
|
||||
#[error("{}", _0)]
|
||||
DecodeZ85(#[from] z85::DecodeError),
|
||||
#[error("{}", _0)]
|
||||
Io(#[from] io::Error),
|
||||
// Re-export for the faster decoding/encoding logic
|
||||
pub mod for_base_common {
|
||||
pub use data_encoding::*;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum EncodeError {
|
||||
Z85InputLenNotMultipleOf4,
|
||||
InvalidInput,
|
||||
pub mod for_cksum {
|
||||
pub use data_encoding::BASE64;
|
||||
}
|
||||
|
||||
pub type DecodeResult = Result<Vec<u8>, DecodeError>;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum Format {
|
||||
Base64,
|
||||
Base64Url,
|
||||
|
@ -42,138 +31,182 @@ pub enum Format {
|
|||
Base2Msbf,
|
||||
Z85,
|
||||
}
|
||||
use self::Format::*;
|
||||
|
||||
const BASE2LSBF: Encoding = new_encoding! {
|
||||
pub const BASE2LSBF: Encoding = new_encoding! {
|
||||
symbols: "01",
|
||||
bit_order: LeastSignificantFirst,
|
||||
};
|
||||
const BASE2MSBF: Encoding = new_encoding! {
|
||||
|
||||
pub const BASE2MSBF: Encoding = new_encoding! {
|
||||
symbols: "01",
|
||||
bit_order: MostSignificantFirst,
|
||||
};
|
||||
|
||||
pub fn encode(f: Format, input: &[u8]) -> Result<String, EncodeError> {
|
||||
Ok(match f {
|
||||
Base32 => BASE32.encode(input),
|
||||
Base64 => BASE64.encode(input),
|
||||
Base64Url => BASE64URL.encode(input),
|
||||
Base32Hex => BASE32HEX.encode(input),
|
||||
Base16 => HEXUPPER.encode(input),
|
||||
Base2Lsbf => BASE2LSBF.encode(input),
|
||||
Base2Msbf => BASE2MSBF.encode(input),
|
||||
Z85 => {
|
||||
// According to the spec we should not accept inputs whose len is not a multiple of 4.
|
||||
// However, the z85 crate implements a padded encoding and accepts such inputs. We have to manually check for them.
|
||||
if input.len() % 4 == 0 {
|
||||
z85::encode(input)
|
||||
} else {
|
||||
return Err(EncodeError::Z85InputLenNotMultipleOf4);
|
||||
}
|
||||
}
|
||||
})
|
||||
pub struct Z85Wrapper {}
|
||||
|
||||
pub struct EncodingWrapper {
|
||||
pub alphabet: &'static [u8],
|
||||
pub encoding: Encoding,
|
||||
pub unpadded_multiple: usize,
|
||||
pub valid_decoding_multiple: usize,
|
||||
}
|
||||
|
||||
pub fn decode(f: Format, input: &[u8]) -> DecodeResult {
|
||||
Ok(match f {
|
||||
Base32 => BASE32.decode(input)?,
|
||||
Base64 => BASE64.decode(input)?,
|
||||
Base64Url => BASE64URL.decode(input)?,
|
||||
Base32Hex => BASE32HEX.decode(input)?,
|
||||
Base16 => HEXUPPER.decode(input)?,
|
||||
Base2Lsbf => BASE2LSBF.decode(input)?,
|
||||
Base2Msbf => BASE2MSBF.decode(input)?,
|
||||
Z85 => {
|
||||
// The z85 crate implements a padded encoding by using a leading '#' which is otherwise not allowed.
|
||||
// We manually check for a leading '#' and return an error ourselves.
|
||||
if input.starts_with(b"#") {
|
||||
return Err(z85::DecodeError::InvalidByte(0, b'#').into());
|
||||
} else {
|
||||
z85::decode(input)?
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
impl EncodingWrapper {
|
||||
pub fn new(
|
||||
encoding: Encoding,
|
||||
valid_decoding_multiple: usize,
|
||||
unpadded_multiple: usize,
|
||||
alphabet: &'static [u8],
|
||||
) -> Self {
|
||||
assert!(valid_decoding_multiple > 0);
|
||||
|
||||
pub struct Data<R: Read> {
|
||||
line_wrap: usize,
|
||||
ignore_garbage: bool,
|
||||
input: R,
|
||||
format: Format,
|
||||
alphabet: &'static [u8],
|
||||
}
|
||||
assert!(unpadded_multiple > 0);
|
||||
|
||||
assert!(!alphabet.is_empty());
|
||||
|
||||
impl<R: Read> Data<R> {
|
||||
pub fn new(input: R, format: Format) -> Self {
|
||||
Self {
|
||||
line_wrap: 76,
|
||||
ignore_garbage: false,
|
||||
input,
|
||||
format,
|
||||
alphabet: match format {
|
||||
Base32 => b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567=",
|
||||
Base64 => b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=+/",
|
||||
Base64Url => b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=_-",
|
||||
Base32Hex => b"0123456789ABCDEFGHIJKLMNOPQRSTUV=",
|
||||
Base16 => b"0123456789ABCDEF",
|
||||
Base2Lsbf => b"01",
|
||||
Base2Msbf => b"01",
|
||||
Z85 => b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#",
|
||||
},
|
||||
alphabet,
|
||||
encoding,
|
||||
unpadded_multiple,
|
||||
valid_decoding_multiple,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn line_wrap(mut self, wrap: usize) -> Self {
|
||||
self.line_wrap = wrap;
|
||||
self
|
||||
/// Interface an encoding must provide to be used by `fast_decode` and `fast_encode`.
pub trait SupportsFastDecodeAndEncode {
|
||||
/// Returns the list of characters used by this encoding
|
||||
fn alphabet(&self) -> &'static [u8];
|
||||
|
||||
/// Decodes `input` and appends the decoded bytes to `output`.
///
/// The implementations in this file return an "error: invalid input" error when `input` cannot be decoded.
fn decode_into_vec(&self, input: &[u8], output: &mut Vec<u8>) -> UResult<()>;
|
||||
|
||||
/// Encodes `input` and appends the encoded bytes to the back of `output`.
fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque<u8>) -> UResult<()>;
|
||||
|
||||
/// Inputs with a length that is a multiple of this number do not have padding when encoded. For instance:
|
||||
///
|
||||
/// "The quick brown"
|
||||
///
|
||||
/// is 15 characters (divisible by 3), so it is encoded in Base64 without padding:
|
||||
///
|
||||
/// "VGhlIHF1aWNrIGJyb3du"
|
||||
///
|
||||
/// While:
|
||||
///
|
||||
/// "The quick brown fox"
|
||||
///
|
||||
/// is 19 characters, which is not divisible by 3, so its Base64 representation has padding:
|
||||
///
|
||||
/// "VGhlIHF1aWNrIGJyb3duIGZveA=="
|
||||
///
|
||||
/// The encoding performed by `fast_encode` depends on this number being correct.
|
||||
fn unpadded_multiple(&self) -> usize;
|
||||
|
||||
/// Data to decode must have a length that is a multiple of this number.
|
||||
///
|
||||
/// The decoding performed by `fast_decode` depends on this number being correct.
|
||||
fn valid_decoding_multiple(&self) -> usize;
|
||||
}
|
||||
|
||||
impl SupportsFastDecodeAndEncode for Z85Wrapper {
|
||||
/// Returns the 85 characters of the Z85 alphabet as a byte string.
fn alphabet(&self) -> &'static [u8] {
|
||||
// Z85 alphabet
|
||||
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#"
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn ignore_garbage(mut self, ignore: bool) -> Self {
|
||||
self.ignore_garbage = ignore;
|
||||
self
|
||||
}
|
||||
fn decode_into_vec(&self, input: &[u8], output: &mut Vec<u8>) -> UResult<()> {
|
||||
if input.first() == Some(&b'#') {
|
||||
return Err(USimpleError::new(1, "error: invalid input".to_owned()));
|
||||
}
|
||||
|
||||
pub fn decode(&mut self) -> DecodeResult {
|
||||
let mut buf = vec![];
|
||||
self.input.read_to_end(&mut buf)?;
|
||||
if self.ignore_garbage {
|
||||
buf.retain(|c| self.alphabet.contains(c));
|
||||
} else {
|
||||
buf.retain(|&c| c != b'\r' && c != b'\n');
|
||||
let decode_result = match z85::decode(input) {
|
||||
Ok(ve) => ve,
|
||||
Err(_de) => {
|
||||
return Err(USimpleError::new(1, "error: invalid input".to_owned()));
|
||||
}
|
||||
};
|
||||
decode(self.format, &buf)
|
||||
|
||||
output.extend_from_slice(&decode_result);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn encode(&mut self) -> Result<String, EncodeError> {
|
||||
let mut buf: Vec<u8> = vec![];
|
||||
match self.input.read_to_end(&mut buf) {
|
||||
Ok(_) => encode(self.format, buf.as_slice()),
|
||||
Err(_) => Err(EncodeError::InvalidInput),
|
||||
/// Z85 data handed to the decoder must have a length that is a multiple of 5.
fn valid_decoding_multiple(&self) -> usize {
|
||||
5
|
||||
}
|
||||
|
||||
/// Z85-encodes `input` and appends the encoded bytes to the back of `output`.
///
/// Returns an error (code 1) without touching `output` when `input.len()` is not a multiple of 4.
fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque<u8>) -> UResult<()> {
|
||||
// According to the spec we should not accept inputs whose len is not a multiple of 4.
|
||||
// However, the z85 crate implements a padded encoding and accepts such inputs. We have to manually check for them.
|
||||
if input.len() % 4 != 0 {
|
||||
return Err(USimpleError::new(
|
||||
1,
|
||||
"error: invalid input (length must be multiple of 4 characters)".to_owned(),
|
||||
));
|
||||
}
|
||||
|
||||
let string = z85::encode(input);
|
||||
|
||||
// Append the encoded text byte by byte to the end of the deque.
output.extend(string.as_bytes());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Z85 input is encoded in groups of 4 bytes; this matches the length check in `encode_to_vec_deque`.
fn unpadded_multiple(&self) -> usize {
|
||||
4
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: this will likely be phased out at some point
|
||||
/// Writes `res` to standard output through `wrap_write`, using `data.line_wrap` as the line width.
///
/// Panics if the write fails, because the `io::Result` from `wrap_write` is unwrapped.
pub fn wrap_print<R: Read>(data: &Data<R>, res: &str) {
|
||||
let stdout = io::stdout();
|
||||
// Lock stdout once and hand the locked handle to `wrap_write` for the whole write.
wrap_write(stdout.lock(), data.line_wrap, res).unwrap();
|
||||
}
|
||||
|
||||
pub fn wrap_write<W: Write>(mut writer: W, line_wrap: usize, res: &str) -> io::Result<()> {
|
||||
use std::cmp::min;
|
||||
|
||||
if line_wrap == 0 {
|
||||
return write!(writer, "{res}");
|
||||
impl SupportsFastDecodeAndEncode for EncodingWrapper {
|
||||
/// Returns the alphabet stored in this wrapper.
fn alphabet(&self) -> &'static [u8] {
|
||||
self.alphabet
|
||||
}
|
||||
|
||||
let mut start = 0;
|
||||
while start < res.len() {
|
||||
let end = min(start + line_wrap, res.len());
|
||||
writeln!(writer, "{}", &res[start..end])?;
|
||||
start = end;
|
||||
// Adapted from `decode` in the "data-encoding" crate
|
||||
/// Decodes `input` with the wrapped encoding and appends the decoded bytes to `output`.
///
/// Returns an "error: invalid input" error (code 1) when either `decode_len` or `decode_mut` rejects `input`.
fn decode_into_vec(&self, input: &[u8], output: &mut Vec<u8>) -> UResult<()> {
|
||||
// Ask the encoding how much output space an input of this length needs.
let decode_len_result = match self.encoding.decode_len(input.len()) {
|
||||
Ok(us) => us,
|
||||
Err(_de) => {
|
||||
return Err(USimpleError::new(1, "error: invalid input".to_owned()));
|
||||
}
|
||||
};
|
||||
|
||||
// Remember where the existing contents end so decoding only writes past them.
let output_len = output.len();
|
||||
|
||||
// Zero-fill a tail of the reported size for `decode_mut` to write into.
output.resize(output_len + decode_len_result, 0);
|
||||
|
||||
match self.encoding.decode_mut(input, &mut (output[output_len..])) {
|
||||
Ok(us) => {
|
||||
// See:
|
||||
// https://docs.rs/data-encoding/latest/data_encoding/struct.Encoding.html#method.decode_mut
|
||||
// "Returns the length of the decoded output. This length may be smaller than the output length if the input contained padding or ignored characters. The output bytes after the returned length are not initialized and should not be read."
|
||||
output.truncate(output_len + us);
|
||||
|
||||
}
|
||||
// NOTE(review): on this path `output` keeps the zero-filled tail added by `resize`;
// presumably callers discard `output` on error — confirm.
Err(_de) => {
|
||||
return Err(USimpleError::new(1, "error: invalid input".to_owned()));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Ok(())
|
||||
/// Returns the decoding multiple stored in this wrapper.
fn valid_decoding_multiple(&self) -> usize {
|
||||
self.valid_decoding_multiple
|
||||
}
|
||||
|
||||
// Adapted from `encode_append` in the "data-encoding" crate
|
||||
/// Encodes `input` with the wrapped encoding and appends the encoded bytes to the back of `output`.
///
/// Always returns `Ok(())`.
fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque<u8>) -> UResult<()> {
|
||||
// Remember where the existing contents end so encoding only writes past them.
let output_len = output.len();
|
||||
|
||||
// Zero-fill a tail of exactly `encode_len` bytes for `encode_mut` to write into.
output.resize(output_len + self.encoding.encode_len(input.len()), 0);
|
||||
|
||||
// A `VecDeque` may be stored as two separate pieces; `make_contiguous` rearranges it
// so the whole contents are available as one mutable slice.
let make_contiguous_result = output.make_contiguous();
|
||||
|
||||
self.encoding
|
||||
.encode_mut(input, &mut (make_contiguous_result[output_len..]));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the unpadded multiple stored in this wrapper.
fn unpadded_multiple(&self) -> usize {
|
||||
self.unpadded_multiple
|
||||
}
|
||||
}
|
||||
|
|
|
@ -146,3 +146,77 @@ fn test_base64_file_not_found() {
|
|||
.fails()
|
||||
.stderr_only("base64: a.txt: No such file or directory\n");
|
||||
}
|
||||
|
||||
// Encoding stdin with `--wrap 10` must produce 10-character lines and end with exactly one newline.
#[test]
|
||||
fn test_no_repeated_trailing_newline() {
|
||||
new_ucmd!()
|
||||
.args(&["--wrap", "10", "--", "-"])
|
||||
.pipe_in("The quick brown fox jumps over the lazy dog.")
|
||||
.succeeds()
|
||||
.stdout_only(
|
||||
// cSpell:disable
|
||||
"\
|
||||
VGhlIHF1aW
|
||||
NrIGJyb3du
|
||||
IGZveCBqdW
|
||||
1wcyBvdmVy
|
||||
IHRoZSBsYX
|
||||
p5IGRvZy4=
|
||||
",
|
||||
// cSpell:enable
|
||||
);
|
||||
}
|
||||
|
||||
// Without `--wrap`, encoded output is expected to be broken into 76-character lines.
#[test]
|
||||
fn test_wrap_default() {
|
||||
const PIPE_IN: &str = "The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.";
|
||||
|
||||
new_ucmd!()
|
||||
.args(&["--", "-"])
|
||||
.pipe_in(PIPE_IN)
|
||||
.succeeds()
|
||||
.stdout_only(
|
||||
// cSpell:disable
|
||||
"\
|
||||
VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZy4gVGhlIHF1aWNrIGJy
|
||||
b3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZy4gVGhlIHF1aWNrIGJyb3duIGZveCBqdW1w
|
||||
cyBvdmVyIHRoZSBsYXp5IGRvZy4=
|
||||
",
|
||||
// cSpell:enable
|
||||
);
|
||||
}
|
||||
|
||||
// Prevent regression to:
|
||||
//
|
||||
// ❯ coreutils manpage base64 | rg --fixed-strings -- 'base32'
|
||||
// The data are encoded as described for the base32 alphabet in RFC 4648.
|
||||
// to the bytes of the formal base32 alphabet. Use \-\-ignore\-garbage
|
||||
// The data are encoded as described for the base32 alphabet in RFC 4648.
|
||||
// to the bytes of the formal base32 alphabet. Use \-\-ignore\-garbage
|
||||
// Runs `manpage base64` on the test binary and checks that the generated page talks about the
// base64 alphabet and never mentions base32.
#[test]
|
||||
fn test_manpage() {
|
||||
use std::process::{Command, Stdio};
|
||||
|
||||
let test_scenario = TestScenario::new("");
|
||||
|
||||
// Spawn the binary directly (not via `new_ucmd!`) with all three standard streams piped.
let child = Command::new(test_scenario.bin_path)
|
||||
.arg("manpage")
|
||||
.arg("base64")
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.spawn()
|
||||
.unwrap();
|
||||
|
||||
let output = child.wait_with_output().unwrap();
|
||||
|
||||
// The command must exit successfully and print nothing on stderr.
assert_eq!(output.status.code().unwrap(), 0);
|
||||
|
||||
assert!(output.stderr.is_empty());
|
||||
|
||||
let stdout_str = std::str::from_utf8(&output.stdout).unwrap();
|
||||
|
||||
assert!(stdout_str.contains("base64 alphabet"));
|
||||
|
||||
// Case-insensitive check so "Base32"/"BASE32" are caught as well.
assert!(!stdout_str.to_ascii_lowercase().contains("base32"));
|
||||
}
|
||||
|
|
|
@ -3,17 +3,23 @@
|
|||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
//spell-checker: ignore (encodings) lsbf msbf
|
||||
// spell-checker: ignore (encodings) lsbf msbf
|
||||
|
||||
use crate::common::util::TestScenario;
|
||||
|
||||
#[test]
|
||||
fn test_z85_not_padded() {
|
||||
fn test_z85_not_padded_decode() {
|
||||
// The z85 crate deviates from the standard in some cases; we have to catch those
|
||||
new_ucmd!()
|
||||
.args(&["--z85", "-d"])
|
||||
.pipe_in("##########")
|
||||
.fails()
|
||||
.stderr_only("basenc: error: invalid input\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_z85_not_padded_encode() {
|
||||
// The z85 crate deviates from the standard in some cases; we have to catch those
|
||||
new_ucmd!()
|
||||
.args(&["--z85"])
|
||||
.pipe_in("123")
|
||||
|
@ -26,7 +32,7 @@ fn test_invalid_input() {
|
|||
let error_message = if cfg!(windows) {
|
||||
"basenc: .: Permission denied\n"
|
||||
} else {
|
||||
"basenc: error: invalid input\n"
|
||||
"basenc: read error: Is a directory\n"
|
||||
};
|
||||
new_ucmd!()
|
||||
.args(&["--base32", "."])
|
||||
|
@ -40,7 +46,6 @@ fn test_base64() {
|
|||
.arg("--base64")
|
||||
.pipe_in("to>be?")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("dG8+YmU/\n");
|
||||
}
|
||||
|
||||
|
@ -50,7 +55,6 @@ fn test_base64_decode() {
|
|||
.args(&["--base64", "-d"])
|
||||
.pipe_in("dG8+YmU/")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("to>be?");
|
||||
}
|
||||
|
||||
|
@ -60,7 +64,6 @@ fn test_base64url() {
|
|||
.arg("--base64url")
|
||||
.pipe_in("to>be?")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("dG8-YmU_\n");
|
||||
}
|
||||
|
||||
|
@ -70,7 +73,6 @@ fn test_base64url_decode() {
|
|||
.args(&["--base64url", "-d"])
|
||||
.pipe_in("dG8-YmU_")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("to>be?");
|
||||
}
|
||||
|
||||
|
@ -80,7 +82,6 @@ fn test_base32() {
|
|||
.arg("--base32")
|
||||
.pipe_in("nice>base?")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("NZUWGZJ6MJQXGZJ7\n"); // spell-checker:disable-line
|
||||
}
|
||||
|
||||
|
@ -90,7 +91,6 @@ fn test_base32_decode() {
|
|||
.args(&["--base32", "-d"])
|
||||
.pipe_in("NZUWGZJ6MJQXGZJ7") // spell-checker:disable-line
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("nice>base?");
|
||||
}
|
||||
|
||||
|
@ -100,7 +100,6 @@ fn test_base32hex() {
|
|||
.arg("--base32hex")
|
||||
.pipe_in("nice>base?")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("DPKM6P9UC9GN6P9V\n"); // spell-checker:disable-line
|
||||
}
|
||||
|
||||
|
@ -110,7 +109,6 @@ fn test_base32hex_decode() {
|
|||
.args(&["--base32hex", "-d"])
|
||||
.pipe_in("DPKM6P9UC9GN6P9V") // spell-checker:disable-line
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("nice>base?");
|
||||
}
|
||||
|
||||
|
@ -120,7 +118,6 @@ fn test_base16() {
|
|||
.arg("--base16")
|
||||
.pipe_in("Hello, World!")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("48656C6C6F2C20576F726C6421\n");
|
||||
}
|
||||
|
||||
|
@ -130,7 +127,6 @@ fn test_base16_decode() {
|
|||
.args(&["--base16", "-d"])
|
||||
.pipe_in("48656C6C6F2C20576F726C6421")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("Hello, World!");
|
||||
}
|
||||
|
||||
|
@ -140,7 +136,6 @@ fn test_base2msbf() {
|
|||
.arg("--base2msbf")
|
||||
.pipe_in("msbf")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("01101101011100110110001001100110\n");
|
||||
}
|
||||
|
||||
|
@ -150,7 +145,6 @@ fn test_base2msbf_decode() {
|
|||
.args(&["--base2msbf", "-d"])
|
||||
.pipe_in("01101101011100110110001001100110")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("msbf");
|
||||
}
|
||||
|
||||
|
@ -160,7 +154,6 @@ fn test_base2lsbf() {
|
|||
.arg("--base2lsbf")
|
||||
.pipe_in("lsbf")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("00110110110011100100011001100110\n");
|
||||
}
|
||||
|
||||
|
@ -170,7 +163,6 @@ fn test_base2lsbf_decode() {
|
|||
.args(&["--base2lsbf", "-d"])
|
||||
.pipe_in("00110110110011100100011001100110")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("lsbf");
|
||||
}
|
||||
|
||||
|
@ -189,7 +181,6 @@ fn test_choose_last_encoding_z85() {
|
|||
])
|
||||
.pipe_in("Hello, World")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("nm=QNz.92jz/PV8\n");
|
||||
}
|
||||
|
||||
|
@ -208,7 +199,6 @@ fn test_choose_last_encoding_base64() {
|
|||
])
|
||||
.pipe_in("Hello, World!")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("SGVsbG8sIFdvcmxkIQ==\n"); // spell-checker:disable-line
|
||||
}
|
||||
|
||||
|
@ -227,7 +217,6 @@ fn test_choose_last_encoding_base2lsbf() {
|
|||
])
|
||||
.pipe_in("lsbf")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("00110110110011100100011001100110\n");
|
||||
}
|
||||
|
||||
|
@ -248,6 +237,18 @@ fn test_base32_decode_repeated() {
|
|||
])
|
||||
.pipe_in("NZUWGZJ6MJQXGZJ7") // spell-checker:disable-line
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("nice>base?");
|
||||
}
|
||||
|
||||
// The restriction that input length has to be divisible by 4 only applies to data being encoded with Z85, not to the
|
||||
// decoding of Z85-encoded data
|
||||
// A 10-character Z85 input must decode successfully to the 8 bytes "12345678".
#[test]
|
||||
fn test_z85_length_check() {
|
||||
new_ucmd!()
|
||||
.args(&["--decode", "--z85"])
|
||||
// Input has length 10, which is not divisible by 4 (but is divisible by 5)
|
||||
// spell-checker:disable-next-line
|
||||
.pipe_in("f!$Kwh8WxM")
|
||||
.succeeds()
|
||||
.stdout_only("12345678");
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue