mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 19:47:45 +00:00
basenc: perform faster, streaming encoding
Improve the performance, both in memory and time, of the encoding performed by the basenc (except in --z85 mode), base32, and base64 programs. These programs now perform encoding in a buffered/streaming manner, so encoding is not constrained by the amount of available memory.
This commit is contained in:
parent
50f99580b4
commit
846cf06272
4 changed files with 416 additions and 63 deletions
|
@ -3,22 +3,31 @@
|
||||||
// For the full copyright and license information, please view the LICENSE
|
// For the full copyright and license information, please view the LICENSE
|
||||||
// file that was distributed with this source code.
|
// file that was distributed with this source code.
|
||||||
|
|
||||||
use std::io::{stdout, Read, Write};
|
// spell-checker:ignore HEXUPPER Lsbf Msbf
|
||||||
|
|
||||||
|
use clap::{crate_version, Arg, ArgAction, Command};
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::{stdout, Read, Write};
|
||||||
|
use std::io::{BufReader, Stdin};
|
||||||
|
use std::path::Path;
|
||||||
use uucore::display::Quotable;
|
use uucore::display::Quotable;
|
||||||
use uucore::encoding::{wrap_print, Data, EncodeError, Format};
|
use uucore::encoding::{
|
||||||
|
for_fast_encode::{BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER},
|
||||||
|
wrap_print, Data, EncodeError, Format,
|
||||||
|
};
|
||||||
|
use uucore::encoding::{BASE2LSBF, BASE2MSBF};
|
||||||
use uucore::error::{FromIo, UResult, USimpleError, UUsageError};
|
use uucore::error::{FromIo, UResult, USimpleError, UUsageError};
|
||||||
use uucore::format_usage;
|
use uucore::format_usage;
|
||||||
|
|
||||||
use std::fs::File;
|
pub const BASE_CMD_PARSE_ERROR: i32 = 1_i32;
|
||||||
use std::io::{BufReader, Stdin};
|
|
||||||
use std::path::Path;
|
|
||||||
|
|
||||||
use clap::{crate_version, Arg, ArgAction, Command};
|
/// Encoded output will be formatted in lines of this length (the last line can be shorter)
|
||||||
|
///
|
||||||
|
/// Other implementations default to 76
|
||||||
|
///
|
||||||
|
/// This default is only used if no "-w"/"--wrap" argument is passed
|
||||||
|
const WRAP_DEFAULT: usize = 76_usize;
|
||||||
|
|
||||||
pub static BASE_CMD_PARSE_ERROR: i32 = 1;
|
|
||||||
|
|
||||||
// Config.
|
|
||||||
pub struct Config {
|
pub struct Config {
|
||||||
pub decode: bool,
|
pub decode: bool,
|
||||||
pub ignore_garbage: bool,
|
pub ignore_garbage: bool,
|
||||||
|
@ -118,7 +127,7 @@ pub fn base_app(about: &'static str, usage: &str) -> Command {
|
||||||
.short('w')
|
.short('w')
|
||||||
.long(options::WRAP)
|
.long(options::WRAP)
|
||||||
.value_name("COLS")
|
.value_name("COLS")
|
||||||
.help("wrap encoded lines after COLS character (default 76, 0 to disable wrapping)")
|
.help(format!("wrap encoded lines after COLS character (default {WRAP_DEFAULT}, 0 to disable wrapping)"))
|
||||||
.overrides_with(options::WRAP),
|
.overrides_with(options::WRAP),
|
||||||
)
|
)
|
||||||
// "multiple" arguments are used to check whether there is more than one
|
// "multiple" arguments are used to check whether there is more than one
|
||||||
|
@ -147,17 +156,43 @@ pub fn get_input<'a>(config: &Config, stdin_ref: &'a Stdin) -> UResult<Box<dyn R
|
||||||
pub fn handle_input<R: Read>(
|
pub fn handle_input<R: Read>(
|
||||||
input: &mut R,
|
input: &mut R,
|
||||||
format: Format,
|
format: Format,
|
||||||
line_wrap: Option<usize>,
|
wrap: Option<usize>,
|
||||||
ignore_garbage: bool,
|
ignore_garbage: bool,
|
||||||
decode: bool,
|
decode: bool,
|
||||||
) -> UResult<()> {
|
) -> UResult<()> {
|
||||||
let mut data = Data::new(input, format).ignore_garbage(ignore_garbage);
|
const ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE: usize = 1_024_usize;
|
||||||
if let Some(wrap) = line_wrap {
|
|
||||||
data = data.line_wrap(wrap);
|
// These constants indicate that inputs with lengths divisible by these numbers will have no padding characters
|
||||||
}
|
// after encoding.
|
||||||
|
// For instance:
|
||||||
|
// "The quick brown"
|
||||||
|
// is 15 characters (divisible by 3), so it is encoded in Base64 without padding:
|
||||||
|
// "VGhlIHF1aWNrIGJyb3du"
|
||||||
|
// While:
|
||||||
|
// "The quick brown fox"
|
||||||
|
// is 19 characters, which is not divisible by 3, so its Base64 representation has padding:
|
||||||
|
// "VGhlIHF1aWNrIGJyb3duIGZveA=="
|
||||||
|
// The encoding logic in this function depends on these constants being correct, so do not modify
|
||||||
|
// them. Performance can be tuned by multiplying these numbers by a different multiple (see
|
||||||
|
// `ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE` above).
|
||||||
|
const BASE16_UN_PADDED_MULTIPLE: usize = 1_usize;
|
||||||
|
const BASE2_UN_PADDED_MULTIPLE: usize = 1_usize;
|
||||||
|
const BASE32_UN_PADDED_MULTIPLE: usize = 5_usize;
|
||||||
|
const BASE64_UN_PADDED_MULTIPLE: usize = 3_usize;
|
||||||
|
|
||||||
|
const BASE16_ENCODE_IN_CHUNKS_OF_SIZE: usize =
|
||||||
|
BASE16_UN_PADDED_MULTIPLE * ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE;
|
||||||
|
const BASE2_ENCODE_IN_CHUNKS_OF_SIZE: usize =
|
||||||
|
BASE2_UN_PADDED_MULTIPLE * ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE;
|
||||||
|
const BASE32_ENCODE_IN_CHUNKS_OF_SIZE: usize =
|
||||||
|
BASE32_UN_PADDED_MULTIPLE * ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE;
|
||||||
|
const BASE64_ENCODE_IN_CHUNKS_OF_SIZE: usize =
|
||||||
|
BASE64_UN_PADDED_MULTIPLE * ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE;
|
||||||
|
|
||||||
if decode {
|
if decode {
|
||||||
match data.decode() {
|
let mut data = Data::new(input, format);
|
||||||
|
|
||||||
|
match data.decode(ignore_garbage) {
|
||||||
Ok(s) => {
|
Ok(s) => {
|
||||||
// Silence the warning, as we want the error message
|
// Silence the warning, as we want the error message
|
||||||
#[allow(clippy::question_mark)]
|
#[allow(clippy::question_mark)]
|
||||||
|
@ -170,16 +205,307 @@ pub fn handle_input<R: Read>(
|
||||||
Err(_) => Err(USimpleError::new(1, "error: invalid input")),
|
Err(_) => Err(USimpleError::new(1, "error: invalid input")),
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
match data.encode() {
|
#[allow(clippy::identity_op)]
|
||||||
Ok(s) => {
|
let encoding_and_encode_in_chunks_of_size = match format {
|
||||||
wrap_print(&data, &s);
|
// Use naive approach for Z85, since the crate being used doesn't have the API needed
|
||||||
Ok(())
|
Format::Z85 => {
|
||||||
|
let mut data = Data::new(input, format);
|
||||||
|
|
||||||
|
let result = match data.encode() {
|
||||||
|
Ok(st) => {
|
||||||
|
wrap_print(&st, wrap.unwrap_or(WRAP_DEFAULT))?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
Err(EncodeError::InvalidInput) => {
|
||||||
|
Err(USimpleError::new(1, "error: invalid input"))
|
||||||
|
}
|
||||||
|
Err(_) => Err(USimpleError::new(
|
||||||
|
1,
|
||||||
|
"error: invalid input (length must be multiple of 4 characters)",
|
||||||
|
)),
|
||||||
|
};
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
Err(EncodeError::InvalidInput) => Err(USimpleError::new(1, "error: invalid input")),
|
|
||||||
Err(_) => Err(USimpleError::new(
|
// For these, use faster, new encoding logic
|
||||||
1,
|
Format::Base16 => (&HEXUPPER, BASE16_ENCODE_IN_CHUNKS_OF_SIZE),
|
||||||
"error: invalid input (length must be multiple of 4 characters)",
|
Format::Base2Lsbf => (&BASE2LSBF, BASE2_ENCODE_IN_CHUNKS_OF_SIZE),
|
||||||
)),
|
Format::Base2Msbf => (&BASE2MSBF, BASE2_ENCODE_IN_CHUNKS_OF_SIZE),
|
||||||
}
|
Format::Base32 => (&BASE32, BASE32_ENCODE_IN_CHUNKS_OF_SIZE),
|
||||||
|
Format::Base32Hex => (&BASE32HEX, BASE32_ENCODE_IN_CHUNKS_OF_SIZE),
|
||||||
|
Format::Base64 => (&BASE64, BASE64_ENCODE_IN_CHUNKS_OF_SIZE),
|
||||||
|
Format::Base64Url => (&BASE64URL, BASE64_ENCODE_IN_CHUNKS_OF_SIZE),
|
||||||
|
};
|
||||||
|
|
||||||
|
fast_encode::fast_encode(input, encoding_and_encode_in_chunks_of_size, wrap)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
mod fast_encode {
|
||||||
|
use crate::base_common::WRAP_DEFAULT;
|
||||||
|
use std::{
|
||||||
|
collections::VecDeque,
|
||||||
|
io::{self, ErrorKind, Read, StdoutLock, Write},
|
||||||
|
};
|
||||||
|
use uucore::{
|
||||||
|
encoding::for_fast_encode::Encoding,
|
||||||
|
error::{UResult, USimpleError},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// State used when line breaks have to be inserted into the encoded output.
struct LineWrapping {
    /// Number of encoded bytes placed on each output line before a `\n` is inserted
    line_length: usize,
    /// Scratch buffer in which complete, newline-terminated lines are assembled before
    /// they are written to stdout
    print_buffer: Vec<u8>,
}
|
||||||
|
|
||||||
|
// Start of helper functions
|
||||||
|
// Adapted from `encode_append` in the "data-encoding" crate
|
||||||
|
fn encode_append_vec_deque(encoding: &Encoding, input: &[u8], output: &mut VecDeque<u8>) {
|
||||||
|
let output_len = output.len();
|
||||||
|
|
||||||
|
output.resize(output_len + encoding.encode_len(input.len()), 0_u8);
|
||||||
|
|
||||||
|
let make_contiguous_result = output.make_contiguous();
|
||||||
|
|
||||||
|
encoding.encode_mut(input, &mut (make_contiguous_result[output_len..]));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Writes everything in `encoded_buffer` to stdout without inserting any line breaks.
///
/// When `is_cleanup` is `true`, a single trailing newline is written after the data and the
/// buffer is left untouched; otherwise the buffer is emptied so it can be refilled.
fn write_without_line_breaks(
    encoded_buffer: &mut VecDeque<u8>,
    stdout_lock: &mut StdoutLock,
    is_cleanup: bool,
) -> io::Result<()> {
    // TODO
    // A `VecDeque` is only required for `encoded_buffer` when line wrapping is enabled
    // (`make_contiguous` should be a no-op here), so a refactoring could avoid this call
    let pending = encoded_buffer.make_contiguous();

    stdout_lock.write_all(pending)?;

    if !is_cleanup {
        encoded_buffer.clear();

        return Ok(());
    }

    stdout_lock.write_all(b"\n")
}
|
||||||
|
|
||||||
|
fn write_with_line_breaks(
|
||||||
|
&mut LineWrapping {
|
||||||
|
ref line_length,
|
||||||
|
ref mut print_buffer,
|
||||||
|
}: &mut LineWrapping,
|
||||||
|
encoded_buffer: &mut VecDeque<u8>,
|
||||||
|
stdout_lock: &mut StdoutLock,
|
||||||
|
is_cleanup: bool,
|
||||||
|
) -> io::Result<()> {
|
||||||
|
let line_length_usize = *line_length;
|
||||||
|
|
||||||
|
assert!(line_length_usize > 0_usize);
|
||||||
|
|
||||||
|
let number_of_lines = encoded_buffer.len() / line_length_usize;
|
||||||
|
|
||||||
|
// How many bytes to take from the front of `encoded_buffer` and then write to stdout
|
||||||
|
let number_of_bytes_to_drain = number_of_lines * line_length_usize;
|
||||||
|
|
||||||
|
let line_wrap_size_minus_one = line_length_usize - 1_usize;
|
||||||
|
|
||||||
|
let mut i = 0_usize;
|
||||||
|
|
||||||
|
for ue in encoded_buffer.drain(0_usize..number_of_bytes_to_drain) {
|
||||||
|
print_buffer.push(ue);
|
||||||
|
|
||||||
|
if i == line_wrap_size_minus_one {
|
||||||
|
print_buffer.push(b'\n');
|
||||||
|
|
||||||
|
i = 0_usize;
|
||||||
|
} else {
|
||||||
|
i += 1_usize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stdout_lock.write_all(print_buffer)?;
|
||||||
|
|
||||||
|
if is_cleanup {
|
||||||
|
if encoded_buffer.is_empty() {
|
||||||
|
// Do not write a newline in this case, because two trailing newlines should never be printed
|
||||||
|
} else {
|
||||||
|
// Print the partial line, since this is cleanup and no more data is coming
|
||||||
|
stdout_lock.write_all(encoded_buffer.make_contiguous())?;
|
||||||
|
stdout_lock.write_all(b"\n")?;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
print_buffer.truncate(0_usize);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_to_stdout(
|
||||||
|
line_wrapping_option: &mut Option<LineWrapping>,
|
||||||
|
encoded_buffer: &mut VecDeque<u8>,
|
||||||
|
stdout_lock: &mut StdoutLock,
|
||||||
|
is_cleanup: bool,
|
||||||
|
) -> io::Result<()> {
|
||||||
|
// Write all data in `encoded_buffer` to stdout
|
||||||
|
if let &mut Some(ref mut li) = line_wrapping_option {
|
||||||
|
write_with_line_breaks(li, encoded_buffer, stdout_lock, is_cleanup)?;
|
||||||
|
} else {
|
||||||
|
write_without_line_breaks(encoded_buffer, stdout_lock, is_cleanup)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
// End of helper functions
|
||||||
|
|
||||||
|
// TODO
|
||||||
|
// It turns out the crate being used already supports line wrapping:
|
||||||
|
// https://docs.rs/data-encoding/latest/data_encoding/struct.Specification.html#wrap-output-when-encoding-1
|
||||||
|
// Check if that crate's line wrapping is faster than the wrapping being performed in this function
|
||||||
|
// `encoding` and `encode_in_chunks_of_size` are passed in a tuple to indicate that they are logically tied
|
||||||
|
pub fn fast_encode<R: Read>(
|
||||||
|
input: &mut R,
|
||||||
|
(encoding, encode_in_chunks_of_size): (&Encoding, usize),
|
||||||
|
line_wrap: Option<usize>,
|
||||||
|
) -> UResult<()> {
|
||||||
|
/// Rust uses 8 kibibytes
|
||||||
|
///
|
||||||
|
/// https://github.com/rust-lang/rust/blob/1a5a2240bc1b8cf0bcce7acb946c78d6493a4fd3/library/std/src/sys_common/io.rs#L3
|
||||||
|
const INPUT_BUFFER_SIZE: usize = 8_usize * 1_024_usize;
|
||||||
|
|
||||||
|
let mut line_wrapping_option = match line_wrap {
|
||||||
|
// Line wrapping is disabled because "-w"/"--wrap" was passed with "0"
|
||||||
|
Some(0_usize) => None,
|
||||||
|
// A custom line wrapping value was passed
|
||||||
|
Some(an) => Some(LineWrapping {
|
||||||
|
line_length: an,
|
||||||
|
print_buffer: Vec::<u8>::new(),
|
||||||
|
}),
|
||||||
|
// Line wrapping was not set, so the default is used
|
||||||
|
None => Some(LineWrapping {
|
||||||
|
line_length: WRAP_DEFAULT,
|
||||||
|
print_buffer: Vec::<u8>::new(),
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Start of buffers
|
||||||
|
// Data that was read from stdin
|
||||||
|
let mut input_buffer = vec![0_u8; INPUT_BUFFER_SIZE];
|
||||||
|
|
||||||
|
assert!(!input_buffer.is_empty());
|
||||||
|
|
||||||
|
// Data that was read from stdin but has not been encoded yet
|
||||||
|
let mut leftover_buffer = VecDeque::<u8>::new();
|
||||||
|
|
||||||
|
// Encoded data that needs to be written to stdout
|
||||||
|
let mut encoded_buffer = VecDeque::<u8>::new();
|
||||||
|
// End of buffers
|
||||||
|
|
||||||
|
let mut stdout_lock = io::stdout().lock();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match input.read(&mut input_buffer) {
|
||||||
|
Ok(bytes_read_from_input) => {
|
||||||
|
if bytes_read_from_input == 0_usize {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The part of `input_buffer` that was actually filled by the call to `read`
|
||||||
|
let read_buffer = &input_buffer[0_usize..bytes_read_from_input];
|
||||||
|
|
||||||
|
// How many bytes to steal from `read_buffer` to get `leftover_buffer` to the right size
|
||||||
|
let bytes_to_steal = encode_in_chunks_of_size - leftover_buffer.len();
|
||||||
|
|
||||||
|
if bytes_to_steal > bytes_read_from_input {
|
||||||
|
// Do not have enough data to encode a chunk, so copy data to `leftover_buffer` and read more
|
||||||
|
leftover_buffer.extend(read_buffer);
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Encode data in chunks, then place it in `encoded_buffer`
|
||||||
|
{
|
||||||
|
let bytes_to_chunk = if bytes_to_steal > 0 {
|
||||||
|
let (stolen_bytes, rest_of_read_buffer) =
|
||||||
|
read_buffer.split_at(bytes_to_steal);
|
||||||
|
|
||||||
|
leftover_buffer.extend(stolen_bytes);
|
||||||
|
|
||||||
|
// After appending the stolen bytes to `leftover_buffer`, it should be the right size
|
||||||
|
assert!(leftover_buffer.len() == encode_in_chunks_of_size);
|
||||||
|
|
||||||
|
// Encode the old unencoded data and the stolen bytes, and add the result to
|
||||||
|
// `encoded_buffer`
|
||||||
|
encode_append_vec_deque(
|
||||||
|
encoding,
|
||||||
|
leftover_buffer.make_contiguous(),
|
||||||
|
&mut encoded_buffer,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Reset `leftover_buffer`
|
||||||
|
leftover_buffer.truncate(0_usize);
|
||||||
|
|
||||||
|
rest_of_read_buffer
|
||||||
|
} else {
|
||||||
|
// Do not need to steal bytes from `read_buffer`
|
||||||
|
read_buffer
|
||||||
|
};
|
||||||
|
|
||||||
|
let chunks_exact = bytes_to_chunk.chunks_exact(encode_in_chunks_of_size);
|
||||||
|
|
||||||
|
let remainder = chunks_exact.remainder();
|
||||||
|
|
||||||
|
for sl in chunks_exact {
|
||||||
|
assert!(sl.len() == encode_in_chunks_of_size);
|
||||||
|
|
||||||
|
encode_append_vec_deque(encoding, sl, &mut encoded_buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
leftover_buffer.extend(remainder);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write all data in `encoded_buffer` to stdout
|
||||||
|
write_to_stdout(
|
||||||
|
&mut line_wrapping_option,
|
||||||
|
&mut encoded_buffer,
|
||||||
|
&mut stdout_lock,
|
||||||
|
false,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
Err(er) => {
|
||||||
|
if er.kind() == ErrorKind::Interrupted {
|
||||||
|
// TODO
|
||||||
|
// Retry reading?
|
||||||
|
}
|
||||||
|
|
||||||
|
return Err(USimpleError::new(1_i32, format!("read error: {er}")));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cleanup
|
||||||
|
// `input` has finished producing data, so the data remaining in the buffers needs to be encoded and printed
|
||||||
|
{
|
||||||
|
// Encode all remaining unencoded bytes, placing it in `encoded_buffer`
|
||||||
|
encode_append_vec_deque(
|
||||||
|
encoding,
|
||||||
|
leftover_buffer.make_contiguous(),
|
||||||
|
&mut encoded_buffer,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Write all data in `encoded_buffer` to stdout
|
||||||
|
// `is_cleanup` triggers special cleanup-only logic
|
||||||
|
write_to_stdout(
|
||||||
|
&mut line_wrapping_option,
|
||||||
|
&mut encoded_buffer,
|
||||||
|
&mut stdout_lock,
|
||||||
|
true,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,13 +6,19 @@
|
||||||
// spell-checker:ignore (strings) ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUV
|
// spell-checker:ignore (strings) ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUV
|
||||||
// spell-checker:ignore (encodings) lsbf msbf hexupper
|
// spell-checker:ignore (encodings) lsbf msbf hexupper
|
||||||
|
|
||||||
use std::io::{self, Read, Write};
|
use self::Format::*;
|
||||||
|
|
||||||
use data_encoding::{Encoding, BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER};
|
use data_encoding::{Encoding, BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER};
|
||||||
use data_encoding_macro::new_encoding;
|
use data_encoding_macro::new_encoding;
|
||||||
|
use std::io::{self, Read, Write};
|
||||||
|
|
||||||
#[cfg(feature = "thiserror")]
|
#[cfg(feature = "thiserror")]
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
|
// Re-export for the faster encoding logic
|
||||||
|
pub mod for_fast_encode {
|
||||||
|
pub use data_encoding::*;
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum DecodeError {
|
pub enum DecodeError {
|
||||||
#[error("{}", _0)]
|
#[error("{}", _0)]
|
||||||
|
@ -42,13 +48,12 @@ pub enum Format {
|
||||||
Base2Msbf,
|
Base2Msbf,
|
||||||
Z85,
|
Z85,
|
||||||
}
|
}
|
||||||
use self::Format::*;
|
|
||||||
|
|
||||||
const BASE2LSBF: Encoding = new_encoding! {
|
pub const BASE2LSBF: Encoding = new_encoding! {
|
||||||
symbols: "01",
|
symbols: "01",
|
||||||
bit_order: LeastSignificantFirst,
|
bit_order: LeastSignificantFirst,
|
||||||
};
|
};
|
||||||
const BASE2MSBF: Encoding = new_encoding! {
|
pub const BASE2MSBF: Encoding = new_encoding! {
|
||||||
symbols: "01",
|
symbols: "01",
|
||||||
bit_order: MostSignificantFirst,
|
bit_order: MostSignificantFirst,
|
||||||
};
|
};
|
||||||
|
@ -96,8 +101,6 @@ pub fn decode(f: Format, input: &[u8]) -> DecodeResult {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Data<R: Read> {
|
pub struct Data<R: Read> {
|
||||||
line_wrap: usize,
|
|
||||||
ignore_garbage: bool,
|
|
||||||
input: R,
|
input: R,
|
||||||
format: Format,
|
format: Format,
|
||||||
alphabet: &'static [u8],
|
alphabet: &'static [u8],
|
||||||
|
@ -106,8 +109,6 @@ pub struct Data<R: Read> {
|
||||||
impl<R: Read> Data<R> {
|
impl<R: Read> Data<R> {
|
||||||
pub fn new(input: R, format: Format) -> Self {
|
pub fn new(input: R, format: Format) -> Self {
|
||||||
Self {
|
Self {
|
||||||
line_wrap: 76,
|
|
||||||
ignore_garbage: false,
|
|
||||||
input,
|
input,
|
||||||
format,
|
format,
|
||||||
alphabet: match format {
|
alphabet: match format {
|
||||||
|
@ -123,22 +124,10 @@ impl<R: Read> Data<R> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[must_use]
|
pub fn decode(&mut self, ignore_garbage: bool) -> DecodeResult {
|
||||||
pub fn line_wrap(mut self, wrap: usize) -> Self {
|
|
||||||
self.line_wrap = wrap;
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
#[must_use]
|
|
||||||
pub fn ignore_garbage(mut self, ignore: bool) -> Self {
|
|
||||||
self.ignore_garbage = ignore;
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn decode(&mut self) -> DecodeResult {
|
|
||||||
let mut buf = vec![];
|
let mut buf = vec![];
|
||||||
self.input.read_to_end(&mut buf)?;
|
self.input.read_to_end(&mut buf)?;
|
||||||
if self.ignore_garbage {
|
if ignore_garbage {
|
||||||
buf.retain(|c| self.alphabet.contains(c));
|
buf.retain(|c| self.alphabet.contains(c));
|
||||||
} else {
|
} else {
|
||||||
buf.retain(|&c| c != b'\r' && c != b'\n');
|
buf.retain(|&c| c != b'\r' && c != b'\n');
|
||||||
|
@ -155,24 +144,27 @@ impl<R: Read> Data<R> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOTE: this will likely be phased out at some point
|
pub fn wrap_print(res: &str, line_wrap: usize) -> io::Result<()> {
|
||||||
pub fn wrap_print<R: Read>(data: &Data<R>, res: &str) {
|
|
||||||
let stdout = io::stdout();
|
let stdout = io::stdout();
|
||||||
wrap_write(stdout.lock(), data.line_wrap, res).unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn wrap_write<W: Write>(mut writer: W, line_wrap: usize, res: &str) -> io::Result<()> {
|
let mut stdout_lock = stdout.lock();
|
||||||
use std::cmp::min;
|
|
||||||
|
|
||||||
if line_wrap == 0 {
|
if line_wrap == 0 {
|
||||||
return write!(writer, "{res}");
|
stdout_lock.write_all(res.as_bytes())?;
|
||||||
}
|
} else {
|
||||||
|
let res_len = res.len();
|
||||||
|
|
||||||
let mut start = 0;
|
let mut start = 0;
|
||||||
while start < res.len() {
|
|
||||||
let end = min(start + line_wrap, res.len());
|
while start < res_len {
|
||||||
writeln!(writer, "{}", &res[start..end])?;
|
let start_plus_line_wrap = start + line_wrap;
|
||||||
start = end;
|
|
||||||
|
let end = start_plus_line_wrap.min(res_len);
|
||||||
|
|
||||||
|
writeln!(stdout_lock, "{}", &res[start..end])?;
|
||||||
|
|
||||||
|
start = end;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
|
@ -146,3 +146,36 @@ fn test_base64_file_not_found() {
|
||||||
.fails()
|
.fails()
|
||||||
.stderr_only("base64: a.txt: No such file or directory\n");
|
.stderr_only("base64: a.txt: No such file or directory\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The encoded output fills the last wrapped line exactly, so it must end with exactly one
/// newline rather than two.
#[test]
fn test_no_repeated_trailing_newline() {
    let input = "The quick brown fox jumps over the lazy dog.";

    // Six full lines of ten characters each
    let expected = concat!(
        "VGhlIHF1aW\n",
        "NrIGJyb3du\n",
        "IGZveCBqdW\n",
        "1wcyBvdmVy\n",
        "IHRoZSBsYX\n",
        "p5IGRvZy4=\n",
    );

    new_ucmd!()
        .args(&["--wrap", "10", "--", "-"])
        .pipe_in(input)
        .succeeds()
        .stdout_only(expected);
}
|
||||||
|
|
||||||
|
/// Without "-w"/"--wrap", the encoded output is wrapped at the default of 76 characters.
#[test]
fn test_wrap_default() {
    let input = "The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.";

    // Two full 76-character lines followed by a shorter final line
    let expected = concat!(
        "VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZy4gVGhlIHF1aWNrIGJy\n",
        "b3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZy4gVGhlIHF1aWNrIGJyb3duIGZveCBqdW1w\n",
        "cyBvdmVyIHRoZSBsYXp5IGRvZy4=\n",
    );

    new_ucmd!()
        .args(&["--", "-"])
        .pipe_in(input)
        .succeeds()
        .stdout_only(expected);
}
|
||||||
|
|
|
@ -26,7 +26,9 @@ fn test_invalid_input() {
|
||||||
let error_message = if cfg!(windows) {
|
let error_message = if cfg!(windows) {
|
||||||
"basenc: .: Permission denied\n"
|
"basenc: .: Permission denied\n"
|
||||||
} else {
|
} else {
|
||||||
"basenc: error: invalid input\n"
|
// TODO
|
||||||
|
// Other implementations do not show " (os error 21)"
|
||||||
|
"basenc: read error: Is a directory (os error 21)\n"
|
||||||
};
|
};
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["--base32", "."])
|
.args(&["--base32", "."])
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue