mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 19:47:45 +00:00
Address PR comments. Simplify structure.
This commit is contained in:
parent
ccb873519c
commit
4f19c0e9ff
3 changed files with 372 additions and 305 deletions
|
@ -3,28 +3,29 @@
|
||||||
// For the full copyright and license information, please view the LICENSE
|
// For the full copyright and license information, please view the LICENSE
|
||||||
// file that was distributed with this source code.
|
// file that was distributed with this source code.
|
||||||
|
|
||||||
// spell-checker:ignore HEXUPPER Lsbf Msbf
|
// spell-checker:ignore hexupper lsbf msbf unpadded
|
||||||
|
|
||||||
use clap::{crate_version, Arg, ArgAction, Command};
|
use clap::{crate_version, Arg, ArgAction, Command};
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{Read, Stdin};
|
use std::io::{ErrorKind, Read, Stdin};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use uucore::display::Quotable;
|
use uucore::display::Quotable;
|
||||||
use uucore::encoding::{
|
use uucore::encoding::{
|
||||||
for_fast_encode::{BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER},
|
for_fast_encode::{BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER},
|
||||||
Format, ZEightFiveWrapper, BASE2LSBF, BASE2MSBF,
|
Format, Z85Wrapper, BASE2LSBF, BASE2MSBF,
|
||||||
};
|
};
|
||||||
|
use uucore::encoding::{EncodingWrapper, SupportsFastDecodeAndEncode};
|
||||||
use uucore::error::{FromIo, UResult, USimpleError, UUsageError};
|
use uucore::error::{FromIo, UResult, USimpleError, UUsageError};
|
||||||
use uucore::format_usage;
|
use uucore::format_usage;
|
||||||
|
|
||||||
pub const BASE_CMD_PARSE_ERROR: i32 = 1_i32;
|
pub const BASE_CMD_PARSE_ERROR: i32 = 1;
|
||||||
|
|
||||||
/// Encoded output will be formatted in lines of this length (the last line can be shorter)
|
/// Encoded output will be formatted in lines of this length (the last line can be shorter)
|
||||||
///
|
///
|
||||||
/// Other implementations default to 76
|
/// Other implementations default to 76
|
||||||
///
|
///
|
||||||
/// This default is only used if no "-w"/"--wrap" argument is passed
|
/// This default is only used if no "-w"/"--wrap" argument is passed
|
||||||
const WRAP_DEFAULT: usize = 76_usize;
|
const WRAP_DEFAULT: usize = 76;
|
||||||
|
|
||||||
pub struct Config {
|
pub struct Config {
|
||||||
pub decode: bool,
|
pub decode: bool,
|
||||||
|
@ -157,160 +158,91 @@ pub fn handle_input<R: Read>(
|
||||||
ignore_garbage: bool,
|
ignore_garbage: bool,
|
||||||
decode: bool,
|
decode: bool,
|
||||||
) -> UResult<()> {
|
) -> UResult<()> {
|
||||||
const DECODE_AND_ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE: usize = 1_024_usize;
|
const BASE16_VALID_DECODING_MULTIPLE: usize = 2;
|
||||||
|
const BASE2_VALID_DECODING_MULTIPLE: usize = 8;
|
||||||
|
const BASE32_VALID_DECODING_MULTIPLE: usize = 8;
|
||||||
|
const BASE64_VALID_DECODING_MULTIPLE: usize = 4;
|
||||||
|
|
||||||
// These constants indicate that inputs with lengths divisible by these numbers will have no padding characters
|
const BASE16_UNPADDED_MULTIPLE: usize = 1;
|
||||||
// after encoding.
|
const BASE2_UNPADDED_MULTIPLE: usize = 1;
|
||||||
// For instance:
|
const BASE32_UNPADDED_MULTIPLE: usize = 5;
|
||||||
// "The quick brown"
|
const BASE64_UNPADDED_MULTIPLE: usize = 3;
|
||||||
// is 15 characters (divisible by 3), so it is encoded in Base64 without padding:
|
|
||||||
// "VGhlIHF1aWNrIGJyb3du"
|
|
||||||
// While:
|
|
||||||
// "The quick brown fox"
|
|
||||||
// is 19 characters, which is not divisible by 3, so its Base64 representation has padding:
|
|
||||||
// "VGhlIHF1aWNrIGJyb3duIGZveA=="
|
|
||||||
//
|
|
||||||
// The encoding performed by `fast_encode` depend on these constants being correct. Performance can be tuned by
|
|
||||||
// multiplying these numbers by a different multiple (see `DECODE_AND_ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE` above).
|
|
||||||
const BASE16_UN_PADDED_MULTIPLE: usize = 1_usize;
|
|
||||||
const BASE2_UN_PADDED_MULTIPLE: usize = 1_usize;
|
|
||||||
const BASE32_UN_PADDED_MULTIPLE: usize = 5_usize;
|
|
||||||
const BASE64_UN_PADDED_MULTIPLE: usize = 3_usize;
|
|
||||||
const Z85_UN_PADDED_MULTIPLE: usize = 4_usize;
|
|
||||||
|
|
||||||
// Similar to above, but for decoding
|
let supports_fast_decode_and_encode: Box<dyn SupportsFastDecodeAndEncode> = match format {
|
||||||
const BASE16_VALID_DECODING_MULTIPLE: usize = 2_usize;
|
Format::Base16 => Box::from(EncodingWrapper::new(
|
||||||
const BASE2_VALID_DECODING_MULTIPLE: usize = 8_usize;
|
HEXUPPER,
|
||||||
const BASE32_VALID_DECODING_MULTIPLE: usize = 8_usize;
|
BASE16_VALID_DECODING_MULTIPLE,
|
||||||
const BASE64_VALID_DECODING_MULTIPLE: usize = 4_usize;
|
BASE16_UNPADDED_MULTIPLE,
|
||||||
const Z85_VALID_DECODING_MULTIPLE: usize = 5_usize;
|
// spell-checker:disable-next-line
|
||||||
|
b"0123456789ABCDEF",
|
||||||
|
)),
|
||||||
|
Format::Base2Lsbf => Box::from(EncodingWrapper::new(
|
||||||
|
BASE2LSBF,
|
||||||
|
BASE2_VALID_DECODING_MULTIPLE,
|
||||||
|
BASE2_UNPADDED_MULTIPLE,
|
||||||
|
// spell-checker:disable-next-line
|
||||||
|
b"01",
|
||||||
|
)),
|
||||||
|
Format::Base2Msbf => Box::from(EncodingWrapper::new(
|
||||||
|
BASE2MSBF,
|
||||||
|
BASE2_VALID_DECODING_MULTIPLE,
|
||||||
|
BASE2_UNPADDED_MULTIPLE,
|
||||||
|
// spell-checker:disable-next-line
|
||||||
|
b"01",
|
||||||
|
)),
|
||||||
|
Format::Base32 => Box::from(EncodingWrapper::new(
|
||||||
|
BASE32,
|
||||||
|
BASE32_VALID_DECODING_MULTIPLE,
|
||||||
|
BASE32_UNPADDED_MULTIPLE,
|
||||||
|
// spell-checker:disable-next-line
|
||||||
|
b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567=",
|
||||||
|
)),
|
||||||
|
Format::Base32Hex => Box::from(EncodingWrapper::new(
|
||||||
|
BASE32HEX,
|
||||||
|
BASE32_VALID_DECODING_MULTIPLE,
|
||||||
|
BASE32_UNPADDED_MULTIPLE,
|
||||||
|
// spell-checker:disable-next-line
|
||||||
|
b"0123456789ABCDEFGHIJKLMNOPQRSTUV=",
|
||||||
|
)),
|
||||||
|
Format::Base64 => Box::from(EncodingWrapper::new(
|
||||||
|
BASE64,
|
||||||
|
BASE64_VALID_DECODING_MULTIPLE,
|
||||||
|
BASE64_UNPADDED_MULTIPLE,
|
||||||
|
// spell-checker:disable-next-line
|
||||||
|
b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=+/",
|
||||||
|
)),
|
||||||
|
Format::Base64Url => Box::from(EncodingWrapper::new(
|
||||||
|
BASE64URL,
|
||||||
|
BASE64_VALID_DECODING_MULTIPLE,
|
||||||
|
BASE64_UNPADDED_MULTIPLE,
|
||||||
|
// spell-checker:disable-next-line
|
||||||
|
b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=_-",
|
||||||
|
)),
|
||||||
|
Format::Z85 => Box::from(Z85Wrapper {}),
|
||||||
|
};
|
||||||
|
|
||||||
if decode {
|
if decode {
|
||||||
let (encoding, valid_decoding_multiple, alphabet): (_, _, &[u8]) = match format {
|
|
||||||
// Use naive approach (now only semi-naive) for Z85, since the crate being used doesn't have the API
|
|
||||||
// needed
|
|
||||||
Format::Z85 => {
|
|
||||||
// spell-checker:disable-next-line
|
|
||||||
let alphabet = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#";
|
|
||||||
|
|
||||||
fast_decode::fast_decode(
|
|
||||||
input,
|
|
||||||
(
|
|
||||||
ZEightFiveWrapper {},
|
|
||||||
Z85_VALID_DECODING_MULTIPLE * DECODE_AND_ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE,
|
|
||||||
alphabet,
|
|
||||||
),
|
|
||||||
ignore_garbage,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
// For these, use faster, new decoding logic
|
|
||||||
Format::Base16 => (
|
|
||||||
HEXUPPER,
|
|
||||||
BASE16_VALID_DECODING_MULTIPLE,
|
|
||||||
// spell-checker:disable-next-line
|
|
||||||
b"0123456789ABCDEF",
|
|
||||||
),
|
|
||||||
Format::Base2Lsbf => (
|
|
||||||
BASE2LSBF,
|
|
||||||
BASE2_VALID_DECODING_MULTIPLE,
|
|
||||||
// spell-checker:disable-next-line
|
|
||||||
b"01",
|
|
||||||
),
|
|
||||||
Format::Base2Msbf => (
|
|
||||||
BASE2MSBF,
|
|
||||||
BASE2_VALID_DECODING_MULTIPLE,
|
|
||||||
// spell-checker:disable-next-line
|
|
||||||
b"01",
|
|
||||||
),
|
|
||||||
Format::Base32 => (
|
|
||||||
BASE32,
|
|
||||||
BASE32_VALID_DECODING_MULTIPLE,
|
|
||||||
// spell-checker:disable-next-line
|
|
||||||
b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567=",
|
|
||||||
),
|
|
||||||
Format::Base32Hex => (
|
|
||||||
BASE32HEX,
|
|
||||||
BASE32_VALID_DECODING_MULTIPLE,
|
|
||||||
// spell-checker:disable-next-line
|
|
||||||
b"0123456789ABCDEFGHIJKLMNOPQRSTUV=",
|
|
||||||
),
|
|
||||||
Format::Base64 => (
|
|
||||||
BASE64,
|
|
||||||
BASE64_VALID_DECODING_MULTIPLE,
|
|
||||||
// spell-checker:disable-next-line
|
|
||||||
b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=+/",
|
|
||||||
),
|
|
||||||
Format::Base64Url => (
|
|
||||||
BASE64URL,
|
|
||||||
BASE64_VALID_DECODING_MULTIPLE,
|
|
||||||
// spell-checker:disable-next-line
|
|
||||||
b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=_-",
|
|
||||||
),
|
|
||||||
};
|
|
||||||
|
|
||||||
fast_decode::fast_decode(
|
fast_decode::fast_decode(
|
||||||
input,
|
input,
|
||||||
(
|
supports_fast_decode_and_encode.as_ref(),
|
||||||
encoding,
|
|
||||||
valid_decoding_multiple * DECODE_AND_ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE,
|
|
||||||
alphabet,
|
|
||||||
),
|
|
||||||
ignore_garbage,
|
ignore_garbage,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
Ok(())
|
|
||||||
} else {
|
} else {
|
||||||
let (encoding, un_padded_multiple) = match format {
|
fast_encode::fast_encode(input, supports_fast_decode_and_encode.as_ref(), wrap)?;
|
||||||
// Use naive approach for Z85 (now only semi-naive), since the crate being used doesn't have the API
|
|
||||||
// needed
|
|
||||||
Format::Z85 => {
|
|
||||||
fast_encode::fast_encode(
|
|
||||||
input,
|
|
||||||
(
|
|
||||||
ZEightFiveWrapper {},
|
|
||||||
Z85_UN_PADDED_MULTIPLE * DECODE_AND_ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE,
|
|
||||||
),
|
|
||||||
wrap,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
// For these, use faster, new encoding logic
|
|
||||||
Format::Base16 => (HEXUPPER, BASE16_UN_PADDED_MULTIPLE),
|
|
||||||
Format::Base2Lsbf => (BASE2LSBF, BASE2_UN_PADDED_MULTIPLE),
|
|
||||||
Format::Base2Msbf => (BASE2MSBF, BASE2_UN_PADDED_MULTIPLE),
|
|
||||||
Format::Base32 => (BASE32, BASE32_UN_PADDED_MULTIPLE),
|
|
||||||
Format::Base32Hex => (BASE32HEX, BASE32_UN_PADDED_MULTIPLE),
|
|
||||||
Format::Base64 => (BASE64, BASE64_UN_PADDED_MULTIPLE),
|
|
||||||
Format::Base64Url => (BASE64URL, BASE64_UN_PADDED_MULTIPLE),
|
|
||||||
};
|
|
||||||
|
|
||||||
fast_encode::fast_encode(
|
|
||||||
input,
|
|
||||||
(
|
|
||||||
encoding,
|
|
||||||
un_padded_multiple * DECODE_AND_ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE,
|
|
||||||
),
|
|
||||||
wrap,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
mod fast_encode {
|
mod fast_encode {
|
||||||
use crate::base_common::WRAP_DEFAULT;
|
use crate::base_common::{format_read_error, WRAP_DEFAULT};
|
||||||
use std::{
|
use std::{
|
||||||
collections::VecDeque,
|
collections::VecDeque,
|
||||||
io::{self, ErrorKind, Read, StdoutLock, Write},
|
io::{self, ErrorKind, Read, StdoutLock, Write},
|
||||||
num::NonZeroUsize,
|
num::NonZeroUsize,
|
||||||
};
|
};
|
||||||
use uucore::{
|
use uucore::{
|
||||||
encoding::SupportsFastEncode,
|
encoding::SupportsFastDecodeAndEncode,
|
||||||
error::{UResult, USimpleError},
|
error::{UResult, USimpleError},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -319,6 +251,52 @@ mod fast_encode {
|
||||||
print_buffer: Vec<u8>,
|
print_buffer: Vec<u8>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Start of helper functions
|
||||||
|
fn encode_in_chunks_to_buffer(
|
||||||
|
supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
|
||||||
|
encode_in_chunks_of_size: usize,
|
||||||
|
bytes_to_steal: usize,
|
||||||
|
read_buffer: &[u8],
|
||||||
|
encoded_buffer: &mut VecDeque<u8>,
|
||||||
|
leftover_buffer: &mut VecDeque<u8>,
|
||||||
|
) -> UResult<()> {
|
||||||
|
let bytes_to_chunk = if bytes_to_steal > 0 {
|
||||||
|
let (stolen_bytes, rest_of_read_buffer) = read_buffer.split_at(bytes_to_steal);
|
||||||
|
|
||||||
|
leftover_buffer.extend(stolen_bytes);
|
||||||
|
|
||||||
|
// After appending the stolen bytes to `leftover_buffer`, it should be the right size
|
||||||
|
assert!(leftover_buffer.len() == encode_in_chunks_of_size);
|
||||||
|
|
||||||
|
// Encode the old unencoded data and the stolen bytes, and add the result to
|
||||||
|
// `encoded_buffer`
|
||||||
|
supports_fast_decode_and_encode
|
||||||
|
.encode_to_vec_deque(leftover_buffer.make_contiguous(), encoded_buffer)?;
|
||||||
|
|
||||||
|
// Reset `leftover_buffer`
|
||||||
|
leftover_buffer.clear();
|
||||||
|
|
||||||
|
rest_of_read_buffer
|
||||||
|
} else {
|
||||||
|
// Do not need to steal bytes from `read_buffer`
|
||||||
|
read_buffer
|
||||||
|
};
|
||||||
|
|
||||||
|
let chunks_exact = bytes_to_chunk.chunks_exact(encode_in_chunks_of_size);
|
||||||
|
|
||||||
|
let remainder = chunks_exact.remainder();
|
||||||
|
|
||||||
|
for sl in chunks_exact {
|
||||||
|
assert!(sl.len() == encode_in_chunks_of_size);
|
||||||
|
|
||||||
|
supports_fast_decode_and_encode.encode_to_vec_deque(sl, encoded_buffer)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
leftover_buffer.extend(remainder);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
fn write_without_line_breaks(
|
fn write_without_line_breaks(
|
||||||
encoded_buffer: &mut VecDeque<u8>,
|
encoded_buffer: &mut VecDeque<u8>,
|
||||||
stdout_lock: &mut StdoutLock,
|
stdout_lock: &mut StdoutLock,
|
||||||
|
@ -348,13 +326,13 @@ mod fast_encode {
|
||||||
stdout_lock: &mut StdoutLock,
|
stdout_lock: &mut StdoutLock,
|
||||||
is_cleanup: bool,
|
is_cleanup: bool,
|
||||||
) -> io::Result<()> {
|
) -> io::Result<()> {
|
||||||
let line_length_usize = line_length.get();
|
let line_length = line_length.get();
|
||||||
|
|
||||||
let make_contiguous_result = encoded_buffer.make_contiguous();
|
let make_contiguous_result = encoded_buffer.make_contiguous();
|
||||||
|
|
||||||
let chunks_exact = make_contiguous_result.chunks_exact(line_length_usize);
|
let chunks_exact = make_contiguous_result.chunks_exact(line_length);
|
||||||
|
|
||||||
let mut bytes_added_to_print_buffer = 0_usize;
|
let mut bytes_added_to_print_buffer = 0;
|
||||||
|
|
||||||
for sl in chunks_exact {
|
for sl in chunks_exact {
|
||||||
bytes_added_to_print_buffer += sl.len();
|
bytes_added_to_print_buffer += sl.len();
|
||||||
|
@ -400,25 +378,27 @@ mod fast_encode {
|
||||||
}
|
}
|
||||||
// End of helper functions
|
// End of helper functions
|
||||||
|
|
||||||
// TODO
|
pub fn fast_encode<R: Read>(
|
||||||
// It turns out the crate being used already supports line wrapping:
|
|
||||||
// https://docs.rs/data-encoding/latest/data_encoding/struct.Specification.html#wrap-output-when-encoding-1
|
|
||||||
// Check if that crate's line wrapping is faster than the wrapping being performed in this function
|
|
||||||
// Update: That crate does not support arbitrary width line wrapping. It only supports certain widths:
|
|
||||||
// https://github.com/ia0/data-encoding/blob/4f42ad7ef242f6d243e4de90cd1b46a57690d00e/lib/src/lib.rs#L1710
|
|
||||||
//
|
|
||||||
/// `encoding` and `encode_in_chunks_of_size` are passed in a tuple to indicate that they are logically tied
|
|
||||||
pub fn fast_encode<R: Read, S: SupportsFastEncode>(
|
|
||||||
input: &mut R,
|
input: &mut R,
|
||||||
(supports_fast_encode, encode_in_chunks_of_size): (S, usize),
|
supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
|
||||||
line_wrap: Option<usize>,
|
line_wrap: Option<usize>,
|
||||||
) -> UResult<()> {
|
) -> UResult<()> {
|
||||||
// Based on performance testing
|
// Based on performance testing
|
||||||
const INPUT_BUFFER_SIZE: usize = 32_usize * 1_024_usize;
|
const INPUT_BUFFER_SIZE: usize = 32 * 1_024;
|
||||||
|
|
||||||
|
const ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE: usize = 1_024;
|
||||||
|
|
||||||
|
let encode_in_chunks_of_size =
|
||||||
|
supports_fast_decode_and_encode.unpadded_multiple() * ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE;
|
||||||
|
|
||||||
|
assert!(encode_in_chunks_of_size > 0);
|
||||||
|
|
||||||
|
// "data-encoding" supports line wrapping, but not arbitrary line wrapping, only certain widths, so line
|
||||||
|
// wrapping must be implemented here.
|
||||||
|
// https://github.com/ia0/data-encoding/blob/4f42ad7ef242f6d243e4de90cd1b46a57690d00e/lib/src/lib.rs#L1710
|
||||||
let mut line_wrapping_option = match line_wrap {
|
let mut line_wrapping_option = match line_wrap {
|
||||||
// Line wrapping is disabled because "-w"/"--wrap" was passed with "0"
|
// Line wrapping is disabled because "-w"/"--wrap" was passed with "0"
|
||||||
Some(0_usize) => None,
|
Some(0) => None,
|
||||||
// A custom line wrapping value was passed
|
// A custom line wrapping value was passed
|
||||||
Some(an) => Some(LineWrapping {
|
Some(an) => Some(LineWrapping {
|
||||||
line_length: NonZeroUsize::new(an).unwrap(),
|
line_length: NonZeroUsize::new(an).unwrap(),
|
||||||
|
@ -433,7 +413,7 @@ mod fast_encode {
|
||||||
|
|
||||||
// Start of buffers
|
// Start of buffers
|
||||||
// Data that was read from stdin
|
// Data that was read from stdin
|
||||||
let mut input_buffer = vec![0_u8; INPUT_BUFFER_SIZE];
|
let mut input_buffer = vec![0; INPUT_BUFFER_SIZE];
|
||||||
|
|
||||||
assert!(!input_buffer.is_empty());
|
assert!(!input_buffer.is_empty());
|
||||||
|
|
||||||
|
@ -449,7 +429,7 @@ mod fast_encode {
|
||||||
loop {
|
loop {
|
||||||
match input.read(&mut input_buffer) {
|
match input.read(&mut input_buffer) {
|
||||||
Ok(bytes_read_from_input) => {
|
Ok(bytes_read_from_input) => {
|
||||||
if bytes_read_from_input == 0_usize {
|
if bytes_read_from_input == 0 {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -467,44 +447,14 @@ mod fast_encode {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Encode data in chunks, then place it in `encoded_buffer`
|
// Encode data in chunks, then place it in `encoded_buffer`
|
||||||
{
|
encode_in_chunks_to_buffer(
|
||||||
let bytes_to_chunk = if bytes_to_steal > 0_usize {
|
supports_fast_decode_and_encode,
|
||||||
let (stolen_bytes, rest_of_read_buffer) =
|
encode_in_chunks_of_size,
|
||||||
read_buffer.split_at(bytes_to_steal);
|
bytes_to_steal,
|
||||||
|
read_buffer,
|
||||||
leftover_buffer.extend(stolen_bytes);
|
&mut encoded_buffer,
|
||||||
|
&mut leftover_buffer,
|
||||||
// After appending the stolen bytes to `leftover_buffer`, it should be the right size
|
)?;
|
||||||
assert!(leftover_buffer.len() == encode_in_chunks_of_size);
|
|
||||||
|
|
||||||
// Encode the old unencoded data and the stolen bytes, and add the result to
|
|
||||||
// `encoded_buffer`
|
|
||||||
supports_fast_encode.encode_to_vec_deque(
|
|
||||||
leftover_buffer.make_contiguous(),
|
|
||||||
&mut encoded_buffer,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
// Reset `leftover_buffer`
|
|
||||||
leftover_buffer.clear();
|
|
||||||
|
|
||||||
rest_of_read_buffer
|
|
||||||
} else {
|
|
||||||
// Do not need to steal bytes from `read_buffer`
|
|
||||||
read_buffer
|
|
||||||
};
|
|
||||||
|
|
||||||
let chunks_exact = bytes_to_chunk.chunks_exact(encode_in_chunks_of_size);
|
|
||||||
|
|
||||||
let remainder = chunks_exact.remainder();
|
|
||||||
|
|
||||||
for sl in chunks_exact {
|
|
||||||
assert!(sl.len() == encode_in_chunks_of_size);
|
|
||||||
|
|
||||||
supports_fast_encode.encode_to_vec_deque(sl, &mut encoded_buffer)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
leftover_buffer.extend(remainder);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write all data in `encoded_buffer` to stdout
|
// Write all data in `encoded_buffer` to stdout
|
||||||
write_to_stdout(
|
write_to_stdout(
|
||||||
|
@ -515,12 +465,14 @@ mod fast_encode {
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
Err(er) => {
|
Err(er) => {
|
||||||
if er.kind() == ErrorKind::Interrupted {
|
let kind = er.kind();
|
||||||
|
|
||||||
|
if kind == ErrorKind::Interrupted {
|
||||||
// TODO
|
// TODO
|
||||||
// Retry reading?
|
// Retry reading?
|
||||||
}
|
}
|
||||||
|
|
||||||
return Err(USimpleError::new(1_i32, format!("read error: {er}")));
|
return Err(USimpleError::new(1, format_read_error(kind)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -529,7 +481,7 @@ mod fast_encode {
|
||||||
// `input` has finished producing data, so the data remaining in the buffers needs to be encoded and printed
|
// `input` has finished producing data, so the data remaining in the buffers needs to be encoded and printed
|
||||||
{
|
{
|
||||||
// Encode all remaining unencoded bytes, placing them in `encoded_buffer`
|
// Encode all remaining unencoded bytes, placing them in `encoded_buffer`
|
||||||
supports_fast_encode
|
supports_fast_decode_and_encode
|
||||||
.encode_to_vec_deque(leftover_buffer.make_contiguous(), &mut encoded_buffer)?;
|
.encode_to_vec_deque(leftover_buffer.make_contiguous(), &mut encoded_buffer)?;
|
||||||
|
|
||||||
// Write all data in `encoded_buffer` to stdout
|
// Write all data in `encoded_buffer` to stdout
|
||||||
|
@ -547,19 +499,20 @@ mod fast_encode {
|
||||||
}
|
}
|
||||||
|
|
||||||
mod fast_decode {
|
mod fast_decode {
|
||||||
|
use crate::base_common::format_read_error;
|
||||||
use std::io::{self, ErrorKind, Read, StdoutLock, Write};
|
use std::io::{self, ErrorKind, Read, StdoutLock, Write};
|
||||||
use uucore::{
|
use uucore::{
|
||||||
encoding::SupportsFastDecode,
|
encoding::SupportsFastDecodeAndEncode,
|
||||||
error::{UResult, USimpleError},
|
error::{UResult, USimpleError},
|
||||||
};
|
};
|
||||||
|
|
||||||
// Start of helper functions
|
// Start of helper functions
|
||||||
pub fn alphabet_to_table(alphabet: &[u8], ignore_garbage: bool) -> [bool; 256_usize] {
|
fn alphabet_to_table(alphabet: &[u8], ignore_garbage: bool) -> [bool; 256] {
|
||||||
// If "ignore_garbage" is enabled, all characters outside the alphabet are ignored
|
// If "ignore_garbage" is enabled, all characters outside the alphabet are ignored
|
||||||
// If it is not enabled, only '\n' and '\r' are ignored
|
// If it is not enabled, only '\n' and '\r' are ignored
|
||||||
if ignore_garbage {
|
if ignore_garbage {
|
||||||
// Note: "false" here
|
// Note: "false" here
|
||||||
let mut table = [false; 256_usize];
|
let mut table = [false; 256];
|
||||||
|
|
||||||
// Pass through no characters except those in the alphabet
|
// Pass through no characters except those in the alphabet
|
||||||
for ue in alphabet {
|
for ue in alphabet {
|
||||||
|
@ -574,7 +527,7 @@ mod fast_decode {
|
||||||
table
|
table
|
||||||
} else {
|
} else {
|
||||||
// Note: "true" here
|
// Note: "true" here
|
||||||
let mut table = [true; 256_usize];
|
let mut table = [true; 256];
|
||||||
|
|
||||||
// Pass through all characters except '\n' and '\r'
|
// Pass through all characters except '\n' and '\r'
|
||||||
for ue in [b'\n', b'\r'] {
|
for ue in [b'\n', b'\r'] {
|
||||||
|
@ -590,6 +543,51 @@ mod fast_decode {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn decode_in_chunks_to_buffer(
|
||||||
|
supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
|
||||||
|
decode_in_chunks_of_size: usize,
|
||||||
|
bytes_to_steal: usize,
|
||||||
|
read_buffer_filtered: &[u8],
|
||||||
|
decoded_buffer: &mut Vec<u8>,
|
||||||
|
leftover_buffer: &mut Vec<u8>,
|
||||||
|
) -> UResult<()> {
|
||||||
|
let bytes_to_chunk = if bytes_to_steal > 0 {
|
||||||
|
let (stolen_bytes, rest_of_read_buffer_filtered) =
|
||||||
|
read_buffer_filtered.split_at(bytes_to_steal);
|
||||||
|
|
||||||
|
leftover_buffer.extend(stolen_bytes);
|
||||||
|
|
||||||
|
// After appending the stolen bytes to `leftover_buffer`, it should be the right size
|
||||||
|
assert!(leftover_buffer.len() == decode_in_chunks_of_size);
|
||||||
|
|
||||||
|
// Decode the old un-decoded data and the stolen bytes, and add the result to
|
||||||
|
// `decoded_buffer`
|
||||||
|
supports_fast_decode_and_encode.decode_into_vec(leftover_buffer, decoded_buffer)?;
|
||||||
|
|
||||||
|
// Reset `leftover_buffer`
|
||||||
|
leftover_buffer.clear();
|
||||||
|
|
||||||
|
rest_of_read_buffer_filtered
|
||||||
|
} else {
|
||||||
|
// Do not need to steal bytes from `read_buffer`
|
||||||
|
read_buffer_filtered
|
||||||
|
};
|
||||||
|
|
||||||
|
let chunks_exact = bytes_to_chunk.chunks_exact(decode_in_chunks_of_size);
|
||||||
|
|
||||||
|
let remainder = chunks_exact.remainder();
|
||||||
|
|
||||||
|
for sl in chunks_exact {
|
||||||
|
assert!(sl.len() == decode_in_chunks_of_size);
|
||||||
|
|
||||||
|
supports_fast_decode_and_encode.decode_into_vec(sl, decoded_buffer)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
leftover_buffer.extend(remainder);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
fn write_to_stdout(
|
fn write_to_stdout(
|
||||||
decoded_buffer: &mut Vec<u8>,
|
decoded_buffer: &mut Vec<u8>,
|
||||||
stdout_lock: &mut StdoutLock,
|
stdout_lock: &mut StdoutLock,
|
||||||
|
@ -605,13 +603,21 @@ mod fast_decode {
|
||||||
|
|
||||||
/// `encoding`, `decode_in_chunks_of_size`, and `alphabet` are passed in a tuple to indicate that they are
|
/// `encoding`, `decode_in_chunks_of_size`, and `alphabet` are passed in a tuple to indicate that they are
|
||||||
/// logically tied
|
/// logically tied
|
||||||
pub fn fast_decode<R: Read, S: SupportsFastDecode>(
|
pub fn fast_decode<R: Read>(
|
||||||
input: &mut R,
|
input: &mut R,
|
||||||
(supports_fast_decode, decode_in_chunks_of_size, alphabet): (S, usize, &[u8]),
|
supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
|
||||||
ignore_garbage: bool,
|
ignore_garbage: bool,
|
||||||
) -> UResult<()> {
|
) -> UResult<()> {
|
||||||
// Based on performance testing
|
// Based on performance testing
|
||||||
const INPUT_BUFFER_SIZE: usize = 32_usize * 1_024_usize;
|
const INPUT_BUFFER_SIZE: usize = 32 * 1_024;
|
||||||
|
|
||||||
|
const DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE: usize = 1_024;
|
||||||
|
|
||||||
|
let alphabet = supports_fast_decode_and_encode.alphabet();
|
||||||
|
let decode_in_chunks_of_size = supports_fast_decode_and_encode.valid_decoding_multiple()
|
||||||
|
* DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE;
|
||||||
|
|
||||||
|
assert!(decode_in_chunks_of_size > 0);
|
||||||
|
|
||||||
// Note that it's not worth using "data-encoding"'s ignore functionality if "ignore_garbage" is true, because
|
// Note that it's not worth using "data-encoding"'s ignore functionality if "ignore_garbage" is true, because
|
||||||
// "data-encoding"'s ignore functionality cannot discard non-ASCII bytes. The data has to be filtered before
|
// "data-encoding"'s ignore functionality cannot discard non-ASCII bytes. The data has to be filtered before
|
||||||
|
@ -627,7 +633,7 @@ mod fast_decode {
|
||||||
|
|
||||||
// Start of buffers
|
// Start of buffers
|
||||||
// Data that was read from stdin
|
// Data that was read from stdin
|
||||||
let mut input_buffer = vec![0_u8; INPUT_BUFFER_SIZE];
|
let mut input_buffer = vec![0; INPUT_BUFFER_SIZE];
|
||||||
|
|
||||||
assert!(!input_buffer.is_empty());
|
assert!(!input_buffer.is_empty());
|
||||||
|
|
||||||
|
@ -647,7 +653,7 @@ mod fast_decode {
|
||||||
loop {
|
loop {
|
||||||
match input.read(&mut input_buffer) {
|
match input.read(&mut input_buffer) {
|
||||||
Ok(bytes_read_from_input) => {
|
Ok(bytes_read_from_input) => {
|
||||||
if bytes_read_from_input == 0_usize {
|
if bytes_read_from_input == 0 {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -689,53 +695,27 @@ mod fast_decode {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decode data in chunks, then place it in `decoded_buffer`
|
// Decode data in chunks, then place it in `decoded_buffer`
|
||||||
{
|
decode_in_chunks_to_buffer(
|
||||||
let bytes_to_chunk = if bytes_to_steal > 0_usize {
|
supports_fast_decode_and_encode,
|
||||||
let (stolen_bytes, rest_of_read_buffer_filtered) =
|
decode_in_chunks_of_size,
|
||||||
read_buffer_filtered.split_at(bytes_to_steal);
|
bytes_to_steal,
|
||||||
|
read_buffer_filtered,
|
||||||
leftover_buffer.extend(stolen_bytes);
|
&mut leftover_buffer,
|
||||||
|
&mut decoded_buffer,
|
||||||
// After appending the stolen bytes to `leftover_buffer`, it should be the right size
|
)?;
|
||||||
assert!(leftover_buffer.len() == decode_in_chunks_of_size);
|
|
||||||
|
|
||||||
// Decode the old un-decoded data and the stolen bytes, and add the result to
|
|
||||||
// `decoded_buffer`
|
|
||||||
supports_fast_decode
|
|
||||||
.decode_into_vec(&leftover_buffer, &mut decoded_buffer)?;
|
|
||||||
|
|
||||||
// Reset `leftover_buffer`
|
|
||||||
leftover_buffer.clear();
|
|
||||||
|
|
||||||
rest_of_read_buffer_filtered
|
|
||||||
} else {
|
|
||||||
// Do not need to steal bytes from `read_buffer`
|
|
||||||
read_buffer_filtered
|
|
||||||
};
|
|
||||||
|
|
||||||
let chunks_exact = bytes_to_chunk.chunks_exact(decode_in_chunks_of_size);
|
|
||||||
|
|
||||||
let remainder = chunks_exact.remainder();
|
|
||||||
|
|
||||||
for sl in chunks_exact {
|
|
||||||
assert!(sl.len() == decode_in_chunks_of_size);
|
|
||||||
|
|
||||||
supports_fast_decode.decode_into_vec(sl, &mut decoded_buffer)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
leftover_buffer.extend(remainder);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write all data in `decoded_buffer` to stdout
|
// Write all data in `decoded_buffer` to stdout
|
||||||
write_to_stdout(&mut decoded_buffer, &mut stdout_lock)?;
|
write_to_stdout(&mut decoded_buffer, &mut stdout_lock)?;
|
||||||
}
|
}
|
||||||
Err(er) => {
|
Err(er) => {
|
||||||
if er.kind() == ErrorKind::Interrupted {
|
let kind = er.kind();
|
||||||
|
|
||||||
|
if kind == ErrorKind::Interrupted {
|
||||||
// TODO
|
// TODO
|
||||||
// Retry reading?
|
// Retry reading?
|
||||||
}
|
}
|
||||||
|
|
||||||
return Err(USimpleError::new(1_i32, format!("read error: {er}")));
|
return Err(USimpleError::new(1, format_read_error(kind)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -744,7 +724,8 @@ mod fast_decode {
|
||||||
// `input` has finished producing data, so the data remaining in the buffers needs to be decoded and printed
|
// `input` has finished producing data, so the data remaining in the buffers needs to be decoded and printed
|
||||||
{
|
{
|
||||||
// Decode all remaining encoded bytes, placing them in `decoded_buffer`
|
// Decode all remaining encoded bytes, placing them in `decoded_buffer`
|
||||||
supports_fast_decode.decode_into_vec(&leftover_buffer, &mut decoded_buffer)?;
|
supports_fast_decode_and_encode
|
||||||
|
.decode_into_vec(&leftover_buffer, &mut decoded_buffer)?;
|
||||||
|
|
||||||
// Write all data in `decoded_buffer` to stdout
|
// Write all data in `decoded_buffer` to stdout
|
||||||
write_to_stdout(&mut decoded_buffer, &mut stdout_lock)?;
|
write_to_stdout(&mut decoded_buffer, &mut stdout_lock)?;
|
||||||
|
@ -753,3 +734,21 @@ mod fast_decode {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn format_read_error(kind: ErrorKind) -> String {
|
||||||
|
let kind_string = kind.to_string();
|
||||||
|
|
||||||
|
// e.g. "is a directory" -> "Is a directory"
|
||||||
|
let kind_string_uncapitalized = kind_string
|
||||||
|
.char_indices()
|
||||||
|
.map(|(index, character)| {
|
||||||
|
if index == 0 {
|
||||||
|
character.to_uppercase().collect::<String>()
|
||||||
|
} else {
|
||||||
|
character.to_string()
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect::<String>();
|
||||||
|
|
||||||
|
format!("read error: {kind_string_uncapitalized}")
|
||||||
|
}
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
// For the full copyright and license information, please view the LICENSE
|
// For the full copyright and license information, please view the LICENSE
|
||||||
// file that was distributed with this source code.
|
// file that was distributed with this source code.
|
||||||
|
|
||||||
// spell-checker:ignore (encodings) lsbf msbf
|
// spell-checker:ignore (encodings) lsbf msbf unpadded
|
||||||
|
|
||||||
use crate::error::{UResult, USimpleError};
|
use crate::error::{UResult, USimpleError};
|
||||||
use data_encoding::Encoding;
|
use data_encoding::Encoding;
|
||||||
|
@ -41,19 +41,106 @@ pub const BASE2MSBF: Encoding = new_encoding! {
|
||||||
bit_order: MostSignificantFirst,
|
bit_order: MostSignificantFirst,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub struct ZEightFiveWrapper {}
|
pub struct Z85Wrapper {}
|
||||||
|
|
||||||
pub trait SupportsFastEncode {
|
pub struct EncodingWrapper {
|
||||||
fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque<u8>) -> UResult<()>;
|
pub alphabet: &'static [u8],
|
||||||
|
pub encoding: Encoding,
|
||||||
|
pub unpadded_multiple: usize,
|
||||||
|
pub valid_decoding_multiple: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SupportsFastEncode for ZEightFiveWrapper {
|
impl EncodingWrapper {
|
||||||
|
pub fn new(
|
||||||
|
encoding: Encoding,
|
||||||
|
valid_decoding_multiple: usize,
|
||||||
|
unpadded_multiple: usize,
|
||||||
|
alphabet: &'static [u8],
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
alphabet,
|
||||||
|
encoding,
|
||||||
|
unpadded_multiple,
|
||||||
|
valid_decoding_multiple,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait SupportsFastDecodeAndEncode {
|
||||||
|
/// Returns the list of characters used by this encoding
|
||||||
|
fn alphabet(&self) -> &'static [u8];
|
||||||
|
|
||||||
|
fn decode_into_vec(&self, input: &[u8], output: &mut Vec<u8>) -> UResult<()>;
|
||||||
|
|
||||||
|
fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque<u8>) -> UResult<()>;
|
||||||
|
|
||||||
|
/// Inputs with a length that is a multiple of this number do not have padding when encoded. For instance:
|
||||||
|
///
|
||||||
|
/// "The quick brown"
|
||||||
|
///
|
||||||
|
/// is 15 characters (divisible by 3), so it is encoded in Base64 without padding:
|
||||||
|
///
|
||||||
|
/// "VGhlIHF1aWNrIGJyb3du"
|
||||||
|
///
|
||||||
|
/// While:
|
||||||
|
///
|
||||||
|
/// "The quick brown fox"
|
||||||
|
///
|
||||||
|
/// is 19 characters, which is not divisible by 3, so its Base64 representation has padding:
|
||||||
|
///
|
||||||
|
/// "VGhlIHF1aWNrIGJyb3duIGZveA=="
|
||||||
|
///
|
||||||
|
/// The encoding performed by `fast_encode` depends on this number being correct.
|
||||||
|
fn unpadded_multiple(&self) -> usize;
|
||||||
|
|
||||||
|
/// Data to decode must be a length that is multiple of this number
|
||||||
|
///
|
||||||
|
/// The decoding performed by `fast_decode` depends on this number being correct.
|
||||||
|
fn valid_decoding_multiple(&self) -> usize;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SupportsFastDecodeAndEncode for Z85Wrapper {
|
||||||
|
fn alphabet(&self) -> &'static [u8] {
|
||||||
|
// Z85 alphabet
|
||||||
|
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decode_into_vec(&self, input: &[u8], output: &mut Vec<u8>) -> UResult<()> {
|
||||||
|
if input.first() == Some(&b'#') {
|
||||||
|
return Err(USimpleError::new(1, "error: invalid input".to_owned()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// According to the spec we should not accept inputs whose len is not a multiple of 4.
|
||||||
|
// However, the z85 crate implements a padded encoding and accepts such inputs. We have to manually check for them.
|
||||||
|
if input.len() % 4 != 0 {
|
||||||
|
return Err(USimpleError::new(
|
||||||
|
1,
|
||||||
|
"error: invalid input (length must be multiple of 4 characters)".to_owned(),
|
||||||
|
));
|
||||||
|
};
|
||||||
|
|
||||||
|
let decode_result = match z85::decode(input) {
|
||||||
|
Ok(ve) => ve,
|
||||||
|
Err(_de) => {
|
||||||
|
return Err(USimpleError::new(1, "error: invalid input".to_owned()));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
output.extend_from_slice(&decode_result);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn valid_decoding_multiple(&self) -> usize {
|
||||||
|
5
|
||||||
|
}
|
||||||
|
|
||||||
fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque<u8>) -> UResult<()> {
|
fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque<u8>) -> UResult<()> {
|
||||||
// According to the spec we should not accept inputs whose len is not a multiple of 4.
|
// According to the spec we should not accept inputs whose len is not a multiple of 4.
|
||||||
// However, the z85 crate implements a padded encoding and accepts such inputs. We have to manually check for them.
|
// However, the z85 crate implements a padded encoding and accepts such inputs. We have to manually check for them.
|
||||||
if input.len() % 4_usize != 0_usize {
|
if input.len() % 4 != 0 {
|
||||||
return Err(USimpleError::new(
|
return Err(USimpleError::new(
|
||||||
1_i32,
|
1,
|
||||||
"error: invalid input (length must be multiple of 4 characters)".to_owned(),
|
"error: invalid input (length must be multiple of 4 characters)".to_owned(),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
@ -64,70 +151,31 @@ impl SupportsFastEncode for ZEightFiveWrapper {
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
impl SupportsFastEncode for Encoding {
|
fn unpadded_multiple(&self) -> usize {
|
||||||
// Adapted from `encode_append` in the "data-encoding" crate
|
4
|
||||||
fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque<u8>) -> UResult<()> {
|
|
||||||
let output_len = output.len();
|
|
||||||
|
|
||||||
output.resize(output_len + self.encode_len(input.len()), 0_u8);
|
|
||||||
|
|
||||||
let make_contiguous_result = output.make_contiguous();
|
|
||||||
|
|
||||||
self.encode_mut(input, &mut (make_contiguous_result[output_len..]));
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait SupportsFastDecode {
|
impl SupportsFastDecodeAndEncode for EncodingWrapper {
|
||||||
fn decode_into_vec(&self, input: &[u8], output: &mut Vec<u8>) -> UResult<()>;
|
fn alphabet(&self) -> &'static [u8] {
|
||||||
}
|
self.alphabet
|
||||||
|
|
||||||
impl SupportsFastDecode for ZEightFiveWrapper {
|
|
||||||
fn decode_into_vec(&self, input: &[u8], output: &mut Vec<u8>) -> UResult<()> {
|
|
||||||
if input.first() == Some(&b'#') {
|
|
||||||
return Err(USimpleError::new(1_i32, "error: invalid input".to_owned()));
|
|
||||||
}
|
|
||||||
|
|
||||||
// According to the spec we should not accept inputs whose len is not a multiple of 4.
|
|
||||||
// However, the z85 crate implements a padded encoding and accepts such inputs. We have to manually check for them.
|
|
||||||
if input.len() % 4_usize != 0_usize {
|
|
||||||
return Err(USimpleError::new(
|
|
||||||
1_i32,
|
|
||||||
"error: invalid input (length must be multiple of 4 characters)".to_owned(),
|
|
||||||
));
|
|
||||||
};
|
|
||||||
|
|
||||||
let decode_result = match z85::decode(input) {
|
|
||||||
Ok(ve) => ve,
|
|
||||||
Err(_de) => {
|
|
||||||
return Err(USimpleError::new(1_i32, "error: invalid input".to_owned()));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
output.extend_from_slice(&decode_result);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
impl SupportsFastDecode for Encoding {
|
|
||||||
// Adapted from `decode` in the "data-encoding" crate
|
// Adapted from `decode` in the "data-encoding" crate
|
||||||
fn decode_into_vec(&self, input: &[u8], output: &mut Vec<u8>) -> UResult<()> {
|
fn decode_into_vec(&self, input: &[u8], output: &mut Vec<u8>) -> UResult<()> {
|
||||||
let decode_len_result = match self.decode_len(input.len()) {
|
let decode_len_result = match self.encoding.decode_len(input.len()) {
|
||||||
Ok(us) => us,
|
Ok(us) => us,
|
||||||
Err(_de) => {
|
Err(_de) => {
|
||||||
return Err(USimpleError::new(1_i32, "error: invalid input".to_owned()));
|
return Err(USimpleError::new(1, "error: invalid input".to_owned()));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let output_len = output.len();
|
let output_len = output.len();
|
||||||
|
|
||||||
output.resize(output_len + decode_len_result, 0_u8);
|
output.resize(output_len + decode_len_result, 0);
|
||||||
|
|
||||||
match self.decode_mut(input, &mut (output[output_len..])) {
|
match self.encoding.decode_mut(input, &mut (output[output_len..])) {
|
||||||
Ok(us) => {
|
Ok(us) => {
|
||||||
// See:
|
// See:
|
||||||
// https://docs.rs/data-encoding/latest/data_encoding/struct.Encoding.html#method.decode_mut
|
// https://docs.rs/data-encoding/latest/data_encoding/struct.Encoding.html#method.decode_mut
|
||||||
|
@ -135,10 +183,32 @@ impl SupportsFastDecode for Encoding {
|
||||||
output.truncate(output_len + us);
|
output.truncate(output_len + us);
|
||||||
}
|
}
|
||||||
Err(_de) => {
|
Err(_de) => {
|
||||||
return Err(USimpleError::new(1_i32, "error: invalid input".to_owned()));
|
return Err(USimpleError::new(1, "error: invalid input".to_owned()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn valid_decoding_multiple(&self) -> usize {
|
||||||
|
self.valid_decoding_multiple
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adapted from `encode_append` in the "data-encoding" crate
|
||||||
|
fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque<u8>) -> UResult<()> {
|
||||||
|
let output_len = output.len();
|
||||||
|
|
||||||
|
output.resize(output_len + self.encoding.encode_len(input.len()), 0);
|
||||||
|
|
||||||
|
let make_contiguous_result = output.make_contiguous();
|
||||||
|
|
||||||
|
self.encoding
|
||||||
|
.encode_mut(input, &mut (make_contiguous_result[output_len..]));
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn unpadded_multiple(&self) -> usize {
|
||||||
|
self.unpadded_multiple
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,9 +31,7 @@ fn test_invalid_input() {
|
||||||
let error_message = if cfg!(windows) {
|
let error_message = if cfg!(windows) {
|
||||||
"basenc: .: Permission denied\n"
|
"basenc: .: Permission denied\n"
|
||||||
} else {
|
} else {
|
||||||
// TODO
|
"basenc: read error: Is a directory\n"
|
||||||
// Other implementations do not show " (os error 21)"
|
|
||||||
"basenc: read error: Is a directory (os error 21)\n"
|
|
||||||
};
|
};
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["--base32", "."])
|
.args(&["--base32", "."])
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue