From 4f19c0e9ffcfde474291bcc39f487b2437e909df Mon Sep 17 00:00:00 2001 From: Andrew Liebenow Date: Tue, 24 Sep 2024 07:49:28 -0500 Subject: [PATCH] Address PR comments. Simplify structure. --- src/uu/base32/src/base_common.rs | 491 ++++++++++++------------ src/uucore/src/lib/features/encoding.rs | 182 ++++++--- tests/by-util/test_basenc.rs | 4 +- 3 files changed, 372 insertions(+), 305 deletions(-) diff --git a/src/uu/base32/src/base_common.rs b/src/uu/base32/src/base_common.rs index 9947f1e9c..5bee873da 100644 --- a/src/uu/base32/src/base_common.rs +++ b/src/uu/base32/src/base_common.rs @@ -3,28 +3,29 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore HEXUPPER Lsbf Msbf +// spell-checker:ignore hexupper lsbf msbf unpadded use clap::{crate_version, Arg, ArgAction, Command}; use std::fs::File; -use std::io::{Read, Stdin}; +use std::io::{ErrorKind, Read, Stdin}; use std::path::Path; use uucore::display::Quotable; use uucore::encoding::{ for_fast_encode::{BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER}, - Format, ZEightFiveWrapper, BASE2LSBF, BASE2MSBF, + Format, Z85Wrapper, BASE2LSBF, BASE2MSBF, }; +use uucore::encoding::{EncodingWrapper, SupportsFastDecodeAndEncode}; use uucore::error::{FromIo, UResult, USimpleError, UUsageError}; use uucore::format_usage; -pub const BASE_CMD_PARSE_ERROR: i32 = 1_i32; +pub const BASE_CMD_PARSE_ERROR: i32 = 1; /// Encoded output will be formatted in lines of this length (the last line can be shorter) /// /// Other implementations default to 76 /// /// This default is only used if no "-w"/"--wrap" argument is passed -const WRAP_DEFAULT: usize = 76_usize; +const WRAP_DEFAULT: usize = 76; pub struct Config { pub decode: bool, @@ -157,160 +158,91 @@ pub fn handle_input( ignore_garbage: bool, decode: bool, ) -> UResult<()> { - const DECODE_AND_ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE: usize = 1_024_usize; + const BASE16_VALID_DECODING_MULTIPLE: usize = 2; + const BASE2_VALID_DECODING_MULTIPLE: usize = 8; + const BASE32_VALID_DECODING_MULTIPLE: usize = 8; + const BASE64_VALID_DECODING_MULTIPLE: usize = 4; - // These constants indicate that inputs with lengths divisible by these numbers will have no padding characters - // after encoding. - // For instance: - // "The quick brown" - // is 15 characters (divisible by 3), so it is encoded in Base64 without padding: - // "VGhlIHF1aWNrIGJyb3du" - // While: - // "The quick brown fox" - // is 19 characters, which is not divisible by 3, so its Base64 representation has padding: - // "VGhlIHF1aWNrIGJyb3duIGZveA==" - // - // The encoding performed by `fast_encode` depend on these constants being correct. Performance can be tuned by - // multiplying these numbers by a different multiple (see `DECODE_AND_ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE` above). - const BASE16_UN_PADDED_MULTIPLE: usize = 1_usize; - const BASE2_UN_PADDED_MULTIPLE: usize = 1_usize; - const BASE32_UN_PADDED_MULTIPLE: usize = 5_usize; - const BASE64_UN_PADDED_MULTIPLE: usize = 3_usize; - const Z85_UN_PADDED_MULTIPLE: usize = 4_usize; + const BASE16_UNPADDED_MULTIPLE: usize = 1; + const BASE2_UNPADDED_MULTIPLE: usize = 1; + const BASE32_UNPADDED_MULTIPLE: usize = 5; + const BASE64_UNPADDED_MULTIPLE: usize = 3; - // Similar to above, but for decoding - const BASE16_VALID_DECODING_MULTIPLE: usize = 2_usize; - const BASE2_VALID_DECODING_MULTIPLE: usize = 8_usize; - const BASE32_VALID_DECODING_MULTIPLE: usize = 8_usize; - const BASE64_VALID_DECODING_MULTIPLE: usize = 4_usize; - const Z85_VALID_DECODING_MULTIPLE: usize = 5_usize; + let supports_fast_decode_and_encode: Box = match format { + Format::Base16 => Box::from(EncodingWrapper::new( + HEXUPPER, + BASE16_VALID_DECODING_MULTIPLE, + BASE16_UNPADDED_MULTIPLE, + // spell-checker:disable-next-line + b"0123456789ABCDEF", + )), + Format::Base2Lsbf => Box::from(EncodingWrapper::new( + BASE2LSBF, + BASE2_VALID_DECODING_MULTIPLE, + BASE2_UNPADDED_MULTIPLE, + // spell-checker:disable-next-line + b"01", + )), + Format::Base2Msbf => Box::from(EncodingWrapper::new( + BASE2MSBF, + BASE2_VALID_DECODING_MULTIPLE, + BASE2_UNPADDED_MULTIPLE, + // spell-checker:disable-next-line + b"01", + )), + Format::Base32 => Box::from(EncodingWrapper::new( + BASE32, + BASE32_VALID_DECODING_MULTIPLE, + BASE32_UNPADDED_MULTIPLE, + // spell-checker:disable-next-line + b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567=", + )), + Format::Base32Hex => Box::from(EncodingWrapper::new( + BASE32HEX, + BASE32_VALID_DECODING_MULTIPLE, + BASE32_UNPADDED_MULTIPLE, + // spell-checker:disable-next-line + b"0123456789ABCDEFGHIJKLMNOPQRSTUV=", + )), + Format::Base64 => Box::from(EncodingWrapper::new( + BASE64, + BASE64_VALID_DECODING_MULTIPLE, + BASE64_UNPADDED_MULTIPLE, + // spell-checker:disable-next-line + b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=+/", + )), + Format::Base64Url => Box::from(EncodingWrapper::new( + BASE64URL, + BASE64_VALID_DECODING_MULTIPLE, + BASE64_UNPADDED_MULTIPLE, + // spell-checker:disable-next-line + b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=_-", + )), + Format::Z85 => Box::from(Z85Wrapper {}), + }; if decode { - let (encoding, valid_decoding_multiple, alphabet): (_, _, &[u8]) = match format { - // Use naive approach (now only semi-naive) for Z85, since the crate being used doesn't have the API - // needed - Format::Z85 => { - // spell-checker:disable-next-line - let alphabet = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#"; - - fast_decode::fast_decode( - input, - ( - ZEightFiveWrapper {}, - Z85_VALID_DECODING_MULTIPLE * DECODE_AND_ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE, - alphabet, - ), - ignore_garbage, - )?; - - return Ok(()); - } - - // For these, use faster, new decoding logic - Format::Base16 => ( - HEXUPPER, - BASE16_VALID_DECODING_MULTIPLE, - // spell-checker:disable-next-line - b"0123456789ABCDEF", - ), - Format::Base2Lsbf => ( - BASE2LSBF, - BASE2_VALID_DECODING_MULTIPLE, - // spell-checker:disable-next-line - b"01", - ), - Format::Base2Msbf => ( - BASE2MSBF, - BASE2_VALID_DECODING_MULTIPLE, - // spell-checker:disable-next-line - b"01", - ), - Format::Base32 => ( - BASE32, - BASE32_VALID_DECODING_MULTIPLE, - // spell-checker:disable-next-line - b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567=", - ), - Format::Base32Hex => ( - BASE32HEX, - BASE32_VALID_DECODING_MULTIPLE, - // spell-checker:disable-next-line - b"0123456789ABCDEFGHIJKLMNOPQRSTUV=", - ), - Format::Base64 => ( - BASE64, - BASE64_VALID_DECODING_MULTIPLE, - // spell-checker:disable-next-line - b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=+/", - ), - Format::Base64Url => ( - BASE64URL, - BASE64_VALID_DECODING_MULTIPLE, - // spell-checker:disable-next-line - b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=_-", - ), - }; - fast_decode::fast_decode( input, - ( - encoding, - valid_decoding_multiple * DECODE_AND_ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE, - alphabet, - ), + supports_fast_decode_and_encode.as_ref(), ignore_garbage, )?; - - Ok(()) } else { - let (encoding, un_padded_multiple) = match format { - // Use naive approach for Z85 (now only semi-naive), since the crate being used doesn't have the API - // needed - Format::Z85 => { - fast_encode::fast_encode( - input, - ( - ZEightFiveWrapper {}, - Z85_UN_PADDED_MULTIPLE * DECODE_AND_ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE, - ), - wrap, - )?; - - return Ok(()); - } - - // For these, use faster, new encoding logic - Format::Base16 => (HEXUPPER, BASE16_UN_PADDED_MULTIPLE), - Format::Base2Lsbf => (BASE2LSBF, BASE2_UN_PADDED_MULTIPLE), - Format::Base2Msbf => (BASE2MSBF, BASE2_UN_PADDED_MULTIPLE), - Format::Base32 => (BASE32, BASE32_UN_PADDED_MULTIPLE), - Format::Base32Hex => (BASE32HEX, BASE32_UN_PADDED_MULTIPLE), - Format::Base64 => (BASE64, BASE64_UN_PADDED_MULTIPLE), - Format::Base64Url => (BASE64URL, BASE64_UN_PADDED_MULTIPLE), - }; - - fast_encode::fast_encode( - input, - ( - encoding, - un_padded_multiple * DECODE_AND_ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE, - ), - wrap, - )?; - - Ok(()) + fast_encode::fast_encode(input, supports_fast_decode_and_encode.as_ref(), wrap)?; } + + Ok(()) } mod fast_encode { - use crate::base_common::WRAP_DEFAULT; + use crate::base_common::{format_read_error, WRAP_DEFAULT}; use std::{ collections::VecDeque, io::{self, ErrorKind, Read, StdoutLock, Write}, num::NonZeroUsize, }; use uucore::{ - encoding::SupportsFastEncode, + encoding::SupportsFastDecodeAndEncode, error::{UResult, USimpleError}, }; @@ -319,6 +251,52 @@ mod fast_encode { print_buffer: Vec, } + // Start of helper functions + fn encode_in_chunks_to_buffer( + supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, + encode_in_chunks_of_size: usize, + bytes_to_steal: usize, + read_buffer: &[u8], + encoded_buffer: &mut VecDeque, + leftover_buffer: &mut VecDeque, + ) -> UResult<()> { + let bytes_to_chunk = if bytes_to_steal > 0 { + let (stolen_bytes, rest_of_read_buffer) = read_buffer.split_at(bytes_to_steal); + + leftover_buffer.extend(stolen_bytes); + + // After appending the stolen bytes to `leftover_buffer`, it should be the right size + assert!(leftover_buffer.len() == encode_in_chunks_of_size); + + // Encode the old unencoded data and the stolen bytes, and add the result to + // `encoded_buffer` + supports_fast_decode_and_encode + .encode_to_vec_deque(leftover_buffer.make_contiguous(), encoded_buffer)?; + + // Reset `leftover_buffer` + leftover_buffer.clear(); + + rest_of_read_buffer + } else { + // Do not need to steal bytes from `read_buffer` + read_buffer + }; + + let chunks_exact = bytes_to_chunk.chunks_exact(encode_in_chunks_of_size); + + let remainder = chunks_exact.remainder(); + + for sl in chunks_exact { + assert!(sl.len() == encode_in_chunks_of_size); + + supports_fast_decode_and_encode.encode_to_vec_deque(sl, encoded_buffer)?; + } + + leftover_buffer.extend(remainder); + + Ok(()) + } + fn write_without_line_breaks( encoded_buffer: &mut VecDeque, stdout_lock: &mut StdoutLock, @@ -348,13 +326,13 @@ mod fast_encode { stdout_lock: &mut StdoutLock, is_cleanup: bool, ) -> io::Result<()> { - let line_length_usize = line_length.get(); + let line_length = line_length.get(); let make_contiguous_result = encoded_buffer.make_contiguous(); - let chunks_exact = make_contiguous_result.chunks_exact(line_length_usize); + let chunks_exact = make_contiguous_result.chunks_exact(line_length); - let mut bytes_added_to_print_buffer = 0_usize; + let mut bytes_added_to_print_buffer = 0; for sl in chunks_exact { bytes_added_to_print_buffer += sl.len(); @@ -400,25 +378,27 @@ mod fast_encode { } // End of helper functions - // TODO - // It turns out the crate being used already supports line wrapping: - // https://docs.rs/data-encoding/latest/data_encoding/struct.Specification.html#wrap-output-when-encoding-1 - // Check if that crate's line wrapping is faster than the wrapping being performed in this function - // Update: That crate does not support arbitrary width line wrapping. It only supports certain widths: - // https://github.com/ia0/data-encoding/blob/4f42ad7ef242f6d243e4de90cd1b46a57690d00e/lib/src/lib.rs#L1710 - // - /// `encoding` and `encode_in_chunks_of_size` are passed in a tuple to indicate that they are logically tied - pub fn fast_encode( + pub fn fast_encode( input: &mut R, - (supports_fast_encode, encode_in_chunks_of_size): (S, usize), + supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, line_wrap: Option, ) -> UResult<()> { // Based on performance testing - const INPUT_BUFFER_SIZE: usize = 32_usize * 1_024_usize; + const INPUT_BUFFER_SIZE: usize = 32 * 1_024; + const ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE: usize = 1_024; + + let encode_in_chunks_of_size = + supports_fast_decode_and_encode.unpadded_multiple() * ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE; + + assert!(encode_in_chunks_of_size > 0); + + // "data-encoding" supports line wrapping, but not arbitrary line wrapping, only certain widths, so line + // wrapping must be implemented here. + // https://github.com/ia0/data-encoding/blob/4f42ad7ef242f6d243e4de90cd1b46a57690d00e/lib/src/lib.rs#L1710 let mut line_wrapping_option = match line_wrap { // Line wrapping is disabled because "-w"/"--wrap" was passed with "0" - Some(0_usize) => None, + Some(0) => None, // A custom line wrapping value was passed Some(an) => Some(LineWrapping { line_length: NonZeroUsize::new(an).unwrap(), @@ -433,7 +413,7 @@ mod fast_encode { // Start of buffers // Data that was read from stdin - let mut input_buffer = vec![0_u8; INPUT_BUFFER_SIZE]; + let mut input_buffer = vec![0; INPUT_BUFFER_SIZE]; assert!(!input_buffer.is_empty()); @@ -449,7 +429,7 @@ mod fast_encode { loop { match input.read(&mut input_buffer) { Ok(bytes_read_from_input) => { - if bytes_read_from_input == 0_usize { + if bytes_read_from_input == 0 { break; } @@ -467,44 +447,14 @@ mod fast_encode { } // Encode data in chunks, then place it in `encoded_buffer` - { - let bytes_to_chunk = if bytes_to_steal > 0_usize { - let (stolen_bytes, rest_of_read_buffer) = - read_buffer.split_at(bytes_to_steal); - - leftover_buffer.extend(stolen_bytes); - - // After appending the stolen bytes to `leftover_buffer`, it should be the right size - assert!(leftover_buffer.len() == encode_in_chunks_of_size); - - // Encode the old unencoded data and the stolen bytes, and add the result to - // `encoded_buffer` - supports_fast_encode.encode_to_vec_deque( - leftover_buffer.make_contiguous(), - &mut encoded_buffer, - )?; - - // Reset `leftover_buffer` - leftover_buffer.clear(); - - rest_of_read_buffer - } else { - // Do not need to steal bytes from `read_buffer` - read_buffer - }; - - let chunks_exact = bytes_to_chunk.chunks_exact(encode_in_chunks_of_size); - - let remainder = chunks_exact.remainder(); - - for sl in chunks_exact { - assert!(sl.len() == encode_in_chunks_of_size); - - supports_fast_encode.encode_to_vec_deque(sl, &mut encoded_buffer)?; - } - - leftover_buffer.extend(remainder); - } + encode_in_chunks_to_buffer( + supports_fast_decode_and_encode, + encode_in_chunks_of_size, + bytes_to_steal, + read_buffer, + &mut encoded_buffer, + &mut leftover_buffer, + )?; // Write all data in `encoded_buffer` to stdout write_to_stdout( @@ -515,12 +465,14 @@ mod fast_encode { )?; } Err(er) => { - if er.kind() == ErrorKind::Interrupted { + let kind = er.kind(); + + if kind == ErrorKind::Interrupted { // TODO // Retry reading? } - return Err(USimpleError::new(1_i32, format!("read error: {er}"))); + return Err(USimpleError::new(1, format_read_error(kind))); } } } @@ -529,7 +481,7 @@ mod fast_encode { // `input` has finished producing data, so the data remaining in the buffers needs to be encoded and printed { // Encode all remaining unencoded bytes, placing them in `encoded_buffer` - supports_fast_encode + supports_fast_decode_and_encode .encode_to_vec_deque(leftover_buffer.make_contiguous(), &mut encoded_buffer)?; // Write all data in `encoded_buffer` to stdout @@ -547,19 +499,20 @@ mod fast_encode { } mod fast_decode { + use crate::base_common::format_read_error; use std::io::{self, ErrorKind, Read, StdoutLock, Write}; use uucore::{ - encoding::SupportsFastDecode, + encoding::SupportsFastDecodeAndEncode, error::{UResult, USimpleError}, }; // Start of helper functions - pub fn alphabet_to_table(alphabet: &[u8], ignore_garbage: bool) -> [bool; 256_usize] { + fn alphabet_to_table(alphabet: &[u8], ignore_garbage: bool) -> [bool; 256] { // If "ignore_garbage" is enabled, all characters outside the alphabet are ignored // If it is not enabled, only '\n' and '\r' are ignored if ignore_garbage { // Note: "false" here - let mut table = [false; 256_usize]; + let mut table = [false; 256]; // Pass through no characters except those in the alphabet for ue in alphabet { @@ -574,7 +527,7 @@ mod fast_decode { table } else { // Note: "true" here - let mut table = [true; 256_usize]; + let mut table = [true; 256]; // Pass through all characters except '\n' and '\r' for ue in [b'\n', b'\r'] { @@ -590,6 +543,51 @@ mod fast_decode { } } + fn decode_in_chunks_to_buffer( + supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, + decode_in_chunks_of_size: usize, + bytes_to_steal: usize, + read_buffer_filtered: &[u8], + decoded_buffer: &mut Vec, + leftover_buffer: &mut Vec, + ) -> UResult<()> { + let bytes_to_chunk = if bytes_to_steal > 0 { + let (stolen_bytes, rest_of_read_buffer_filtered) = + read_buffer_filtered.split_at(bytes_to_steal); + + leftover_buffer.extend(stolen_bytes); + + // After appending the stolen bytes to `leftover_buffer`, it should be the right size + assert!(leftover_buffer.len() == decode_in_chunks_of_size); + + // Decode the old un-decoded data and the stolen bytes, and add the result to + // `decoded_buffer` + supports_fast_decode_and_encode.decode_into_vec(leftover_buffer, decoded_buffer)?; + + // Reset `leftover_buffer` + leftover_buffer.clear(); + + rest_of_read_buffer_filtered + } else { + // Do not need to steal bytes from `read_buffer` + read_buffer_filtered + }; + + let chunks_exact = bytes_to_chunk.chunks_exact(decode_in_chunks_of_size); + + let remainder = chunks_exact.remainder(); + + for sl in chunks_exact { + assert!(sl.len() == decode_in_chunks_of_size); + + supports_fast_decode_and_encode.decode_into_vec(sl, decoded_buffer)?; + } + + leftover_buffer.extend(remainder); + + Ok(()) + } + fn write_to_stdout( decoded_buffer: &mut Vec, stdout_lock: &mut StdoutLock, @@ -605,13 +603,21 @@ mod fast_decode { /// `encoding`, `decode_in_chunks_of_size`, and `alphabet` are passed in a tuple to indicate that they are /// logically tied - pub fn fast_decode( + pub fn fast_decode( input: &mut R, - (supports_fast_decode, decode_in_chunks_of_size, alphabet): (S, usize, &[u8]), + supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, ignore_garbage: bool, ) -> UResult<()> { // Based on performance testing - const INPUT_BUFFER_SIZE: usize = 32_usize * 1_024_usize; + const INPUT_BUFFER_SIZE: usize = 32 * 1_024; + + const DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE: usize = 1_024; + + let alphabet = supports_fast_decode_and_encode.alphabet(); + let decode_in_chunks_of_size = supports_fast_decode_and_encode.valid_decoding_multiple() + * DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE; + + assert!(decode_in_chunks_of_size > 0); // Note that it's not worth using "data-encoding"'s ignore functionality if "ignore_garbage" is true, because // "data-encoding"'s ignore functionality cannot discard non-ASCII bytes. The data has to be filtered before @@ -627,7 +633,7 @@ mod fast_decode { // Start of buffers // Data that was read from stdin - let mut input_buffer = vec![0_u8; INPUT_BUFFER_SIZE]; + let mut input_buffer = vec![0; INPUT_BUFFER_SIZE]; assert!(!input_buffer.is_empty()); @@ -647,7 +653,7 @@ mod fast_decode { loop { match input.read(&mut input_buffer) { Ok(bytes_read_from_input) => { - if bytes_read_from_input == 0_usize { + if bytes_read_from_input == 0 { break; } @@ -689,53 +695,27 @@ mod fast_decode { } // Decode data in chunks, then place it in `decoded_buffer` - { - let bytes_to_chunk = if bytes_to_steal > 0_usize { - let (stolen_bytes, rest_of_read_buffer_filtered) = - read_buffer_filtered.split_at(bytes_to_steal); - - leftover_buffer.extend(stolen_bytes); - - // After appending the stolen bytes to `leftover_buffer`, it should be the right size - assert!(leftover_buffer.len() == decode_in_chunks_of_size); - - // Decode the old un-decoded data and the stolen bytes, and add the result to - // `decoded_buffer` - supports_fast_decode - .decode_into_vec(&leftover_buffer, &mut decoded_buffer)?; - - // Reset `leftover_buffer` - leftover_buffer.clear(); - - rest_of_read_buffer_filtered - } else { - // Do not need to steal bytes from `read_buffer` - read_buffer_filtered - }; - - let chunks_exact = bytes_to_chunk.chunks_exact(decode_in_chunks_of_size); - - let remainder = chunks_exact.remainder(); - - for sl in chunks_exact { - assert!(sl.len() == decode_in_chunks_of_size); - - supports_fast_decode.decode_into_vec(sl, &mut decoded_buffer)?; - } - - leftover_buffer.extend(remainder); - } + decode_in_chunks_to_buffer( + supports_fast_decode_and_encode, + decode_in_chunks_of_size, + bytes_to_steal, + read_buffer_filtered, + &mut leftover_buffer, + &mut decoded_buffer, + )?; // Write all data in `decoded_buffer` to stdout write_to_stdout(&mut decoded_buffer, &mut stdout_lock)?; } Err(er) => { - if er.kind() == ErrorKind::Interrupted { + let kind = er.kind(); + + if kind == ErrorKind::Interrupted { // TODO // Retry reading? } - return Err(USimpleError::new(1_i32, format!("read error: {er}"))); + return Err(USimpleError::new(1, format_read_error(kind))); } } } @@ -744,7 +724,8 @@ mod fast_decode { // `input` has finished producing data, so the data remaining in the buffers needs to be decoded and printed { // Decode all remaining encoded bytes, placing them in `decoded_buffer` - supports_fast_decode.decode_into_vec(&leftover_buffer, &mut decoded_buffer)?; + supports_fast_decode_and_encode + .decode_into_vec(&leftover_buffer, &mut decoded_buffer)?; // Write all data in `decoded_buffer` to stdout write_to_stdout(&mut decoded_buffer, &mut stdout_lock)?; @@ -753,3 +734,21 @@ mod fast_decode { Ok(()) } } + +fn format_read_error(kind: ErrorKind) -> String { + let kind_string = kind.to_string(); + + // e.g. "is a directory" -> "Is a directory" + let kind_string_uncapitalized = kind_string + .char_indices() + .map(|(index, character)| { + if index == 0 { + character.to_uppercase().collect::() + } else { + character.to_string() + } + }) + .collect::(); + + format!("read error: {kind_string_uncapitalized}") +} diff --git a/src/uucore/src/lib/features/encoding.rs b/src/uucore/src/lib/features/encoding.rs index c4831e032..0d2ce6222 100644 --- a/src/uucore/src/lib/features/encoding.rs +++ b/src/uucore/src/lib/features/encoding.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (encodings) lsbf msbf +// spell-checker:ignore (encodings) lsbf msbf unpadded use crate::error::{UResult, USimpleError}; use data_encoding::Encoding; @@ -41,19 +41,106 @@ pub const BASE2MSBF: Encoding = new_encoding! { bit_order: MostSignificantFirst, }; -pub struct ZEightFiveWrapper {} +pub struct Z85Wrapper {} -pub trait SupportsFastEncode { - fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque) -> UResult<()>; +pub struct EncodingWrapper { + pub alphabet: &'static [u8], + pub encoding: Encoding, + pub unpadded_multiple: usize, + pub valid_decoding_multiple: usize, } -impl SupportsFastEncode for ZEightFiveWrapper { +impl EncodingWrapper { + pub fn new( + encoding: Encoding, + valid_decoding_multiple: usize, + unpadded_multiple: usize, + alphabet: &'static [u8], + ) -> Self { + Self { + alphabet, + encoding, + unpadded_multiple, + valid_decoding_multiple, + } + } +} + +pub trait SupportsFastDecodeAndEncode { + /// Returns the list of characters used by this encoding + fn alphabet(&self) -> &'static [u8]; + + fn decode_into_vec(&self, input: &[u8], output: &mut Vec) -> UResult<()>; + + fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque) -> UResult<()>; + + /// Inputs with a length that is a multiple of this number do not have padding when encoded. For instance: + /// + /// "The quick brown" + /// + /// is 15 characters (divisible by 3), so it is encoded in Base64 without padding: + /// + /// "VGhlIHF1aWNrIGJyb3du" + /// + /// While: + /// + /// "The quick brown fox" + /// + /// is 19 characters, which is not divisible by 3, so its Base64 representation has padding: + /// + /// "VGhlIHF1aWNrIGJyb3duIGZveA==" + /// + /// The encoding performed by `fast_encode` depends on this number being correct. + fn unpadded_multiple(&self) -> usize; + + /// Data to decode must be a length that is multiple of this number + /// + /// The decoding performed by `fast_decode` depends on this number being correct. + fn valid_decoding_multiple(&self) -> usize; +} + +impl SupportsFastDecodeAndEncode for Z85Wrapper { + fn alphabet(&self) -> &'static [u8] { + // Z85 alphabet + b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#" + } + + fn decode_into_vec(&self, input: &[u8], output: &mut Vec) -> UResult<()> { + if input.first() == Some(&b'#') { + return Err(USimpleError::new(1, "error: invalid input".to_owned())); + } + + // According to the spec we should not accept inputs whose len is not a multiple of 4. + // However, the z85 crate implements a padded encoding and accepts such inputs. We have to manually check for them. + if input.len() % 4 != 0 { + return Err(USimpleError::new( + 1, + "error: invalid input (length must be multiple of 4 characters)".to_owned(), + )); + }; + + let decode_result = match z85::decode(input) { + Ok(ve) => ve, + Err(_de) => { + return Err(USimpleError::new(1, "error: invalid input".to_owned())); + } + }; + + output.extend_from_slice(&decode_result); + + Ok(()) + } + + fn valid_decoding_multiple(&self) -> usize { + 5 + } + fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque) -> UResult<()> { // According to the spec we should not accept inputs whose len is not a multiple of 4. // However, the z85 crate implements a padded encoding and accepts such inputs. We have to manually check for them. - if input.len() % 4_usize != 0_usize { + if input.len() % 4 != 0 { return Err(USimpleError::new( - 1_i32, + 1, "error: invalid input (length must be multiple of 4 characters)".to_owned(), )); } @@ -64,70 +151,31 @@ impl SupportsFastEncode for ZEightFiveWrapper { Ok(()) } -} -impl SupportsFastEncode for Encoding { - // Adapted from `encode_append` in the "data-encoding" crate - fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque) -> UResult<()> { - let output_len = output.len(); - - output.resize(output_len + self.encode_len(input.len()), 0_u8); - - let make_contiguous_result = output.make_contiguous(); - - self.encode_mut(input, &mut (make_contiguous_result[output_len..])); - - Ok(()) + fn unpadded_multiple(&self) -> usize { + 4 } } -pub trait SupportsFastDecode { - fn decode_into_vec(&self, input: &[u8], output: &mut Vec) -> UResult<()>; -} - -impl SupportsFastDecode for ZEightFiveWrapper { - fn decode_into_vec(&self, input: &[u8], output: &mut Vec) -> UResult<()> { - if input.first() == Some(&b'#') { - return Err(USimpleError::new(1_i32, "error: invalid input".to_owned())); - } - - // According to the spec we should not accept inputs whose len is not a multiple of 4. - // However, the z85 crate implements a padded encoding and accepts such inputs. We have to manually check for them. - if input.len() % 4_usize != 0_usize { - return Err(USimpleError::new( - 1_i32, - "error: invalid input (length must be multiple of 4 characters)".to_owned(), - )); - }; - - let decode_result = match z85::decode(input) { - Ok(ve) => ve, - Err(_de) => { - return Err(USimpleError::new(1_i32, "error: invalid input".to_owned())); - } - }; - - output.extend_from_slice(&decode_result); - - Ok(()) +impl SupportsFastDecodeAndEncode for EncodingWrapper { + fn alphabet(&self) -> &'static [u8] { + self.alphabet } -} -impl SupportsFastDecode for Encoding { // Adapted from `decode` in the "data-encoding" crate fn decode_into_vec(&self, input: &[u8], output: &mut Vec) -> UResult<()> { - let decode_len_result = match self.decode_len(input.len()) { + let decode_len_result = match self.encoding.decode_len(input.len()) { Ok(us) => us, Err(_de) => { - return Err(USimpleError::new(1_i32, "error: invalid input".to_owned())); + return Err(USimpleError::new(1, "error: invalid input".to_owned())); } }; let output_len = output.len(); - output.resize(output_len + decode_len_result, 0_u8); + output.resize(output_len + decode_len_result, 0); - match self.decode_mut(input, &mut (output[output_len..])) { + match self.encoding.decode_mut(input, &mut (output[output_len..])) { Ok(us) => { // See: // https://docs.rs/data-encoding/latest/data_encoding/struct.Encoding.html#method.decode_mut @@ -135,10 +183,32 @@ impl SupportsFastDecode for Encoding { output.truncate(output_len + us); } Err(_de) => { - return Err(USimpleError::new(1_i32, "error: invalid input".to_owned())); + return Err(USimpleError::new(1, "error: invalid input".to_owned())); } } Ok(()) } + + fn valid_decoding_multiple(&self) -> usize { + self.valid_decoding_multiple + } + + // Adapted from `encode_append` in the "data-encoding" crate + fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque) -> UResult<()> { + let output_len = output.len(); + + output.resize(output_len + self.encoding.encode_len(input.len()), 0); + + let make_contiguous_result = output.make_contiguous(); + + self.encoding + .encode_mut(input, &mut (make_contiguous_result[output_len..])); + + Ok(()) + } + + fn unpadded_multiple(&self) -> usize { + self.unpadded_multiple + } } diff --git a/tests/by-util/test_basenc.rs b/tests/by-util/test_basenc.rs index 8701911f7..ce7e864a3 100644 --- a/tests/by-util/test_basenc.rs +++ b/tests/by-util/test_basenc.rs @@ -31,9 +31,7 @@ fn test_invalid_input() { let error_message = if cfg!(windows) { "basenc: .: Permission denied\n" } else { - // TODO - // Other implementations do not show " (os error 21)" - "basenc: read error: Is a directory (os error 21)\n" + "basenc: read error: Is a directory\n" }; new_ucmd!() .args(&["--base32", "."])