Merge pull request #2130 from electricboogie/master

sort: implement --buffer-size and --temporary-directory (external sort)
2025-07-28 11:37:44 +00:00 · 2021-04-28 09:21:14 +02:00 · 2021-04-28 09:21:14 +02:00 · a37e3181a2
commit a37e3181a2
parent 33139817a2 ec19bb72d5
9 changed files with 40489 additions and 18 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -1353,6 +1353,9 @@ name = "serde"
 version = "1.0.125"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "558dc50e1a5a5fa7112ca2ce4effcb321b0300c0d4ccf0776a9f60cd89031171"
 dependencies = [
 "serde_derive",
 ]
 [[package]]
 name = "serde_cbor"
@ -1431,6 +1434,9 @@ name = "smallvec"
 version = "1.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
 dependencies = [
 "serde",
 ]
 [[package]]
 name = "strsim"
@ -2363,7 +2369,10 @@ dependencies = [
 "rand 0.7.3",
 "rayon",
 "semver",
 "serde",
 "serde_json",
 "smallvec 1.6.1",
 "tempdir",
 "unicode-width",
 "uucore",
 "uucore_procs",
--- a/src/uu/sort/Cargo.toml
+++ b/src/uu/sort/Cargo.toml
@ -15,16 +15,19 @@ edition = "2018"
 path = "src/sort.rs"
 [dependencies]
 serde_json = { version = "1.0.64", default-features = false, features = ["alloc"] }
 serde = { version = "1.0", features = ["derive"] }
 rayon = "1.5"
 rand = "0.7"
 clap = "2.33"
 fnv = "1.0.7"
 itertools = "0.10.0"
 semver = "0.9.0"
-smallvec = "1.6.1"
+smallvec = { version="1.6.1", features=["serde"] } 
 unicode-width = "0.1.8"
 uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["fs"] }
 uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }
 tempdir = "0.3.7"
 [[bin]]
 name = "sort"
--- a/src/uu/sort/src/external_sort/LICENSE
+++ b/src/uu/sort/src/external_sort/LICENSE
@ -0,0 +1,19 @@
 Copyright 2018 Battelle Memorial Institute
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
 the Software without restriction, including without limitation the rights to
 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 of the Software, and to permit persons to whom the Software is furnished to do
 so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/src/uu/sort/src/external_sort/mod.rs
+++ b/src/uu/sort/src/external_sort/mod.rs
@ -0,0 +1,295 @@
 use std::clone::Clone;
 use std::cmp::Ordering::Less;
 use std::collections::VecDeque;
 use std::error::Error;
 use std::fs::{File, OpenOptions};
 use std::io::SeekFrom::Start;
 use std::io::{BufRead, BufReader, BufWriter, Seek, Write};
 use std::marker::PhantomData;
 use std::path::PathBuf;
 use serde::de::DeserializeOwned;
 use serde::Serialize;
 use serde_json;
 use tempdir::TempDir;
 use super::{GlobalSettings, Line};
 /// Trait for types that can be used by
 /// [ExternalSorter](struct.ExternalSorter.html). Must be sortable, cloneable,
 /// serializeable, and able to report on it's size
 pub trait ExternallySortable: Clone + Serialize + DeserializeOwned {
    /// Get the size, in bytes, of this object (used to constrain the buffer
    /// used in the external sort).
    fn get_size(&self) -> u64;
 }
 /// Iterator that provides sorted `T`s
 pub struct ExtSortedIterator<Line> {
    buffers: Vec<VecDeque<Line>>,
    chunk_offsets: Vec<u64>,
    max_per_chunk: u64,
    chunks: u64,
    tmp_dir: TempDir,
    settings: GlobalSettings,
    failed: bool,
 }
 impl Iterator for ExtSortedIterator<Line>
 where
    Line: ExternallySortable,
 {
    type Item = Result<Line, Box<dyn Error>>;
    /// # Errors
    ///
    /// This method can fail due to issues reading intermediate sorted chunks
    /// from disk, or due to serde deserialization issues
    fn next(&mut self) -> Option<Self::Item> {
        if self.failed {
            return None;
        }
        // fill up any empty buffers
        let mut empty = true;
        for chunk_num in 0..self.chunks {
            if self.buffers[chunk_num as usize].is_empty() {
                let mut f = match File::open(self.tmp_dir.path().join(chunk_num.to_string())) {
                    Ok(f) => f,
                    Err(e) => {
                        self.failed = true;
                        return Some(Err(Box::new(e)));
                    }
                };
                match f.seek(Start(self.chunk_offsets[chunk_num as usize])) {
                    Ok(_) => (),
                    Err(e) => {
                        self.failed = true;
                        return Some(Err(Box::new(e)));
                    }
                }
                let bytes_read =
                    match fill_buff(&mut self.buffers[chunk_num as usize], f, self.max_per_chunk) {
                        Ok(bytes_read) => bytes_read,
                        Err(e) => {
                            self.failed = true;
                            return Some(Err(e));
                        }
                    };
                self.chunk_offsets[chunk_num as usize] += bytes_read;
                if !self.buffers[chunk_num as usize].is_empty() {
                    empty = false;
                }
            } else {
                empty = false;
            }
        }
        if empty {
            return None;
        }
        // find the next record to write
        // check is_empty() before unwrap()ing
        let mut idx = 0;
        for chunk_num in 0..self.chunks as usize {
            if !self.buffers[chunk_num].is_empty() {
                if self.buffers[idx].is_empty()
                    || (super::compare_by)(
                        self.buffers[chunk_num].front().unwrap(),
                        self.buffers[idx].front().unwrap(),
                        &self.settings,
                    ) == Less
                {
                    idx = chunk_num;
                }
            }
        }
        // unwrap due to checks above
        let r = self.buffers[idx].pop_front().unwrap();
        Some(Ok(r))
    }
 }
 /// Perform an external sort on an unsorted stream of incoming data
 pub struct ExternalSorter<Line>
 where
    Line: ExternallySortable,
 {
    tmp_dir: Option<PathBuf>,
    buffer_bytes: u64,
    phantom: PhantomData<Line>,
    settings: GlobalSettings,
 }
 impl ExternalSorter<Line>
 where
    Line: ExternallySortable,
 {
    /// Create a new `ExternalSorter` with a specified memory buffer and
    /// temporary directory
    pub fn new(
        buffer_bytes: u64,
        tmp_dir: Option<PathBuf>,
        settings: GlobalSettings,
    ) -> ExternalSorter<Line> {
        ExternalSorter {
            buffer_bytes,
            tmp_dir,
            phantom: PhantomData,
            settings,
        }
    }
    /// Sort (based on `compare`) the `T`s provided by `unsorted` and return an
    /// iterator
    ///
    /// # Errors
    ///
    /// This method can fail due to issues writing intermediate sorted chunks
    /// to disk, or due to serde serialization issues
    pub fn sort_by<I>(
        &self,
        unsorted: I,
        settings: GlobalSettings,
    ) -> Result<ExtSortedIterator<Line>, Box<dyn Error>>
    where
        I: Iterator<Item = Line>,
    {
        let tmp_dir = match self.tmp_dir {
            Some(ref p) => TempDir::new_in(p, "uutils_sort")?,
            None => TempDir::new("uutils_sort")?,
        };
        // creating the thing we need to return first due to the face that we need to
        // borrow tmp_dir and move it out
        let mut iter = ExtSortedIterator {
            buffers: Vec::new(),
            chunk_offsets: Vec::new(),
            max_per_chunk: 0,
            chunks: 0,
            tmp_dir,
            settings,
            failed: false,
        };
        {
            let mut total_read = 0;
            let mut chunk = Vec::new();
            // Initial buffer is specified by user
            let mut adjusted_buffer_size = self.buffer_bytes;
            let (iter_size, _) = unsorted.size_hint();
            // make the initial chunks on disk
            for seq in unsorted {
                let seq_size = seq.get_size();
                total_read += seq_size;
                // GNU minimum is 16 * (sizeof struct + 2), but GNU uses about
                // 1/10 the memory that we do.  And GNU even says in the code it may
                // not work on small buffer sizes.
                //
                // The following seems to work pretty well, and has about the same max
                // RSS as lower minimum values.
                //
                let minimum_buffer_size: u64 = iter_size as u64 * seq_size / 8;
                adjusted_buffer_size =
                    // Grow buffer size for a struct/Line larger than buffer
                    if adjusted_buffer_size < seq_size {
                        seq_size
                    } else if adjusted_buffer_size < minimum_buffer_size {
                        minimum_buffer_size
                    } else {
                        adjusted_buffer_size
                    };
                chunk.push(seq);
                if total_read >= adjusted_buffer_size {
                    super::sort_by(&mut chunk, &self.settings);
                    self.write_chunk(
                        &iter.tmp_dir.path().join(iter.chunks.to_string()),
                        &mut chunk,
                    )?;
                    chunk.clear();
                    total_read = 0;
                    iter.chunks += 1;
                }
            }
            // write the last chunk
            if chunk.len() > 0 {
                super::sort_by(&mut chunk, &self.settings);
                self.write_chunk(
                    &iter.tmp_dir.path().join(iter.chunks.to_string()),
                    &mut chunk,
                )?;
                iter.chunks += 1;
            }
            // initialize buffers for each chunk
            //
            // Having a right sized buffer for each chunk for smallish values seems silly to me?
            //
            // We will have to have the entire iter in memory sometime right?
            // Set minimum to the size of the writer buffer, ~8K
            //
            const MINIMUM_READBACK_BUFFER: u64 = 8200;
            let right_sized_buffer = adjusted_buffer_size
                .checked_div(iter.chunks)
                .unwrap_or(adjusted_buffer_size);
            iter.max_per_chunk = if right_sized_buffer > MINIMUM_READBACK_BUFFER {
                right_sized_buffer
            } else {
                MINIMUM_READBACK_BUFFER
            };
            iter.buffers = vec![VecDeque::new(); iter.chunks as usize];
            iter.chunk_offsets = vec![0 as u64; iter.chunks as usize];
            for chunk_num in 0..iter.chunks {
                let offset = fill_buff(
                    &mut iter.buffers[chunk_num as usize],
                    File::open(iter.tmp_dir.path().join(chunk_num.to_string()))?,
                    iter.max_per_chunk,
                )?;
                iter.chunk_offsets[chunk_num as usize] = offset;
            }
        }
        Ok(iter)
    }
    fn write_chunk(&self, file: &PathBuf, chunk: &mut Vec<Line>) -> Result<(), Box<dyn Error>> {
        let new_file = OpenOptions::new().create(true).append(true).open(file)?;
        let mut buf_write = Box::new(BufWriter::new(new_file)) as Box<dyn Write>;
        for s in chunk {
            let mut serialized = serde_json::to_string(&s).expect("JSON write error: ");
            serialized.push_str("\n");
            buf_write.write(serialized.as_bytes())?;
        }
        buf_write.flush()?;
        Ok(())
    }
 }
 fn fill_buff<Line>(
    vec: &mut VecDeque<Line>,
    file: File,
    max_bytes: u64,
 ) -> Result<u64, Box<dyn Error>>
 where
    Line: ExternallySortable,
 {
    let mut total_read = 0;
    let mut bytes_read = 0;
    for line in BufReader::new(file).lines() {
        let line_s = line?;
        bytes_read += line_s.len() + 1;
        // This is where the bad stuff happens usually
        let deserialized: Line = serde_json::from_str(&line_s).expect("JSON read error: ");
        total_read += deserialized.get_size();
        vec.push_back(deserialized);
        if total_read > max_bytes {
            break;
        }
    }
    Ok(bytes_read as u64)
 }
--- a/src/uu/sort/src/numeric_str_cmp.rs
+++ b/src/uu/sort/src/numeric_str_cmp.rs
@ -14,20 +14,21 @@
 //! More specifically, exponent can be understood so that the original number is in (1..10)*10^exponent.
 //! From that follows the constraints of this algorithm: It is able to compare numbers in ±(1*10^[i64::MIN]..10*10^[i64::MAX]).
 use serde::{Deserialize, Serialize};
 use std::{cmp::Ordering, ops::Range};
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, Clone)]
 enum Sign {
    Negative,
    Positive,
 }
-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
 pub struct NumInfo {
    exponent: i64,
    sign: Sign,
 }
-
+#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
 pub struct NumInfoParseSettings {
    pub accept_si_units: bool,
    pub thousands_separator: Option<char>,
--- a/src/uu/sort/src/sort.rs
+++ b/src/uu/sort/src/sort.rs
@ -15,9 +15,11 @@
 #[macro_use]
 extern crate uucore;
 mod external_sort;
 mod numeric_str_cmp;
 use clap::{App, Arg};
 use external_sort::{ExternalSorter, ExternallySortable};
 use fnv::FnvHasher;
 use itertools::Itertools;
 use numeric_str_cmp::{numeric_str_cmp, NumInfo, NumInfoParseSettings};
@ -25,6 +27,7 @@ use rand::distributions::Alphanumeric;
 use rand::{thread_rng, Rng};
 use rayon::prelude::*;
 use semver::Version;
 use serde::{Deserialize, Serialize};
 use smallvec::SmallVec;
 use std::cmp::Ordering;
 use std::collections::BinaryHeap;
@ -35,6 +38,7 @@ use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Lines, Read, Write};
 use std::mem::replace;
 use std::ops::Range;
 use std::path::Path;
 use std::path::PathBuf;
 use unicode_width::UnicodeWidthStr;
 use uucore::fs::is_stdin_interactive; // for Iterator::dedup()
 use uucore::InvalidEncodingHandling;
@ -77,6 +81,8 @@ static OPT_RANDOM: &str = "random-sort";
 static OPT_ZERO_TERMINATED: &str = "zero-terminated";
 static OPT_PARALLEL: &str = "parallel";
 static OPT_FILES0_FROM: &str = "files0-from";
 static OPT_BUF_SIZE: &str = "buffer-size";
 static OPT_TMP_DIR: &str = "temporary-directory";
 static ARG_FILES: &str = "files";
@ -86,6 +92,8 @@ static THOUSANDS_SEP: char = ',';
 static NEGATIVE: char = '-';
 static POSITIVE: char = '+';
 static DEFAULT_BUF_SIZE: usize = std::usize::MAX;
 #[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy)]
 enum SortMode {
    Numeric,
@ -95,7 +103,7 @@ enum SortMode {
    Version,
    Default,
 }
-
+#[derive(Clone)]
 struct GlobalSettings {
    mode: SortMode,
    debug: bool,
@ -116,6 +124,31 @@ struct GlobalSettings {
    separator: Option<char>,
    threads: String,
    zero_terminated: bool,
    buffer_size: usize,
    tmp_dir: PathBuf,
    ext_sort: bool,
 }
 impl GlobalSettings {
    // It's back to do conversions for command line opts!
    // Probably want to do through numstrcmp somehow now?
    fn human_numeric_convert(a: &str) -> usize {
        let num_str = &a[get_leading_gen(a)];
        let (_, suf_str) = a.split_at(num_str.len());
        let num_usize = num_str
            .parse::<usize>()
            .expect("Error parsing buffer size: ");
        let suf_usize: usize = match suf_str.to_uppercase().as_str() {
            // SI Units
            "B" => 1usize,
            "K" => 1000usize,
            "M" => 1000000usize,
            "G" => 1000000000usize,
            // GNU regards empty human numeric values as K by default
            _ => 1000usize,
        };
        num_usize * suf_usize
    }
 }
 impl Default for GlobalSettings {
@ -140,10 +173,13 @@ impl Default for GlobalSettings {
            separator: None,
            threads: String::new(),
            zero_terminated: false,
            buffer_size: DEFAULT_BUF_SIZE,
            tmp_dir: PathBuf::new(),
            ext_sort: false,
        }
    }
 }
-
+#[derive(Clone)]
 struct KeySettings {
    mode: SortMode,
    ignore_blanks: bool,
@ -168,6 +204,7 @@ impl From<&GlobalSettings> for KeySettings {
    }
 }
 #[derive(Debug, Serialize, Deserialize, Clone)]
 /// Represents the string selected by a FieldSelector.
 enum SelectionRange {
    /// If we had to transform this selection, we have to store a new string.
@ -199,6 +236,7 @@ impl SelectionRange {
    }
 }
 #[derive(Serialize, Deserialize, Clone)]
 enum NumCache {
    AsF64(GeneralF64ParseResult),
    WithInfo(NumInfo),
@ -219,7 +257,7 @@ impl NumCache {
        }
    }
 }
-
+#[derive(Serialize, Deserialize, Clone)]
 struct Selection {
    range: SelectionRange,
    num_cache: NumCache,
@ -234,12 +272,20 @@ impl Selection {
 type Field = Range<usize>;
 #[derive(Serialize, Deserialize, Clone)]
 struct Line {
    line: String,
    // The common case is not to specify fields. Let's make this fast.
    selections: SmallVec<[Selection; 1]>,
 }
 impl ExternallySortable for Line {
    fn get_size(&self) -> u64 {
        // Currently 96 bytes, but that could change, so we get that size here
        std::mem::size_of::<Line>() as u64
    }
 }
 impl Line {
    fn new(line: String, settings: &GlobalSettings) -> Self {
        let fields = if settings
@ -489,6 +535,7 @@ fn tokenize_with_separator(line: &str, separator: char) -> Vec<Field> {
    tokens
 }
 #[derive(Clone)]
 struct KeyPosition {
    /// 1-indexed, 0 is invalid.
    field: usize,
@ -578,7 +625,7 @@ impl KeyPosition {
        }
    }
 }
-
+#[derive(Clone)]
 struct FieldSelector {
    from: KeyPosition,
    to: Option<KeyPosition>,
@ -912,6 +959,22 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
                .takes_value(true)
                .value_name("NUM_THREADS"),
        )
        .arg(
            Arg::with_name(OPT_BUF_SIZE)
                .short("S")
                .long(OPT_BUF_SIZE)
                .help("sets the maximum SIZE of each segment in number of sorted items")
                .takes_value(true)
                .value_name("SIZE"),
        )
        .arg(
            Arg::with_name(OPT_TMP_DIR)
                .short("T")
                .long(OPT_TMP_DIR)
                .help("use DIR for temporaries, not $TMPDIR or /tmp")
                .takes_value(true)
                .value_name("DIR"),
        )
        .arg(
            Arg::with_name(OPT_FILES0_FROM)
                .long(OPT_FILES0_FROM)
@ -982,6 +1045,29 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
        env::set_var("RAYON_NUM_THREADS", &settings.threads);
    }
    if matches.is_present(OPT_BUF_SIZE) {
        settings.buffer_size = {
            let input = matches
                .value_of(OPT_BUF_SIZE)
                .map(String::from)
                .unwrap_or(format!("{}", DEFAULT_BUF_SIZE));
            GlobalSettings::human_numeric_convert(&input)
        };
        settings.ext_sort = true;
    }
    if matches.is_present(OPT_TMP_DIR) {
        let result = matches
            .value_of(OPT_TMP_DIR)
            .map(String::from)
            .unwrap_or(format!("{}", env::temp_dir().display()));
        settings.tmp_dir = PathBuf::from(result);
        settings.ext_sort = true;
    } else {
        settings.tmp_dir = env::temp_dir();
    }
    settings.zero_terminated = matches.is_present(OPT_ZERO_TERMINATED);
    settings.merge = matches.is_present(OPT_MERGE);
@ -1066,10 +1152,10 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
        });
    }
-    exec(files, &settings)
+    exec(files, settings)
 }
-fn exec(files: Vec<String>, settings: &GlobalSettings) -> i32 {
+fn exec(files: Vec<String>, settings: GlobalSettings) -> i32 {
    let mut lines = Vec::new();
    let mut file_merger = FileMerger::new(&settings);
@ -1105,6 +1191,13 @@ fn exec(files: Vec<String>, settings: &GlobalSettings) -> i32 {
    if settings.check {
        return exec_check_file(&lines, &settings);
    }
    // Only use ext_sorter when we need to.
    // Probably faster that we don't create
    // an owned value each run
    if settings.ext_sort {
        lines = ext_sort_by(lines, settings.clone());
    } else {
        sort_by(&mut lines, &settings);
    }
@ -1112,7 +1205,7 @@ fn exec(files: Vec<String>, settings: &GlobalSettings) -> i32 {
    if settings.merge {
        if settings.unique {
            print_sorted(
-                file_merger.dedup_by(|a, b| compare_by(a, b, settings) == Ordering::Equal),
+                file_merger.dedup_by(|a, b| compare_by(a, b, &settings) == Ordering::Equal),
                &settings,
            )
        } else {
@ -1122,7 +1215,7 @@ fn exec(files: Vec<String>, settings: &GlobalSettings) -> i32 {
        print_sorted(
            lines
                .into_iter()
-                .dedup_by(|a, b| compare_by(a, b, settings) == Ordering::Equal),
+                .dedup_by(|a, b| compare_by(a, b, &settings) == Ordering::Equal),
            &settings,
        )
    } else {
@ -1164,11 +1257,25 @@ fn exec_check_file(unwrapped_lines: &[Line], settings: &GlobalSettings) -> i32 {
    }
 }
-fn sort_by(lines: &mut Vec<Line>, settings: &GlobalSettings) {
+fn ext_sort_by(unsorted: Vec<Line>, settings: GlobalSettings) -> Vec<Line> {
    let external_sorter = ExternalSorter::new(
        settings.buffer_size as u64,
        Some(settings.tmp_dir.clone()),
        settings.clone(),
    );
    let iter = external_sorter
        .sort_by(unsorted.into_iter(), settings.clone())
        .unwrap()
        .map(|x| x.unwrap())
        .collect::<Vec<Line>>();
    iter
 }
 fn sort_by(unsorted: &mut Vec<Line>, settings: &GlobalSettings) {
    if settings.stable || settings.unique {
-        lines.par_sort_by(|a, b| compare_by(a, b, &settings))
+        unsorted.par_sort_by(|a, b| compare_by(a, b, &settings))
    } else {
-        lines.par_sort_unstable_by(|a, b| compare_by(a, b, &settings))
+        unsorted.par_sort_unstable_by(|a, b| compare_by(a, b, &settings))
    }
 }
@ -1189,8 +1296,8 @@ fn compare_by(a: &Line, b: &Line, global_settings: &GlobalSettings) -> Ordering
                    (b_str, b_selection.num_cache.as_num_info()),
                ),
                SortMode::GeneralNumeric => general_numeric_compare(
-                    a_selection.num_cache.as_f64(),
+                    general_f64_parse(&a_str[get_leading_gen(a_str)]),
-                    b_selection.num_cache.as_f64(),
+                    general_f64_parse(&b_str[get_leading_gen(b_str)]),
                ),
                SortMode::Month => month_compare(a_str, b_str),
                SortMode::Version => version_compare(a_str, b_str),
@ -1268,7 +1375,7 @@ fn get_leading_gen(input: &str) -> Range<usize> {
    leading_whitespace_len..input.len()
 }
-#[derive(Copy, Clone, PartialEq, PartialOrd)]
+#[derive(Serialize, Deserialize, Copy, Clone, PartialEq, PartialOrd)]
 enum GeneralF64ParseResult {
    Invalid,
    NaN,
--- a/tests/by-util/test_sort.rs
+++ b/tests/by-util/test_sort.rs
@ -15,6 +15,31 @@ fn test_helper(file_name: &str, args: &str) {
        .stdout_is_fixture(format!("{}.expected.debug", file_name));
 }
 // FYI, the initialization size of our Line struct is 96 bytes.
 //  
 // At very small buffer sizes, with that overhead we are certainly going 
 // to overrun our buffer way, way, way too quickly because of these excess 
 // bytes for the struct.
 //
 // For instance, seq 0..20000 > ...text = 108894 bytes
 // But overhead is 1920000 + 108894 = 2028894 bytes
 //
 // Or kjvbible-random.txt = 4332506 bytes, but minimum size of its 
 // 99817 lines in memory * 96 bytes = 9582432 bytes
 //
 // Here, we test 108894 bytes with a 50K buffer
 //
 #[test]
 fn test_larger_than_specified_segment() {
    new_ucmd!()
        .arg("-n")
        .arg("-S")
        .arg("50K")
        .arg("ext_sort.txt")
        .succeeds()
        .stdout_is_fixture(format!("{}", "ext_sort.expected"));
 }
 #[test]
 fn test_months_whitespace() {
    test_helper("months-whitespace", "-M");
@ -34,6 +59,18 @@ fn test_human_numeric_whitespace() {
    test_helper("human-numeric-whitespace", "-h");
 }
 // This tests where serde often fails when reading back JSON 
 // if it finds a null value
 #[test]
 fn test_extsort_as64_bailout() {
    new_ucmd!()
        .arg("-g")
        .arg("-S 5K")
        .arg("multiple_decimals_general.txt")
        .succeeds()
        .stdout_is_fixture("multiple_decimals_general.expected");
 }
 #[test]
 fn test_multiple_decimals_general() {
    test_helper("multiple_decimals_general", "-g")
--- a/tests/fixtures/sort/ext_sort.expected
+++ b/tests/fixtures/sort/ext_sort.expected
--- a/tests/fixtures/sort/ext_sort.txt
+++ b/tests/fixtures/sort/ext_sort.txt