1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-08-01 05:27:45 +00:00

sort: read files as chunks, off-thread

Instead of using a BufReader and reading each line separately,
allocating a String for each one, we read to a chunk. Lines are
references to this chunk. This makes the allocator's job much easier
and yields performance improvements.

Chunks are read on a separate thread to further improve performance.
This commit is contained in:
Michael Debertol 2021-05-16 21:13:37 +02:00
parent 9e2c82d8e7
commit fcd48813e0
11 changed files with 1003 additions and 452 deletions

141
Cargo.lock generated
View file

@ -1,5 +1,11 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "Inflector"
version = "0.11.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3"
[[package]]
name = "advapi32-sys"
version = "0.2.0"
@ -63,6 +69,15 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "binary-heap-plus"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f068638f8ff9e118a9361e66a411eff410e7fb3ecaa23bf9272324f8fc606d7"
dependencies = [
"compare",
]
[[package]]
name = "bit-set"
version = "0.5.2"
@ -136,9 +151,9 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
[[package]]
name = "cast"
version = "0.2.5"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc38c385bfd7e444464011bb24820f40dd1c76bcdfa1b78611cb7c2e5cafab75"
checksum = "57cdfa5d50aad6cb4d44dcab6101a7f79925bd59d82ca42f38a9856a28865374"
dependencies = [
"rustc_version",
]
@ -198,6 +213,12 @@ dependencies = [
"bitflags",
]
[[package]]
name = "compare"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "120133d4db2ec47efe2e26502ee984747630c67f51974fca0b6c1340cf2368d3"
[[package]]
name = "constant_time_eq"
version = "0.1.5"
@ -999,6 +1020,29 @@ version = "11.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
[[package]]
name = "ouroboros"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc1f52300b81ac4eeeb6c00c20f7e86556c427d9fb2d92b68fc73c22f331cd15"
dependencies = [
"ouroboros_macro",
"stable_deref_trait",
]
[[package]]
name = "ouroboros_macro"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41db02c8f8731cdd7a72b433c7900cce4bf245465b452c364bfd21f4566ab055"
dependencies = [
"Inflector",
"proc-macro-error",
"proc-macro2",
"quote 1.0.9",
"syn",
]
[[package]]
name = "output_vt100"
version = "0.1.2"
@ -1027,6 +1071,15 @@ dependencies = [
"proc-macro-hack",
]
[[package]]
name = "pest"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53"
dependencies = [
"ucd-trie",
]
[[package]]
name = "pkg-config"
version = "0.3.19"
@ -1089,6 +1142,30 @@ dependencies = [
"output_vt100",
]
[[package]]
name = "proc-macro-error"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
dependencies = [
"proc-macro-error-attr",
"proc-macro2",
"quote 1.0.9",
"syn",
"version_check",
]
[[package]]
name = "proc-macro-error-attr"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
dependencies = [
"proc-macro2",
"quote 1.0.9",
"version_check",
]
[[package]]
name = "proc-macro-hack"
version = "0.5.19"
@ -1336,11 +1413,11 @@ checksum = "3e52c148ef37f8c375d49d5a73aa70713125b7f19095948a923f80afdeb22ec2"
[[package]]
name = "rustc_version"
version = "0.2.3"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
checksum = "f0dfe2087c51c460008730de8b57e6a320782fbfb312e1f4d520e6c6fae155ee"
dependencies = [
"semver",
"semver 0.11.0",
]
[[package]]
@ -1370,7 +1447,16 @@ version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
dependencies = [
"semver-parser",
"semver-parser 0.7.0",
]
[[package]]
name = "semver"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6"
dependencies = [
"semver-parser 0.10.2",
]
[[package]]
@ -1380,10 +1466,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
[[package]]
name = "serde"
version = "1.0.125"
name = "semver-parser"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "558dc50e1a5a5fa7112ca2ce4effcb321b0300c0d4ccf0776a9f60cd89031171"
checksum = "00b0bef5b7f9e0df16536d3961cfb6e84331c065b4066afb39768d0e319411f7"
dependencies = [
"pest",
]
[[package]]
name = "serde"
version = "1.0.126"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03"
[[package]]
name = "serde_cbor"
@ -1397,9 +1492,9 @@ dependencies = [
[[package]]
name = "serde_derive"
version = "1.0.125"
version = "1.0.126"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b093b7a2bb58203b5da3056c05b4ec1fed827dcfdb37347a8841695263b3d06d"
checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43"
dependencies = [
"proc-macro2",
"quote 1.0.9",
@ -1468,6 +1563,12 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "strsim"
version = "0.8.0"
@ -1627,6 +1728,12 @@ version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "879f6906492a7cd215bfa4cf595b600146ccfac0c79bcbd1f3000162af5e8b06"
[[package]]
name = "ucd-trie"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c"
[[package]]
name = "unicode-segmentation"
version = "1.7.1"
@ -2402,12 +2509,16 @@ dependencies = [
name = "uu_sort"
version = "0.0.6"
dependencies = [
"binary-heap-plus",
"clap",
"compare",
"fnv",
"itertools 0.10.0",
"memchr 2.4.0",
"ouroboros",
"rand 0.7.3",
"rayon",
"semver",
"semver 0.9.0",
"tempdir",
"unicode-width",
"uucore",
@ -2720,6 +2831,12 @@ version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
[[package]]
name = "version_check"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
[[package]]
name = "void"
version = "1.0.2"