diff --git a/Cargo.lock b/Cargo.lock
index 9c2134329..6386b7f8f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2801,6 +2801,7 @@ name = "uu_shuf"
 version = "0.0.14"
 dependencies = [
  "clap 3.1.18",
+ "memchr 2.5.0",
  "rand",
  "rand_core",
  "uucore",
diff --git a/src/uu/shuf/BENCHMARKING.md b/src/uu/shuf/BENCHMARKING.md
index 7607f04b4..23f647e9e 100644
--- a/src/uu/shuf/BENCHMARKING.md
+++ b/src/uu/shuf/BENCHMARKING.md
@@ -1,26 +1,51 @@
+<!-- spell-checker:ignore tmpfs -->
+
 # Benchmarking shuf
 
 `shuf` is a simple utility, but there are at least two important cases
 benchmark: with and without repetition.
 
 When benchmarking changes, make sure to always build with the `--release` flag.
-You can compare with another branch by compiling on that branch and than
+You can compare with another branch by compiling on that branch and then
 renaming the executable from `shuf` to `shuf.old`.
 
+## Generate sample data
+
+Sample input can be downloaded from Project Gutenberg:
+
+```shell
+wget -O input.txt https://www.gutenberg.org/files/100/100-0.txt
+```
+
+To avoid distortions from IO, it is recommended to store input data in tmpfs.
+
 ## Without repetition
 
-By default, `shuf` samples without repetition. To benchmark only the
-randomization and not IO, we can pass the `-i` flag with a range of numbers to
-randomly sample from. An example of a command that works well for testing:
+By default, `shuf` samples without repetition.
+
+To benchmark only the randomization and not IO, we can pass the `-i` flag with
+a range of numbers to randomly sample from. An example of a command that works
+well for testing:
 
 ```shell
 hyperfine --warmup 10 "target/release/shuf -i 0-10000000"
 ```
 
+To measure the time taken by shuffling an input file, the following command can
+be used:
+
+```shell
+hyperfine --warmup 10 "target/release/shuf input.txt > /dev/null"
+```
+
+It is important to discard the output by redirecting it to `/dev/null`, since
+otherwise, a substantial amount of time is added to write the output to the
+filesystem.
+
 ## With repetition
 
 When repetition is allowed, `shuf` works very differently under the hood, so it
-should be benchmarked separately. In this case we have to pass the `-n` flag or
+should be benchmarked separately. In this case, we have to pass the `-n` flag or
 the command will run forever. An example of a hyperfine command is
 
 ```shell
diff --git a/src/uu/shuf/Cargo.toml b/src/uu/shuf/Cargo.toml
index dd4ed18c1..cbf7deb26 100644
--- a/src/uu/shuf/Cargo.toml
+++ b/src/uu/shuf/Cargo.toml
@@ -16,6 +16,7 @@ path = "src/shuf.rs"
 
 [dependencies]
 clap = { version = "3.1", features = ["wrap_help", "cargo"] }
+memchr = "2.5.0"
 rand = "0.8"
 rand_core = "0.6"
 uucore = { version=">=0.0.11", package="uucore", path="../../uucore" }
diff --git a/src/uu/shuf/src/shuf.rs b/src/uu/shuf/src/shuf.rs
index 6369fc9b5..9c0d08bdd 100644
--- a/src/uu/shuf/src/shuf.rs
+++ b/src/uu/shuf/src/shuf.rs
@@ -8,6 +8,7 @@
 // spell-checker:ignore (ToDO) cmdline evec seps rvec fdata
 
 use clap::{crate_version, Arg, Command, Values};
+use memchr::memchr_iter;
 use rand::prelude::SliceRandom;
 use rand::RngCore;
 use std::fs::File;
@@ -218,20 +219,12 @@ fn find_seps(data: &mut Vec<&[u8]>, sep: u8) {
         if data[i].contains(&sep) {
             let this = data.swap_remove(i);
             let mut p = 0;
-            let mut i = 1;
-            loop {
-                if i == this.len() {
-                    break;
-                }
-
-                if this[i] == sep {
-                    data.push(&this[p..i]);
-                    p = i + 1;
-                }
-                i += 1;
+            for i in memchr_iter(sep, this) {
+                data.push(&this[p..i]);
+                p = i + 1;
             }
             if p < this.len() {
-                data.push(&this[p..i]);
+                data.push(&this[p..]);
             }
         }
     }