1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 19:47:45 +00:00

tsort: print nodes and cycles as they are visited

Update `tsort` so that

* nodes are printed as they are visited,
* cycles are printed as they are discovered,
* finding a cycle doesn't terminate the traversal,
* multiple cycles can be found and displayed.

Fixes #7074
This commit is contained in:
Jeffrey Finkelstein 2025-01-07 19:42:45 -05:00
parent 1bb33e0446
commit af99952de6
2 changed files with 122 additions and 30 deletions

View file

@ -2,7 +2,7 @@
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//spell-checker:ignore TAOCP
//spell-checker:ignore TAOCP indegree
use clap::{crate_version, Arg, Command};
use std::collections::{HashMap, HashSet, VecDeque};
use std::fmt::Display;
@ -75,7 +75,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
};
// Create the directed graph from pairs of tokens in the input data.
let mut g = Graph::default();
let mut g = Graph::new(input.clone());
for ab in data.split_whitespace().collect::<Vec<&str>>().chunks(2) {
match ab {
[a, b] => g.add_edge(a, b),
@ -83,20 +83,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
}
}
match g.run_tsort() {
Err(cycle) => {
show!(TsortError::Loop(input.to_string()));
for node in &cycle {
show!(TsortError::LoopNode(node.to_string()));
}
println!("{}", cycle.join("\n"));
Ok(())
}
Ok(ordering) => {
println!("{}", ordering.join("\n"));
Ok(())
}
}
g.run_tsort();
Ok(())
}
pub fn uu_app() -> Command {
Command::new(uucore::util_name())
@ -112,6 +100,20 @@ pub fn uu_app() -> Command {
)
}
/// Find the element `x` in `vec` and remove it, returning its index.
fn remove<T>(vec: &mut Vec<T>, x: T) -> Option<usize>
where
T: PartialEq,
{
for i in 0..vec.len() {
if vec[i] == x {
vec.remove(i);
return Some(i);
}
}
None
}
// We use String as a representation of node here
// but using integer may improve performance.
#[derive(Default)]
@ -125,12 +127,20 @@ impl<'input> Node<'input> {
self.successor_names.push(successor_name);
}
}
#[derive(Default)]
struct Graph<'input> {
name: String,
nodes: HashMap<&'input str, Node<'input>>,
}
impl<'input> Graph<'input> {
fn new(name: String) -> Graph<'input> {
Self {
name,
nodes: HashMap::default(),
}
}
fn add_node(&mut self, name: &'input str) {
self.nodes.entry(name).or_default();
}
@ -147,9 +157,14 @@ impl<'input> Graph<'input> {
to_node.predecessor_count += 1;
}
}
fn remove_edge(&mut self, u: &'input str, v: &'input str) {
remove(&mut self.nodes.get_mut(u).unwrap().successor_names, v);
self.nodes.get_mut(v).unwrap().predecessor_count -= 1;
}
/// Implementation of algorithm T from TAOCP (Don. Knuth), vol. 1.
fn run_tsort(&mut self) -> Result<Vec<&'input str>, Vec<&'input str>> {
let mut result = Vec::with_capacity(self.nodes.len());
fn run_tsort(&mut self) {
// First, we find a node that have no prerequisites (independent nodes)
// If no such node exists, then there is a cycle.
let mut independent_nodes_queue: VecDeque<&'input str> = self
@ -166,10 +181,18 @@ impl<'input> Graph<'input> {
independent_nodes_queue.make_contiguous().sort_unstable(); // to make sure the resulting ordering is deterministic we need to order independent nodes
// FIXME: this doesn't comply entirely with the GNU coreutils implementation.
// we remove each independent node, from the graph, updating each successor predecessor_count variable as we do.
while let Some(name_of_next_node_to_process) = independent_nodes_queue.pop_front() {
result.push(name_of_next_node_to_process);
if let Some(node_to_process) = self.nodes.remove(name_of_next_node_to_process) {
// To make sure the resulting ordering is deterministic we
// need to order independent nodes.
//
// FIXME: this doesn't comply entirely with the GNU coreutils
// implementation.
independent_nodes_queue.make_contiguous().sort_unstable();
while !self.nodes.is_empty() {
// Get the next node (breaking any cycles necessary to do so).
let v = self.find_next_node(&mut independent_nodes_queue);
println!("{v}");
if let Some(node_to_process) = self.nodes.remove(v) {
for successor_name in node_to_process.successor_names {
let successor_node = self.nodes.get_mut(successor_name).unwrap();
successor_node.predecessor_count -= 1;
@ -180,20 +203,61 @@ impl<'input> Graph<'input> {
}
}
}
}
// if the graph has no cycle (it's a dependency tree), the graph should be empty now, as all nodes have been deleted.
if self.nodes.is_empty() {
Ok(result)
} else {
// otherwise, we detect and show a cycle to the user (as the GNU coreutils implementation does)
Err(self.detect_cycle())
/// Get the in-degree of the node with the given name.
fn indegree(&self, name: &str) -> Option<usize> {
self.nodes.get(name).map(|data| data.predecessor_count)
}
// Pre-condition: self.nodes is non-empty.
fn find_next_node(&mut self, frontier: &mut VecDeque<&'input str>) -> &'input str {
// If there are no nodes of in-degree zero but there are still
// un-visited nodes in the graph, then there must be a cycle.
// We need to find the cycle, display it, and then break the
// cycle.
//
// A cycle is guaranteed to be of length at least two. We break
// the cycle by deleting an arbitrary edge (the first). That is
// not necessarily the optimal thing, but it should be enough to
// continue making progress in the graph traversal.
//
// It is possible that deleting the edge does not actually
// result in the target node having in-degree zero, so we repeat
// the process until such a node appears.
loop {
match frontier.pop_front() {
None => self.find_and_break_cycle(frontier),
Some(v) => return v,
}
}
}
fn find_and_break_cycle(&mut self, frontier: &mut VecDeque<&'input str>) {
let cycle = self.detect_cycle();
show!(TsortError::Loop(self.name.clone()));
for node in &cycle {
show!(TsortError::LoopNode(node.to_string()));
}
let u = cycle[0];
let v = cycle[1];
self.remove_edge(u, v);
if self.indegree(v).unwrap() == 0 {
frontier.push_back(v);
}
}
fn detect_cycle(&self) -> Vec<&'input str> {
// Sort the nodes just to make this function deterministic.
let mut nodes = Vec::new();
for node in self.nodes.keys() {
nodes.push(node);
}
nodes.sort_unstable();
let mut visited = HashSet::new();
let mut stack = Vec::with_capacity(self.nodes.len());
for &node in self.nodes.keys() {
for node in nodes {
if !visited.contains(node) && self.dfs(node, &mut visited, &mut stack) {
return stack;
}

View file

@ -83,3 +83,31 @@ fn test_split_on_any_whitespace() {
.succeeds()
.stdout_only("a\nb\n");
}
#[test]
fn test_cycle() {
// The graph looks like: a --> b <==> c --> d
new_ucmd!()
.pipe_in("a b b c c d c b")
.fails()
.code_is(1)
.stdout_is("a\nc\nd\nb\n")
.stderr_is("tsort: -: input contains a loop:\ntsort: b\ntsort: c\n");
}
#[test]
fn test_two_cycles() {
// The graph looks like:
//
// a
// |
// V
// c <==> b <==> d
//
new_ucmd!()
.pipe_in("a b b c c b b d d b")
.fails()
.code_is(1)
.stdout_is("a\nc\nd\nb\n")
.stderr_is("tsort: -: input contains a loop:\ntsort: b\ntsort: c\ntsort: -: input contains a loop:\ntsort: b\ntsort: d\n");
}