mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 19:47:45 +00:00
tsort: print nodes and cycles as they are visited
Update `tsort` so that

* nodes are printed as they are visited,
* cycles are printed as they are discovered,
* finding a cycle doesn't terminate the traversal,
* multiple cycles can be found and displayed.

Fixes #7074
This commit is contained in:
parent
1bb33e0446
commit
af99952de6
2 changed files with 122 additions and 30 deletions
|
@ -2,7 +2,7 @@
|
|||
//
|
||||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
//spell-checker:ignore TAOCP
|
||||
//spell-checker:ignore TAOCP indegree
|
||||
use clap::{crate_version, Arg, Command};
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
use std::fmt::Display;
|
||||
|
@ -75,7 +75,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
|||
};
|
||||
|
||||
// Create the directed graph from pairs of tokens in the input data.
|
||||
let mut g = Graph::default();
|
||||
let mut g = Graph::new(input.clone());
|
||||
for ab in data.split_whitespace().collect::<Vec<&str>>().chunks(2) {
|
||||
match ab {
|
||||
[a, b] => g.add_edge(a, b),
|
||||
|
@ -83,20 +83,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
|||
}
|
||||
}
|
||||
|
||||
match g.run_tsort() {
|
||||
Err(cycle) => {
|
||||
show!(TsortError::Loop(input.to_string()));
|
||||
for node in &cycle {
|
||||
show!(TsortError::LoopNode(node.to_string()));
|
||||
}
|
||||
println!("{}", cycle.join("\n"));
|
||||
Ok(())
|
||||
}
|
||||
Ok(ordering) => {
|
||||
println!("{}", ordering.join("\n"));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
g.run_tsort();
|
||||
Ok(())
|
||||
}
|
||||
pub fn uu_app() -> Command {
|
||||
Command::new(uucore::util_name())
|
||||
|
@ -112,6 +100,20 @@ pub fn uu_app() -> Command {
|
|||
)
|
||||
}
|
||||
|
||||
/// Find the first element equal to `x` in `vec` and remove it.
///
/// Returns the index the element occupied before it was removed, or `None`
/// (leaving `vec` untouched) when no element compares equal to `x`. Only the
/// first match is removed; later duplicates stay in place.
fn remove<T>(vec: &mut Vec<T>, x: T) -> Option<usize>
where
    T: PartialEq,
{
    // `position` stops at the first match; `?` returns `None` if there is none.
    let index = vec.iter().position(|candidate| *candidate == x)?;
    vec.remove(index);
    Some(index)
}
|
||||
|
||||
// We use String as a representation of node here
|
||||
// but using integer may improve performance.
|
||||
#[derive(Default)]
|
||||
|
@ -125,12 +127,20 @@ impl<'input> Node<'input> {
|
|||
self.successor_names.push(successor_name);
|
||||
}
|
||||
}
|
||||
#[derive(Default)]
|
||||
|
||||
/// Directed graph over node names borrowed from the input text.
struct Graph<'input> {
|
||||
/// Label passed to `TsortError::Loop` when a cycle is reported.
name: String,
|
||||
/// Every node still in the graph, keyed by its name.
nodes: HashMap<&'input str, Node<'input>>,
|
||||
}
|
||||
|
||||
impl<'input> Graph<'input> {
|
||||
fn new(name: String) -> Graph<'input> {
|
||||
Self {
|
||||
name,
|
||||
nodes: HashMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Make sure a node called `name` exists, inserting a default-initialised
/// one when it is not present yet. An existing node is left unchanged.
fn add_node(&mut self, name: &'input str) {
    let _ = self.nodes.entry(name).or_insert_with(Default::default);
}
|
||||
|
@ -147,9 +157,14 @@ impl<'input> Graph<'input> {
|
|||
to_node.predecessor_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
fn remove_edge(&mut self, u: &'input str, v: &'input str) {
|
||||
remove(&mut self.nodes.get_mut(u).unwrap().successor_names, v);
|
||||
self.nodes.get_mut(v).unwrap().predecessor_count -= 1;
|
||||
}
|
||||
|
||||
/// Implementation of algorithm T from TAOCP (Don. Knuth), vol. 1.
|
||||
fn run_tsort(&mut self) -> Result<Vec<&'input str>, Vec<&'input str>> {
|
||||
let mut result = Vec::with_capacity(self.nodes.len());
|
||||
fn run_tsort(&mut self) {
|
||||
// First, we find a node that have no prerequisites (independent nodes)
|
||||
// If no such node exists, then there is a cycle.
|
||||
let mut independent_nodes_queue: VecDeque<&'input str> = self
|
||||
|
@ -166,10 +181,18 @@ impl<'input> Graph<'input> {
|
|||
independent_nodes_queue.make_contiguous().sort_unstable(); // to make sure the resulting ordering is deterministic we need to order independent nodes
|
||||
// FIXME: this doesn't comply entirely with the GNU coreutils implementation.
|
||||
|
||||
// we remove each independent node, from the graph, updating each successor predecessor_count variable as we do.
|
||||
while let Some(name_of_next_node_to_process) = independent_nodes_queue.pop_front() {
|
||||
result.push(name_of_next_node_to_process);
|
||||
if let Some(node_to_process) = self.nodes.remove(name_of_next_node_to_process) {
|
||||
// To make sure the resulting ordering is deterministic we
|
||||
// need to order independent nodes.
|
||||
//
|
||||
// FIXME: this doesn't comply entirely with the GNU coreutils
|
||||
// implementation.
|
||||
independent_nodes_queue.make_contiguous().sort_unstable();
|
||||
|
||||
while !self.nodes.is_empty() {
|
||||
// Get the next node (breaking any cycles necessary to do so).
|
||||
let v = self.find_next_node(&mut independent_nodes_queue);
|
||||
println!("{v}");
|
||||
if let Some(node_to_process) = self.nodes.remove(v) {
|
||||
for successor_name in node_to_process.successor_names {
|
||||
let successor_node = self.nodes.get_mut(successor_name).unwrap();
|
||||
successor_node.predecessor_count -= 1;
|
||||
|
@ -180,20 +203,61 @@ impl<'input> Graph<'input> {
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if the graph has no cycle (it's a dependency tree), the graph should be empty now, as all nodes have been deleted.
|
||||
if self.nodes.is_empty() {
|
||||
Ok(result)
|
||||
} else {
|
||||
// otherwise, we detect and show a cycle to the user (as the GNU coreutils implementation does)
|
||||
Err(self.detect_cycle())
|
||||
/// Get the in-degree of the node with the given name.
|
||||
fn indegree(&self, name: &str) -> Option<usize> {
|
||||
self.nodes.get(name).map(|data| data.predecessor_count)
|
||||
}
|
||||
|
||||
// Pre-condition: self.nodes is non-empty.
|
||||
fn find_next_node(&mut self, frontier: &mut VecDeque<&'input str>) -> &'input str {
|
||||
// If there are no nodes of in-degree zero but there are still
|
||||
// un-visited nodes in the graph, then there must be a cycle.
|
||||
// We need to find the cycle, display it, and then break the
|
||||
// cycle.
|
||||
//
|
||||
// A cycle is guaranteed to be of length at least two. We break
|
||||
// the cycle by deleting an arbitrary edge (the first). That is
|
||||
// not necessarily the optimal thing, but it should be enough to
|
||||
// continue making progress in the graph traversal.
|
||||
//
|
||||
// It is possible that deleting the edge does not actually
|
||||
// result in the target node having in-degree zero, so we repeat
|
||||
// the process until such a node appears.
|
||||
loop {
|
||||
match frontier.pop_front() {
|
||||
None => self.find_and_break_cycle(frontier),
|
||||
Some(v) => return v,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn find_and_break_cycle(&mut self, frontier: &mut VecDeque<&'input str>) {
|
||||
let cycle = self.detect_cycle();
|
||||
show!(TsortError::Loop(self.name.clone()));
|
||||
for node in &cycle {
|
||||
show!(TsortError::LoopNode(node.to_string()));
|
||||
}
|
||||
let u = cycle[0];
|
||||
let v = cycle[1];
|
||||
self.remove_edge(u, v);
|
||||
if self.indegree(v).unwrap() == 0 {
|
||||
frontier.push_back(v);
|
||||
}
|
||||
}
|
||||
|
||||
fn detect_cycle(&self) -> Vec<&'input str> {
|
||||
// Sort the nodes just to make this function deterministic.
|
||||
let mut nodes = Vec::new();
|
||||
for node in self.nodes.keys() {
|
||||
nodes.push(node);
|
||||
}
|
||||
nodes.sort_unstable();
|
||||
|
||||
let mut visited = HashSet::new();
|
||||
let mut stack = Vec::with_capacity(self.nodes.len());
|
||||
for &node in self.nodes.keys() {
|
||||
for node in nodes {
|
||||
if !visited.contains(node) && self.dfs(node, &mut visited, &mut stack) {
|
||||
return stack;
|
||||
}
|
||||
|
|
|
@ -83,3 +83,31 @@ fn test_split_on_any_whitespace() {
|
|||
.succeeds()
|
||||
.stdout_only("a\nb\n");
|
||||
}
|
||||
|
||||
// A single two-node loop (b <==> c): the run must fail with exit code 1,
// print the expected node order on stdout, and report the loop and its
// nodes on stderr.
#[test]
|
||||
fn test_cycle() {
|
||||
// The graph looks like: a --> b <==> c --> d
|
||||
new_ucmd!()
|
||||
.pipe_in("a b b c c d c b")
|
||||
.fails()
|
||||
.code_is(1)
|
||||
.stdout_is("a\nc\nd\nb\n")
|
||||
.stderr_is("tsort: -: input contains a loop:\ntsort: b\ntsort: c\n");
|
||||
}
|
||||
|
||||
// Two loops sharing node b (b <==> c and b <==> d): the run must fail with
// exit code 1, print the expected node order on stdout, and report both
// loops, one after the other, on stderr.
#[test]
|
||||
fn test_two_cycles() {
|
||||
// The graph looks like:
|
||||
//
|
||||
//        a
|
||||
//        |
|
||||
//        V
|
||||
// c <==> b <==> d
|
||||
//
|
||||
new_ucmd!()
|
||||
.pipe_in("a b b c c b b d d b")
|
||||
.fails()
|
||||
.code_is(1)
|
||||
.stdout_is("a\nc\nd\nb\n")
|
||||
.stderr_is("tsort: -: input contains a loop:\ntsort: b\ntsort: c\ntsort: -: input contains a loop:\ntsort: b\ntsort: d\n");
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue