mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-29 12:07:46 +00:00
tsort: print nodes and cycles as they are visited
Update `tsort` so that * nodes are printed as they are visited, * cycles are printed as they are discovered, * finding a cycle doesn't terminate the traversal, * multiple cycles can be found and displayed. Fixes #7074
This commit is contained in:
parent
1bb33e0446
commit
af99952de6
2 changed files with 122 additions and 30 deletions
|
@ -2,7 +2,7 @@
|
||||||
//
|
//
|
||||||
// For the full copyright and license information, please view the LICENSE
|
// For the full copyright and license information, please view the LICENSE
|
||||||
// file that was distributed with this source code.
|
// file that was distributed with this source code.
|
||||||
//spell-checker:ignore TAOCP
|
//spell-checker:ignore TAOCP indegree
|
||||||
use clap::{crate_version, Arg, Command};
|
use clap::{crate_version, Arg, Command};
|
||||||
use std::collections::{HashMap, HashSet, VecDeque};
|
use std::collections::{HashMap, HashSet, VecDeque};
|
||||||
use std::fmt::Display;
|
use std::fmt::Display;
|
||||||
|
@ -75,7 +75,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
};
|
};
|
||||||
|
|
||||||
// Create the directed graph from pairs of tokens in the input data.
|
// Create the directed graph from pairs of tokens in the input data.
|
||||||
let mut g = Graph::default();
|
let mut g = Graph::new(input.clone());
|
||||||
for ab in data.split_whitespace().collect::<Vec<&str>>().chunks(2) {
|
for ab in data.split_whitespace().collect::<Vec<&str>>().chunks(2) {
|
||||||
match ab {
|
match ab {
|
||||||
[a, b] => g.add_edge(a, b),
|
[a, b] => g.add_edge(a, b),
|
||||||
|
@ -83,20 +83,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
match g.run_tsort() {
|
g.run_tsort();
|
||||||
Err(cycle) => {
|
Ok(())
|
||||||
show!(TsortError::Loop(input.to_string()));
|
|
||||||
for node in &cycle {
|
|
||||||
show!(TsortError::LoopNode(node.to_string()));
|
|
||||||
}
|
|
||||||
println!("{}", cycle.join("\n"));
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
Ok(ordering) => {
|
|
||||||
println!("{}", ordering.join("\n"));
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
pub fn uu_app() -> Command {
|
pub fn uu_app() -> Command {
|
||||||
Command::new(uucore::util_name())
|
Command::new(uucore::util_name())
|
||||||
|
@ -112,6 +100,20 @@ pub fn uu_app() -> Command {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Find the element `x` in `vec` and remove it, returning its index.
|
||||||
|
fn remove<T>(vec: &mut Vec<T>, x: T) -> Option<usize>
|
||||||
|
where
|
||||||
|
T: PartialEq,
|
||||||
|
{
|
||||||
|
for i in 0..vec.len() {
|
||||||
|
if vec[i] == x {
|
||||||
|
vec.remove(i);
|
||||||
|
return Some(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
// We use String as a representation of node here
|
// We use String as a representation of node here
|
||||||
// but using integer may improve performance.
|
// but using integer may improve performance.
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
|
@ -125,12 +127,20 @@ impl<'input> Node<'input> {
|
||||||
self.successor_names.push(successor_name);
|
self.successor_names.push(successor_name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[derive(Default)]
|
|
||||||
struct Graph<'input> {
|
struct Graph<'input> {
|
||||||
|
name: String,
|
||||||
nodes: HashMap<&'input str, Node<'input>>,
|
nodes: HashMap<&'input str, Node<'input>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'input> Graph<'input> {
|
impl<'input> Graph<'input> {
|
||||||
|
fn new(name: String) -> Graph<'input> {
|
||||||
|
Self {
|
||||||
|
name,
|
||||||
|
nodes: HashMap::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn add_node(&mut self, name: &'input str) {
|
fn add_node(&mut self, name: &'input str) {
|
||||||
self.nodes.entry(name).or_default();
|
self.nodes.entry(name).or_default();
|
||||||
}
|
}
|
||||||
|
@ -147,9 +157,14 @@ impl<'input> Graph<'input> {
|
||||||
to_node.predecessor_count += 1;
|
to_node.predecessor_count += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn remove_edge(&mut self, u: &'input str, v: &'input str) {
|
||||||
|
remove(&mut self.nodes.get_mut(u).unwrap().successor_names, v);
|
||||||
|
self.nodes.get_mut(v).unwrap().predecessor_count -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
/// Implementation of algorithm T from TAOCP (Don. Knuth), vol. 1.
|
/// Implementation of algorithm T from TAOCP (Don. Knuth), vol. 1.
|
||||||
fn run_tsort(&mut self) -> Result<Vec<&'input str>, Vec<&'input str>> {
|
fn run_tsort(&mut self) {
|
||||||
let mut result = Vec::with_capacity(self.nodes.len());
|
|
||||||
// First, we find a node that have no prerequisites (independent nodes)
|
// First, we find a node that have no prerequisites (independent nodes)
|
||||||
// If no such node exists, then there is a cycle.
|
// If no such node exists, then there is a cycle.
|
||||||
let mut independent_nodes_queue: VecDeque<&'input str> = self
|
let mut independent_nodes_queue: VecDeque<&'input str> = self
|
||||||
|
@ -166,10 +181,18 @@ impl<'input> Graph<'input> {
|
||||||
independent_nodes_queue.make_contiguous().sort_unstable(); // to make sure the resulting ordering is deterministic we need to order independent nodes
|
independent_nodes_queue.make_contiguous().sort_unstable(); // to make sure the resulting ordering is deterministic we need to order independent nodes
|
||||||
// FIXME: this doesn't comply entirely with the GNU coreutils implementation.
|
// FIXME: this doesn't comply entirely with the GNU coreutils implementation.
|
||||||
|
|
||||||
// we remove each independent node, from the graph, updating each successor predecessor_count variable as we do.
|
// To make sure the resulting ordering is deterministic we
|
||||||
while let Some(name_of_next_node_to_process) = independent_nodes_queue.pop_front() {
|
// need to order independent nodes.
|
||||||
result.push(name_of_next_node_to_process);
|
//
|
||||||
if let Some(node_to_process) = self.nodes.remove(name_of_next_node_to_process) {
|
// FIXME: this doesn't comply entirely with the GNU coreutils
|
||||||
|
// implementation.
|
||||||
|
independent_nodes_queue.make_contiguous().sort_unstable();
|
||||||
|
|
||||||
|
while !self.nodes.is_empty() {
|
||||||
|
// Get the next node (breaking any cycles necessary to do so).
|
||||||
|
let v = self.find_next_node(&mut independent_nodes_queue);
|
||||||
|
println!("{v}");
|
||||||
|
if let Some(node_to_process) = self.nodes.remove(v) {
|
||||||
for successor_name in node_to_process.successor_names {
|
for successor_name in node_to_process.successor_names {
|
||||||
let successor_node = self.nodes.get_mut(successor_name).unwrap();
|
let successor_node = self.nodes.get_mut(successor_name).unwrap();
|
||||||
successor_node.predecessor_count -= 1;
|
successor_node.predecessor_count -= 1;
|
||||||
|
@ -180,20 +203,61 @@ impl<'input> Graph<'input> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// if the graph has no cycle (it's a dependency tree), the graph should be empty now, as all nodes have been deleted.
|
/// Get the in-degree of the node with the given name.
|
||||||
if self.nodes.is_empty() {
|
fn indegree(&self, name: &str) -> Option<usize> {
|
||||||
Ok(result)
|
self.nodes.get(name).map(|data| data.predecessor_count)
|
||||||
} else {
|
}
|
||||||
// otherwise, we detect and show a cycle to the user (as the GNU coreutils implementation does)
|
|
||||||
Err(self.detect_cycle())
|
// Pre-condition: self.nodes is non-empty.
|
||||||
|
fn find_next_node(&mut self, frontier: &mut VecDeque<&'input str>) -> &'input str {
|
||||||
|
// If there are no nodes of in-degree zero but there are still
|
||||||
|
// un-visited nodes in the graph, then there must be a cycle.
|
||||||
|
// We need to find the cycle, display it, and then break the
|
||||||
|
// cycle.
|
||||||
|
//
|
||||||
|
// A cycle is guaranteed to be of length at least two. We break
|
||||||
|
// the cycle by deleting an arbitrary edge (the first). That is
|
||||||
|
// not necessarily the optimal thing, but it should be enough to
|
||||||
|
// continue making progress in the graph traversal.
|
||||||
|
//
|
||||||
|
// It is possible that deleting the edge does not actually
|
||||||
|
// result in the target node having in-degree zero, so we repeat
|
||||||
|
// the process until such a node appears.
|
||||||
|
loop {
|
||||||
|
match frontier.pop_front() {
|
||||||
|
None => self.find_and_break_cycle(frontier),
|
||||||
|
Some(v) => return v,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn find_and_break_cycle(&mut self, frontier: &mut VecDeque<&'input str>) {
|
||||||
|
let cycle = self.detect_cycle();
|
||||||
|
show!(TsortError::Loop(self.name.clone()));
|
||||||
|
for node in &cycle {
|
||||||
|
show!(TsortError::LoopNode(node.to_string()));
|
||||||
|
}
|
||||||
|
let u = cycle[0];
|
||||||
|
let v = cycle[1];
|
||||||
|
self.remove_edge(u, v);
|
||||||
|
if self.indegree(v).unwrap() == 0 {
|
||||||
|
frontier.push_back(v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn detect_cycle(&self) -> Vec<&'input str> {
|
fn detect_cycle(&self) -> Vec<&'input str> {
|
||||||
|
// Sort the nodes just to make this function deterministic.
|
||||||
|
let mut nodes = Vec::new();
|
||||||
|
for node in self.nodes.keys() {
|
||||||
|
nodes.push(node);
|
||||||
|
}
|
||||||
|
nodes.sort_unstable();
|
||||||
|
|
||||||
let mut visited = HashSet::new();
|
let mut visited = HashSet::new();
|
||||||
let mut stack = Vec::with_capacity(self.nodes.len());
|
let mut stack = Vec::with_capacity(self.nodes.len());
|
||||||
for &node in self.nodes.keys() {
|
for node in nodes {
|
||||||
if !visited.contains(node) && self.dfs(node, &mut visited, &mut stack) {
|
if !visited.contains(node) && self.dfs(node, &mut visited, &mut stack) {
|
||||||
return stack;
|
return stack;
|
||||||
}
|
}
|
||||||
|
|
|
@ -83,3 +83,31 @@ fn test_split_on_any_whitespace() {
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_only("a\nb\n");
|
.stdout_only("a\nb\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_cycle() {
|
||||||
|
// The graph looks like: a --> b <==> c --> d
|
||||||
|
new_ucmd!()
|
||||||
|
.pipe_in("a b b c c d c b")
|
||||||
|
.fails()
|
||||||
|
.code_is(1)
|
||||||
|
.stdout_is("a\nc\nd\nb\n")
|
||||||
|
.stderr_is("tsort: -: input contains a loop:\ntsort: b\ntsort: c\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_two_cycles() {
|
||||||
|
// The graph looks like:
|
||||||
|
//
|
||||||
|
// a
|
||||||
|
// |
|
||||||
|
// V
|
||||||
|
// c <==> b <==> d
|
||||||
|
//
|
||||||
|
new_ucmd!()
|
||||||
|
.pipe_in("a b b c c b b d d b")
|
||||||
|
.fails()
|
||||||
|
.code_is(1)
|
||||||
|
.stdout_is("a\nc\nd\nb\n")
|
||||||
|
.stderr_is("tsort: -: input contains a loop:\ntsort: b\ntsort: c\ntsort: -: input contains a loop:\ntsort: b\ntsort: d\n");
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue