From 7cf5f5c2f7008c3519e19cc1f659c84af516cc7b Mon Sep 17 00:00:00 2001 From: Krysztal Huang Date: Wed, 10 Jul 2024 17:30:11 +0800 Subject: [PATCH] uucore: Move `pgrep/process.rs` to `uucore` (#6483) needed for https://github.com/uutils/procps/ --- src/uucore/Cargo.toml | 1 + src/uucore/src/lib/features.rs | 2 + src/uucore/src/lib/features/proc_info.rs | 496 +++++++++++++++++++++++ src/uucore/src/lib/features/process.rs | 5 +- 4 files changed, 501 insertions(+), 3 deletions(-) create mode 100644 src/uucore/src/lib/features/proc_info.rs diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 24800b86e..132c6c4d4 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -88,6 +88,7 @@ mode = ["libc"] perms = ["libc", "walkdir"] pipes = [] process = ["libc"] +proc-info = ["walkdir"] quoting-style = [] ranges = [] ringbuffer = [] diff --git a/src/uucore/src/lib/features.rs b/src/uucore/src/lib/features.rs index ef7b17b31..abf401008 100644 --- a/src/uucore/src/lib/features.rs +++ b/src/uucore/src/lib/features.rs @@ -45,6 +45,8 @@ pub mod entries; pub mod perms; #[cfg(all(unix, feature = "pipes"))] pub mod pipes; +#[cfg(all(target_os = "linux", feature = "proc-info"))] +pub mod proc_info; #[cfg(all(unix, feature = "process"))] pub mod process; diff --git a/src/uucore/src/lib/features/proc_info.rs b/src/uucore/src/lib/features/proc_info.rs new file mode 100644 index 000000000..d46284845 --- /dev/null +++ b/src/uucore/src/lib/features/proc_info.rs @@ -0,0 +1,496 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +// spell-checker:ignore exitstatus cmdline kworker + +//! Set of functions to manage IDs +//! +//! This module provide [`ProcessInformation`] and [`TerminalType`] and corresponding +//! functions for obtaining process information. +//! +//! And also provide [`walk_process`] function to collecting all the information of +//! processes in current system. +//! +//! Utilities that rely on this module: +//! `pgrep` (TBD) +//! `pwait` (TBD) +//! `snice` (TBD) +//! + +use std::{ + collections::{HashMap, HashSet}, + fmt::{self, Display, Formatter}, + fs, + path::PathBuf, + rc::Rc, +}; +use walkdir::{DirEntry, WalkDir}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum TerminalType { + Tty(u64), + TtyS(u64), + Pts(u64), +} + +impl TryFrom for TerminalType { + type Error = (); + + fn try_from(value: String) -> Result { + Self::try_from(value.as_str()) + } +} + +impl TryFrom<&str> for TerminalType { + type Error = (); + + fn try_from(value: &str) -> Result { + Self::try_from(PathBuf::from(value)) + } +} + +impl TryFrom for TerminalType { + type Error = (); + + fn try_from(value: PathBuf) -> Result { + // Three case: /dev/pts/* , /dev/ttyS**, /dev/tty** + + let mut iter = value.iter(); + // Case 1 + + // Considering this format: **/**/pts/ + if let (Some(_), Some(num)) = (iter.find(|it| *it == "pts"), iter.next()) { + return num + .to_str() + .ok_or(())? + .parse::() + .map_err(|_| ()) + .map(TerminalType::Pts); + }; + + // Considering this format: **/**/ttyS** then **/**/tty** + let path = value.to_str().ok_or(())?; + + let f = |prefix: &str| { + value + .iter() + .last()? + .to_str()? + .strip_prefix(prefix)? + .parse::() + .ok() + }; + + if path.contains("ttyS") { + // Case 2 + f("ttyS").ok_or(()).map(TerminalType::TtyS) + } else if path.contains("tty") { + // Case 3 + f("tty").ok_or(()).map(TerminalType::Tty) + } else { + Err(()) + } + } +} + +/// State or process +#[derive(Debug, PartialEq, Eq)] +pub enum RunState { + ///`R`, running + Running, + ///`S`, sleeping + Sleeping, + ///`D`, sleeping in an uninterruptible wait + UninterruptibleWait, + ///`Z`, zombie + Zombie, + ///`T`, traced or stopped + Stopped, +} + +impl Display for RunState { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + Self::Running => write!(f, "R"), + Self::Sleeping => write!(f, "S"), + Self::UninterruptibleWait => write!(f, "D"), + Self::Zombie => write!(f, "Z"), + Self::Stopped => write!(f, "T"), + } + } +} + +impl TryFrom for RunState { + type Error = io::Error; + + fn try_from(value: char) -> Result { + match value { + 'R' => Ok(Self::Running), + 'S' => Ok(Self::Sleeping), + 'D' => Ok(Self::UninterruptibleWait), + 'Z' => Ok(Self::Zombie), + 'T' => Ok(Self::Stopped), + _ => Err(io::ErrorKind::InvalidInput.into()), + } + } +} + +impl TryFrom<&str> for RunState { + type Error = io::Error; + + fn try_from(value: &str) -> Result { + if value.len() != 1 { + return Err(io::ErrorKind::InvalidInput.into()); + } + + Self::try_from( + value + .chars() + .nth(0) + .ok_or::(io::ErrorKind::InvalidInput.into())?, + ) + } +} + +impl TryFrom for RunState { + type Error = io::Error; + + fn try_from(value: String) -> Result { + Self::try_from(value.as_str()) + } +} + +/// Process ID and its information +#[derive(Debug, Clone, Default)] +pub struct ProcessInformation { + pub pid: usize, + pub cmdline: String, + + inner_status: String, + inner_stat: String, + + /// Processed `/proc/self/status` file + cached_status: Option>>, + /// Processed `/proc/self/stat` file + cached_stat: Option>>, + + cached_start_time: Option, + cached_tty: Option>>, +} + +impl ProcessInformation { + /// Try new with pid path such as `/proc/self` + /// + /// # Error + /// + /// If the files in path cannot be parsed into [ProcessInformation], + /// it almost caused by wrong filesystem structure. + /// + /// - [The /proc Filesystem](https://docs.kernel.org/filesystems/proc.html#process-specific-subdirectories) + pub fn try_new(value: PathBuf) -> Result { + let dir_append = |mut path: PathBuf, str: String| { + path.push(str); + path + }; + + let value = if value.is_symlink() { + fs::read_link(value)? + } else { + value + }; + + let pid = { + value + .iter() + .last() + .ok_or(io::ErrorKind::Other)? + .to_str() + .ok_or(io::ErrorKind::InvalidData)? + .parse::() + .map_err(|_| io::ErrorKind::InvalidData)? + }; + let cmdline = fs::read_to_string(dir_append(value.clone(), "cmdline".into()))? + .replace('\0', " ") + .trim_end() + .into(); + + Ok(Self { + pid, + cmdline, + inner_status: fs::read_to_string(dir_append(value.clone(), "status".into()))?, + inner_stat: fs::read_to_string(dir_append(value, "stat".into()))?, + ..Default::default() + }) + } + + pub fn proc_status(&self) -> &str { + &self.inner_status + } + + pub fn proc_stat(&self) -> &str { + &self.inner_stat + } + + /// Collect information from `/proc//status` file + pub fn status(&mut self) -> Rc> { + if let Some(c) = &self.cached_status { + return Rc::clone(c); + } + + let result = self + .inner_status + .lines() + .filter_map(|it| it.split_once(':')) + .map(|it| (it.0.to_string(), it.1.trim_start().to_string())) + .collect::>(); + + let result = Rc::new(result); + self.cached_status = Some(Rc::clone(&result)); + Rc::clone(&result) + } + + /// Collect information from `/proc//stat` file + fn stat(&mut self) -> Rc> { + if let Some(c) = &self.cached_stat { + return Rc::clone(c); + } + + let result: Vec<_> = stat_split(&self.inner_stat); + + let result = Rc::new(result); + self.cached_stat = Some(Rc::clone(&result)); + Rc::clone(&result) + } + + /// Fetch start time + /// + /// - [The /proc Filesystem: Table 1-4](https://docs.kernel.org/filesystems/proc.html#id10) + pub fn start_time(&mut self) -> Result { + if let Some(time) = self.cached_start_time { + return Ok(time); + } + + // Kernel doc: https://docs.kernel.org/filesystems/proc.html#process-specific-subdirectories + // Table 1-4 + let time = self + .stat() + .get(21) + .ok_or(io::ErrorKind::InvalidData)? + .parse::() + .map_err(|_| io::ErrorKind::InvalidData)?; + + self.cached_start_time = Some(time); + + Ok(time) + } + + /// Fetch run state + /// + /// - [The /proc Filesystem: Table 1-4](https://docs.kernel.org/filesystems/proc.html#id10) + /// + /// # Error + /// + /// If parsing failed, this function will return [io::ErrorKind::InvalidInput] + pub fn run_state(&mut self) -> Result { + RunState::try_from(self.stat().get(2).unwrap().as_str()) + } + + /// This function will scan the `/proc//fd` directory + /// + /// # Error + /// + /// If scanned pid had mismatched permission, + /// it will caused [std::io::ErrorKind::PermissionDenied] error. + pub fn ttys(&mut self) -> Result>, io::Error> { + if let Some(tty) = &self.cached_tty { + return Ok(Rc::clone(tty)); + } + + let path = PathBuf::from(format!("/proc/{}/fd", self.pid)); + + let result = Rc::new( + fs::read_dir(path)? + .flatten() + .filter(|it| it.path().is_symlink()) + .flat_map(|it| fs::read_link(it.path())) + .flat_map(TerminalType::try_from) + .collect::>(), + ); + + self.cached_tty = Some(Rc::clone(&result)); + + Ok(result) + } +} + +impl TryFrom for ProcessInformation { + type Error = io::Error; + + fn try_from(value: DirEntry) -> Result { + let value = value.into_path(); + + Self::try_new(value) + } +} + +/// Parsing `/proc/self/stat` file. +/// +/// In some case, the first pair (and the only one pair) will contains whitespace, +/// so if we want to parse it, we have to write new algorithm. +/// +/// TODO: If possible, test and use regex to replace this algorithm. +fn stat_split(stat: &str) -> Vec { + let stat = String::from(stat); + + let mut buf = String::with_capacity(stat.len()); + + let l = stat.find('('); + let r = stat.find(')'); + let content = if let (Some(l), Some(r)) = (l, r) { + let replaced = stat[(l + 1)..r].replace(' ', "$$"); + + buf.push_str(&stat[..l]); + buf.push_str(&replaced); + buf.push_str(&stat[(r + 1)..stat.len()]); + + &buf + } else { + &stat + }; + + content + .split_whitespace() + .map(|it| it.replace("$$", " ")) + .collect() +} + +/// Iterating pid in current system +pub fn walk_process() -> impl Iterator { + WalkDir::new("/proc/") + .max_depth(1) + .follow_links(false) + .into_iter() + .flatten() + .filter(|it| it.path().is_dir()) + .flat_map(ProcessInformation::try_from) +} + +#[cfg(test)] +#[cfg(target_os = "linux")] +mod tests { + + use super::*; + use std::str::FromStr; + + #[test] + fn test_tty_convention() { + assert_eq!( + TerminalType::try_from("/dev/tty1").unwrap(), + TerminalType::Tty(1) + ); + assert_eq!( + TerminalType::try_from("/dev/tty10").unwrap(), + TerminalType::Tty(10) + ); + assert_eq!( + TerminalType::try_from("/dev/pts/1").unwrap(), + TerminalType::Pts(1) + ); + assert_eq!( + TerminalType::try_from("/dev/pts/10").unwrap(), + TerminalType::Pts(10) + ); + assert_eq!( + TerminalType::try_from("/dev/ttyS1").unwrap(), + TerminalType::TtyS(1) + ); + assert_eq!( + TerminalType::try_from("/dev/ttyS10").unwrap(), + TerminalType::TtyS(10) + ); + assert_eq!( + TerminalType::try_from("ttyS10").unwrap(), + TerminalType::TtyS(10) + ); + + assert!(TerminalType::try_from("value").is_err()); + assert!(TerminalType::try_from("TtyS10").is_err()); + } + + #[test] + fn test_run_state_conversion() { + assert_eq!(RunState::try_from("R").unwrap(), RunState::Running); + assert_eq!(RunState::try_from("S").unwrap(), RunState::Sleeping); + assert_eq!( + RunState::try_from("D").unwrap(), + RunState::UninterruptibleWait + ); + assert_eq!(RunState::try_from("T").unwrap(), RunState::Stopped); + assert_eq!(RunState::try_from("Z").unwrap(), RunState::Zombie); + + assert!(RunState::try_from("G").is_err()); + assert!(RunState::try_from("Rg").is_err()); + + assert!(RunState::try_from(String::from("Rg")).is_err()); + } + + fn current_pid() -> usize { + // Direct read link of /proc/self. + // It's result must be current programs pid. + fs::read_link("/proc/self") + .unwrap() + .to_str() + .unwrap() + .parse::() + .unwrap() + } + + #[test] + fn test_walk_pid() { + let current_pid = current_pid(); + + let find = walk_process().find(|it| it.pid == current_pid); + + assert!(find.is_some()); + } + + #[test] + fn test_process_information() { + let current_pid = current_pid(); + + let mut pid_entry = ProcessInformation::try_new( + PathBuf::from_str(&format!("/proc/{}", current_pid)).unwrap(), + ) + .unwrap(); + + let result = WalkDir::new(format!("/proc/{}/fd", current_pid)) + .into_iter() + .flatten() + .map(DirEntry::into_path) + .flat_map(|it| it.read_link()) + .flat_map(TerminalType::try_from) + .collect::>(); + + assert_eq!(pid_entry.ttys().unwrap(), result.into()); + } + + #[test] + fn test_process_information_new() { + let result = ProcessInformation::try_new(PathBuf::from_iter(["/", "proc", "1"])); + assert!(result.is_ok()); + } + + #[test] + fn test_stat_split() { + let case = "32 (idle_inject/3) S 2 0 0 0 -1 69238848 0 0 0 0 0 0 0 0 -51 0 1 0 34 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 3 50 1 0 0 0 0 0 0 0 0 0 0 0"; + assert!(stat_split(case)[1] == "idle_inject/3"); + + let case = "3508 (sh) S 3478 3478 3478 0 -1 4194304 67 0 0 0 0 0 0 0 20 0 1 0 11911 2961408 238 18446744073709551615 94340156948480 94340157028757 140736274114368 0 0 0 0 4096 65538 1 0 0 17 8 0 0 0 0 0 94340157054704 94340157059616 94340163108864 140736274122780 140736274122976 140736274122976 140736274124784 0"; + assert!(stat_split(case)[1] == "sh"); + + let case = "47246 (kworker /10:1-events) I 2 0 0 0 -1 69238880 0 0 0 0 17 29 0 0 20 0 1 0 1396260 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 10 0 0 0 0 0 0 0 0 0 0 0 0 0"; + assert!(stat_split(case)[1] == "kworker /10:1-events"); + } +} diff --git a/src/uucore/src/lib/features/process.rs b/src/uucore/src/lib/features/process.rs index c7dff1f05..d0dcd1255 100644 --- a/src/uucore/src/lib/features/process.rs +++ b/src/uucore/src/lib/features/process.rs @@ -3,10 +3,9 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (vars) cvar exitstatus +// spell-checker:ignore (vars) cvar exitstatus cmdline kworker // spell-checker:ignore (sys/unix) WIFSIGNALED - -//! Set of functions to manage IDs +// spell-checker:ignore pgrep pwait snice use libc::{gid_t, pid_t, uid_t}; use std::io;