diff --git a/Cargo.lock b/Cargo.lock index 99a90c1..b91736c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -187,7 +187,6 @@ dependencies = [ "env_logger", "itertools", "log", - "ref-cast", "regex", "rusqlite", "size", @@ -396,26 +395,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "ref-cast" -version = "1.0.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a0ae411dbe946a674d89546582cea4ba2bb8defac896622d6496f14c23ba5cf" -dependencies = [ - "ref-cast-impl", -] - -[[package]] -name = "ref-cast-impl" -version = "1.0.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "regex" version = "1.11.1" diff --git a/Cargo.toml b/Cargo.toml index 6913094..fa5129d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,6 @@ diff = "0.1.13" env_logger = "0.11.3" itertools = "0.14.0" log = "0.4.20" -ref-cast = "1.0.24" regex = "1.11.1" rusqlite = { version = "0.35.0", features = [ "bundled" ] } size = "0.5.0" diff --git a/src/diff.rs b/src/diff.rs index df23686..5f11f11 100644 --- a/src/diff.rs +++ b/src/diff.rs @@ -4,19 +4,30 @@ use std::{ self, Write as _, }, + path::{ + Path, + PathBuf, + }, + thread, }; +use anyhow::{ + Context as _, + Error, + Result, +}; use itertools::{ EitherOrBoth, Itertools, }; -use ref_cast::RefCast as _; +use size::Size; use unicode_width::UnicodeWidthStr as _; use yansi::Paint as _; use crate::{ StorePath, Version, + store, }; #[derive(Debug, Default)] @@ -42,17 +53,118 @@ impl DiffStatus { } } -pub fn write_diffln<'a>( - writer: &mut dyn fmt::Write, +/// Writes the diff header (<<< out, >>>in) and package diff. +/// +/// Returns the amount of package diffs written. Even when zero, the header will +/// be written. +pub fn write_paths_diffln( + writer: &mut impl fmt::Write, + path_old: &Path, + path_new: &Path, +) -> Result { + let mut connection = store::connect()?; + + let paths_old = + connection.query_depdendents(path_old).with_context(|| { + format!( + "failed to query dependencies of path '{path}'", + path = path_old.display() + ) + })?; + + log::info!( + "found {count} packages in old closure", + count = paths_old.len(), + ); + + let paths_new = + connection.query_depdendents(path_new).with_context(|| { + format!( + "failed to query dependencies of path '{path}'", + path = path_new.display() + ) + })?; + log::info!( + "found {count} packages in new closure", + count = paths_new.len(), + ); + + drop(connection); + + writeln!( + writer, + "{arrows} {old}", + arrows = "<<<".bold(), + old = path_old.display(), + )?; + writeln!( + writer, + "{arrows} {new}", + arrows = ">>>".bold(), + new = path_new.display(), + )?; + + writeln!(writer)?; + + #[expect(clippy::pattern_type_mismatch)] + Ok(write_packages_diffln( + writer, + paths_old.iter().map(|(_, path)| path), + paths_new.iter().map(|(_, path)| path), + )?) +} + +fn deduplicate_versions(versions: &mut Vec) { + versions.sort_unstable(); + + let mut deduplicated = Vec::new(); + let mut deduplicated_push = |mut version: Version, count: usize| { + if count > 1 { + write!(version, " * {count}").unwrap(); + } + deduplicated.push(version); + }; + + let mut last_version = None::<(Version, usize)>; + for version in versions.iter() { + #[expect(clippy::mixed_read_write_in_expression)] + let Some((last_version_value, count)) = last_version.take() else { + last_version = Some((version.clone(), 1)); + continue; + }; + + if last_version_value == *version { + last_version = Some((last_version_value, count + 1)); + } else { + deduplicated_push(last_version_value, count); + } + } + + if let Some((version, count)) = last_version.take() { + deduplicated_push(version, count); + } + + *versions = deduplicated; +} + +fn write_packages_diffln<'a>( + writer: &mut impl fmt::Write, paths_old: impl Iterator, paths_new: impl Iterator, ) -> Result { - let mut paths = HashMap::<&str, Diff>>>::new(); + let mut paths = HashMap::<&str, Diff>>::new(); for path in paths_old { match path.parse_name_and_version() { Ok((name, version)) => { - paths.entry(name).or_default().old.push(version); + log::debug!("parsed name: {name}"); + log::debug!("parsed version: {version:?}"); + + paths + .entry(name) + .or_default() + .old + .push(version.unwrap_or(Version::from("".to_owned()))); }, Err(error) => { @@ -64,7 +176,14 @@ pub fn write_diffln<'a>( for path in paths_new { match path.parse_name_and_version() { Ok((name, version)) => { - paths.entry(name).or_default().new.push(version); + log::debug!("parsed name: {name}"); + log::debug!("parsed version: {version:?}"); + + paths + .entry(name) + .or_default() + .new + .push(version.unwrap_or(Version::from("".to_owned()))); }, Err(error) => { @@ -76,13 +195,13 @@ pub fn write_diffln<'a>( let mut diffs = paths .into_iter() .filter_map(|(name, mut versions)| { - versions.old.sort_unstable(); - versions.new.sort_unstable(); + deduplicate_versions(&mut versions.old); + deduplicate_versions(&mut versions.new); let status = match (versions.old.len(), versions.new.len()) { (0, 0) => unreachable!(), - (0, _) => DiffStatus::Removed, - (_, 0) => DiffStatus::Added, + (0, _) => DiffStatus::Added, + (_, 0) => DiffStatus::Removed, (..) if versions.old != versions.new => DiffStatus::Changed, (..) => return None, }; @@ -134,7 +253,7 @@ pub fn write_diffln<'a>( for diff in Itertools::zip_longest(versions.old.iter(), versions.new.iter()) { match diff { - EitherOrBoth::Right(old_version) => { + EitherOrBoth::Left(old_version) => { if oldwrote { write!(oldacc, ", ")?; } else { @@ -142,7 +261,7 @@ pub fn write_diffln<'a>( oldwrote = true; } - for old_comp in old_version.unwrap_or(Version::ref_cast("")) { + for old_comp in old_version { match old_comp { Ok(old_comp) => write!(oldacc, "{old}", old = old_comp.red())?, Err(ignored) => write!(oldacc, "{ignored}")?, @@ -150,8 +269,7 @@ pub fn write_diffln<'a>( } }, - // I have no idea why itertools is returning `versions.new` in `Left`. - EitherOrBoth::Left(new_version) => { + EitherOrBoth::Right(new_version) => { if newwrote { write!(newacc, ", ")?; } else { @@ -159,7 +277,7 @@ pub fn write_diffln<'a>( newwrote = true; } - for new_comp in new_version.unwrap_or(Version::ref_cast("")) { + for new_comp in new_version { match new_comp { Ok(new_comp) => write!(newacc, "{new}", new = new_comp.green())?, Err(ignored) => write!(newacc, "{ignored}")?, @@ -172,9 +290,6 @@ pub fn write_diffln<'a>( continue; } - let old_version = old_version.unwrap_or(Version::ref_cast("")); - let new_version = new_version.unwrap_or(Version::ref_cast("")); - if oldwrote { write!(oldacc, ", ")?; } else { @@ -193,7 +308,7 @@ pub fn write_diffln<'a>( new_version.into_iter(), ) { match diff { - EitherOrBoth::Right(old_comp) => { + EitherOrBoth::Left(old_comp) => { match old_comp { Ok(old_comp) => { write!(oldacc, "{old}", old = old_comp.red())?; @@ -204,7 +319,7 @@ pub fn write_diffln<'a>( } }, - EitherOrBoth::Left(new_comp) => { + EitherOrBoth::Right(new_comp) => { match new_comp { Ok(new_comp) => { write!(newacc, "{new}", new = new_comp.green())?; @@ -255,3 +370,49 @@ pub fn write_diffln<'a>( Ok(diffs.len()) } + +/// Spawns a task to compute the data required by [`write_size_diffln`]. +#[must_use] +pub fn spawn_size_diff( + path_old: PathBuf, + path_new: PathBuf, +) -> thread::JoinHandle> { + log::debug!("calculating closure sizes in background"); + + thread::spawn(move || { + let mut connection = store::connect()?; + + Ok::<_, Error>(( + connection.query_closure_size(&path_old)?, + connection.query_closure_size(&path_new)?, + )) + }) +} + +/// Writes the size difference. +pub fn write_size_diffln( + writer: &mut impl fmt::Write, + size_old: Size, + size_new: Size, +) -> fmt::Result { + let size_diff = size_new - size_old; + + writeln!( + writer, + "{header}: {size_old} -> {size_new}", + header = "SIZE".bold(), + size_old = size_old.red(), + size_new = size_new.green(), + )?; + + writeln!( + writer, + "{header}: {size_diff}", + header = "DIFF".bold(), + size_diff = if size_diff.bytes() > 0 { + size_diff.green() + } else { + size_diff.red() + }, + ) +} diff --git a/src/lib.rs b/src/lib.rs index c94991e..f1d73d9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,17 +13,23 @@ use anyhow::{ use derive_more::Deref; mod diff; -pub use diff::write_diffln; +pub use diff::{ + spawn_size_diff, + write_paths_diffln, + write_size_diffln, +}; -pub mod store; +mod store; mod version; -use ref_cast::RefCast as _; -pub use version::Version; +use version::Version; #[derive(Deref, Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct DerivationId(i64); +struct DerivationId(i64); +/// A validated store path. Always starts with /nix/store. +/// +/// Can be created using `StorePath::try_from(path_buf)`. #[derive(Deref, Debug, Clone, PartialEq, Eq, Hash)] pub struct StorePath(PathBuf); @@ -49,7 +55,7 @@ impl StorePath { /// This function first drops the inputs first 44 chars, since that is exactly /// the length of the `/nix/store/0004yybkm5hnwjyxv129js3mjp7kbrax-` prefix. /// Then it matches that against our store path regex. - pub fn parse_name_and_version(&self) -> Result<(&str, Option<&Version>)> { + fn parse_name_and_version(&self) -> Result<(&str, Option)> { static STORE_PATH_REGEX: sync::LazyLock = sync::LazyLock::new(|| { regex::Regex::new("(.+?)(-([0-9].*?))?$") @@ -83,8 +89,8 @@ impl StorePath { bail!("failed to extract name from path '{path}'"); } - let version: Option<&Version> = captures.get(2).map(|capture| { - Version::ref_cast(capture.as_str().trim_start_matches('-')) + let version: Option = captures.get(2).map(|capture| { + Version::from(capture.as_str().trim_start_matches('-').to_owned()) }); Ok((name, version)) diff --git a/src/main.rs b/src/main.rs index 2622e5c..5131478 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,17 +9,13 @@ use std::{ }, path::PathBuf, process, - thread, }; use anyhow::{ - Context as _, - Error, Result, anyhow, }; use clap::Parser as _; -use dix::store; use yansi::Paint as _; struct WriteFmt(W); @@ -64,108 +60,24 @@ fn real_main() -> Result<()> { }) .init(); - // Handle to the thread collecting closure size information. - // We do this as early as possible because Nix is slow. - let closure_size_handle = { - log::debug!("calculating closure sizes in background"); - - let old_path = old_path.clone(); - let new_path = new_path.clone(); - - thread::spawn(move || { - let mut connection = store::connect()?; - - Ok::<_, Error>(( - connection.query_closure_size(&old_path)?, - connection.query_closure_size(&new_path)?, - )) - }) - }; - - let mut connection = store::connect()?; - - let paths_old = - connection.query_depdendents(&old_path).with_context(|| { - format!( - "failed to query dependencies of path '{path}'", - path = old_path.display() - ) - })?; - - log::info!( - "found {count} packages in old closure", - count = paths_old.len(), - ); - - let paths_new = - connection.query_depdendents(&new_path).with_context(|| { - format!( - "failed to query dependencies of path '{path}'", - path = new_path.display() - ) - })?; - - log::info!( - "found {count} packages in new closure", - count = paths_new.len(), - ); - - drop(connection); - let mut out = WriteFmt(io::stdout()); - writeln!( - out, - "{arrows} {old_path}", - arrows = "<<<".bold(), - old_path = old_path.display(), - )?; - writeln!( - out, - "{arrows} {new_path}", - arrows = ">>>".bold(), - new_path = new_path.display(), - )?; + // Handle to the thread collecting closure size information. + // We do this as early as possible because Nix is slow. + let closure_size_handle = + dix::spawn_size_diff(old_path.clone(), new_path.clone()); - writeln!(out)?; + let wrote = dix::write_paths_diffln(&mut out, &old_path, &new_path)?; - #[expect(clippy::pattern_type_mismatch)] - let wrote = dix::write_diffln( - &mut out, - paths_old.iter().map(|(_, path)| path), - paths_new.iter().map(|(_, path)| path), - )?; - - let (closure_size_old, closure_size_new) = closure_size_handle + let (size_old, size_new) = closure_size_handle .join() .map_err(|_| anyhow!("failed to get closure size due to thread error"))??; - let size_old = size::Size::from_bytes(closure_size_old); - let size_new = size::Size::from_bytes(closure_size_new); - let size_diff = size_new - size_old; - if wrote > 0 { writeln!(out)?; } - writeln!( - out, - "{header}: {size_old} -> {size_new}", - header = "SIZE".bold(), - size_old = size_old.red(), - size_new = size_new.green(), - )?; - - writeln!( - out, - "{header}: {size_diff}", - header = "DIFF".bold(), - size_diff = if size_diff.bytes() > 0 { - size_diff.green() - } else { - size_diff.red() - }, - )?; + dix::write_size_diffln(&mut out, size_old, size_new)?; Ok(()) } diff --git a/src/store.rs b/src/store.rs index 0540867..9aedde3 100644 --- a/src/store.rs +++ b/src/store.rs @@ -1,10 +1,22 @@ -use std::{collections::HashMap, path::Path, result}; +use std::{ + collections::HashMap, + path::Path, + result, +}; -use anyhow::{Context as _, Result, anyhow}; +use anyhow::{ + Context as _, + Result, + anyhow, +}; use derive_more::Deref; use rusqlite::OpenFlags; +use size::Size; -use crate::{DerivationId, StorePath}; +use crate::{ + DerivationId, + StorePath, +}; #[derive(Deref)] pub struct Connection(rusqlite::Connection); @@ -17,41 +29,45 @@ pub fn connect() -> Result { let inner = rusqlite::Connection::open_with_flags( DATABASE_PATH, - OpenFlags::SQLITE_OPEN_READ_ONLY // we only run queries, safeguard against corrupting the ddb - | OpenFlags::SQLITE_OPEN_NO_MUTEX // part of the default flags, rusqlite takes care of locking anyways + OpenFlags::SQLITE_OPEN_READ_ONLY // We only run queries, safeguard against corrupting the DB. + | OpenFlags::SQLITE_OPEN_NO_MUTEX // Part of the default flags, rusqlite takes care of locking anyways. | OpenFlags::SQLITE_OPEN_URI, ) .with_context(|| { format!("failed to connect to Nix database at {DATABASE_PATH}") })?; - // perform a batched query to set some settings using PRAGMA + // Perform a batched query to set some settings using PRAGMA // the main performance bottleneck when dix was run before // was that the database file has to be brought from disk into - // memory + // memory. // // We read a large part of the DB anyways in each query, // so it makes sense to set aside a large region of memory-mapped // I/O prevent incuring page faults which can be done using - // `mmap_size` + // `mmap_size`. // // This made a performance difference of about 500ms (but only - // when it was first run!) + // when it was first run for a long time!). // // The file pages of the store can be evicted from main memory - // using `dd of=/nix/var/nix/db/db.sqlite oflag=nocache conv=notrunc,fdatasync count=0` - // if you want to test this. (Source: [https://unix.stackexchange.com/questions/36907/drop-a-specific-file-from-the-linux-filesystem-cache]) + // using `dd of=/nix/var/nix/db/db.sqlite oflag=nocache conv=notrunc,fdatasync + // count=0` if you want to test this. Source: . // - // Documentation about the settings can be found [here](https://www.sqlite.org/pragma.html) + // Documentation about the settings can be found here: + // + // [0]: 256MB, enough to fit the whole DB (at least on my system - Dragyx). + // [1]: Always store temporary tables ain memory. inner .execute_batch( " - PRAGMA mmap_size=268435456; -- 256MB, enough to fit the whole DB (at least on my system) - PRAGMA temp_store=2; -- store temporary tables always in memory - PRAGMA query_only;", + PRAGMA mmap_size=268435456; -- See [0]. + PRAGMA temp_store=2; -- See [1]. + PRAGMA query_only; + ", ) .with_context(|| { - format!("Error during setup commansd of Nix databse as {DATABASE_PATH}") + format!("failed to cache Nix database at {DATABASE_PATH}") })?; Ok(Connection(inner)) @@ -79,7 +95,7 @@ fn path_to_canonical_string(path: &Path) -> Result { impl Connection { /// Gets the total closure size of the given store path by summing up the nar /// size of all depdendent derivations. - pub fn query_closure_size(&mut self, path: &Path) -> Result { + pub fn query_closure_size(&mut self, path: &Path) -> Result { const QUERY: &str = " WITH RECURSIVE graph(p) AS ( @@ -98,7 +114,7 @@ impl Connection { let closure_size = self .prepare_cached(QUERY)? - .query_row([path], |row| row.get(0))?; + .query_row([path], |row| Ok(Size::from_bytes(row.get::<_, i64>(0)?)))?; Ok(closure_size) } @@ -143,6 +159,7 @@ impl Connection { /// We might want to collect the paths in the graph directly as /// well in the future, depending on how much we use them /// in the operations on the graph. + #[expect(dead_code)] pub fn query_dependency_graph( &mut self, path: &StorePath, diff --git a/src/version.rs b/src/version.rs index 456ccb1..0fb13db 100644 --- a/src/version.rs +++ b/src/version.rs @@ -6,11 +6,9 @@ use derive_more::{ Display, From, }; -use ref_cast::RefCast; -#[derive(RefCast, Deref, Display, Debug, PartialEq, Eq)] -#[repr(transparent)] -pub struct Version(str); +#[derive(Deref, DerefMut, Display, Debug, Clone, PartialEq, Eq, From)] +pub struct Version(String); impl PartialOrd for Version { fn partial_cmp(&self, other: &Self) -> Option { @@ -20,21 +18,20 @@ impl PartialOrd for Version { impl cmp::Ord for Version { fn cmp(&self, that: &Self) -> cmp::Ordering { - let this = VersionComponentIter::from(&**self).filter_map(Result::ok); - let that = VersionComponentIter::from(&**that).filter_map(Result::ok); + let this = VersionComponentIter::from(&***self).filter_map(Result::ok); + let that = VersionComponentIter::from(&***that).filter_map(Result::ok); this.cmp(that) } } -#[expect(clippy::into_iter_without_iter)] impl<'a> IntoIterator for &'a Version { type Item = Result, &'a str>; type IntoIter = VersionComponentIter<'a>; fn into_iter(self) -> Self::IntoIter { - VersionComponentIter::from(&**self) + VersionComponentIter::from(&***self) } } @@ -80,7 +77,7 @@ impl<'a> Iterator for VersionComponentIter<'a> { type Item = Result, &'a str>; fn next(&mut self) -> Option { - if self.starts_with(['.', '-']) { + if self.starts_with(['.', '-', '*', ' ']) { let ret = &self[..1]; **self = &self[1..]; return Some(Err(ret)); @@ -93,7 +90,8 @@ impl<'a> Iterator for VersionComponentIter<'a> { let component_len = self .chars() .take_while(|&char| { - char.is_ascii_digit() == is_digit && !matches!(char, '.' | '-') + char.is_ascii_digit() == is_digit + && !matches!(char, '.' | '-' | '*' | ' ') }) .map(char::len_utf8) .sum();