1
Fork 0
mirror of https://github.com/RGBCube/dix synced 2025-05-14 02:24:58 +00:00

feat: better lib, expose more behaviour for external use, deduplication

This commit is contained in:
RGBCube 2025-05-09 18:41:38 +03:00 committed by bloxx12
parent 4372250ba2
commit e6b8f7b3a7
7 changed files with 245 additions and 173 deletions

21
Cargo.lock generated
View file

@ -187,7 +187,6 @@ dependencies = [
"env_logger",
"itertools",
"log",
"ref-cast",
"regex",
"rusqlite",
"size",
@ -396,26 +395,6 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "ref-cast"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a0ae411dbe946a674d89546582cea4ba2bb8defac896622d6496f14c23ba5cf"
dependencies = [
"ref-cast-impl",
]
[[package]]
name = "ref-cast-impl"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "regex"
version = "1.11.1"

View file

@ -13,7 +13,6 @@ diff = "0.1.13"
env_logger = "0.11.3"
itertools = "0.14.0"
log = "0.4.20"
ref-cast = "1.0.24"
regex = "1.11.1"
rusqlite = { version = "0.35.0", features = [ "bundled" ] }
size = "0.5.0"

View file

@ -4,19 +4,30 @@ use std::{
self,
Write as _,
},
path::{
Path,
PathBuf,
},
thread,
};
use anyhow::{
Context as _,
Error,
Result,
};
use itertools::{
EitherOrBoth,
Itertools,
};
use ref_cast::RefCast as _;
use size::Size;
use unicode_width::UnicodeWidthStr as _;
use yansi::Paint as _;
use crate::{
StorePath,
Version,
store,
};
#[derive(Debug, Default)]
@ -42,17 +53,118 @@ impl DiffStatus {
}
}
pub fn write_diffln<'a>(
writer: &mut dyn fmt::Write,
/// Writes the diff header (`<<<` old path, `>>>` new path) followed by the
/// package diff between the two paths.
///
/// Returns the number of package diffs written. The header is written even
/// when that number is zero.
pub fn write_paths_diffln(
  writer: &mut impl fmt::Write,
  path_old: &Path,
  path_new: &Path,
) -> Result<usize> {
  // The connection is only needed for the two queries, so it lives in its own
  // scope and is released before anything is written.
  let (closure_old, closure_new) = {
    let mut connection = store::connect()?;

    let closure_old =
      connection.query_depdendents(path_old).with_context(|| {
        format!(
          "failed to query dependencies of path '{path}'",
          path = path_old.display()
        )
      })?;
    log::info!(
      "found {count} packages in old closure",
      count = closure_old.len(),
    );

    let closure_new =
      connection.query_depdendents(path_new).with_context(|| {
        format!(
          "failed to query dependencies of path '{path}'",
          path = path_new.display()
        )
      })?;
    log::info!(
      "found {count} packages in new closure",
      count = closure_new.len(),
    );

    (closure_old, closure_new)
  };

  // Header: one line per path, then a blank separator line.
  let shown_old = path_old.display();
  writeln!(
    writer,
    "{arrows} {old}",
    arrows = "<<<".bold(),
    old = shown_old,
  )?;

  let shown_new = path_new.display();
  writeln!(
    writer,
    "{arrows} {new}",
    arrows = ">>>".bold(),
    new = shown_new,
  )?;

  writeln!(writer)?;

  // Only the second element of every queried pair is handed to the package
  // diff.
  #[expect(clippy::pattern_type_mismatch)]
  let wrote = write_packages_diffln(
    writer,
    closure_old.iter().map(|(_, path)| path),
    closure_new.iter().map(|(_, path)| path),
  )?;

  Ok(wrote)
}
/// Sorts `versions` and collapses every run of identical adjacent versions
/// into a single entry.
///
/// A version that occurs once is kept unchanged. A version that occurs
/// `count > 1` times in a row is kept once with ` * {count}` appended, e.g.
/// two occurrences of `1.0` become a single `1.0 * 2`.
///
/// Every distinct version is retained; none is dropped at the boundary
/// between two runs.
fn deduplicate_versions(versions: &mut Vec<Version>) {
  // Sort first; runs of adjacent identical versions are then collapsed in a
  // single pass.
  versions.sort_unstable();

  let deduplicated: Vec<Version> = versions
    .chunk_by(|this, that| this == that)
    .filter_map(|run| {
      // `chunk_by` never yields an empty run, so there is always a first
      // element to keep.
      let mut version = run.first()?.clone();

      if run.len() > 1 {
        write!(version, " * {count}", count = run.len())
          .expect("writing to a `String` cannot fail");
      }

      Some(version)
    })
    .collect();

  *versions = deduplicated;
}
fn write_packages_diffln<'a>(
writer: &mut impl fmt::Write,
paths_old: impl Iterator<Item = &'a StorePath>,
paths_new: impl Iterator<Item = &'a StorePath>,
) -> Result<usize, fmt::Error> {
let mut paths = HashMap::<&str, Diff<Vec<Option<&Version>>>>::new();
let mut paths = HashMap::<&str, Diff<Vec<Version>>>::new();
for path in paths_old {
match path.parse_name_and_version() {
Ok((name, version)) => {
paths.entry(name).or_default().old.push(version);
log::debug!("parsed name: {name}");
log::debug!("parsed version: {version:?}");
paths
.entry(name)
.or_default()
.old
.push(version.unwrap_or(Version::from("<none>".to_owned())));
},
Err(error) => {
@ -64,7 +176,14 @@ pub fn write_diffln<'a>(
for path in paths_new {
match path.parse_name_and_version() {
Ok((name, version)) => {
paths.entry(name).or_default().new.push(version);
log::debug!("parsed name: {name}");
log::debug!("parsed version: {version:?}");
paths
.entry(name)
.or_default()
.new
.push(version.unwrap_or(Version::from("<none>".to_owned())));
},
Err(error) => {
@ -76,13 +195,13 @@ pub fn write_diffln<'a>(
let mut diffs = paths
.into_iter()
.filter_map(|(name, mut versions)| {
versions.old.sort_unstable();
versions.new.sort_unstable();
deduplicate_versions(&mut versions.old);
deduplicate_versions(&mut versions.new);
let status = match (versions.old.len(), versions.new.len()) {
(0, 0) => unreachable!(),
(0, _) => DiffStatus::Removed,
(_, 0) => DiffStatus::Added,
(0, _) => DiffStatus::Added,
(_, 0) => DiffStatus::Removed,
(..) if versions.old != versions.new => DiffStatus::Changed,
(..) => return None,
};
@ -134,7 +253,7 @@ pub fn write_diffln<'a>(
for diff in Itertools::zip_longest(versions.old.iter(), versions.new.iter())
{
match diff {
EitherOrBoth::Right(old_version) => {
EitherOrBoth::Left(old_version) => {
if oldwrote {
write!(oldacc, ", ")?;
} else {
@ -142,7 +261,7 @@ pub fn write_diffln<'a>(
oldwrote = true;
}
for old_comp in old_version.unwrap_or(Version::ref_cast("<none>")) {
for old_comp in old_version {
match old_comp {
Ok(old_comp) => write!(oldacc, "{old}", old = old_comp.red())?,
Err(ignored) => write!(oldacc, "{ignored}")?,
@ -150,8 +269,7 @@ pub fn write_diffln<'a>(
}
},
// I have no idea why itertools is returning `versions.new` in `Left`.
EitherOrBoth::Left(new_version) => {
EitherOrBoth::Right(new_version) => {
if newwrote {
write!(newacc, ", ")?;
} else {
@ -159,7 +277,7 @@ pub fn write_diffln<'a>(
newwrote = true;
}
for new_comp in new_version.unwrap_or(Version::ref_cast("<none>")) {
for new_comp in new_version {
match new_comp {
Ok(new_comp) => write!(newacc, "{new}", new = new_comp.green())?,
Err(ignored) => write!(newacc, "{ignored}")?,
@ -172,9 +290,6 @@ pub fn write_diffln<'a>(
continue;
}
let old_version = old_version.unwrap_or(Version::ref_cast("<none>"));
let new_version = new_version.unwrap_or(Version::ref_cast("<none>"));
if oldwrote {
write!(oldacc, ", ")?;
} else {
@ -193,7 +308,7 @@ pub fn write_diffln<'a>(
new_version.into_iter(),
) {
match diff {
EitherOrBoth::Right(old_comp) => {
EitherOrBoth::Left(old_comp) => {
match old_comp {
Ok(old_comp) => {
write!(oldacc, "{old}", old = old_comp.red())?;
@ -204,7 +319,7 @@ pub fn write_diffln<'a>(
}
},
EitherOrBoth::Left(new_comp) => {
EitherOrBoth::Right(new_comp) => {
match new_comp {
Ok(new_comp) => {
write!(newacc, "{new}", new = new_comp.green())?;
@ -255,3 +370,49 @@ pub fn write_diffln<'a>(
Ok(diffs.len())
}
/// Spawns a task to compute the data required by [`write_size_diffln`].
#[must_use]
pub fn spawn_size_diff(
path_old: PathBuf,
path_new: PathBuf,
) -> thread::JoinHandle<Result<(Size, Size)>> {
log::debug!("calculating closure sizes in background");
thread::spawn(move || {
let mut connection = store::connect()?;
Ok::<_, Error>((
connection.query_closure_size(&path_old)?,
connection.query_closure_size(&path_new)?,
))
})
}
/// Writes two lines describing the closure size change.
///
/// The `SIZE` line shows the old size in red and the new size in green. The
/// `DIFF` line shows `size_new - size_old`, in green when its byte count is
/// greater than zero and in red otherwise.
pub fn write_size_diffln(
  writer: &mut impl fmt::Write,
  size_old: Size,
  size_new: Size,
) -> fmt::Result {
  let size_diff = size_new - size_old;

  writeln!(
    writer,
    "{header}: {size_old} -> {size_new}",
    header = "SIZE".bold(),
    size_old = size_old.red(),
    size_new = size_new.green(),
  )?;

  // Pick the colour of the difference up front so the final write stays
  // simple.
  let painted_diff = if size_diff.bytes() > 0 {
    size_diff.green()
  } else {
    size_diff.red()
  };

  writeln!(
    writer,
    "{header}: {size_diff}",
    header = "DIFF".bold(),
    size_diff = painted_diff,
  )
}

View file

@ -13,17 +13,23 @@ use anyhow::{
use derive_more::Deref;
mod diff;
pub use diff::write_diffln;
pub use diff::{
spawn_size_diff,
write_paths_diffln,
write_size_diffln,
};
pub mod store;
mod store;
mod version;
use ref_cast::RefCast as _;
pub use version::Version;
use version::Version;
#[derive(Deref, Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct DerivationId(i64);
struct DerivationId(i64);
/// A validated store path. Always starts with /nix/store.
///
/// Can be created using `StorePath::try_from(path_buf)`.
#[derive(Deref, Debug, Clone, PartialEq, Eq, Hash)]
pub struct StorePath(PathBuf);
@ -49,7 +55,7 @@ impl StorePath {
/// This function first drops the inputs first 44 chars, since that is exactly
/// the length of the `/nix/store/0004yybkm5hnwjyxv129js3mjp7kbrax-` prefix.
/// Then it matches that against our store path regex.
pub fn parse_name_and_version(&self) -> Result<(&str, Option<&Version>)> {
fn parse_name_and_version(&self) -> Result<(&str, Option<Version>)> {
static STORE_PATH_REGEX: sync::LazyLock<regex::Regex> =
sync::LazyLock::new(|| {
regex::Regex::new("(.+?)(-([0-9].*?))?$")
@ -83,8 +89,8 @@ impl StorePath {
bail!("failed to extract name from path '{path}'");
}
let version: Option<&Version> = captures.get(2).map(|capture| {
Version::ref_cast(capture.as_str().trim_start_matches('-'))
let version: Option<Version> = captures.get(2).map(|capture| {
Version::from(capture.as_str().trim_start_matches('-').to_owned())
});
Ok((name, version))

View file

@ -9,17 +9,13 @@ use std::{
},
path::PathBuf,
process,
thread,
};
use anyhow::{
Context as _,
Error,
Result,
anyhow,
};
use clap::Parser as _;
use dix::store;
use yansi::Paint as _;
struct WriteFmt<W: io::Write>(W);
@ -64,108 +60,24 @@ fn real_main() -> Result<()> {
})
.init();
// Handle to the thread collecting closure size information.
// We do this as early as possible because Nix is slow.
let closure_size_handle = {
log::debug!("calculating closure sizes in background");
let old_path = old_path.clone();
let new_path = new_path.clone();
thread::spawn(move || {
let mut connection = store::connect()?;
Ok::<_, Error>((
connection.query_closure_size(&old_path)?,
connection.query_closure_size(&new_path)?,
))
})
};
let mut connection = store::connect()?;
let paths_old =
connection.query_depdendents(&old_path).with_context(|| {
format!(
"failed to query dependencies of path '{path}'",
path = old_path.display()
)
})?;
log::info!(
"found {count} packages in old closure",
count = paths_old.len(),
);
let paths_new =
connection.query_depdendents(&new_path).with_context(|| {
format!(
"failed to query dependencies of path '{path}'",
path = new_path.display()
)
})?;
log::info!(
"found {count} packages in new closure",
count = paths_new.len(),
);
drop(connection);
let mut out = WriteFmt(io::stdout());
writeln!(
out,
"{arrows} {old_path}",
arrows = "<<<".bold(),
old_path = old_path.display(),
)?;
writeln!(
out,
"{arrows} {new_path}",
arrows = ">>>".bold(),
new_path = new_path.display(),
)?;
// Handle to the thread collecting closure size information.
// We do this as early as possible because Nix is slow.
let closure_size_handle =
dix::spawn_size_diff(old_path.clone(), new_path.clone());
writeln!(out)?;
let wrote = dix::write_paths_diffln(&mut out, &old_path, &new_path)?;
#[expect(clippy::pattern_type_mismatch)]
let wrote = dix::write_diffln(
&mut out,
paths_old.iter().map(|(_, path)| path),
paths_new.iter().map(|(_, path)| path),
)?;
let (closure_size_old, closure_size_new) = closure_size_handle
let (size_old, size_new) = closure_size_handle
.join()
.map_err(|_| anyhow!("failed to get closure size due to thread error"))??;
let size_old = size::Size::from_bytes(closure_size_old);
let size_new = size::Size::from_bytes(closure_size_new);
let size_diff = size_new - size_old;
if wrote > 0 {
writeln!(out)?;
}
writeln!(
out,
"{header}: {size_old} -> {size_new}",
header = "SIZE".bold(),
size_old = size_old.red(),
size_new = size_new.green(),
)?;
writeln!(
out,
"{header}: {size_diff}",
header = "DIFF".bold(),
size_diff = if size_diff.bytes() > 0 {
size_diff.green()
} else {
size_diff.red()
},
)?;
dix::write_size_diffln(&mut out, size_old, size_new)?;
Ok(())
}

View file

@ -1,10 +1,22 @@
use std::{collections::HashMap, path::Path, result};
use std::{
collections::HashMap,
path::Path,
result,
};
use anyhow::{Context as _, Result, anyhow};
use anyhow::{
Context as _,
Result,
anyhow,
};
use derive_more::Deref;
use rusqlite::OpenFlags;
use size::Size;
use crate::{DerivationId, StorePath};
use crate::{
DerivationId,
StorePath,
};
#[derive(Deref)]
pub struct Connection(rusqlite::Connection);
@ -17,41 +29,45 @@ pub fn connect() -> Result<Connection> {
let inner = rusqlite::Connection::open_with_flags(
DATABASE_PATH,
OpenFlags::SQLITE_OPEN_READ_ONLY // we only run queries, safeguard against corrupting the ddb
| OpenFlags::SQLITE_OPEN_NO_MUTEX // part of the default flags, rusqlite takes care of locking anyways
OpenFlags::SQLITE_OPEN_READ_ONLY // We only run queries, safeguard against corrupting the DB.
| OpenFlags::SQLITE_OPEN_NO_MUTEX // Part of the default flags, rusqlite takes care of locking anyways.
| OpenFlags::SQLITE_OPEN_URI,
)
.with_context(|| {
format!("failed to connect to Nix database at {DATABASE_PATH}")
})?;
// perform a batched query to set some settings using PRAGMA
// Perform a batched query to set some settings using PRAGMA
// the main performance bottleneck when dix was run before
// was that the database file has to be brought from disk into
// memory
// memory.
//
// We read a large part of the DB anyways in each query,
// so it makes sense to set aside a large region of memory-mapped
// I/O to prevent incurring page faults, which can be done using
// `mmap_size`
// `mmap_size`.
//
// This made a performance difference of about 500ms (but only
// when it was first run!)
// when it was first run for a long time!).
//
// The file pages of the store can be evicted from main memory
// using `dd of=/nix/var/nix/db/db.sqlite oflag=nocache conv=notrunc,fdatasync count=0`
// if you want to test this. (Source: [https://unix.stackexchange.com/questions/36907/drop-a-specific-file-from-the-linux-filesystem-cache])
// using `dd of=/nix/var/nix/db/db.sqlite oflag=nocache conv=notrunc,fdatasync
// count=0` if you want to test this. Source: <https://unix.stackexchange.com/questions/36907/drop-a-specific-file-from-the-linux-filesystem-cache>.
//
// Documentation about the settings can be found [here](https://www.sqlite.org/pragma.html)
// Documentation about the settings can be found here: <https://www.sqlite.org/pragma.html>
//
// [0]: 256MB, enough to fit the whole DB (at least on my system - Dragyx).
// [1]: Always store temporary tables in memory.
inner
.execute_batch(
"
PRAGMA mmap_size=268435456; -- 256MB, enough to fit the whole DB (at least on my system)
PRAGMA temp_store=2; -- store temporary tables always in memory
PRAGMA query_only;",
PRAGMA mmap_size=268435456; -- See [0].
PRAGMA temp_store=2; -- See [1].
PRAGMA query_only;
",
)
.with_context(|| {
format!("Error during setup commansd of Nix databse as {DATABASE_PATH}")
format!("failed to cache Nix database at {DATABASE_PATH}")
})?;
Ok(Connection(inner))
@ -79,7 +95,7 @@ fn path_to_canonical_string(path: &Path) -> Result<String> {
impl Connection {
/// Gets the total closure size of the given store path by summing up the nar
/// size of all dependent derivations.
pub fn query_closure_size(&mut self, path: &Path) -> Result<usize> {
pub fn query_closure_size(&mut self, path: &Path) -> Result<Size> {
const QUERY: &str = "
WITH RECURSIVE
graph(p) AS (
@ -98,7 +114,7 @@ impl Connection {
let closure_size = self
.prepare_cached(QUERY)?
.query_row([path], |row| row.get(0))?;
.query_row([path], |row| Ok(Size::from_bytes(row.get::<_, i64>(0)?)))?;
Ok(closure_size)
}
@ -143,6 +159,7 @@ impl Connection {
/// We might want to collect the paths in the graph directly as
/// well in the future, depending on how much we use them
/// in the operations on the graph.
#[expect(dead_code)]
pub fn query_dependency_graph(
&mut self,
path: &StorePath,

View file

@ -6,11 +6,9 @@ use derive_more::{
Display,
From,
};
use ref_cast::RefCast;
#[derive(RefCast, Deref, Display, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct Version(str);
#[derive(Deref, DerefMut, Display, Debug, Clone, PartialEq, Eq, From)]
pub struct Version(String);
impl PartialOrd for Version {
fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
@ -20,21 +18,20 @@ impl PartialOrd for Version {
impl cmp::Ord for Version {
fn cmp(&self, that: &Self) -> cmp::Ordering {
let this = VersionComponentIter::from(&**self).filter_map(Result::ok);
let that = VersionComponentIter::from(&**that).filter_map(Result::ok);
let this = VersionComponentIter::from(&***self).filter_map(Result::ok);
let that = VersionComponentIter::from(&***that).filter_map(Result::ok);
this.cmp(that)
}
}
#[expect(clippy::into_iter_without_iter)]
impl<'a> IntoIterator for &'a Version {
type Item = Result<VersionComponent<'a>, &'a str>;
type IntoIter = VersionComponentIter<'a>;
fn into_iter(self) -> Self::IntoIter {
VersionComponentIter::from(&**self)
VersionComponentIter::from(&***self)
}
}
@ -80,7 +77,7 @@ impl<'a> Iterator for VersionComponentIter<'a> {
type Item = Result<VersionComponent<'a>, &'a str>;
fn next(&mut self) -> Option<Self::Item> {
if self.starts_with(['.', '-']) {
if self.starts_with(['.', '-', '*', ' ']) {
let ret = &self[..1];
**self = &self[1..];
return Some(Err(ret));
@ -93,7 +90,8 @@ impl<'a> Iterator for VersionComponentIter<'a> {
let component_len = self
.chars()
.take_while(|&char| {
char.is_ascii_digit() == is_digit && !matches!(char, '.' | '-')
char.is_ascii_digit() == is_digit
&& !matches!(char, '.' | '-' | '*' | ' ')
})
.map(char::len_utf8)
.sum();