forked from lix-project/lix
* `show-duplication.pl', a small utility that shows the amount of
  package duplication present in (e.g.) a profile. It shows the number
  of instances of each package in a closure, along with the size in
  bytes of each instance as well as the "waste" (the difference between
  the sum of the sizes of all instances and the average size).

    $ ./show-duplication.pl /nix/var/nix/profiles/default
    gcc 11
      3.3.6 19293318
      3.4.4 21425257
      ...
      average 14942970, waste 149429707
    coreutils 6
      ...
    average package duplication 1.87628865979381, total size 3486330471, total waste 1335324237, 38.3017114443825% wasted

  This utility is useful for measuring the cost in terms of disk space
  of the Nix approach.
This commit is contained in:
parent
e0afaf1857
commit
9488ae7357
2 changed files with 73 additions and 1 deletions
|
@ -7,7 +7,6 @@ use strict;
|
|||
select $ofh;
|
||||
}
|
||||
|
||||
#my @paths = ("/nix/store/caef3a49150506d233f474322a824e50-glibc-2.3.3", "/nix/store/a8a9d585d1ad4b1bc911be7743b3b996-glibc-2.3.3");
|
||||
my @paths = ("/nix/store");
|
||||
|
||||
my $tmpfile = "/tmp/nix-optimise-hash-list";
|
||||
|
|
73
scripts/show-duplication.pl
Executable file
73
scripts/show-duplication.pl
Executable file
|
@ -0,0 +1,73 @@
|
|||
#! /usr/bin/perl -w
|
||||
|
||||
# The script takes exactly one argument: the path whose closure is
# inspected.  Anything else prints the usage line and exits with status 1.
unless (@ARGV == 1) {
    print "syntax: show-duplication.pl PATH\n";
    exit 1;
}
|
||||
|
||||
# The path (e.g. a profile) whose closure is analysed.
my $root = $ARGV[0];


# A package name is a run of alphanumerics, `+' and `_'; it may contain a
# dash only when the dash is not followed by a digit, so the first
# dash-digit sequence starts the version.  The version is a run of
# alphanumerics, dots and dashes.
my $nameRE = qr/(?:(?:[A-Za-z0-9+_]|(?:-[^0-9]))+)/;
my $versionRE = qr/(?:[A-Za-z0-9.-]+)/;


# Maps each package name to a list of { version => ..., path => ... }
# records, one per path in the closure that carries that name.
my %pkgInstances;


# Read the closure of $root from `nix-store -qR'.  The list form of a
# pipe open runs the command without a shell and makes open itself fail
# when the command cannot be started, so a failed fork or exec is never
# mistaken for "we are the child".
my $pid = open(my $paths, "-|", "nix-store", "-qR", $root)
    or die "cannot run nix-store: $!\n";
while (my $path = <$paths>) {
    chomp $path;

    # Strip the directory part and the hash prefix, leaving "name-version".
    unless ($path =~ m{^.*/[0-9a-z]*-(.*)$}) {
        warn "skipping unrecognised path `$path'\n";
        next;
    }
    my $nameVersion = $1;

    # Split into name and optional version.  When the string does not
    # have the expected shape, the whole string is used as the name and
    # the instance is reported as unnumbered.
    my ($name, $version) = ($nameVersion, undef);
    if ($nameVersion =~ /^($nameRE)(?:-($versionRE))?$/) {
        ($name, $version) = ($1, $2);
    }
    $version = "(unnumbered)" unless defined $version;

    push @{$pkgInstances{$name}}, {version => $version, path => $path};
}
# close reports a non-zero exit status of nix-store as failure.
close $paths or exit 1;
|
||||
|
||||
|
||||
# Return the size in bytes of $path as reported by lstat, plus, when $path
# is a directory, the sizes of everything beneath it (computed
# recursively).  Symbolic links are never followed: a link contributes
# only its own lstat size, so links to directories cannot cause double
# counting or endless recursion.  Dies with a message if $path cannot be
# lstat'ed or a directory cannot be opened.
sub pathSize {
    my $path = shift;

    my @st = lstat $path
        or die "cannot lstat `$path': $!\n";

    my $size = $st[7];

    # `_' reuses the result of the lstat above instead of doing a fresh
    # stat, which would follow a symlink and report its target's type.
    if (-d _) {
        opendir my $dh, $path
            or die "cannot open directory `$path': $!\n";
        # Read all entries before recursing so the handle can be closed
        # right away and only one directory is open at a time.
        my @entries = grep { $_ ne "." && $_ ne ".." } readdir $dh;
        closedir $dh;

        foreach my $entry (@entries) {
            $size += pathSize("$path/$entry");
        }
    }

    return $size;
}
|
||||
|
||||
|
||||
# Running totals over all packages: number of store paths seen, the sum of
# their sizes, and the sum of the per-package waste.  Each is declared
# separately; a comma between assignments would declare only the first.
my $totalPaths = 0;
my $totalSize = 0;
my $totalWaste = 0;

# Report packages with the most instances first; ties are broken by name
# so that the output order is reproducible.
foreach my $name (sort { scalar @{$pkgInstances{$b}} <=> scalar @{$pkgInstances{$a}}
                         or $a cmp $b } keys %pkgInstances) {
    my $instances = $pkgInstances{$name};
    print "$name ", scalar @{$instances}, "\n";

    my $allSize = 0;
    foreach my $x (sort { $a->{version} cmp $b->{version} } @{$instances}) {
        $totalPaths++;
        my $size = pathSize $x->{path};
        $allSize += $size;
        print " $x->{version} $size\n";
    }

    # Waste is everything beyond one average-sized instance of the package.
    my $avgSize = int($allSize / scalar @{$instances});
    my $waste = $allSize - $avgSize;
    $totalSize += $allSize;
    $totalWaste += $waste;
    print " average $avgSize, waste $waste\n";
}


# Guard the summary ratios against an empty closure or a zero total size,
# which would otherwise abort with a division by zero.
my $numPkgs = scalar keys %pkgInstances;
my $avgDupl = $numPkgs ? $totalPaths / $numPkgs : 0;
my $wasteFactor = $totalSize ? ($totalWaste / $totalSize) * 100 : 0;
print "average package duplication $avgDupl, total size $totalSize, total waste $totalWaste, $wasteFactor% wasted\n";
|
Loading…
Reference in a new issue