* Use a weighted use heuristic to disambiguate between multiple

occurances of a component.  If the shortest path distance between a
  component P and Q in the referers graph is D, then the contribution
  of Q to the use of P is 1 / R^D, where R >= 1, typically 2.  This
  expresses that distant indirect uses are less important than nearby
  uses.

  For instance, this can disambiguate between the bootstrap GCC in
  Nixpkgs and the GCC of the final stdenv (the former has more uses,
  but they are further away),  and between the GCC of the final stdenv
  and the GCC+G77 build (the latter has very few uses).
This commit is contained in:
Eelco Dolstra 2005-03-01 10:33:55 +00:00
parent 2c4302dd7a
commit db322a47ff

View file

@ -126,6 +126,64 @@ sub containsPatch {
} }
# Compute the "weighted" number of uses of a path in the build graph.
sub computeUses {
my $narFiles = shift;
my $path = shift;
# Find the deriver of $path.
return 1 unless defined $$narFiles{$path};
my $deriver = @{$$narFiles{$path}}[0]->{deriver};
return 1 unless defined $deriver && $deriver ne "";
# print " DERIVER $deriver\n";
# Optimisation: build the referers graph from the references
# graph.
my %referers;
foreach my $q (keys %{$narFiles}) {
my @refs = split " ", @{$$narFiles{$q}}[0]->{references};
foreach my $r (@refs) {
$referers{$r} = [] unless defined $referers{$r};
push @{$referers{$r}}, $q;
}
}
# Determine the shortest path from $deriver to all other reachable
# paths in the `referers' graph.
my %dist;
$dist{$deriver} = 0;
my @queue = ($deriver);
my $pos = 0;
while ($pos < scalar @queue) {
my $p = $queue[$pos];
$pos++;
foreach my $q (@{$referers{$p}}) {
if (!defined $dist{$q}) {
$dist{$q} = $dist{$p} + 1;
# print " $q $dist{$q}\n";
push @queue, $q;
}
}
}
my $wuse = 1.0;
foreach my $user (keys %dist) {
next if $user eq $deriver;
# print " $user $dist{$user}\n";
$wuse += 1.0 / 2.0**$dist{$user};
}
# print " XXX $path $wuse\n";
return $wuse;
}
# For each output path in the destination, see if we need to / can # For each output path in the destination, see if we need to / can
# create a patch. # create a patch.
@ -134,7 +192,7 @@ print "creating patches...\n";
foreach my $p (keys %dstOutPaths) { foreach my $p (keys %dstOutPaths) {
# If exactly the same path already exists in the source, skip it. # If exactly the same path already exists in the source, skip it.
# !!! next if defined $srcOutPaths{$p}; next if defined $srcOutPaths{$p};
print " $p\n"; print " $p\n";
@ -154,17 +212,32 @@ foreach my $p (keys %dstOutPaths) {
(my $name2, my $version2) = getNameVersion $q; (my $name2, my $version2) = getNameVersion $q;
if ($name eq $name2) { if ($name eq $name2) {
# If the sizes differ to much, then skip. This # If the sizes differ too much, then skip. This
# disambiguates between, e.g., a real component and a # disambiguates between, e.g., a real component and a
# wrapper component (cf. Firefox in Nixpkgs). # wrapper component (cf. Firefox in Nixpkgs).
my $srcSize = @{$srcNarFiles{$q}}[0]->{size}; my $srcSize = @{$srcNarFiles{$q}}[0]->{size};
my $dstSize = @{$dstNarFiles{$p}}[0]->{size}; my $dstSize = @{$dstNarFiles{$p}}[0]->{size};
my $ratio = $srcSize / $dstSize; my $ratio = $srcSize / $dstSize;
$ratio = 1 / $ratio if $ratio < 1; $ratio = 1 / $ratio if $ratio < 1;
print " $srcSize $dstSize $ratio $q\n"; # print " SIZE $srcSize $dstSize $ratio $q\n";
if ($ratio >= 3) { if ($ratio >= 3) {
print " SKIPPING $q due to size ratio $ratio\n"; print " SKIPPING $q due to size ratio $ratio ($srcSize $dstSize)\n";
next;
}
# If the numbers of weighted uses differ too much, then
# skip. This disambiguates between, e.g., the bootstrap
# GCC and the final GCC in Nixpkgs.
my $srcUses = computeUses \%srcNarFiles, $q;
my $dstUses = computeUses \%dstNarFiles, $p;
$ratio = $srcUses / $dstUses;
$ratio = 1 / $ratio if $ratio < 1;
print " USE $srcUses $dstUses $ratio $q\n";
if ($ratio >= 2) {
print " SKIPPING $q due to use ratio $ratio ($srcUses $dstUses)\n";
next;
} }
# If there are multiple matching names, include the ones # If there are multiple matching names, include the ones
@ -190,8 +263,6 @@ foreach my $p (keys %dstOutPaths) {
# Generate a patch between $closest and $p. # Generate a patch between $closest and $p.
print " $p <- $closest\n"; print " $p <- $closest\n";
next;
# If the patch already exists, skip it. # If the patch already exists, skip it.
if (containsPatch(\%srcPatches, $p, $closest) || if (containsPatch(\%srcPatches, $p, $closest) ||
containsPatch(\%dstPatches, $p, $closest)) containsPatch(\%dstPatches, $p, $closest))
@ -260,8 +331,6 @@ foreach my $p (keys %dstOutPaths) {
} }
} }
exit 0; # !!!
# Add in any potentially useful patches in the source (namely, those # Add in any potentially useful patches in the source (namely, those
# patches that produce either paths in the destination or paths that # patches that produce either paths in the destination or paths that