Replace ad-hoc command-not-found generation scripts with nix-index

This is much, MUCH faster than the existing solution, especially on large channels.
It can be made faster still by reusing results between runs, but this is
a massive improvement in itself.
Ilya K 2022-11-12 17:15:50 +03:00
parent d5c5417116
commit 8656f3e07f
6 changed files with 73 additions and 470 deletions
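For context, the programs.sqlite shipped with the channel is what the NixOS command-not-found hook queries on users' machines, and the new nix-channel-index invocation (see mirror-nixos-branch.pl below) keeps producing it. A minimal sketch of that consumer-side lookup, using the Programs(name, system, package) schema from the deleted generate-programs-index.cc below; the database path and the use of the raw sqlite3 C API are illustrative assumptions, not part of this change:

    // Sketch: the lookup a command-not-found handler performs against the
    // channel's programs.sqlite. The Programs(name, system, package) schema is
    // taken from generate-programs-index.cc below; the database path is an
    // assumption for illustration.
    #include <sqlite3.h>
    #include <cstdio>

    int main(int argc, char * * argv)
    {
        if (argc != 2) { std::fprintf(stderr, "usage: lookup PROGRAM\n"); return 1; }

        sqlite3 * db;
        if (sqlite3_open_v2("/nix/var/nix/profiles/per-user/root/channels/nixos/programs.sqlite",
                &db, SQLITE_OPEN_READONLY, nullptr) != SQLITE_OK)
            return 1;

        sqlite3_stmt * stmt;
        sqlite3_prepare_v2(db,
            "select package from Programs where name = ? and system = ?", -1, &stmt, nullptr);
        sqlite3_bind_text(stmt, 1, argv[1], -1, SQLITE_TRANSIENT);
        sqlite3_bind_text(stmt, 2, "x86_64-linux", -1, SQLITE_TRANSIENT);

        // Each row is an attribute path that provides the program, e.g. "hello".
        while (sqlite3_step(stmt) == SQLITE_ROW)
            std::printf("nix-shell -p %s\n", (const char *) sqlite3_column_text(stmt, 0));

        sqlite3_finalize(stmt);
        sqlite3_close(db);
        return 0;
    }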

file-cache.hh (deleted)

@@ -1,181 +0,0 @@
/* A local disk cache for fast lookups of NAR index files in a binary
   cache. */

#include "binary-cache-store.hh"
#include "fs-accessor.hh"
#include "sqlite.hh"
#include "sync.hh"

#include <sqlite3.h>
#include <nlohmann/json.hpp>

using namespace nix;

MakeError(BadJSON, Error);

class FileCache
{
    struct State
    {
        SQLite db;
        SQLiteStmt queryPath, insertPath, queryFiles, insertFile;
    };

    Sync<State> state_;

    struct Stat : FSAccessor::Stat
    {
        std::string target;
    };

public:

    FileCache(const Path & path)
    {
        auto state(state_.lock());

        static std::string cacheSchema = R"sql(
          create table if not exists StorePaths (
            id integer primary key autoincrement not null,
            path text unique not null
          );

          create table if not exists StorePathContents (
            storePath integer not null,
            subPath text not null,
            type integer not null,
            fileSize integer,
            isExecutable integer,
            target text,
            primary key (storePath, subPath),
            foreign key (storePath) references StorePaths(id) on delete cascade
          );
        )sql";

        state->db = SQLite(path);
        state->db.exec("pragma foreign_keys = 1");
        state->db.exec(cacheSchema);

        if (sqlite3_busy_timeout(state->db, 60 * 60 * 1000) != SQLITE_OK)
            SQLiteError::throw_(state->db, "setting timeout");

        state->queryPath.create(state->db,
            "select id from StorePaths where path = ?");
        state->insertPath.create(state->db,
            "insert or ignore into StorePaths(path) values (?)");
        state->queryFiles.create(state->db,
            "select subPath, type, fileSize, isExecutable, target from StorePathContents where storePath = ?");
        state->insertFile.create(state->db,
            "insert into StorePathContents(storePath, subPath, type, fileSize, isExecutable, target) values (?, ?, ?, ?, ?, ?)");
    }

    /* Return the files in a store path, using a SQLite database to
       cache the results. */
    std::map<std::string, Stat>
    getFiles(ref<BinaryCacheStore> binaryCache, const Path & storePath)
    {
        std::map<std::string, Stat> files;

        /* Look up the path in the SQLite cache. */
        {
            auto state(state_.lock());
            auto useQueryPath(state->queryPath.use()(storePath));
            if (useQueryPath.next()) {
                auto id = useQueryPath.getInt(0);
                auto useQueryFiles(state->queryFiles.use()(id));
                while (useQueryFiles.next()) {
                    Stat st;
                    st.type = (FSAccessor::Type) useQueryFiles.getInt(1);
                    st.fileSize = (uint64_t) useQueryFiles.getInt(2);
                    st.isExecutable = useQueryFiles.getInt(3) != 0;
                    if (!useQueryFiles.isNull(4))
                        st.target = useQueryFiles.getStr(4);
                    files.emplace(useQueryFiles.getStr(0), st);
                }
                return files;
            }
        }

        using json = nlohmann::json;

        std::function<void(const std::string &, json &)> recurse;

        recurse = [&](const std::string & relPath, json & v) {
            Stat st;

            std::string type = v["type"];

            if (type == "directory") {
                st.type = FSAccessor::Type::tDirectory;
                for (auto i = v["entries"].begin(); i != v["entries"].end(); ++i) {
                    std::string name = i.key();
                    recurse(relPath.empty() ? name : relPath + "/" + name, i.value());
                }
            } else if (type == "regular") {
                st.type = FSAccessor::Type::tRegular;
                st.fileSize = v["size"];
                st.isExecutable = v.value("executable", false);
            } else if (type == "symlink") {
                st.type = FSAccessor::Type::tSymlink;
                st.target = v.value("target", "");
            } else return;

            files[relPath] = st;
        };

        /* It's not in the cache, so get the .ls.xz file (which
           contains a JSON serialisation of the listing of the NAR
           contents) from the binary cache. */
        auto now1 = std::chrono::steady_clock::now();

        auto s = binaryCache->getFile(std::string(baseNameOf(storePath).substr(0, StorePath::HashLen)) + ".ls");
        if (!s)
            printInfo("warning: no listing of %s in binary cache", storePath);
        else {
            try {
                json ls = json::parse(*s);
                if (ls.value("version", 0) != 1)
                    throw Error("NAR index for %s has an unsupported version", storePath);
                recurse("", ls.at("root"));
            } catch (json::parse_error & e) {
                // FIXME: some filenames have non-UTF8 characters in them,
                // which is not supported by nlohmann::json. So we have to
                // skip the entire package.
                throw BadJSON(e.what());
            }
        }

        /* Insert the store path into the database. */
        {
            auto state(state_.lock());
            SQLiteTxn txn(state->db);

            if (state->queryPath.use()(storePath).next()) return files;

            state->insertPath.use()(storePath).exec();
            uint64_t id = sqlite3_last_insert_rowid(state->db);

            for (auto & x : files) {
                state->insertFile.use()
                    (id)
                    (x.first)
                    (x.second.type)
                    (x.second.fileSize, x.second.type == FSAccessor::Type::tRegular)
                    (x.second.isExecutable, x.second.type == FSAccessor::Type::tRegular)
                    (x.second.target, x.second.type == FSAccessor::Type::tSymlink)
                    .exec();
            }

            txn.commit();
        }

        auto now2 = std::chrono::steady_clock::now();

        printInfo("processed %s in %d ms", storePath,
            std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1).count());

        return files;
    }
};
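For reference, the .ls listings that getFiles fetches and caches are JSON documents of roughly the following shape. The field names ("version", "root", "entries", "type", "size", "executable", "target") are the ones the parser above reads; the concrete paths and sizes here are invented:

    // Sketch of a binary-cache .ls listing as consumed by FileCache::getFiles.
    // The structure mirrors what the recurse lambda above walks; values are
    // made up for illustration.
    #include <nlohmann/json.hpp>
    #include <iostream>

    int main()
    {
        auto ls = nlohmann::json::parse(R"({
            "version": 1,
            "root": {
                "type": "directory",
                "entries": {
                    "bin": {
                        "type": "directory",
                        "entries": {
                            "hello": { "type": "regular", "size": 38912, "executable": true },
                            "hi":    { "type": "symlink", "target": "hello" }
                        }
                    }
                }
            }
        })");
        std::cout << ls.at("root").at("entries").at("bin").at("entries").size()
                  << " entries under bin/\n";
    }

The recurse lambda flattens exactly this tree into relPath-to-Stat entries before they are written to the SQLite cache.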

flake.lock

@@ -1,5 +1,21 @@
 {
   "nodes": {
+    "flake-compat": {
+      "flake": false,
+      "locked": {
+        "lastModified": 1641205782,
+        "narHash": "sha256-4jY7RCWUoZ9cKD8co0/4tFARpWB+57+r1bLLvXNJliY=",
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "rev": "b7547d3eed6f32d06102ead8991ec52ab0a4f1a7",
+        "type": "github"
+      },
+      "original": {
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "type": "github"
+      }
+    },
     "lowdown-src": {
       "flake": false,
       "locked": {
@@ -23,11 +39,11 @@
         "nixpkgs-regression": "nixpkgs-regression"
       },
       "locked": {
-        "lastModified": 1662636880,
-        "narHash": "sha256-GJOhzWvNDztkqn96THKiodFRIkk7RsrzecVoX/e8FOk=",
+        "lastModified": 1673354285,
+        "narHash": "sha256-nSZH3ziwlMBe83pfAeMZWy6+URpEQISnZNmdO438dC8=",
         "owner": "NixOS",
         "repo": "nix",
-        "rev": "c8e8eea95e6d235ba0120bda5a8ff87b97f3e5a7",
+        "rev": "877e7a5ccf21279d9e027867e63181b2b9a6a15a",
         "type": "github"
       },
       "original": {
@@ -35,18 +51,40 @@
         "type": "indirect"
       }
     },
+    "nix-index": {
+      "inputs": {
+        "flake-compat": "flake-compat",
+        "nixpkgs": [
+          "nix",
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1673358637,
+        "narHash": "sha256-UVKfgjnkU8U5+z96CiKUs1pnIFd3/2cJuvcQmWC0Hoo=",
+        "owner": "bennofs",
+        "repo": "nix-index",
+        "rev": "7ac935501a0f4ec094d15a35acf2bb35442259bb",
+        "type": "github"
+      },
+      "original": {
+        "owner": "bennofs",
+        "repo": "nix-index",
+        "type": "github"
+      }
+    },
     "nixpkgs": {
       "locked": {
-        "lastModified": 1657693803,
-        "narHash": "sha256-G++2CJ9u0E7NNTAi9n5G8TdDmGJXcIjkJ3NF8cetQB8=",
+        "lastModified": 1670461440,
+        "narHash": "sha256-jy1LB8HOMKGJEGXgzFRLDU1CBGL0/LlkolgnqIsF0D8=",
         "owner": "NixOS",
         "repo": "nixpkgs",
-        "rev": "365e1b3a859281cf11b94f87231adeabbdd878a2",
+        "rev": "04a75b2eecc0acf6239acf9dd04485ff8d14f425",
         "type": "github"
       },
       "original": {
         "owner": "NixOS",
-        "ref": "nixos-22.05-small",
+        "ref": "nixos-22.11-small",
         "repo": "nixpkgs",
         "type": "github"
       }
@@ -70,6 +108,7 @@
     "root": {
       "inputs": {
         "nix": "nix",
+        "nix-index": "nix-index",
         "nixpkgs": [
           "nix",
           "nixpkgs"

flake.nix

@@ -2,9 +2,13 @@
   description = "Script for generating Nixpkgs/NixOS channels";
   inputs.nixpkgs.follows = "nix/nixpkgs";
+  inputs.nix-index = {
+    url = "github:bennofs/nix-index";
+    inputs.nixpkgs.follows = "nix/nixpkgs";
+  };
-  outputs = { self, nixpkgs, nix }:
+  outputs = { self, nixpkgs, nix, nix-index }:
+    let nix-index' = nix-index.packages.x86_64-linux.nix-index; in
     {
       overlays.default = final: prev: {
@@ -23,15 +27,6 @@
         buildCommand = ''
           mkdir -p $out/bin
-          cp ${./file-cache.hh} file-cache.hh
-          g++ -Os -g ${./generate-programs-index.cc} -Wall -std=c++14 -o $out/bin/generate-programs-index -I . \
-            $(pkg-config --cflags nix-main) \
-            $(pkg-config --libs nix-main) \
-            $(pkg-config --libs nix-expr) \
-            $(pkg-config --libs nix-store) \
-            -lsqlite3 -lgc
           g++ -Os -g ${./index-debuginfo.cc} -Wall -std=c++14 -o $out/bin/index-debuginfo -I . \
             $(pkg-config --cflags nix-main) \
             $(pkg-config --libs nix-main) \
@@ -57,6 +52,7 @@
             brotli
             jq
             nixos-channel-native-programs
+            nix-index'
           ];
           buildCommand = ''
@@ -65,7 +61,7 @@
           cp ${./mirror-nixos-branch.pl} $out/bin/mirror-nixos-branch
           wrapProgram $out/bin/mirror-nixos-branch \
             --set PERL5LIB $PERL5LIB \
-            --prefix PATH : ${wget}/bin:${git}/bin:${final.nix}/bin:${gnutar}/bin:${xz}/bin:${rsync}/bin:${openssh}/bin:${nixos-channel-native-programs}/bin:$out/bin
+            --prefix PATH : ${wget}/bin:${git}/bin:${final.nix}/bin:${gnutar}/bin:${xz}/bin:${rsync}/bin:${openssh}/bin:${nix-index'}/bin:${nixos-channel-native-programs}/bin:$out/bin
           patchShebangs $out/bin
         '';

generate-programs-index.cc (deleted)

@@ -1,218 +0,0 @@
#include <nix/config.h>

#include <chrono>
#include <regex>

#include "shared.hh"
#include "globals.hh"
#include "eval.hh"
#include "store-api.hh"
#include "get-drvs.hh"
#include "thread-pool.hh"
#include "sqlite.hh"
#include "binary-cache-store.hh"
#include "logging.hh"

#include "file-cache.hh"

using namespace nix;

static const char * programsSchema = R"sql(
  create table if not exists Programs (
    name text not null,
    system text not null,
    package text not null,
    primary key (name, system, package)
  );
)sql";

void mainWrapped(int argc, char * * argv)
{
    initNix();
    initGC();

    if (argc != 6) throw Error("usage: generate-programs-index CACHE-DB PROGRAMS-DB BINARY-CACHE-URI STORE-PATHS NIXPKGS-PATH");

    Path cacheDbPath = argv[1];
    Path programsDbPath = argv[2];
    Path storePathsFile = argv[4];
    Path nixpkgsPath = argv[5];

    settings.readOnlyMode = true;
    loggerSettings.showTrace = true;

    auto localStore = openStore();
    std::string binaryCacheUri = argv[3];
    if (hasSuffix(binaryCacheUri, "/")) binaryCacheUri.pop_back();
    auto binaryCache = openStore(binaryCacheUri).cast<BinaryCacheStore>();

    /* Get the allowed store paths to be included in the database. */
    auto allowedPaths = binaryCache->parseStorePathSet(tokenizeString<PathSet>(readFile(storePathsFile)));

    StorePathSet allowedPathsClosure;
    binaryCache->computeFSClosure(allowedPaths, allowedPathsClosure);

    printMsg(lvlInfo, "%d top-level paths, %d paths in closure",
        allowedPaths.size(), allowedPathsClosure.size());

    FileCache fileCache(cacheDbPath);

    /* Initialise the programs database. */
    struct ProgramsState
    {
        SQLite db;
        SQLiteStmt insertProgram;
    };

    Sync<ProgramsState> programsState_;

    unlink(programsDbPath.c_str());

    {
        auto programsState(programsState_.lock());

        programsState->db = SQLite(programsDbPath);
        programsState->db.exec("pragma synchronous = off");
        programsState->db.exec("pragma main.journal_mode = truncate");
        programsState->db.exec(programsSchema);

        programsState->insertProgram.create(programsState->db,
            "insert or replace into Programs(name, system, package) values (?, ?, ?)");
    }

    EvalState state({}, localStore);

    Value vRoot;
    state.eval(state.parseExprFromFile(resolveExprPath(absPath(nixpkgsPath))), vRoot);

    /* Get all derivations. */
    DrvInfos packages;

    for (auto system : std::set<std::string>{"x86_64-linux", "aarch64-linux"}) {
        auto args = state.buildBindings(2);
        args.alloc(state.symbols.create("config")).mkAttrs(&state.emptyBindings);
        args.alloc(state.symbols.create("system")).mkString(system);
        getDerivations(state, vRoot, "", *args.finish(), packages, true);
    }

    /* For each store path, figure out the package with the shortest
       attribute name. E.g. "nix" is preferred over "nixStable". */
    std::map<StorePath, DrvInfo *> packagesByPath;

    for (auto & package : packages)
        try {
            auto outputs = package.queryOutputs(true);

            for (auto & [_, storePath] : outputs) {
                if (!storePath) continue;
                if (!allowedPathsClosure.count(*storePath)) continue;
                auto i = packagesByPath.find(*storePath);
                if (i != packagesByPath.end() &&
                    (i->second->attrPath.size() < package.attrPath.size() ||
                        (i->second->attrPath.size() == package.attrPath.size() && i->second->attrPath < package.attrPath)))
                    continue;
                packagesByPath.emplace(std::move(*storePath), &package);
            }
        } catch (AssertionError & e) {
        } catch (Error & e) {
            e.addTrace({}, hintfmt("in package '%s': ", package.attrPath));
            throw;
        }

    /* Note: we don't index hidden files. */
    std::regex isProgram("bin/([^.][^/]*)");

    /* Process each store path. */
    auto doPath = [&](const Path & storePath, DrvInfo * package) {
        try {
            auto files = fileCache.getFiles(binaryCache, storePath);
            if (files.empty()) return;

            std::set<std::string> programs;

            for (auto file : files) {

                std::smatch match;
                if (!std::regex_match(file.first, match, isProgram)) continue;

                auto curPath = file.first;
                auto stat = file.second;

                while (stat.type == FSAccessor::Type::tSymlink) {

                    auto target = canonPath(
                        hasPrefix(stat.target, "/")
                        ? stat.target
                        : dirOf(storePath + "/" + curPath) + "/" + stat.target);

                    // FIXME: resolve symlinks in components of stat.target.
                    if (!hasPrefix(target, "/nix/store/")) break;

                    /* Assume that symlinks to other store paths point
                       to executables. But check symlinks within the
                       same store path. */
                    if (target.compare(0, storePath.size(), storePath) != 0) {
                        stat.type = FSAccessor::Type::tRegular;
                        stat.isExecutable = true;
                        break;
                    }

                    std::string sub(target, storePath.size() + 1);

                    auto file2 = files.find(sub);
                    if (file2 == files.end()) {
                        printError("symlink %s has non-existent target %s",
                            storePath + "/" + file.first, stat.target);
                        break;
                    }

                    curPath = sub;
                    stat = file2->second;
                }

                if (stat.type == FSAccessor::Type::tDirectory
                    || stat.type == FSAccessor::Type::tSymlink
                    || (stat.type == FSAccessor::Type::tRegular && !stat.isExecutable))
                    continue;

                programs.insert(match[1]);
            }

            if (programs.empty()) return;

            {
                auto programsState(programsState_.lock());
                SQLiteTxn txn(programsState->db);
                for (auto & program : programs)
                    programsState->insertProgram.use()(program)(package->querySystem())(package->attrPath).exec();
                txn.commit();
            }

        } catch (BadJSON & e) {
            printError("error: in %s (%s): %s", package->attrPath, storePath, e.what());
        }
    };

    /* Enqueue work items for each package. */
    ThreadPool threadPool(16);

    for (auto & i : packagesByPath)
        threadPool.enqueue(std::bind(doPath, binaryCache->printStorePath(i.first), i.second));

    threadPool.process();

    /* Vacuum programs.sqlite to make it smaller. */
    {
        auto programsState(programsState_.lock());
        programsState->db.exec("vacuum");
    }
}

int main(int argc, char * * argv)
{
    return handleExceptions(argv[0], [&]() {
        mainWrapped(argc, argv);
    });
}
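The tie-breaking rule in the comment above ("nix" is preferred over "nixStable") amounts to: among attribute paths that map to the same store path, keep the shortest one, breaking ties lexicographically. A self-contained sketch of that comparison, with made-up attribute names:

    // Sketch of the attribute-path preference used when several packages
    // share one store path: shortest name wins, then lexicographic order.
    // The attribute names below are invented for illustration.
    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    int main()
    {
        std::vector<std::string> attrPaths = { "nixStable", "nix", "nixUnstable" };
        auto best = *std::min_element(attrPaths.begin(), attrPaths.end(),
            [](const std::string & a, const std::string & b) {
                return std::make_pair(a.size(), a) < std::make_pair(b.size(), b);
            });
        std::cout << best << "\n"; // prints "nix"
    }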

index-debuginfo.cc

@@ -3,11 +3,12 @@
 #include <regex>
 #include "shared.hh"
+#include "sqlite.hh"
 #include "s3-binary-cache-store.hh"
 #include "thread-pool.hh"
 #include "nar-info.hh"
-#include "file-cache.hh"
+#include <nlohmann/json.hpp>
 // cache.nixos.org/debuginfo/<build-id>
 // => redirect to NAR
@@ -18,75 +19,41 @@ void mainWrapped(int argc, char * * argv)
 {
     initNix();
-    if (argc != 4) throw Error("usage: index-debuginfo CACHE-DB BINARY-CACHE-URI STORE-PATHS");
-    Path cacheDbPath = argv[1];
+    if (argc != 3) throw Error("usage: index-debuginfo DEBUG-DB BINARY-CACHE-URI");
+    Path debugDbPath = argv[1];
     std::string binaryCacheUri = argv[2];
-    Path storePathsFile = argv[3];
-    FileCache fileCache(cacheDbPath);
     if (hasSuffix(binaryCacheUri, "/")) binaryCacheUri.pop_back();
     auto binaryCache = openStore(binaryCacheUri).cast<S3BinaryCacheStore>();
-    auto storePaths = binaryCache->parseStorePathSet(tokenizeString<PathSet>(readFile(storePathsFile)));
-    std::regex debugFileRegex("^lib/debug/\\.build-id/[0-9a-f]{2}/[0-9a-f]{38}\\.debug$");
     ThreadPool threadPool(25);
-    auto doFile = [&](std::string member, std::string key, std::string target) {
+    auto doFile = [&](std::string build_id, std::string url, std::string filename) {
         checkInterrupt();
         nlohmann::json json;
-        json["archive"] = target;
-        json["member"] = member;
+        json["archive"] = url;
+        json["member"] = filename;
         // FIXME: or should we overwrite? The previous link may point
         // to a GC'ed file, so overwriting might be useful...
-        if (binaryCache->fileExists(key)) return;
-        printError("redirecting %s to %s", key, target);
-        binaryCache->upsertFile(key, json.dump(), "application/json");
+        if (binaryCache->fileExists(build_id)) return;
+        printError("redirecting %s to %s", build_id, filename);
+        binaryCache->upsertFile(build_id, json.dump(), "application/json");
     };
-    auto doPath = [&](const Path & storePath) {
-        checkInterrupt();
-        try {
-            auto files = fileCache.getFiles(binaryCache, storePath);
-            std::string prefix = "lib/debug/.build-id/";
-            for (auto & file : files) {
-                if (file.second.type != FSAccessor::Type::tRegular
-                    || !std::regex_match(file.first, debugFileRegex))
-                    continue;
-                std::string buildId =
-                    std::string(file.first, prefix.size(), 2) +
-                    std::string(file.first, prefix.size() + 3, 38);
-                auto info = binaryCache->queryPathInfo(binaryCache->parseStorePath(storePath)).cast<const NarInfo>();
-                assert(hasPrefix(info->url, "nar/"));
-                std::string key = "debuginfo/" + buildId;
-                std::string target = "../" + info->url;
-                threadPool.enqueue(std::bind(doFile, file.first, key, target));
-            }
-        } catch (BadJSON & e) {
-            printError("error: in %s: %s", storePath, e.what());
-        }
-    };
-    for (auto & storePath : storePaths)
-        if (hasSuffix(storePath.name(), "-debug"))
-            threadPool.enqueue(std::bind(doPath, binaryCache->printStorePath(storePath)));
+    auto db = SQLite(debugDbPath);
+    auto stmt = SQLiteStmt(db, "select build_id, url, filename from DebugInfo;");
+    auto query = stmt.use();
+    while (query.next()) {
+        threadPool.enqueue(std::bind(doFile, query.getStr(0), query.getStr(1), query.getStr(2)));
+    }
     threadPool.process();
 }
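After this rewrite, index-debuginfo no longer scans store paths itself: it reads (build_id, url, filename) rows from the debug.sqlite produced by nix-channel-index and uploads one small JSON redirect per build id, as in the cache.nixos.org/debuginfo/<build-id> scheme noted in the header comment. A sketch of one such redirect object, with invented values but the same two keys the code writes:

    // Sketch of the redirect document index-debuginfo upserts per DebugInfo
    // row: {"archive": <NAR url>, "member": <file inside that NAR>}.
    // The concrete values below are invented for illustration.
    #include <nlohmann/json.hpp>
    #include <iostream>

    int main()
    {
        nlohmann::json redirect;
        redirect["archive"] = "../nar/0000000000000000000000000000000000.nar.xz";
        redirect["member"] = "lib/debug/.build-id/ab/cdef0123456789abcdef0123456789abcdef01.debug";
        // Uploaded with content type "application/json" under the build-id key,
        // so clients resolving a build id can be pointed at the right NAR.
        std::cout << redirect.dump(2) << std::endl;
    }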

mirror-nixos-branch.pl

@@ -265,9 +265,9 @@ if ($bucketReleases && $bucketReleases->head_key("$releasePrefix")) {
     File::Path::make_path("$tmpDir/unpack");
     run("tar", "xfJ", "$tmpDir/nixexprs.tar.xz", "-C", "$tmpDir/unpack");
     my $exprDir = glob("$tmpDir/unpack/*");
-    run("generate-programs-index", "$filesCache", "$exprDir/programs.sqlite", "http://nix-cache.s3.amazonaws.com/", "$tmpDir/store-paths", "$exprDir/nixpkgs");
-    run("index-debuginfo", "$filesCache", "s3://nix-cache", "$tmpDir/store-paths");
-    run("rm", "-f", "$tmpDir/nixexprs.tar.xz", "$exprDir/programs.sqlite-journal");
+    run("nix-channel-index", "-o", "$exprDir/programs.sqlite", "-d", "$exprDir/debug.sqlite", "-f", "$exprDir/nixpkgs", "-s", "aarch64-linux", "-s", "x86_64-linux");
+    run("index-debuginfo", "$exprDir/debug.sqlite", "s3://nix-cache");
+    run("rm", "-f", "$tmpDir/nixexprs.tar.xz", "$exprDir/debug.sqlite");
     unlink("$tmpDir/nixexprs.tar.xz.sha256");
     run("tar", "cfJ", "$tmpDir/nixexprs.tar.xz", "-C", "$tmpDir/unpack", basename($exprDir));
     run("rm", "-rf", "$tmpDir/unpack");