Merge pull request #62 from K900/the-nix-index-thing

Replace ad-hoc command-not-found generation scripts with nix-index
This commit is contained in:
Graham Christensen 2023-01-30 14:08:08 +00:00 committed by GitHub
commit 035a2d539b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 32 additions and 529 deletions

View file

@ -1,181 +0,0 @@
/* A local disk cache for fast lookups of NAR index files in a binary
cache. */
#include "binary-cache-store.hh"
#include "fs-accessor.hh"
#include "sqlite.hh"
#include "sync.hh"
#include <sqlite3.h>
#include <nlohmann/json.hpp>
using namespace nix;
/* Raised by FileCache::getFiles() when the NAR listing fetched from the
   binary cache cannot be parsed as JSON. */
MakeError(BadJSON, Error);
class FileCache
{
struct State
{
SQLite db;
SQLiteStmt queryPath, insertPath, queryFiles, insertFile;
};
Sync<State> state_;
struct Stat : FSAccessor::Stat
{
std::string target;
};
public:
FileCache(const Path & path)
{
auto state(state_.lock());
static std::string cacheSchema = R"sql(
create table if not exists StorePaths (
id integer primary key autoincrement not null,
path text unique not null
);
create table if not exists StorePathContents (
storePath integer not null,
subPath text not null,
type integer not null,
fileSize integer,
isExecutable integer,
target text,
primary key (storePath, subPath),
foreign key (storePath) references StorePaths(id) on delete cascade
);
)sql";
state->db = SQLite(path);
state->db.exec("pragma foreign_keys = 1");
state->db.exec(cacheSchema);
if (sqlite3_busy_timeout(state->db, 60 * 60 * 1000) != SQLITE_OK)
SQLiteError::throw_(state->db, "setting timeout");
state->queryPath.create(state->db,
"select id from StorePaths where path = ?");
state->insertPath.create(state->db,
"insert or ignore into StorePaths(path) values (?)");
state->queryFiles.create(state->db,
"select subPath, type, fileSize, isExecutable, target from StorePathContents where storePath = ?");
state->insertFile.create(state->db,
"insert into StorePathContents(storePath, subPath, type, fileSize, isExecutable, target) values (?, ?, ?, ?, ?, ?)");
}
/* Return the files in a store path, using a SQLite database to
cache the results. */
std::map<std::string, Stat>
getFiles(ref<BinaryCacheStore> binaryCache, const Path & storePath)
{
std::map<std::string, Stat> files;
/* Look up the path in the SQLite cache. */
{
auto state(state_.lock());
auto useQueryPath(state->queryPath.use()(storePath));
if (useQueryPath.next()) {
auto id = useQueryPath.getInt(0);
auto useQueryFiles(state->queryFiles.use()(id));
while (useQueryFiles.next()) {
Stat st;
st.type = (FSAccessor::Type) useQueryFiles.getInt(1);
st.fileSize = (uint64_t) useQueryFiles.getInt(2);
st.isExecutable = useQueryFiles.getInt(3) != 0;
if (!useQueryFiles.isNull(4))
st.target = useQueryFiles.getStr(4);
files.emplace(useQueryFiles.getStr(0), st);
}
return files;
}
}
using json = nlohmann::json;
std::function<void(const std::string &, json &)> recurse;
recurse = [&](const std::string & relPath, json & v) {
Stat st;
std::string type = v["type"];
if (type == "directory") {
st.type = FSAccessor::Type::tDirectory;
for (auto i = v["entries"].begin(); i != v["entries"].end(); ++i) {
std::string name = i.key();
recurse(relPath.empty() ? name : relPath + "/" + name, i.value());
}
} else if (type == "regular") {
st.type = FSAccessor::Type::tRegular;
st.fileSize = v["size"];
st.isExecutable = v.value("executable", false);
} else if (type == "symlink") {
st.type = FSAccessor::Type::tSymlink;
st.target = v.value("target", "");
} else return;
files[relPath] = st;
};
/* It's not in the cache, so get the .ls.xz file (which
contains a JSON serialisation of the listing of the NAR
contents) from the binary cache. */
auto now1 = std::chrono::steady_clock::now();
auto s = binaryCache->getFile(std::string(baseNameOf(storePath).substr(0, StorePath::HashLen)) + ".ls");
if (!s)
printInfo("warning: no listing of %s in binary cache", storePath);
else {
try {
json ls = json::parse(*s);
if (ls.value("version", 0) != 1)
throw Error("NAR index for %s has an unsupported version", storePath);
recurse("", ls.at("root"));
} catch (json::parse_error & e) {
// FIXME: some filenames have non-UTF8 characters in them,
// which is not supported by nlohmann::json. So we have to
// skip the entire package.
throw BadJSON(e.what());
}
}
/* Insert the store path into the database. */
{
auto state(state_.lock());
SQLiteTxn txn(state->db);
if (state->queryPath.use()(storePath).next()) return files;
state->insertPath.use()(storePath).exec();
uint64_t id = sqlite3_last_insert_rowid(state->db);
for (auto & x : files) {
state->insertFile.use()
(id)
(x.first)
(x.second.type)
(x.second.fileSize, x.second.type == FSAccessor::Type::tRegular)
(x.second.isExecutable, x.second.type == FSAccessor::Type::tRegular)
(x.second.target, x.second.type == FSAccessor::Type::tSymlink)
.exec();
}
txn.commit();
}
auto now2 = std::chrono::steady_clock::now();
printInfo("processed %s in %d ms", storePath,
std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1).count());
return files;
}
};

View file

@ -1,79 +1,24 @@
{
"nodes": {
"lowdown-src": {
"flake": false,
"locked": {
"lastModified": 1633514407,
"narHash": "sha256-Dw32tiMjdK9t3ETl5fzGrutQTzh2rufgZV4A/BbxuD4=",
"owner": "kristapsdz",
"repo": "lowdown",
"rev": "d2c2b44ff6c27b936ec27358a2653caaef8f73b8",
"type": "github"
},
"original": {
"owner": "kristapsdz",
"repo": "lowdown",
"type": "github"
}
},
"nix": {
"inputs": {
"lowdown-src": "lowdown-src",
"nixpkgs": "nixpkgs",
"nixpkgs-regression": "nixpkgs-regression"
},
"locked": {
"lastModified": 1662636880,
"narHash": "sha256-GJOhzWvNDztkqn96THKiodFRIkk7RsrzecVoX/e8FOk=",
"owner": "NixOS",
"repo": "nix",
"rev": "c8e8eea95e6d235ba0120bda5a8ff87b97f3e5a7",
"type": "github"
},
"original": {
"id": "nix",
"type": "indirect"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1657693803,
"narHash": "sha256-G++2CJ9u0E7NNTAi9n5G8TdDmGJXcIjkJ3NF8cetQB8=",
"lastModified": 1673527292,
"narHash": "sha256-903EpRSDCfUvic7Hsiqwy+h7zlMTLAUbCXkEGGriCfM=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "365e1b3a859281cf11b94f87231adeabbdd878a2",
"rev": "6a3f9996408c970b99b8b992b11bb249d1455b62",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-22.05-small",
"ref": "nixos-22.11-small",
"repo": "nixpkgs",
"type": "github"
}
},
"nixpkgs-regression": {
"locked": {
"lastModified": 1643052045,
"narHash": "sha256-uGJ0VXIhWKGXxkeNnq4TvV3CIOkUJ3PAoLZ3HMzNVMw=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "215d4d0fd80ca5163643b03a33fde804a29cc1e2",
"type": "github"
},
"original": {
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "215d4d0fd80ca5163643b03a33fde804a29cc1e2",
"type": "github"
}
},
"root": {
"inputs": {
"nix": "nix",
"nixpkgs": [
"nix",
"nixpkgs"
]
"nixpkgs": "nixpkgs"
}
}
},

View file

@ -1,18 +1,15 @@
{
description = "Script for generating Nixpkgs/NixOS channels";
inputs.nixpkgs.follows = "nix/nixpkgs";
outputs = { self, nixpkgs, nix }:
inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixos-22.11-small";
outputs = { self, nixpkgs }:
{
overlays.default = final: prev: {
nixos-channel-native-programs = with final; stdenv.mkDerivation {
name = "nixos-channel-native-programs";
buildInputs = [
final.nix
nix
pkgconfig
boehmgc
nlohmann_json
@ -23,15 +20,6 @@
buildCommand = ''
mkdir -p $out/bin
cp ${./file-cache.hh} file-cache.hh
g++ -Os -g ${./generate-programs-index.cc} -Wall -std=c++14 -o $out/bin/generate-programs-index -I . \
$(pkg-config --cflags nix-main) \
$(pkg-config --libs nix-main) \
$(pkg-config --libs nix-expr) \
$(pkg-config --libs nix-store) \
-lsqlite3 -lgc
g++ -Os -g ${./index-debuginfo.cc} -Wall -std=c++14 -o $out/bin/index-debuginfo -I . \
$(pkg-config --cflags nix-main) \
$(pkg-config --libs nix-main) \
@ -43,8 +31,8 @@
nixos-channel-scripts = with final; stdenv.mkDerivation {
name = "nixos-channel-scripts";
buildInputs = with final.perlPackages;
[ final.nix
buildInputs = with perlPackages;
[ nix
sqlite
makeWrapper
perl
@ -57,6 +45,7 @@
brotli
jq
nixos-channel-native-programs
nix-index
];
buildCommand = ''
@ -65,7 +54,7 @@
cp ${./mirror-nixos-branch.pl} $out/bin/mirror-nixos-branch
wrapProgram $out/bin/mirror-nixos-branch \
--set PERL5LIB $PERL5LIB \
--prefix PATH : ${wget}/bin:${git}/bin:${final.nix}/bin:${gnutar}/bin:${xz}/bin:${rsync}/bin:${openssh}/bin:${nixos-channel-native-programs}/bin:$out/bin
--prefix PATH : ${lib.makeBinPath [ wget git nix gnutar xz rsync openssh nix-index nixos-channel-native-programs ]}
patchShebangs $out/bin
'';
@ -75,8 +64,7 @@
defaultPackage.x86_64-linux = (import nixpkgs {
system = "x86_64-linux";
overlays = [ nix.overlays.default self.overlays.default ];
overlays = [ self.overlays.default ];
}).nixos-channel-scripts;
};
}

View file

@ -1,218 +0,0 @@
#include <nix/config.h>
#include <chrono>
#include <regex>
#include "shared.hh"
#include "globals.hh"
#include "eval.hh"
#include "store-api.hh"
#include "get-drvs.hh"
#include "thread-pool.hh"
#include "sqlite.hh"
#include "binary-cache-store.hh"
#include "logging.hh"
#include "file-cache.hh"
using namespace nix;
/* Schema of the programs database: one row per (program name, system,
   package attribute path) triple. */
static const char * programsSchema = R"sql(
create table if not exists Programs (
name text not null,
system text not null,
package text not null,
primary key (name, system, package)
);
)sql";
/* Generate the programs database.

   Usage: generate-programs-index CACHE-DB PROGRAMS-DB BINARY-CACHE-URI STORE-PATHS NIXPKGS-PATH

   The Nixpkgs expression at NIXPKGS-PATH is evaluated for each
   supported system. Every output store path that is in the closure of
   the paths listed in the STORE-PATHS file is mapped to one package.
   For each such store path the file listing is obtained through the
   file cache in CACHE-DB (backed by the binary cache), and every
   executable found directly under bin/ is recorded in PROGRAMS-DB,
   which is recreated from scratch.

   Throws Error on wrong usage; other errors propagate from the store,
   evaluator and SQLite layers. */
void mainWrapped(int argc, char * * argv)
{
    initNix();
    initGC();

    if (argc != 6) throw Error("usage: generate-programs-index CACHE-DB PROGRAMS-DB BINARY-CACHE-URI STORE-PATHS NIXPKGS-PATH");

    Path cacheDbPath = argv[1];
    Path programsDbPath = argv[2];
    Path storePathsFile = argv[4];
    Path nixpkgsPath = argv[5];

    settings.readOnlyMode = true;
    loggerSettings.showTrace = true;

    auto localStore = openStore();
    std::string binaryCacheUri = argv[3];
    if (hasSuffix(binaryCacheUri, "/")) binaryCacheUri.pop_back();
    auto binaryCache = openStore(binaryCacheUri).cast<BinaryCacheStore>();

    /* Get the allowed store paths to be included in the database. */
    auto allowedPaths = binaryCache->parseStorePathSet(tokenizeString<PathSet>(readFile(storePathsFile)));

    StorePathSet allowedPathsClosure;
    binaryCache->computeFSClosure(allowedPaths, allowedPathsClosure);

    printMsg(lvlInfo, "%d top-level paths, %d paths in closure",
        allowedPaths.size(), allowedPathsClosure.size());

    FileCache fileCache(cacheDbPath);

    /* Initialise the programs database. */
    struct ProgramsState
    {
        SQLite db;
        SQLiteStmt insertProgram;
    };

    Sync<ProgramsState> programsState_;

    /* Start from an empty database; the result is deliberately ignored
       since the file may not exist. */
    unlink(programsDbPath.c_str());

    {
        auto programsState(programsState_.lock());

        programsState->db = SQLite(programsDbPath);
        programsState->db.exec("pragma synchronous = off");
        programsState->db.exec("pragma main.journal_mode = truncate");
        programsState->db.exec(programsSchema);

        programsState->insertProgram.create(programsState->db,
            "insert or replace into Programs(name, system, package) values (?, ?, ?)");
    }

    EvalState state({}, localStore);

    Value vRoot;
    state.eval(state.parseExprFromFile(resolveExprPath(absPath(nixpkgsPath))), vRoot);

    /* Get all derivations. */
    DrvInfos packages;

    for (const auto & system : std::set<std::string>{"x86_64-linux", "aarch64-linux"}) {
        auto args = state.buildBindings(2);
        args.alloc(state.symbols.create("config")).mkAttrs(&state.emptyBindings);
        args.alloc(state.symbols.create("system")).mkString(system);
        getDerivations(state, vRoot, "", *args.finish(), packages, true);
    }

    /* For each store path, figure out the package with the shortest
       attribute name. E.g. "nix" is preferred over "nixStable". */
    std::map<StorePath, DrvInfo *> packagesByPath;

    for (auto & package : packages)
        try {
            auto outputs = package.queryOutputs(true);

            for (auto & [_, storePath] : outputs) {
                if (!storePath) continue;
                if (!allowedPathsClosure.count(*storePath)) continue;

                auto i = packagesByPath.find(*storePath);
                if (i == packagesByPath.end()) {
                    packagesByPath.emplace(std::move(*storePath), &package);
                    continue;
                }

                /* Keep the existing entry if its attribute path is
                   shorter, or equally long but lexicographically
                   smaller. */
                const auto & current = i->second->attrPath;
                const auto & candidate = package.attrPath;
                if (current.size() < candidate.size() ||
                    (current.size() == candidate.size() && current < candidate))
                    continue;

                /* Assign through the iterator: emplace() would leave
                   an already-present entry untouched, so the better
                   candidate would never replace it. */
                i->second = &package;
            }
        } catch (AssertionError & e) {
            /* Packages whose evaluation trips an assertion are
               skipped. */
        } catch (Error & e) {
            e.addTrace({}, hintfmt("in package '%s': ", package.attrPath));
            throw;
        }

    /* Note: we don't index hidden files. */
    std::regex isProgram("bin/([^.][^/]*)");

    /* Upper bound on the number of symlinks followed for one program,
       so that a symlink cycle inside a store path cannot hang the
       indexer. */
    constexpr unsigned int maxSymlinkHops = 40;

    /* Process each store path. */
    auto doPath = [&](const Path & storePath, DrvInfo * package) {
        try {
            auto files = fileCache.getFiles(binaryCache, storePath);
            if (files.empty()) return;

            const std::string storePathPrefix = storePath + "/";

            std::set<std::string> programs;

            for (const auto & file : files) {

                std::smatch match;
                if (!std::regex_match(file.first, match, isProgram)) continue;

                auto curPath = file.first;
                auto stat = file.second;

                unsigned int hops = 0;

                while (stat.type == FSAccessor::Type::tSymlink) {

                    if (++hops > maxSymlinkHops) {
                        printError("symlink %s has too many levels of indirection",
                            storePath + "/" + file.first);
                        break;
                    }

                    auto target = canonPath(
                        hasPrefix(stat.target, "/")
                        ? stat.target
                        : dirOf(storePath + "/" + curPath) + "/" + stat.target);
                    // FIXME: resolve symlinks in components of stat.target.

                    if (!hasPrefix(target, "/nix/store/")) break;

                    /* Assume that symlinks to other store paths point
                       to executables. But check symlinks within the
                       same store path. The comparison is done on a
                       path component boundary so that a store path
                       whose name merely starts with this one's is not
                       mistaken for it. */
                    const bool sameStorePath =
                        target == storePath || hasPrefix(target, storePathPrefix);

                    if (!sameStorePath) {
                        stat.type = FSAccessor::Type::tRegular;
                        stat.isExecutable = true;
                        break;
                    }

                    /* A target equal to the store path itself maps to
                       the root entry, whose key is the empty string. */
                    std::string sub =
                        target.size() > storePathPrefix.size()
                        ? target.substr(storePathPrefix.size())
                        : std::string();

                    auto file2 = files.find(sub);
                    if (file2 == files.end()) {
                        printError("symlink %s has non-existent target %s",
                            storePath + "/" + file.first, stat.target);
                        break;
                    }

                    curPath = sub;
                    stat = file2->second;
                }

                /* Only regular executable files count as programs; an
                   unresolved symlink is skipped as well. */
                if (stat.type == FSAccessor::Type::tDirectory
                    || stat.type == FSAccessor::Type::tSymlink
                    || (stat.type == FSAccessor::Type::tRegular && !stat.isExecutable))
                    continue;

                programs.insert(match[1]);
            }

            if (programs.empty()) return;

            {
                auto programsState(programsState_.lock());
                SQLiteTxn txn(programsState->db);
                for (auto & program : programs)
                    programsState->insertProgram.use()(program)(package->querySystem())(package->attrPath).exec();
                txn.commit();
            }

        } catch (BadJSON & e) {
            printError("error: in %s (%s): %s", package->attrPath, storePath, e.what());
        }
    };

    /* Enqueue work items for each package. */
    ThreadPool threadPool(16);

    for (auto & i : packagesByPath)
        threadPool.enqueue(std::bind(doPath, binaryCache->printStorePath(i.first), i.second));

    threadPool.process();

    /* Vacuum programs.sqlite to make it smaller. */
    {
        auto programsState(programsState_.lock());
        programsState->db.exec("vacuum");
    }
}
/* Program entry point: runs mainWrapped() under handleExceptions() and
   uses the value it returns as the exit status. */
int main(int argc, char * * argv)
{
    const auto run = [&]() {
        mainWrapped(argc, argv);
    };
    return handleExceptions(argv[0], run);
}

View file

@ -3,11 +3,12 @@
#include <regex>
#include "shared.hh"
#include "sqlite.hh"
#include "s3-binary-cache-store.hh"
#include "thread-pool.hh"
#include "nar-info.hh"
#include "file-cache.hh"
#include <nlohmann/json.hpp>
// cache.nixos.org/debuginfo/<build-id>
// => redirect to NAR
@ -18,74 +19,42 @@ void mainWrapped(int argc, char * * argv)
{
initNix();
if (argc != 4) throw Error("usage: index-debuginfo CACHE-DB BINARY-CACHE-URI STORE-PATHS");
if (argc != 3) throw Error("usage: index-debuginfo DEBUG-DB BINARY-CACHE-URI");
Path cacheDbPath = argv[1];
Path debugDbPath = argv[1];
std::string binaryCacheUri = argv[2];
Path storePathsFile = argv[3];
FileCache fileCache(cacheDbPath);
if (hasSuffix(binaryCacheUri, "/")) binaryCacheUri.pop_back();
auto binaryCache = openStore(binaryCacheUri).cast<S3BinaryCacheStore>();
auto storePaths = binaryCache->parseStorePathSet(tokenizeString<PathSet>(readFile(storePathsFile)));
std::regex debugFileRegex("^lib/debug/\\.build-id/[0-9a-f]{2}/[0-9a-f]{38}\\.debug$");
ThreadPool threadPool(25);
auto doFile = [&](std::string member, std::string key, std::string target) {
auto doFile = [&](std::string build_id, std::string url, std::string filename) {
checkInterrupt();
nlohmann::json json;
json["archive"] = target;
json["member"] = member;
json["archive"] = url;
json["member"] = filename;
std::string key = "debuginfo/" + build_id;
// FIXME: or should we overwrite? The previous link may point
// to a GC'ed file, so overwriting might be useful...
if (binaryCache->fileExists(key)) return;
printError("redirecting %s to %s", key, target);
printError("redirecting %s to %s", key, filename);
binaryCache->upsertFile(key, json.dump(), "application/json");
};
auto doPath = [&](const Path & storePath) {
checkInterrupt();
auto db = SQLite(debugDbPath);
try {
auto files = fileCache.getFiles(binaryCache, storePath);
auto stmt = SQLiteStmt(db, "select build_id, url, filename from DebugInfo;");
auto query = stmt.use();
std::string prefix = "lib/debug/.build-id/";
for (auto & file : files) {
if (file.second.type != FSAccessor::Type::tRegular
|| !std::regex_match(file.first, debugFileRegex))
continue;
std::string buildId =
std::string(file.first, prefix.size(), 2) +
std::string(file.first, prefix.size() + 3, 38);
auto info = binaryCache->queryPathInfo(binaryCache->parseStorePath(storePath)).cast<const NarInfo>();
assert(hasPrefix(info->url, "nar/"));
std::string key = "debuginfo/" + buildId;
std::string target = "../" + info->url;
threadPool.enqueue(std::bind(doFile, file.first, key, target));
}
} catch (BadJSON & e) {
printError("error: in %s: %s", storePath, e.what());
}
};
for (auto & storePath : storePaths)
if (hasSuffix(storePath.name(), "-debug"))
threadPool.enqueue(std::bind(doPath, binaryCache->printStorePath(storePath)));
while (query.next()) {
threadPool.enqueue(std::bind(doFile, query.getStr(0), query.getStr(1), query.getStr(2)));
}
threadPool.process();
}

View file

@ -265,9 +265,9 @@ if ($bucketReleases && $bucketReleases->head_key("$releasePrefix")) {
File::Path::make_path("$tmpDir/unpack");
run("tar", "xfJ", "$tmpDir/nixexprs.tar.xz", "-C", "$tmpDir/unpack");
my $exprDir = glob("$tmpDir/unpack/*");
run("generate-programs-index", "$filesCache", "$exprDir/programs.sqlite", "http://nix-cache.s3.amazonaws.com/", "$tmpDir/store-paths", "$exprDir/nixpkgs");
run("index-debuginfo", "$filesCache", "s3://nix-cache", "$tmpDir/store-paths");
run("rm", "-f", "$tmpDir/nixexprs.tar.xz", "$exprDir/programs.sqlite-journal");
run("nix-channel-index", "-o", "$exprDir/programs.sqlite", "-d", "$exprDir/debug.sqlite", "-f", "$exprDir/nixpkgs", "-s", "aarch64-linux", "-s", "x86_64-linux");
run("index-debuginfo", "$exprDir/debug.sqlite", "s3://nix-cache");
run("rm", "-f", "$tmpDir/nixexprs.tar.xz", "$exprDir/debug.sqlite");
unlink("$tmpDir/nixexprs.tar.xz.sha256");
run("tar", "cfJ", "$tmpDir/nixexprs.tar.xz", "-C", "$tmpDir/unpack", basename($exprDir));
run("rm", "-rf", "$tmpDir/unpack");