diff --git a/default.nix b/default.nix
index 3dc3816..d6b4b47 100644
--- a/default.nix
+++ b/default.nix
@@ -20,7 +20,9 @@ stdenv.mkDerivation {
   buildCommand = ''
     mkdir -p $out/bin
 
-    g++ -g ${./generate-programs-index.cc} -Wall -std=c++14 -o $out/bin/generate-programs-index \
+    cp ${./file-cache.hh} file-cache.hh
+
+    g++ -g ${./generate-programs-index.cc} -Wall -std=c++14 -o $out/bin/generate-programs-index -I . \
       $(pkg-config --cflags nix-main) \
       $(pkg-config --libs nix-main) \
       $(pkg-config --libs nix-expr) \
diff --git a/file-cache.hh b/file-cache.hh
new file mode 100644
index 0000000..8439196
--- /dev/null
+++ b/file-cache.hh
@@ -0,0 +1,196 @@
+/* A local disk cache for fast lookups of NAR index files in a binary
+   cache. */
+
+#pragma once
+
+#include "binary-cache-store.hh"
+#include "fs-accessor.hh"
+#include "sqlite.hh"
+#include "sync.hh"
+
+#include <sqlite3.h>
+#include <nlohmann/json.hpp>
+
+#include <chrono>
+#include <functional>
+#include <map>
+#include <stdexcept>
+#include <string>
+
+using namespace nix;
+
+MakeError(BadJSON, Error);
+
+/* Caches the file listings of store paths in a SQLite database, so
+   that a listing only has to be fetched from the binary cache when
+   the store path is not in the database yet. The database handle
+   and its prepared statements live in a Sync<State> and are only
+   used through state_.lock(). */
+class FileCache
+{
+    struct State
+    {
+        SQLite db;
+        SQLiteStmt queryPath, insertPath, queryFiles, insertFile;
+    };
+
+    Sync<State> state_;
+
+public:
+
+    /* Open the cache database at 'path', create the tables if they
+       do not exist yet and prepare the statements used by
+       getFiles(). */
+    explicit FileCache(const Path & path)
+    {
+        auto state(state_.lock());
+
+        static std::string cacheSchema = R"sql(
+
+          create table if not exists StorePaths (
+            id   integer primary key autoincrement not null,
+            path text unique not null
+          );
+
+          create table if not exists StorePathContents (
+            storePath    integer not null,
+            subPath      text not null,
+            type         integer not null,
+            fileSize     integer,
+            isExecutable integer,
+            primary key (storePath, subPath),
+            foreign key (storePath) references StorePaths(id) on delete cascade
+          );
+
+        )sql";
+
+        state->db = SQLite(path);
+        state->db.exec("pragma foreign_keys = 1");
+        state->db.exec(cacheSchema);
+
+        /* Let SQLite wait for up to an hour (the timeout is given in
+           milliseconds) when the database is locked. */
+        if (sqlite3_busy_timeout(state->db, 60 * 60 * 1000) != SQLITE_OK)
+            throwSQLiteError(state->db, "setting timeout");
+
+        state->queryPath.create(state->db,
+            "select id from StorePaths where path = ?");
+        state->insertPath.create(state->db,
+            "insert or ignore into StorePaths(path) values (?)");
+        state->queryFiles.create(state->db,
+            "select subPath, type, fileSize, isExecutable from StorePathContents where storePath = ?");
+        state->insertFile.create(state->db,
+            "insert into StorePathContents(storePath, subPath, type, fileSize, isExecutable) values (?, ?, ?, ?, ?)");
+    }
+
+    /* Return the files in a store path, using a SQLite database to
+       cache the results. The keys of the result are paths relative
+       to the store path; the root of the listing has the empty key.
+       On a cache miss the listing is fetched from 'binaryCache' and
+       stored in the database. A std::invalid_argument raised while
+       parsing the listing is rethrown as BadJSON. */
+    std::map<std::string, FSAccessor::Stat>
+    getFiles(ref<BinaryCacheStore> binaryCache, const Path & storePath)
+    {
+        std::map<std::string, FSAccessor::Stat> files;
+
+        /* Look up the path in the SQLite cache. */
+        {
+            auto state(state_.lock());
+            auto useQueryPath(state->queryPath.use()(storePath));
+            if (useQueryPath.next()) {
+                auto id = useQueryPath.getInt(0);
+                auto useQueryFiles(state->queryFiles.use()(id));
+                while (useQueryFiles.next()) {
+                    files[useQueryFiles.getStr(0)] = FSAccessor::Stat{
+                        (FSAccessor::Type) useQueryFiles.getInt(1), (uint64_t) useQueryFiles.getInt(2), useQueryFiles.getInt(3) != 0};
+                }
+                return files;
+            }
+        }
+
+        using json = nlohmann::json;
+
+        std::function<void(const std::string &, json &)> recurse;
+
+        /* Record the node 'v' of the listing under 'relPath' and
+           descend into the entries of directories. Nodes of any
+           other type than directory, regular or symlink are
+           skipped. */
+        recurse = [&](const std::string & relPath, json & v) {
+            FSAccessor::Stat st;
+
+            std::string type = v["type"];
+
+            if (type == "directory") {
+                st.type = FSAccessor::Type::tDirectory;
+                for (auto i = v["entries"].begin(); i != v["entries"].end(); ++i) {
+                    std::string name = i.key();
+                    recurse(relPath.empty() ? name : relPath + "/" + name, i.value());
+                }
+            } else if (type == "regular") {
+                st.type = FSAccessor::Type::tRegular;
+                st.fileSize = v["size"];
+                st.isExecutable = v.value("executable", false);
+            } else if (type == "symlink") {
+                st.type = FSAccessor::Type::tSymlink;
+            } else return;
+
+            files[relPath] = st;
+        };
+
+        /* It's not in the cache, so get the .ls file (which
+           contains a JSON serialisation of the listing of the NAR
+           contents) from the binary cache. */
+        auto now1 = std::chrono::steady_clock::now();
+        auto s = binaryCache->getFile(storePathToHash(storePath) + ".ls");
+        if (!s)
+            printInfo("warning: no listing of %s in binary cache", storePath);
+        else {
+            try {
+                json ls = json::parse(*s);
+
+                if (ls.value("version", 0) != 1)
+                    throw Error("NAR index for ‘%s’ has an unsupported version", storePath);
+
+                recurse("", ls.at("root"));
+            } catch (const std::invalid_argument & e) {
+                // FIXME: some filenames have non-UTF8 characters in them,
+                // which is not supported by nlohmann::json. So we have to
+                // skip the entire package.
+                throw BadJSON(e.what());
+            }
+        }
+
+        /* Insert the store path into the database. The cache was not
+           locked during the fetch, so check first whether the path
+           has been inserted in the meantime. */
+        {
+            auto state(state_.lock());
+            SQLiteTxn txn(state->db);
+
+            if (state->queryPath.use()(storePath).next()) return files;
+            state->insertPath.use()(storePath).exec();
+            uint64_t id = sqlite3_last_insert_rowid(state->db);
+
+            for (auto & x : files) {
+                state->insertFile.use()
+                    (id)
+                    (x.first)
+                    (x.second.type)
+                    (x.second.fileSize, x.second.type == FSAccessor::Type::tRegular)
+                    (x.second.isExecutable, x.second.type == FSAccessor::Type::tRegular)
+                    .exec();
+            }
+
+            txn.commit();
+        }
+
+        auto now2 = std::chrono::steady_clock::now();
+        printInfo("processed %s in %d ms", storePath,
+            std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1).count());
+
+        return files;
+    }
+};
+
diff --git a/generate-programs-index.cc b/generate-programs-index.cc
index 9b0a4b1..46c21b0 100644
--- a/generate-programs-index.cc
+++ b/generate-programs-index.cc
@@ -9,38 +9,14 @@
 #include "store-api.hh"
 #include "common-opts.hh"
 #include "get-drvs.hh"
-#include "fs-accessor.hh"
 #include "thread-pool.hh"
 #include "sqlite.hh"
 #include "download.hh"
-#include "compression.hh"
 #include "binary-cache-store.hh"
 
-#include <nlohmann/json.hpp>
-
-#include <chrono>
+#include "file-cache.hh"
 
 using namespace nix;
-using json = nlohmann::json;
-
-static const char * cacheSchema = R"sql(
-
-  create table if not exists StorePaths (
-    id   integer primary key autoincrement not null,
-    path text unique not null
-  );
-
-  create table if not exists StorePathContents (
-    storePath    integer not null,
-    subPath      text not null,
-    type         integer not null,
-    fileSize     integer,
-    isExecutable integer,
-    primary key (storePath, subPath),
-    foreign key (storePath) references StorePaths(id) on delete cascade
-  );
-
-)sql";
 
 static const char * programsSchema = R"sql(
 
@@ -53,14 +29,12 @@ static const char * programsSchema = R"sql(
 
 )sql";
 
-MakeError(BadJSON, Error);
-
 void mainWrapped(int argc, char * * argv)
 {
     initNix();
     initGC();
 
-    if (argc != 6) throw Error("usage: generate-programs-index CACHE-DB PROGRAMS-DB STORE-URI STORE-PATHS NIXPKGS-PATH");
+    if (argc != 6) throw Error("usage: generate-programs-index CACHE-DB PROGRAMS-DB BINARY-CACHE-URI STORE-PATHS NIXPKGS-PATH");
 
     Path cacheDbPath = argv[1];
     Path programsDbPath = argv[2];
@@ -75,14 +49,6 @@ void mainWrapped(int argc, char * * argv)
     if (hasSuffix(binaryCacheUri, "/")) binaryCacheUri.pop_back();
     auto binaryCache = openStore(binaryCacheUri).cast<BinaryCacheStore>();
 
-    struct CacheState
-    {
-        SQLite db;
-        SQLiteStmt queryPath, insertPath, queryFiles, insertFile;
-    };
-
-    Sync<CacheState> cacheState_;
-
     /* Get the allowed store paths to be included in the database. */
     auto allowedPaths = tokenizeString<PathSet>(readFile(storePathsFile, true));
 
@@ -92,26 +58,7 @@ void mainWrapped(int argc, char * * argv)
     printMsg(lvlInfo, format("%d top-level paths, %d paths in closure")
         % allowedPaths.size() % allowedPathsClosure.size());
 
-    /* Initialise the cache database. */
-    {
-        auto cacheState(cacheState_.lock());
-
-        cacheState->db = SQLite(cacheDbPath);
-        cacheState->db.exec("pragma foreign_keys = 1");
-        cacheState->db.exec(cacheSchema);
-
-        if (sqlite3_busy_timeout(cacheState->db, 60 * 60 * 1000) != SQLITE_OK)
-            throwSQLiteError(cacheState->db, "setting timeout");
-
-        cacheState->queryPath.create(cacheState->db,
-            "select id from StorePaths where path = ?");
-        cacheState->insertPath.create(cacheState->db,
-            "insert or ignore into StorePaths(path) values (?)");
-        cacheState->queryFiles.create(cacheState->db,
-            "select subPath, type, fileSize, isExecutable from StorePathContents where storePath = ?");
-        cacheState->insertFile.create(cacheState->db,
-            "insert into StorePathContents(storePath, subPath, type, fileSize, isExecutable) values (?, ?, ?, ?, ?)");
-    }
+    FileCache fileCache(cacheDbPath);
 
     /* Initialise the programs database. */
     struct ProgramsState
@@ -179,108 +126,13 @@ void mainWrapped(int argc, char * * argv)
         throw;
     }
 
-    /* Return the files in a store path, using a SQLite database to cache the results. */
-    auto getFiles = [&](const Path & storePath) {
-        std::map<std::string, FSAccessor::Stat> files;
-
-        /* Look up the path in the SQLite cache. */
-        {
-            auto cacheState(cacheState_.lock());
-            auto useQueryPath(cacheState->queryPath.use()(storePath));
-            if (useQueryPath.next()) {
-                auto id = useQueryPath.getInt(0);
-                auto useQueryFiles(cacheState->queryFiles.use()(id));
-                while (useQueryFiles.next()) {
-                    files[useQueryFiles.getStr(0)] = FSAccessor::Stat{
-                        (FSAccessor::Type) useQueryFiles.getInt(1), (uint64_t) useQueryFiles.getInt(2), useQueryFiles.getInt(3) != 0};
-                }
-                return files;
-            }
-        }
-
-        std::function<void(const std::string &, json &)> recurse;
-
-        recurse = [&](const std::string & relPath, json & v) {
-            FSAccessor::Stat st;
-
-            std::string type = v["type"];
-
-            if (type == "directory") {
-                st.type = FSAccessor::Type::tDirectory;
-                for (auto i = v["entries"].begin(); i != v["entries"].end(); ++i) {
-                    std::string name = i.key();
-                    recurse(relPath.empty() ? name : relPath + "/" + name, i.value());
-                }
-            } else if (type == "regular") {
-                st.type = FSAccessor::Type::tRegular;
-                st.fileSize = v["size"];
-                st.isExecutable = v.value("executable", false);
-            } else if (type == "symlink") {
-                st.type = FSAccessor::Type::tSymlink;
-            } else return;
-
-            files[relPath] = st;
-        };
-
-        /* It's not in the cache, so get the .ls.xz file (which
-           contains a JSON serialisation of the listing of the NAR
-           contents) from the binary cache. */
-        auto now1 = std::chrono::steady_clock::now();
-        auto s = binaryCache->getFile(storePathToHash(storePath) + ".ls");
-        if (!s)
-            printInfo("warning: no listing of %s in binary cache", storePath);
-        else {
-            try {
-                json ls = json::parse(*s);
-
-                if (ls.value("version", 0) != 1)
-                    throw Error("NAR index for ‘%s’ has an unsupported version", storePath);
-
-                recurse("", ls.at("root"));
-            } catch (std::invalid_argument & e) {
-                // FIXME: some filenames have non-UTF8 characters in them,
-                // which is not supported by nlohmann::json. So we have to
-                // skip the entire package.
-                throw BadJSON(e.what());
-            }
-        }
-
-        /* Insert the store path into the database. */
-        {
-            auto cacheState(cacheState_.lock());
-            SQLiteTxn txn(cacheState->db);
-
-            if (cacheState->queryPath.use()(storePath).next()) return files;
-            cacheState->insertPath.use()(storePath).exec();
-            uint64_t id = sqlite3_last_insert_rowid(cacheState->db);
-
-            for (auto & x : files) {
-                cacheState->insertFile.use()
-                    (id)
-                    (x.first)
-                    (x.second.type)
-                    (x.second.fileSize, x.second.type == FSAccessor::Type::tRegular)
-                    (x.second.isExecutable, x.second.type == FSAccessor::Type::tRegular)
-                    .exec();
-            }
-
-            txn.commit();
-        }
-
-        auto now2 = std::chrono::steady_clock::now();
-        printInfo("processed %s in %d ms", storePath,
-            std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1).count());
-
-        return files;
-    };
-
     /* Note: we don't index hidden files. */
     std::regex isProgram("bin/([^.][^/]*)");
 
     /* Process each store path. */
     auto doPath = [&](const Path & storePath, DrvInfo * package) {
         try {
-            auto files = getFiles(storePath);
+            auto files = fileCache.getFiles(binaryCache, storePath);
             if (files.empty()) return;
 
             std::set<std::string> programs;