Factor out the file cache
parent 3daca6b592
commit a91fb566a9

3 changed files with 175 additions and 153 deletions
@@ -20,7 +20,9 @@ stdenv.mkDerivation {
   buildCommand = ''
     mkdir -p $out/bin
 
-    g++ -g ${./generate-programs-index.cc} -Wall -std=c++14 -o $out/bin/generate-programs-index \
+    cp ${./file-cache.hh} file-cache.hh
+
+    g++ -g ${./generate-programs-index.cc} -Wall -std=c++14 -o $out/bin/generate-programs-index -I . \
       $(pkg-config --cflags nix-main) \
       $(pkg-config --libs nix-main) \
       $(pkg-config --libs nix-expr) \
file-cache.hh (new file)
@@ -0,0 +1,168 @@
+/* A local disk cache for fast lookups of NAR index files in a binary
+   cache. */
+
+#include "binary-cache-store.hh"
+#include "fs-accessor.hh"
+#include "sqlite.hh"
+#include "sync.hh"
+
+#include <sqlite3.h>
+#include <nlohmann/json.hpp>
+
+using namespace nix;
+
+MakeError(BadJSON, Error);
+
+class FileCache
+{
+    struct State
+    {
+        SQLite db;
+        SQLiteStmt queryPath, insertPath, queryFiles, insertFile;
+    };
+
+    Sync<State> state_;
+
+public:
+
+    FileCache(const Path & path)
+    {
+        auto state(state_.lock());
+
+        static std::string cacheSchema = R"sql(
+
+            create table if not exists StorePaths (
+                id   integer primary key autoincrement not null,
+                path text unique not null
+            );
+
+            create table if not exists StorePathContents (
+                storePath integer not null,
+                subPath text not null,
+                type integer not null,
+                fileSize integer,
+                isExecutable integer,
+                primary key (storePath, subPath),
+                foreign key (storePath) references StorePaths(id) on delete cascade
+            );
+
+        )sql";
+
+        state->db = SQLite(path);
+        state->db.exec("pragma foreign_keys = 1");
+        state->db.exec(cacheSchema);
+
+        if (sqlite3_busy_timeout(state->db, 60 * 60 * 1000) != SQLITE_OK)
+            throwSQLiteError(state->db, "setting timeout");
+
+        state->queryPath.create(state->db,
+            "select id from StorePaths where path = ?");
+        state->insertPath.create(state->db,
+            "insert or ignore into StorePaths(path) values (?)");
+        state->queryFiles.create(state->db,
+            "select subPath, type, fileSize, isExecutable from StorePathContents where storePath = ?");
+        state->insertFile.create(state->db,
+            "insert into StorePathContents(storePath, subPath, type, fileSize, isExecutable) values (?, ?, ?, ?, ?)");
+    }
+
+    /* Return the files in a store path, using a SQLite database to
+       cache the results. */
+    std::map<std::string, FSAccessor::Stat>
+    getFiles(ref<BinaryCacheStore> binaryCache, const Path & storePath)
+    {
+        std::map<std::string, FSAccessor::Stat> files;
+
+        /* Look up the path in the SQLite cache. */
+        {
+            auto state(state_.lock());
+            auto useQueryPath(state->queryPath.use()(storePath));
+            if (useQueryPath.next()) {
+                auto id = useQueryPath.getInt(0);
+                auto useQueryFiles(state->queryFiles.use()(id));
+                while (useQueryFiles.next()) {
+                    files[useQueryFiles.getStr(0)] = FSAccessor::Stat{
+                        (FSAccessor::Type) useQueryFiles.getInt(1), (uint64_t) useQueryFiles.getInt(2), useQueryFiles.getInt(3) != 0};
+                }
+                return files;
+            }
+        }
+
+        using json = nlohmann::json;
+
+        std::function<void(const std::string &, json &)> recurse;
+
+        recurse = [&](const std::string & relPath, json & v) {
+            FSAccessor::Stat st;
+
+            std::string type = v["type"];
+
+            if (type == "directory") {
+                st.type = FSAccessor::Type::tDirectory;
+                for (auto i = v["entries"].begin(); i != v["entries"].end(); ++i) {
+                    std::string name = i.key();
+                    recurse(relPath.empty() ? name : relPath + "/" + name, i.value());
+                }
+            } else if (type == "regular") {
+                st.type = FSAccessor::Type::tRegular;
+                st.fileSize = v["size"];
+                st.isExecutable = v.value("executable", false);
+            } else if (type == "symlink") {
+                st.type = FSAccessor::Type::tSymlink;
+            } else return;
+
+            files[relPath] = st;
+        };
+
+        /* It's not in the cache, so get the .ls.xz file (which
+           contains a JSON serialisation of the listing of the NAR
+           contents) from the binary cache. */
+        auto now1 = std::chrono::steady_clock::now();
+        auto s = binaryCache->getFile(storePathToHash(storePath) + ".ls");
+        if (!s)
+            printInfo("warning: no listing of %s in binary cache", storePath);
+        else {
+            try {
+                json ls = json::parse(*s);
+
+                if (ls.value("version", 0) != 1)
+                    throw Error("NAR index for ‘%s’ has an unsupported version", storePath);
+
+                recurse("", ls.at("root"));
+            } catch (std::invalid_argument & e) {
+                // FIXME: some filenames have non-UTF8 characters in them,
+                // which is not supported by nlohmann::json. So we have to
+                // skip the entire package.
+                throw BadJSON(e.what());
+            }
+        }
+
+        /* Insert the store path into the database. */
+        {
+            auto state(state_.lock());
+            SQLiteTxn txn(state->db);
+
+            if (state->queryPath.use()(storePath).next()) return files;
+            state->insertPath.use()(storePath).exec();
+            uint64_t id = sqlite3_last_insert_rowid(state->db);
+
+            for (auto & x : files) {
+                state->insertFile.use()
+                    (id)
+                    (x.first)
+                    (x.second.type)
+                    (x.second.fileSize, x.second.type == FSAccessor::Type::tRegular)
+                    (x.second.isExecutable, x.second.type == FSAccessor::Type::tRegular)
+                    .exec();
+            }
+
+            txn.commit();
+        }
+
+        auto now2 = std::chrono::steady_clock::now();
+        printInfo("processed %s in %d ms", storePath,
+            std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1).count());
+
+        return files;
+    }
+};
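For orientation, a minimal usage sketch of the extracted class, assuming only what the diff itself shows (the FileCache constructor and the getFiles call site in generate-programs-index.cc below); the example function, database location, and store path are illustrative, not part of the commit:

    // Sketch: exercise FileCache the way generate-programs-index.cc does.
    // exampleLookup, the db path, and the store path are hypothetical.
    #include "file-cache.hh"

    void exampleLookup(ref<BinaryCacheStore> binaryCache)
    {
        FileCache fileCache("./file-cache.sqlite");

        /* The first call fetches <hash>.ls from the binary cache and
           fills the SQLite tables; later calls for the same store path
           are answered from the local database. */
        auto files = fileCache.getFiles(binaryCache, "/nix/store/example-path");
        for (auto & f : files)
            if (f.second.type == FSAccessor::Type::tRegular && f.second.isExecutable)
                printInfo("executable: %s", f.first);
    }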
generate-programs-index.cc
@@ -9,38 +9,14 @@
 #include "store-api.hh"
 #include "common-opts.hh"
 #include "get-drvs.hh"
-#include "fs-accessor.hh"
 #include "thread-pool.hh"
 #include "sqlite.hh"
 #include "download.hh"
-#include "compression.hh"
 #include "binary-cache-store.hh"
 
-#include <nlohmann/json.hpp>
+#include "file-cache.hh"
 
-#include <sqlite3.h>
-
 using namespace nix;
-using json = nlohmann::json;
 
-static const char * cacheSchema = R"sql(
-
-  create table if not exists StorePaths (
-    id   integer primary key autoincrement not null,
-    path text unique not null
-  );
-
-  create table if not exists StorePathContents (
-    storePath integer not null,
-    subPath text not null,
-    type integer not null,
-    fileSize integer,
-    isExecutable integer,
-    primary key (storePath, subPath),
-    foreign key (storePath) references StorePaths(id) on delete cascade
-  );
-
-)sql";
-
 
 static const char * programsSchema = R"sql(
@@ -53,14 +29,12 @@ static const char * programsSchema = R"sql(
 
 )sql";
 
-MakeError(BadJSON, Error);
-
 void mainWrapped(int argc, char * * argv)
 {
     initNix();
     initGC();
 
-    if (argc != 6) throw Error("usage: generate-programs-index CACHE-DB PROGRAMS-DB STORE-URI STORE-PATHS NIXPKGS-PATH");
+    if (argc != 6) throw Error("usage: generate-programs-index CACHE-DB PROGRAMS-DB BINARY-CACHE-URI STORE-PATHS NIXPKGS-PATH");
 
     Path cacheDbPath = argv[1];
     Path programsDbPath = argv[2];
@@ -75,14 +49,6 @@ void mainWrapped(int argc, char * * argv)
     if (hasSuffix(binaryCacheUri, "/")) binaryCacheUri.pop_back();
     auto binaryCache = openStore(binaryCacheUri).cast<BinaryCacheStore>();
 
-    struct CacheState
-    {
-        SQLite db;
-        SQLiteStmt queryPath, insertPath, queryFiles, insertFile;
-    };
-
-    Sync<CacheState> cacheState_;
-
     /* Get the allowed store paths to be included in the database. */
     auto allowedPaths = tokenizeString<PathSet>(readFile(storePathsFile, true));
 
@@ -92,26 +58,7 @@ void mainWrapped(int argc, char * * argv)
     printMsg(lvlInfo, format("%d top-level paths, %d paths in closure")
         % allowedPaths.size() % allowedPathsClosure.size());
 
-    /* Initialise the cache database. */
-    {
-        auto cacheState(cacheState_.lock());
-
-        cacheState->db = SQLite(cacheDbPath);
-        cacheState->db.exec("pragma foreign_keys = 1");
-        cacheState->db.exec(cacheSchema);
-
-        if (sqlite3_busy_timeout(cacheState->db, 60 * 60 * 1000) != SQLITE_OK)
-            throwSQLiteError(cacheState->db, "setting timeout");
-
-        cacheState->queryPath.create(cacheState->db,
-            "select id from StorePaths where path = ?");
-        cacheState->insertPath.create(cacheState->db,
-            "insert or ignore into StorePaths(path) values (?)");
-        cacheState->queryFiles.create(cacheState->db,
-            "select subPath, type, fileSize, isExecutable from StorePathContents where storePath = ?");
-        cacheState->insertFile.create(cacheState->db,
-            "insert into StorePathContents(storePath, subPath, type, fileSize, isExecutable) values (?, ?, ?, ?, ?)");
-    }
+    FileCache fileCache(cacheDbPath);
 
     /* Initialise the programs database. */
     struct ProgramsState
@@ -179,108 +126,13 @@
             throw;
         }
 
-    /* Return the files in a store path, using a SQLite database to cache the results. */
-    auto getFiles = [&](const Path & storePath) {
-        std::map<std::string, FSAccessor::Stat> files;
-
-        /* Look up the path in the SQLite cache. */
-        {
-            auto cacheState(cacheState_.lock());
-            auto useQueryPath(cacheState->queryPath.use()(storePath));
-            if (useQueryPath.next()) {
-                auto id = useQueryPath.getInt(0);
-                auto useQueryFiles(cacheState->queryFiles.use()(id));
-                while (useQueryFiles.next()) {
-                    files[useQueryFiles.getStr(0)] = FSAccessor::Stat{
-                        (FSAccessor::Type) useQueryFiles.getInt(1), (uint64_t) useQueryFiles.getInt(2), useQueryFiles.getInt(3) != 0};
-                }
-                return files;
-            }
-        }
-
-        std::function<void(const std::string &, json &)> recurse;
-
-        recurse = [&](const std::string & relPath, json & v) {
-            FSAccessor::Stat st;
-
-            std::string type = v["type"];
-
-            if (type == "directory") {
-                st.type = FSAccessor::Type::tDirectory;
-                for (auto i = v["entries"].begin(); i != v["entries"].end(); ++i) {
-                    std::string name = i.key();
-                    recurse(relPath.empty() ? name : relPath + "/" + name, i.value());
-                }
-            } else if (type == "regular") {
-                st.type = FSAccessor::Type::tRegular;
-                st.fileSize = v["size"];
-                st.isExecutable = v.value("executable", false);
-            } else if (type == "symlink") {
-                st.type = FSAccessor::Type::tSymlink;
-            } else return;
-
-            files[relPath] = st;
-        };
-
-        /* It's not in the cache, so get the .ls.xz file (which
-           contains a JSON serialisation of the listing of the NAR
-           contents) from the binary cache. */
-        auto now1 = std::chrono::steady_clock::now();
-        auto s = binaryCache->getFile(storePathToHash(storePath) + ".ls");
-        if (!s)
-            printInfo("warning: no listing of %s in binary cache", storePath);
-        else {
-            try {
-                json ls = json::parse(*s);
-
-                if (ls.value("version", 0) != 1)
-                    throw Error("NAR index for ‘%s’ has an unsupported version", storePath);
-
-                recurse("", ls.at("root"));
-            } catch (std::invalid_argument & e) {
-                // FIXME: some filenames have non-UTF8 characters in them,
-                // which is not supported by nlohmann::json. So we have to
-                // skip the entire package.
-                throw BadJSON(e.what());
-            }
-        }
-
-        /* Insert the store path into the database. */
-        {
-            auto cacheState(cacheState_.lock());
-            SQLiteTxn txn(cacheState->db);
-
-            if (cacheState->queryPath.use()(storePath).next()) return files;
-            cacheState->insertPath.use()(storePath).exec();
-            uint64_t id = sqlite3_last_insert_rowid(cacheState->db);
-
-            for (auto & x : files) {
-                cacheState->insertFile.use()
-                    (id)
-                    (x.first)
-                    (x.second.type)
-                    (x.second.fileSize, x.second.type == FSAccessor::Type::tRegular)
-                    (x.second.isExecutable, x.second.type == FSAccessor::Type::tRegular)
-                    .exec();
-            }
-
-            txn.commit();
-        }
-
-        auto now2 = std::chrono::steady_clock::now();
-        printInfo("processed %s in %d ms", storePath,
-            std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1).count());
-
-        return files;
-    };
-
     /* Note: we don't index hidden files. */
     std::regex isProgram("bin/([^.][^/]*)");
 
     /* Process each store path. */
     auto doPath = [&](const Path & storePath, DrvInfo * package) {
         try {
-            auto files = getFiles(storePath);
+            auto files = fileCache.getFiles(binaryCache, storePath);
            if (files.empty()) return;
 
            std::set<std::string> programs;
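To see what getFiles persists, a hypothetical inspection helper written against the same nix SQLite wrapper (SQLite, SQLiteStmt) that both the old inline code and the new FileCache use; dumpCache, its query, and the util.hh include are illustrative assumptions, not part of the commit:

    // Sketch: read back the rows that FileCache::getFiles stores.
    #include "sqlite.hh"
    #include "util.hh"   // assumption: provides printInfo

    using namespace nix;

    void dumpCache(const Path & cacheDbPath)
    {
        SQLite db(cacheDbPath);
        SQLiteStmt query;
        query.create(db,
            "select p.path, c.subPath, c.fileSize from StorePaths p "
            "join StorePathContents c on c.storePath = p.id");
        auto use(query.use());
        while (use.next())
            printInfo("%s: %s (%d bytes)", use.getStr(0), use.getStr(1), use.getInt(2));
    }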