lix/src/libstore/nar-info-disk-cache.cc
jade 04f8a14833
tree-wide: shuffle headers around for about 30s compile time
This didn't really feel so worth it afterwards, but I did untangle a
bunch of stuff that should not have been tangled.

The general gist of this change is that variant bullshit was causing a
bunch of compile time, and it seems like the only way to deal with
variant induced compile time is to keep variant types out of headers.
Explicit template instantiation seems to do nothing for them.

I also seem to have gotten some back-end time improvement from
explicitly instantiating regex, but I don't know why. There is no
corresponding front-end time improvement from it: regex is still at the
top of the sinners list.

**** Templates that took longest to instantiate:
 15231 ms: std::basic_regex<char>::_M_compile (28 times, avg 543 ms)
 15066 ms: std::__detail::_Compiler<std::regex_traits<char>>::_Compiler (28 times, avg 538 ms)
 12571 ms: std::__detail::_Compiler<std::regex_traits<char>>::_M_disjunction (28 times, avg 448 ms)
 12454 ms: std::__detail::_Compiler<std::regex_traits<char>>::_M_alternative (28 times, avg 444 ms)
 12225 ms: std::__detail::_Compiler<std::regex_traits<char>>::_M_term (28 times, avg 436 ms)
 11363 ms: nlohmann::basic_json<>::parse<const char *> (21 times, avg 541 ms)
 10628 ms: nlohmann::basic_json<>::basic_json (109 times, avg 97 ms)
 10134 ms: std::__detail::_Compiler<std::regex_traits<char>>::_M_atom (28 times, avg 361 ms)

Back-end time before messing with the regex:
**** Function sets that took longest to compile / optimize:
  8076 ms: void boost::io::detail::put<$>(boost::io::detail::put_holder<$> cons... (177 times, avg 45 ms)
  4382 ms: std::_Rb_tree<$>::_M_erase(std::_Rb_tree_node<$>*) (1247 times, avg 3 ms)
  3137 ms: boost::stacktrace::detail::to_string_impl_base<boost::stacktrace::de... (137 times, avg 22 ms)
  2896 ms: void boost::io::detail::mk_str<$>(std::__cxx11::basic_string<$>&, ch... (177 times, avg 16 ms)
  2304 ms: std::_Rb_tree<$>::_M_get_insert_hint_unique_pos(std::_Rb_tree_const_... (210 times, avg 10 ms)
  2116 ms: bool std::__detail::_Compiler<$>::_M_expression_term<$>(std::__detai... (112 times, avg 18 ms)
  2051 ms: std::_Rb_tree_iterator<$> std::_Rb_tree<$>::_M_emplace_hint_unique<$... (244 times, avg 8 ms)
  2037 ms: toml::result<$> toml::detail::sequence<$>::invoke<$>(toml::detail::l... (93 times, avg 21 ms)
  1928 ms: std::__detail::_Compiler<$>::_M_quantifier() (28 times, avg 68 ms)
  1859 ms: nlohmann::json_abi_v3_11_3::detail::serializer<$>::dump(nlohmann::js... (41 times, avg 45 ms)
  1824 ms: std::_Function_handler<$>::_M_manager(std::_Any_data&, std::_Any_dat... (973 times, avg 1 ms)
  1810 ms: std::__detail::_BracketMatcher<$>::_BracketMatcher(std::__detail::_B... (112 times, avg 16 ms)
  1793 ms: nix::fetchers::GitInputScheme::fetch(nix::ref<$>, nix::fetchers::Inp... (1 times, avg 1793 ms)
  1759 ms: std::_Rb_tree<$>::_M_get_insert_unique_pos(std::__cxx11::basic_strin... (281 times, avg 6 ms)
  1722 ms: bool nlohmann::json_abi_v3_11_3::detail::parser<$>::sax_parse_intern... (19 times, avg 90 ms)
  1677 ms: boost::io::basic_altstringbuf<$>::overflow(int) (194 times, avg 8 ms)
  1674 ms: std::__cxx11::basic_string<$>::_M_mutate(unsigned long, unsigned lon... (249 times, avg 6 ms)
  1660 ms: std::_Rb_tree_node<$>* std::_Rb_tree<$>::_M_copy<$>(std::_Rb_tree_no... (304 times, avg 5 ms)
  1599 ms: bool nlohmann::json_abi_v3_11_3::detail::parser<$>::sax_parse_intern... (19 times, avg 84 ms)
  1568 ms: void std::__detail::_Compiler<$>::_M_insert_bracket_matcher<$>(bool) (112 times, avg 14 ms)
  1541 ms: std::__shared_ptr<$>::~__shared_ptr() (531 times, avg 2 ms)
  1539 ms: nlohmann::json_abi_v3_11_3::detail::serializer<$>::dump_escaped(std:... (41 times, avg 37 ms)
  1471 ms: void std::__detail::_Compiler<$>::_M_insert_character_class_matcher<... (112 times, avg 13 ms)

After messing with the regex (notice std::__detail::_Compiler vanishes
here, but I don't know why):

**** Function sets that took longest to compile / optimize:
  8054 ms: void boost::io::detail::put<$>(boost::io::detail::put_holder<$> cons... (177 times, avg 45 ms)
  4313 ms: std::_Rb_tree<$>::_M_erase(std::_Rb_tree_node<$>*) (1217 times, avg 3 ms)
  3259 ms: boost::stacktrace::detail::to_string_impl_base<boost::stacktrace::de... (137 times, avg 23 ms)
  3045 ms: void boost::io::detail::mk_str<$>(std::__cxx11::basic_string<$>&, ch... (177 times, avg 17 ms)
  2314 ms: std::_Rb_tree<$>::_M_get_insert_hint_unique_pos(std::_Rb_tree_const_... (207 times, avg 11 ms)
  1923 ms: std::_Rb_tree_iterator<$> std::_Rb_tree<$>::_M_emplace_hint_unique<$... (216 times, avg 8 ms)
  1817 ms: bool nlohmann::json_abi_v3_11_3::detail::parser<$>::sax_parse_intern... (18 times, avg 100 ms)
  1816 ms: toml::result<$> toml::detail::sequence<$>::invoke<$>(toml::detail::l... (93 times, avg 19 ms)
  1788 ms: nlohmann::json_abi_v3_11_3::detail::serializer<$>::dump(nlohmann::js... (40 times, avg 44 ms)
  1749 ms: std::_Rb_tree<$>::_M_get_insert_unique_pos(std::__cxx11::basic_strin... (278 times, avg 6 ms)
  1724 ms: std::__cxx11::basic_string<$>::_M_mutate(unsigned long, unsigned lon... (248 times, avg 6 ms)
  1697 ms: boost::io::basic_altstringbuf<$>::overflow(int) (194 times, avg 8 ms)
  1684 ms: nix::fetchers::GitInputScheme::fetch(nix::ref<$>, nix::fetchers::Inp... (1 times, avg 1684 ms)
  1680 ms: std::_Rb_tree_node<$>* std::_Rb_tree<$>::_M_copy<$>(std::_Rb_tree_no... (303 times, avg 5 ms)
  1589 ms: bool nlohmann::json_abi_v3_11_3::detail::parser<$>::sax_parse_intern... (18 times, avg 88 ms)
  1483 ms: non-virtual thunk to boost::wrapexcept<$>::~wrapexcept() (181 times, avg 8 ms)
  1447 ms: nlohmann::json_abi_v3_11_3::detail::serializer<$>::dump_escaped(std:... (40 times, avg 36 ms)
  1441 ms: std::__shared_ptr<$>::~__shared_ptr() (496 times, avg 2 ms)
  1420 ms: boost::stacktrace::basic_stacktrace<$>::init(unsigned long, unsigned... (137 times, avg 10 ms)
  1396 ms: boost::basic_format<$>::~basic_format() (194 times, avg 7 ms)
  1290 ms: std::__cxx11::basic_string<$>::_M_replace_cold(char*, unsigned long,... (231 times, avg 5 ms)
  1258 ms: std::vector<$>::~vector() (354 times, avg 3 ms)
  1222 ms: std::__cxx11::basic_string<$>::_M_replace(unsigned long, unsigned lo... (231 times, avg 5 ms)
  1194 ms: std::_Rb_tree<$>::_M_get_insert_hint_unique_pos(std::_Rb_tree_const_... (49 times, avg 24 ms)
  1186 ms: bool tao::pegtl::internal::sor<$>::match<$>(std::integer_sequence<$>... (1 times, avg 1186 ms)
  1149 ms: std::__detail::_Executor<$>::_M_dfs(std::__detail::_Executor<$>::_Ma... (70 times, avg 16 ms)
  1123 ms: toml::detail::sequence<$>::invoke(toml::detail::location&) (69 times, avg 16 ms)
  1110 ms: nlohmann::json_abi_v3_11_3::basic_json<$>::json_value::destroy(nlohm... (55 times, avg 20 ms)
  1079 ms: std::_Function_handler<$>::_M_manager(std::_Any_data&, std::_Any_dat... (541 times, avg 1 ms)
  1033 ms: nlohmann::json_abi_v3_11_3::detail::lexer<$>::scan_number() (20 times, avg 51 ms)

Change-Id: I10af282bcd4fc39c2d3caae3453e599e4639c70b
2024-08-28 09:55:05 -07:00

403 lines
13 KiB
C++

#include "nar-info-disk-cache.hh"
#include "logging.hh"
#include "sync.hh"
#include "sqlite.hh"
#include "globals.hh"
#include "users.hh"
#include "strings.hh"
#include <sqlite3.h>
#include <nlohmann/json.hpp>
namespace nix {
static const char * schema = R"sql(
create table if not exists BinaryCaches (
id integer primary key autoincrement not null,
url text unique not null,
timestamp integer not null,
storeDir text not null,
wantMassQuery integer not null,
priority integer not null
);
create table if not exists NARs (
cache integer not null,
hashPart text not null,
namePart text,
url text,
compression text,
fileHash text,
fileSize integer,
narHash text,
narSize integer,
refs text,
deriver text,
sigs text,
ca text,
timestamp integer not null,
present integer not null,
primary key (cache, hashPart),
foreign key (cache) references BinaryCaches(id) on delete cascade
);
create table if not exists Realisations (
cache integer not null,
outputId text not null,
content blob, -- Json serialisation of the realisation, or null if the realisation is absent
timestamp integer not null,
primary key (cache, outputId),
foreign key (cache) references BinaryCaches(id) on delete cascade
);
create table if not exists LastPurge (
dummy text primary key,
value integer
);
)sql";
class NarInfoDiskCacheImpl : public NarInfoDiskCache
{
public:
/* How often to purge expired entries from the cache. */
const int purgeInterval = 24 * 3600;
/* How long to cache binary cache info (i.e. /nix-cache-info) */
const int cacheInfoTtl = 7 * 24 * 3600;
struct Cache
{
int id;
Path storeDir;
bool wantMassQuery;
int priority;
};
struct State
{
SQLite db;
SQLiteStmt insertCache, queryCache, insertNAR, insertMissingNAR,
queryNAR, insertRealisation, insertMissingRealisation,
queryRealisation, purgeCache;
std::map<std::string, Cache> caches;
};
Sync<State> _state;
NarInfoDiskCacheImpl(Path dbPath = getCacheDir() + "/nix/binary-cache-v6.sqlite")
{
auto state(_state.lock());
createDirs(dirOf(dbPath));
state->db = SQLite(dbPath);
state->db.isCache();
state->db.exec(schema);
state->insertCache.create(state->db,
"insert into BinaryCaches(url, timestamp, storeDir, wantMassQuery, priority) values (?1, ?2, ?3, ?4, ?5) on conflict (url) do update set timestamp = ?2, storeDir = ?3, wantMassQuery = ?4, priority = ?5 returning id;");
state->queryCache.create(state->db,
"select id, storeDir, wantMassQuery, priority from BinaryCaches where url = ? and timestamp > ?");
state->insertNAR.create(state->db,
"insert or replace into NARs(cache, hashPart, namePart, url, compression, fileHash, fileSize, narHash, "
"narSize, refs, deriver, sigs, ca, timestamp, present) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 1)");
state->insertMissingNAR.create(state->db,
"insert or replace into NARs(cache, hashPart, timestamp, present) values (?, ?, ?, 0)");
state->queryNAR.create(state->db,
"select present, namePart, url, compression, fileHash, fileSize, narHash, narSize, refs, deriver, sigs, ca from NARs where cache = ? and hashPart = ? and ((present = 0 and timestamp > ?) or (present = 1 and timestamp > ?))");
state->insertRealisation.create(state->db,
R"(
insert or replace into Realisations(cache, outputId, content, timestamp)
values (?, ?, ?, ?)
)");
state->insertMissingRealisation.create(state->db,
R"(
insert or replace into Realisations(cache, outputId, timestamp)
values (?, ?, ?)
)");
state->queryRealisation.create(state->db,
R"(
select content from Realisations
where cache = ? and outputId = ? and
((content is null and timestamp > ?) or
(content is not null and timestamp > ?))
)");
/* Periodically purge expired entries from the database. */
retrySQLite<void>([&]() {
auto now = time(0);
SQLiteStmt queryLastPurge(state->db, "select value from LastPurge");
auto queryLastPurge_(queryLastPurge.use());
if (!queryLastPurge_.next() || queryLastPurge_.getInt(0) < now - purgeInterval) {
SQLiteStmt(state->db,
"delete from NARs where ((present = 0 and timestamp < ?) or (present = 1 and timestamp < ?))")
.use()
// Use a minimum TTL to prevent --refresh from
// nuking the entire disk cache.
(now - std::max(settings.ttlNegativeNarInfoCache.get(), 3600U))
(now - std::max(settings.ttlPositiveNarInfoCache.get(), 30 * 24 * 3600U))
.exec();
debug("deleted %d entries from the NAR info disk cache", sqlite3_changes(state->db));
SQLiteStmt(state->db,
"insert or replace into LastPurge(dummy, value) values ('', ?)")
.use()(now).exec();
}
});
}
Cache & getCache(State & state, const std::string & uri)
{
auto i = state.caches.find(uri);
if (i == state.caches.end()) abort();
return i->second;
}
private:
std::optional<Cache> queryCacheRaw(State & state, const std::string & uri)
{
auto i = state.caches.find(uri);
if (i == state.caches.end()) {
auto queryCache(state.queryCache.use()(uri)(time(0) - cacheInfoTtl));
if (!queryCache.next())
return std::nullopt;
auto cache = Cache {
.id = (int) queryCache.getInt(0),
.storeDir = queryCache.getStr(1),
.wantMassQuery = queryCache.getInt(2) != 0,
.priority = (int) queryCache.getInt(3),
};
state.caches.emplace(uri, cache);
}
return getCache(state, uri);
}
public:
int createCache(const std::string & uri, const Path & storeDir, bool wantMassQuery, int priority) override
{
return retrySQLite<int>([&]() {
auto state(_state.lock());
SQLiteTxn txn(state->db);
// To avoid the race, we have to check if maybe someone hasn't yet created
// the cache for this URI in the meantime.
auto cache(queryCacheRaw(*state, uri));
if (cache)
return cache->id;
Cache ret {
.id = -1, // set below
.storeDir = storeDir,
.wantMassQuery = wantMassQuery,
.priority = priority,
};
{
auto r(state->insertCache.use()(uri)(time(0))(storeDir)(wantMassQuery)(priority));
assert(r.next());
ret.id = (int) r.getInt(0);
}
state->caches[uri] = ret;
txn.commit();
return ret.id;
});
}
std::optional<CacheInfo> upToDateCacheExists(const std::string & uri) override
{
return retrySQLite<std::optional<CacheInfo>>([&]() -> std::optional<CacheInfo> {
auto state(_state.lock());
auto cache(queryCacheRaw(*state, uri));
if (!cache)
return std::nullopt;
return CacheInfo {
.id = cache->id,
.wantMassQuery = cache->wantMassQuery,
.priority = cache->priority
};
});
}
std::pair<Outcome, std::shared_ptr<NarInfo>> lookupNarInfo(
const std::string & uri, const std::string & hashPart) override
{
return retrySQLite<std::pair<Outcome, std::shared_ptr<NarInfo>>>(
[&]() -> std::pair<Outcome, std::shared_ptr<NarInfo>> {
auto state(_state.lock());
auto & cache(getCache(*state, uri));
auto now = time(0);
auto queryNAR(state->queryNAR.use()
(cache.id)
(hashPart)
(now - settings.ttlNegativeNarInfoCache)
(now - settings.ttlPositiveNarInfoCache));
if (!queryNAR.next())
return {oUnknown, 0};
if (!queryNAR.getInt(0))
return {oInvalid, 0};
auto namePart = queryNAR.getStr(1);
auto narInfo = make_ref<NarInfo>(
StorePath(hashPart + "-" + namePart),
Hash::parseAnyPrefixed(queryNAR.getStr(6)));
narInfo->url = queryNAR.getStr(2);
narInfo->compression = queryNAR.getStr(3);
if (!queryNAR.isNull(4))
narInfo->fileHash = Hash::parseAnyPrefixed(queryNAR.getStr(4));
narInfo->fileSize = queryNAR.getInt(5);
narInfo->narSize = queryNAR.getInt(7);
for (auto & r : tokenizeString<Strings>(queryNAR.getStr(8), " "))
narInfo->references.insert(StorePath(r));
if (!queryNAR.isNull(9))
narInfo->deriver = StorePath(queryNAR.getStr(9));
for (auto & sig : tokenizeString<Strings>(queryNAR.getStr(10), " "))
narInfo->sigs.insert(sig);
narInfo->ca = ContentAddress::parseOpt(queryNAR.getStr(11));
return {oValid, narInfo};
});
}
std::pair<Outcome, std::shared_ptr<Realisation>> lookupRealisation(
const std::string & uri, const DrvOutput & id) override
{
return retrySQLite<std::pair<Outcome, std::shared_ptr<Realisation>>>(
[&]() -> std::pair<Outcome, std::shared_ptr<Realisation>> {
auto state(_state.lock());
auto & cache(getCache(*state, uri));
auto now = time(0);
auto queryRealisation(state->queryRealisation.use()
(cache.id)
(id.to_string())
(now - settings.ttlNegativeNarInfoCache)
(now - settings.ttlPositiveNarInfoCache));
if (!queryRealisation.next())
return {oUnknown, 0};
if (queryRealisation.isNull(0))
return {oInvalid, 0};
auto realisation =
std::make_shared<Realisation>(Realisation::fromJSON(
nlohmann::json::parse(queryRealisation.getStr(0)),
"Local disk cache"));
return {oValid, realisation};
});
}
void upsertNarInfo(
const std::string & uri, const std::string & hashPart,
std::shared_ptr<const ValidPathInfo> info) override
{
retrySQLite<void>([&]() {
auto state(_state.lock());
auto & cache(getCache(*state, uri));
if (info) {
auto narInfo = std::dynamic_pointer_cast<const NarInfo>(info);
//assert(hashPart == storePathToHash(info->path));
state->insertNAR.use()
(cache.id)
(hashPart)
(std::string(info->path.name()))
(narInfo ? narInfo->url : "", narInfo != 0)
(narInfo ? narInfo->compression : "", narInfo != 0)
(narInfo && narInfo->fileHash ? narInfo->fileHash->to_string(Base::Base32, true) : "", narInfo && narInfo->fileHash)
(narInfo ? narInfo->fileSize : 0, narInfo != 0 && narInfo->fileSize)
(info->narHash.to_string(Base::Base32, true))
(info->narSize)
(concatStringsSep(" ", info->shortRefs()))
(info->deriver ? std::string(info->deriver->to_string()) : "", (bool) info->deriver)
(concatStringsSep(" ", info->sigs))
(renderContentAddress(info->ca))
(time(0)).exec();
} else {
state->insertMissingNAR.use()
(cache.id)
(hashPart)
(time(0)).exec();
}
});
}
void upsertRealisation(
const std::string & uri,
const Realisation & realisation) override
{
retrySQLite<void>([&]() {
auto state(_state.lock());
auto & cache(getCache(*state, uri));
state->insertRealisation.use()
(cache.id)
(realisation.id.to_string())
(realisation.toJSON().dump())
(time(0)).exec();
});
}
virtual void upsertAbsentRealisation(
const std::string & uri,
const DrvOutput & id) override
{
retrySQLite<void>([&]() {
auto state(_state.lock());
auto & cache(getCache(*state, uri));
state->insertMissingRealisation.use()
(cache.id)
(id.to_string())
(time(0)).exec();
});
}
};
ref<NarInfoDiskCache> getNarInfoDiskCache()
{
static ref<NarInfoDiskCache> cache = make_ref<NarInfoDiskCacheImpl>();
return cache;
}
ref<NarInfoDiskCache> getTestNarInfoDiskCache(Path dbPath)
{
return make_ref<NarInfoDiskCacheImpl>(dbPath);
}
}