From aaa109565e4fb662e423f23bc48c9ad9831dd281 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 17 Apr 2020 23:04:21 +0200 Subject: [PATCH] Use a more space/time-efficient representation for the eval cache --- src/libexpr/eval.cc | 1 + src/libexpr/eval.hh | 2 +- src/libstore/local-store.cc | 2 +- src/libstore/sqlite.cc | 5 ++ src/libstore/sqlite.hh | 2 + src/nix/flake.cc | 141 +++++++++++++++++++++++------------- 6 files changed, 102 insertions(+), 51 deletions(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 83be997e9..4cb2c7654 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -351,6 +351,7 @@ EvalState::EvalState(const Strings & _searchPath, ref store) , sOutputHashMode(symbols.create("outputHashMode")) , sDescription(symbols.create("description")) , sSelf(symbols.create("self")) + , sEpsilon(symbols.create("")) , repair(NoRepair) , store(store) , baseEnv(allocEnv(128)) diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh index ab4d2bc4b..994954e44 100644 --- a/src/libexpr/eval.hh +++ b/src/libexpr/eval.hh @@ -75,7 +75,7 @@ public: sFile, sLine, sColumn, sFunctor, sToString, sRight, sWrong, sStructuredAttrs, sBuilder, sArgs, sOutputHash, sOutputHashAlgo, sOutputHashMode, - sDescription, sSelf; + sDescription, sSelf, sEpsilon; Symbol sDerivationNix; /* If set, force copying files to the Nix store even if they diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index ae7513ad8..b6db627b5 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -588,7 +588,7 @@ uint64_t LocalStore::addValidPath(State & state, (concatStringsSep(" ", info.sigs), !info.sigs.empty()) (info.ca, !info.ca.empty()) .exec(); - uint64_t id = sqlite3_last_insert_rowid(state.db); + uint64_t id = state.db.getLastInsertedRowId(); /* If this is a derivation, then store the derivation outputs in the database. This is useful for the garbage collector: it can diff --git a/src/libstore/sqlite.cc b/src/libstore/sqlite.cc index 63527a811..a1c262f5f 100644 --- a/src/libstore/sqlite.cc +++ b/src/libstore/sqlite.cc @@ -61,6 +61,11 @@ void SQLite::exec(const std::string & stmt) }); } +uint64_t SQLite::getLastInsertedRowId() +{ + return sqlite3_last_insert_rowid(db); +} + void SQLiteStmt::create(sqlite3 * db, const string & sql) { checkInterrupt(); diff --git a/src/libstore/sqlite.hh b/src/libstore/sqlite.hh index 661a384ef..50909a35a 100644 --- a/src/libstore/sqlite.hh +++ b/src/libstore/sqlite.hh @@ -26,6 +26,8 @@ struct SQLite void isCache(); void exec(const std::string & stmt); + + uint64_t getLastInsertedRowId(); }; /* RAII wrapper to create and destroy SQLite prepared statements. */ diff --git a/src/nix/flake.cc b/src/nix/flake.cc index c78e6f2f1..5370841ec 100644 --- a/src/nix/flake.cc +++ b/src/nix/flake.cc @@ -672,13 +672,17 @@ struct CmdFlakeArchive : FlakeCommand, MixJSON, MixDryRun // FIXME: inefficient representation of attrs static const char * schema = R"sql( create table if not exists Attributes ( - attrPath text primary key, + parent integer not null, + name text, type integer not null, - value text not null + value text, + primary key (parent, name) ); )sql"; enum AttrType { + Placeholder = 0, + // FIXME: distinguish between full / partial attrsets Attrs = 1, String = 2, }; @@ -690,8 +694,14 @@ struct AttrDb SQLite db; SQLiteStmt insertAttribute; SQLiteStmt queryAttribute; + SQLiteStmt queryAttributes; }; + struct placeholder_t {}; + typedef uint64_t AttrId; + typedef std::pair AttrKey; + typedef std::variant, std::string, placeholder_t> AttrValue; + std::unique_ptr> _state; AttrDb(const Fingerprint & fingerprint) @@ -709,57 +719,78 @@ struct AttrDb state->db.exec(schema); state->insertAttribute.create(state->db, - "insert or replace into Attributes(attrPath, type, value) values (?, ?, ?)"); + "insert or replace into Attributes(parent, name, type, value) values (?, ?, ?, ?)"); state->queryAttribute.create(state->db, - "select type, value from Attributes where attrPath = ?"); + "select rowid, type, value from Attributes where parent = ? and name = ?"); + + state->queryAttributes.create(state->db, + "select name from Attributes where parent = ?"); } - void setAttr( - const std::vector & attrPath, + AttrId setAttr( + AttrKey key, const std::vector & attrs) { auto state(_state->lock()); state->insertAttribute.use() - (concatStringsSep(".", attrPath)) + (key.first) + (key.second) (AttrType::Attrs) - (concatStringsSep("\n", attrs)).exec(); + (0, false).exec(); + + AttrId rowId = state->db.getLastInsertedRowId(); + assert(rowId); + + for (auto & attr : attrs) + state->insertAttribute.use() + (rowId) + (attr) + (AttrType::Placeholder) + (0, false).exec(); + + return rowId; } - void setAttr( - const std::vector & attrPath, + AttrId setAttr( + AttrKey key, std::string_view s) { auto state(_state->lock()); state->insertAttribute.use() - (concatStringsSep(".", attrPath)) + (key.first) + (key.second) (AttrType::String) (s).exec(); + + return state->db.getLastInsertedRowId(); } - typedef std::variant, std::string> AttrValue; - - std::optional getAttr( - const std::vector & attrPath, + std::optional> getAttr( + AttrKey key, SymbolTable & symbols) { auto state(_state->lock()); - auto queryAttribute(state->queryAttribute.use() - (concatStringsSep(".", attrPath))); + auto queryAttribute(state->queryAttribute.use()(key.first)(key.second)); if (!queryAttribute.next()) return {}; - auto type = (AttrType) queryAttribute.getInt(0); + auto rowId = (AttrType) queryAttribute.getInt(0); + auto type = (AttrType) queryAttribute.getInt(1); - if (type == AttrType::Attrs) { + if (type == AttrType::Placeholder) + return {{rowId, placeholder_t()}}; + else if (type == AttrType::Attrs) { + // FIXME: expensive, should separate this out. std::vector attrs; - for (auto & s : tokenizeString>(queryAttribute.getStr(1), "\n")) - attrs.push_back(symbols.create(s)); - return attrs; + auto queryAttributes(state->queryAttributes.use()(rowId)); + while (queryAttributes.next()) + attrs.push_back(symbols.create(queryAttributes.getStr(0))); + return {{rowId, attrs}}; } else if (type == AttrType::String) { - return queryAttribute.getStr(1); + return {{rowId, queryAttribute.getStr(2)}}; } else throw Error("unexpected type in evaluation cache"); } @@ -803,19 +834,31 @@ struct AttrCursor : std::enable_shared_from_this typedef std::optional, Symbol>> Parent; Parent parent; RootValue _value; - std::optional cachedValue; + std::optional> cachedValue; AttrCursor( ref root, Parent parent, Value * value = nullptr, - std::optional && cachedValue = {}) + std::optional> && cachedValue = {}) : root(root), parent(parent), cachedValue(std::move(cachedValue)) { if (value) _value = allocRootValue(value); } + AttrDb::AttrKey getAttrKey() + { + if (!parent) + return {0, root->state.sEpsilon}; + if (!parent->first->cachedValue) { + parent->first->cachedValue = root->db->getAttr( + parent->first->getAttrKey(), root->state.symbols); + assert(parent->first->cachedValue); + } + return {parent->first->cachedValue->first, parent->second}; + } + Value & getValue() { if (!_value) { @@ -863,20 +906,24 @@ struct AttrCursor : std::enable_shared_from_this { if (root->db) { if (!cachedValue) - cachedValue = root->db->getAttr(getAttrPath(), root->state.symbols); + cachedValue = root->db->getAttr(getAttrKey(), root->state.symbols); if (cachedValue) { - if (auto attrs = std::get_if>(&*cachedValue)) { + if (auto attrs = std::get_if>(&cachedValue->second)) { for (auto & attr : *attrs) if (attr == name) return std::make_shared(root, std::make_pair(shared_from_this(), name)); - } - return nullptr; + return nullptr; + } else if (std::get_if(&cachedValue->second)) { + auto attr = root->db->getAttr({cachedValue->first, name}, root->state.symbols); + if (attr) + return std::make_shared(root, std::make_pair(shared_from_this(), name), nullptr, std::move(attr)); + // Incomplete attrset, so need to fall thru and + // evaluate to see whether 'name' exists + } else + // FIXME: throw error? + return nullptr; } - - auto attr = root->db->getAttr(getAttrPath(name), root->state.symbols); - if (attr) - return std::make_shared(root, std::make_pair(shared_from_this(), name), nullptr, std::move(attr)); } //printError("GET ATTR %s", getAttrPathStr(name)); @@ -916,22 +963,20 @@ struct AttrCursor : std::enable_shared_from_this { if (root->db) { if (!cachedValue) - cachedValue = root->db->getAttr(getAttrPath(), root->state.symbols); - if (cachedValue) { - if (auto s = std::get_if(&*cachedValue)) { + cachedValue = root->db->getAttr(getAttrKey(), root->state.symbols); + if (cachedValue && !std::get_if(&cachedValue->second)) { + if (auto s = std::get_if(&cachedValue->second)) { //printError("GOT STRING %s", getAttrPathStr()); return *s; } else - throw Error("unexpected type mismatch in evaluation cache"); + throw Error("unexpected type mismatch in evaluation cache (string expected)"); } } //printError("GET STRING %s", getAttrPathStr()); auto s = root->state.forceString(getValue()); - if (root->db) { - root->db->setAttr(getAttrPath(), s); - cachedValue = s; - } + if (root->db) + cachedValue = {root->db->setAttr(getAttrKey(), s), s}; return s; } @@ -940,13 +985,13 @@ struct AttrCursor : std::enable_shared_from_this { if (root->db) { if (!cachedValue) - cachedValue = root->db->getAttr(getAttrPath(), root->state.symbols); - if (cachedValue) { - if (auto attrs = std::get_if>(&*cachedValue)) { + cachedValue = root->db->getAttr(getAttrKey(), root->state.symbols); + if (cachedValue && !std::get_if(&cachedValue->second)) { + if (auto attrs = std::get_if>(&cachedValue->second)) { //printError("GOT ATTRS %s", getAttrPathStr()); return *attrs; } else - throw Error("unexpected type mismatch in evaluation cache"); + throw Error("unexpected type mismatch in evaluation cache (attrs expected)"); } } @@ -958,10 +1003,8 @@ struct AttrCursor : std::enable_shared_from_this std::sort(attrs.begin(), attrs.end(), [](const Symbol & a, const Symbol & b) { return (const string &) a < (const string &) b; }); - if (root->db) { - root->db->setAttr(getAttrPath(), attrs); - cachedValue = attrs; - } + if (root->db) + cachedValue = {root->db->setAttr(getAttrKey(), attrs), attrs}; return attrs; }