From c190f051ac34b2df51402bf593150de97f491d86 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 14 Oct 2003 15:33:00 +0000 Subject: [PATCH] * Automatically recover the database in case of a crash. --- src/db.cc | 187 ++++++++++++++++++++++++++++++++++++++++------- src/db.hh | 4 + src/fix.cc | 4 +- src/nix.cc | 4 +- src/pathlocks.cc | 41 ++++++++--- src/pathlocks.hh | 5 ++ 6 files changed, 204 insertions(+), 41 deletions(-) diff --git a/src/db.cc b/src/db.cc index ffb1999fe..f9d618b3b 100644 --- a/src/db.cc +++ b/src/db.cc @@ -1,8 +1,13 @@ -#include "db.hh" -#include "util.hh" +#include +#include +#include #include +#include "db.hh" +#include "util.hh" +#include "pathlocks.hh" + /* Wrapper class to ensure proper destruction. */ class DestroyDbc @@ -89,51 +94,179 @@ Database::Database() Database::~Database() { - if (env) { - debug(format("closing database environment")); + close(); +} - try { - for (map::iterator i = tables.begin(); - i != tables.end(); i++) - { - debug(format("closing table %1%") % i->first); - Db * db = i->second; - db->close(0); - delete db; - } - - env->txn_checkpoint(0, 0, 0); - env->close(0); - - } catch (DbException e) { rethrow(e); } - - delete env; +int getAccessorCount(int fd) +{ + if (lseek(fd, 0, SEEK_SET) == -1) + throw SysError("seeking accessor count"); + char buf[128]; + int len; + if ((len = read(fd, buf, sizeof(buf) - 1)) == -1) + throw SysError("reading accessor count"); + buf[len] = 0; + int count; + if (sscanf(buf, "%d", &count) != 1) { + debug(format("accessor count is invalid: `%1%'") % buf); + return -1; } + return count; +} + + +void setAccessorCount(int fd, int n) +{ + if (lseek(fd, 0, SEEK_SET) == -1) + throw SysError("seeking accessor count"); + string s = (format("%1%") % n).str(); + const char * s2 = s.c_str(); + if (write(fd, s2, strlen(s2)) != (ssize_t) strlen(s2) || + ftruncate(fd, strlen(s2)) != 0) + throw SysError("writing accessor count"); +} + + +void openEnv(DbEnv * env, const string & path, u_int32_t flags) +{ + env->open(path.c_str(), + DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | + DB_CREATE | flags, + 0666); } void Database::open(const string & path) { - try { - - if (env) throw Error(format("environment already open")); + if (env) throw Error(format("environment already open")); + try { + + debug(format("opening database environment")); + + + /* Create the database environment object. */ env = new DbEnv(0); env->set_lg_bsize(32 * 1024); /* default */ env->set_lg_max(256 * 1024); /* must be > 4 * lg_bsize */ env->set_lk_detect(DB_LOCK_DEFAULT); - env->open(path.c_str(), - DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | - DB_CREATE, - 0666); + + /* The following code provides automatic recovery of the + database environment. Recovery is necessary when a process + dies while it has the database open. To detect this, + processes atomically increment a counter when the open the + database, and decrement it when they close it. If we see + that counter is > 0 but no processes are accessing the + database---determined by attempting to obtain a write lock + on a lock file on which all accessors have a read lock---we + must run recovery. Note that this also ensures that we + only run recovery when there are no other accessors (which + could cause database corruption). */ + + /* !!! close fdAccessors / fdLock on exception */ + + /* Open the accessor count file. */ + string accessorsPath = path + "/accessor_count"; + fdAccessors = ::open(accessorsPath.c_str(), O_RDWR | O_CREAT, 0666); + if (fdAccessors == -1) + throw SysError(format("opening file `%1%'") % accessorsPath); + + /* Open the lock file. */ + string lockPath = path + "/access_lock"; + fdLock = ::open(lockPath.c_str(), O_RDWR | O_CREAT, 0666); + if (fdLock == -1) + throw SysError(format("opening lock file `%1%'") % lockPath); + + /* Try to acquire a write lock. */ + debug(format("attempting write lock on `%1%'") % lockPath); + if (lockFile(fdLock, ltWrite, false)) { /* don't wait */ + + debug(format("write lock granted")); + + /* We have a write lock, which means that there are no + other readers or writers. */ + + int n = getAccessorCount(fdAccessors); + setAccessorCount(fdAccessors, 1); + + if (n != 0) { + msg(lvlTalkative, format("accessor count is %1%, running recovery") % n); + + /* Open the environment after running recovery. */ + openEnv(env, path, DB_RECOVER); + } + + else + /* Open the environment normally. */ + openEnv(env, path, 0); + + /* Downgrade to a read lock. */ + debug(format("downgrading to read lock on `%1%'") % lockPath); + lockFile(fdLock, ltRead, true); + + } else { + /* There are other accessors. */ + debug(format("write lock refused")); + + /* Acquire a read lock. */ + debug(format("acquiring read lock on `%1%'") % lockPath); + lockFile(fdLock, ltRead, true); /* wait indefinitely */ + + /* Increment the accessor count. */ + lockFile(fdAccessors, ltWrite, true); + int n = getAccessorCount(fdAccessors) + 1; + setAccessorCount(fdAccessors, n); + debug(format("incremented accessor count to %1%") % n); + lockFile(fdAccessors, ltNone, true); + + /* Open the environment normally. */ + openEnv(env, path, 0); + } } catch (DbException e) { rethrow(e); } } +void Database::close() +{ + if (!env) return; + + /* Close the database environment. */ + debug(format("closing database environment")); + + try { + + for (map::iterator i = tables.begin(); + i != tables.end(); i++) + { + debug(format("closing table %1%") % i->first); + Db * db = i->second; + db->close(0); + delete db; + } + + env->txn_checkpoint(0, 0, 0); + env->close(0); + + } catch (DbException e) { rethrow(e); } + + delete env; + + /* Decrement the accessor count. */ + lockFile(fdAccessors, ltWrite, true); + int n = getAccessorCount(fdAccessors) - 1; + setAccessorCount(fdAccessors, n); + debug(format("decremented accessor count to %1%") % n); + lockFile(fdAccessors, ltNone, true); + + ::close(fdAccessors); + ::close(fdLock); +} + + TableId Database::openTable(const string & tableName) { requireEnv(); diff --git a/src/db.hh b/src/db.hh index 4bac943e5..e3dc7ce7a 100644 --- a/src/db.hh +++ b/src/db.hh @@ -45,6 +45,9 @@ class Database private: DbEnv * env; + int fdLock; + int fdAccessors; + TableId nextId; map tables; @@ -57,6 +60,7 @@ public: ~Database(); void open(const string & path); + void close(); TableId openTable(const string & table); diff --git a/src/fix.cc b/src/fix.cc index 71fd06877..cbf759b31 100644 --- a/src/fix.cc +++ b/src/fix.cc @@ -438,8 +438,6 @@ static void printNixExpr(EvalState & state, Expr e) void run(Strings args) { - openDB(); - EvalState state; Strings files; bool readStdin = false; @@ -467,6 +465,8 @@ void run(Strings args) files.push_back(arg); } + openDB(); + if (readStdin) { Expr e = evalStdin(state); printNixExpr(state, e); diff --git a/src/nix.cc b/src/nix.cc index 9bbbf4ae8..1012780af 100644 --- a/src/nix.cc +++ b/src/nix.cc @@ -267,8 +267,6 @@ static void opVerify(Strings opFlags, Strings opArgs) list. */ void run(Strings args) { - openDB(); - Strings opFlags, opArgs; Operation op = 0; @@ -315,6 +313,8 @@ void run(Strings args) if (!op) throw UsageError("no operation specified"); + openDB(); + op(opFlags, opArgs); } diff --git a/src/pathlocks.cc b/src/pathlocks.cc index ff0226c84..3ecbbbcba 100644 --- a/src/pathlocks.cc +++ b/src/pathlocks.cc @@ -1,10 +1,39 @@ #include +#include +#include #include #include "pathlocks.hh" +bool lockFile(int fd, LockType lockType, bool wait) +{ + struct flock lock; + if (lockType == ltRead) lock.l_type = F_RDLCK; + else if (lockType == ltWrite) lock.l_type = F_WRLCK; + else if (lockType == ltNone) lock.l_type = F_UNLCK; + else abort(); + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 0; /* entire file */ + + if (wait) { + while (fcntl(fd, F_SETLKW, &lock) != 0) + if (errno != EINTR) + throw SysError(format("acquiring/releasing lock")); + } else { + while (fcntl(fd, F_SETLK, &lock) != 0) { + if (errno == EACCES || errno == EAGAIN) return false; + if (errno != EINTR) + throw SysError(format("acquiring/releasing lock")); + } + } + + return true; +} + + /* This enables us to check whether are not already holding a lock on a file ourselves. POSIX locks (fcntl) suck in this respect: if we close a descriptor, the previous lock will be closed as well. And @@ -43,16 +72,8 @@ PathLocks::PathLocks(const PathSet & _paths) fds.push_back(fd); this->paths.push_back(lockPath); - /* Lock it. */ - struct flock lock; - lock.l_type = F_WRLCK; /* exclusive lock */ - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; /* entire file */ - - while (fcntl(fd, F_SETLKW, &lock) == -1) - if (errno != EINTR) - throw SysError(format("acquiring lock on `%1%'") % lockPath); + /* Acquire an exclusive lock. */ + lockFile(fd, ltWrite, true); lockedPaths.insert(lockPath); } diff --git a/src/pathlocks.hh b/src/pathlocks.hh index 6c36b9b1d..ce61386d6 100644 --- a/src/pathlocks.hh +++ b/src/pathlocks.hh @@ -4,6 +4,11 @@ #include "util.hh" +typedef enum LockType { ltRead, ltWrite, ltNone }; + +bool lockFile(int fd, LockType lockType, bool wait); + + class PathLocks { private: