From 7f5ca6192d091090bc71ab7bf96dd4acf0f1d376 Mon Sep 17 00:00:00 2001 From: Ben Radford Date: Mon, 10 Apr 2023 14:12:10 +0100 Subject: [PATCH 1/4] Add script to reproduce issue by inducing heavy load. --- repro-7998.sh | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100755 repro-7998.sh diff --git a/repro-7998.sh b/repro-7998.sh new file mode 100755 index 000000000..b022d31ab --- /dev/null +++ b/repro-7998.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +set -eux +LOG_FILE=/tmp/repro-7998.log +rm -f "$LOG_FILE" +for i in {1..8}; do + ( + while true; do + nix-build \ + --argstr uuid $(uuidgen) \ + --arg drvCount $((RANDOM % 256)) \ + -E ' + { uuid ? "00000000-0000-0000-0000-000000000000", drvCount ? 0 }: + with import <nixpkgs> { }; + let + mkDrv = name: buildInputs: + stdenv.mkDerivation { + inherit name; + inherit buildInputs; + unpackPhase = "date +\"${uuid} %F %T\" >date.txt"; + installPhase = "mkdir -p $out; cp date.txt $out/"; + }; + mkDrvs = n: + let + name = "repro-7998-${toString n}"; + buildInputs = if n == 0 then [ ] else [ (mkDrvs (n - 1)) ]; + in mkDrv name buildInputs; + in mkDrvs drvCount + ' + done 2>&1 | tee -a "$LOG_FILE" + ) & +done +read # Press enter to stop +pkill -KILL -f repro-7998.sh From 7c56e842133afe14812270c34cda3dc0a3da8aa6 Mon Sep 17 00:00:00 2001 From: Ben Radford Date: Tue, 11 Apr 2023 10:22:07 +0100 Subject: [PATCH 2/4] Warn after a second of being busy instead of immediately. Getting the occasional SQLITE_BUSY is expected when the database is being accessed concurrently. The retry will likely succeed so it is pointless to warn immediately. Instead we track how long each retrySQLite block has been running, and only begin warning after a second has elapsed (and then every 10 seconds subsequently). 
--- src/libstore/sqlite.cc | 9 ++------- src/libstore/sqlite.hh | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/libstore/sqlite.cc b/src/libstore/sqlite.cc index 871f2f3be..c57e58fe0 100644 --- a/src/libstore/sqlite.cc +++ b/src/libstore/sqlite.cc @@ -239,14 +239,9 @@ SQLiteTxn::~SQLiteTxn() } } -void handleSQLiteBusy(const SQLiteBusy & e) +void handleSQLiteBusy(const SQLiteBusy & e, bool shouldWarn) { - static std::atomic<time_t> lastWarned{0}; - - time_t now = time(0); - - if (now > lastWarned + 10) { - lastWarned = now; + if (shouldWarn) { logWarning({ .msg = hintfmt(e.what()) }); diff --git a/src/libstore/sqlite.hh b/src/libstore/sqlite.hh index b735838ec..e2c9e28f8 100644 --- a/src/libstore/sqlite.hh +++ b/src/libstore/sqlite.hh @@ -139,7 +139,7 @@ protected: MakeError(SQLiteBusy, SQLiteError); -void handleSQLiteBusy(const SQLiteBusy & e); +void handleSQLiteBusy(const SQLiteBusy & e, bool shouldWarn); /** * Convenience function for retrying a SQLite transaction when the @@ -148,11 +148,22 @@ void handleSQLiteBusy(const SQLiteBusy & e); template<typename T, typename F> T retrySQLite(F && fun) { + time_t nextWarning = time(0) + 1; + while (true) { try { return fun(); + } catch (SQLiteBusy & e) { - handleSQLiteBusy(e); + time_t now = time(0); + bool shouldWarn = false; + + if (now > nextWarning) { + nextWarning = now + 10; + shouldWarn = true; + } + + handleSQLiteBusy(e, shouldWarn); } } } From da322ebda62470acbf5373374c4cee8236705c2f Mon Sep 17 00:00:00 2001 From: Ben Radford Date: Tue, 11 Apr 2023 10:47:53 +0100 Subject: [PATCH 3/4] Revert "Add script to reproduce issue by inducing heavy load." This reverts commit 213b838f9cfb820d2bc76d7c6edc468b27029945. 
--- repro-7998.sh | 33 --------------------------------- 1 file changed, 33 deletions(-) delete mode 100755 repro-7998.sh diff --git a/repro-7998.sh b/repro-7998.sh deleted file mode 100755 index b022d31ab..000000000 --- a/repro-7998.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env bash -set -eux -LOG_FILE=/tmp/repro-7998.log -rm -f "$LOG_FILE" -for i in {1..8}; do - ( - while true; do - nix-build \ - --argstr uuid $(uuidgen) \ - --arg drvCount $((RANDOM % 256)) \ - -E ' - { uuid ? "00000000-0000-0000-0000-000000000000", drvCount ? 0 }: - with import <nixpkgs> { }; - let - mkDrv = name: buildInputs: - stdenv.mkDerivation { - inherit name; - inherit buildInputs; - unpackPhase = "date +\"${uuid} %F %T\" >date.txt"; - installPhase = "mkdir -p $out; cp date.txt $out/"; - }; - mkDrvs = n: - let - name = "repro-7998-${toString n}"; - buildInputs = if n == 0 then [ ] else [ (mkDrvs (n - 1)) ]; - in mkDrv name buildInputs; - in mkDrvs drvCount - ' - done 2>&1 | tee -a "$LOG_FILE" - ) & -done -read # Press enter to stop -pkill -KILL -f repro-7998.sh From de3df3009bf003f327d35e246d5904d93273e2e9 Mon Sep 17 00:00:00 2001 From: Ben Radford Date: Tue, 11 Apr 2023 16:03:37 +0100 Subject: [PATCH 4/4] Move warning timing logic into handleSQLiteBusy. 
--- src/libstore/sqlite.cc | 6 ++++-- src/libstore/sqlite.hh | 13 ++----------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/src/libstore/sqlite.cc b/src/libstore/sqlite.cc index c57e58fe0..df334c23c 100644 --- a/src/libstore/sqlite.cc +++ b/src/libstore/sqlite.cc @@ -239,9 +239,11 @@ SQLiteTxn::~SQLiteTxn() } } -void handleSQLiteBusy(const SQLiteBusy & e, bool shouldWarn) +void handleSQLiteBusy(const SQLiteBusy & e, time_t & nextWarning) { - if (shouldWarn) { + time_t now = time(0); + if (now > nextWarning) { + nextWarning = now + 10; logWarning({ .msg = hintfmt(e.what()) }); diff --git a/src/libstore/sqlite.hh b/src/libstore/sqlite.hh index e2c9e28f8..6e14852cb 100644 --- a/src/libstore/sqlite.hh +++ b/src/libstore/sqlite.hh @@ -139,7 +139,7 @@ protected: MakeError(SQLiteBusy, SQLiteError); -void handleSQLiteBusy(const SQLiteBusy & e, bool shouldWarn); +void handleSQLiteBusy(const SQLiteBusy & e, time_t & nextWarning); /** * Convenience function for retrying a SQLite transaction when the @@ -153,17 +153,8 @@ T retrySQLite(F && fun) while (true) { try { return fun(); - } catch (SQLiteBusy & e) { - time_t now = time(0); - bool shouldWarn = false; - - if (now > nextWarning) { - nextWarning = now + 10; - shouldWarn = true; - } - - handleSQLiteBusy(e, shouldWarn); + handleSQLiteBusy(e, nextWarning); } } }