From b33b94748cc8cc30e8bdf58f34c4934833515440 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 21 Jan 2020 23:49:32 +0100 Subject: [PATCH] Convert fetchMercurial to a input type This enables Mercurial flakes. It also fixes a bug in pure mode where you could use a branch/tag name rather than a revision. --- src/libexpr/primops/fetchMercurial.cc | 208 ++++--------------- src/libstore/fetchers/mercurial.cc | 274 ++++++++++++++++++++++++++ tests/fetchMercurial.sh | 4 +- 3 files changed, 313 insertions(+), 173 deletions(-) create mode 100644 src/libstore/fetchers/mercurial.cc diff --git a/src/libexpr/primops/fetchMercurial.cc b/src/libexpr/primops/fetchMercurial.cc index 8e949b6d6..8a22963ef 100644 --- a/src/libexpr/primops/fetchMercurial.cc +++ b/src/libexpr/primops/fetchMercurial.cc @@ -1,175 +1,19 @@ #include "primops.hh" #include "eval-inline.hh" -#include "download.hh" #include "store-api.hh" -#include "pathlocks.hh" - -#include +#include "fetchers/fetchers.hh" +#include "fetchers/parse.hh" +#include "fetchers/regex.hh" #include -#include - -using namespace std::string_literals; - namespace nix { -struct HgInfo -{ - Path storePath; - std::string branch; - std::string rev; - uint64_t revCount = 0; -}; - -std::regex commitHashRegex("^[0-9a-fA-F]{40}$"); - -HgInfo exportMercurial(ref store, const std::string & uri, - std::string rev, const std::string & name) -{ - if (rev == "" && hasPrefix(uri, "/") && pathExists(uri + "/.hg")) { - - bool clean = runProgram("hg", true, { "status", "-R", uri, "--modified", "--added", "--removed" }) == ""; - - if (!clean) { - - /* This is an unclean working tree. So copy all tracked - files. */ - - if (!settings.allowDirty) - throw Error("Mercurial tree '%s' is unclean", uri); - - if (settings.warnDirty) - warn("Mercurial tree '%s' is unclean", uri); - - HgInfo hgInfo; - hgInfo.rev = "0000000000000000000000000000000000000000"; - hgInfo.branch = chomp(runProgram("hg", true, { "branch", "-R", uri })); - - auto files = tokenizeString>( - runProgram("hg", true, { "status", "-R", uri, "--clean", "--modified", "--added", "--no-status", "--print0" }), "\0"s); - - PathFilter filter = [&](const Path & p) -> bool { - assert(hasPrefix(p, uri)); - std::string file(p, uri.size() + 1); - - auto st = lstat(p); - - if (S_ISDIR(st.st_mode)) { - auto prefix = file + "/"; - auto i = files.lower_bound(prefix); - return i != files.end() && hasPrefix(*i, prefix); - } - - return files.count(file); - }; - - hgInfo.storePath = store->printStorePath(store->addToStore("source", uri, true, htSHA256, filter)); - - return hgInfo; - } - } - - if (rev == "") rev = "default"; - - Path cacheDir = fmt("%s/nix/hg/%s", getCacheDir(), hashString(htSHA256, uri).to_string(Base32, false)); - - Path stampFile = fmt("%s/.hg/%s.stamp", cacheDir, hashString(htSHA512, rev).to_string(Base32, false)); - - /* If we haven't pulled this repo less than ‘tarball-ttl’ seconds, - do so now. */ - time_t now = time(0); - struct stat st; - if (stat(stampFile.c_str(), &st) != 0 || - (uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now) - { - /* Except that if this is a commit hash that we already have, - we don't have to pull again. */ - if (!(std::regex_match(rev, commitHashRegex) - && pathExists(cacheDir) - && runProgram( - RunOptions("hg", { "log", "-R", cacheDir, "-r", rev, "--template", "1" }) - .killStderr(true)).second == "1")) - { - Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Mercurial repository '%s'", uri)); - - if (pathExists(cacheDir)) { - try { - runProgram("hg", true, { "pull", "-R", cacheDir, "--", uri }); - } - catch (ExecError & e) { - string transJournal = cacheDir + "/.hg/store/journal"; - /* hg throws "abandoned transaction" error only if this file exists */ - if (pathExists(transJournal)) { - runProgram("hg", true, { "recover", "-R", cacheDir }); - runProgram("hg", true, { "pull", "-R", cacheDir, "--", uri }); - } else { - throw ExecError(e.status, fmt("'hg pull' %s", statusToString(e.status))); - } - } - } else { - createDirs(dirOf(cacheDir)); - runProgram("hg", true, { "clone", "--noupdate", "--", uri, cacheDir }); - } - } - - writeFile(stampFile, ""); - } - - auto tokens = tokenizeString>( - runProgram("hg", true, { "log", "-R", cacheDir, "-r", rev, "--template", "{node} {rev} {branch}" })); - assert(tokens.size() == 3); - - HgInfo hgInfo; - hgInfo.rev = tokens[0]; - hgInfo.revCount = std::stoull(tokens[1]); - hgInfo.branch = tokens[2]; - - std::string storeLinkName = hashString(htSHA512, name + std::string("\0"s) + hgInfo.rev).to_string(Base32, false); - Path storeLink = fmt("%s/.hg/%s.link", cacheDir, storeLinkName); - - try { - auto json = nlohmann::json::parse(readFile(storeLink)); - - assert(json["name"] == name && json["rev"] == hgInfo.rev); - - hgInfo.storePath = json["storePath"]; - - if (store->isValidPath(store->parseStorePath(hgInfo.storePath))) { - printTalkative("using cached Mercurial store path '%s'", hgInfo.storePath); - return hgInfo; - } - - } catch (SysError & e) { - if (e.errNo != ENOENT) throw; - } - - Path tmpDir = createTempDir(); - AutoDelete delTmpDir(tmpDir, true); - - runProgram("hg", true, { "archive", "-R", cacheDir, "-r", rev, tmpDir }); - - deletePath(tmpDir + "/.hg_archival.txt"); - - hgInfo.storePath = store->printStorePath(store->addToStore(name, tmpDir)); - - nlohmann::json json; - json["storePath"] = hgInfo.storePath; - json["uri"] = uri; - json["name"] = name; - json["branch"] = hgInfo.branch; - json["rev"] = hgInfo.rev; - json["revCount"] = hgInfo.revCount; - - writeFile(storeLink, json.dump()); - - return hgInfo; -} - static void prim_fetchMercurial(EvalState & state, const Pos & pos, Value * * args, Value & v) { std::string url; - std::string rev; + std::optional rev; + std::optional ref; std::string name = "source"; PathSet context; @@ -183,8 +27,15 @@ static void prim_fetchMercurial(EvalState & state, const Pos & pos, Value * * ar string n(attr.name); if (n == "url") url = state.coerceToString(*attr.pos, *attr.value, context, false, false); - else if (n == "rev") - rev = state.forceStringNoCtx(*attr.value, *attr.pos); + else if (n == "rev") { + // Ugly: unlike fetchGit, here the "rev" attribute can + // be both a revision or a branch/tag name. + auto value = state.forceStringNoCtx(*attr.value, *attr.pos); + if (std::regex_match(value, fetchers::revRegex)) + rev = Hash(value, htSHA1); + else + ref = value; + } else if (n == "name") name = state.forceStringNoCtx(*attr.value, *attr.pos); else @@ -201,21 +52,36 @@ static void prim_fetchMercurial(EvalState & state, const Pos & pos, Value * * ar // whitelist. Ah well. state.checkURI(url); - if (evalSettings.pureEval && rev == "") + if (evalSettings.pureEval && !rev) throw Error("in pure evaluation mode, 'fetchMercurial' requires a Mercurial revision"); - auto hgInfo = exportMercurial(state.store, url, rev, name); + auto parsedUrl = fetchers::parseURL( + url.find("://") != std::string::npos + ? "hg+" + url + : "hg+file://" + url); + if (rev) parsedUrl.query.insert_or_assign("rev", rev->gitRev()); + if (ref) parsedUrl.query.insert_or_assign("ref", *ref); + // FIXME: use name + auto input = inputFromURL(parsedUrl); + + auto [tree, input2] = input->fetchTree(state.store); state.mkAttrs(v, 8); - mkString(*state.allocAttr(v, state.sOutPath), hgInfo.storePath, PathSet({hgInfo.storePath})); - mkString(*state.allocAttr(v, state.symbols.create("branch")), hgInfo.branch); - mkString(*state.allocAttr(v, state.symbols.create("rev")), hgInfo.rev); - mkString(*state.allocAttr(v, state.symbols.create("shortRev")), std::string(hgInfo.rev, 0, 12)); - mkInt(*state.allocAttr(v, state.symbols.create("revCount")), hgInfo.revCount); + auto storePath = state.store->printStorePath(tree.storePath); + mkString(*state.allocAttr(v, state.sOutPath), storePath, PathSet({storePath})); + if (input2->getRef()) + mkString(*state.allocAttr(v, state.symbols.create("branch")), *input2->getRef()); + // Backward compatibility: set 'rev' to + // 0000000000000000000000000000000000000000 for a dirty tree. + auto rev2 = tree.rev.value_or(Hash(htSHA1)); + mkString(*state.allocAttr(v, state.symbols.create("rev")), rev2.gitRev()); + mkString(*state.allocAttr(v, state.symbols.create("shortRev")), std::string(rev2.gitRev(), 0, 12)); + if (tree.revCount) + mkInt(*state.allocAttr(v, state.symbols.create("revCount")), *tree.revCount); v.attrs->sort(); if (state.allowedPaths) - state.allowedPaths->insert(state.store->toRealPath(hgInfo.storePath)); + state.allowedPaths->insert(tree.actualPath); } static RegisterPrimOp r("fetchMercurial", 1, prim_fetchMercurial); diff --git a/src/libstore/fetchers/mercurial.cc b/src/libstore/fetchers/mercurial.cc new file mode 100644 index 000000000..64d5e84e1 --- /dev/null +++ b/src/libstore/fetchers/mercurial.cc @@ -0,0 +1,274 @@ +#include "fetchers.hh" +#include "parse.hh" +#include "globals.hh" +#include "tarfile.hh" +#include "store-api.hh" +#include "regex.hh" + +#include + +#include + +using namespace std::string_literals; + +namespace nix::fetchers { + +struct MercurialInput : Input +{ + ParsedURL url; + std::optional ref; + std::optional rev; + + MercurialInput(const ParsedURL & url) : url(url) + { + type = "hg"; + } + + bool operator ==(const Input & other) const override + { + auto other2 = dynamic_cast(&other); + return + other2 + && url.url == other2->url.url + && rev == other2->rev + && ref == other2->ref; + } + + bool isImmutable() const override + { + return (bool) rev; + } + + std::optional getRef() const override { return ref; } + + std::optional getRev() const override { return rev; } + + std::string to_string() const override + { + ParsedURL url2(url); + if (rev) url2.query.insert_or_assign("rev", rev->gitRev()); + if (ref) url2.query.insert_or_assign("ref", *ref); + return url2.to_string(); + } + + std::shared_ptr applyOverrides( + std::optional ref, + std::optional rev) const override + { + if (!ref && !rev) return shared_from_this(); + + auto res = std::make_shared(*this); + + if (ref) res->ref = ref; + if (rev) res->rev = rev; + + return res; + } + + std::optional getSourcePath() const + { + if (url.scheme == "hg+file" && !ref && !rev) + return url.path; + return {}; + } + + std::pair getActualUrl() const + { + bool isLocal = url.scheme == "hg+file"; + return {isLocal, isLocal ? url.path : std::string(url.base, 3)}; + } + + std::pair> fetchTreeInternal(nix::ref store) const override + { + auto name = "source"; + + auto input = std::make_shared(*this); + + auto [isLocal, actualUrl] = getActualUrl(); + + // FIXME: return lastModified. + + if (!input->ref && !input->rev && isLocal && pathExists(actualUrl + "/.hg")) { + + bool clean = runProgram("hg", true, { "status", "-R", actualUrl, "--modified", "--added", "--removed" }) == ""; + + if (!clean) { + + /* This is an unclean working tree. So copy all tracked + files. */ + + if (!settings.allowDirty) + throw Error("Mercurial tree '%s' is unclean", actualUrl); + + if (settings.warnDirty) + warn("Mercurial tree '%s' is unclean", actualUrl); + + input->ref = chomp(runProgram("hg", true, { "branch", "-R", actualUrl })); + + auto files = tokenizeString>( + runProgram("hg", true, { "status", "-R", actualUrl, "--clean", "--modified", "--added", "--no-status", "--print0" }), "\0"s); + + PathFilter filter = [&](const Path & p) -> bool { + assert(hasPrefix(p, actualUrl)); + std::string file(p, actualUrl.size() + 1); + + auto st = lstat(p); + + if (S_ISDIR(st.st_mode)) { + auto prefix = file + "/"; + auto i = files.lower_bound(prefix); + return i != files.end() && hasPrefix(*i, prefix); + } + + return files.count(file); + }; + + auto storePath = store->addToStore("source", actualUrl, true, htSHA256, filter); + + return {Tree { + .actualPath = store->printStorePath(storePath), + .storePath = std::move(storePath), + }, input}; + } + } + + if (!input->ref) input->ref = "default"; + + Path cacheDir = fmt("%s/nix/hg/%s", getCacheDir(), hashString(htSHA256, actualUrl).to_string(Base32, false)); + + assert(input->rev || input->ref); + auto revOrRef = input->rev ? input->rev->gitRev() : *input->ref; + + Path stampFile = fmt("%s/.hg/%s.stamp", cacheDir, hashString(htSHA512, revOrRef).to_string(Base32, false)); + + /* If we haven't pulled this repo less than ‘tarball-ttl’ seconds, + do so now. */ + time_t now = time(0); + struct stat st; + if (stat(stampFile.c_str(), &st) != 0 || + (uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now) + { + /* Except that if this is a commit hash that we already have, + we don't have to pull again. */ + if (!(input->rev + && pathExists(cacheDir) + && runProgram( + RunOptions("hg", { "log", "-R", cacheDir, "-r", input->rev->gitRev(), "--template", "1" }) + .killStderr(true)).second == "1")) + { + Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Mercurial repository '%s'", actualUrl)); + + if (pathExists(cacheDir)) { + try { + runProgram("hg", true, { "pull", "-R", cacheDir, "--", actualUrl }); + } + catch (ExecError & e) { + string transJournal = cacheDir + "/.hg/store/journal"; + /* hg throws "abandoned transaction" error only if this file exists */ + if (pathExists(transJournal)) { + runProgram("hg", true, { "recover", "-R", cacheDir }); + runProgram("hg", true, { "pull", "-R", cacheDir, "--", actualUrl }); + } else { + throw ExecError(e.status, fmt("'hg pull' %s", statusToString(e.status))); + } + } + } else { + createDirs(dirOf(cacheDir)); + runProgram("hg", true, { "clone", "--noupdate", "--", actualUrl, cacheDir }); + } + } + + writeFile(stampFile, ""); + } + + auto tokens = tokenizeString>( + runProgram("hg", true, { "log", "-R", cacheDir, "-r", revOrRef, "--template", "{node} {rev} {branch}" })); + assert(tokens.size() == 3); + + input->rev = Hash(tokens[0], htSHA1); + auto revCount = std::stoull(tokens[1]); + input->ref = tokens[2]; + + std::string storeLinkName = hashString(htSHA512, name + std::string("\0"s) + input->rev->gitRev()).to_string(Base32, false); + Path storeLink = fmt("%s/.hg/%s.link", cacheDir, storeLinkName); + + try { + auto json = nlohmann::json::parse(readFile(storeLink)); + + assert(json["name"] == name && json["rev"] == input->rev->gitRev()); + + auto storePath = store->parseStorePath((std::string) json["storePath"]); + + if (store->isValidPath(storePath)) { + printTalkative("using cached Mercurial store path '%s'", store->printStorePath(storePath)); + return {Tree { + .actualPath = store->printStorePath(storePath), + .storePath = std::move(storePath), + .rev = input->rev, + .revCount = revCount, + }, input}; + } + + } catch (SysError & e) { + if (e.errNo != ENOENT) throw; + } + + Path tmpDir = createTempDir(); + AutoDelete delTmpDir(tmpDir, true); + + runProgram("hg", true, { "archive", "-R", cacheDir, "-r", input->rev->gitRev(), tmpDir }); + + deletePath(tmpDir + "/.hg_archival.txt"); + + auto storePath = store->addToStore(name, tmpDir); + + nlohmann::json json; + json["storePath"] = store->printStorePath(storePath); + json["uri"] = actualUrl; + json["name"] = name; + json["branch"] = *input->ref; + json["rev"] = input->rev->gitRev(); + json["revCount"] = revCount; + + writeFile(storeLink, json.dump()); + + return {Tree { + .actualPath = store->printStorePath(storePath), + .storePath = std::move(storePath), + .rev = input->rev, + .revCount = revCount, + }, input}; + } +}; + +struct MercurialInputScheme : InputScheme +{ + std::unique_ptr inputFromURL(const ParsedURL & url) override + { + if (url.scheme != "hg+http" && + url.scheme != "hg+https" && + url.scheme != "hg+ssh" && + url.scheme != "hg+file") return nullptr; + + auto input = std::make_unique(url); + + for (auto &[name, value] : url.query) { + if (name == "rev") { + if (!std::regex_match(value, revRegex)) + throw BadURL("Mercurial URL '%s' contains an invalid commit hash", url.url); + input->rev = Hash(value, htSHA1); + } + else if (name == "ref") { + if (!std::regex_match(value, refRegex)) + throw BadURL("Mercurial URL '%s' contains an invalid branch/tag name", url.url); + input->ref = value; + } + } + + return input; + } +}; + +static auto r1 = OnStartup([] { registerInputScheme(std::make_unique()); }); + +} diff --git a/tests/fetchMercurial.sh b/tests/fetchMercurial.sh index 048a66ee2..6c11e06ba 100644 --- a/tests/fetchMercurial.sh +++ b/tests/fetchMercurial.sh @@ -82,8 +82,8 @@ path2=$(nix eval --impure --raw --expr "(builtins.fetchMercurial $repo).outPath" [[ $(nix eval --impure --raw --expr "(builtins.fetchMercurial $repo).rev") = 0000000000000000000000000000000000000000 ]] -# ... unless we're using an explicit rev. -path3=$(nix eval --raw --expr "(builtins.fetchMercurial { url = $repo; rev = \"default\"; }).outPath") +# ... unless we're using an explicit ref. +path3=$(nix eval --impure --raw --expr "(builtins.fetchMercurial { url = $repo; rev = \"default\"; }).outPath") [[ $path = $path3 ]] # Committing should not affect the store path.