From 60340ce3e2f793caf1704997a4d7a5a066e9ef24 Mon Sep 17 00:00:00 2001
From: Eelco Dolstra
Date: Thu, 9 Apr 2015 11:42:04 +0200
Subject: [PATCH] Implement caching of fetchurl/fetchTarball results

ETags are used to prevent redownloading unchanged files.
---
Review notes (text between the "---" marker and the first "diff --git" line
is commentary, not part of the commit message):
 * Line structure and the template arguments lost in extraction
   (tokenizeString<vector<string>>, singleton<PathSet>,
   template<class T, class A>) are restored; indentation is reconstructed.
 * CURLOPT_FAILONERROR is passed 1L, CURLOPT_HEADERDATA is passed `this`.
 * Curl::fetch() now overwrites expectedETag on every call, and an empty
   expected tag is never treated as "already cached".
 * Only added ("+") lines were altered, so hunk sizes are unchanged.

 src/libexpr/primops.cc  | 143 ++++++++++++++++++++++++++++++++++------
 src/libutil/util.cc     |  20 ++++++
 src/libutil/util.hh     |   7 ++
 src/nix-env/profiles.cc |  11 +---
 4 files changed, 152 insertions(+), 29 deletions(-)

diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc
index e81849646..77ca42042 100644
--- a/src/libexpr/primops.cc
+++ b/src/libexpr/primops.cc
@@ -1490,6 +1490,9 @@ struct Curl
 {
     CURL * curl;
     string data;
+    string etag, status, expectedETag;
+
+    struct curl_slist * requestHeaders;
 
     static size_t writeCallback(void * contents, size_t size, size_t nmemb, void * userp)
     {
@@ -1499,36 +1502,92 @@ struct Curl
         return realSize;
     }
 
+    static size_t headerCallback(void * contents, size_t size, size_t nmemb, void * userp)
+    {
+        Curl & c(* (Curl *) userp);
+        size_t realSize = size * nmemb;
+        string line = string((char *) contents, realSize);
+        printMsg(lvlVomit, format("got header: %1%") % trim(line));
+        if (line.compare(0, 5, "HTTP/") == 0) { // new response starts
+            c.etag = "";
+            auto ss = tokenizeString<vector<string>>(line, " ");
+            c.status = ss.size() >= 2 ? ss[1] : "";
+        } else {
+            auto i = line.find(':');
+            if (i != string::npos) {
+                string name = trim(string(line, 0, i));
+                if (name == "ETag") { // FIXME: case
+                    c.etag = trim(string(line, i + 1));
+                    /* Hack to work around a GitHub bug: it sends
+                       ETags, but ignores If-None-Match. So if we get
+                       the expected ETag on a 200 response, then shut
+                       down the connection because we already have the
+                       data. */
+                    printMsg(lvlDebug, format("got ETag: %1%") % c.etag);
+                    if (!c.expectedETag.empty() && c.etag == c.expectedETag && c.status == "200") { // never abort when nothing is cached
+                        printMsg(lvlDebug, format("shutting down on 200 HTTP response with expected ETag"));
+                        return 0;
+                    }
+                }
+            }
+        }
+        return realSize;
+    }
+
     Curl()
     {
+        requestHeaders = 0;
+
         curl = curl_easy_init();
         if (!curl) throw Error("unable to initialize curl");
 
         curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
         curl_easy_setopt(curl, CURLOPT_CAINFO, getEnv("SSL_CERT_FILE", "/etc/ssl/certs/ca-certificates.crt").c_str());
         curl_easy_setopt(curl, CURLOPT_USERAGENT, ("Nix/" + nixVersion).c_str());
+        curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1L); // the option value is read as a long
 
         curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeCallback);
         curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) &curl);
+
+        curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, headerCallback);
+        curl_easy_setopt(curl, CURLOPT_HEADERDATA, (void *) this); // headerCallback casts userp to Curl *
     }
 
     ~Curl()
     {
         if (curl) curl_easy_cleanup(curl);
+        if (requestHeaders) curl_slist_free_all(requestHeaders);
     }
 
-    string fetch(const string & url)
+    bool fetch(const string & url, const string & expectedETag = "")
     {
         curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
 
         data.clear();
 
-        CURLcode res = curl_easy_perform(curl);
-        if (res != CURLE_OK)
-            throw Error(format("unable to download ‘%1%’: %2%")
-                % url % curl_easy_strerror(res));
+        if (requestHeaders) {
+            curl_slist_free_all(requestHeaders);
+            requestHeaders = 0;
+        }
 
-        return data;
+        this->expectedETag = expectedETag; // always overwrite, so a tag from an earlier fetch() cannot match
+        if (!expectedETag.empty()) {
+            requestHeaders = curl_slist_append(requestHeaders, ("If-None-Match: " + expectedETag).c_str());
+        }
+
+        curl_easy_setopt(curl, CURLOPT_HTTPHEADER, requestHeaders);
+
+        CURLcode res = curl_easy_perform(curl);
+        if (res == CURLE_WRITE_ERROR && !expectedETag.empty() && etag == expectedETag) return false; // aborted by headerCallback: cached copy is current
+        if (res != CURLE_OK)
+            throw Error(format("unable to download ‘%1%’: %2% (%3%)")
+                % url % curl_easy_strerror(res) % res);
+
+        long httpStatus = 0;
+        curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &httpStatus);
+        if (httpStatus == 304) return false;
+
+        return true;
     }
 };
 
@@ -1560,25 +1619,71 @@ void fetch(EvalState & state, const Pos & pos, Value * * args, Value & v,
     } else
         url = state.forceStringNoCtx(*args[0], pos);
 
-    // TODO: cache downloads.
+    Path cacheDir = getEnv("XDG_CACHE_HOME", getEnv("HOME", "") + "/.cache") + "/nix/tarballs";
+    createDirs(cacheDir);
 
-    Curl curl;
-    string data = curl.fetch(url);
+    string urlHash = printHash32(hashString(htSHA256, url));
 
-    string name;
-    string::size_type p = url.rfind('/');
-    if (p != string::npos) name = string(url, p + 1);
+    Path dataFile = cacheDir + "/" + urlHash + ".info";
+    Path fileLink = cacheDir + "/" + urlHash + "-file";
 
-    Path storePath = store->addTextToStore(name, data, PathSet(), state.repair);
+    Path storePath;
 
-    if (unpack) {
-        Path tmpDir = createTempDir();
-        AutoDelete autoDelete(tmpDir, true);
-        runProgram("tar", true, {"xf", storePath, "-C", tmpDir, "--strip-components", "1"}, "");
-        storePath = store->addToStore(name, tmpDir, true, htSHA256, defaultPathFilter, state.repair);
+    string expectedETag;
+
+    if (pathExists(fileLink) && pathExists(dataFile)) {
+        storePath = readLink(fileLink);
+        store->addTempRoot(storePath);
+        if (store->isValidPath(storePath)) {
+            auto ss = tokenizeString<vector<string>>(readFile(dataFile), "\n");
+            if (ss.size() >= 2 && ss[0] == url) {
+                printMsg(lvlDebug, format("verifying previous ETag ‘%1%’") % ss[1]);
+                expectedETag = ss[1];
+            }
+        } else
+            storePath = "";
     }
 
-    mkString(v, storePath, singleton<PathSet>(storePath));
+    string name;
+    auto p = url.rfind('/');
+    if (p != string::npos) name = string(url, p + 1);
+
+    if (expectedETag.empty())
+        printMsg(lvlInfo, format("downloading ‘%1%’...") % url);
+    else
+        printMsg(lvlInfo, format("checking ‘%1%’...") % url);
+    Curl curl;
+
+    if (curl.fetch(url, expectedETag))
+        storePath = store->addTextToStore(name, curl.data, PathSet(), state.repair);
+
+    assert(!storePath.empty());
+    replaceSymlink(storePath, fileLink);
+
+    writeFile(dataFile, url + "\n" + curl.etag + "\n");
+
+    if (unpack) {
+        Path unpackedLink = cacheDir + "/" + baseNameOf(storePath) + "-unpacked";
+        Path unpackedStorePath;
+        if (pathExists(unpackedLink)) {
+            unpackedStorePath = readLink(unpackedLink);
+            store->addTempRoot(unpackedStorePath);
+            if (!store->isValidPath(unpackedStorePath))
+                unpackedStorePath = "";
+        }
+        if (unpackedStorePath.empty()) {
+            printMsg(lvlDebug, format("unpacking ‘%1%’...") % storePath);
+            Path tmpDir = createTempDir();
+            AutoDelete autoDelete(tmpDir, true);
+            runProgram("tar", true, {"xf", storePath, "-C", tmpDir, "--strip-components", "1"}, "");
+            unpackedStorePath = store->addToStore(name, tmpDir, true, htSHA256, defaultPathFilter, state.repair);
+        }
+        replaceSymlink(unpackedStorePath, unpackedLink);
+        mkString(v, unpackedStorePath, singleton<PathSet>(unpackedStorePath));
+    }
+
+    else
+        mkString(v, storePath, singleton<PathSet>(storePath));
 }
 
 
diff --git a/src/libutil/util.cc b/src/libutil/util.cc
index be0a9bf31..ab0a3b303 100644
--- a/src/libutil/util.cc
+++ b/src/libutil/util.cc
@@ -413,6 +413,17 @@ void createSymlink(const Path & target, const Path & link)
 }
 
 
+void replaceSymlink(const Path & target, const Path & link)
+{
+    Path tmp = canonPath(dirOf(link) + "/.new_" + baseNameOf(link));
+
+    createSymlink(target, tmp);
+
+    if (rename(tmp.c_str(), link.c_str()) != 0)
+        throw SysError(format("renaming ‘%1%’ to ‘%2%’") % tmp % link);
+}
+
+
 LogType logType = ltPretty;
 Verbosity verbosity = lvlInfo;
 
@@ -1076,6 +1087,15 @@ string chomp(const string & s)
 }
 
 
+string trim(const string & s, const string & whitespace)
+{
+    auto i = s.find_first_not_of(whitespace);
+    if (i == string::npos) return "";
+    auto j = s.find_last_not_of(whitespace);
+    return string(s, i, j == string::npos ? j : j - i + 1);
+}
+
+
 string statusToString(int status)
 {
     if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
diff --git a/src/libutil/util.hh b/src/libutil/util.hh
index 20330fb76..7d20351ee 100644
--- a/src/libutil/util.hh
+++ b/src/libutil/util.hh
@@ -110,6 +110,9 @@ Paths createDirs(const Path & path);
 /* Create a symlink. */
 void createSymlink(const Path & target, const Path & link);
 
+/* Atomically create or replace a symlink. */
+void replaceSymlink(const Path & target, const Path & link);
+
 
 template<class T, class A>
 T singleton(const A & a)
@@ -334,6 +337,10 @@ string concatStringsSep(const string & sep, const StringSet & ss);
 string chomp(const string & s);
 
 
+/* Remove whitespace from the start and end of a string. */
+string trim(const string & s, const string & whitespace = " \n\r\t");
+
+
 /* Convert the exit status of a child as returned by wait() into an
    error string. */
 string statusToString(int status);
diff --git a/src/nix-env/profiles.cc b/src/nix-env/profiles.cc
index d8eb0ef52..d93d7fd80 100644
--- a/src/nix-env/profiles.cc
+++ b/src/nix-env/profiles.cc
@@ -116,16 +116,7 @@ void switchLink(Path link, Path target)
     /* Hacky. */
     if (dirOf(target) == dirOf(link)) target = baseNameOf(target);
 
-    Path tmp = canonPath(dirOf(link) + "/.new_" + baseNameOf(link));
-    createSymlink(target, tmp);
-    /* The rename() system call is supposed to be essentially atomic
-       on Unix. That is, if we have links `current -> X' and
-       `new_current -> Y', and we rename new_current to current, a
-       process accessing current will see X or Y, but never a
-       file-not-found or other error condition. This is sufficient to
-       atomically switch user environments. */
-    if (rename(tmp.c_str(), link.c_str()) != 0)
-        throw SysError(format("renaming ‘%1%’ to ‘%2%’") % tmp % link);
+    replaceSymlink(target, link);
 }
 
 