From 220251ba51e7d8e47553d3edae0e736b2327011d Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Sun, 27 Oct 2024 18:27:11 +0100 Subject: [PATCH 01/12] libstore: remove FiletransferRequest::head add a method to FileTransfer that provides this functionality instead. Change-Id: Ic1933a5df76a109c248c9c5efea065356b20a6f9 --- src/libstore/filetransfer.cc | 42 +++++++++++++++++++------ src/libstore/filetransfer.hh | 13 +++++++- src/libstore/http-binary-cache-store.cc | 9 +----- 3 files changed, 45 insertions(+), 19 deletions(-) diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index 3a4ff04c3..21fe5715e 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -48,6 +48,7 @@ struct curlFileTransfer : public FileTransfer FileTransferResult result; Activity act; std::optional uploadData; + bool noBody = false; // \equiv HTTP HEAD, don't download data bool done = false; // whether either the success or failure function has been called std::packaged_task callback; std::function dataCallback; @@ -92,7 +93,8 @@ struct curlFileTransfer : public FileTransfer ActivityId parentAct, std::invocable auto callback, std::function dataCallback, - std::optional uploadData + std::optional uploadData, + bool noBody ) : fileTransfer(fileTransfer) , request(request) @@ -100,6 +102,7 @@ struct curlFileTransfer : public FileTransfer fmt(uploadData ? "uploading '%s'" : "downloading '%s'", request.uri), {request.uri}, parentAct) , uploadData(std::move(uploadData)) + , noBody(noBody) , callback([cb{std::move(callback)}] (std::exception_ptr ex, FileTransferResult r) { cb(ex); return r; @@ -299,7 +302,7 @@ struct curlFileTransfer : public FileTransfer if (settings.downloadSpeed.get() > 0) curl_easy_setopt(req, CURLOPT_MAX_RECV_SPEED_LARGE, (curl_off_t) (settings.downloadSpeed.get() * 1024)); - if (request.head) + if (noBody) curl_easy_setopt(req, CURLOPT_NOBODY, 1); if (uploadData) { @@ -661,17 +664,18 @@ struct curlFileTransfer : public FileTransfer std::future enqueueDownload(const FileTransferRequest & request) override { - return enqueueFileTransfer(request, std::nullopt); + return enqueueFileTransfer(request, std::nullopt, false); } std::future enqueueUpload(const FileTransferRequest & request, std::string data) override { - return enqueueFileTransfer(request, std::move(data)); + return enqueueFileTransfer(request, std::move(data), false); } - std::future - enqueueFileTransfer(const FileTransferRequest & request, std::optional data) + std::future enqueueFileTransfer( + const FileTransferRequest & request, std::optional data, bool noBody + ) { return enqueueFileTransfer( request, @@ -681,14 +685,16 @@ struct curlFileTransfer : public FileTransfer } }, {}, - std::move(data) + std::move(data), + noBody ); } std::future enqueueFileTransfer(const FileTransferRequest & request, std::invocable auto callback, std::function dataCallback, - std::optional data + std::optional data, + bool noBody ) { /* Ugly hack to support s3:// URIs. */ @@ -724,11 +730,26 @@ struct curlFileTransfer : public FileTransfer getCurActivity(), std::move(callback), std::move(dataCallback), - std::move(data) + std::move(data), + noBody )) ->callback.get_future(); } + bool exists(std::string_view uri) override + { + try { + enqueueFileTransfer(FileTransferRequest{uri}, std::nullopt, true).get(); + return true; + } catch (FileTransferError & e) { + /* S3 buckets return 403 if a file doesn't exist and the + bucket is unlistable, so treat 403 as 404. */ + if (e.error == FileTransfer::NotFound || e.error == FileTransfer::Forbidden) + return false; + throw; + } + } + box_ptr download(FileTransferRequest && request) override { struct State { @@ -776,7 +797,8 @@ struct curlFileTransfer : public FileTransfer state->data.append(data); state->avail.notify_one(); }, - std::nullopt + std::nullopt, + false ); struct InnerSource : Source diff --git a/src/libstore/filetransfer.hh b/src/libstore/filetransfer.hh index 63e30aa0c..e246eab80 100644 --- a/src/libstore/filetransfer.hh +++ b/src/libstore/filetransfer.hh @@ -58,7 +58,6 @@ struct FileTransferRequest Headers headers; std::string expectedETag; bool verifyTLS = true; - bool head = false; FileTransferRequest(std::string_view uri) : uri(uri) { } @@ -96,6 +95,18 @@ struct FileTransfer virtual std::future enqueueUpload(const FileTransferRequest & request, std::string data) = 0; + /** + * Checks whether the given URI exists. For historical reasons this function + * treats HTTP 403 responses like HTTP 404 responses and returns `false` for + * both. This was originally done to handle unlistable S3 buckets, which may + * return 403 (not 404) if the reuqested object doesn't exist in the bucket. + * + * ## Bugs + * + * S3 objects are downloaded completely to answer this request. + */ + virtual bool exists(std::string_view uri) = 0; + /** * Download a file, returning its contents through a source. Will not return * before the transfer has fully started, ensuring that any errors thrown by diff --git a/src/libstore/http-binary-cache-store.cc b/src/libstore/http-binary-cache-store.cc index f54e799c9..f3efa8124 100644 --- a/src/libstore/http-binary-cache-store.cc +++ b/src/libstore/http-binary-cache-store.cc @@ -114,15 +114,8 @@ protected: checkEnabled(); try { - FileTransferRequest request{makeURI(path)}; - request.head = true; - getFileTransfer()->enqueueDownload(request).get(); - return true; + return getFileTransfer()->exists(makeURI(path)); } catch (FileTransferError & e) { - /* S3 buckets return 403 if a file doesn't exist and the - bucket is unlistable, so treat 403 as 404. */ - if (e.error == FileTransfer::NotFound || e.error == FileTransfer::Forbidden) - return false; maybeDisable(); throw; } From d82b212d3393c145a1722529b5d5b01cf65fd9bf Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Sun, 27 Oct 2024 18:27:11 +0100 Subject: [PATCH 02/12] libstore: remove FileTransferRequest::verifyTLS it's never set to false. Change-Id: I1e436c82f1097091a08faa1dfada75e51bd5edf9 --- src/libstore/filetransfer.cc | 9 ++------- src/libstore/filetransfer.hh | 1 - 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index 21fe5715e..f4ce04081 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -312,13 +312,8 @@ struct curlFileTransfer : public FileTransfer curl_easy_setopt(req, CURLOPT_INFILESIZE_LARGE, (curl_off_t) uploadData->length()); } - if (request.verifyTLS) { - if (settings.caFile != "") - curl_easy_setopt(req, CURLOPT_CAINFO, settings.caFile.get().c_str()); - } else { - curl_easy_setopt(req, CURLOPT_SSL_VERIFYPEER, 0); - curl_easy_setopt(req, CURLOPT_SSL_VERIFYHOST, 0); - } + if (settings.caFile != "") + curl_easy_setopt(req, CURLOPT_CAINFO, settings.caFile.get().c_str()); curl_easy_setopt(req, CURLOPT_CONNECTTIMEOUT, fileTransferSettings.connectTimeout.get()); diff --git a/src/libstore/filetransfer.hh b/src/libstore/filetransfer.hh index e246eab80..86c772fb1 100644 --- a/src/libstore/filetransfer.hh +++ b/src/libstore/filetransfer.hh @@ -57,7 +57,6 @@ struct FileTransferRequest std::string uri; Headers headers; std::string expectedETag; - bool verifyTLS = true; FileTransferRequest(std::string_view uri) : uri(uri) { } From 30bec83fa482f87cb55c355b06bbe78d67889d92 Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Sun, 27 Oct 2024 18:27:11 +0100 Subject: [PATCH 03/12] libstore: remove FileTransferResult::bodySize it's only used for internal bookkeeping in TransferItem. Change-Id: I467c5be023488be4a8a76e5f98a4ef25762df6f3 --- src/libstore/filetransfer.cc | 11 ++++++----- src/libstore/filetransfer.hh | 1 - 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index f4ce04081..fc1353349 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -58,6 +58,7 @@ struct curlFileTransfer : public FileTransfer unsigned int attempt = 0; const size_t tries = fileTransferSettings.tries; + uint64_t bodySize = 0; /* Don't start this download until the specified time point has been reached. */ @@ -153,7 +154,7 @@ struct curlFileTransfer : public FileTransfer const size_t realSize = size * nmemb; try { - result.bodySize += realSize; + bodySize += realSize; if (successfulStatuses.count(getHTTPStatus()) && this->dataCallback) { writtenToSink += realSize; @@ -184,7 +185,7 @@ struct curlFileTransfer : public FileTransfer if (std::smatch match; std::regex_match(line, match, statusLine)) { result.etag = ""; result.data.clear(); - result.bodySize = 0; + bodySize = 0; statusMsg = trim(match.str(1)); acceptRanges = false; encoding = ""; @@ -329,7 +330,7 @@ struct curlFileTransfer : public FileTransfer curl_easy_setopt(req, CURLOPT_RESUME_FROM_LARGE, writtenToSink); result.data.clear(); - result.bodySize = 0; + bodySize = 0; } void finish(CURLcode code) @@ -342,7 +343,7 @@ struct curlFileTransfer : public FileTransfer result.effectiveUri = effectiveUriCStr; debug("finished %s of '%s'; curl status = %d, HTTP status = %d, body = %d bytes", - verb(), request.uri, code, httpStatus, result.bodySize); + verb(), request.uri, code, httpStatus, bodySize); // this has to happen here until we can return an actual future. // wrapping user `callback`s instead is not possible because the @@ -358,7 +359,7 @@ struct curlFileTransfer : public FileTransfer else if (code == CURLE_OK && successfulStatuses.count(httpStatus)) { result.cached = httpStatus == 304; - act.progress(result.bodySize, result.bodySize); + act.progress(bodySize, bodySize); done = true; callback(nullptr, std::move(result)); } diff --git a/src/libstore/filetransfer.hh b/src/libstore/filetransfer.hh index 86c772fb1..ed22578a2 100644 --- a/src/libstore/filetransfer.hh +++ b/src/libstore/filetransfer.hh @@ -68,7 +68,6 @@ struct FileTransferResult std::string etag; std::string effectiveUri; std::string data; - uint64_t bodySize = 0; /* An "immutable" URL for this resource (i.e. one whose contents will never change), as returned by the `Link: ; rel="immutable"` header. */ From a839c31e6cc4330e3d714c2609a46de49d11ce6a Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Sun, 27 Oct 2024 18:27:11 +0100 Subject: [PATCH 04/12] libstore: remove FileTransferRequest::expectedETag just another http specific used in only one place. Change-Id: I99361a7226f4e6cd8f18170d3683c0025657bcb3 --- src/libfetchers/tarball.cc | 2 +- src/libstore/filetransfer.cc | 2 -- src/libstore/filetransfer.hh | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc index 69ed53f79..8236c5ea1 100644 --- a/src/libfetchers/tarball.cc +++ b/src/libfetchers/tarball.cc @@ -43,7 +43,7 @@ DownloadFileResult downloadFile( FileTransferRequest request(url); request.headers = headers; if (cached) - request.expectedETag = getStrAttr(cached->infoAttrs, "etag"); + request.headers.emplace_back("If-None-Match", getStrAttr(cached->infoAttrs, "etag")); FileTransferResult res; try { res = getFileTransfer()->enqueueDownload(request).get(); diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index fc1353349..11f369efd 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -111,8 +111,6 @@ struct curlFileTransfer : public FileTransfer , dataCallback(std::move(dataCallback)) { requestHeaders = curl_slist_append(requestHeaders, "Accept-Encoding: zstd, br, gzip, deflate, bzip2, xz"); - if (!request.expectedETag.empty()) - requestHeaders = curl_slist_append(requestHeaders, ("If-None-Match: " + request.expectedETag).c_str()); for (auto it = request.headers.begin(); it != request.headers.end(); ++it){ requestHeaders = curl_slist_append(requestHeaders, fmt("%s: %s", it->first, it->second).c_str()); } diff --git a/src/libstore/filetransfer.hh b/src/libstore/filetransfer.hh index ed22578a2..09f7d5c7f 100644 --- a/src/libstore/filetransfer.hh +++ b/src/libstore/filetransfer.hh @@ -56,7 +56,6 @@ struct FileTransferRequest { std::string uri; Headers headers; - std::string expectedETag; FileTransferRequest(std::string_view uri) : uri(uri) { } From 6f18e1ebde857b6cf536b71656ca46438d5409a7 Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Sun, 27 Oct 2024 18:27:11 +0100 Subject: [PATCH 05/12] libstore: remove FileTransferRequest it's just a uri and some headers now. those can be function arguments with no loss of clarity. *actual* additional arguments, for example a TLS context with additional certificates, could be added on a new and improved FileTransfer class that carries not just a backend reference but some real, visible context for its transfers. curl not being very multi-threading-friendly when using multi handles will make sharing a bit hard anyway once we drop the single global download worker thread Change-Id: Id2112c95cbd118c6d920488f38d272d7da926460 --- src/libfetchers/fetchers.hh | 2 +- src/libfetchers/tarball.cc | 8 ++- src/libstore/builtins/fetchurl.cc | 4 +- src/libstore/filetransfer.cc | 72 ++++++++++++++----------- src/libstore/filetransfer.hh | 21 +++----- src/libstore/http-binary-cache-store.cc | 17 +++--- src/nix/prefetch.cc | 5 +- src/nix/upgrade-nix.cc | 3 +- tests/unit/libstore/filetransfer.cc | 14 +++-- 9 files changed, 69 insertions(+), 77 deletions(-) diff --git a/src/libfetchers/fetchers.hh b/src/libfetchers/fetchers.hh index f778908fb..064d5e4ec 100644 --- a/src/libfetchers/fetchers.hh +++ b/src/libfetchers/fetchers.hh @@ -209,7 +209,7 @@ DownloadFileResult downloadFile( const std::string & url, const std::string & name, bool locked, - const Headers & headers = {}); + Headers headers = {}); struct DownloadTarballResult { diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc index 8236c5ea1..13ba44b51 100644 --- a/src/libfetchers/tarball.cc +++ b/src/libfetchers/tarball.cc @@ -15,7 +15,7 @@ DownloadFileResult downloadFile( const std::string & url, const std::string & name, bool locked, - const Headers & headers) + Headers headers) { // FIXME: check store @@ -40,13 +40,11 @@ DownloadFileResult downloadFile( if (cached && !cached->expired) return useCached(); - FileTransferRequest request(url); - request.headers = headers; if (cached) - request.headers.emplace_back("If-None-Match", getStrAttr(cached->infoAttrs, "etag")); + headers.emplace_back("If-None-Match", getStrAttr(cached->infoAttrs, "etag")); FileTransferResult res; try { - res = getFileTransfer()->enqueueDownload(request).get(); + res = getFileTransfer()->enqueueDownload(url, headers).get(); } catch (FileTransferError & e) { if (cached) { warn("%s; using cached version", e.msg()); diff --git a/src/libstore/builtins/fetchurl.cc b/src/libstore/builtins/fetchurl.cc index 69a9f993f..e263997e1 100644 --- a/src/libstore/builtins/fetchurl.cc +++ b/src/libstore/builtins/fetchurl.cc @@ -36,9 +36,7 @@ void builtinFetchurl(const BasicDerivation & drv, const std::string & netrcData, auto fetch = [&](const std::string & url) { - FileTransferRequest request(url); - - auto raw = fileTransfer->download(std::move(request)); + auto raw = fileTransfer->download(url); auto decompressor = makeDecompressionSource( unpack && mainUrl.ends_with(".xz") ? "xz" : "none", *raw); diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index 11f369efd..5a606b207 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -44,7 +44,7 @@ struct curlFileTransfer : public FileTransfer struct TransferItem : public std::enable_shared_from_this { curlFileTransfer & fileTransfer; - FileTransferRequest request; + std::string uri; FileTransferResult result; Activity act; std::optional uploadData; @@ -90,7 +90,8 @@ struct curlFileTransfer : public FileTransfer } TransferItem(curlFileTransfer & fileTransfer, - const FileTransferRequest & request, + const std::string & uri, + const Headers & headers, ActivityId parentAct, std::invocable auto callback, std::function dataCallback, @@ -98,10 +99,10 @@ struct curlFileTransfer : public FileTransfer bool noBody ) : fileTransfer(fileTransfer) - , request(request) + , uri(uri) , act(*logger, lvlTalkative, actFileTransfer, - fmt(uploadData ? "uploading '%s'" : "downloading '%s'", request.uri), - {request.uri}, parentAct) + fmt(uploadData ? "uploading '%s'" : "downloading '%s'", uri), + {uri}, parentAct) , uploadData(std::move(uploadData)) , noBody(noBody) , callback([cb{std::move(callback)}] (std::exception_ptr ex, FileTransferResult r) { @@ -111,7 +112,7 @@ struct curlFileTransfer : public FileTransfer , dataCallback(std::move(dataCallback)) { requestHeaders = curl_slist_append(requestHeaders, "Accept-Encoding: zstd, br, gzip, deflate, bzip2, xz"); - for (auto it = request.headers.begin(); it != request.headers.end(); ++it){ + for (auto it = headers.begin(); it != headers.end(); ++it){ requestHeaders = curl_slist_append(requestHeaders, fmt("%s: %s", it->first, it->second).c_str()); } } @@ -126,7 +127,7 @@ struct curlFileTransfer : public FileTransfer if (requestHeaders) curl_slist_free_all(requestHeaders); try { if (!done) - fail(FileTransferError(Interrupted, {}, "download of '%s' was interrupted", request.uri)); + fail(FileTransferError(Interrupted, {}, "download of '%s' was interrupted", uri)); } catch (...) { ignoreExceptionInDestructor(); } @@ -177,7 +178,7 @@ struct curlFileTransfer : public FileTransfer { size_t realSize = size * nmemb; std::string line(static_cast(contents), realSize); - printMsg(lvlVomit, "got header for '%s': %s", request.uri, trim(line)); + printMsg(lvlVomit, "got header for '%s': %s", uri, trim(line)); static std::regex statusLine("HTTP/[^ ]+ +[0-9]+(.*)", std::regex::extended | std::regex::icase); if (std::smatch match; std::regex_match(line, match, statusLine)) { @@ -273,7 +274,7 @@ struct curlFileTransfer : public FileTransfer curl_easy_setopt(req, CURLOPT_DEBUGFUNCTION, TransferItem::debugCallback); } - curl_easy_setopt(req, CURLOPT_URL, request.uri.c_str()); + curl_easy_setopt(req, CURLOPT_URL, uri.c_str()); curl_easy_setopt(req, CURLOPT_FOLLOWLOCATION, 1L); curl_easy_setopt(req, CURLOPT_MAXREDIRS, 10); curl_easy_setopt(req, CURLOPT_NOSIGNAL, 1); @@ -341,7 +342,7 @@ struct curlFileTransfer : public FileTransfer result.effectiveUri = effectiveUriCStr; debug("finished %s of '%s'; curl status = %d, HTTP status = %d, body = %d bytes", - verb(), request.uri, code, httpStatus, bodySize); + verb(), uri, code, httpStatus, bodySize); // this has to happen here until we can return an actual future. // wrapping user `callback`s instead is not possible because the @@ -419,17 +420,17 @@ struct curlFileTransfer : public FileTransfer response = std::move(result.data); auto exc = code == CURLE_ABORTED_BY_CALLBACK && _isInterrupted - ? FileTransferError(Interrupted, std::move(response), "%s of '%s' was interrupted", verb(), request.uri) + ? FileTransferError(Interrupted, std::move(response), "%s of '%s' was interrupted", verb(), uri) : httpStatus != 0 ? FileTransferError(err, std::move(response), "unable to %s '%s': HTTP error %d (%s)%s", - verb(), request.uri, httpStatus, statusMsg, + verb(), uri, httpStatus, statusMsg, code == CURLE_OK ? "" : fmt(" (curl error: %s)", curl_easy_strerror(code))) : FileTransferError(err, std::move(response), "unable to %s '%s': %s (%d)", - verb(), request.uri, curl_easy_strerror(code), code); + verb(), uri, curl_easy_strerror(code), code); /* If this is a transient error, then maybe retry the download after a while. If we're writing to a @@ -596,7 +597,7 @@ struct curlFileTransfer : public FileTransfer } for (auto & item : incoming) { - debug("starting %s of %s", item->verb(), item->request.uri); + debug("starting %s of %s", item->verb(), item->uri); item->init(); curl_multi_add_handle(curlm, item->req); item->active = true; @@ -626,9 +627,9 @@ struct curlFileTransfer : public FileTransfer std::shared_ptr enqueueItem(std::shared_ptr item) { if (item->uploadData - && !item->request.uri.starts_with("http://") - && !item->request.uri.starts_with("https://")) - throw nix::Error("uploading to '%s' is not supported", item->request.uri); + && !item->uri.starts_with("http://") + && !item->uri.starts_with("https://")) + throw nix::Error("uploading to '%s' is not supported", item->uri); { auto state(state_.lock()); @@ -656,23 +657,28 @@ struct curlFileTransfer : public FileTransfer } #endif - std::future enqueueDownload(const FileTransferRequest & request) override + std::future + enqueueDownload(const std::string & uri, const Headers & headers = {}) override { - return enqueueFileTransfer(request, std::nullopt, false); + return enqueueFileTransfer(uri, headers, std::nullopt, false); } std::future - enqueueUpload(const FileTransferRequest & request, std::string data) override + enqueueUpload(const std::string & uri, std::string data, const Headers & headers) override { - return enqueueFileTransfer(request, std::move(data), false); + return enqueueFileTransfer(uri, headers, std::move(data), false); } std::future enqueueFileTransfer( - const FileTransferRequest & request, std::optional data, bool noBody + const std::string & uri, + const Headers & headers, + std::optional data, + bool noBody ) { return enqueueFileTransfer( - request, + uri, + headers, [](std::exception_ptr ex) { if (ex) { std::rethrow_exception(ex); @@ -684,7 +690,9 @@ struct curlFileTransfer : public FileTransfer ); } - std::future enqueueFileTransfer(const FileTransferRequest & request, + std::future enqueueFileTransfer( + const std::string & uri, + const Headers & headers, std::invocable auto callback, std::function dataCallback, std::optional data, @@ -692,9 +700,9 @@ struct curlFileTransfer : public FileTransfer ) { /* Ugly hack to support s3:// URIs. */ - if (request.uri.starts_with("s3://")) { + if (uri.starts_with("s3://")) { // FIXME: do this on a worker thread - return std::async(std::launch::deferred, [uri{request.uri}]() -> FileTransferResult { + return std::async(std::launch::deferred, [uri]() -> FileTransferResult { #if ENABLE_S3 auto [bucketName, key, params] = parseS3Uri(uri); @@ -720,7 +728,8 @@ struct curlFileTransfer : public FileTransfer return enqueueItem(std::make_shared( *this, - request, + uri, + headers, getCurActivity(), std::move(callback), std::move(dataCallback), @@ -730,10 +739,10 @@ struct curlFileTransfer : public FileTransfer ->callback.get_future(); } - bool exists(std::string_view uri) override + bool exists(const std::string & uri, const Headers & headers) override { try { - enqueueFileTransfer(FileTransferRequest{uri}, std::nullopt, true).get(); + enqueueFileTransfer(uri, headers, std::nullopt, true).get(); return true; } catch (FileTransferError & e) { /* S3 buckets return 403 if a file doesn't exist and the @@ -744,7 +753,7 @@ struct curlFileTransfer : public FileTransfer } } - box_ptr download(FileTransferRequest && request) override + box_ptr download(const std::string & uri, const Headers & headers) override { struct State { bool done = false, failed = false; @@ -756,7 +765,8 @@ struct curlFileTransfer : public FileTransfer auto _state = std::make_shared>(); enqueueFileTransfer( - request, + uri, + headers, [_state](std::exception_ptr ex) { auto state(_state->lock()); state->done = true; diff --git a/src/libstore/filetransfer.hh b/src/libstore/filetransfer.hh index 09f7d5c7f..b885c59d5 100644 --- a/src/libstore/filetransfer.hh +++ b/src/libstore/filetransfer.hh @@ -52,15 +52,6 @@ struct FileTransferSettings : Config extern FileTransferSettings fileTransferSettings; -struct FileTransferRequest -{ - std::string uri; - Headers headers; - - FileTransferRequest(std::string_view uri) - : uri(uri) { } -}; - struct FileTransferResult { bool cached = false; @@ -83,14 +74,14 @@ struct FileTransfer * Enqueues a download request, returning a future for the result of * the download. The future may throw a FileTransferError exception. */ - virtual std::future enqueueDownload(const FileTransferRequest & request) = 0; + virtual std::future + enqueueDownload(const std::string & uri, const Headers & headers = {}) = 0; /** - * Enqueue an upload request, returning a future for the result of - * the upload. The future may throw a FileTransferError exception. + * Upload some data. May throw a FileTransferError exception. */ virtual std::future - enqueueUpload(const FileTransferRequest & request, std::string data) = 0; + enqueueUpload(const std::string & uri, std::string data, const Headers & headers = {}) = 0; /** * Checks whether the given URI exists. For historical reasons this function @@ -102,7 +93,7 @@ struct FileTransfer * * S3 objects are downloaded completely to answer this request. */ - virtual bool exists(std::string_view uri) = 0; + virtual bool exists(const std::string & uri, const Headers & headers = {}) = 0; /** * Download a file, returning its contents through a source. Will not return @@ -111,7 +102,7 @@ struct FileTransfer * thrown by the returned source. The source will only throw errors detected * during the transfer itself (decompression errors, connection drops, etc). */ - virtual box_ptr download(FileTransferRequest && request) = 0; + virtual box_ptr download(const std::string & uri, const Headers & headers = {}) = 0; enum Error { NotFound, Forbidden, Misc, Transient, Interrupted }; }; diff --git a/src/libstore/http-binary-cache-store.cc b/src/libstore/http-binary-cache-store.cc index f3efa8124..c7bfb8e0c 100644 --- a/src/libstore/http-binary-cache-store.cc +++ b/src/libstore/http-binary-cache-store.cc @@ -125,13 +125,15 @@ protected: std::shared_ptr> istream, const std::string & mimeType) override { - FileTransferRequest req{makeURI(path)}; auto data = StreamToSourceAdapter(istream).drain(); - req.headers = {{"Content-Type", mimeType}}; try { - getFileTransfer()->enqueueUpload(req, std::move(data)).get(); + getFileTransfer() + ->enqueueUpload(makeURI(path), std::move(data), {{"Content-Type", mimeType}}) + .get(); } catch (FileTransferError & e) { - throw UploadToHTTP("while uploading to HTTP binary cache at '%s': %s", cacheUri, e.msg()); + throw UploadToHTTP( + "while uploading to HTTP binary cache at '%s': %s", cacheUri, e.msg() + ); } } @@ -146,9 +148,8 @@ protected: box_ptr getFile(const std::string & path) override { checkEnabled(); - FileTransferRequest request{makeURI(path)}; try { - return getFileTransfer()->download(std::move(request)); + return getFileTransfer()->download(makeURI(path)); } catch (FileTransferError & e) { if (e.error == FileTransfer::NotFound || e.error == FileTransfer::Forbidden) throw NoSuchBinaryCacheFile("file '%s' does not exist in binary cache '%s'", path, getUri()); @@ -161,10 +162,8 @@ protected: { checkEnabled(); - FileTransferRequest request{makeURI(path)}; - try { - return std::move(getFileTransfer()->enqueueDownload(request).get().data); + return std::move(getFileTransfer()->enqueueDownload(makeURI(path)).get().data); } catch (FileTransferError & e) { if (e.error == FileTransfer::NotFound || e.error == FileTransfer::Forbidden) return {}; diff --git a/src/nix/prefetch.cc b/src/nix/prefetch.cc index 0183b0008..1fb7bb49b 100644 --- a/src/nix/prefetch.cc +++ b/src/nix/prefetch.cc @@ -48,7 +48,7 @@ std::string resolveMirrorUrl(EvalState & state, const std::string & url) std::tuple prefetchFile( ref store, - std::string_view url, + const std::string & url, std::optional name, HashType hashType, std::optional expectedHash, @@ -98,8 +98,7 @@ std::tuple prefetchFile( FdSink sink(fd.get()); - FileTransferRequest req(url); - getFileTransfer()->download(std::move(req))->drainInto(sink); + getFileTransfer()->download(url)->drainInto(sink); } /* Optionally unpack the file. */ diff --git a/src/nix/upgrade-nix.cc b/src/nix/upgrade-nix.cc index 3cf868a78..bf0d47000 100644 --- a/src/nix/upgrade-nix.cc +++ b/src/nix/upgrade-nix.cc @@ -285,8 +285,7 @@ struct CmdUpgradeNix : MixDryRun, EvalCommand Activity act(*logger, lvlInfo, actUnknown, "querying latest Nix version"); // FIXME: use nixos.org? - auto req = FileTransferRequest(storePathsUrl); - auto res = getFileTransfer()->enqueueDownload(req).get(); + auto res = getFileTransfer()->enqueueDownload(storePathsUrl).get(); auto state = std::make_unique(SearchPath{}, store); auto v = state->allocValue(); diff --git a/tests/unit/libstore/filetransfer.cc b/tests/unit/libstore/filetransfer.cc index 3c152ee84..3d4a8993d 100644 --- a/tests/unit/libstore/filetransfer.cc +++ b/tests/unit/libstore/filetransfer.cc @@ -136,7 +136,7 @@ TEST(FileTransfer, exceptionAbortsDownload) LambdaSink broken([](auto block) { throw Done(); }); - ASSERT_THROW(ft->download(FileTransferRequest("file:///dev/zero"))->drainInto(broken), Done); + ASSERT_THROW(ft->download("file:///dev/zero")->drainInto(broken), Done); // makeFileTransfer returns a ref<>, which cannot be cleared. since we also // can't default-construct it we'll have to overwrite it instead, but we'll @@ -155,7 +155,7 @@ TEST(FileTransfer, exceptionAbortsRead) auto [port, srv] = serveHTTP("200 ok", "content-length: 0\r\n", [] { return ""; }); auto ft = makeFileTransfer(); char buf[10] = ""; - ASSERT_THROW(ft->download(FileTransferRequest(fmt("http://[::1]:%d/index", port)))->read(buf, 10), EndOfFile); + ASSERT_THROW(ft->download(fmt("http://[::1]:%d/index", port))->read(buf, 10), EndOfFile); } TEST(FileTransfer, NOT_ON_DARWIN(reportsSetupErrors)) @@ -163,7 +163,7 @@ TEST(FileTransfer, NOT_ON_DARWIN(reportsSetupErrors)) auto [port, srv] = serveHTTP("404 not found", "", [] { return ""; }); auto ft = makeFileTransfer(); ASSERT_THROW( - ft->enqueueDownload(FileTransferRequest(fmt("http://[::1]:%d/index", port))).get(), + ft->enqueueDownload(fmt("http://[::1]:%d/index", port)).get(), FileTransferError ); } @@ -179,8 +179,7 @@ TEST(FileTransfer, NOT_ON_DARWIN(defersFailures)) return std::string(1024 * 1024, ' '); }); auto ft = makeFileTransfer(0); - FileTransferRequest req(fmt("http://[::1]:%d/index", port)); - auto src = ft->download(std::move(req)); + auto src = ft->download(fmt("http://[::1]:%d/index", port)); ASSERT_THROW(src->drain(), FileTransferError); } @@ -193,7 +192,7 @@ TEST(FileTransfer, NOT_ON_DARWIN(handlesContentEncoding)) auto ft = makeFileTransfer(); StringSink sink; - ft->download(FileTransferRequest(fmt("http://[::1]:%d/index", port)))->drainInto(sink); + ft->download(fmt("http://[::1]:%d/index", port))->drainInto(sink); EXPECT_EQ(sink.s, original); } @@ -216,8 +215,7 @@ TEST(FileTransfer, usesIntermediateLinkHeaders) {"200 ok", "content-length: 1\r\n", [] { return "a"; }}, }); auto ft = makeFileTransfer(0); - FileTransferRequest req(fmt("http://[::1]:%d/first", port)); - auto result = ft->enqueueDownload(req).get(); + auto result = ft->enqueueDownload(fmt("http://[::1]:%d/first", port)).get(); ASSERT_EQ(result.immutableUrl, "http://foo"); } From 5cd7055044d17af68d8cafb18da8b91e2e84b92b Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Sun, 27 Oct 2024 18:27:11 +0100 Subject: [PATCH 06/12] libstore: de-future-ize FileTransfer::enqueueUpload it's only used once, and synchronously at that. Change-Id: Ife9db15dd97bc0de8de59a25d27f3f7afeb8791b --- src/libstore/filetransfer.cc | 5 ++--- src/libstore/filetransfer.hh | 4 ++-- src/libstore/http-binary-cache-store.cc | 4 +--- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index 5a606b207..b68c636fa 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -663,10 +663,9 @@ struct curlFileTransfer : public FileTransfer return enqueueFileTransfer(uri, headers, std::nullopt, false); } - std::future - enqueueUpload(const std::string & uri, std::string data, const Headers & headers) override + void upload(const std::string & uri, std::string data, const Headers & headers) override { - return enqueueFileTransfer(uri, headers, std::move(data), false); + enqueueFileTransfer(uri, headers, std::move(data), false).get(); } std::future enqueueFileTransfer( diff --git a/src/libstore/filetransfer.hh b/src/libstore/filetransfer.hh index b885c59d5..2900fdee5 100644 --- a/src/libstore/filetransfer.hh +++ b/src/libstore/filetransfer.hh @@ -80,8 +80,8 @@ struct FileTransfer /** * Upload some data. May throw a FileTransferError exception. */ - virtual std::future - enqueueUpload(const std::string & uri, std::string data, const Headers & headers = {}) = 0; + virtual void + upload(const std::string & uri, std::string data, const Headers & headers = {}) = 0; /** * Checks whether the given URI exists. For historical reasons this function diff --git a/src/libstore/http-binary-cache-store.cc b/src/libstore/http-binary-cache-store.cc index c7bfb8e0c..4afec752f 100644 --- a/src/libstore/http-binary-cache-store.cc +++ b/src/libstore/http-binary-cache-store.cc @@ -127,9 +127,7 @@ protected: { auto data = StreamToSourceAdapter(istream).drain(); try { - getFileTransfer() - ->enqueueUpload(makeURI(path), std::move(data), {{"Content-Type", mimeType}}) - .get(); + getFileTransfer()->upload(makeURI(path), std::move(data), {{"Content-Type", mimeType}}); } catch (FileTransferError & e) { throw UploadToHTTP( "while uploading to HTTP binary cache at '%s': %s", cacheUri, e.msg() From 982d049d3bb2097b232774f8fd3ff97af538a826 Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Sun, 27 Oct 2024 18:27:11 +0100 Subject: [PATCH 07/12] libstore: remove FileTransferResult::data return it as a separate item in a pair instead. this will let us remove enqueueDownload() in favor of returning metadata from download() itself Change-Id: I74fad2ca15f920da1eefabc950c2baa2c360f2ba --- src/libfetchers/tarball.cc | 7 +-- src/libstore/filetransfer.cc | 63 ++++++++++++++----------- src/libstore/filetransfer.hh | 3 +- src/libstore/http-binary-cache-store.cc | 2 +- src/nix/upgrade-nix.cc | 4 +- tests/unit/libstore/filetransfer.cc | 2 +- 6 files changed, 44 insertions(+), 37 deletions(-) diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc index 13ba44b51..e7e934b2a 100644 --- a/src/libfetchers/tarball.cc +++ b/src/libfetchers/tarball.cc @@ -43,8 +43,9 @@ DownloadFileResult downloadFile( if (cached) headers.emplace_back("If-None-Match", getStrAttr(cached->infoAttrs, "etag")); FileTransferResult res; + std::string data; try { - res = getFileTransfer()->enqueueDownload(url, headers).get(); + std::tie(res, data) = getFileTransfer()->enqueueDownload(url, headers).get(); } catch (FileTransferError & e) { if (cached) { warn("%s; using cached version", e.msg()); @@ -69,8 +70,8 @@ DownloadFileResult downloadFile( storePath = std::move(cached->storePath); } else { StringSink sink; - sink << dumpString(res.data); - auto hash = hashString(HashType::SHA256, res.data); + sink << dumpString(data); + auto hash = hashString(HashType::SHA256, data); ValidPathInfo info { *store, name, diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index b68c636fa..6f78dad94 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -48,9 +48,12 @@ struct curlFileTransfer : public FileTransfer FileTransferResult result; Activity act; std::optional uploadData; + std::string downloadData; bool noBody = false; // \equiv HTTP HEAD, don't download data bool done = false; // whether either the success or failure function has been called - std::packaged_task callback; + std::packaged_task< + std::pair(std::exception_ptr, FileTransferResult)> + callback; std::function dataCallback; CURL * req = 0; bool active = false; // whether the handle has been added to the multi object @@ -105,9 +108,9 @@ struct curlFileTransfer : public FileTransfer {uri}, parentAct) , uploadData(std::move(uploadData)) , noBody(noBody) - , callback([cb{std::move(callback)}] (std::exception_ptr ex, FileTransferResult r) { + , callback([this, cb{std::move(callback)}] (std::exception_ptr ex, FileTransferResult r) { cb(ex); - return r; + return std::pair{std::move(r), std::move(downloadData)}; }) , dataCallback(std::move(dataCallback)) { @@ -159,7 +162,7 @@ struct curlFileTransfer : public FileTransfer writtenToSink += realSize; dataCallback(*this, {static_cast(contents), realSize}); } else { - this->result.data.append(static_cast(contents), realSize); + this->downloadData.append(static_cast(contents), realSize); } return realSize; @@ -183,7 +186,7 @@ struct curlFileTransfer : public FileTransfer static std::regex statusLine("HTTP/[^ ]+ +[0-9]+(.*)", std::regex::extended | std::regex::icase); if (std::smatch match; std::regex_match(line, match, statusLine)) { result.etag = ""; - result.data.clear(); + downloadData.clear(); bodySize = 0; statusMsg = trim(match.str(1)); acceptRanges = false; @@ -328,7 +331,7 @@ struct curlFileTransfer : public FileTransfer if (writtenToSink) curl_easy_setopt(req, CURLOPT_RESUME_FROM_LARGE, writtenToSink); - result.data.clear(); + downloadData.clear(); bodySize = 0; } @@ -348,8 +351,8 @@ struct curlFileTransfer : public FileTransfer // wrapping user `callback`s instead is not possible because the // Callback api expects std::functions, and copying Callbacks is // not possible due the promises they hold. - if (code == CURLE_OK && !dataCallback && result.data.length() > 0) { - result.data = decompress(encoding, result.data); + if (code == CURLE_OK && !dataCallback && downloadData.length() > 0) { + downloadData = decompress(encoding, downloadData); } if (writeException) @@ -417,7 +420,7 @@ struct curlFileTransfer : public FileTransfer std::optional response; if (!successfulStatuses.count(httpStatus)) - response = std::move(result.data); + response = std::move(downloadData); auto exc = code == CURLE_ABORTED_BY_CALLBACK && _isInterrupted ? FileTransferError(Interrupted, std::move(response), "%s of '%s' was interrupted", verb(), uri) @@ -657,7 +660,7 @@ struct curlFileTransfer : public FileTransfer } #endif - std::future + std::future> enqueueDownload(const std::string & uri, const Headers & headers = {}) override { return enqueueFileTransfer(uri, headers, std::nullopt, false); @@ -668,7 +671,7 @@ struct curlFileTransfer : public FileTransfer enqueueFileTransfer(uri, headers, std::move(data), false).get(); } - std::future enqueueFileTransfer( + std::future> enqueueFileTransfer( const std::string & uri, const Headers & headers, std::optional data, @@ -689,7 +692,7 @@ struct curlFileTransfer : public FileTransfer ); } - std::future enqueueFileTransfer( + std::future> enqueueFileTransfer( const std::string & uri, const Headers & headers, std::invocable auto callback, @@ -701,28 +704,32 @@ struct curlFileTransfer : public FileTransfer /* Ugly hack to support s3:// URIs. */ if (uri.starts_with("s3://")) { // FIXME: do this on a worker thread - return std::async(std::launch::deferred, [uri]() -> FileTransferResult { + return std::async( + std::launch::deferred, + [uri]() -> std::pair { #if ENABLE_S3 - auto [bucketName, key, params] = parseS3Uri(uri); + auto [bucketName, key, params] = parseS3Uri(uri); - std::string profile = getOr(params, "profile", ""); - std::string region = getOr(params, "region", Aws::Region::US_EAST_1); - std::string scheme = getOr(params, "scheme", ""); - std::string endpoint = getOr(params, "endpoint", ""); + std::string profile = getOr(params, "profile", ""); + std::string region = getOr(params, "region", Aws::Region::US_EAST_1); + std::string scheme = getOr(params, "scheme", ""); + std::string endpoint = getOr(params, "endpoint", ""); - S3Helper s3Helper(profile, region, scheme, endpoint); + S3Helper s3Helper(profile, region, scheme, endpoint); - // FIXME: implement ETag - auto s3Res = s3Helper.getObject(bucketName, key); - FileTransferResult res; - if (!s3Res.data) - throw FileTransferError(NotFound, "S3 object '%s' does not exist", uri); - res.data = std::move(*s3Res.data); - return res; + // FIXME: implement ETag + auto s3Res = s3Helper.getObject(bucketName, key); + FileTransferResult res; + if (!s3Res.data) + throw FileTransferError(NotFound, "S3 object '%s' does not exist", uri); + return {res, std::move(*s3Res.data)}; #else - throw nix::Error("cannot download '%s' because Lix is not built with S3 support", uri); + throw nix::Error( + "cannot download '%s' because Lix is not built with S3 support", uri + ); #endif - }); + } + ); } return enqueueItem(std::make_shared( diff --git a/src/libstore/filetransfer.hh b/src/libstore/filetransfer.hh index 2900fdee5..8ee88a7a8 100644 --- a/src/libstore/filetransfer.hh +++ b/src/libstore/filetransfer.hh @@ -57,7 +57,6 @@ struct FileTransferResult bool cached = false; std::string etag; std::string effectiveUri; - std::string data; /* An "immutable" URL for this resource (i.e. one whose contents will never change), as returned by the `Link: ; rel="immutable"` header. */ @@ -74,7 +73,7 @@ struct FileTransfer * Enqueues a download request, returning a future for the result of * the download. The future may throw a FileTransferError exception. */ - virtual std::future + virtual std::future> enqueueDownload(const std::string & uri, const Headers & headers = {}) = 0; /** diff --git a/src/libstore/http-binary-cache-store.cc b/src/libstore/http-binary-cache-store.cc index 4afec752f..80524b58d 100644 --- a/src/libstore/http-binary-cache-store.cc +++ b/src/libstore/http-binary-cache-store.cc @@ -161,7 +161,7 @@ protected: checkEnabled(); try { - return std::move(getFileTransfer()->enqueueDownload(makeURI(path)).get().data); + return std::move(getFileTransfer()->enqueueDownload(makeURI(path)).get().second); } catch (FileTransferError & e) { if (e.error == FileTransfer::NotFound || e.error == FileTransfer::Forbidden) return {}; diff --git a/src/nix/upgrade-nix.cc b/src/nix/upgrade-nix.cc index bf0d47000..c6cc74672 100644 --- a/src/nix/upgrade-nix.cc +++ b/src/nix/upgrade-nix.cc @@ -285,11 +285,11 @@ struct CmdUpgradeNix : MixDryRun, EvalCommand Activity act(*logger, lvlInfo, actUnknown, "querying latest Nix version"); // FIXME: use nixos.org? - auto res = getFileTransfer()->enqueueDownload(storePathsUrl).get(); + auto [res, data] = getFileTransfer()->enqueueDownload(storePathsUrl).get(); auto state = std::make_unique(SearchPath{}, store); auto v = state->allocValue(); - state->eval(state->parseExprFromString(res.data, state->rootPath(CanonPath("/no-such-path"))), *v); + state->eval(state->parseExprFromString(data, state->rootPath(CanonPath("/no-such-path"))), *v); Bindings & bindings(*state->allocBindings(0)); auto v2 = findAlongAttrPath(*state, settings.thisSystem, bindings, *v).first; diff --git a/tests/unit/libstore/filetransfer.cc b/tests/unit/libstore/filetransfer.cc index 3d4a8993d..0e0a2439e 100644 --- a/tests/unit/libstore/filetransfer.cc +++ b/tests/unit/libstore/filetransfer.cc @@ -215,7 +215,7 @@ TEST(FileTransfer, usesIntermediateLinkHeaders) {"200 ok", "content-length: 1\r\n", [] { return "a"; }}, }); auto ft = makeFileTransfer(0); - auto result = ft->enqueueDownload(fmt("http://[::1]:%d/first", port)).get(); + auto [result, _data] = ft->enqueueDownload(fmt("http://[::1]:%d/first", port)).get(); ASSERT_EQ(result.immutableUrl, "http://foo"); } From c82407fc1e8696d3520b03825f2b66864e61b9e2 Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Sun, 27 Oct 2024 18:27:11 +0100 Subject: [PATCH 08/12] libstore: always allocate TransferItem::req there's no reason not to do this. also improve error handling a bit. Change-Id: I35853a919fa58a9a34ad47ffab6de77ba6f7fb86 --- src/libstore/filetransfer.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index 6f78dad94..97411bdf3 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -55,7 +55,7 @@ struct curlFileTransfer : public FileTransfer std::pair(std::exception_ptr, FileTransferResult)> callback; std::function dataCallback; - CURL * req = 0; + CURL * req; // must never be nullptr bool active = false; // whether the handle has been added to the multi object std::string statusMsg; @@ -113,7 +113,11 @@ struct curlFileTransfer : public FileTransfer return std::pair{std::move(r), std::move(downloadData)}; }) , dataCallback(std::move(dataCallback)) + , req(curl_easy_init()) { + if (req == nullptr) { + throw FileTransferError(Misc, {}, "could not allocate curl handle"); + } requestHeaders = curl_slist_append(requestHeaders, "Accept-Encoding: zstd, br, gzip, deflate, bzip2, xz"); for (auto it = headers.begin(); it != headers.end(); ++it){ requestHeaders = curl_slist_append(requestHeaders, fmt("%s: %s", it->first, it->second).c_str()); @@ -122,11 +126,9 @@ struct curlFileTransfer : public FileTransfer ~TransferItem() { - if (req) { - if (active) - curl_multi_remove_handle(fileTransfer.curlm, req); - curl_easy_cleanup(req); - } + if (active) + curl_multi_remove_handle(fileTransfer.curlm, req); + curl_easy_cleanup(req); if (requestHeaders) curl_slist_free_all(requestHeaders); try { if (!done) @@ -268,8 +270,6 @@ struct curlFileTransfer : public FileTransfer void init() { - if (!req) req = curl_easy_init(); - curl_easy_reset(req); if (verbosity >= lvlVomit) { From 97c76c46556e5c01f6e895b670311cba09e7d57c Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Sun, 27 Oct 2024 18:27:11 +0100 Subject: [PATCH 09/12] libstore: remove TransferItem::active it's always legal to call curl_multi_remove_handle on a valid pair of multi and easy handles. removing an easy handle that is not currently attached to a multi handle is a no-op, and removing an easy handle of a different multi handle is something we can't reasonably trigger. if we *did* ever manage it would result in an error we'd ignore, and the handles in question would not be changed at all. this is just simpler Change-Id: I85ec62ff89385981ca49d243376b9c32586bd128 --- src/libstore/filetransfer.cc | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index 97411bdf3..b1a1a4bcf 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -56,7 +56,6 @@ struct curlFileTransfer : public FileTransfer callback; std::function dataCallback; CURL * req; // must never be nullptr - bool active = false; // whether the handle has been added to the multi object std::string statusMsg; unsigned int attempt = 0; @@ -126,8 +125,7 @@ struct curlFileTransfer : public FileTransfer ~TransferItem() { - if (active) - curl_multi_remove_handle(fileTransfer.curlm, req); + curl_multi_remove_handle(fileTransfer.curlm, req); curl_easy_cleanup(req); if (requestHeaders) curl_slist_free_all(requestHeaders); try { @@ -557,7 +555,6 @@ struct curlFileTransfer : public FileTransfer assert(i != items.end()); i->second->finish(msg->data.result); curl_multi_remove_handle(curlm, i->second->req); - i->second->active = false; items.erase(i); } } @@ -603,7 +600,6 @@ struct curlFileTransfer : public FileTransfer debug("starting %s of %s", item->verb(), item->uri); item->init(); curl_multi_add_handle(curlm, item->req); - item->active = true; items[item->req] = item; } } From 2b3bdda027c216b1a0cb2a6e69df0b07f1ffe032 Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Sun, 27 Oct 2024 18:27:11 +0100 Subject: [PATCH 10/12] libstore: collect effective url and cachedness earlier this will let us return metadata for a transfer without having to wait for the entire transfer to complete. more importantly for current uses though is that we could now send retries to the effective url directly instead of retreading the entire redirect path. this improves latency, and in such cases where redirects change while we're downloading it'll also improve correctness (previously we'd silently download bad data). Change-Id: I6fcd920eb96fbdb2e960b73773c0b854e0300e99 --- src/libstore/filetransfer.cc | 48 ++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index b1a1a4bcf..24cc67d8a 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -50,7 +50,17 @@ struct curlFileTransfer : public FileTransfer std::optional uploadData; std::string downloadData; bool noBody = false; // \equiv HTTP HEAD, don't download data - bool done = false; // whether either the success or failure function has been called + enum { + /// nothing has been transferred yet + initialSetup, + /// at least some metadata has already been transferred, + /// but the transfer did not succeed and is now retrying + retrySetup, + /// data transfer in progress + transferring, + /// transfer complete, result or failure reported + transferComplete, + } phase = initialSetup; std::packaged_task< std::pair(std::exception_ptr, FileTransferResult)> callback; @@ -129,7 +139,7 @@ struct curlFileTransfer : public FileTransfer curl_easy_cleanup(req); if (requestHeaders) curl_slist_free_all(requestHeaders); try { - if (!done) + if (phase != transferComplete) fail(FileTransferError(Interrupted, {}, "download of '%s' was interrupted", uri)); } catch (...) { ignoreExceptionInDestructor(); @@ -138,8 +148,8 @@ struct curlFileTransfer : public FileTransfer void failEx(std::exception_ptr ex) { - assert(!done); - done = true; + assert(phase != transferComplete); + phase = transferComplete; callback(ex, std::move(result)); } @@ -149,6 +159,22 @@ struct curlFileTransfer : public FileTransfer failEx(std::make_exception_ptr(std::forward(e))); } + void maybeFinishSetup() + { + if (phase > retrySetup) { + return; + } + + char * effectiveUriCStr = nullptr; + curl_easy_getinfo(req, CURLINFO_EFFECTIVE_URL, &effectiveUriCStr); + if (effectiveUriCStr) + result.effectiveUri = effectiveUriCStr; + + result.cached = getHTTPStatus() == 304; + + phase = transferring; + } + std::exception_ptr writeException; size_t writeCallback(void * contents, size_t size, size_t nmemb) @@ -156,6 +182,8 @@ struct curlFileTransfer : public FileTransfer const size_t realSize = size * nmemb; try { + maybeFinishSetup(); + bodySize += realSize; if (successfulStatuses.count(getHTTPStatus()) && this->dataCallback) { @@ -268,6 +296,10 @@ struct curlFileTransfer : public FileTransfer void init() { + if (phase > initialSetup) { + phase = retrySetup; + } + curl_easy_reset(req); if (verbosity >= lvlVomit) { @@ -337,10 +369,7 @@ struct curlFileTransfer : public FileTransfer { auto httpStatus = getHTTPStatus(); - char * effectiveUriCStr = nullptr; - curl_easy_getinfo(req, CURLINFO_EFFECTIVE_URL, &effectiveUriCStr); - if (effectiveUriCStr) - result.effectiveUri = effectiveUriCStr; + maybeFinishSetup(); debug("finished %s of '%s'; curl status = %d, HTTP status = %d, body = %d bytes", verb(), uri, code, httpStatus, bodySize); @@ -358,9 +387,8 @@ struct curlFileTransfer : public FileTransfer else if (code == CURLE_OK && successfulStatuses.count(httpStatus)) { - result.cached = httpStatus == 304; act.progress(bodySize, bodySize); - done = true; + phase = transferComplete; callback(nullptr, std::move(result)); } From 7c716b9716559eb5c2bcdbe72355678bde30c87e Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Sun, 27 Oct 2024 18:27:11 +0100 Subject: [PATCH 11/12] libstore: use effective transfer url for retries do not retread the entire redirection path if we've seen the end of the road. this avoids silently downloading wrong data, and notifies us when a url we've received data from turns into a redirect when retrying. for reasons of simplicity we don't turn of libcurl redirects on retries. if we did that we'd have to conditionally process http status codes, which sounds annoying and would make the header callback even more of a mess. Change-Id: Ide0c0512ef9b2579350101246d654a2375541a39 --- src/libstore/filetransfer.cc | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index 24cc67d8a..c75e7a627 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -167,8 +167,19 @@ struct curlFileTransfer : public FileTransfer char * effectiveUriCStr = nullptr; curl_easy_getinfo(req, CURLINFO_EFFECTIVE_URL, &effectiveUriCStr); - if (effectiveUriCStr) + if (effectiveUriCStr) { + if (!result.effectiveUri.empty() && result.effectiveUri != effectiveUriCStr) { + throw FileTransferError( + Misc, + {}, + "uri %s changed final destination from %s to %s during transfer", + uri, + result.effectiveUri, + effectiveUriCStr + ); + } result.effectiveUri = effectiveUriCStr; + } result.cached = getHTTPStatus() == 304; @@ -307,6 +318,10 @@ struct curlFileTransfer : public FileTransfer curl_easy_setopt(req, CURLOPT_DEBUGFUNCTION, TransferItem::debugCallback); } + // use the effective URI of the previous transfer for retries. this avoids + // some silent corruption if a redirect changes between starting and retry. + const auto & uri = result.effectiveUri.empty() ? this->uri : result.effectiveUri; + curl_easy_setopt(req, CURLOPT_URL, uri.c_str()); curl_easy_setopt(req, CURLOPT_FOLLOWLOCATION, 1L); curl_easy_setopt(req, CURLOPT_MAXREDIRS, 10); From 212a14bb1f2f8b844c314d75ac31f77785ba6b20 Mon Sep 17 00:00:00 2001 From: eldritch horrors Date: Sun, 27 Oct 2024 18:27:11 +0100 Subject: [PATCH 12/12] libstore: check that transfer headers don't change during retries etag changing implies with high probability that the content of the resource changed. immutable url changing implies that the immutable url we got previously was wrong, which is probably a server bug. if the encoding changes our decoding will break completely, so that is also very illegal. one notable change we still allow is etags going away completely, mostly since this does not imply any data changes. Change-Id: I0220ceddc3fd732cd1b3bb39b40021cc631baadc --- src/libstore/filetransfer.cc | 56 +++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index c75e7a627..5a0f6e634 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -159,6 +159,14 @@ struct curlFileTransfer : public FileTransfer failEx(std::make_exception_ptr(std::forward(e))); } + [[noreturn]] + void throwChangedTarget(std::string_view what, std::string_view from, std::string_view to) + { + throw FileTransferError( + Misc, {}, "uri %s changed %s from %s to %s during transfer", uri, what, from, to + ); + } + void maybeFinishSetup() { if (phase > retrySetup) { @@ -169,14 +177,7 @@ struct curlFileTransfer : public FileTransfer curl_easy_getinfo(req, CURLINFO_EFFECTIVE_URL, &effectiveUriCStr); if (effectiveUriCStr) { if (!result.effectiveUri.empty() && result.effectiveUri != effectiveUriCStr) { - throw FileTransferError( - Misc, - {}, - "uri %s changed final destination from %s to %s during transfer", - uri, - result.effectiveUri, - effectiveUriCStr - ); + throwChangedTarget("final destination", result.effectiveUri, effectiveUriCStr); } result.effectiveUri = effectiveUriCStr; } @@ -186,7 +187,7 @@ struct curlFileTransfer : public FileTransfer phase = transferring; } - std::exception_ptr writeException; + std::exception_ptr callbackException; size_t writeCallback(void * contents, size_t size, size_t nmemb) { @@ -206,7 +207,7 @@ struct curlFileTransfer : public FileTransfer return realSize; } catch (...) { - writeException = std::current_exception(); + callbackException = std::current_exception(); return CURL_WRITEFUNC_ERROR; } } @@ -217,30 +218,39 @@ struct curlFileTransfer : public FileTransfer } size_t headerCallback(void * contents, size_t size, size_t nmemb) - { + try { size_t realSize = size * nmemb; std::string line(static_cast(contents), realSize); printMsg(lvlVomit, "got header for '%s': %s", uri, trim(line)); static std::regex statusLine("HTTP/[^ ]+ +[0-9]+(.*)", std::regex::extended | std::regex::icase); if (std::smatch match; std::regex_match(line, match, statusLine)) { - result.etag = ""; downloadData.clear(); bodySize = 0; statusMsg = trim(match.str(1)); acceptRanges = false; - encoding = ""; } else { auto i = line.find(':'); if (i != std::string::npos) { std::string name = toLower(trim(line.substr(0, i))); if (name == "etag") { - result.etag = trim(line.substr(i + 1)); + // NOTE we don't check that the etag hasn't gone *missing*. technically + // this is not an error as long as we get the same data from the remote. + auto etag = trim(line.substr(i + 1)); + if (!result.etag.empty() && result.etag != etag) { + throwChangedTarget("ETag", result.etag, etag); + } + result.etag = std::move(etag); } - else if (name == "content-encoding") - encoding = trim(line.substr(i + 1)); + else if (name == "content-encoding") { + auto encoding = trim(line.substr(i + 1)); + if (!this->encoding.empty() && this->encoding != encoding) { + throwChangedTarget("encoding", this->encoding, encoding); + } + this->encoding = std::move(encoding); + } else if (name == "accept-ranges" && toLower(trim(line.substr(i + 1))) == "bytes") acceptRanges = true; @@ -248,14 +258,20 @@ struct curlFileTransfer : public FileTransfer else if (name == "link" || name == "x-amz-meta-link") { auto value = trim(line.substr(i + 1)); static std::regex linkRegex("<([^>]*)>; rel=\"immutable\"", std::regex::extended | std::regex::icase); - if (std::smatch match; std::regex_match(value, match, linkRegex)) + if (std::smatch match; std::regex_match(value, match, linkRegex)) { + if (result.immutableUrl && result.immutableUrl != match.str(1)) { + throwChangedTarget("immutable url", *result.immutableUrl, match.str(1)); + } result.immutableUrl = match.str(1); - else + } else debug("got invalid link header '%s'", value); } } } return realSize; + } catch (...) { + callbackException = std::current_exception(); + return CURL_WRITEFUNC_ERROR; } static size_t headerCallbackWrapper(void * contents, size_t size, size_t nmemb, void * userp) @@ -397,8 +413,8 @@ struct curlFileTransfer : public FileTransfer downloadData = decompress(encoding, downloadData); } - if (writeException) - failEx(writeException); + if (callbackException) + failEx(callbackException); else if (code == CURLE_OK && successfulStatuses.count(httpStatus)) {