lix/src/libfetchers/tarball.cc

312 lines
9.8 KiB
C++
Raw Normal View History

#include "fetchers.hh"
#include "cache.hh"
2020-04-08 12:12:22 +00:00
#include "filetransfer.hh"
#include "globals.hh"
#include "store-api.hh"
#include "archive.hh"
#include "tarfile.hh"
#include "types.hh"
#include "split.hh"
namespace nix::fetchers {
DownloadFileResult downloadFile(
ref<Store> store,
const std::string & url,
const std::string & name,
bool locked,
2020-09-29 11:05:19 +00:00
const Headers & headers)
{
// FIXME: check store
Attrs inAttrs({
{"type", "file"},
{"url", url},
{"name", name},
});
auto cached = getCache()->lookupExpired(store, inAttrs);
auto useCached = [&]() -> DownloadFileResult
{
return {
.storePath = std::move(cached->storePath),
.etag = getStrAttr(cached->infoAttrs, "etag"),
.effectiveUrl = getStrAttr(cached->infoAttrs, "url"),
.immutableUrl = maybeGetStrAttr(cached->infoAttrs, "immutableUrl"),
};
};
if (cached && !cached->expired)
return useCached();
2020-09-29 11:05:19 +00:00
FileTransferRequest request(url);
request.headers = headers;
if (cached)
request.expectedETag = getStrAttr(cached->infoAttrs, "etag");
2020-04-08 12:12:22 +00:00
FileTransferResult res;
try {
res = getFileTransfer()->transfer(request);
2020-04-08 12:12:22 +00:00
} catch (FileTransferError & e) {
if (cached) {
warn("%s; using cached version", e.msg());
return useCached();
} else
throw;
}
// FIXME: write to temporary file.
Attrs infoAttrs({
{"etag", res.etag},
{"url", res.effectiveUri},
});
if (res.immutableUrl)
infoAttrs.emplace("immutableUrl", *res.immutableUrl);
std::optional<StorePath> storePath;
if (res.cached) {
assert(cached);
storePath = std::move(cached->storePath);
} else {
StringSink sink;
sink << dumpString(res.data);
auto hash = hashString(HashType::SHA256, res.data);
2020-08-06 18:31:48 +00:00
ValidPathInfo info {
2020-10-07 13:52:20 +00:00
*store,
name,
FixedOutputInfo {
.method = FileIngestionMethod::Flat,
.hash = hash,
.references = {},
2020-10-07 13:52:20 +00:00
},
hashString(HashType::SHA256, sink.s),
2020-08-06 18:31:48 +00:00
};
info.narSize = sink.s.size();
auto source = StringSource { sink.s };
store->addToStore(info, source, NoRepair, NoCheckSigs);
storePath = std::move(info.path);
}
getCache()->add(
store,
inAttrs,
infoAttrs,
*storePath,
locked);
if (url != res.effectiveUri)
getCache()->add(
store,
{
{"type", "file"},
{"url", res.effectiveUri},
{"name", name},
},
infoAttrs,
*storePath,
locked);
return {
.storePath = std::move(*storePath),
.etag = res.etag,
.effectiveUrl = res.effectiveUri,
.immutableUrl = res.immutableUrl,
};
}
DownloadTarballResult downloadTarball(
ref<Store> store,
const std::string & url,
const std::string & name,
bool locked,
2020-09-29 11:05:19 +00:00
const Headers & headers)
{
Attrs inAttrs({
{"type", "tarball"},
{"url", url},
{"name", name},
});
auto cached = getCache()->lookupExpired(store, inAttrs);
if (cached && !cached->expired)
return {
.tree = Tree { .actualPath = store->toRealPath(cached->storePath), .storePath = std::move(cached->storePath) },
.lastModified = (time_t) getIntAttr(cached->infoAttrs, "lastModified"),
.immutableUrl = maybeGetStrAttr(cached->infoAttrs, "immutableUrl"),
};
auto res = downloadFile(store, url, name, locked, headers);
std::optional<StorePath> unpackedStorePath;
time_t lastModified;
if (cached && res.etag != "" && getStrAttr(cached->infoAttrs, "etag") == res.etag) {
unpackedStorePath = std::move(cached->storePath);
lastModified = getIntAttr(cached->infoAttrs, "lastModified");
} else {
Path tmpDir = createTempDir();
AutoDelete autoDelete(tmpDir, true);
unpackTarfile(store->toRealPath(res.storePath), tmpDir);
auto members = readDirectory(tmpDir);
if (members.size() != 1)
throw nix::Error("tarball '%s' contains an unexpected number of top-level files", url);
auto topDir = tmpDir + "/" + members.begin()->name;
lastModified = lstat(topDir).st_mtime;
unpackedStorePath = store->addToStore(name, topDir, FileIngestionMethod::Recursive, HashType::SHA256, defaultPathFilter, NoRepair);
}
Attrs infoAttrs({
{"lastModified", uint64_t(lastModified)},
{"etag", res.etag},
});
if (res.immutableUrl)
infoAttrs.emplace("immutableUrl", *res.immutableUrl);
getCache()->add(
store,
inAttrs,
infoAttrs,
*unpackedStorePath,
locked);
return {
.tree = Tree { .actualPath = store->toRealPath(*unpackedStorePath), .storePath = std::move(*unpackedStorePath) },
.lastModified = lastModified,
.immutableUrl = res.immutableUrl,
};
}
2022-05-25 13:29:27 +00:00
// An input scheme corresponding to a curl-downloadable resource.
struct CurlInputScheme : InputScheme
{
virtual const std::string inputType() const = 0;
const std::set<std::string> transportUrlSchemes = {"file", "http", "https"};
bool hasTarballExtension(std::string_view path) const
{
return path.ends_with(".zip") || path.ends_with(".tar")
|| path.ends_with(".tgz") || path.ends_with(".tar.gz")
|| path.ends_with(".tar.xz") || path.ends_with(".tar.bz2")
|| path.ends_with(".tar.zst");
}
virtual bool isValidURL(const ParsedURL & url, bool requireTree) const = 0;
std::optional<Input> inputFromURL(const ParsedURL & _url, bool requireTree) const override
{
if (!isValidURL(_url, requireTree))
return std::nullopt;
Revert "libfetchers: make attribute / URL query handling consistent" This reverts commit 35eec921af1043fc6322edc0ad88c872d41623b8. Reason for revert: Regressed nix-eval-jobs, and it appears to be this change is buggy/missing a case. It just needs another pass. Code causing the problem in n-e-j, when invoked with `nix-eval-jobs --flake '.#hydraJobs'`: ``` n-e-j/tests/assets » ../../build/src/nix-eval-jobs --meta --workers 1 --flake .#hydraJobs warning: unknown setting 'trusted-users' warning: `--gc-roots-dir' not specified error: unsupported Git input attribute 'dir' error: worker error: error: unsupported Git input attribute 'dir' ``` ``` nix::Value *vRoot = [&]() { if (args.flake) { auto [flakeRef, fragment, outputSpec] = nix::parseFlakeRefWithFragmentAndExtendedOutputsSpec( args.releaseExpr, nix::absPath(".")); nix::InstallableFlake flake{ {}, state, std::move(flakeRef), fragment, outputSpec, {}, {}, args.lockFlags}; return flake.toValue(*state).first; } else { return releaseExprTopLevelValue(*state, autoArgs, args); } }(); ``` Inspecting the program behaviour reveals that `dir` was in fact set in the URL going into the fetcher. This is in turn because unlike in the case changed in this commit, it was not erased before handing it to libfetchers, which is probably just a mistake. ``` (rr) up 3 0x00007ffff60262ae in nix::fetchers::Input::fromURL (url=..., requireTree=requireTree@entry=true) at src/libfetchers/fetchers.cc:39 warning: Source file is more recent than executable. 39 auto res = inputScheme->inputFromURL(url, requireTree); (rr) p url $1 = (const nix::ParsedURL &) @0x7fffdc874190: {url = "git+file:///home/jade/lix/nix-eval-jobs", base = "git+file:///home/jade/lix/nix-eval-jobs", scheme = "git+file", authority = std::optional<std::string> = {[contained value] = ""}, path = "/home/jade/lix/nix-eval-jobs", query = std::map with 1 element = {["dir"] = "tests/assets"}, fragment = ""} (rr) up 4 0x00007ffff789d904 in nix::parseFlakeRefWithFragment (url=".#hydraJobs", baseDir=std::optional<std::string> = {...}, allowMissing=allowMissing@entry=false, isFlake=isFlake@entry=true) at src/libexpr/flake/flakeref.cc:179 warning: Source file is more recent than executable. 179 FlakeRef(Input::fromURL(parsedURL, isFlake), getOr(parsedURL.query, "dir", "")), (rr) p parsedURL $2 = {url = "git+file:///home/jade/lix/nix-eval-jobs", base = "git+file:///home/jade/lix/nix-eval-jobs", scheme = "git+file", authority = std::optional<std::string> = {[contained value] = ""}, path = "/home/jade/lix/nix-eval-jobs", query = std::map with 1 element = { ["dir"] = "tests/assets"}, fragment = ""} (rr) list 174 175 if (pathExists(flakeRoot + "/.git/shallow")) 176 parsedURL.query.insert_or_assign("shallow", "1"); 177 178 return std::make_pair( 179 FlakeRef(Input::fromURL(parsedURL, isFlake), getOr(parsedURL.query, "dir", "")), 180 fragment); 181 } ``` Change-Id: Ib55a882eaeb3e59228857761dc1e3b2e366b0f5e
2024-06-24 22:49:17 +00:00
auto url = _url;
Attrs attrs;
attrs.emplace("type", inputType());
Revert "libfetchers: make attribute / URL query handling consistent" This reverts commit 35eec921af1043fc6322edc0ad88c872d41623b8. Reason for revert: Regressed nix-eval-jobs, and it appears to be this change is buggy/missing a case. It just needs another pass. Code causing the problem in n-e-j, when invoked with `nix-eval-jobs --flake '.#hydraJobs'`: ``` n-e-j/tests/assets » ../../build/src/nix-eval-jobs --meta --workers 1 --flake .#hydraJobs warning: unknown setting 'trusted-users' warning: `--gc-roots-dir' not specified error: unsupported Git input attribute 'dir' error: worker error: error: unsupported Git input attribute 'dir' ``` ``` nix::Value *vRoot = [&]() { if (args.flake) { auto [flakeRef, fragment, outputSpec] = nix::parseFlakeRefWithFragmentAndExtendedOutputsSpec( args.releaseExpr, nix::absPath(".")); nix::InstallableFlake flake{ {}, state, std::move(flakeRef), fragment, outputSpec, {}, {}, args.lockFlags}; return flake.toValue(*state).first; } else { return releaseExprTopLevelValue(*state, autoArgs, args); } }(); ``` Inspecting the program behaviour reveals that `dir` was in fact set in the URL going into the fetcher. This is in turn because unlike in the case changed in this commit, it was not erased before handing it to libfetchers, which is probably just a mistake. ``` (rr) up 3 0x00007ffff60262ae in nix::fetchers::Input::fromURL (url=..., requireTree=requireTree@entry=true) at src/libfetchers/fetchers.cc:39 warning: Source file is more recent than executable. 39 auto res = inputScheme->inputFromURL(url, requireTree); (rr) p url $1 = (const nix::ParsedURL &) @0x7fffdc874190: {url = "git+file:///home/jade/lix/nix-eval-jobs", base = "git+file:///home/jade/lix/nix-eval-jobs", scheme = "git+file", authority = std::optional<std::string> = {[contained value] = ""}, path = "/home/jade/lix/nix-eval-jobs", query = std::map with 1 element = {["dir"] = "tests/assets"}, fragment = ""} (rr) up 4 0x00007ffff789d904 in nix::parseFlakeRefWithFragment (url=".#hydraJobs", baseDir=std::optional<std::string> = {...}, allowMissing=allowMissing@entry=false, isFlake=isFlake@entry=true) at src/libexpr/flake/flakeref.cc:179 warning: Source file is more recent than executable. 179 FlakeRef(Input::fromURL(parsedURL, isFlake), getOr(parsedURL.query, "dir", "")), (rr) p parsedURL $2 = {url = "git+file:///home/jade/lix/nix-eval-jobs", base = "git+file:///home/jade/lix/nix-eval-jobs", scheme = "git+file", authority = std::optional<std::string> = {[contained value] = ""}, path = "/home/jade/lix/nix-eval-jobs", query = std::map with 1 element = { ["dir"] = "tests/assets"}, fragment = ""} (rr) list 174 175 if (pathExists(flakeRoot + "/.git/shallow")) 176 parsedURL.query.insert_or_assign("shallow", "1"); 177 178 return std::make_pair( 179 FlakeRef(Input::fromURL(parsedURL, isFlake), getOr(parsedURL.query, "dir", "")), 180 fragment); 181 } ``` Change-Id: Ib55a882eaeb3e59228857761dc1e3b2e366b0f5e
2024-06-24 22:49:17 +00:00
url.scheme = parseUrlScheme(url.scheme).transport;
Revert "libfetchers: make attribute / URL query handling consistent" This reverts commit 35eec921af1043fc6322edc0ad88c872d41623b8. Reason for revert: Regressed nix-eval-jobs, and it appears to be this change is buggy/missing a case. It just needs another pass. Code causing the problem in n-e-j, when invoked with `nix-eval-jobs --flake '.#hydraJobs'`: ``` n-e-j/tests/assets » ../../build/src/nix-eval-jobs --meta --workers 1 --flake .#hydraJobs warning: unknown setting 'trusted-users' warning: `--gc-roots-dir' not specified error: unsupported Git input attribute 'dir' error: worker error: error: unsupported Git input attribute 'dir' ``` ``` nix::Value *vRoot = [&]() { if (args.flake) { auto [flakeRef, fragment, outputSpec] = nix::parseFlakeRefWithFragmentAndExtendedOutputsSpec( args.releaseExpr, nix::absPath(".")); nix::InstallableFlake flake{ {}, state, std::move(flakeRef), fragment, outputSpec, {}, {}, args.lockFlags}; return flake.toValue(*state).first; } else { return releaseExprTopLevelValue(*state, autoArgs, args); } }(); ``` Inspecting the program behaviour reveals that `dir` was in fact set in the URL going into the fetcher. This is in turn because unlike in the case changed in this commit, it was not erased before handing it to libfetchers, which is probably just a mistake. ``` (rr) up 3 0x00007ffff60262ae in nix::fetchers::Input::fromURL (url=..., requireTree=requireTree@entry=true) at src/libfetchers/fetchers.cc:39 warning: Source file is more recent than executable. 39 auto res = inputScheme->inputFromURL(url, requireTree); (rr) p url $1 = (const nix::ParsedURL &) @0x7fffdc874190: {url = "git+file:///home/jade/lix/nix-eval-jobs", base = "git+file:///home/jade/lix/nix-eval-jobs", scheme = "git+file", authority = std::optional<std::string> = {[contained value] = ""}, path = "/home/jade/lix/nix-eval-jobs", query = std::map with 1 element = {["dir"] = "tests/assets"}, fragment = ""} (rr) up 4 0x00007ffff789d904 in nix::parseFlakeRefWithFragment (url=".#hydraJobs", baseDir=std::optional<std::string> = {...}, allowMissing=allowMissing@entry=false, isFlake=isFlake@entry=true) at src/libexpr/flake/flakeref.cc:179 warning: Source file is more recent than executable. 179 FlakeRef(Input::fromURL(parsedURL, isFlake), getOr(parsedURL.query, "dir", "")), (rr) p parsedURL $2 = {url = "git+file:///home/jade/lix/nix-eval-jobs", base = "git+file:///home/jade/lix/nix-eval-jobs", scheme = "git+file", authority = std::optional<std::string> = {[contained value] = ""}, path = "/home/jade/lix/nix-eval-jobs", query = std::map with 1 element = { ["dir"] = "tests/assets"}, fragment = ""} (rr) list 174 175 if (pathExists(flakeRoot + "/.git/shallow")) 176 parsedURL.query.insert_or_assign("shallow", "1"); 177 178 return std::make_pair( 179 FlakeRef(Input::fromURL(parsedURL, isFlake), getOr(parsedURL.query, "dir", "")), 180 fragment); 181 } ``` Change-Id: Ib55a882eaeb3e59228857761dc1e3b2e366b0f5e
2024-06-24 22:49:17 +00:00
emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {});
attrs.emplace("url", url.to_string());
return inputFromAttrs(attrs);
}
std::optional<Input> inputFromAttrs(const Attrs & attrs) const override
{
auto type = maybeGetStrAttr(attrs, "type");
if (type != inputType()) return {};
// FIXME: some of these only apply to TarballInputScheme.
std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack", "rev", "revCount", "lastModified"};
for (auto & [name, value] : attrs)
if (!allowedNames.count(name))
throw Error("unsupported %s input attribute '%s'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }'", *type, name);
Input input;
input.attrs = attrs;
//input.locked = (bool) maybeGetStrAttr(input.attrs, "hash");
return input;
}
ParsedURL toURL(const Input & input) const override
{
auto url = parseURL(getStrAttr(input.attrs, "url"));
// NAR hashes are preferred over file hashes since tar/zip
// files don't have a canonical representation.
if (auto narHash = input.getNarHash())
url.query.insert_or_assign("narHash", narHash->to_string(Base::SRI, true));
return url;
}
bool hasAllInfo(const Input & input) const override
{
return true;
}
};
struct FileInputScheme : CurlInputScheme
{
const std::string inputType() const override { return "file"; }
bool isValidURL(const ParsedURL & url, bool requireTree) const override
{
auto parsedUrlScheme = parseUrlScheme(url.scheme);
return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
&& (parsedUrlScheme.application
? parsedUrlScheme.application.value() == inputType()
: (!requireTree && !hasTarballExtension(url.path)));
}
std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override
{
auto file = downloadFile(store, getStrAttr(input.attrs, "url"), input.getName(), false);
return {std::move(file.storePath), input};
}
};
struct TarballInputScheme : CurlInputScheme
{
const std::string inputType() const override { return "tarball"; }
bool isValidURL(const ParsedURL & url, bool requireTree) const override
{
auto parsedUrlScheme = parseUrlScheme(url.scheme);
return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
&& (parsedUrlScheme.application
? parsedUrlScheme.application.value() == inputType()
: (requireTree || hasTarballExtension(url.path)));
}
std::pair<StorePath, Input> fetch(ref<Store> store, const Input & _input) override
{
Input input(_input);
auto url = getStrAttr(input.attrs, "url");
auto result = downloadTarball(store, url, input.getName(), false);
if (result.immutableUrl) {
auto immutableInput = Input::fromURL(*result.immutableUrl);
// FIXME: would be nice to support arbitrary flakerefs
// here, e.g. git flakes.
if (immutableInput.getType() != "tarball")
throw Error("tarball 'Link' headers that redirect to non-tarball URLs are not supported");
input = immutableInput;
}
if (result.lastModified && !input.attrs.contains("lastModified"))
input.attrs.insert_or_assign("lastModified", uint64_t(result.lastModified));
return {result.tree.storePath, std::move(input)};
}
};
static auto rTarballInputScheme = OnStartup([] { registerInputScheme(std::make_unique<TarballInputScheme>()); });
static auto rFileInputScheme = OnStartup([] { registerInputScheme(std::make_unique<FileInputScheme>()); });
}