From fac0c2d54a6b04175b40009506f2720d2594ed4e Mon Sep 17 00:00:00 2001 From: John Ericson Date: Fri, 29 May 2020 16:19:48 -0400 Subject: [PATCH 1/5] Remove addToStore variant as requested by `FIXME` The idea is it's always more flexible to consumer a `Source` than a plain string, and it might even reduce memory consumption. I also looked at `addToStoreFromDump` with its `// FIXME: remove?`, but the worked needed for that is far more up for interpretation, so I punted for now. --- src/libfetchers/tarball.cc | 3 ++- src/libstore/binary-cache-store.cc | 11 ++++++++--- src/libstore/binary-cache-store.hh | 2 +- src/libstore/export-import.cc | 5 ++++- src/libstore/store-api.cc | 15 --------------- src/libstore/store-api.hh | 7 +------ src/nix/add-to-store.cc | 6 ++++-- 7 files changed, 20 insertions(+), 29 deletions(-) diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc index bf2b2a5ff..b6e57379b 100644 --- a/src/libfetchers/tarball.cc +++ b/src/libfetchers/tarball.cc @@ -71,7 +71,8 @@ DownloadFileResult downloadFile( info.narHash = hashString(htSHA256, *sink.s); info.narSize = sink.s->size(); info.ca = makeFixedOutputCA(FileIngestionMethod::Flat, hash); - store->addToStore(info, sink.s, NoRepair, NoCheckSigs); + auto source = StringSource { *sink.s }; + store->addToStore(info, source, NoRepair, NoCheckSigs); storePath = std::move(info.path); } diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index f13736c58..357962007 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -113,9 +113,12 @@ void BinaryCacheStore::writeNarInfo(ref narInfo) diskCache->upsertNarInfo(getUri(), hashPart, std::shared_ptr(narInfo)); } -void BinaryCacheStore::addToStore(const ValidPathInfo & info, const ref & nar, +void BinaryCacheStore::addToStore(const ValidPathInfo & info, Source & narSource, RepairFlag repair, CheckSigsFlag checkSigs, std::shared_ptr accessor) { + // FIXME: See if we can use the original source to reduce memory usage. + auto nar = make_ref(narSource.drain()); + if (!repair && isValidPath(info.path)) return; /* Verify that all references are valid. This may do some .narinfo @@ -347,7 +350,8 @@ StorePath BinaryCacheStore::addToStore(const string & name, const Path & srcPath ValidPathInfo info(makeFixedOutputPath(method, h, name)); - addToStore(info, sink.s, repair, CheckSigs, nullptr); + auto source = StringSource { *sink.s }; + addToStore(info, source, repair, CheckSigs, nullptr); return std::move(info.path); } @@ -361,7 +365,8 @@ StorePath BinaryCacheStore::addTextToStore(const string & name, const string & s if (repair || !isValidPath(info.path)) { StringSink sink; dumpString(s, sink); - addToStore(info, sink.s, repair, CheckSigs, nullptr); + auto source = StringSource { *sink.s }; + addToStore(info, source, repair, CheckSigs, nullptr); } return std::move(info.path); diff --git a/src/libstore/binary-cache-store.hh b/src/libstore/binary-cache-store.hh index 4ff5609ed..52ef8aa7a 100644 --- a/src/libstore/binary-cache-store.hh +++ b/src/libstore/binary-cache-store.hh @@ -74,7 +74,7 @@ public: std::optional queryPathFromHashPart(const std::string & hashPart) override { unsupported("queryPathFromHashPart"); } - void addToStore(const ValidPathInfo & info, const ref & nar, + void addToStore(const ValidPathInfo & info, Source & narSource, RepairFlag repair, CheckSigsFlag checkSigs, std::shared_ptr accessor) override; diff --git a/src/libstore/export-import.cc b/src/libstore/export-import.cc index 4692d1a7b..f0d01a240 100644 --- a/src/libstore/export-import.cc +++ b/src/libstore/export-import.cc @@ -1,3 +1,4 @@ +#include "serialise.hh" #include "store-api.hh" #include "archive.hh" #include "worker-protocol.hh" @@ -100,7 +101,9 @@ StorePaths Store::importPaths(Source & source, std::shared_ptr acces if (readInt(source) == 1) readString(source); - addToStore(info, tee.source.data, NoRepair, checkSigs, accessor); + // Can't use underlying source, which would have been exhausted + auto source = StringSource { *tee.source.data }; + addToStore(info, source, NoRepair, checkSigs, accessor); res.push_back(info.path.clone()); } diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index d479b86cb..095363d0c 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -840,21 +840,6 @@ std::string makeFixedOutputCA(FileIngestionMethod recursive, const Hash & hash) } -void Store::addToStore(const ValidPathInfo & info, Source & narSource, - RepairFlag repair, CheckSigsFlag checkSigs, - std::shared_ptr accessor) -{ - addToStore(info, make_ref(narSource.drain()), repair, checkSigs, accessor); -} - -void Store::addToStore(const ValidPathInfo & info, const ref & nar, - RepairFlag repair, CheckSigsFlag checkSigs, - std::shared_ptr accessor) -{ - StringSource source(*nar); - addToStore(info, source, repair, checkSigs, accessor); -} - } diff --git a/src/libstore/store-api.hh b/src/libstore/store-api.hh index 3def209a6..b1e25fc7d 100644 --- a/src/libstore/store-api.hh +++ b/src/libstore/store-api.hh @@ -450,12 +450,7 @@ public: /* Import a path into the store. */ virtual void addToStore(const ValidPathInfo & info, Source & narSource, RepairFlag repair = NoRepair, CheckSigsFlag checkSigs = CheckSigs, - std::shared_ptr accessor = 0); - - // FIXME: remove - virtual void addToStore(const ValidPathInfo & info, const ref & nar, - RepairFlag repair = NoRepair, CheckSigsFlag checkSigs = CheckSigs, - std::shared_ptr accessor = 0); + std::shared_ptr accessor = 0) = 0; /* Copy the contents of a path to the store and register the validity the resulting path. The resulting path is returned. diff --git a/src/nix/add-to-store.cc b/src/nix/add-to-store.cc index 4b4ba81cb..f43f774c1 100644 --- a/src/nix/add-to-store.cc +++ b/src/nix/add-to-store.cc @@ -50,8 +50,10 @@ struct CmdAddToStore : MixDryRun, StoreCommand info.narSize = sink.s->size(); info.ca = makeFixedOutputCA(FileIngestionMethod::Recursive, info.narHash); - if (!dryRun) - store->addToStore(info, sink.s); + if (!dryRun) { + auto source = StringSource { *sink.s }; + store->addToStore(info, source); + } logger->stdout("%s", store->printStorePath(info.path)); } From 77007d4eabcf7090d1d3fbbdf84a67fb2262cf78 Mon Sep 17 00:00:00 2001 From: Nikola Knezevic Date: Sat, 30 May 2020 12:29:35 +0200 Subject: [PATCH 2/5] Improve ref validity checking in fetchGit The previous regex was too strict and did not match what git was allowing. It could lead to `fetchGit` not accepting valid branch names, even though they exist in a repository (for example, branch names containing `/`, which are pretty standard, like `release/1.0` branches). The new regex defines what a branch name should **NOT** contain. It takes the definitions from `refs.c` in https://github.com/git/git and `git help check-ref-format` pages. This change also introduces a test for ref name validity checking, which compares the result from Nix with the result of `git check-ref-format --branch`. --- src/libfetchers/git.cc | 2 +- src/libutil/url.cc | 1 + src/libutil/url.hh | 6 +++ tests/fetchGitRefs.sh | 111 +++++++++++++++++++++++++++++++++++++++++ tests/local.mk | 1 + 5 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 tests/fetchGitRefs.sh diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 17cc60228..e069057eb 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -418,7 +418,7 @@ struct GitInputScheme : InputScheme auto input = std::make_unique(parseURL(getStrAttr(attrs, "url"))); if (auto ref = maybeGetStrAttr(attrs, "ref")) { - if (!std::regex_match(*ref, refRegex)) + if (std::regex_search(*ref, badGitRefRegex)) throw BadURL("invalid Git branch/tag name '%s'", *ref); input->ref = *ref; } diff --git a/src/libutil/url.cc b/src/libutil/url.cc index 5d5328e5d..88c09eef9 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -4,6 +4,7 @@ namespace nix { std::regex refRegex(refRegexS, std::regex::ECMAScript); +std::regex badGitRefRegex(badGitRefRegexS, std::regex::ECMAScript); std::regex revRegex(revRegexS, std::regex::ECMAScript); std::regex flakeIdRegex(flakeIdRegexS, std::regex::ECMAScript); diff --git a/src/libutil/url.hh b/src/libutil/url.hh index 1503023a2..4a0d4071b 100644 --- a/src/libutil/url.hh +++ b/src/libutil/url.hh @@ -49,6 +49,12 @@ const static std::string pathRegex = "(?:" + segmentRegex + "(?:/" + segmentRege const static std::string refRegexS = "[a-zA-Z0-9][a-zA-Z0-9_.-]*"; // FIXME: check extern std::regex refRegex; +// Instead of defining what a good Git Ref is, we define what a bad Git Ref is +// This is because of the definition of a ref in refs.c in https://github.com/git/git +// See tests/fetchGitRefs.sh for the full definition +const static std::string badGitRefRegexS = "//|^[./]|/\\.|\\.\\.|[[:cntrl:][:space:]:?^~\[]|\\\\|\\*|\\.lock$|\\.lock/|@\\{|[/.]$|^@$|^$"; +extern std::regex badGitRefRegex; + // A Git revision (a SHA-1 commit hash). const static std::string revRegexS = "[0-9a-fA-F]{40}"; extern std::regex revRegex; diff --git a/tests/fetchGitRefs.sh b/tests/fetchGitRefs.sh new file mode 100644 index 000000000..93993ae90 --- /dev/null +++ b/tests/fetchGitRefs.sh @@ -0,0 +1,111 @@ +source common.sh + +if [[ -z $(type -p git) ]]; then + echo "Git not installed; skipping Git tests" + exit 99 +fi + +clearStore + +repo="$TEST_ROOT/git" + +rm -rf "$repo" "${repo}-tmp" "$TEST_HOME/.cache/nix" + +git init "$repo" +git -C "$repo" config user.email "foobar@example.com" +git -C "$repo" config user.name "Foobar" + +echo utrecht > "$repo"/hello +git -C "$repo" add hello +git -C "$repo" commit -m 'Bla1' + +path=$(nix eval --raw "(builtins.fetchGit { url = $repo; ref = \"master\"; }).outPath") + +# Test various combinations of ref names +# (taken from the git project) + +# git help check-ref-format +# Git imposes the following rules on how references are named: +# +# 1. They can include slash / for hierarchical (directory) grouping, but no slash-separated component can begin with a dot . or end with the sequence .lock. +# 2. They must contain at least one /. This enforces the presence of a category like heads/, tags/ etc. but the actual names are not restricted. If the --allow-onelevel option is used, this rule is waived. +# 3. They cannot have two consecutive dots .. anywhere. +# 4. They cannot have ASCII control characters (i.e. bytes whose values are lower than \040, or \177 DEL), space, tilde ~, caret ^, or colon : anywhere. +# 5. They cannot have question-mark ?, asterisk *, or open bracket [ anywhere. See the --refspec-pattern option below for an exception to this rule. +# 6. They cannot begin or end with a slash / or contain multiple consecutive slashes (see the --normalize option below for an exception to this rule) +# 7. They cannot end with a dot .. +# 8. They cannot contain a sequence @{. +# 9. They cannot be the single character @. +# 10. They cannot contain a \. + +valid_ref() { + { set +x; printf >&2 '\n>>>>>>>>>> valid_ref %s\b <<<<<<<<<<\n' $(printf %s "$1" | sed -n -e l); set -x; } + git check-ref-format --branch "$1" >/dev/null + git -C "$repo" branch "$1" master >/dev/null + path1=$(nix eval --raw "(builtins.fetchGit { url = $repo; ref = ''$1''; }).outPath") + [[ $path1 = $path ]] + git -C "$repo" branch -D "$1" >/dev/null +} + +invalid_ref() { + { set +x; printf >&2 '\n>>>>>>>>>> invalid_ref %s\b <<<<<<<<<<\n' $(printf %s "$1" | sed -n -e l); set -x; } + # special case for a sole @: + # --branch @ will try to interpret @ as a branch reference and not fail. Thus we need --allow-onelevel + if [ "$1" = "@" ]; then + (! git check-ref-format --allow-onelevel "$1" >/dev/null 2>&1) + else + (! git check-ref-format --branch "$1" >/dev/null 2>&1) + fi + nix --debug eval --raw "(builtins.fetchGit { url = $repo; ref = ''$1''; }).outPath" 2>&1 | grep 'error: invalid Git branch/tag name' >/dev/null +} + + +valid_ref 'foox' +valid_ref '1337' +valid_ref 'foo.baz' +valid_ref 'foo/bar/baz' +valid_ref 'foo./bar' +valid_ref 'heads/foo@bar' +valid_ref "$(printf 'heads/fu\303\237')" +valid_ref 'foo-bar-baz' +valid_ref '$1' +valid_ref 'foo.locke' + +invalid_ref 'refs///heads/foo' +invalid_ref 'heads/foo/' +invalid_ref '///heads/foo' +invalid_ref '.foo' +invalid_ref './foo' +invalid_ref './foo/bar' +invalid_ref 'foo/./bar' +invalid_ref 'foo/bar/.' +invalid_ref 'foo bar' +invalid_ref 'foo?bar' +invalid_ref 'foo^bar' +invalid_ref 'foo~bar' +invalid_ref 'foo:bar' +invalid_ref 'foo[bar' +invalid_ref 'foo/bar/.' +invalid_ref '.refs/foo' +invalid_ref 'refs/heads/foo.' +invalid_ref 'heads/foo..bar' +invalid_ref 'heads/foo?bar' +invalid_ref 'heads/foo.lock' +invalid_ref 'heads///foo.lock' +invalid_ref 'foo.lock/bar' +invalid_ref 'foo.lock///bar' +invalid_ref 'heads/v@{ation' +invalid_ref 'heads/foo\.ar' # should fail due to \ +invalid_ref 'heads/foo\bar' # should fail due to \ +invalid_ref "$(printf 'heads/foo\t')" # should fail because it has a TAB +invalid_ref "$(printf 'heads/foo\177')" +invalid_ref '@' + +invalid_ref 'foo/*' +invalid_ref '*/foo' +invalid_ref 'foo/*/bar' +invalid_ref '*' +invalid_ref 'foo/*/*' +invalid_ref '*/foo/*' +invalid_ref '/foo' +invalid_ref '' diff --git a/tests/local.mk b/tests/local.mk index 56e5640ca..536661af8 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -18,6 +18,7 @@ nix_tests = \ nar-access.sh \ structured-attrs.sh \ fetchGit.sh \ + fetchGitRefs.sh \ fetchGitSubmodules.sh \ fetchMercurial.sh \ signing.sh \ From fb38459d6e58508245553380cccc03c0dbaa1542 Mon Sep 17 00:00:00 2001 From: Nikola Knezevic Date: Sat, 30 May 2020 12:33:38 +0200 Subject: [PATCH 3/5] Ensure we restrict refspec interpretation while fetching As `git fetch` may chose to interpret refspec to it's liking, ensure that we only pass refs that begin with `refs/` as is, otherwise, prepend them with `refs/heads`. Otherwise, branches named `heads/foo` (I know it's bad, but it's allowed), would be fetched as `foo`, instead of `heads/foo`. --- src/libfetchers/git.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index e069057eb..75ce5ee8b 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -282,7 +282,10 @@ struct GitInput : Input // FIXME: git stderr messes up our progress indicator, so // we're using --quiet for now. Should process its stderr. try { - runProgram("git", true, { "-C", repoDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", *input->ref, *input->ref) }); + auto fetchRef = input->ref->compare(0, 5, "refs/") == 0 + ? *input->ref + : "refs/heads/" + *input->ref; + runProgram("git", true, { "-C", repoDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", fetchRef, fetchRef) }); } catch (Error & e) { if (!pathExists(localRefFile)) throw; warn("could not update local clone of Git repository '%s'; continuing with the most recent version", actualUrl); From eca1ff7a9f784926c52533c81cb5403a2cb804d3 Mon Sep 17 00:00:00 2001 From: Tobias Pflug Date: Sun, 31 May 2020 01:05:05 +0200 Subject: [PATCH 4/5] Add tests for lru-cache.hh --- src/libutil/tests/lru-cache.cc | 130 +++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 src/libutil/tests/lru-cache.cc diff --git a/src/libutil/tests/lru-cache.cc b/src/libutil/tests/lru-cache.cc new file mode 100644 index 000000000..598977a6d --- /dev/null +++ b/src/libutil/tests/lru-cache.cc @@ -0,0 +1,130 @@ +#include "lru-cache.hh" +#include + +namespace nix { + + /* ---------------------------------------------------------------------------- + * size + * --------------------------------------------------------------------------*/ + + TEST(LRUCache, sizeOfEmptyCacheIsZero) { + LRUCache c(10); + ASSERT_EQ(c.size(), 0); + } + + TEST(LRUCache, sizeOfSingleElementCacheIsOne) { + LRUCache c(10); + c.upsert("foo", "bar"); + ASSERT_EQ(c.size(), 1); + } + + /* ---------------------------------------------------------------------------- + * upsert / get + * --------------------------------------------------------------------------*/ + + TEST(LRUCache, getFromEmptyCache) { + LRUCache c(10); + auto val = c.get("x"); + ASSERT_EQ(val.has_value(), false); + } + + TEST(LRUCache, getExistingValue) { + LRUCache c(10); + c.upsert("foo", "bar"); + auto val = c.get("foo"); + ASSERT_EQ(val, "bar"); + } + + TEST(LRUCache, getNonExistingValueFromNonEmptyCache) { + LRUCache c(10); + c.upsert("foo", "bar"); + auto val = c.get("another"); + ASSERT_EQ(val.has_value(), false); + } + + TEST(LRUCache, upsertOnZeroCapacityCache) { + LRUCache c(0); + c.upsert("foo", "bar"); + auto val = c.get("foo"); + ASSERT_EQ(val.has_value(), false); + } + + TEST(LRUCache, updateExistingValue) { + LRUCache c(1); + c.upsert("foo", "bar"); + + auto val = c.get("foo"); + ASSERT_EQ(val.value_or("error"), "bar"); + ASSERT_EQ(c.size(), 1); + + c.upsert("foo", "changed"); + val = c.get("foo"); + ASSERT_EQ(val.value_or("error"), "changed"); + ASSERT_EQ(c.size(), 1); + } + + TEST(LRUCache, overwriteOldestWhenCapacityIsReached) { + LRUCache c(3); + c.upsert("one", "eins"); + c.upsert("two", "zwei"); + c.upsert("three", "drei"); + + ASSERT_EQ(c.size(), 3); + ASSERT_EQ(c.get("one").value_or("error"), "eins"); + + // exceed capacity + c.upsert("another", "whatever"); + + ASSERT_EQ(c.size(), 3); + // Retrieving "one" makes it the most recent element thus + // two will be the oldest one and thus replaced. + ASSERT_EQ(c.get("two").has_value(), false); + ASSERT_EQ(c.get("another").value(), "whatever"); + } + + /* ---------------------------------------------------------------------------- + * clear + * --------------------------------------------------------------------------*/ + + TEST(LRUCache, clearEmptyCache) { + LRUCache c(10); + c.clear(); + ASSERT_EQ(c.size(), 0); + } + + TEST(LRUCache, clearNonEmptyCache) { + LRUCache c(10); + c.upsert("one", "eins"); + c.upsert("two", "zwei"); + c.upsert("three", "drei"); + ASSERT_EQ(c.size(), 3); + c.clear(); + ASSERT_EQ(c.size(), 0); + } + + /* ---------------------------------------------------------------------------- + * erase + * --------------------------------------------------------------------------*/ + + TEST(LRUCache, eraseFromEmptyCache) { + LRUCache c(10); + c.erase("foo"); + ASSERT_EQ(c.size(), 0); + } + + TEST(LRUCache, eraseMissingFromNonEmptyCache) { + LRUCache c(10); + c.upsert("one", "eins"); + c.erase("foo"); + ASSERT_EQ(c.size(), 1); + ASSERT_EQ(c.get("one").value_or("error"), "eins"); + } + + TEST(LRUCache, eraseFromNonEmptyCache) { + LRUCache c(10); + c.upsert("one", "eins"); + c.erase("one"); + ASSERT_EQ(c.size(), 0); + ASSERT_EQ(c.get("one").value_or("empty"), "empty"); + } +} From e9fee8e6a7ff8540a04620e7d75a987cfe89ab00 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 2 Jun 2020 12:06:59 +0200 Subject: [PATCH 5/5] src/libutil/tests/lru-cache.cc: Check erase() Co-authored-by: James Lee --- src/libutil/tests/lru-cache.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libutil/tests/lru-cache.cc b/src/libutil/tests/lru-cache.cc index 598977a6d..091d3d5ed 100644 --- a/src/libutil/tests/lru-cache.cc +++ b/src/libutil/tests/lru-cache.cc @@ -108,14 +108,14 @@ namespace nix { TEST(LRUCache, eraseFromEmptyCache) { LRUCache c(10); - c.erase("foo"); + ASSERT_EQ(c.erase("foo"), false); ASSERT_EQ(c.size(), 0); } TEST(LRUCache, eraseMissingFromNonEmptyCache) { LRUCache c(10); c.upsert("one", "eins"); - c.erase("foo"); + ASSERT_EQ(c.erase("foo"), false); ASSERT_EQ(c.size(), 1); ASSERT_EQ(c.get("one").value_or("error"), "eins"); } @@ -123,7 +123,7 @@ namespace nix { TEST(LRUCache, eraseFromNonEmptyCache) { LRUCache c(10); c.upsert("one", "eins"); - c.erase("one"); + ASSERT_EQ(c.erase("one"), true); ASSERT_EQ(c.size(), 0); ASSERT_EQ(c.get("one").value_or("empty"), "empty"); }