diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 139067f20..883fc27a7 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -356,6 +356,7 @@ EvalState::EvalState(const Strings & _searchPath, ref store) , sEpsilon(symbols.create("")) , repair(NoRepair) , store(store) + , regexCache(makeRegexCache()) , baseEnv(allocEnv(128)) , staticBaseEnv(false, 0) { diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh index 80078d8a5..0e1f61baa 100644 --- a/src/libexpr/eval.hh +++ b/src/libexpr/eval.hh @@ -6,7 +6,6 @@ #include "symbol-table.hh" #include "config.hh" -#include #include #include #include @@ -65,6 +64,11 @@ typedef std::list SearchPath; void initGC(); +struct RegexCache; + +std::shared_ptr makeRegexCache(); + + class EvalState { public: @@ -120,7 +124,7 @@ private: std::unordered_map resolvedPaths; /* Cache used by prim_match(). */ - std::unordered_map regexCache; + std::shared_ptr regexCache; public: diff --git a/src/libexpr/flake/flakeref.cc b/src/libexpr/flake/flakeref.cc index 6363446f6..d5c2ffe66 100644 --- a/src/libexpr/flake/flakeref.cc +++ b/src/libexpr/flake/flakeref.cc @@ -1,6 +1,7 @@ #include "flakeref.hh" #include "store-api.hh" #include "url.hh" +#include "url-parts.hh" #include "fetchers.hh" #include "registry.hh" diff --git a/src/libexpr/flake/lockfile.cc b/src/libexpr/flake/lockfile.cc index a74846944..78431f000 100644 --- a/src/libexpr/flake/lockfile.cc +++ b/src/libexpr/flake/lockfile.cc @@ -1,5 +1,6 @@ #include "lockfile.hh" #include "store-api.hh" +#include "url-parts.hh" #include diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 7e8526ea1..9cfe3f402 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -3085,17 +3085,25 @@ static RegisterPrimOp primop_hashString({ .fun = prim_hashString, }); -/* Match a regular expression against a string and return either - ‘null’ or a list containing substring matches. */ +struct RegexCache +{ + std::unordered_map cache; +}; + +std::shared_ptr makeRegexCache() +{ + return std::make_shared(); +} + void prim_match(EvalState & state, const Pos & pos, Value * * args, Value & v) { auto re = state.forceStringNoCtx(*args[0], pos); try { - auto regex = state.regexCache.find(re); - if (regex == state.regexCache.end()) - regex = state.regexCache.emplace(re, std::regex(re, std::regex::extended)).first; + auto regex = state.regexCache->cache.find(re); + if (regex == state.regexCache->cache.end()) + regex = state.regexCache->cache.emplace(re, std::regex(re, std::regex::extended)).first; PathSet context; const std::string str = state.forceString(*args[1], context, pos); diff --git a/src/libexpr/primops/fetchMercurial.cc b/src/libexpr/primops/fetchMercurial.cc index cef85cfef..1a064ed5c 100644 --- a/src/libexpr/primops/fetchMercurial.cc +++ b/src/libexpr/primops/fetchMercurial.cc @@ -3,8 +3,7 @@ #include "store-api.hh" #include "fetchers.hh" #include "url.hh" - -#include +#include "url-parts.hh" namespace nix { diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 5ca0f8521..ad7638d73 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -3,6 +3,7 @@ #include "globals.hh" #include "tarfile.hh" #include "store-api.hh" +#include "url-parts.hh" #include diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index a4db5c5fa..1737658a7 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -3,6 +3,7 @@ #include "fetchers.hh" #include "globals.hh" #include "store-api.hh" +#include "url-parts.hh" #include diff --git a/src/libfetchers/indirect.cc b/src/libfetchers/indirect.cc index b981d4d8e..74332ae3d 100644 --- a/src/libfetchers/indirect.cc +++ b/src/libfetchers/indirect.cc @@ -1,4 +1,5 @@ #include "fetchers.hh" +#include "url-parts.hh" namespace nix::fetchers { diff --git a/src/libfetchers/mercurial.cc b/src/libfetchers/mercurial.cc index 3e76ffc4d..d80c2ea7a 100644 --- a/src/libfetchers/mercurial.cc +++ b/src/libfetchers/mercurial.cc @@ -3,6 +3,7 @@ #include "globals.hh" #include "tarfile.hh" #include "store-api.hh" +#include "url-parts.hh" #include diff --git a/src/libstore/names.cc b/src/libstore/names.cc index d1c8a6101..41e28dc99 100644 --- a/src/libstore/names.cc +++ b/src/libstore/names.cc @@ -1,10 +1,18 @@ #include "names.hh" #include "util.hh" +#include + namespace nix { +struct Regex +{ + std::regex regex; +}; + + DrvName::DrvName() { name = ""; @@ -30,11 +38,18 @@ DrvName::DrvName(std::string_view s) : hits(0) } +DrvName::~DrvName() +{ } + + bool DrvName::matches(DrvName & n) { if (name != "*") { - if (!regex) regex = std::unique_ptr(new std::regex(name, std::regex::extended)); - if (!std::regex_match(n.name, *regex)) return false; + if (!regex) { + regex = std::make_unique(); + regex->regex = std::regex(name, std::regex::extended); + } + if (!std::regex_match(n.name, regex->regex)) return false; } if (version != "" && version != n.version) return false; return true; @@ -99,7 +114,7 @@ DrvNames drvNamesFromArgs(const Strings & opArgs) { DrvNames result; for (auto & i : opArgs) - result.push_back(DrvName(i)); + result.emplace_back(i); return result; } diff --git a/src/libstore/names.hh b/src/libstore/names.hh index 00e14b8c7..bc62aac93 100644 --- a/src/libstore/names.hh +++ b/src/libstore/names.hh @@ -3,10 +3,11 @@ #include #include "types.hh" -#include namespace nix { +struct Regex; + struct DrvName { string fullName; @@ -16,10 +17,12 @@ struct DrvName DrvName(); DrvName(std::string_view s); + ~DrvName(); + bool matches(DrvName & n); private: - std::unique_ptr regex; + std::unique_ptr regex; }; typedef list DrvNames; diff --git a/src/libutil/url-parts.hh b/src/libutil/url-parts.hh new file mode 100644 index 000000000..64e06cfbc --- /dev/null +++ b/src/libutil/url-parts.hh @@ -0,0 +1,44 @@ +#pragma once + +#include +#include + +namespace nix { + +// URI stuff. +const static std::string pctEncoded = "(?:%[0-9a-fA-F][0-9a-fA-F])"; +const static std::string schemeRegex = "(?:[a-z+.-]+)"; +const static std::string ipv6AddressRegex = "(?:\\[[0-9a-fA-F:]+\\])"; +const static std::string unreservedRegex = "(?:[a-zA-Z0-9-._~])"; +const static std::string subdelimsRegex = "(?:[!$&'\"()*+,;=])"; +const static std::string hostnameRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + ")*)"; +const static std::string hostRegex = "(?:" + ipv6AddressRegex + "|" + hostnameRegex + ")"; +const static std::string userRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|:)*)"; +const static std::string authorityRegex = "(?:" + userRegex + "@)?" + hostRegex + "(?::[0-9]+)?"; +const static std::string pcharRegex = "(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|[:@])"; +const static std::string queryRegex = "(?:" + pcharRegex + "|[/? \"])*"; +const static std::string segmentRegex = "(?:" + pcharRegex + "+)"; +const static std::string absPathRegex = "(?:(?:/" + segmentRegex + ")*/?)"; +const static std::string pathRegex = "(?:" + segmentRegex + "(?:/" + segmentRegex + ")*/?)"; + +// A Git ref (i.e. branch or tag name). +const static std::string refRegexS = "[a-zA-Z0-9][a-zA-Z0-9_.-]*"; // FIXME: check +extern std::regex refRegex; + +// Instead of defining what a good Git Ref is, we define what a bad Git Ref is +// This is because of the definition of a ref in refs.c in https://github.com/git/git +// See tests/fetchGitRefs.sh for the full definition +const static std::string badGitRefRegexS = "//|^[./]|/\\.|\\.\\.|[[:cntrl:][:space:]:?^~\[]|\\\\|\\*|\\.lock$|\\.lock/|@\\{|[/.]$|^@$|^$"; +extern std::regex badGitRefRegex; + +// A Git revision (a SHA-1 commit hash). +const static std::string revRegexS = "[0-9a-fA-F]{40}"; +extern std::regex revRegex; + +// A ref or revision, or a ref followed by a revision. +const static std::string refAndOrRevRegex = "(?:(" + revRegexS + ")|(?:(" + refRegexS + ")(?:/(" + revRegexS + "))?))"; + +const static std::string flakeIdRegexS = "[a-zA-Z][a-zA-Z0-9_-]*"; +extern std::regex flakeIdRegex; + +} diff --git a/src/libutil/url.cc b/src/libutil/url.cc index 88c09eef9..c1bab866c 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -1,4 +1,5 @@ #include "url.hh" +#include "url-parts.hh" #include "util.hh" namespace nix { diff --git a/src/libutil/url.hh b/src/libutil/url.hh index 1f716ba10..6e77142e3 100644 --- a/src/libutil/url.hh +++ b/src/libutil/url.hh @@ -2,8 +2,6 @@ #include "error.hh" -#include - namespace nix { struct ParsedURL @@ -29,40 +27,4 @@ std::map decodeQuery(const std::string & query); ParsedURL parseURL(const std::string & url); -// URI stuff. -const static std::string pctEncoded = "(?:%[0-9a-fA-F][0-9a-fA-F])"; -const static std::string schemeRegex = "(?:[a-z+.-]+)"; -const static std::string ipv6AddressRegex = "(?:\\[[0-9a-fA-F:]+\\])"; -const static std::string unreservedRegex = "(?:[a-zA-Z0-9-._~])"; -const static std::string subdelimsRegex = "(?:[!$&'\"()*+,;=])"; -const static std::string hostnameRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + ")*)"; -const static std::string hostRegex = "(?:" + ipv6AddressRegex + "|" + hostnameRegex + ")"; -const static std::string userRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|:)*)"; -const static std::string authorityRegex = "(?:" + userRegex + "@)?" + hostRegex + "(?::[0-9]+)?"; -const static std::string pcharRegex = "(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|[:@])"; -const static std::string queryRegex = "(?:" + pcharRegex + "|[/? \"])*"; -const static std::string segmentRegex = "(?:" + pcharRegex + "+)"; -const static std::string absPathRegex = "(?:(?:/" + segmentRegex + ")*/?)"; -const static std::string pathRegex = "(?:" + segmentRegex + "(?:/" + segmentRegex + ")*/?)"; - -// A Git ref (i.e. branch or tag name). -const static std::string refRegexS = "[a-zA-Z0-9][a-zA-Z0-9_.-]*"; // FIXME: check -extern std::regex refRegex; - -// Instead of defining what a good Git Ref is, we define what a bad Git Ref is -// This is because of the definition of a ref in refs.c in https://github.com/git/git -// See tests/fetchGitRefs.sh for the full definition -const static std::string badGitRefRegexS = "//|^[./]|/\\.|\\.\\.|[[:cntrl:][:space:]:?^~\[]|\\\\|\\*|\\.lock$|\\.lock/|@\\{|[/.]$|^@$|^$"; -extern std::regex badGitRefRegex; - -// A Git revision (a SHA-1 commit hash). -const static std::string revRegexS = "[0-9a-fA-F]{40}"; -extern std::regex revRegex; - -// A ref or revision, or a ref followed by a revision. -const static std::string refAndOrRevRegex = "(?:(" + revRegexS + ")|(?:(" + refRegexS + ")(?:/(" + revRegexS + "))?))"; - -const static std::string flakeIdRegexS = "[a-zA-Z][a-zA-Z0-9_-]*"; -extern std::regex flakeIdRegex; - } diff --git a/src/nix-env/nix-env.cc b/src/nix-env/nix-env.cc index e5a433ac0..3e7c453fb 100644 --- a/src/nix-env/nix-env.cc +++ b/src/nix-env/nix-env.cc @@ -230,7 +230,7 @@ static DrvInfos filterBySelector(EvalState & state, const DrvInfos & allElems, { DrvNames selectors = drvNamesFromArgs(args); if (selectors.empty()) - selectors.push_back(DrvName("*")); + selectors.emplace_back("*"); DrvInfos elems; set done;