From b1f1347ade81d1f04f2d490baceefb3c4de0b4e3 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 9 Jul 2021 00:47:57 +0200 Subject: [PATCH] nix develop: Don't parse bash environment with regexes Instead have get-env.sh dump the bash environment as JSON. This should be a lot less error-prone. Fixes #4992. --- src/nix/develop.cc | 176 ++++++++++++++++++++++----------------------- src/nix/get-env.sh | 92 +++++++++++++++++++++++- 2 files changed, 174 insertions(+), 94 deletions(-) diff --git a/src/nix/develop.cc b/src/nix/develop.cc index 699ec0b99..e00f0d575 100644 --- a/src/nix/develop.cc +++ b/src/nix/develop.cc @@ -8,7 +8,7 @@ #include "affinity.hh" #include "progress-bar.hh" -#include +#include using namespace nix; @@ -25,94 +25,98 @@ static DevelopSettings developSettings; static GlobalConfig::Register rDevelopSettings(&developSettings); -struct Var -{ - bool exported = true; - bool associative = false; - std::string quoted; // quoted string or array -}; - struct BuildEnvironment { - std::map env; - std::string bashFunctions; -}; + struct String + { + bool exported; + std::string value; + }; -BuildEnvironment readEnvironment(const Path & path) -{ - BuildEnvironment res; + using Array = std::vector; - std::set exported; + using Associative = std::map; - debug("reading environment file '%s'", path); + using Value = std::variant; - auto file = readFile(path); + std::map vars; + std::map bashFunctions; - auto pos = file.cbegin(); + static BuildEnvironment fromJSON(const Path & path) + { + BuildEnvironment res; - static std::string varNameRegex = - R"re((?:[a-zA-Z_][a-zA-Z0-9_]*))re"; + std::set exported; - static std::string simpleStringRegex = - R"re((?:[a-zA-Z0-9_/:\.\-\+=@%]*))re"; + debug("reading environment file '%s'", path); - static std::string dquotedStringRegex = - R"re((?:\$?"(?:[^"\\]|\\[$`"\\\n])*"))re"; + auto json = nlohmann::json::parse(readFile(path)); - static std::string squotedStringRegex = - R"re((?:\$?(?:'(?:[^'\\]|\\[abeEfnrtv\\'"?])*'|\\')+))re"; - - static std::string indexedArrayRegex = - R"re((?:\(( *\[[0-9]+\]="(?:[^"\\]|\\.)*")*\)))re"; - - static std::regex declareRegex( - "^declare -a?x (" + varNameRegex + ")(=(" + - dquotedStringRegex + "|" + indexedArrayRegex + "))?\n"); - - static std::regex varRegex( - "^(" + varNameRegex + ")=(" + simpleStringRegex + "|" + squotedStringRegex + "|" + indexedArrayRegex + ")\n"); - - /* Note: we distinguish between an indexed and associative array - using the space before the closing parenthesis. Will - undoubtedly regret this some day. */ - static std::regex assocArrayRegex( - "^(" + varNameRegex + ")=" + R"re((?:\(( *\[[^\]]+\]="(?:[^"\\]|\\.)*")* *\)))re" + "\n"); - - static std::regex functionRegex( - "^" + varNameRegex + " \\(\\) *\n"); - - while (pos != file.end()) { - - std::smatch match; - - if (std::regex_search(pos, file.cend(), match, declareRegex, std::regex_constants::match_continuous)) { - pos = match[0].second; - exported.insert(match[1]); + for (auto & [name, info] : json["variables"].items()) { + std::string type = info["type"]; + if (type == "var" || type == "exported") + res.vars.insert({name, BuildEnvironment::String { .exported = type == "exported", .value = info["value"] }}); + else if (type == "array") + res.vars.insert({name, (Array) info["value"]}); + else if (type == "associative") + res.vars.insert({name, (Associative) info["value"]}); } - else if (std::regex_search(pos, file.cend(), match, varRegex, std::regex_constants::match_continuous)) { - pos = match[0].second; - res.env.insert({match[1], Var { .exported = exported.count(match[1]) > 0, .quoted = match[2] }}); + for (auto & [name, def] : json["bashFunctions"].items()) { + res.bashFunctions.insert({name, def}); } - else if (std::regex_search(pos, file.cend(), match, assocArrayRegex, std::regex_constants::match_continuous)) { - pos = match[0].second; - res.env.insert({match[1], Var { .associative = true, .quoted = match[2] }}); - } - - else if (std::regex_search(pos, file.cend(), match, functionRegex, std::regex_constants::match_continuous)) { - res.bashFunctions = std::string(pos, file.cend()); - break; - } - - else throw Error("shell environment '%s' has unexpected line '%s'", - path, file.substr(pos - file.cbegin(), 60)); + return res; } - res.env.erase("__output"); + void toBash(std::ostream & out, const std::set & ignoreVars) const + { + for (auto & [name, value] : vars) { + if (!ignoreVars.count(name) && !hasPrefix(name, "BASH_")) { + if (auto str = std::get_if(&value)) { + out << fmt("%s=%s\n", name, shellEscape(str->value)); + if (str->exported) + out << fmt("export %s\n", name); + } + else if (auto arr = std::get_if(&value)) { + out << "declare -a " << name << "=("; + for (auto & s : *arr) + out << shellEscape(s) << " "; + out << ")\n"; + } + else if (auto arr = std::get_if(&value)) { + out << "declare -A " << name << "=("; + for (auto & [n, v] : *arr) + out << "[" << shellEscape(n) << "]=" << shellEscape(v) << " "; + out << ")\n"; + } + } + } - return res; -} + for (auto & [name, def] : bashFunctions) { + out << name << " ()\n{\n" << def << "}\n"; + } + } + + static std::string getString(const Value & value) + { + if (auto str = std::get_if(&value)) + return str->value; + else + throw Error("bash variable is not a string"); + } + + static Array getStrings(const Value & value) + { + if (auto str = std::get_if(&value)) + return tokenizeString(str->value); + else if (auto arr = std::get_if(&value)) { + return *arr; + } + else + throw Error("bash variable is not a string or array"); + } +}; const static std::string getEnvSh = #include "get-env.sh.gen.hh" @@ -185,7 +189,7 @@ StorePath getDerivationEnvironment(ref store, const StorePath & drvPath) struct Common : InstallableCommand, MixProfile { - std::set ignoreVars{ + std::set ignoreVars{ "BASHOPTS", "EUID", "HOME", // FIXME: don't ignore in pure mode? @@ -233,22 +237,10 @@ struct Common : InstallableCommand, MixProfile out << "nix_saved_PATH=\"$PATH\"\n"; - for (auto & i : buildEnvironment.env) { - if (!ignoreVars.count(i.first) && !hasPrefix(i.first, "BASH_")) { - if (i.second.associative) - out << fmt("declare -A %s=(%s)\n", i.first, i.second.quoted); - else { - out << fmt("%s=%s\n", i.first, i.second.quoted); - if (i.second.exported) - out << fmt("export %s\n", i.first); - } - } - } + buildEnvironment.toBash(out, ignoreVars); out << "PATH=\"$PATH:$nix_saved_PATH\"\n"; - out << buildEnvironment.bashFunctions << "\n"; - out << "export NIX_BUILD_TOP=\"$(mktemp -d -t nix-shell.XXXXXX)\"\n"; for (auto & i : {"TMP", "TMPDIR", "TEMP", "TEMPDIR"}) out << fmt("export %s=\"$NIX_BUILD_TOP\"\n", i); @@ -258,16 +250,16 @@ struct Common : InstallableCommand, MixProfile auto script = out.str(); /* Substitute occurrences of output paths. */ - auto outputs = buildEnvironment.env.find("outputs"); - assert(outputs != buildEnvironment.env.end()); + auto outputs = buildEnvironment.vars.find("outputs"); + assert(outputs != buildEnvironment.vars.end()); // FIXME: properly unquote 'outputs'. StringMap rewrites; - for (auto & outputName : tokenizeString>(replaceStrings(outputs->second.quoted, "'", ""))) { - auto from = buildEnvironment.env.find(outputName); - assert(from != buildEnvironment.env.end()); + for (auto & outputName : BuildEnvironment::getStrings(outputs->second)) { + auto from = buildEnvironment.vars.find(outputName); + assert(from != buildEnvironment.vars.end()); // FIXME: unquote - rewrites.insert({from->second.quoted, outputsDir + "/" + outputName}); + rewrites.insert({BuildEnvironment::getString(from->second), outputsDir + "/" + outputName}); } /* Substitute redirects. */ @@ -321,7 +313,7 @@ struct Common : InstallableCommand, MixProfile updateProfile(shellOutPath); - return {readEnvironment(strPath), strPath}; + return {BuildEnvironment::fromJSON(strPath), strPath}; } }; diff --git a/src/nix/get-env.sh b/src/nix/get-env.sh index 091c0f573..834b84e35 100644 --- a/src/nix/get-env.sh +++ b/src/nix/get-env.sh @@ -8,10 +8,98 @@ if [[ -n $stdenv ]]; then source $stdenv/setup fi +# Better to use compgen, but stdenv bash doesn't have it. +__vars="$(declare -p)" +__functions="$(declare -F)" + +__dumpEnv() { + printf '{\n' + + printf ' "bashFunctions": {\n' + local __first=1 + while read __line; do + if ! [[ $__line =~ ^declare\ -f\ (.*) ]]; then continue; fi + __fun_name="${BASH_REMATCH[1]}" + __fun_body="$(type $__fun_name)" + if [[ $__fun_body =~ \{(.*)\} ]]; then + if [[ -z $__first ]]; then printf ',\n'; else __first=; fi + __fun_body="${BASH_REMATCH[1]}" + printf " " + __escapeString "$__fun_name" + printf ':' + __escapeString "$__fun_body" + else + printf "Cannot parse definition of function '%s'.\n" "$__fun_name" >&2 + return 1 + fi + done < <(printf "%s\n" "$__functions") + printf '\n },\n' + + printf ' "variables": {\n' + local __first=1 + while read __line; do + if ! [[ $__line =~ ^declare\ (-[^ ])\ ([^=]*) ]]; then continue; fi + local type="${BASH_REMATCH[1]}" + local __var_name="${BASH_REMATCH[2]}" + + if [[ -z $__first ]]; then printf ',\n'; else __first=; fi + + printf " " + __escapeString "$__var_name" + printf ': {' + + # FIXME: handle -i, -r, -n. + if [[ $type == -x ]]; then + printf '"type": "exported", "value": ' + __escapeString "${!__var_name}" + elif [[ $type == -- ]]; then + printf '"type": "var", "value": ' + __escapeString "${!__var_name}" + elif [[ $type == -a ]]; then + printf '"type": "array", "value": [' + local __first2=1 + __var_name="$__var_name[@]" + for __i in "${!__var_name}"; do + if [[ -z $__first2 ]]; then printf ', '; else __first2=; fi + __escapeString "$__i" + printf ' ' + done + printf ']' + elif [[ $type == -A ]]; then + printf '"type": "associative", "value": {\n' + local __first2=1 + declare -n __var_name2="$__var_name" + for __i in "${!__var_name2[@]}"; do + if [[ -z $__first2 ]]; then printf ',\n'; else __first2=; fi + printf " " + __escapeString "$__i" + printf ": " + __escapeString "${__var_name2[$__i]}" + done + printf '\n }' + else + printf '"type": "unknown"' + fi + + printf "}" + done < <(printf "%s\n" "$__vars") + printf '\n }\n}' +} + +__escapeString() { + local __s="$1" + __s="${__s//\\/\\\\}" + __s="${__s//\"/\\\"}" + __s="${__s//$'\n'/\\n}" + __s="${__s//$'\r'/\\r}" + __s="${__s//$'\t'/\\t}" + printf '"%s"' "$__s" +} + +# Dump the bash environment as JSON. for __output in $outputs; do if [[ -z $__done ]]; then - export > ${!__output} - set >> ${!__output} + __dumpEnv > ${!__output} __done=1 else echo -n >> ${!__output}