nix develop: Don't parse bash environment with regexes

Instead have get-env.sh dump the bash environment as JSON. This should
be a lot less error-prone.

Fixes #4992.
This commit is contained in:
Eelco Dolstra 2021-07-09 00:47:57 +02:00
parent b1cfe8f984
commit b1f1347ade
2 changed files with 174 additions and 94 deletions

View file

@ -8,7 +8,7 @@
#include "affinity.hh"
#include "progress-bar.hh"
#include <regex>
#include <nlohmann/json.hpp>
using namespace nix;
@ -25,94 +25,98 @@ static DevelopSettings developSettings;
static GlobalConfig::Register rDevelopSettings(&developSettings);
struct Var
{
bool exported = true;
bool associative = false;
std::string quoted; // quoted string or array
};
struct BuildEnvironment
{
std::map<std::string, Var> env;
std::string bashFunctions;
};
struct String
{
bool exported;
std::string value;
};
BuildEnvironment readEnvironment(const Path & path)
{
using Array = std::vector<std::string>;
using Associative = std::map<std::string, std::string>;
using Value = std::variant<String, Array, Associative>;
std::map<std::string, Value> vars;
std::map<std::string, std::string> bashFunctions;
static BuildEnvironment fromJSON(const Path & path)
{
BuildEnvironment res;
std::set<std::string> exported;
debug("reading environment file '%s'", path);
auto file = readFile(path);
auto json = nlohmann::json::parse(readFile(path));
auto pos = file.cbegin();
static std::string varNameRegex =
R"re((?:[a-zA-Z_][a-zA-Z0-9_]*))re";
static std::string simpleStringRegex =
R"re((?:[a-zA-Z0-9_/:\.\-\+=@%]*))re";
static std::string dquotedStringRegex =
R"re((?:\$?"(?:[^"\\]|\\[$`"\\\n])*"))re";
static std::string squotedStringRegex =
R"re((?:\$?(?:'(?:[^'\\]|\\[abeEfnrtv\\'"?])*'|\\')+))re";
static std::string indexedArrayRegex =
R"re((?:\(( *\[[0-9]+\]="(?:[^"\\]|\\.)*")*\)))re";
static std::regex declareRegex(
"^declare -a?x (" + varNameRegex + ")(=(" +
dquotedStringRegex + "|" + indexedArrayRegex + "))?\n");
static std::regex varRegex(
"^(" + varNameRegex + ")=(" + simpleStringRegex + "|" + squotedStringRegex + "|" + indexedArrayRegex + ")\n");
/* Note: we distinguish between an indexed and associative array
using the space before the closing parenthesis. Will
undoubtedly regret this some day. */
static std::regex assocArrayRegex(
"^(" + varNameRegex + ")=" + R"re((?:\(( *\[[^\]]+\]="(?:[^"\\]|\\.)*")* *\)))re" + "\n");
static std::regex functionRegex(
"^" + varNameRegex + " \\(\\) *\n");
while (pos != file.end()) {
std::smatch match;
if (std::regex_search(pos, file.cend(), match, declareRegex, std::regex_constants::match_continuous)) {
pos = match[0].second;
exported.insert(match[1]);
for (auto & [name, info] : json["variables"].items()) {
std::string type = info["type"];
if (type == "var" || type == "exported")
res.vars.insert({name, BuildEnvironment::String { .exported = type == "exported", .value = info["value"] }});
else if (type == "array")
res.vars.insert({name, (Array) info["value"]});
else if (type == "associative")
res.vars.insert({name, (Associative) info["value"]});
}
else if (std::regex_search(pos, file.cend(), match, varRegex, std::regex_constants::match_continuous)) {
pos = match[0].second;
res.env.insert({match[1], Var { .exported = exported.count(match[1]) > 0, .quoted = match[2] }});
for (auto & [name, def] : json["bashFunctions"].items()) {
res.bashFunctions.insert({name, def});
}
else if (std::regex_search(pos, file.cend(), match, assocArrayRegex, std::regex_constants::match_continuous)) {
pos = match[0].second;
res.env.insert({match[1], Var { .associative = true, .quoted = match[2] }});
}
else if (std::regex_search(pos, file.cend(), match, functionRegex, std::regex_constants::match_continuous)) {
res.bashFunctions = std::string(pos, file.cend());
break;
}
else throw Error("shell environment '%s' has unexpected line '%s'",
path, file.substr(pos - file.cbegin(), 60));
}
res.env.erase("__output");
return res;
}
}
void toBash(std::ostream & out, const std::set<std::string> & ignoreVars) const
{
for (auto & [name, value] : vars) {
if (!ignoreVars.count(name) && !hasPrefix(name, "BASH_")) {
if (auto str = std::get_if<String>(&value)) {
out << fmt("%s=%s\n", name, shellEscape(str->value));
if (str->exported)
out << fmt("export %s\n", name);
}
else if (auto arr = std::get_if<Array>(&value)) {
out << "declare -a " << name << "=(";
for (auto & s : *arr)
out << shellEscape(s) << " ";
out << ")\n";
}
else if (auto arr = std::get_if<Associative>(&value)) {
out << "declare -A " << name << "=(";
for (auto & [n, v] : *arr)
out << "[" << shellEscape(n) << "]=" << shellEscape(v) << " ";
out << ")\n";
}
}
}
for (auto & [name, def] : bashFunctions) {
out << name << " ()\n{\n" << def << "}\n";
}
}
static std::string getString(const Value & value)
{
if (auto str = std::get_if<String>(&value))
return str->value;
else
throw Error("bash variable is not a string");
}
static Array getStrings(const Value & value)
{
if (auto str = std::get_if<String>(&value))
return tokenizeString<Array>(str->value);
else if (auto arr = std::get_if<Array>(&value)) {
return *arr;
}
else
throw Error("bash variable is not a string or array");
}
};
const static std::string getEnvSh =
#include "get-env.sh.gen.hh"
@ -185,7 +189,7 @@ StorePath getDerivationEnvironment(ref<Store> store, const StorePath & drvPath)
struct Common : InstallableCommand, MixProfile
{
std::set<string> ignoreVars{
std::set<std::string> ignoreVars{
"BASHOPTS",
"EUID",
"HOME", // FIXME: don't ignore in pure mode?
@ -233,22 +237,10 @@ struct Common : InstallableCommand, MixProfile
out << "nix_saved_PATH=\"$PATH\"\n";
for (auto & i : buildEnvironment.env) {
if (!ignoreVars.count(i.first) && !hasPrefix(i.first, "BASH_")) {
if (i.second.associative)
out << fmt("declare -A %s=(%s)\n", i.first, i.second.quoted);
else {
out << fmt("%s=%s\n", i.first, i.second.quoted);
if (i.second.exported)
out << fmt("export %s\n", i.first);
}
}
}
buildEnvironment.toBash(out, ignoreVars);
out << "PATH=\"$PATH:$nix_saved_PATH\"\n";
out << buildEnvironment.bashFunctions << "\n";
out << "export NIX_BUILD_TOP=\"$(mktemp -d -t nix-shell.XXXXXX)\"\n";
for (auto & i : {"TMP", "TMPDIR", "TEMP", "TEMPDIR"})
out << fmt("export %s=\"$NIX_BUILD_TOP\"\n", i);
@ -258,16 +250,16 @@ struct Common : InstallableCommand, MixProfile
auto script = out.str();
/* Substitute occurrences of output paths. */
auto outputs = buildEnvironment.env.find("outputs");
assert(outputs != buildEnvironment.env.end());
auto outputs = buildEnvironment.vars.find("outputs");
assert(outputs != buildEnvironment.vars.end());
// FIXME: properly unquote 'outputs'.
StringMap rewrites;
for (auto & outputName : tokenizeString<std::vector<std::string>>(replaceStrings(outputs->second.quoted, "'", ""))) {
auto from = buildEnvironment.env.find(outputName);
assert(from != buildEnvironment.env.end());
for (auto & outputName : BuildEnvironment::getStrings(outputs->second)) {
auto from = buildEnvironment.vars.find(outputName);
assert(from != buildEnvironment.vars.end());
// FIXME: unquote
rewrites.insert({from->second.quoted, outputsDir + "/" + outputName});
rewrites.insert({BuildEnvironment::getString(from->second), outputsDir + "/" + outputName});
}
/* Substitute redirects. */
@ -321,7 +313,7 @@ struct Common : InstallableCommand, MixProfile
updateProfile(shellOutPath);
return {readEnvironment(strPath), strPath};
return {BuildEnvironment::fromJSON(strPath), strPath};
}
};

View file

@ -8,10 +8,98 @@ if [[ -n $stdenv ]]; then
source $stdenv/setup
fi
# Better to use compgen, but stdenv bash doesn't have it.
__vars="$(declare -p)"
__functions="$(declare -F)"
__dumpEnv() {
printf '{\n'
printf ' "bashFunctions": {\n'
local __first=1
while read __line; do
if ! [[ $__line =~ ^declare\ -f\ (.*) ]]; then continue; fi
__fun_name="${BASH_REMATCH[1]}"
__fun_body="$(type $__fun_name)"
if [[ $__fun_body =~ \{(.*)\} ]]; then
if [[ -z $__first ]]; then printf ',\n'; else __first=; fi
__fun_body="${BASH_REMATCH[1]}"
printf " "
__escapeString "$__fun_name"
printf ':'
__escapeString "$__fun_body"
else
printf "Cannot parse definition of function '%s'.\n" "$__fun_name" >&2
return 1
fi
done < <(printf "%s\n" "$__functions")
printf '\n },\n'
printf ' "variables": {\n'
local __first=1
while read __line; do
if ! [[ $__line =~ ^declare\ (-[^ ])\ ([^=]*) ]]; then continue; fi
local type="${BASH_REMATCH[1]}"
local __var_name="${BASH_REMATCH[2]}"
if [[ -z $__first ]]; then printf ',\n'; else __first=; fi
printf " "
__escapeString "$__var_name"
printf ': {'
# FIXME: handle -i, -r, -n.
if [[ $type == -x ]]; then
printf '"type": "exported", "value": '
__escapeString "${!__var_name}"
elif [[ $type == -- ]]; then
printf '"type": "var", "value": '
__escapeString "${!__var_name}"
elif [[ $type == -a ]]; then
printf '"type": "array", "value": ['
local __first2=1
__var_name="$__var_name[@]"
for __i in "${!__var_name}"; do
if [[ -z $__first2 ]]; then printf ', '; else __first2=; fi
__escapeString "$__i"
printf ' '
done
printf ']'
elif [[ $type == -A ]]; then
printf '"type": "associative", "value": {\n'
local __first2=1
declare -n __var_name2="$__var_name"
for __i in "${!__var_name2[@]}"; do
if [[ -z $__first2 ]]; then printf ',\n'; else __first2=; fi
printf " "
__escapeString "$__i"
printf ": "
__escapeString "${__var_name2[$__i]}"
done
printf '\n }'
else
printf '"type": "unknown"'
fi
printf "}"
done < <(printf "%s\n" "$__vars")
printf '\n }\n}'
}
__escapeString() {
local __s="$1"
__s="${__s//\\/\\\\}"
__s="${__s//\"/\\\"}"
__s="${__s//$'\n'/\\n}"
__s="${__s//$'\r'/\\r}"
__s="${__s//$'\t'/\\t}"
printf '"%s"' "$__s"
}
# Dump the bash environment as JSON.
for __output in $outputs; do
if [[ -z $__done ]]; then
export > ${!__output}
set >> ${!__output}
__dumpEnv > ${!__output}
__done=1
else
echo -n >> ${!__output}