forked from lix-project/lix
Move shell_words
into its own file
Change-Id: I34c0ebfb6dcea49bf632d8880e04075335a132bf
This commit is contained in:
parent
b4d07656ff
commit
17d3572fe8
77
src/libutil/shlex.cc
Normal file
77
src/libutil/shlex.cc
Normal file
|
@ -0,0 +1,77 @@
|
|||
#include "shlex.hh"
|
||||
#include "util.hh"
|
||||
|
||||
namespace nix {
|
||||
|
||||
std::vector<std::string> shell_split(const std::string & input)
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
|
||||
// Hack: `shell_split` is janky and parses ` a` as `{"", "a"}`, so we trim
|
||||
// whitespace before starting.
|
||||
auto inputTrimmed = trim(input);
|
||||
|
||||
if (inputTrimmed.empty()) {
|
||||
return result;
|
||||
}
|
||||
|
||||
std::regex whitespace("^\\s+");
|
||||
auto begin = inputTrimmed.cbegin();
|
||||
std::string currentToken;
|
||||
enum State { sBegin, sSingleQuote, sDoubleQuote };
|
||||
State state = sBegin;
|
||||
auto iterator = begin;
|
||||
|
||||
for (; iterator != inputTrimmed.cend(); ++iterator) {
|
||||
if (state == sBegin) {
|
||||
std::smatch match;
|
||||
if (regex_search(iterator, inputTrimmed.cend(), match, whitespace)) {
|
||||
currentToken.append(begin, iterator);
|
||||
result.push_back(currentToken);
|
||||
iterator = match[0].second;
|
||||
if (iterator == inputTrimmed.cend()) {
|
||||
return result;
|
||||
}
|
||||
begin = iterator;
|
||||
currentToken.clear();
|
||||
}
|
||||
}
|
||||
|
||||
switch (*iterator) {
|
||||
case '\'':
|
||||
if (state != sDoubleQuote) {
|
||||
currentToken.append(begin, iterator);
|
||||
begin = iterator + 1;
|
||||
state = state == sBegin ? sSingleQuote : sBegin;
|
||||
}
|
||||
break;
|
||||
|
||||
case '"':
|
||||
if (state != sSingleQuote) {
|
||||
currentToken.append(begin, iterator);
|
||||
begin = iterator + 1;
|
||||
state = state == sBegin ? sDoubleQuote : sBegin;
|
||||
}
|
||||
break;
|
||||
|
||||
case '\\':
|
||||
if (state != sSingleQuote) {
|
||||
// perl shellwords mostly just treats the next char as part
|
||||
// of the string with no special processing
|
||||
currentToken.append(begin, iterator);
|
||||
begin = ++iterator;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (state != sBegin) {
|
||||
throw ShlexError(input);
|
||||
}
|
||||
|
||||
currentToken.append(begin, iterator);
|
||||
result.push_back(currentToken);
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
30
src/libutil/shlex.hh
Normal file
30
src/libutil/shlex.hh
Normal file
|
@ -0,0 +1,30 @@
|
|||
#pragma once
|
||||
|
||||
#include <regex>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "error.hh"
|
||||
|
||||
namespace nix {
|
||||
|
||||
class ShlexError : public Error
|
||||
{
|
||||
public:
|
||||
const std::string input;
|
||||
|
||||
ShlexError(const std::string input)
|
||||
: Error("Failed to parse shell arguments (unterminated quote?): %1%", input)
|
||||
, input(input)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Parse a string into shell arguments.
|
||||
*
|
||||
* Takes care of whitespace, quotes, and backslashes (at least a bit).
|
||||
*/
|
||||
std::vector<std::string> shell_split(const std::string & input);
|
||||
|
||||
} // namespace nix
|
|
@ -23,70 +23,13 @@
|
|||
#include "common-eval-args.hh"
|
||||
#include "attr-path.hh"
|
||||
#include "legacy.hh"
|
||||
#include "shlex.hh"
|
||||
|
||||
using namespace nix;
|
||||
using namespace std::string_literals;
|
||||
|
||||
extern char * * environ __attribute__((weak));
|
||||
|
||||
/* Recreate the effect of the perl shellwords function, breaking up a
|
||||
* string into arguments like a shell word, including escapes
|
||||
*/
|
||||
static std::vector<std::string> shellwords(const std::string & s)
|
||||
{
|
||||
std::regex whitespace("^\\s+");
|
||||
auto begin = s.cbegin();
|
||||
std::vector<std::string> res;
|
||||
std::string cur;
|
||||
enum state {
|
||||
sBegin,
|
||||
sSingleQuote,
|
||||
sDoubleQuote
|
||||
};
|
||||
state st = sBegin;
|
||||
auto it = begin;
|
||||
for (; it != s.cend(); ++it) {
|
||||
if (st == sBegin) {
|
||||
std::smatch match;
|
||||
if (regex_search(it, s.cend(), match, whitespace)) {
|
||||
cur.append(begin, it);
|
||||
res.push_back(cur);
|
||||
it = match[0].second;
|
||||
if (it == s.cend()) return res;
|
||||
begin = it;
|
||||
cur.clear();
|
||||
}
|
||||
}
|
||||
switch (*it) {
|
||||
case '\'':
|
||||
if (st != sDoubleQuote) {
|
||||
cur.append(begin, it);
|
||||
begin = it + 1;
|
||||
st = st == sBegin ? sSingleQuote : sBegin;
|
||||
}
|
||||
break;
|
||||
case '"':
|
||||
if (st != sSingleQuote) {
|
||||
cur.append(begin, it);
|
||||
begin = it + 1;
|
||||
st = st == sBegin ? sDoubleQuote : sBegin;
|
||||
}
|
||||
break;
|
||||
case '\\':
|
||||
if (st != sSingleQuote) {
|
||||
/* perl shellwords mostly just treats the next char as part of the string with no special processing */
|
||||
cur.append(begin, it);
|
||||
begin = ++it;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (st != sBegin) throw Error("unterminated quote in shebang line");
|
||||
cur.append(begin, it);
|
||||
res.push_back(cur);
|
||||
return res;
|
||||
}
|
||||
|
||||
static void main_nix_build(int argc, char * * argv)
|
||||
{
|
||||
auto dryRun = false;
|
||||
|
@ -143,7 +86,7 @@ static void main_nix_build(int argc, char * * argv)
|
|||
line = chomp(line);
|
||||
std::smatch match;
|
||||
if (std::regex_match(line, match, std::regex("^#!\\s*nix-shell\\s+(.*)$")))
|
||||
for (const auto & word : shellwords(match[1].str()))
|
||||
for (const auto & word : shell_split(match[1].str()))
|
||||
args.push_back(word);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,7 +23,8 @@ constexpr auto CLILiterateParser::stateDebug(State const & s) -> const char *
|
|||
[](Command const&) -> const char * { return "command"; },
|
||||
[](OutputLine const&) -> const char * { return "output_line"; }},
|
||||
// clang-format on
|
||||
s);
|
||||
s
|
||||
);
|
||||
}
|
||||
|
||||
auto CLILiterateParser::Node::print() const -> std::string
|
||||
|
@ -51,7 +52,8 @@ void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream *
|
|||
}
|
||||
}
|
||||
|
||||
auto CLILiterateParser::parse(std::string prompt, std::string_view const & input, size_t indent) -> std::vector<Node>
|
||||
auto CLILiterateParser::parse(std::string prompt, std::string_view const & input, size_t indent)
|
||||
-> std::vector<Node>
|
||||
{
|
||||
CLILiterateParser p{std::move(prompt), indent};
|
||||
p.feed(input);
|
||||
|
@ -105,13 +107,17 @@ void CLILiterateParser::feed(char c)
|
|||
} else {
|
||||
// didn't match the prompt, so it must have actually been output.
|
||||
s.lineAccumulator.push_back(c);
|
||||
transition(OutputLine{AccumulatingState{.lineAccumulator = std::move(s.lineAccumulator)}});
|
||||
transition(OutputLine{
|
||||
AccumulatingState{.lineAccumulator = std::move(s.lineAccumulator)}
|
||||
});
|
||||
return;
|
||||
}
|
||||
s.lineAccumulator.push_back(c);
|
||||
},
|
||||
[&](AccumulatingState & s) { s.lineAccumulator.push_back(c); }},
|
||||
state_);
|
||||
[&](AccumulatingState & s) { s.lineAccumulator.push_back(c); }
|
||||
},
|
||||
state_
|
||||
);
|
||||
}
|
||||
|
||||
void CLILiterateParser::onNewline()
|
||||
|
@ -140,8 +146,10 @@ void CLILiterateParser::onNewline()
|
|||
[&](Prompt & s) {
|
||||
// INDENT followed by newline is also considered a blank output line
|
||||
return Node::mkOutput(std::move(s.lineAccumulator));
|
||||
}},
|
||||
lastState));
|
||||
}
|
||||
},
|
||||
lastState
|
||||
));
|
||||
|
||||
transition(Indent{});
|
||||
lastWasOutput_ = newLastWasOutput;
|
||||
|
@ -171,8 +179,9 @@ auto CLILiterateParser::syntax() const -> std::vector<Node> const &
|
|||
return syntax_;
|
||||
}
|
||||
|
||||
auto CLILiterateParser::unparse(const std::string & prompt, const std::vector<Node> & syntax, size_t indent)
|
||||
-> std::string
|
||||
auto CLILiterateParser::unparse(
|
||||
const std::string & prompt, const std::vector<Node> & syntax, size_t indent
|
||||
) -> std::string
|
||||
{
|
||||
std::string indent_str(indent, ' ');
|
||||
std::ostringstream out{};
|
||||
|
|
|
@ -79,10 +79,13 @@ public:
|
|||
void feed(std::string_view s);
|
||||
|
||||
/** Parses an input in a non-streaming fashion */
|
||||
static auto parse(std::string prompt, std::string_view const & input, size_t indent = 2) -> std::vector<Node>;
|
||||
static auto parse(std::string prompt, std::string_view const & input, size_t indent = 2)
|
||||
-> std::vector<Node>;
|
||||
|
||||
/** Returns, losslessly, the string that would have generated a syntax tree */
|
||||
static auto unparse(std::string const & prompt, std::vector<Node> const & syntax, size_t indent = 2) -> std::string;
|
||||
static auto
|
||||
unparse(std::string const & prompt, std::vector<Node> const & syntax, size_t indent = 2)
|
||||
-> std::string;
|
||||
|
||||
/** Consumes a CLILiterateParser and gives you the syntax out of it */
|
||||
auto intoSyntax() && -> std::vector<Node>;
|
||||
|
@ -115,7 +118,7 @@ private:
|
|||
using State = std::variant<Indent, Commentary, Prompt, Command, OutputLine>;
|
||||
State state_;
|
||||
|
||||
constexpr static auto stateDebug(State const&) -> const char *;
|
||||
constexpr static auto stateDebug(State const &) -> const char *;
|
||||
|
||||
const std::string prompt_;
|
||||
const size_t indent_;
|
||||
|
|
57
tests/unit/libutil/shlex.cc
Normal file
57
tests/unit/libutil/shlex.cc
Normal file
|
@ -0,0 +1,57 @@
|
|||
#include "shlex.hh"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <gmock/gmock.h>
|
||||
#include <sstream>
|
||||
|
||||
using testing::Eq;
|
||||
|
||||
namespace nix {
|
||||
|
||||
TEST(Shlex, shell_split) {
    using Words = std::vector<std::string>;

    // Empty and whitespace-only input produce no words at all.
    ASSERT_THAT(shell_split(""), Eq<Words>({}));
    ASSERT_THAT(shell_split(" "), Eq<Words>({}));

    // Plain whitespace separates words.
    ASSERT_THAT(shell_split("puppy doggy"), Eq<Words>({"puppy", "doggy"}));

    // Single quotes inside double quotes are kept verbatim.
    ASSERT_THAT(
        shell_split("goldie \"puppy 'doggy'\" sweety"),
        Eq<Words>({"goldie", "puppy 'doggy'", "sweety"})
    );

    // A backslash inside double quotes escapes the following quote.
    ASSERT_THAT(shell_split("\"pupp\\\"y\""), Eq<Words>({"pupp\"y"}));

    // Single quotes are stripped from the resulting word.
    ASSERT_THAT(
        shell_split("goldie 'puppy' doggy"),
        Eq<Words>({"goldie", "puppy", "doggy"})
    );

    // Inside single quotes a backslash is an ordinary character.
    ASSERT_THAT(shell_split("'pupp\\\"y'"), Eq<Words>({"pupp\\\"y"}));

    // Unterminated quotes are reported as errors.
    ASSERT_THROW(shell_split("\"puppy"), ShlexError);
    ASSERT_THROW(shell_split("'puppy"), ShlexError);
}
|
||||
|
||||
} // namespace nix
|
Loading…
Reference in a new issue