Move shell_words into its own file

Change-Id: I34c0ebfb6dcea49bf632d8880e04075335a132bf
This commit is contained in:
Rebecca Turner 2024-03-14 17:44:43 -07:00
parent 4cef205233
commit 236ebab365
Signed by: rbt
SSH key fingerprint: SHA256:SiNaEWabvotTldoNb5jIKqjJ3RnpS4aRXA4KLAdW5vs
5 changed files with 168 additions and 59 deletions

View file

@ -20,6 +20,7 @@ libutil_sources = files(
'position.cc',
'references.cc',
'serialise.cc',
'shlex.cc',
'signals.cc',
'source-path.cc',
'suggestions.cc',
@ -69,6 +70,7 @@ libutil_headers = files(
'regex-combinators.hh',
'repair-flag.hh',
'serialise.hh',
'shlex.hh',
'signals.hh',
'source-path.hh',
'split.hh',

77
src/libutil/shlex.cc Normal file
View file

@ -0,0 +1,77 @@
#include "shlex.hh"
#include "util.hh"
namespace nix {
/**
 * Split `input` into words, loosely following perl's `shellwords`.
 *
 * - Runs of whitespace outside quotes separate words.
 * - Single quotes keep everything literal, including backslashes.
 * - Double quotes keep whitespace and single quotes literal; a backslash
 *   still escapes the following character.
 * - Outside single quotes, a backslash makes the next character part of the
 *   current word with no further processing.
 *
 * Throws ShlexError if a quote is left open or if the input ends in a
 * backslash that has nothing to escape.
 */
std::vector<std::string> shell_split(const std::string & input)
{
    std::vector<std::string> result;

    // Hack: `shell_split` is janky and parses ` a` as `{"", "a"}`, so we trim
    // whitespace before starting.
    auto inputTrimmed = trim(input);

    if (inputTrimmed.empty()) {
        return result;
    }

    std::regex whitespace("^\\s+");
    const auto end = inputTrimmed.cend();
    // `begin` marks the start of the not-yet-copied part of the current word.
    auto begin = inputTrimmed.cbegin();
    std::string currentToken;
    enum State { sBegin, sSingleQuote, sDoubleQuote };
    State state = sBegin;
    auto iterator = begin;

    for (; iterator != end; ++iterator) {
        if (state == sBegin) {
            std::smatch match;
            if (regex_search(iterator, end, match, whitespace)) {
                // Unquoted whitespace ends the current word.
                currentToken.append(begin, iterator);
                result.push_back(currentToken);
                iterator = match[0].second;
                if (iterator == end) {
                    return result;
                }
                begin = iterator;
                currentToken.clear();
            }
        }

        switch (*iterator) {
        case '\'':
            if (state != sDoubleQuote) {
                // Flush what we have and drop the quote character itself.
                currentToken.append(begin, iterator);
                begin = iterator + 1;
                state = state == sBegin ? sSingleQuote : sBegin;
            }
            break;
        case '"':
            if (state != sSingleQuote) {
                currentToken.append(begin, iterator);
                begin = iterator + 1;
                state = state == sBegin ? sDoubleQuote : sBegin;
            }
            break;
        case '\\':
            if (state != sSingleQuote) {
                // A backslash at the very end has nothing to escape. Stepping
                // over it here and again in the loop header would move the
                // iterator past the end of the string, so reject the input.
                if (iterator + 1 == end) {
                    throw ShlexError(input);
                }
                // perl shellwords mostly just treats the next char as part
                // of the string with no special processing
                currentToken.append(begin, iterator);
                begin = ++iterator;
            }
            break;
        }
    }

    if (state != sBegin) {
        throw ShlexError(input);
    }

    currentToken.append(begin, iterator);
    result.push_back(currentToken);
    return result;
}
}

30
src/libutil/shlex.hh Normal file
View file

@ -0,0 +1,30 @@
#pragma once
#include <regex>
#include <string>
#include <vector>
#include "error.hh"
namespace nix {
/**
 * Error thrown by `shell_split` when its input cannot be split into words.
 */
class ShlexError : public Error
{
public:
    /** The string that failed to parse, kept for callers that want to report it. */
    const std::string input;

    // Take the input by const reference: it is only read here (once for the
    // message, once to initialise the member), so a by-value parameter would
    // just add an extra copy.
    ShlexError(const std::string & input)
        : Error("Failed to parse shell arguments (unterminated quote?): %1%", input)
        , input(input)
    {
    }
};
/**
 * Parse a string into shell arguments.
 *
 * Takes care of whitespace, quotes, and backslashes (at least a bit).
 *
 * Leading and trailing whitespace is trimmed before parsing, so an empty or
 * whitespace-only input yields an empty vector. Unquoted whitespace separates
 * words. Text inside single quotes is taken literally, backslashes included.
 * Inside double quotes, whitespace and single quotes are literal, while a
 * backslash makes the following character part of the word as-is.
 *
 * @param input The string to split.
 * @return The parsed words, in the order they appear in `input`.
 * @throws ShlexError if a single or double quote is never closed.
 */
std::vector<std::string> shell_split(const std::string & input);
} // namespace nix

View file

@ -23,70 +23,13 @@
#include "common-eval-args.hh"
#include "attr-path.hh"
#include "legacy.hh"
#include "shlex.hh"
using namespace nix;
using namespace std::string_literals;
extern char * * environ __attribute__((weak));
/* Recreate the effect of the perl shellwords function, breaking up a
* string into arguments like a shell word, including escapes
*/
static std::vector<std::string> shellwords(const std::string & s)
{
std::regex whitespace("^\\s+");
auto begin = s.cbegin();
std::vector<std::string> res;
std::string cur;
enum state {
sBegin,
sSingleQuote,
sDoubleQuote
};
state st = sBegin;
auto it = begin;
for (; it != s.cend(); ++it) {
if (st == sBegin) {
std::smatch match;
if (regex_search(it, s.cend(), match, whitespace)) {
cur.append(begin, it);
res.push_back(cur);
it = match[0].second;
if (it == s.cend()) return res;
begin = it;
cur.clear();
}
}
switch (*it) {
case '\'':
if (st != sDoubleQuote) {
cur.append(begin, it);
begin = it + 1;
st = st == sBegin ? sSingleQuote : sBegin;
}
break;
case '"':
if (st != sSingleQuote) {
cur.append(begin, it);
begin = it + 1;
st = st == sBegin ? sDoubleQuote : sBegin;
}
break;
case '\\':
if (st != sSingleQuote) {
/* perl shellwords mostly just treats the next char as part of the string with no special processing */
cur.append(begin, it);
begin = ++it;
}
break;
}
}
if (st != sBegin) throw Error("unterminated quote in shebang line");
cur.append(begin, it);
res.push_back(cur);
return res;
}
static void main_nix_build(int argc, char * * argv)
{
auto dryRun = false;
@ -143,7 +86,7 @@ static void main_nix_build(int argc, char * * argv)
line = chomp(line);
std::smatch match;
if (std::regex_match(line, match, std::regex("^#!\\s*nix-shell\\s+(.*)$")))
for (const auto & word : shellwords(match[1].str()))
for (const auto & word : shell_split(match[1].str()))
args.push_back(word);
}
}

View file

@ -0,0 +1,57 @@
#include "shlex.hh"
#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include <sstream>
using testing::Eq;
namespace nix {
TEST(Shlex, shell_split) {
    // Shorthand for the expected result type of every case below.
    using Words = std::vector<std::string>;

    // Empty and whitespace-only inputs produce no words at all.
    ASSERT_THAT(shell_split(""), Eq(Words{}));
    ASSERT_THAT(shell_split(" "), Eq(Words{}));

    // Plain whitespace-separated words.
    ASSERT_THAT(shell_split("puppy doggy"), Eq(Words{"puppy", "doggy"}));

    // Double quotes group words and leave single quotes untouched.
    ASSERT_THAT(
        shell_split("goldie \"puppy 'doggy'\" sweety"),
        Eq(Words{"goldie", "puppy 'doggy'", "sweety"})
    );

    // A backslash inside double quotes escapes the following quote.
    ASSERT_THAT(shell_split("\"pupp\\\"y\""), Eq(Words{"pupp\"y"}));

    // Single quotes are stripped from around a word.
    ASSERT_THAT(
        shell_split("goldie 'puppy' doggy"),
        Eq(Words{"goldie", "puppy", "doggy"})
    );

    // Inside single quotes a backslash is kept verbatim.
    ASSERT_THAT(shell_split("'pupp\\\"y'"), Eq(Words{"pupp\\\"y"}));

    // Unterminated quotes of either kind are rejected.
    ASSERT_THROW(shell_split("\"puppy"), ShlexError);
    ASSERT_THROW(shell_split("'puppy"), ShlexError);
}
} // namespace nix