primops: Move functions to primops/string.cc

Moved builtins: baseNameOf, compareVersions, concatStringsSep, match,
parseDrvName, replaceStrings, split, splitVersion, stringLength,
substring, toString

Change-Id: I0daf1eb5263fbadcfe4917a4bf017be0ac9bf939
This commit is contained in:
Tom Hubrecht 2024-05-30 01:54:44 +02:00
parent 84e80fa97d
commit 7f9f2f7835
4 changed files with 644 additions and 504 deletions

View file

@ -93,6 +93,7 @@ libexpr_sources = files(
'primops/fetchMercurial.cc',
'primops/fetchTree.cc',
'primops/fromTOML.cc',
'primops/string.cc',
'value/context.cc',
)

View file

@ -1422,27 +1422,6 @@ static RegisterPrimOp primop_pathExists({
.fun = prim_pathExists,
});
/* Implements builtins.baseNameOf: coerce the first argument to a
   string, take its base name (everything following the last slash)
   and store that in `v` together with the collected string context. */
static void prim_baseNameOf(EvalState & state, const PosIdx pos, Value * * args, Value & v)
{
    NixStringContext context;
    // Hold the coerced value in a local so the view passed to
    // baseNameOf() stays valid until mkString() has run.
    auto coerced = state.coerceToString(
        pos, *args[0], context,
        "while evaluating the first argument passed to builtins.baseNameOf",
        false, false);
    v.mkString(baseNameOf(*coerced), context);
}
/* Registers prim_baseNameOf as the primop named "baseNameOf" with the
   single argument `s`; `.doc` holds its documentation text. */
static RegisterPrimOp primop_baseNameOf({
.name = "baseNameOf",
.args = {"s"},
.doc = R"(
Return the *base name* of the string *s*, that is, everything
following the final slash in the string. This is similar to the GNU
`basename` command.
)",
.fun = prim_baseNameOf,
});
/* Return the directory of the given path, i.e., everything before the
last slash. Return either a path or a string depending on the type
of the argument. */
@ -3124,109 +3103,6 @@ static RegisterPrimOp primop_lessThan({
*************************************************************/
/* Implements builtins.toString: coerce the first argument to a string
   and return it along with the string context gathered on the way.
   Paths are *not* copied to the store, so `toString /foo/bar' yields
   `"/foo/bar"', not `"/nix/store/whatever..."'. */
static void prim_toString(EvalState & state, const PosIdx pos, Value * * args, Value & v)
{
    NixStringContext context;
    v.mkString(
        *state.coerceToString(
            pos, *args[0], context,
            "while evaluating the first argument passed to builtins.toString",
            true, false),
        context);
}
/* Registers prim_toString as the primop named "toString" with the
   single argument `e`; `.doc` holds its documentation text. */
static RegisterPrimOp primop_toString({
    .name = "toString",
    .args = {"e"},
    .doc = R"(
Convert the expression *e* to a string. *e* can be:
- A string (in which case the string is returned unmodified).
- A path (e.g., `toString /foo/bar` yields `"/foo/bar"`).
- A set containing `{ __toString = self: ...; }` or `{ outPath = ...; }`.
- An integer.
- A list, in which case the string representations of its elements
are joined with spaces.
- A Boolean (`false` yields `""`, `true` yields `"1"`).
- `null`, which yields the empty string.
)",
    .fun = prim_toString,
});
/* `substring start len str' returns the substring of `str' starting
   at character position `min(start, stringLength str)' inclusive and
   ending at `min(start + len, stringLength str)'. `start' must be
   non-negative. A negative `len' selects everything up to the end of
   the string (this is what the previous size_t conversion did too). */
static void prim_substring(EvalState & state, const PosIdx pos, Value * * args, Value & v)
{
    // Keep the full width of the integer returned by forceInt: narrowing
    // it to `int` would silently wrap large offsets and lengths.
    const auto start = state.forceInt(*args[0], pos, "while evaluating the first argument (the start offset) passed to builtins.substring");
    if (start < 0)
        state.error<EvalError>("negative start position in 'substring'").atPos(pos).debugThrow();

    const auto len = state.forceInt(*args[1], pos, "while evaluating the second argument (the substring length) passed to builtins.substring");

    // Special-case on empty substring to avoid O(n) strlen
    // This allows for the use of empty substrings to efficiently capture string context
    if (len == 0) {
        state.forceValue(*args[2], pos);
        if (args[2]->type() == nString) {
            v.mkString("", args[2]->string.context);
            return;
        }
    }

    NixStringContext context;
    auto s = state.coerceToString(pos, *args[2], context, "while evaluating the third argument (the string) passed to builtins.substring");

    const auto size = s->size();
    // `start` is known to be non-negative here, so the unsigned
    // comparison is exact even when size_t is narrower than the integer.
    if (static_cast<uint64_t>(start) >= size) {
        v.mkString("", context);
        return;
    }

    // Negative or over-long lengths both mean "up to the end of the string".
    const auto count = (len < 0 || static_cast<uint64_t>(len) >= size)
        ? std::string_view::npos
        : static_cast<size_t>(len);
    v.mkString(s->substr(static_cast<size_t>(start), count), context);
}
/* Registers prim_substring as the primop named "__substring" with the
   arguments `start`, `len` and `s`; `.doc` holds its documentation. */
static RegisterPrimOp primop_substring({
.name = "__substring",
.args = {"start", "len", "s"},
.doc = R"(
Return the substring of *s* from character position *start*
(zero-based) up to but not including *start + len*. If *start* is
greater than the length of the string, an empty string is returned,
and if *start + len* lies beyond the end of the string, only the
substring up to the end of the string is returned. *start* must be
non-negative. For example,
```nix
builtins.substring 0 3 "nixos"
```
evaluates to `"nix"`.
)",
.fun = prim_substring,
});
/* Implements builtins.stringLength: coerce the argument to a string
   and store its size() in `v` as an integer. The string context
   collected during coercion is discarded. */
static void prim_stringLength(EvalState & state, const PosIdx pos, Value * * args, Value & v)
{
    NixStringContext context;
    v.mkInt(
        state.coerceToString(
            pos, *args[0], context,
            "while evaluating the argument passed to builtins.stringLength")
            ->size());
}
/* Registers prim_stringLength as the primop named "__stringLength"
   with the single argument `e`; `.doc` holds its documentation. */
static RegisterPrimOp primop_stringLength({
.name = "__stringLength",
.args = {"e"},
.doc = R"(
Return the length of the string *e*. If *e* is not a string,
evaluation is aborted.
)",
.fun = prim_stringLength,
});
/* Return the cryptographic hash of a string in base-16. */
static void prim_hashString(EvalState & state, const PosIdx pos, Value * * args, Value & v)
{
@ -3252,391 +3128,11 @@ static RegisterPrimOp primop_hashString({
.fun = prim_hashString,
});
/* Cache of compiled POSIX-extended regular expressions keyed by their
   source text. `keys` owns the pattern strings; the string_view keys of
   `cache` point into those strings, so entries must never be removed
   from `keys` while they are referenced by `cache`. */
struct RegexCache
{
    // TODO use C++20 transparent comparison when available
    std::unordered_map<std::string_view, std::regex> cache;
    std::list<std::string> keys;

    /* Return the compiled form of `re`, compiling and caching it on
       first use. Throws std::regex_error for an invalid pattern, in
       which case the cache is left untouched. */
    std::regex get(std::string_view re)
    {
        if (auto it = cache.find(re); it != cache.end())
            return it->second;

        // Compile before recording the key: if the pattern is invalid the
        // constructor throws, and no orphaned key may be left in `keys`.
        std::regex compiled(re.data(), re.size(), std::regex::extended);

        keys.emplace_back(re);
        return cache.emplace(keys.back(), std::move(compiled)).first->second;
    }
};
/* Create a new, empty RegexCache owned by a shared pointer. */
std::shared_ptr<RegexCache> makeRegexCache()
{
    auto freshCache = std::make_shared<RegexCache>();
    return freshCache;
}
/* Implements builtins.match: match the whole of the second argument
   against the regular expression given as first argument. `v` becomes
   null when there is no match, otherwise a list with one entry per
   capture group (null for groups that did not participate). The string
   context of the second argument is collected but not attached to the
   result. */
void prim_match(EvalState & state, const PosIdx pos, Value * * args, Value & v)
{
    auto re = state.forceStringNoCtx(*args[0], pos, "while evaluating the first argument passed to builtins.match");

    try {
        auto regex = state.regexCache->get(re);

        NixStringContext context;
        const auto str = state.forceString(*args[1], context, pos, "while evaluating the second argument passed to builtins.match");

        std::cmatch groups;
        const bool matched = std::regex_match(str.begin(), str.end(), groups, regex);
        if (!matched) {
            v.mkNull();
            return;
        }

        // Entry 0 of the match results is the whole string; only the
        // capture groups after it are returned.
        const size_t groupCount = groups.size() - 1;
        state.mkList(v, groupCount);
        for (size_t g = 1; g <= groupCount; ++g) {
            auto slot = v.listElems()[g - 1] = state.allocValue();
            if (groups[g].matched)
                slot->mkString(groups[g].str());
            else
                slot->mkNull();
        }
    } catch (std::regex_error & e) {
        const bool outOfSpace = e.code() == std::regex_constants::error_space;
        if (!outOfSpace) {
            state.error<EvalError>("invalid regular expression '%s'", re)
                .atPos(pos)
                .debugThrow();
        } else {
            // limit is _GLIBCXX_REGEX_STATE_LIMIT for libstdc++
            state.error<EvalError>("memory limit exceeded by regular expression '%s'", re)
                .atPos(pos)
                .debugThrow();
        }
    }
}
/* Registers prim_match as the primop named "__match" with the
   arguments `regex` and `str`; `.doc` holds its documentation. */
static RegisterPrimOp primop_match({
.name = "__match",
.args = {"regex", "str"},
.doc = R"s(
Returns a list if the [extended POSIX regular
expression](http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04)
*regex* matches *str* precisely, otherwise returns `null`. Each item
in the list is a regex group.
```nix
builtins.match "ab" "abc"
```
Evaluates to `null`.
```nix
builtins.match "abc" "abc"
```
Evaluates to `[ ]`.
```nix
builtins.match "a(b)(c)" "abc"
```
Evaluates to `[ "b" "c" ]`.
```nix
builtins.match "[[:space:]]+([[:upper:]]+)[[:space:]]+" " FOO "
```
Evaluates to `[ "FOO" ]`.
)s",
.fun = prim_match,
});
/* Split a string with a regular expression, and return a list of the
non-matching parts interleaved by the lists of the matching groups.
The result always has 2 * (number of matches) + 1 elements; when
nothing matches, its single element is the second argument itself.
The string context of the second argument is collected but not
attached to the newly created strings. */
void prim_split(EvalState & state, const PosIdx pos, Value * * args, Value & v)
{
auto re = state.forceStringNoCtx(*args[0], pos, "while evaluating the first argument passed to builtins.split");
try {
auto regex = state.regexCache->get(re);
NixStringContext context;
const auto str = state.forceString(*args[1], context, pos, "while evaluating the second argument passed to builtins.split");
auto begin = std::cregex_iterator(str.begin(), str.end(), regex);
auto end = std::cregex_iterator();
// Every match is surrounded by non-matching strings, hence 2 * len + 1 elements.
const size_t len = std::distance(begin, end);
state.mkList(v, 2 * len + 1);
size_t idx = 0;
// No match at all: reuse the input value as the only element.
if (len == 0) {
v.listElems()[idx++] = args[1];
return;
}
for (auto i = begin; i != end; ++i) {
// Each iteration writes at least two slots, and the last one writes three.
assert(idx <= 2 * len + 1 - 3);
auto match = *i;
// Add a string for non-matched characters.
(v.listElems()[idx++] = state.allocValue())->mkString(match.prefix().str());
// Add a list for matched substrings.
const size_t slen = match.size() - 1;
auto elem = v.listElems()[idx++] = state.allocValue();
// Start at 1, because the first match is the whole string.
state.mkList(*elem, slen);
for (size_t si = 0; si < slen; ++si) {
if (!match[si + 1].matched)
(elem->listElems()[si] = state.allocValue())->mkNull();
else
(elem->listElems()[si] = state.allocValue())->mkString(match[si + 1].str());
}
// Add a string for non-matched suffix characters.
// Only true after the final match, when a single slot is left.
if (idx == 2 * len)
(v.listElems()[idx++] = state.allocValue())->mkString(match.suffix().str());
}
assert(idx == 2 * len + 1);
} catch (std::regex_error & e) {
if (e.code() == std::regex_constants::error_space) {
// limit is _GLIBCXX_REGEX_STATE_LIMIT for libstdc++
state.error<EvalError>("memory limit exceeded by regular expression '%s'", re)
.atPos(pos)
.debugThrow();
} else
state.error<EvalError>("invalid regular expression '%s'", re)
.atPos(pos)
.debugThrow();
}
}
/* Registers prim_split as the primop named "__split" with the
   arguments `regex` and `str`; `.doc` holds its documentation. */
static RegisterPrimOp primop_split({
.name = "__split",
.args = {"regex", "str"},
.doc = R"s(
Returns a list composed of non matched strings interleaved with the
lists of the [extended POSIX regular
expression](http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04)
*regex* matches of *str*. Each item in the lists of matched
sequences is a regex group.
```nix
builtins.split "(a)b" "abc"
```
Evaluates to `[ "" [ "a" ] "c" ]`.
```nix
builtins.split "([ac])" "abc"
```
Evaluates to `[ "" [ "a" ] "b" [ "c" ] "" ]`.
```nix
builtins.split "(a)|(c)" "abc"
```
Evaluates to `[ "" [ "a" null ] "b" [ null "c" ] "" ]`.
```nix
builtins.split "([[:upper:]]+)" " FOO "
```
Evaluates to `[ " " [ "FOO" ] " " ]`.
)s",
.fun = prim_split,
});
/* Implements builtins.concatStringsSep: join the string coercions of
   the list elements (second argument) with the separator (first
   argument) in between. The string contexts of the separator and of
   every element are merged into the result. */
static void prim_concatStringsSep(EvalState & state, const PosIdx pos, Value * * args, Value & v)
{
    NixStringContext context;

    auto sep = state.forceString(*args[0], context, pos, "while evaluating the first argument (the separator string) passed to builtins.concatStringsSep");
    state.forceList(*args[1], pos, "while evaluating the second argument (the list of strings to concat) passed to builtins.concatStringsSep");

    std::string res;
    res.reserve((args[1]->listSize() + 32) * sep.size());

    // Empty before the first element, the real separator afterwards.
    std::string_view pendingSep;
    for (auto elem : args[1]->listItems()) {
        res += pendingSep;
        pendingSep = sep;
        res += *state.coerceToString(pos, *elem, context, "while evaluating one element of the list of strings to concat passed to builtins.concatStringsSep");
    }

    v.mkString(res, context);
}
/* Registers prim_concatStringsSep as the primop named
   "__concatStringsSep" with the arguments `separator` and `list`;
   `.doc` holds its documentation. */
static RegisterPrimOp primop_concatStringsSep({
.name = "__concatStringsSep",
.args = {"separator", "list"},
.doc = R"(
Concatenate a list of strings with a separator between each
element, e.g. `concatStringsSep "/" ["usr" "local" "bin"] ==
"usr/local/bin"`.
)",
.fun = prim_concatStringsSep,
});
/* Implements builtins.replaceStrings. The string (third argument) is
scanned left to right; at each position the first entry of `from`
that matches is replaced by the entry of `to` with the same index.
Entries of `to` are forced only when first needed and then cached by
index; their string contexts are merged into the result's context.
Raises an EvalError when `from` and `to` differ in length. */
static void prim_replaceStrings(EvalState & state, const PosIdx pos, Value * * args, Value & v)
{
state.forceList(*args[0], pos, "while evaluating the first argument passed to builtins.replaceStrings");
state.forceList(*args[1], pos, "while evaluating the second argument passed to builtins.replaceStrings");
if (args[0]->listSize() != args[1]->listSize())
state.error<EvalError>(
"'from' and 'to' arguments passed to builtins.replaceStrings have different lengths"
).atPos(pos).debugThrow();
// All patterns are forced eagerly, unlike the replacements.
std::vector<std::string> from;
from.reserve(args[0]->listSize());
for (auto elem : args[0]->listItems())
from.emplace_back(state.forceString(*elem, pos, "while evaluating one of the strings to replace passed to builtins.replaceStrings"));
// Replacement strings that have already been forced, keyed by their index in `to`.
std::unordered_map<size_t, std::string> cache;
auto to = args[1]->listItems();
NixStringContext context;
auto s = state.forceString(*args[2], context, pos, "while evaluating the third argument passed to builtins.replaceStrings");
std::string res;
// Loops one past last character to handle the case where 'from' contains an empty string.
for (size_t p = 0; p <= s.size(); ) {
bool found = false;
auto i = from.begin();
auto j = to.begin();
size_t j_index = 0;
// Walk `from` and `to` in lockstep; the first matching pattern wins.
for (; i != from.end(); ++i, ++j, ++j_index)
if (s.compare(p, i->size(), *i) == 0) {
found = true;
// NOTE: this local `v` shadows the output parameter `v` within this scope.
auto v = cache.find(j_index);
if (v == cache.end()) {
NixStringContext ctx;
auto ts = state.forceString(**j, ctx, pos, "while evaluating one of the replacement strings passed to builtins.replaceStrings");
v = (cache.emplace(j_index, ts)).first;
for (auto& path : ctx)
context.insert(path);
}
res += v->second;
// An empty pattern matches at every position: after emitting the
// replacement, copy one input character so the scan advances.
if (i->empty()) {
if (p < s.size())
res += s[p];
p++;
} else {
p += i->size();
}
break;
}
// No pattern matched here: copy the current character unchanged.
if (!found) {
if (p < s.size())
res += s[p];
p++;
}
}
v.mkString(res, context);
}
/* Registers prim_replaceStrings as the primop named "__replaceStrings"
   with the arguments `from`, `to` and `s`; `.doc` holds its
   documentation. */
static RegisterPrimOp primop_replaceStrings({
    .name = "__replaceStrings",
    .args = {"from", "to", "s"},
    .doc = R"(
Given string *s*, replace every occurrence of the strings in *from*
with the corresponding string in *to*.
The argument *to* is lazy, that is, it is only evaluated when its corresponding pattern in *from* is matched in the string *s*.
Example:
```nix
builtins.replaceStrings ["oo" "a"] ["a" "i"] "foobar"
```
evaluates to `"fabir"`.
)",
    .fun = prim_replaceStrings,
});
/*************************************************************
* Versions
*************************************************************/
/* Implements builtins.parseDrvName: parse the first argument (a string
   without context) as a DrvName and return an attribute set holding
   its `name` and `version` parts. */
static void prim_parseDrvName(EvalState & state, const PosIdx pos, Value * * args, Value & v)
{
    DrvName parsed(state.forceStringNoCtx(
        *args[0], pos,
        "while evaluating the first argument passed to builtins.parseDrvName"));

    auto attrs = state.buildBindings(2);
    attrs.alloc(state.sName).mkString(parsed.name);
    attrs.alloc("version").mkString(parsed.version);
    v.mkAttrs(attrs);
}
/* Registers prim_parseDrvName as the primop named "__parseDrvName"
   with the single argument `s`; `.doc` holds its documentation. */
static RegisterPrimOp primop_parseDrvName({
.name = "__parseDrvName",
.args = {"s"},
.doc = R"(
Split the string *s* into a package name and version. The package
name is everything up to but not including the first dash not followed
by a letter, and the version is everything following that dash. The
result is returned in a set `{ name, version }`. Thus,
`builtins.parseDrvName "nix-0.12pre12876"` returns `{ name =
"nix"; version = "0.12pre12876"; }`.
)",
.fun = prim_parseDrvName,
});
/* Implements builtins.compareVersions: both arguments must be strings
   without context; the result of compareVersions() on them is stored
   in `v` as an integer. */
static void prim_compareVersions(EvalState & state, const PosIdx pos, Value * * args, Value & v)
{
    const auto lhs = state.forceStringNoCtx(*args[0], pos, "while evaluating the first argument passed to builtins.compareVersions");
    const auto rhs = state.forceStringNoCtx(*args[1], pos, "while evaluating the second argument passed to builtins.compareVersions");

    const auto ordering = compareVersions(lhs, rhs);
    v.mkInt(ordering);
}
/* Registers prim_compareVersions as the primop named
   "__compareVersions" with the arguments `s1` and `s2`; `.doc` holds
   its documentation. */
static RegisterPrimOp primop_compareVersions({
.name = "__compareVersions",
.args = {"s1", "s2"},
.doc = R"(
Compare two strings representing versions and return `-1` if
version *s1* is older than version *s2*, `0` if they are the same,
and `1` if *s1* is newer than *s2*. The version comparison
algorithm is the same as the one used by [`nix-env
-u`](../command-ref/nix-env.md#operation---upgrade).
)",
.fun = prim_compareVersions,
});
/* Implements builtins.splitVersion: repeatedly call nextComponent() on
   the version string (first argument, no string context allowed) until
   the end is reached or an empty component comes back, and return the
   components as a list of strings. */
static void prim_splitVersion(EvalState & state, const PosIdx pos, Value * * args, Value & v)
{
    auto version = state.forceStringNoCtx(*args[0], pos, "while evaluating the first argument passed to builtins.splitVersion");

    Strings components;
    // nextComponent() advances `cursor` itself, so the loop has no increment.
    for (auto cursor = version.cbegin(); cursor != version.cend();) {
        auto part = nextComponent(cursor, version.cend());
        if (part.empty())
            break;
        components.emplace_back(part);
    }

    state.mkList(v, components.size());
    for (const auto & [n, component] : enumerate(components)) {
        auto slot = v.listElems()[n] = state.allocValue();
        slot->mkString(std::move(component));
    }
}
/* Registers prim_splitVersion as the primop named "__splitVersion"
   with the single argument `s`; `.doc` holds its documentation. */
static RegisterPrimOp primop_splitVersion({
.name = "__splitVersion",
.args = {"s"},
.doc = R"(
Split a string representing a version into its components, by the
same version splitting logic underlying the version comparison in
[`nix-env -u`](../command-ref/nix-env.md#operation---upgrade).
)",
.fun = prim_splitVersion,
});
/*************************************************************
* Primop registration

View file

@ -3,6 +3,7 @@
#include "eval.hh"
#include <regex>
#include <tuple>
#include <vector>
@ -65,4 +66,24 @@ typedef std::list<Value *> ValueList;
Bindings::iterator getAttr(EvalState & state, Symbol attrSym, Bindings * attrSet, std::string_view errorCtx);
/**
* Struct definitions
*/
/**
 * Cache of compiled POSIX-extended regular expressions, keyed by their
 * source text. `keys` owns the pattern strings; the `std::string_view`
 * keys of `cache` refer to those strings.
 */
struct RegexCache
{
    // TODO use C++20 transparent comparison when available
    std::unordered_map<std::string_view, std::regex> cache;
    std::list<std::string> keys;

    /**
     * Returns the compiled regex for `re`, compiling and caching it on
     * first use.
     *
     * Throws std::regex_error if `re` is not a valid extended regular
     * expression; nothing is added to the cache in that case.
     */
    std::regex get(std::string_view re)
    {
        if (auto it = cache.find(re); it != cache.end()) {
            return it->second;
        }

        // Compile first. Registering the key before a throwing compile
        // would leave one orphaned entry in `keys` per failed lookup.
        std::regex compiled(re.data(), re.size(), std::regex::extended);

        keys.emplace_back(re);
        return cache.emplace(keys.back(), std::move(compiled)).first->second;
    }
};
}

View file

@ -0,0 +1,622 @@
#include "names.hh"
#include "primops.hh"
namespace nix {
std::shared_ptr<RegexCache> makeRegexCache()
{
return std::make_shared<RegexCache>();
}
/**
 * builtins.baseNameOf
 *
 * Coerces the first argument to a string, applies baseNameOf() to it
 * and stores the result in `v`, keeping the string context collected
 * during the coercion.
 */
static void prim_baseNameOf(EvalState & state, const PosIdx pos, Value ** args, Value & v)
{
    NixStringContext context;

    // The coerced value must outlive the view derived from it below.
    auto input = state.coerceToString(
        pos,
        *args[0],
        context,
        "while evaluating the first argument passed to builtins.baseNameOf",
        false,
        false
    );
    const auto base = baseNameOf(*input);

    v.mkString(base, context);
}
/**
 * Registers prim_baseNameOf as the primop named "baseNameOf" with the
 * single argument `s`; `.doc` holds its documentation text.
 */
static RegisterPrimOp primop_baseNameOf({
.name = "baseNameOf",
.args = {"s"},
.doc = R"(
Return the *base name* of the string *s*, that is, everything
following the final slash in the string. This is similar to the GNU
`basename` command.
)",
.fun = prim_baseNameOf,
});
/**
 * builtins.compareVersions
 *
 * Forces both arguments to strings without context and stores the
 * result of compareVersions() on them in `v` as an integer.
 */
static void prim_compareVersions(EvalState & state, const PosIdx pos, Value ** args, Value & v)
{
    const auto first = state.forceStringNoCtx(
        *args[0], pos, "while evaluating the first argument passed to builtins.compareVersions"
    );
    const auto second = state.forceStringNoCtx(
        *args[1], pos, "while evaluating the second argument passed to builtins.compareVersions"
    );

    v.mkInt(compareVersions(first, second));
}
/**
 * Registers prim_compareVersions as the primop named
 * "__compareVersions" with the arguments `s1` and `s2`; `.doc` holds
 * its documentation.
 */
static RegisterPrimOp primop_compareVersions({
.name = "__compareVersions",
.args = {"s1", "s2"},
.doc = R"(
Compare two strings representing versions and return `-1` if
version *s1* is older than version *s2*, `0` if they are the same,
and `1` if *s1* is newer than *s2*. The version comparison
algorithm is the same as the one used by [`nix-env
-u`](../command-ref/nix-env.md#operation---upgrade).
)",
.fun = prim_compareVersions,
});
/**
 * builtins.concatStringsSep
 *
 * Joins the string coercions of the elements of the list (second
 * argument), placing the separator (first argument) between
 * consecutive elements. The string contexts of the separator and of
 * all elements end up in the result.
 */
static void prim_concatStringsSep(EvalState & state, const PosIdx pos, Value ** args, Value & v)
{
    NixStringContext context;

    auto sep = state.forceString(
        *args[0],
        context,
        pos,
        "while evaluating the first argument (the separator string) passed to "
        "builtins.concatStringsSep"
    );
    state.forceList(
        *args[1],
        pos,
        "while evaluating the second argument (the list of strings to concat) passed to "
        "builtins.concatStringsSep"
    );

    std::string res;
    res.reserve((args[1]->listSize() + 32) * sep.size());

    bool needsSeparator = false;
    for (auto elem : args[1]->listItems()) {
        // Every element except the first is preceded by the separator.
        if (needsSeparator) {
            res += sep;
        }
        needsSeparator = true;

        res += *state.coerceToString(
            pos,
            *elem,
            context,
            "while evaluating one element of the list of strings to concat passed to "
            "builtins.concatStringsSep"
        );
    }

    v.mkString(res, context);
}
/**
 * Registers prim_concatStringsSep as the primop named
 * "__concatStringsSep" with the arguments `separator` and `list`;
 * `.doc` holds its documentation.
 */
static RegisterPrimOp primop_concatStringsSep({
    .name = "__concatStringsSep",
    .args = {"separator", "list"},
    .doc = R"(
Concatenate a list of strings with a separator between each
element, e.g. `concatStringsSep "/" ["usr" "local" "bin"] ==
"usr/local/bin"`.
)",
    .fun = prim_concatStringsSep,
});
/**
 * builtins.match
 *
 * Matches the entire second argument against the regular expression
 * given as first argument. `v` becomes null if it does not match,
 * otherwise a list with one element per capture group, where groups
 * that did not take part in the match are null. The string context of
 * the second argument is collected but not attached to the result.
 */
void prim_match(EvalState & state, const PosIdx pos, Value ** args, Value & v)
{
    auto re = state.forceStringNoCtx(
        *args[0], pos, "while evaluating the first argument passed to builtins.match"
    );

    try {
        auto regex = state.regexCache->get(re);

        NixStringContext context;
        const auto str = state.forceString(
            *args[1], context, pos, "while evaluating the second argument passed to builtins.match"
        );

        std::cmatch match;
        if (!std::regex_match(str.begin(), str.end(), match, regex)) {
            v.mkNull();
            return;
        }

        // the first match is the whole string
        const size_t len = match.size() - 1;
        state.mkList(v, len);
        for (size_t i = 0; i < len; ++i) {
            const auto & group = match[i + 1];
            auto slot = v.listElems()[i] = state.allocValue();
            if (group.matched) {
                slot->mkString(group.str());
            } else {
                slot->mkNull();
            }
        }
    } catch (const std::regex_error & e) {
        if (e.code() == std::regex_constants::error_space) {
            // limit is _GLIBCXX_REGEX_STATE_LIMIT for libstdc++
            state.error<EvalError>("memory limit exceeded by regular expression '%s'", re)
                .atPos(pos)
                .debugThrow();
        } else {
            state.error<EvalError>("invalid regular expression '%s'", re).atPos(pos).debugThrow();
        }
    }
}
/**
 * Registers prim_match as the primop named "__match" with the
 * arguments `regex` and `str`; `.doc` holds its documentation.
 */
static RegisterPrimOp primop_match({
.name = "__match",
.args = {"regex", "str"},
.doc = R"s(
Returns a list if the [extended POSIX regular
expression](http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04)
*regex* matches *str* precisely, otherwise returns `null`. Each item
in the list is a regex group.
```nix
builtins.match "ab" "abc"
```
Evaluates to `null`.
```nix
builtins.match "abc" "abc"
```
Evaluates to `[ ]`.
```nix
builtins.match "a(b)(c)" "abc"
```
Evaluates to `[ "b" "c" ]`.
```nix
builtins.match "[[:space:]]+([[:upper:]]+)[[:space:]]+" " FOO "
```
Evaluates to `[ "FOO" ]`.
)s",
.fun = prim_match,
});
/**
 * builtins.parseDrvName
 *
 * Parses the first argument (a string without context) as a DrvName
 * and returns an attribute set with its `name` and `version` parts.
 */
static void prim_parseDrvName(EvalState & state, const PosIdx pos, Value ** args, Value & v)
{
    const auto drvName = state.forceStringNoCtx(
        *args[0], pos, "while evaluating the first argument passed to builtins.parseDrvName"
    );
    const DrvName parsed(drvName);

    auto bindings = state.buildBindings(2);
    bindings.alloc(state.sName).mkString(parsed.name);
    bindings.alloc("version").mkString(parsed.version);
    v.mkAttrs(bindings);
}
/**
 * Registers prim_parseDrvName as the primop named "__parseDrvName"
 * with the single argument `s`; `.doc` holds its documentation.
 */
static RegisterPrimOp primop_parseDrvName({
.name = "__parseDrvName",
.args = {"s"},
.doc = R"(
Split the string *s* into a package name and version. The package
name is everything up to but not including the first dash not followed
by a letter, and the version is everything following that dash. The
result is returned in a set `{ name, version }`. Thus,
`builtins.parseDrvName "nix-0.12pre12876"` returns `{ name =
"nix"; version = "0.12pre12876"; }`.
)",
.fun = prim_parseDrvName,
});
/**
 * builtins.replaceStrings
 *
 * Scans the string (third argument) from left to right. At every
 * position the first entry of `from` that matches is replaced by the
 * entry of `to` with the same index. Entries of `to` are forced only
 * when they are first needed and are then cached by index; their
 * string contexts are merged into the context of the result.
 *
 * Raises an EvalError if `from` and `to` have different lengths.
 */
static void prim_replaceStrings(EvalState & state, const PosIdx pos, Value ** args, Value & v)
{
    state.forceList(
        *args[0], pos, "while evaluating the first argument passed to builtins.replaceStrings"
    );
    state.forceList(
        *args[1], pos, "while evaluating the second argument passed to builtins.replaceStrings"
    );
    if (args[0]->listSize() != args[1]->listSize()) {
        state
            .error<EvalError>(
                "'from' and 'to' arguments passed to builtins.replaceStrings have different lengths"
            )
            .atPos(pos)
            .debugThrow();
    }

    // Patterns are forced eagerly, replacements lazily.
    std::vector<std::string> patterns;
    patterns.reserve(args[0]->listSize());
    for (auto elem : args[0]->listItems()) {
        patterns.emplace_back(state.forceString(
            *elem,
            pos,
            "while evaluating one of the strings to replace passed to builtins.replaceStrings"
        ));
    }

    // Replacement strings that were already forced, keyed by index in `to`.
    std::unordered_map<size_t, std::string> forced;
    auto to = args[1]->listItems();

    NixStringContext context;
    auto s = state.forceString(
        *args[2],
        context,
        pos,
        "while evaluating the third argument passed to builtins.replaceStrings"
    );

    std::string res;
    // Loops one past last character to handle the case where 'from' contains an empty string.
    size_t p = 0;
    while (p <= s.size()) {
        // Find the first pattern matching at position p, keeping the
        // replacement iterator and index in step with it.
        auto pattern = patterns.begin();
        auto replacement = to.begin();
        size_t index = 0;
        while (pattern != patterns.end() && s.compare(p, pattern->size(), *pattern) != 0) {
            ++pattern;
            ++replacement;
            ++index;
        }

        if (pattern == patterns.end()) {
            // Nothing matches here: copy the current character unchanged.
            if (p < s.size()) {
                res += s[p];
            }
            p++;
            continue;
        }

        auto entry = forced.find(index);
        if (entry == forced.end()) {
            NixStringContext ctx;
            auto ts = state.forceString(
                **replacement,
                ctx,
                pos,
                "while evaluating one of the replacement strings passed to "
                "builtins.replaceStrings"
            );
            entry = (forced.emplace(index, ts)).first;
            for (auto & path : ctx) {
                context.insert(path);
            }
        }
        res += entry->second;

        if (pattern->empty()) {
            // An empty pattern matches everywhere; copy one character of
            // input after the replacement so the scan moves forward.
            if (p < s.size()) {
                res += s[p];
            }
            p++;
        } else {
            p += pattern->size();
        }
    }

    v.mkString(res, context);
}
/**
 * Registers prim_replaceStrings as the primop named "__replaceStrings"
 * with the arguments `from`, `to` and `s`; `.doc` holds its
 * documentation.
 */
static RegisterPrimOp primop_replaceStrings({
    .name = "__replaceStrings",
    .args = {"from", "to", "s"},
    .doc = R"(
Given string *s*, replace every occurrence of the strings in *from*
with the corresponding string in *to*.
The argument *to* is lazy, that is, it is only evaluated when its corresponding pattern in *from* is matched in the string *s*.
Example:
```nix
builtins.replaceStrings ["oo" "a"] ["a" "i"] "foobar"
```
evaluates to `"fabir"`.
)",
    .fun = prim_replaceStrings,
});
/**
 * builtins.split
 *
 * Splits the second argument at every match of the regular expression
 * given as first argument. The result alternates between the
 * non-matching pieces (strings) and, for each match, a list of its
 * capture groups (null for groups that did not participate), so it
 * always has 2 * (number of matches) + 1 elements. If nothing matches,
 * the only element is the second argument itself.
 */
void prim_split(EvalState & state, const PosIdx pos, Value ** args, Value & v)
{
    auto re = state.forceStringNoCtx(
        *args[0], pos, "while evaluating the first argument passed to builtins.split"
    );

    try {
        auto regex = state.regexCache->get(re);

        NixStringContext context;
        const auto str = state.forceString(
            *args[1], context, pos, "while evaluating the second argument passed to builtins.split"
        );

        const std::cregex_iterator firstMatch(str.begin(), str.end(), regex);
        const std::cregex_iterator noMatch;

        // Every match is surrounded by non-matching strings.
        const size_t matchCount = std::distance(firstMatch, noMatch);
        const size_t total = 2 * matchCount + 1;
        state.mkList(v, total);

        if (matchCount == 0) {
            v.listElems()[0] = args[1];
            return;
        }

        size_t idx = 0;
        for (auto it = firstMatch; it != noMatch; ++it) {
            // Each round fills at least two slots, the last one three.
            assert(idx <= total - 3);
            auto match = *it;

            // String of the characters preceding this match.
            (v.listElems()[idx++] = state.allocValue())->mkString(match.prefix().str());

            // List of the capture groups; sub-match 0 is the whole match
            // and is therefore left out.
            const size_t groupCount = match.size() - 1;
            auto groupList = v.listElems()[idx++] = state.allocValue();
            state.mkList(*groupList, groupCount);
            for (size_t g = 1; g <= groupCount; ++g) {
                auto slot = groupList->listElems()[g - 1] = state.allocValue();
                if (match[g].matched) {
                    slot->mkString(match[g].str());
                } else {
                    slot->mkNull();
                }
            }

            // After the final match a single slot remains: the suffix.
            if (idx == total - 1) {
                (v.listElems()[idx++] = state.allocValue())->mkString(match.suffix().str());
            }
        }

        assert(idx == total);
    } catch (const std::regex_error & e) {
        if (e.code() == std::regex_constants::error_space) {
            // limit is _GLIBCXX_REGEX_STATE_LIMIT for libstdc++
            state.error<EvalError>("memory limit exceeded by regular expression '%s'", re)
                .atPos(pos)
                .debugThrow();
        } else {
            state.error<EvalError>("invalid regular expression '%s'", re).atPos(pos).debugThrow();
        }
    }
}
/**
 * Registers prim_split as the primop named "__split" with the
 * arguments `regex` and `str`; `.doc` holds its documentation.
 */
static RegisterPrimOp primop_split({
.name = "__split",
.args = {"regex", "str"},
.doc = R"s(
Returns a list composed of non matched strings interleaved with the
lists of the [extended POSIX regular
expression](http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04)
*regex* matches of *str*. Each item in the lists of matched
sequences is a regex group.
```nix
builtins.split "(a)b" "abc"
```
Evaluates to `[ "" [ "a" ] "c" ]`.
```nix
builtins.split "([ac])" "abc"
```
Evaluates to `[ "" [ "a" ] "b" [ "c" ] "" ]`.
```nix
builtins.split "(a)|(c)" "abc"
```
Evaluates to `[ "" [ "a" null ] "b" [ null "c" ] "" ]`.
```nix
builtins.split "([[:upper:]]+)" " FOO "
```
Evaluates to `[ " " [ "FOO" ] " " ]`.
)s",
.fun = prim_split,
});
/**
 * builtins.splitVersion
 *
 * Calls nextComponent() on the version string (first argument, no
 * string context allowed) until the end is reached or an empty
 * component is returned, and stores the components in `v` as a list
 * of strings.
 */
static void prim_splitVersion(EvalState & state, const PosIdx pos, Value ** args, Value & v)
{
    auto version = state.forceStringNoCtx(
        *args[0], pos, "while evaluating the first argument passed to builtins.splitVersion"
    );

    Strings components;
    auto iter = version.cbegin();
    while (true) {
        if (iter == version.cend()) {
            break;
        }
        // nextComponent() moves `iter` past the component it returns.
        auto component = nextComponent(iter, version.cend());
        if (component.empty()) {
            break;
        }
        components.emplace_back(component);
    }

    state.mkList(v, components.size());
    for (const auto & [n, component] : enumerate(components)) {
        auto element = state.allocValue();
        v.listElems()[n] = element;
        element->mkString(std::move(component));
    }
}
/**
 * Registers prim_splitVersion as the primop named "__splitVersion"
 * with the single argument `s`; `.doc` holds its documentation.
 */
static RegisterPrimOp primop_splitVersion({
.name = "__splitVersion",
.args = {"s"},
.doc = R"(
Split a string representing a version into its components, by the
same version splitting logic underlying the version comparison in
[`nix-env -u`](../command-ref/nix-env.md#operation---upgrade).
)",
.fun = prim_splitVersion,
});
/**
 * builtins.stringLength
 *
 * Coerces the argument to a string and stores its size() in `v` as an
 * integer. The string context collected during coercion is discarded.
 */
static void prim_stringLength(EvalState & state, const PosIdx pos, Value ** args, Value & v)
{
    NixStringContext context;
    const auto length =
        state
            .coerceToString(
                pos, *args[0], context, "while evaluating the argument passed to builtins.stringLength"
            )
            ->size();
    v.mkInt(length);
}
/**
 * Registers prim_stringLength as the primop named "__stringLength"
 * with the single argument `e`; `.doc` holds its documentation.
 */
static RegisterPrimOp primop_stringLength({
.name = "__stringLength",
.args = {"e"},
.doc = R"(
Return the length of the string *e*. If *e* is not a string,
evaluation is aborted.
)",
.fun = prim_stringLength,
});
/**
 * builtins.substring
 *
 * `substring start len s` returns the part of `s` that begins at
 * offset `start` and is at most `len` characters long. An offset at or
 * beyond the end of the string yields the empty string; a length that
 * is negative or reaches past the end selects everything up to the end
 * of the string. The string context of `s` is kept in the result.
 *
 * Raises an EvalError if `start` is negative.
 */
static void prim_substring(EvalState & state, const PosIdx pos, Value ** args, Value & v)
{
    // Keep the integer width forceInt returns: narrowing to `int` would
    // silently wrap large offsets and lengths.
    const auto start = state.forceInt(
        *args[0],
        pos,
        "while evaluating the first argument (the start offset) passed to builtins.substring"
    );
    if (start < 0) {
        state.error<EvalError>("negative start position in 'substring'").atPos(pos).debugThrow();
    }

    const auto len = state.forceInt(
        *args[1],
        pos,
        "while evaluating the second argument (the substring length) passed to builtins.substring"
    );

    // Special-case on empty substring to avoid O(n) strlen
    // This allows for the use of empty substrings to efficiently capture string context
    if (len == 0) {
        state.forceValue(*args[2], pos);
        if (args[2]->type() == nString) {
            v.mkString("", args[2]->string.context);
            return;
        }
    }

    NixStringContext context;
    auto s = state.coerceToString(
        pos,
        *args[2],
        context,
        "while evaluating the third argument (the string) passed to builtins.substring"
    );

    const auto size = s->size();
    // `start` is non-negative here, so comparing it as an unsigned 64-bit
    // value is exact even where size_t is narrower than the integer type.
    if (static_cast<uint64_t>(start) >= size) {
        v.mkString("", context);
        return;
    }

    // Negative and over-long lengths both mean "up to the end of the string".
    const auto count = (len < 0 || static_cast<uint64_t>(len) >= size)
        ? std::string_view::npos
        : static_cast<size_t>(len);
    v.mkString(s->substr(static_cast<size_t>(start), count), context);
}
/**
 * Registers prim_substring as the primop named "__substring" with the
 * arguments `start`, `len` and `s`; `.doc` holds its documentation.
 */
static RegisterPrimOp primop_substring({
.name = "__substring",
.args = {"start", "len", "s"},
.doc = R"(
Return the substring of *s* from character position *start*
(zero-based) up to but not including *start + len*. If *start* is
greater than the length of the string, an empty string is returned,
and if *start + len* lies beyond the end of the string, only the
substring up to the end of the string is returned. *start* must be
non-negative. For example,
```nix
builtins.substring 0 3 "nixos"
```
evaluates to `"nix"`.
)",
.fun = prim_substring,
});
/**
 * builtins.toString
 *
 * Coerces the first argument to a string and stores it in `v` together
 * with the string context collected during the coercion.
 */
static void prim_toString(EvalState & state, const PosIdx pos, Value ** args, Value & v)
{
    NixStringContext context;
    v.mkString(
        *state.coerceToString(
            pos,
            *args[0],
            context,
            "while evaluating the first argument passed to builtins.toString",
            true,
            false
        ),
        context
    );
}
/**
 * Registers prim_toString as the primop named "toString" with the
 * single argument `e`; `.doc` holds its documentation text.
 */
static RegisterPrimOp primop_toString({
    .name = "toString",
    .args = {"e"},
    .doc = R"(
Convert the expression *e* to a string. *e* can be:
- A string (in which case the string is returned unmodified).
- A path (e.g., `toString /foo/bar` yields `"/foo/bar"`).
- A set containing `{ __toString = self: ...; }` or `{ outPath = ...; }`.
- An integer.
- A list, in which case the string representations of its elements
are joined with spaces.
- A Boolean (`false` yields `""`, `true` yields `"1"`).
- `null`, which yields the empty string.
)",
    .fun = prim_toString,
});
}