forked from lix-project/lix
Add a primop for regular expression pattern matching
The function ‘builtins.match’ takes a POSIX extended regular expression and an arbitrary string. It returns ‘null’ if the string does not match the regular expression. Otherwise, it returns a list containing the substring matches corresponding to the parenthesized groups in the regex; a group that did not take part in the match is represented by ‘null’. The regex must match the entire string (i.e. there is an implied "^<pat>$" around the regex).

For example:

  match "foo" "foobar" => null
  match "foo" "foo" => []
  match "f(o+)(.*)" "foooobar" => ["oooo" "bar"]
  match "(.*/)?([^/]*)" "/dir/file.nix" => ["/dir/" "file.nix"]
  match "(.*/)?([^/]*)" "file.nix" => [null "file.nix"]

The following example finds all regular files with extension .nix or .patch underneath the current directory:

  let
    findFiles = pat: dir: concatLists (mapAttrsToList (name: type:
      if type == "directory" then
        findFiles pat (dir + "/" + name)
      else if type == "regular" && match pat name != null then
        [(dir + "/" + name)]
      else []) (readDir dir));
  in findFiles ".*\\.(nix|patch)" (toString ./.)
This commit is contained in:
parent
4e340a983f
commit
976df480c9
5 changed files with 84 additions and 5 deletions
|
@ -1430,7 +1430,34 @@ static void prim_hashString(EvalState & state, const Pos & pos, Value * * args,
|
||||||
string s = state.forceString(*args[1], context, pos);
|
string s = state.forceString(*args[1], context, pos);
|
||||||
|
|
||||||
mkString(v, printHash(hashString(ht, s)), context);
|
mkString(v, printHash(hashString(ht, s)), context);
|
||||||
};
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Match a regular expression against a string and return either
|
||||||
|
‘null’ or a list containing substring matches. */
|
||||||
|
static void prim_match(EvalState & state, const Pos & pos, Value * * args, Value & v)
|
||||||
|
{
|
||||||
|
Regex regex(state.forceStringNoCtx(*args[0], pos), true);
|
||||||
|
|
||||||
|
PathSet context;
|
||||||
|
string s = state.forceString(*args[1], context, pos);
|
||||||
|
|
||||||
|
Regex::Subs subs;
|
||||||
|
if (!regex.matches(s, subs)) {
|
||||||
|
mkNull(v);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int len = subs.empty() ? 0 : subs.rbegin()->first + 1;
|
||||||
|
state.mkList(v, len);
|
||||||
|
for (unsigned int n = 0; n < len; ++n) {
|
||||||
|
auto i = subs.find(n);
|
||||||
|
if (i == subs.end())
|
||||||
|
mkNull(*(v.list.elems[n] = state.allocValue()));
|
||||||
|
else
|
||||||
|
mkString(*(v.list.elems[n] = state.allocValue()), i->second);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*************************************************************
|
/*************************************************************
|
||||||
|
@ -1584,6 +1611,7 @@ void EvalState::createBaseEnv()
|
||||||
addPrimOp("__unsafeDiscardStringContext", 1, prim_unsafeDiscardStringContext);
|
addPrimOp("__unsafeDiscardStringContext", 1, prim_unsafeDiscardStringContext);
|
||||||
addPrimOp("__unsafeDiscardOutputDependency", 1, prim_unsafeDiscardOutputDependency);
|
addPrimOp("__unsafeDiscardOutputDependency", 1, prim_unsafeDiscardOutputDependency);
|
||||||
addPrimOp("__hashString", 2, prim_hashString);
|
addPrimOp("__hashString", 2, prim_hashString);
|
||||||
|
addPrimOp("__match", 2, prim_match);
|
||||||
|
|
||||||
// Versions
|
// Versions
|
||||||
addPrimOp("__parseDrvName", 1, prim_parseDrvName);
|
addPrimOp("__parseDrvName", 1, prim_parseDrvName);
|
||||||
|
|
|
@ -1,13 +1,16 @@
|
||||||
#include "regex.hh"
#include "types.hh"

#include <algorithm>
#include <vector>
|
||||||
|
|
||||||
namespace nix {
|
namespace nix {
|
||||||
|
|
||||||
/* Compile ‘pattern’ as a POSIX extended regular expression.  The
   pattern is wrapped as "^(<pattern>)$" so that it has to match the
   entire input string.  When ‘subs’ is false the regex is compiled
   with REG_NOSUB and no sub-expression matches are tracked; when it
   is true, ‘nrParens’ is set to the number of groups in ‘pattern’
   itself.  Throws RegexError if the pattern does not compile. */
Regex::Regex(const string & pattern, bool subs)
{
    /* Patterns must match the entire string. */
    const string anchored = "^(" + pattern + ")$";
    int err = regcomp(&preg, anchored.c_str(), (subs ? 0 : REG_NOSUB) | REG_EXTENDED);
    if (err) throw RegexError(format("compiling pattern ‘%1%’: %2%") % pattern % showError(err));

    /* Ask the regex library how many groups it found instead of
       counting '(' characters: a textual count also picks up escaped
       parentheses ("\\(") and parentheses inside bracket expressions
       ("[(]").  ‘re_nsub’ includes the anchoring group added above,
       hence the "- 1". */
    nrParens = (subs && preg.re_nsub > 0)
        ? static_cast<unsigned>(preg.re_nsub - 1)
        : 0;
}
|
||||||
|
|
||||||
Regex::~Regex()
|
Regex::~Regex()
|
||||||
|
@ -23,6 +26,20 @@ bool Regex::matches(const string & s)
|
||||||
throw Error(format("matching string ‘%1%’: %2%") % s % showError(err));
|
throw Error(format("matching string ‘%1%’: %2%") % s % showError(err));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Regex::matches(const string & s, Subs & subs)
|
||||||
|
{
|
||||||
|
regmatch_t pmatch[nrParens + 2];
|
||||||
|
int err = regexec(&preg, s.c_str(), nrParens + 2, pmatch, 0);
|
||||||
|
if (err == 0) {
|
||||||
|
for (unsigned int n = 2; n < nrParens + 2; ++n)
|
||||||
|
if (pmatch[n].rm_eo != -1)
|
||||||
|
subs[n - 2] = string(s, pmatch[n].rm_so, pmatch[n].rm_eo - pmatch[n].rm_so);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else if (err == REG_NOMATCH) return false;
|
||||||
|
throw Error(format("matching string ‘%1%’: %2%") % s % showError(err));
|
||||||
|
}
|
||||||
|
|
||||||
string Regex::showError(int err)
|
string Regex::showError(int err)
|
||||||
{
|
{
|
||||||
char buf[256];
|
char buf[256];
|
||||||
|
|
|
@ -5,16 +5,23 @@
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <regex.h>
|
#include <regex.h>
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
|
||||||
namespace nix {
|
namespace nix {
|
||||||
|
|
||||||
|
MakeError(RegexError, Error)
|
||||||
|
|
||||||
class Regex
|
class Regex
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
Regex(const string & pattern);
|
Regex(const string & pattern, bool subs = false);
|
||||||
~Regex();
|
~Regex();
|
||||||
bool matches(const string & s);
|
bool matches(const string & s);
|
||||||
|
typedef std::map<unsigned int, string> Subs;
|
||||||
|
bool matches(const string & s, Subs & subs);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
unsigned nrParens;
|
||||||
regex_t preg;
|
regex_t preg;
|
||||||
string showError(int err);
|
string showError(int err);
|
||||||
};
|
};
|
||||||
|
|
1
tests/lang/eval-okay-regex-match.exp
Normal file
1
tests/lang/eval-okay-regex-match.exp
Normal file
|
@ -0,0 +1 @@
|
||||||
|
true
|
26
tests/lang/eval-okay-regex-match.nix
Normal file
26
tests/lang/eval-okay-regex-match.nix
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
with builtins;

let

  # True iff ‘pat’ matches ‘str’, i.e. ‘match’ did not return null.
  isMatch = pat: str: null != match pat str;

  # Split a .nix/.cc file name into: directory with trailing slash,
  # directory without it, base name, and extension.
  splitFileName = match "((.*)/)?([^/]*)\\.(nix|cc)";

in

# Literal patterns and repetition operators.
assert isMatch "foobar" "foobar";
assert isMatch "fo*" "f";
assert !isMatch "fo+" "f";
assert isMatch "fo*" "fo";
assert isMatch "fo*" "foo";
assert isMatch "fo+" "foo";
assert isMatch "fo{1,2}" "foo";
assert !isMatch "fo{1,2}" "fooo";

# The pattern has to cover the whole string, not just a prefix.
assert !isMatch "fo*" "foobar";

# A single capture group.
assert match "(.*)\\.nix" "foobar.nix" == [ "foobar" ];

# Nested and optional groups; groups that did not match are null.
assert splitFileName "/path/to/foobar.nix" == [ "/path/to/" "/path/to" "foobar" "nix" ];
assert splitFileName "foobar.cc" == [ null null "foobar" "cc" ];

true
|
Loading…
Reference in a new issue