initial commit
This commit is contained in:
commit
c8bdd1547b
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
/.cache
|
||||
/.direnv
|
||||
result
|
||||
result-*
|
47
README.md
Normal file
47
README.md
Normal file
|
@ -0,0 +1,47 @@
|
|||
# Regular expressions are hard
|
||||
|
||||
Writing a high-quality implementation of POSIX Extended Regular Expressions does not seem easy.
|
||||
Ideally, the following features would be offered at the same time:
|
||||
|
||||
* Strict standards compliance (see https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap09.html and https://pubs.opengroup.org/onlinepubs/9799919799/functions/regexec.html).
|
||||
* Predictable performance (polynomial in the length of the inputs, and linear in the length of the matched string).
|
||||
* Limited resource usage (memory and CPU time; the latter is also related to the previous point).
|
||||
* Absence of vendor-specific syntax extensions (for portability).
|
||||
|
||||
In practice, most implementations fall short of these goals, due to a variety of issues:
|
||||
|
||||
* Unclear wording in the standards, leading to diverging outcomes between implementations.
|
||||
* Exponential matching time due to a backtracking implementation (sometimes even mandated by non-standard syntax extensions).
|
||||
* Excessive consumption of stack memory.
|
||||
* Spurious matching failures (either indication of a non-match, or an error).
|
||||
* Incorrect capturing behaviour of parenthesised subexpressions.
|
||||
|
||||
Here we test a small sample of regular expressions that are known to be hard to match properly against a couple of popular regex engines.
|
||||
|
||||
## Run it
|
||||
|
||||
Build using Lix:
|
||||
|
||||
nix-build -A default # or libcxx, or musl
|
||||
|
||||
List all available engines:
|
||||
|
||||
result/bin/driver list
|
||||
|
||||
Check the specified engines (tries all if none is specified, not recommended on libstdc++ because `std` may crash):
|
||||
|
||||
result/bin/driver check [engine]…
|
||||
|
||||
Print the match results of the specified engines (tries all if none is specified, not recommended on libstdc++ because `std` may crash):
|
||||
|
||||
result/bin/driver results [engine]…
|
||||
|
||||
## List of supported engines
|
||||
|
||||
* Boost.Regex (`boost`)
|
||||
* C standard library (`c`)
|
||||
* Oniguruma (`oniguruma`, does not claim POSIX compliance)
|
||||
* PCRE2 (`pcre`, does not claim POSIX compliance)
|
||||
* RE2 (`re2`, does not claim POSIX compliance)
|
||||
* C++ standard library (`std`)
|
||||
* TRE (`tre`)
|
14
default.nix
Normal file
14
default.nix
Normal file
|
@ -0,0 +1,14 @@
|
|||
let
  # Pinned nixpkgs revision, so every build uses the same package set.
  nixpkgsSrc = builtins.fetchTarball {
    url = "https://github.com/NixOS/nixpkgs/archive/d0e1602ddde669d5beb01aec49d71a51937ed7be.tar.gz";
    sha256 = "0g0m7zhpnbgzwn4gmqhjvqd9v6d917p1dg3fk1kwxs2x7v7c1zd4";
  };
  pkgs = import nixpkgsSrc { };
in
{
  inherit pkgs;

  # Build with the default toolchain of the pinned package set.
  default = pkgs.callPackage ./package.nix { };

  # Build with the LLVM package set.
  libcxx = pkgs.pkgsLLVM.callPackage ./package.nix {
    boost = null; # fails to compile
  };

  # Build with the static package set.
  musl = pkgs.pkgsStatic.callPackage ./package.nix { };
}
|
30
meson.build
Normal file
30
meson.build
Normal file
|
@ -0,0 +1,30 @@
|
|||
project(
  'regex-is-hard',
  'cpp',
  default_options: [
    'buildtype=debugoptimized',
    'cpp_std=c++20',
    'warning_level=3',
  ],
)

# Boost.Regex is the only optional engine; all others are required.
boost = dependency('boost', modules: ['regex'], required: false)
oniguruma = dependency('oniguruma')
pcre = dependency('libpcre2-8')
re2 = dependency('re2')
tre = dependency('tre')

# config.h exposes HAVE_BOOST (0 or 1) to the sources.
config_h = configure_file(
  output: 'config.h',
  configuration: {
    'HAVE_BOOST': boost.found().to_int(),
  },
)

# The driver plus one translation unit per engine that is always built.
sources = ['src/main.cc', 'src/data.cc']
foreach engine : ['c', 'oniguruma', 'pcre', 're2', 'std', 'tre']
  sources += 'src/engine_@0@.cc'.format(engine)
endforeach

# The Boost engine is only compiled when the dependency was found.
if boost.found()
  sources += 'src/engine_boost.cc'
endif

driver = executable(
  'driver',
  sources,
  dependencies: [boost, oniguruma, pcre, re2, tre],
  install: true,
)
|
39
package.nix
Normal file
39
package.nix
Normal file
|
@ -0,0 +1,39 @@
|
|||
{
  stdenv,
  lib,
  meson,
  ninja,
  pkg-config,
  boost,
  oniguruma,
  pcre2,
  re2,
  tre,
}:
let
  # Only the build script and the sources are part of the derivation input.
  sourceFiles = lib.fileset.unions [
    ./meson.build
    ./src
  ];
in
stdenv.mkDerivation {
  name = "regex-is-hard";

  src = lib.fileset.toSource {
    root = ./.;
    fileset = sourceFiles;
  };

  strictDeps = true;

  # Build-time tools.
  nativeBuildInputs = [
    meson
    ninja
    pkg-config
  ];

  # Regex engines linked into the driver.
  buildInputs = [
    boost
    oniguruma
    pcre2
    re2
    tre
  ];
}
|
15
shell.nix
Normal file
15
shell.nix
Normal file
|
@ -0,0 +1,15 @@
|
|||
let
  # Reuse the pinned package set and the package definition from default.nix.
  project = import ./.;
  pkgs = project.pkgs;
in
pkgs.mkShell {
  strictDeps = true;

  # Pull in everything needed to build the default package.
  inputsFrom = [ project.default ];

  # Extra development tools.
  nativeBuildInputs = [
    pkgs.clang-tools
    pkgs.nil
    pkgs.nixfmt-rfc-style
  ];
}
|
47
src/data.cc
Normal file
47
src/data.cc
Normal file
|
@ -0,0 +1,47 @@
|
|||
#include "config.h"
|
||||
#include "data.hh"
|
||||
|
||||
#if HAVE_BOOST
|
||||
std::unique_ptr<Regex> compileBoost(const std::string & re);
|
||||
#endif
|
||||
std::unique_ptr<Regex> compileC(const std::string & re);
|
||||
std::unique_ptr<Regex> compileOniguruma(const std::string & re);
|
||||
std::unique_ptr<Regex> compilePCRE(const std::string & re);
|
||||
std::unique_ptr<Regex> compileRE2(const std::string & re);
|
||||
std::unique_ptr<Regex> compileStd(const std::string & re);
|
||||
std::unique_ptr<Regex> compileTRE(const std::string & re);
|
||||
|
||||
std::map<std::string, std::unique_ptr<Regex>(*)(const std::string &)> engines {
|
||||
#if HAVE_BOOST
|
||||
{"boost", compileBoost},
|
||||
#endif
|
||||
{"c", compileC},
|
||||
{"oniguruma", compileOniguruma},
|
||||
{"pcre", compilePCRE},
|
||||
{"re2", compileRE2},
|
||||
{"std", compileStd},
|
||||
{"tre", compileTRE},
|
||||
};
|
||||
|
||||
std::string aaaaa(500000, 'a');
|
||||
|
||||
std::vector<std::pair<std::string, std::vector<std::pair<std::string, MatchResult>>>> testCases {
|
||||
{"\\.*(.*)", {{".keep", {{".keep", "keep"}}}}},
|
||||
{".*(ex|gexp).*", {{"regexp", {{"regexp", "ex"}}}}},
|
||||
{".*(gexp|ex).*", {{"regexp", {{"regexp", "ex"}}}}},
|
||||
{"F.chsin", {{"Füchsin", {{"Füchsin"}}}}},
|
||||
{"F..chsin", {{"Füchsin", std::nullopt}}},
|
||||
{"(a*)*", {
|
||||
{"a", {{"a", "a"}}}, // unclear
|
||||
{"aaaaaaaaaaaaaah", std::nullopt},
|
||||
{"aaaaaaaaaaaaaaah", std::nullopt},
|
||||
{"aaaaaaaaaaaaaaaah", std::nullopt},
|
||||
}},
|
||||
{"(a+)+", {
|
||||
{"a", {{"a", "a"}}},
|
||||
{"aaaaaaaaaaaaaaaaaaaaaah", std::nullopt},
|
||||
}},
|
||||
{"(a(h)?)*", {{"aha", {{"aha", "a", std::nullopt}}}}},
|
||||
{"((aa)*)*", {{"aa", {{"aa", "", std::nullopt}}}}}, // unclear
|
||||
{".*", {{aaaaa, {{aaaaa}}}}}, // keep this at the bottom because libstdc++ crashes
|
||||
};
|
9
src/data.hh
Normal file
9
src/data.hh
Normal file
|
@ -0,0 +1,9 @@
|
|||
#pragma once
|
||||
|
||||
#include "engine.hh"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
|
||||
extern std::map<std::string, std::unique_ptr<Regex>(*)(const std::string &)> engines;
|
||||
extern std::vector<std::pair<std::string, std::vector<std::pair<std::string, MatchResult>>>> testCases;
|
13
src/engine.hh
Normal file
13
src/engine.hh
Normal file
|
@ -0,0 +1,13 @@
|
|||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
/// Result of one match attempt. `std::nullopt` means the haystack did not
/// match. Otherwise the vector holds one entry per group, with the overall
/// match at index 0; an entry of `std::nullopt` marks a group that did not
/// take part in the match.
using MatchResult = std::optional<std::vector<std::optional<std::string_view>>>;

/// Common interface implemented by every regex engine adapter.
class Regex {
public:
    virtual ~Regex() = default;

    /// Matches `haystack` against the compiled expression and reports the
    /// matched groups, or `std::nullopt` when there is no match.
    virtual MatchResult match(std::string_view haystack) = 0;
};
|
33
src/engine_boost.cc
Normal file
33
src/engine_boost.cc
Normal file
|
@ -0,0 +1,33 @@
|
|||
#include "engine.hh"
|
||||
|
||||
#include <boost/regex.hpp>
|
||||
|
||||
class RegexBoost : public Regex {
|
||||
boost::regex inner;
|
||||
|
||||
public:
|
||||
RegexBoost(const std::string & re)
|
||||
: inner(re, boost::regex::extended)
|
||||
{}
|
||||
|
||||
MatchResult match(std::string_view haystack) override {
|
||||
boost::cmatch matches;
|
||||
if (boost::regex_match(haystack.begin(), haystack.end(), matches, inner)) {
|
||||
std::vector<std::optional<std::string_view>> result;
|
||||
for (const auto & match : matches) {
|
||||
if (match.matched) {
|
||||
result.push_back(std::string_view(match.first, match.second));
|
||||
} else {
|
||||
result.push_back(std::nullopt);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
} else {
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
std::unique_ptr<Regex> compileBoost(const std::string & re) {
|
||||
return std::make_unique<RegexBoost>(re);
|
||||
}
|
52
src/engine_c.cc
Normal file
52
src/engine_c.cc
Normal file
|
@ -0,0 +1,52 @@
|
|||
#include "engine.hh"
|
||||
|
||||
#include <format>
|
||||
#include <memory>
|
||||
#include <regex.h>
|
||||
#include <string>
|
||||
|
||||
/// Adapter exposing the C library's POSIX regcomp/regexec through the Regex interface.
class RegexC : public Regex {
    regex_t inner;
    // Scratch buffer for regexec, sized re_nsub + 1 (whole match plus every group).
    std::vector<regmatch_t> matches;

public:
    /// Compiles `re` as an extended regex anchored at both ends.
    /// Throws the regcomp error code (an `int`) on failure.
    RegexC(const std::string & re) {
        // Bug: does not work for regex with embedded null bytes.
        // NOTE(review): `^` and `$` are concatenated to the pattern, so for a
        // pattern with a top-level alternation they only anchor the first and
        // last branch respectively.
        const std::string anchored = std::format("^{}$", re);
        if (int code = regcomp(&inner, anchored.c_str(), REG_EXTENDED); code != 0) {
            throw code;
        }
        matches.resize(inner.re_nsub + 1);
    }

    RegexC(const RegexC &) = delete;
    RegexC & operator =(const RegexC &) = delete;

    ~RegexC() {
        regfree(&inner);
    }

    /// Matches `haystack`; returns the groups, `std::nullopt` on REG_NOMATCH,
    /// and throws any other regexec error code (an `int`).
    MatchResult match(std::string_view haystack) override {
        // Bug: does not work for haystack with embedded null bytes.
        // regexec needs a null-terminated string, hence the copy.
        const std::string terminated(haystack);
        int code = regexec(&inner, terminated.c_str(), matches.size(), matches.data(), 0);
        if (code == REG_NOMATCH) {
            return std::nullopt;
        }
        if (code != 0) {
            throw code;
        }
        std::vector<std::optional<std::string_view>> captured;
        for (const regmatch_t & span : matches) {
            if (span.rm_so == -1 && span.rm_eo == -1) {
                // Both offsets are -1: the group did not take part in the match.
                captured.push_back(std::nullopt);
            } else {
                // Offsets are relative to the start of the haystack.
                captured.push_back(std::string_view(haystack.data() + span.rm_so, haystack.data() + span.rm_eo));
            }
        }
        return captured;
    }
};
|
||||
|
||||
std::unique_ptr<Regex> compileC(const std::string & re) {
|
||||
return std::make_unique<RegexC>(re);
|
||||
}
|
69
src/engine_oniguruma.cc
Normal file
69
src/engine_oniguruma.cc
Normal file
|
@ -0,0 +1,69 @@
|
|||
#include "engine.hh"
|
||||
|
||||
#include <memory>
|
||||
#include <oniguruma.h>
|
||||
|
||||
bool onigurumaInitialised = false;
|
||||
|
||||
class RegexOniguruma : public Regex {
|
||||
OnigRegex inner;
|
||||
OnigRegion * matches;
|
||||
|
||||
public:
|
||||
RegexOniguruma(const std::string & re) {
|
||||
if (!onigurumaInitialised) {
|
||||
OnigEncoding encodings[] { ONIG_ENCODING_UTF8 };
|
||||
onig_initialize(encodings, sizeof(encodings) / sizeof(OnigEncoding));
|
||||
onigurumaInitialised = true;
|
||||
}
|
||||
OnigErrorInfo error;
|
||||
int code = onig_new(
|
||||
&inner,
|
||||
reinterpret_cast<const unsigned char *>(re.data()),
|
||||
reinterpret_cast<const unsigned char *>(re.data() + re.size()),
|
||||
ONIG_OPTION_NONE,
|
||||
ONIG_ENCODING_UTF8,
|
||||
ONIG_SYNTAX_POSIX_EXTENDED,
|
||||
&error
|
||||
);
|
||||
if (code != ONIG_NORMAL) {
|
||||
throw code;
|
||||
}
|
||||
matches = onig_region_new();
|
||||
}
|
||||
|
||||
RegexOniguruma(const RegexOniguruma &) = delete;
|
||||
RegexOniguruma & operator =(const RegexOniguruma &) = delete;
|
||||
|
||||
~RegexOniguruma() {
|
||||
onig_region_free(matches, 1);
|
||||
onig_free(inner);
|
||||
}
|
||||
|
||||
MatchResult match(std::string_view haystack) override {
|
||||
int code = onig_match(
|
||||
inner,
|
||||
reinterpret_cast<const unsigned char *>(haystack.data()),
|
||||
reinterpret_cast<const unsigned char *>(haystack.data() + haystack.size()),
|
||||
reinterpret_cast<const unsigned char *>(haystack.data()),
|
||||
matches,
|
||||
ONIG_OPTION_MATCH_WHOLE_STRING
|
||||
);
|
||||
if (code >= 0) {
|
||||
std::vector<std::optional<std::string_view>> result;
|
||||
size_t n = matches->num_regs;
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
result.push_back(std::string_view(haystack.data() + matches->beg[i], haystack.data() + matches->end[i]));
|
||||
}
|
||||
return result;
|
||||
} else if (code == ONIG_MISMATCH) {
|
||||
return std::nullopt;
|
||||
} else {
|
||||
throw code;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
std::unique_ptr<Regex> compileOniguruma(const std::string & re) {
|
||||
return std::make_unique<RegexOniguruma>(re);
|
||||
}
|
57
src/engine_pcre.cc
Normal file
57
src/engine_pcre.cc
Normal file
|
@ -0,0 +1,57 @@
|
|||
#include "engine.hh"
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
|
||||
#include <memory>
|
||||
#include <pcre2.h>
|
||||
#include <string>
|
||||
|
||||
class RegexPCRE : public Regex {
|
||||
pcre2_code * inner;
|
||||
pcre2_match_data * matches;
|
||||
|
||||
public:
|
||||
RegexPCRE(const std::string & re) {
|
||||
int errcode;
|
||||
size_t erroffset;
|
||||
inner = pcre2_compile(reinterpret_cast<const unsigned char *>(re.data()), re.size(), PCRE2_ANCHORED | PCRE2_ENDANCHORED, &errcode, &erroffset, nullptr);
|
||||
if (!inner) {
|
||||
throw errcode;
|
||||
}
|
||||
matches = pcre2_match_data_create_from_pattern(inner, nullptr);
|
||||
}
|
||||
|
||||
RegexPCRE(const RegexPCRE &) = delete;
|
||||
RegexPCRE & operator =(const RegexPCRE &) = delete;
|
||||
|
||||
~RegexPCRE() {
|
||||
pcre2_match_data_free(matches);
|
||||
pcre2_code_free(inner);
|
||||
}
|
||||
|
||||
MatchResult match(std::string_view haystack) override {
|
||||
int code = pcre2_match(inner, reinterpret_cast<const unsigned char *>(haystack.data()), haystack.size(), 0, 0, matches, nullptr);
|
||||
if (code > 0) {
|
||||
auto pMatch = pcre2_get_ovector_pointer(matches);
|
||||
std::vector<std::optional<std::string_view>> result;
|
||||
for (size_t i = 0; i < static_cast<size_t>(pcre2_get_ovector_count(matches)); ++i) {
|
||||
auto start = pMatch[2 * i];
|
||||
auto end = pMatch[2 * i + 1];
|
||||
if (start != static_cast<size_t>(-1) || end != static_cast<size_t>(-1)) {
|
||||
result.push_back(std::string_view(haystack.data() + start, haystack.data() + end));
|
||||
} else {
|
||||
result.push_back(std::nullopt);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
} else if (code == PCRE2_ERROR_NOMATCH) {
|
||||
return std::nullopt;
|
||||
} else {
|
||||
throw code;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
std::unique_ptr<Regex> compilePCRE(const std::string & re) {
|
||||
return std::make_unique<RegexPCRE>(re);
|
||||
}
|
37
src/engine_re2.cc
Normal file
37
src/engine_re2.cc
Normal file
|
@ -0,0 +1,37 @@
|
|||
#include "engine.hh"
|
||||
|
||||
#include <re2/re2.h>
|
||||
|
||||
class RegexRE2 : public Regex {
|
||||
RE2 inner;
|
||||
std::vector<std::optional<std::string_view>> matches;
|
||||
std::vector<RE2::Arg> matchArgs;
|
||||
std::vector<const RE2::Arg *> matchArgPointers;
|
||||
|
||||
public:
|
||||
RegexRE2(const std::string & re)
|
||||
: inner(re)
|
||||
{
|
||||
size_t n = inner.NumberOfCapturingGroups();
|
||||
matches = std::vector<std::optional<std::string_view>>(n + 1);
|
||||
for (size_t i = 1; i <= n; ++i) {
|
||||
matchArgs.emplace_back(&matches[i]);
|
||||
}
|
||||
for (const auto & arg : matchArgs) {
|
||||
matchArgPointers.push_back(&arg);
|
||||
}
|
||||
}
|
||||
|
||||
MatchResult match(std::string_view haystack) override {
|
||||
if (RE2::FullMatchN(haystack, inner, matchArgPointers.data(), matchArgPointers.size())) {
|
||||
matches[0] = haystack;
|
||||
return matches;
|
||||
} else {
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
std::unique_ptr<Regex> compileRE2(const std::string & re) {
|
||||
return std::make_unique<RegexRE2>(re);
|
||||
}
|
33
src/engine_std.cc
Normal file
33
src/engine_std.cc
Normal file
|
@ -0,0 +1,33 @@
|
|||
#include "engine.hh"
|
||||
|
||||
#include <regex>
|
||||
|
||||
class RegexStd : public Regex {
|
||||
std::regex inner;
|
||||
|
||||
public:
|
||||
RegexStd(const std::string & re)
|
||||
: inner(re, std::regex::extended)
|
||||
{}
|
||||
|
||||
MatchResult match(std::string_view haystack) override {
|
||||
std::cmatch matches;
|
||||
if (std::regex_match(haystack.begin(), haystack.end(), matches, inner)) {
|
||||
std::vector<std::optional<std::string_view>> result;
|
||||
for (const auto & match : matches) {
|
||||
if (match.matched) {
|
||||
result.push_back(std::string_view(match.first, match.second));
|
||||
} else {
|
||||
result.push_back(std::nullopt);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
} else {
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
std::unique_ptr<Regex> compileStd(const std::string & re) {
|
||||
return std::make_unique<RegexStd>(re);
|
||||
}
|
50
src/engine_tre.cc
Normal file
50
src/engine_tre.cc
Normal file
|
@ -0,0 +1,50 @@
|
|||
#include "engine.hh"
|
||||
|
||||
#include <format>
|
||||
#include <memory>
|
||||
#include <tre/tre.h>
|
||||
|
||||
/// Adapter exposing TRE's length-aware regncomp/regnexec through the Regex interface.
class RegexTRE : public Regex {
    regex_t inner;
    // Scratch buffer for tre_regnexec, sized re_nsub + 1 (whole match plus every group).
    std::vector<regmatch_t> matches;

public:
    /// Compiles `re` as an extended regex anchored at both ends.
    /// Throws the TRE error code (an `int`) on failure.
    RegexTRE(const std::string & re) {
        // NOTE(review): `^` and `$` are concatenated to the pattern, so for a
        // pattern with a top-level alternation they only anchor the first and
        // last branch respectively.
        const std::string anchored = std::format("^{}$", re);
        if (int code = tre_regncomp(&inner, anchored.data(), anchored.size(), REG_EXTENDED); code != 0) {
            throw code;
        }
        matches.resize(inner.re_nsub + 1);
    }

    RegexTRE(const RegexTRE &) = delete;
    RegexTRE & operator =(const RegexTRE &) = delete;

    ~RegexTRE() {
        tre_regfree(&inner);
    }

    /// Matches `haystack`; returns the groups, `std::nullopt` on REG_NOMATCH,
    /// and throws any other error code (an `int`).
    MatchResult match(std::string_view haystack) override {
        int code = tre_regnexec(&inner, haystack.data(), haystack.size(), matches.size(), matches.data(), 0);
        if (code == REG_NOMATCH) {
            return std::nullopt;
        }
        if (code != 0) {
            throw code;
        }
        std::vector<std::optional<std::string_view>> captured;
        for (const regmatch_t & span : matches) {
            if (span.rm_so == -1 && span.rm_eo == -1) {
                // Both offsets are -1: the group did not take part in the match.
                captured.push_back(std::nullopt);
            } else {
                // Offsets are relative to the start of the haystack.
                captured.push_back(std::string_view(haystack.data() + span.rm_so, haystack.data() + span.rm_eo));
            }
        }
        return captured;
    }
};
|
||||
|
||||
std::unique_ptr<Regex> compileTRE(const std::string & re) {
|
||||
return std::make_unique<RegexTRE>(re);
|
||||
}
|
102
src/main.cc
Normal file
102
src/main.cc
Normal file
|
@ -0,0 +1,102 @@
|
|||
#include "data.hh"
|
||||
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <format>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
|
||||
/// Runs `action`, measures how long it took and prints one line of the form
/// "<description>: <verdict> (<duration>)". The verdict is OK when the action
/// returns true, FAIL when it returns false and EXCEPTION when it throws.
void runStep(const std::string & description, std::function<bool()> action) {
    using Clock = std::chrono::steady_clock;
    const auto begin = Clock::now();
    const char * verdict;
    try {
        if (action()) {
            verdict = "\x1b[32mOK\x1b[0m";
        } else {
            verdict = "\x1b[31mFAIL\x1b[0m";
        }
    } catch (...) {
        // Any exception type counts; the engines throw plain error codes.
        verdict = "\x1b[31mEXCEPTION\x1b[0m";
    }
    const auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(Clock::now() - begin);
    std::cout << std::format("{}: {} ({})", description, verdict, elapsed) << std::endl;
}
|
||||
|
||||
/// For each named engine, compiles every test pattern and compares the match
/// result of every example with the expected one, printing a verdict per step.
void check(std::vector<std::string> enginesToTest) {
    for (const std::string & engineName : enginesToTest) {
        std::cout << std::format("Engine: {}", engineName) << std::endl;
        for (const auto & [pattern, examples] : testCases) {
            std::unique_ptr<Regex> compiled;
            const auto compileStep = [&]() {
                // The lookup happens inside the step, so an unknown engine
                // name is reported as an exception of the compile step.
                compiled = engines.at(engineName)(pattern);
                return true;
            };
            runStep(std::format(" compile \x1b[35m\"{}\"\x1b[0m", pattern), compileStep);
            if (compiled == nullptr) {
                // Compilation failed; there is nothing to match against.
                continue;
            }
            for (const auto & [haystack, expectedMatches] : examples) {
                // Long haystacks are shortened to their first and last 10 characters.
                std::string shown;
                if (haystack.size() <= 20) {
                    shown = haystack;
                } else {
                    shown = std::format("{}[{} characters omitted]{}", haystack.substr(0, 10), haystack.size() - 20, haystack.substr(haystack.size() - 10));
                }
                const auto matchStep = [&]() {
                    return compiled->match(haystack) == expectedMatches;
                };
                runStep(std::format(" match \x1b[35m\"{}\"\x1b[0m", shown), matchStep);
            }
        }
    }
}
|
||||
|
||||
/// For each named engine, compiles every test pattern and prints the actual
/// match result of every example (the expected results are ignored).
void results(std::vector<std::string> enginesToTest) {
    for (const std::string & engineName : enginesToTest) {
        std::cout << std::format("Engine: {}", engineName) << std::endl;
        for (const auto & [pattern, examples] : testCases) {
            std::unique_ptr<Regex> compiled;
            const auto compileStep = [&]() {
                compiled = engines.at(engineName)(pattern);
                return true;
            };
            runStep(std::format(" compile \x1b[35m\"{}\"\x1b[0m", pattern), compileStep);
            if (compiled == nullptr) {
                // Compilation failed; there is nothing to match against.
                continue;
            }
            for (const auto & example : examples) {
                const std::string & haystack = example.first;
                try {
                    const MatchResult outcome = compiled->match(haystack);
                    std::cout << std::format(" match \x1b[35m\"{}\"\x1b[0m: ", haystack);
                    if (!outcome.has_value()) {
                        std::cout << "null" << std::endl;
                        continue;
                    }
                    std::cout << "[";
                    for (const auto & group : *outcome) {
                        if (group.has_value()) {
                            std::cout << std::format(" \x1b[35m\"{}\"\x1b[0m", *group);
                        } else {
                            std::cout << " null";
                        }
                    }
                    std::cout << " ]" << std::endl;
                } catch (...) {
                    std::cout << std::format(" match \x1b[35m\"{}\"\x1b[0m: \x1b[31mEXCEPTION\x1b[0m", haystack) << std::endl;
                }
            }
        }
    }
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
std::vector<std::string> enginesToTest;
|
||||
if (argc >= 3) {
|
||||
for (int i = 2; i < argc; ++i) {
|
||||
enginesToTest.emplace_back(argv[i]);
|
||||
}
|
||||
} else {
|
||||
for (const auto & [key, _] : engines) {
|
||||
enginesToTest.push_back(key);
|
||||
}
|
||||
}
|
||||
if (argc <= 1 || strcmp(argv[1], "check") == 0) {
|
||||
check(enginesToTest);
|
||||
} else if (strcmp(argv[1], "results") == 0) {
|
||||
results(enginesToTest);
|
||||
} else if (strcmp(argv[1], "list") == 0) {
|
||||
for (const auto & engine : enginesToTest) {
|
||||
std::cout << engine << std::endl;
|
||||
}
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue