diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh index 89814785e..850c5bae6 100644 --- a/src/libexpr/eval.hh +++ b/src/libexpr/eval.hh @@ -179,8 +179,8 @@ public: Expr * parseExprFromFile(const Path & path, StaticEnv & staticEnv); /* Parse a Nix expression from the specified string. */ - Expr * parseExprFromString(std::string_view s, const Path & basePath, StaticEnv & staticEnv); - Expr * parseExprFromString(std::string_view s, const Path & basePath); + Expr * parseExprFromString(std::string s, const Path & basePath, StaticEnv & staticEnv); + Expr * parseExprFromString(std::string s, const Path & basePath); Expr * parseStdin(); @@ -308,7 +308,7 @@ private: friend struct ExprAttrs; friend struct ExprLet; - Expr * parse(const char * text, FileOrigin origin, const Path & path, + Expr * parse(char * text, size_t length, FileOrigin origin, const Path & path, const Path & basePath, StaticEnv & staticEnv); public: diff --git a/src/libexpr/lexer.l b/src/libexpr/lexer.l index c18877e29..a0e7a1877 100644 --- a/src/libexpr/lexer.l +++ b/src/libexpr/lexer.l @@ -64,29 +64,32 @@ static void adjustLoc(YYLTYPE * loc, const char * s, size_t len) } -// FIXME: optimize -static Expr * unescapeStr(SymbolTable & symbols, const char * s, size_t length) +// we make use of the fact that the parser receives a private copy of the input +// string and can munge around in it. +static Expr * unescapeStr(SymbolTable & symbols, char * s, size_t length) { - string t; - t.reserve(length); + char * result = s; + char * t = s; char c; + // the input string is terminated with *two* NULs, so we can safely take + // *one* character after the one being checked against. while ((c = *s++)) { if (c == '\\') { - assert(*s); c = *s++; - if (c == 'n') t += '\n'; - else if (c == 'r') t += '\r'; - else if (c == 't') t += '\t'; - else t += c; + if (c == 'n') *t = '\n'; + else if (c == 'r') *t = '\r'; + else if (c == 't') *t = '\t'; + else *t = c; } else if (c == '\r') { /* Normalise CR and CR/LF into LF. */ - t += '\n'; + *t = '\n'; if (*s == '\n') s++; /* cr/lf */ } - else t += c; + else *t = c; + t++; } - return new ExprString(symbols.create(t)); + return new ExprString(symbols.create({result, size_t(t - result)})); } @@ -139,7 +142,7 @@ or { return OR_KW; } \/\/ { return UPDATE; } \+\+ { return CONCAT; } -{ID} { yylval->id = strdup(yytext); return ID; } +{ID} { yylval->id = {yytext, (size_t) yyleng}; return ID; } {INT} { errno = 0; try { yylval->n = boost::lexical_cast(yytext); @@ -221,14 +224,14 @@ or { return OR_KW; } {PATH_SEG} { POP_STATE(); PUSH_STATE(INPATH_SLASH); - yylval->path = strdup(yytext); + yylval->path = {yytext, (size_t) yyleng}; return PATH; } {HPATH_START} { POP_STATE(); PUSH_STATE(INPATH_SLASH); - yylval->path = strdup(yytext); + yylval->path = {yytext, (size_t) yyleng}; return HPATH; } @@ -237,7 +240,7 @@ or { return OR_KW; } PUSH_STATE(INPATH_SLASH); else PUSH_STATE(INPATH); - yylval->path = strdup(yytext); + yylval->path = {yytext, (size_t) yyleng}; return PATH; } {HPATH} { @@ -245,7 +248,7 @@ or { return OR_KW; } PUSH_STATE(INPATH_SLASH); else PUSH_STATE(INPATH); - yylval->path = strdup(yytext); + yylval->path = {yytext, (size_t) yyleng}; return HPATH; } @@ -280,8 +283,8 @@ or { return OR_KW; } throw ParseError("path has a trailing slash"); } -{SPATH} { yylval->path = strdup(yytext); return SPATH; } -{URI} { yylval->uri = strdup(yytext); return URI; } +{SPATH} { yylval->path = {yytext, (size_t) yyleng}; return SPATH; } +{URI} { yylval->uri = {yytext, (size_t) yyleng}; return URI; } [ \t\r\n]+ /* eat up whitespace */ \#[^\r\n]* /* single-line comments */ diff --git a/src/libexpr/nixexpr.cc b/src/libexpr/nixexpr.cc index a75357871..640c44c01 100644 --- a/src/libexpr/nixexpr.cc +++ b/src/libexpr/nixexpr.cc @@ -473,7 +473,7 @@ string ExprLambda::showNamePos() const size_t SymbolTable::totalSize() const { size_t n = 0; - for (auto & i : symbols) + for (auto & i : store) n += i.size(); return n; } diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y index f8aaea582..a3e713937 100644 --- a/src/libexpr/parser.y +++ b/src/libexpr/parser.y @@ -273,9 +273,16 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParseData * data, const char * err nix::Formal * formal; nix::NixInt n; nix::NixFloat nf; - const char * id; // !!! -> Symbol - char * path; - char * uri; + // using C a struct allows us to avoid having to define the special + // members that using string_view here would implicitly delete. + struct StringToken { + const char * p; + size_t l; + operator std::string_view() const { return {p, l}; } + }; + StringToken id; // !!! -> Symbol + StringToken path; + StringToken uri; std::vector * attrNames; std::vector > * string_parts; } @@ -397,7 +404,7 @@ expr_select expr_simple : ID { - if (strcmp($1, "__curPos") == 0) + if (strncmp($1.p, "__curPos", $1.l) == 0) $$ = new ExprPos(CUR_POS); else $$ = new ExprVar(CUR_POS, data->symbols.create($1)); @@ -414,7 +421,7 @@ expr_simple $$ = new ExprConcatStrings(CUR_POS, false, $2); } | SPATH { - string path($1 + 1, strlen($1) - 2); + string path($1.p + 1, $1.l - 2); $$ = new ExprCall(CUR_POS, new ExprVar(data->symbols.create("__findFile")), {new ExprVar(data->symbols.create("__nixPath")), @@ -460,14 +467,14 @@ string_parts_interpolated path_start : PATH { - Path path(absPath($1, data->basePath)); + Path path(absPath({$1.p, $1.l}, data->basePath)); /* add back in the trailing '/' to the first segment */ - if ($1[strlen($1)-1] == '/' && strlen($1) > 1) + if ($1.p[$1.l-1] == '/' && $1.l > 1) path += "/"; $$ = new ExprPath(path); } | HPATH { - Path path(getHome() + string($1 + 1)); + Path path(getHome() + string($1.p + 1, $1.l - 1)); $$ = new ExprPath(path); } ; @@ -543,7 +550,7 @@ attrpath attr : ID { $$ = $1; } - | OR_KW { $$ = "or"; } + | OR_KW { $$ = {"or", 2}; } ; string_attr @@ -589,7 +596,7 @@ formal namespace nix { -Expr * EvalState::parse(const char * text, FileOrigin origin, +Expr * EvalState::parse(char * text, size_t length, FileOrigin origin, const Path & path, const Path & basePath, StaticEnv & staticEnv) { yyscan_t scanner; @@ -609,7 +616,7 @@ Expr * EvalState::parse(const char * text, FileOrigin origin, data.basePath = basePath; yylex_init(&scanner); - yy_scan_string(text, scanner); + yy_scan_buffer(text, length, scanner); int res = yyparse(scanner, &data); yylex_destroy(scanner); @@ -655,26 +662,33 @@ Expr * EvalState::parseExprFromFile(const Path & path) Expr * EvalState::parseExprFromFile(const Path & path, StaticEnv & staticEnv) { - return parse(readFile(path).c_str(), foFile, path, dirOf(path), staticEnv); + auto buffer = readFile(path); + // readFile should have left some extra space for terminators + buffer.append("\0\0", 2); + return parse(buffer.data(), buffer.size(), foFile, path, dirOf(path), staticEnv); } -Expr * EvalState::parseExprFromString(std::string_view s, const Path & basePath, StaticEnv & staticEnv) +Expr * EvalState::parseExprFromString(std::string s, const Path & basePath, StaticEnv & staticEnv) { - return parse(s.data(), foString, "", basePath, staticEnv); + s.append("\0\0", 2); + return parse(s.data(), s.size(), foString, "", basePath, staticEnv); } -Expr * EvalState::parseExprFromString(std::string_view s, const Path & basePath) +Expr * EvalState::parseExprFromString(std::string s, const Path & basePath) { - return parseExprFromString(s, basePath, staticBaseEnv); + return parseExprFromString(std::move(s), basePath, staticBaseEnv); } Expr * EvalState::parseStdin() { //Activity act(*logger, lvlTalkative, format("parsing standard input")); - return parse(drainFD(0).data(), foStdin, "", absPath("."), staticBaseEnv); + auto buffer = drainFD(0); + // drainFD should have left some extra space for terminators + buffer.append("\0\0", 2); + return parse(buffer.data(), buffer.size(), foStdin, "", absPath("."), staticBaseEnv); } diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 1d2a7d5d2..b819918ad 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -378,7 +378,7 @@ void prim_exec(EvalState & state, const Pos & pos, Value * * args, Value & v) auto output = runProgram(program, true, commandArgs); Expr * parsed; try { - parsed = state.parseExprFromString(output, pos.file); + parsed = state.parseExprFromString(std::move(output), pos.file); } catch (Error & e) { e.addTrace(pos, "While parsing the output from '%1%'", program); throw; @@ -3915,9 +3915,12 @@ void EvalState::createBaseEnv() /* Note: we have to initialize the 'derivation' constant *after* building baseEnv/staticBaseEnv because it uses 'builtins'. */ - eval(parse( + char code[] = #include "primops/derivation.nix.gen.hh" - , foFile, sDerivationNix, "/", staticBaseEnv), *vDerivation); + // the parser needs two NUL bytes as terminators; one of them + // is implied by being a C string. + "\0"; + eval(parse(code, sizeof(code), foFile, sDerivationNix, "/", staticBaseEnv), *vDerivation); } diff --git a/src/libexpr/symbol-table.hh b/src/libexpr/symbol-table.hh index 4eb6dac81..a090ebae5 100644 --- a/src/libexpr/symbol-table.hh +++ b/src/libexpr/symbol-table.hh @@ -1,7 +1,8 @@ #pragma once +#include #include -#include +#include #include "types.hh" @@ -70,15 +71,21 @@ public: class SymbolTable { private: - typedef std::unordered_set Symbols; - Symbols symbols; + std::unordered_map symbols; + std::list store; public: Symbol create(std::string_view s) { - // FIXME: avoid allocation if 's' already exists in the symbol table. - std::pair res = symbols.emplace(std::string(s)); - return Symbol(&*res.first); + // Most symbols are looked up more than once, so we trade off insertion performance + // for lookup performance. + // TODO: could probably be done more efficiently with transparent Hash and Equals + // on the original implementation using unordered_set + auto it = symbols.find(s); + if (it != symbols.end()) return it->second; + + const string & rawSym = store.emplace_back(s); + return symbols.emplace(rawSym, Symbol(&rawSym)).first->second; } size_t size() const @@ -91,7 +98,7 @@ public: template void dump(T callback) { - for (auto & s : symbols) + for (auto & s : store) callback(s); } }; diff --git a/src/libutil/util.cc b/src/libutil/util.cc index 9edd69c64..938808276 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -668,7 +668,9 @@ void writeFull(int fd, std::string_view s, bool allowInterrupts) string drainFD(int fd, bool block, const size_t reserveSize) { - StringSink sink(reserveSize); + // the parser needs two extra bytes to append terminating characters, other users will + // not care very much about the extra memory. + StringSink sink(reserveSize + 2); drainFD(fd, sink, block); return std::move(*sink.s); } diff --git a/src/nix-build/nix-build.cc b/src/nix-build/nix-build.cc index b73eb0795..471a03ac7 100755 --- a/src/nix-build/nix-build.cc +++ b/src/nix-build/nix-build.cc @@ -292,7 +292,7 @@ static void main_nix_build(int argc, char * * argv) else for (auto i : left) { if (fromArgs) - exprs.push_back(state->parseExprFromString(i, absPath("."))); + exprs.push_back(state->parseExprFromString(std::move(i), absPath("."))); else { auto absolute = i; try { diff --git a/src/nix/repl.cc b/src/nix/repl.cc index a62ad66c2..be9f96836 100644 --- a/src/nix/repl.cc +++ b/src/nix/repl.cc @@ -703,7 +703,7 @@ void NixRepl::addVarToScope(const Symbol & name, Value & v) Expr * NixRepl::parseString(string s) { - Expr * e = state->parseExprFromString(s, curDir, staticEnv); + Expr * e = state->parseExprFromString(std::move(s), curDir, staticEnv); return e; }