diff --git a/src/libexpr/lexer.l b/src/libexpr/lexer.l index a0e7a1877..e276b0467 100644 --- a/src/libexpr/lexer.l +++ b/src/libexpr/lexer.l @@ -66,7 +66,7 @@ static void adjustLoc(YYLTYPE * loc, const char * s, size_t len) // we make use of the fact that the parser receives a private copy of the input // string and can munge around in it. -static Expr * unescapeStr(SymbolTable & symbols, char * s, size_t length) +static StringToken unescapeStr(SymbolTable & symbols, char * s, size_t length) { char * result = s; char * t = s; @@ -89,7 +89,7 @@ static Expr * unescapeStr(SymbolTable & symbols, char * s, size_t length) else *t = c; t++; } - return new ExprString(symbols.create({result, size_t(t - result)})); + return {result, size_t(t - result)}; } @@ -176,7 +176,7 @@ or { return OR_KW; } /* It is impossible to match strings ending with '$' with one regex because trailing contexts are only valid at the end of a rule. (A sane but undocumented limitation.) */ - yylval->e = unescapeStr(data->symbols, yytext, yyleng); + yylval->str = unescapeStr(data->symbols, yytext, yyleng); return STR; } \$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; } @@ -191,26 +191,26 @@ or { return OR_KW; } \'\'(\ *\n)? 
{ PUSH_STATE(IND_STRING); return IND_STRING_OPEN; } ([^\$\']|\$[^\{\']|\'[^\'\$])+ { - yylval->e = new ExprIndStr(yytext); + yylval->str = {yytext, (size_t) yyleng, true}; return IND_STR; } \'\'\$ | \$ { - yylval->e = new ExprIndStr("$"); + yylval->str = {"$", 1}; return IND_STR; } \'\'\' { - yylval->e = new ExprIndStr("''"); + yylval->str = {"''", 2}; return IND_STR; } \'\'\\{ANY} { - yylval->e = unescapeStr(data->symbols, yytext + 2, yyleng - 2); + yylval->str = unescapeStr(data->symbols, yytext + 2, yyleng - 2); return IND_STR; } \$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; } \'\' { POP_STATE(); return IND_STRING_CLOSE; } \' { - yylval->e = new ExprIndStr("'"); + yylval->str = {"'", 1}; return IND_STR; } @@ -264,7 +264,7 @@ or { return OR_KW; } PUSH_STATE(INPATH_SLASH); else PUSH_STATE(INPATH); - yylval->e = new ExprString(data->symbols.create(string(yytext))); + yylval->str = {yytext, (size_t) yyleng}; return STR; } {ANY} | diff --git a/src/libexpr/nixexpr.hh b/src/libexpr/nixexpr.hh index 0a60057e5..ae11560ea 100644 --- a/src/libexpr/nixexpr.hh +++ b/src/libexpr/nixexpr.hh @@ -117,13 +117,6 @@ struct ExprString : Expr Value * maybeThunk(EvalState & state, Env & env); }; -/* Temporary class used during parsing of indented strings. */ -struct ExprIndStr : Expr -{ - string s; - ExprIndStr(const string & s) : s(s) { }; -}; - struct ExprPath : Expr { string s; diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y index a3e713937..38b218156 100644 --- a/src/libexpr/parser.y +++ b/src/libexpr/parser.y @@ -16,6 +16,8 @@ #ifndef BISON_HEADER #define BISON_HEADER +#include + #include "util.hh" #include "nixexpr.hh" @@ -41,6 +43,15 @@ namespace nix { } +// using a C struct allows us to avoid having to define the special +// members that using string_view here would implicitly delete. 
+struct StringToken { + const char * p; + size_t l; + bool hasIndentation; + operator std::string_view() const { return {p, l}; } +}; + #define YY_DECL int yylex \ (YYSTYPE * yylval_param, YYLTYPE * yylloc_param, yyscan_t yyscanner, nix::ParseData * data) @@ -152,7 +163,8 @@ static void addFormal(const Pos & pos, Formals * formals, const Formal & formal) } -static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector > & es) +static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, + vector > > & es) { if (es.empty()) return new ExprString(symbols.create("")); @@ -163,20 +175,20 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector(i); - if (!e) { - /* Anti-quotations end the current start-of-line whitespace. */ + auto * str = std::get_if(&i); + if (!str || !str->hasIndentation) { + /* Anti-quotations and escaped characters end the current start-of-line whitespace. */ if (atStartOfLine) { atStartOfLine = false; if (curIndent < minIndent) minIndent = curIndent; } continue; } - for (size_t j = 0; j < e->s.size(); ++j) { + for (size_t j = 0; j < str->l; ++j) { if (atStartOfLine) { - if (e->s[j] == ' ') + if (str->p[j] == ' ') curIndent++; - else if (e->s[j] == '\n') { + else if (str->p[j] == '\n') { /* Empty line, doesn't influence minimum indentation. 
*/ curIndent = 0; @@ -184,7 +196,7 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vectors[j] == '\n') { + } else if (str->p[j] == '\n') { atStartOfLine = true; curIndent = 0; } @@ -196,33 +208,31 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector >::iterator i = es.begin(); i != es.end(); ++i, --n) { - ExprIndStr * e = dynamic_cast(i->second); - if (!e) { - atStartOfLine = false; - curDropped = 0; - es2->push_back(*i); - continue; - } - + auto i = es.begin(); + const auto trimExpr = [&] (Expr * e) { + atStartOfLine = false; + curDropped = 0; + es2->emplace_back(i->first, e); + }; + const auto trimString = [&] (const StringToken & t) { string s2; - for (size_t j = 0; j < e->s.size(); ++j) { + for (size_t j = 0; j < t.l; ++j) { if (atStartOfLine) { - if (e->s[j] == ' ') { + if (t.p[j] == ' ') { if (curDropped++ >= minIndent) - s2 += e->s[j]; + s2 += t.p[j]; } - else if (e->s[j] == '\n') { + else if (t.p[j] == '\n') { curDropped = 0; - s2 += e->s[j]; + s2 += t.p[j]; } else { atStartOfLine = false; curDropped = 0; - s2 += e->s[j]; + s2 += t.p[j]; } } else { - s2 += e->s[j]; - if (e->s[j] == '\n') atStartOfLine = true; + s2 += t.p[j]; + if (t.p[j] == '\n') atStartOfLine = true; } } @@ -235,6 +245,9 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vectoremplace_back(i->first, new ExprString(symbols.create(s2))); + }; + for (; i != es.end(); ++i, --n) { + std::visit(overloaded { trimExpr, trimString }, i->second); } /* If this is a single string, then don't do a concatenation. */ @@ -273,18 +286,13 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParseData * data, const char * err nix::Formal * formal; nix::NixInt n; nix::NixFloat nf; - // using C a struct allows us to avoid having to define the special - // members that using string_view here would implicitly delete. 
- struct StringToken { - const char * p; - size_t l; - operator std::string_view() const { return {p, l}; } - }; StringToken id; // !!! -> Symbol StringToken path; StringToken uri; + StringToken str; std::vector * attrNames; std::vector > * string_parts; + std::vector > > * ind_string_parts; } %type start expr expr_function expr_if expr_op @@ -294,11 +302,12 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParseData * data, const char * err %type formals %type formal %type attrs attrpath -%type string_parts_interpolated ind_string_parts +%type string_parts_interpolated +%type ind_string_parts %type path_start string_parts string_attr %type attr %token ID ATTRPATH -%token STR IND_STR +%token STR IND_STR %token INT %token FLOAT %token PATH HPATH SPATH PATH_END @@ -449,18 +458,19 @@ expr_simple ; string_parts - : STR + : STR { $$ = new ExprString(data->symbols.create($1)); } | string_parts_interpolated { $$ = new ExprConcatStrings(CUR_POS, true, $1); } | { $$ = new ExprString(data->symbols.create("")); } ; string_parts_interpolated - : string_parts_interpolated STR { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $2); } + : string_parts_interpolated STR + { $$ = $1; $1->emplace_back(makeCurPos(@2, data), new ExprString(data->symbols.create($2))); } | string_parts_interpolated DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $3); } | DOLLAR_CURLY expr '}' { $$ = new vector >; $$->emplace_back(makeCurPos(@1, data), $2); } | STR DOLLAR_CURLY expr '}' { $$ = new vector >; - $$->emplace_back(makeCurPos(@1, data), $1); + $$->emplace_back(makeCurPos(@1, data), new ExprString(data->symbols.create($1))); $$->emplace_back(makeCurPos(@2, data), $3); } ; @@ -482,7 +492,7 @@ path_start ind_string_parts : ind_string_parts IND_STR { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $2); } | ind_string_parts DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $3); } - | { $$ = new vector >; } + | { $$ = new vector > >; } ; binds