From a4744254250e170f1b858e46a8b1c7904a030a2b Mon Sep 17 00:00:00 2001 From: Guillaume Maudoux Date: Mon, 1 May 2017 01:05:41 +0200 Subject: [PATCH 1/2] Fix lexer to support `$'` in multiline strings. --- src/libexpr/lexer.l | 3 ++- tests/lang/eval-okay-ind-string.exp | 2 +- tests/lang/eval-okay-ind-string.nix | 10 +++++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/libexpr/lexer.l b/src/libexpr/lexer.l index 5b1ff0350..d4fae2d7d 100644 --- a/src/libexpr/lexer.l +++ b/src/libexpr/lexer.l @@ -160,7 +160,8 @@ or { return OR_KW; } yylval->e = new ExprIndStr(yytext); return IND_STR; } -\'\'\$ { +\'\'\$ | +\$ { yylval->e = new ExprIndStr("$"); return IND_STR; } diff --git a/tests/lang/eval-okay-ind-string.exp b/tests/lang/eval-okay-ind-string.exp index 886219dcf..9cf4bd2ee 100644 --- a/tests/lang/eval-okay-ind-string.exp +++ b/tests/lang/eval-okay-ind-string.exp @@ -1 +1 @@ -"This is an indented multi-line string\nliteral. An amount of whitespace at\nthe start of each line matching the minimum\nindentation of all lines in the string\nliteral together will be removed. Thus,\nin this case four spaces will be\nstripped from each line, even though\n THIS LINE is indented six spaces.\n\nAlso, empty lines don't count in the\ndetermination of the indentation level (the\nprevious empty line has indentation 0, but\nit doesn't matter).\nIf the string starts with whitespace\n followed by a newline, it's stripped, but\n that's not the case here. Two spaces are\n stripped because of the \" \" at the start. \nThis line is indented\na bit further.\nAnti-quotations, like so, are\nalso allowed.\n The \\ is not special here.\n' can be followed by any character except another ', e.g. 'x'.\nLikewise for $, e.g. $$ or $varName.\nBut ' followed by ' is special, as is $ followed by {.\nIf you want them, use anti-quotations: '', ${.\n Tabs are not interpreted as whitespace (since we can't guess\n what tab settings are intended), so don't use them.\n\tThis line starts with a space and a tab, so only one\n space will be stripped from each line.\nAlso note that if the last line (just before the closing ' ')\nconsists only of whitespace, it's ignored. But here there is\nsome non-whitespace stuff, so the line isn't removed. \nThis shows a hacky way to preserve an empty line after the start.\nBut there's no reason to do so: you could just repeat the empty\nline.\n Similarly you can force an indentation level,\n in this case to 2 spaces. This works because the anti-quote\n is significant (not whitespace).\nstart on network-interfaces\n\nstart script\n\n rm -f /var/run/opengl-driver\n ln -sf 123 /var/run/opengl-driver\n\n rm -f /var/log/slim.log\n \nend script\n\nenv SLIM_CFGFILE=abc\nenv SLIM_THEMESDIR=def\nenv FONTCONFIG_FILE=/etc/fonts/fonts.conf \t\t\t\t# !!! cleanup\nenv XKB_BINDIR=foo/bin \t\t\t\t# Needed for the Xkb extension.\nenv LD_LIBRARY_PATH=libX11/lib:libXext/lib:/usr/lib/ # related to xorg-sys-opengl - needed to load libglx for (AI)GLX support (for compiz)\n\nenv XORG_DRI_DRIVER_PATH=nvidiaDrivers/X11R6/lib/modules/drivers/ \n\nexec slim/bin/slim\nEscaping of ' followed by ': ''\nEscaping of $ followed by {: ${\nAnd finally to interpret \\n etc. as in a string: \n, \r, \t.\nfoo\n'bla'\nbar\n" +"This is an indented multi-line string\nliteral. An amount of whitespace at\nthe start of each line matching the minimum\nindentation of all lines in the string\nliteral together will be removed. Thus,\nin this case four spaces will be\nstripped from each line, even though\n THIS LINE is indented six spaces.\n\nAlso, empty lines don't count in the\ndetermination of the indentation level (the\nprevious empty line has indentation 0, but\nit doesn't matter).\nIf the string starts with whitespace\n followed by a newline, it's stripped, but\n that's not the case here. Two spaces are\n stripped because of the \" \" at the start. \nThis line is indented\na bit further.\nAnti-quotations, like so, are\nalso allowed.\n The \\ is not special here.\n' can be followed by any character except another ', e.g. 'x'.\nLikewise for $, e.g. $$ or $varName.\nBut ' followed by ' is special, as is $ followed by {.\nIf you want them, use anti-quotations: '', ${.\n Tabs are not interpreted as whitespace (since we can't guess\n what tab settings are intended), so don't use them.\n\tThis line starts with a space and a tab, so only one\n space will be stripped from each line.\nAlso note that if the last line (just before the closing ' ')\nconsists only of whitespace, it's ignored. But here there is\nsome non-whitespace stuff, so the line isn't removed. \nThis shows a hacky way to preserve an empty line after the start.\nBut there's no reason to do so: you could just repeat the empty\nline.\n Similarly you can force an indentation level,\n in this case to 2 spaces. This works because the anti-quote\n is significant (not whitespace).\nstart on network-interfaces\n\nstart script\n\n rm -f /var/run/opengl-driver\n ln -sf 123 /var/run/opengl-driver\n\n rm -f /var/log/slim.log\n \nend script\n\nenv SLIM_CFGFILE=abc\nenv SLIM_THEMESDIR=def\nenv FONTCONFIG_FILE=/etc/fonts/fonts.conf \t\t\t\t# !!! cleanup\nenv XKB_BINDIR=foo/bin \t\t\t\t# Needed for the Xkb extension.\nenv LD_LIBRARY_PATH=libX11/lib:libXext/lib:/usr/lib/ # related to xorg-sys-opengl - needed to load libglx for (AI)GLX support (for compiz)\n\nenv XORG_DRI_DRIVER_PATH=nvidiaDrivers/X11R6/lib/modules/drivers/ \n\nexec slim/bin/slim\nEscaping of ' followed by ': ''\nEscaping of $ followed by {: ${\nAnd finally to interpret \\n etc. as in a string: \n, \r, \t.\nfoo\n'bla'\nbar\ncut -d $'\\t' -f 1\nending dollar $$\n" diff --git a/tests/lang/eval-okay-ind-string.nix b/tests/lang/eval-okay-ind-string.nix index 1556aae9f..1669dc064 100644 --- a/tests/lang/eval-okay-ind-string.nix +++ b/tests/lang/eval-okay-ind-string.nix @@ -117,4 +117,12 @@ let bar ''; -in s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10 + s11 + s12 + s13 + s14 + s15 + # Regression test: accept $'. + s16 = '' + cut -d $'\t' -f 1 + ''; + + # Accept dollars at end of strings + s17 = ''ending dollar $'' + ''$'' + "\n"; + +in s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10 + s11 + s12 + s13 + s14 + s15 + s16 + s17 From a143014d738758b5558efb73fee9f351cd00cbda Mon Sep 17 00:00:00 2001 From: Guillaume Maudoux Date: Mon, 1 May 2017 01:07:33 +0200 Subject: [PATCH 2/2] lexer: remove catch-all rules hiding real errors With catch-all rules, we hide potential errors. It turns out that a4744254 made one cath-all useless. Flex detected that is was impossible to reach. The other is more subtle, as it can only trigger on unfinished escapes in unfinished strings, which only occurs at EOF. --- src/libexpr/lexer.l | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/src/libexpr/lexer.l b/src/libexpr/lexer.l index d4fae2d7d..40ca77258 100644 --- a/src/libexpr/lexer.l +++ b/src/libexpr/lexer.l @@ -142,18 +142,26 @@ or { return OR_KW; } \{ { return '{'; } \{ { PUSH_STATE(INSIDE_DOLLAR_CURLY); return '{'; } -\" { PUSH_STATE(STRING); return '"'; } +\" { + PUSH_STATE(STRING); return '"'; + } ([^\$\"\\]|\$[^\{\"\\]|\\.|\$\\.)*\$/\" | ([^\$\"\\]|\$[^\{\"\\]|\\.|\$\\.)+ { - /* It is impossible to match strings ending with '$' with one - regex because trailing contexts are only valid at the end - of a rule. (A sane but undocumented limitation.) */ - yylval->e = unescapeStr(data->symbols, yytext); - return STR; - } + /* It is impossible to match strings ending with '$' with one + regex because trailing contexts are only valid at the end + of a rule. (A sane but undocumented limitation.) */ + yylval->e = unescapeStr(data->symbols, yytext); + return STR; + } \$\{ { PUSH_STATE(INSIDE_DOLLAR_CURLY); return DOLLAR_CURLY; } -\" { POP_STATE(); return '"'; } -. return yytext[0]; /* just in case: shouldn't be reached */ +\" { POP_STATE(); return '"'; } +\$|\\|\$\\ { + /* This can only occur when we reach EOF, otherwise the above + (...|\$[^\{\"\\]|\\.|\$\\.)+ would have triggered. + This is technically invalid, but we leave the problem to the + parser who fails with exact location. */ + return STR; + } \'\'(\ *\n)? { PUSH_STATE(IND_STRING); return IND_STRING_OPEN; } ([^\$\']|\$[^\{\']|\'[^\'\$])+ { @@ -179,7 +187,6 @@ or { return OR_KW; } yylval->e = new ExprIndStr("'"); return IND_STR; } -. return yytext[0]; /* just in case: shouldn't be reached */ {