* Added a new kind of multi-line string literal delimited by two
single quotes.  Example (from NixOS):

    job = ''
      start on network-interfaces

      start script

        rm -f /var/run/opengl-driver
        ${if videoDriver == "nvidia"        
          then "ln -sf ${nvidiaDrivers} /var/run/opengl-driver"
          else if cfg.driSupport
          then "ln -sf ${mesa} /var/run/opengl-driver"
          else ""
        }

        rm -f /var/log/slim.log

      end script
    '';

  This style has two big advantages:

  - \, ' and " aren't special, only '' and ${.  So you get a lot less
    escaping in shell scripts / configuration files in Nixpkgs/NixOS.
    The delimiter '' is rare in scripts (and can usually be written as
    "").  ${ is also fairly rare.

    Other delimiters such as <<...>>, {{...}} and <|...|> were also
    considered but this one appears to have the fewest drawbacks
    (thanks Martin).

  - Indentation is intelligently stripped so that multi-line strings
    can follow the nesting structure of the containing Nix
    expression.  E.g. in the example above 6 spaces are stripped from
    the start of each line.  This prevents unnecessary indentation in
    generated files (which sometimes even breaks things).

  See tests/lang/eval-okay-ind-string.nix for some examples.
This commit is contained in:
Eelco Dolstra 2007-11-30 16:48:45 +00:00
parent 633518628f
commit 6d6c68c0d2
6 changed files with 235 additions and 4 deletions

View file

@ -131,6 +131,10 @@
<listitem><para>TODO: chroot support.</para></listitem>
<listitem><para>TODO: <literal>''</literal>-style string
literals.</para></listitem>
</itemizedlist>
</section>

View file

@ -4,6 +4,7 @@
%x STRING
%x IND_STRING
%{
@ -122,6 +123,14 @@ inherit { return INHERIT; }
<STRING>\" { BEGIN(INITIAL); return '"'; }
<STRING>. return yytext[0]; /* just in case: shouldn't be reached */
\'\'(\ *\n)? { BEGIN(IND_STRING); return IND_STRING_OPEN; /* opening '': an optional run of spaces followed by a newline right after it is consumed as part of the delimiter */ }
<IND_STRING>([^\$\']|\$[^\{\']|\'[^\'])+ {
/* Literal text chunk: a run of characters other than $ and ', or a $
   that is not followed by { or ', or a ' that is not followed by '.
   The text is wrapped in an IndStr node for the parser. */
yylval->t = makeIndStr(toATerm(yytext));
return IND_STR;
}
<IND_STRING>\$\{ { BEGIN(INITIAL); return DOLLAR_CURLY; /* start of an anti-quotation: what follows is lexed in the INITIAL state */ }
<IND_STRING>\'\' { BEGIN(INITIAL); return IND_STRING_CLOSE; /* closing '' ends the literal */ }
<IND_STRING>. return yytext[0]; /* just in case: shouldn't be reached */ /* NOTE(review): a $ directly followed by ' appears to match none of the IND_STRING rules above and would land here -- confirm */
{PATH} { yylval->t = toATerm(yytext); return PATH; /* !!! alloc */ }
{URI} { yylval->t = toATerm(yytext); return URI; /* !!! alloc */ }
@ -148,4 +157,10 @@ void backToString(yyscan_t scanner)
BEGIN(STRING);
}
/* Put the scanner back into the IND_STRING start condition.  The
   grammar calls this after the `}' that closes an anti-quotation
   inside a ''...'' literal, so that the text following it is lexed
   as string content again. */
void backToIndString(yyscan_t scanner)
{
/* NOTE(review): `yyg' is never referenced by name below; presumably
   the BEGIN macro expands to code that uses it -- confirm against the
   generated scanner. */
struct yyguts_t * yyg = (struct yyguts_t *) scanner;
BEGIN(IND_STRING);
}
}

View file

@ -46,6 +46,9 @@ Int | int | Expr |
Str | string ATermList | Expr |
Str | string | Expr | ObsoleteStr
# Internal to the parser, doesn't occur in ASTs.
IndStr | string | Expr |
# A path is a reference to a file system object that is to be copied
# to the Nix store when used as a derivation attribute. When it is
# concatenated to a string (i.e., `str + path'), it is also copied and

View file

@ -68,9 +68,100 @@ static Expr fixAttrs(int recursive, ATermList as)
}
void backToString(yyscan_t scanner);
/* Turn the parts of an indented string literal (''...'') into a
   string expression with the common leading indentation removed.

   `es' is the list of parts in source order.  Each part is either an
   IndStr node holding literal text or an arbitrary expression that
   came from an anti-quotation (${...}).

   The result is makeStr("") for an empty list; otherwise it is a
   ConcatStrings node over the processed parts, in which every IndStr
   has been replaced by a Str whose lines have had up to `minIndent'
   leading spaces dropped.  Only the space character counts as
   indentation; tabs are treated as ordinary text. */
static Expr stripIndentation(ATermList es)
{
    if (es == ATempty) return makeStr("");

    /* Figure out the minimum indentation.  Note that by design
       whitespace-only final lines are not taken into account.  (So
       the " " in "\n ''" is ignored, but the " " in "\n foo''" is.) */
    bool atStartOfLine = true; /* = seen only whitespace in the current line */
    unsigned int minIndent = 1000000; /* sentinel: "no indented text seen yet" */
    unsigned int curIndent = 0;
    ATerm e;
    for (ATermIterator i(es); i; ++i) {
        if (!matchIndStr(*i, e)) {
            /* Anti-quotations end the current start-of-line whitespace. */
            if (atStartOfLine) {
                atStartOfLine = false;
                if (curIndent < minIndent) minIndent = curIndent;
            }
            continue;
        }
        string s = aterm2String(e);
        for (string::size_type j = 0; j < s.size(); ++j) {
            if (atStartOfLine) {
                if (s[j] == ' ')
                    curIndent++;
                else if (s[j] == '\n') {
                    /* Empty line, doesn't influence minimum
                       indentation. */
                    curIndent = 0;
                } else {
                    atStartOfLine = false;
                    if (curIndent < minIndent) minIndent = curIndent;
                }
            } else if (s[j] == '\n') {
                atStartOfLine = true;
                curIndent = 0;
            }
        }
    }

    /* Strip spaces from each line. */
    ATermList es2 = ATempty;
    atStartOfLine = true;
    unsigned int curDropped = 0; /* leading spaces seen so far on the current line */
    unsigned int n = ATgetLength(es); /* parts left, including the current one */
    for (ATermIterator i(es); i; ++i, --n) {
        if (!matchIndStr(*i, e)) {
            /* An anti-quotation is kept as is and ends the leading
               whitespace of the current line. */
            atStartOfLine = false;
            curDropped = 0;
            es2 = ATinsert(es2, *i);
            continue;
        }

        string s = aterm2String(e);
        string s2;
        for (string::size_type j = 0; j < s.size(); ++j) {
            if (atStartOfLine) {
                if (s[j] == ' ') {
                    /* Drop the first `minIndent' spaces, keep the rest. */
                    if (curDropped++ >= minIndent)
                        s2 += s[j];
                }
                else if (s[j] == '\n') {
                    curDropped = 0;
                    s2 += s[j];
                } else {
                    atStartOfLine = false;
                    curDropped = 0;
                    s2 += s[j];
                }
            } else {
                s2 += s[j];
                if (s[j] == '\n') atStartOfLine = true;
            }
        }

        /* Remove the last line if it is empty and consists only of
           spaces. */
        if (n == 1) {
            /* `p' must have the string's own size type: stored in an
               `unsigned int', string::npos is truncated on platforms
               where size_t is wider, the comparison below is then
               always true, and `p + 1' wraps to 0, which wrongly
               erases a final chunk that has no newline and consists
               only of spaces. */
            string::size_type p = s2.find_last_of('\n');
            if (p != string::npos && s2.find_first_not_of(' ', p + 1) == string::npos)
                s2 = string(s2, 0, p + 1);
        }

        es2 = ATinsert(es2, makeStr(s2));
    }

    return makeConcatStrings(ATreverse(es2));
}
void backToString(yyscan_t scanner);
void backToIndString(yyscan_t scanner);
static Pos makeCurPos(YYLTYPE * loc, ParseData * data)
{
return makePos(toATerm(data->path),
@ -121,10 +212,11 @@ static void freeAndUnprotect(void * p)
%type <t> start expr expr_function expr_if expr_op
%type <t> expr_app expr_select expr_simple bind inheritsrc formal
%type <ts> binds ids expr_list formals string_parts
%token <t> ID INT STR PATH URI
%type <ts> binds ids expr_list formals string_parts ind_string_parts
%token <t> ID INT STR IND_STR PATH URI
%token IF THEN ELSE ASSERT WITH LET IN REC INHERIT EQ NEQ AND OR IMPL
%token DOLLAR_CURLY /* == ${ */
%token IND_STRING_OPEN IND_STRING_CLOSE
%nonassoc IMPL
%left OR
@ -199,6 +291,9 @@ expr_simple
else if (ATgetNext($2) == ATempty) $$ = ATgetFirst($2);
else $$ = makeConcatStrings(ATreverse($2));
}
| IND_STRING_OPEN ind_string_parts IND_STRING_CLOSE {
$$ = stripIndentation(ATreverse($2));
}
| PATH { $$ = makePath(toATerm(absPath(aterm2String($1), data->basePath))); }
| URI { $$ = makeStr($1, ATempty); }
| '(' expr ')' { $$ = $2; }
@ -219,6 +314,12 @@ string_parts
| { $$ = ATempty; }
;
/* Contents of a ''...'' literal: a sequence of literal text chunks
   (IND_STR) and anti-quoted expressions (${ expr }).  After the `}'
   that closes an anti-quotation the lexer is switched back to the
   IND_STRING state via backToIndString().  The expr_simple rule that
   consumes this list applies ATreverse to it, so the list is
   presumably accumulated in reverse order here -- confirm that
   ATinsert prepends. */
ind_string_parts
: ind_string_parts IND_STR { $$ = ATinsert($1, $2); }
| ind_string_parts DOLLAR_CURLY expr '}' { backToIndString(scanner); $$ = ATinsert($1, $3); }
| { $$ = ATempty; }
;
binds
: binds bind { $$ = ATinsert($1, $2); }
| { $$ = ATempty; }

View file

@ -0,0 +1 @@
Str("This is an indented multi-line string\nliteral. An amount of whitespace at\nthe start of each line matching the minimum\nindentation of all lines in the string\nliteral together will be removed. Thus,\nin this case four spaces will be\nstripped from each line, even though\n THIS LINE is indented six spaces.\n\nAlso, empty lines don't count in the\ndetermination of the indentation level (the\nprevious empty line has indentation 0, but\nit doesn't matter).\nIf the string starts with whitespace\n followed by a newline, it's stripped, but\n that's not the case here. Two spaces are\n stripped because of the \" \" at the start. \nThis line is indented\na bit further.\nAnti-quotations, like so, are\nalso allowed.\n The \\ is not special here.\n' can be followed by any character except another ', e.g. 'x'.\nLikewise for $, e.g. $$ or $varName.\nBut ' followed by ' is special, as is $ followed by {.\nIf you want them, use anti-quotations: '', ${.\n Tabs are not interpreted as whitespace (since we can't guess\n what tab settings are intended), so don't use them.\n\tThis line starts with a space and a tab, so only one\n space will be stripped from each line.\nAlso note that if the last line (just before the closing ' ')\nconsists only of whitespace, it's ignored. But here there is\nsome non-whitespace stuff, so the line isn't removed. \nThis shows a hacky way to preserve an empty line after the start.\nBut there's no reason to do so: you could just repeat the empty\nline.\n Similarly you can force an indentation level,\n in this case to 2 spaces. This works because the anti-quote\n is significant (not whitespace).\nstart on network-interfaces\n\nstart script\n\n rm -f /var/run/opengl-driver\n ln -sf 123 /var/run/opengl-driver\n\n rm -f /var/log/slim.log\n \nend script\n\nenv SLIM_CFGFILE=abc\nenv SLIM_THEMESDIR=def\nenv FONTCONFIG_FILE=/etc/fonts/fonts.conf \t\t\t\t# !!! 
cleanup\nenv XKB_BINDIR=foo/bin \t\t\t\t# Needed for the Xkb extension.\nenv LD_LIBRARY_PATH=libX11/lib:libXext/lib:/usr/lib/ # related to xorg-sys-opengl - needed to load libglx for (AI)GLX support (for compiz)\n\nenv XORG_DRI_DRIVER_PATH=nvidiaDrivers/X11R6/lib/modules/drivers/ \n\nexec slim/bin/slim\n",[])

View file

@ -0,0 +1,107 @@
let
s1 = ''
This is an indented multi-line string
literal. An amount of whitespace at
the start of each line matching the minimum
indentation of all lines in the string
literal together will be removed. Thus,
in this case four spaces will be
stripped from each line, even though
THIS LINE is indented six spaces.
Also, empty lines don't count in the
determination of the indentation level (the
previous empty line has indentation 0, but
it doesn't matter).
'';
s2 = '' If the string starts with whitespace
followed by a newline, it's stripped, but
that's not the case here. Two spaces are
stripped because of the " " at the start.
'';
s3 = ''
This line is indented
a bit further.
''; # indentation of last line doesn't count if it's empty
s4 = ''
Anti-quotations, like ${if true then "so" else "not so"}, are
also allowed.
'';
s5 = ''
The \ is not special here.
' can be followed by any character except another ', e.g. 'x'.
Likewise for $, e.g. $$ or $varName.
But ' followed by ' is special, as is $ followed by {.
If you want them, use anti-quotations: ${"''"}, ${"\${"}.
'';
s6 = ''
Tabs are not interpreted as whitespace (since we can't guess
what tab settings are intended), so don't use them.
This line starts with a space and a tab, so only one
space will be stripped from each line.
'';
s7 = ''
Also note that if the last line (just before the closing ' ')
consists only of whitespace, it's ignored. But here there is
some non-whitespace stuff, so the line isn't removed. '';
s8 = '' ${""}
This shows a hacky way to preserve an empty line after the start.
But there's no reason to do so: you could just repeat the empty
line.
'';
s9 = ''
${""} Similarly you can force an indentation level,
in this case to 2 spaces. This works because the anti-quote
is significant (not whitespace).
'';
s10 = ''
'';
s11 = '''';
s12 = '' '';
s13 = ''
start on network-interfaces
start script
rm -f /var/run/opengl-driver
${if true
then "ln -sf 123 /var/run/opengl-driver"
else if true
then "ln -sf 456 /var/run/opengl-driver"
else ""
}
rm -f /var/log/slim.log
end script
env SLIM_CFGFILE=${"abc"}
env SLIM_THEMESDIR=${"def"}
env FONTCONFIG_FILE=/etc/fonts/fonts.conf # !!! cleanup
env XKB_BINDIR=${"foo"}/bin # Needed for the Xkb extension.
env LD_LIBRARY_PATH=${"libX11"}/lib:${"libXext"}/lib:/usr/lib/ # related to xorg-sys-opengl - needed to load libglx for (AI)GLX support (for compiz)
${if true
then "env XORG_DRI_DRIVER_PATH=${"nvidiaDrivers"}/X11R6/lib/modules/drivers/"
else if true
then "env XORG_DRI_DRIVER_PATH=${"mesa"}/lib/modules/dri"
else ""
}
exec ${"slim"}/bin/slim
'';
in s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10 + s11 + s12 + s13