From 0a7746603eda58c4b368e977e87d0aa4db397f5b Mon Sep 17 00:00:00 2001
From: pennae <github@quasiparticle.net>
Date: Wed, 19 Jan 2022 13:39:42 +0100
Subject: [PATCH 1/3] remove ExprIndStr
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

it can be replaced with StringToken if we add another bit if information to
StringToken, namely whether this string should take part in indentation scanning
or not. since all escaping terminates indentation scanning we need to set this
bit only for the non-escaped IND_STRING rule.

this improves performance by about 1%.

 before

  nix search --no-eval-cache --offline ../nixpkgs hello
    Time (mean ± σ):      8.880 s ±  0.048 s    [User: 6.809 s, System: 1.643 s]
    Range (min … max):    8.781 s …  8.993 s    20 runs

  nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
    Time (mean ± σ):     375.0 ms ±   2.2 ms    [User: 339.8 ms, System: 35.2 ms]
    Range (min … max):   371.5 ms … 379.3 ms    20 runs

  nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
    Time (mean ± σ):      2.831 s ±  0.040 s    [User: 2.536 s, System: 0.225 s]
    Range (min … max):    2.769 s …  2.912 s    20 runs

 after

  nix search --no-eval-cache --offline ../nixpkgs hello
    Time (mean ± σ):      8.832 s ±  0.048 s    [User: 6.757 s, System: 1.657 s]
    Range (min … max):    8.743 s …  8.921 s    20 runs

  nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
    Time (mean ± σ):     367.4 ms ±   3.2 ms    [User: 332.7 ms, System: 34.7 ms]
    Range (min … max):   364.6 ms … 374.6 ms    20 runs

  nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
    Time (mean ± σ):      2.810 s ±  0.030 s    [User: 2.517 s, System: 0.225 s]
    Range (min … max):    2.742 s …  2.854 s    20 runs
---
 src/libexpr/lexer.l    | 18 ++++-----
 src/libexpr/nixexpr.hh |  7 ----
 src/libexpr/parser.y   | 86 +++++++++++++++++++++++-------------------
 3 files changed, 57 insertions(+), 54 deletions(-)
diff --git a/src/libexpr/lexer.l b/src/libexpr/lexer.l
index a0e7a1877..e276b0467 100644
--- a/src/libexpr/lexer.l
+++ b/src/libexpr/lexer.l
@@ -66,7 +66,7 @@ static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
 
 // we make use of the fact that the parser receives a private copy of the input
 // string and can munge around in it.
-static Expr * unescapeStr(SymbolTable & symbols, char * s, size_t length)
+static StringToken unescapeStr(SymbolTable & symbols, char * s, size_t length)
 {
     char * result = s;
     char * t = s;
@@ -89,7 +89,7 @@ static Expr * unescapeStr(SymbolTable & symbols, char * s, size_t length)
         else *t = c;
         t++;
     }
-    return new ExprString(symbols.create({result, size_t(t - result)}));
+    return {result, size_t(t - result)};
 }
 
 
@@ -176,7 +176,7 @@ or          { return OR_KW; }
                 /* It is impossible to match strings ending with '$' with one
                    regex because trailing contexts are only valid at the end
                    of a rule. (A sane but undocumented limitation.) */
-                yylval->e = unescapeStr(data->symbols, yytext, yyleng);
+                yylval->str = unescapeStr(data->symbols, yytext, yyleng);
                 return STR;
               }
 <STRING>\$\{  { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
@@ -191,26 +191,26 @@ or          { return OR_KW; }
 
 \'\'(\ *\n)?     { PUSH_STATE(IND_STRING); return IND_STRING_OPEN; }
 <IND_STRING>([^\$\']|\$[^\{\']|\'[^\'\$])+ {
-                   yylval->e = new ExprIndStr(yytext);
+                   yylval->str = {yytext, (size_t) yyleng, true};
                    return IND_STR;
                  }
 <IND_STRING>\'\'\$ |
 <IND_STRING>\$   {
-                   yylval->e = new ExprIndStr("$");
+                   yylval->str = {"$", 1};
                    return IND_STR;
                  }
 <IND_STRING>\'\'\' {
-                   yylval->e = new ExprIndStr("''");
+                   yylval->str = {"''", 2};
                    return IND_STR;
                  }
 <IND_STRING>\'\'\\{ANY} {
-                   yylval->e = unescapeStr(data->symbols, yytext + 2, yyleng - 2);
+                   yylval->str = unescapeStr(data->symbols, yytext + 2, yyleng - 2);
                    return IND_STR;
                  }
 <IND_STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
 <IND_STRING>\'\' { POP_STATE(); return IND_STRING_CLOSE; }
 <IND_STRING>\'   {
-                   yylval->e = new ExprIndStr("'");
+                   yylval->str = {"'", 1};
                    return IND_STR;
                  }
 
@@ -264,7 +264,7 @@ or          { return OR_KW; }
       PUSH_STATE(INPATH_SLASH);
   else
       PUSH_STATE(INPATH);
-  yylval->e = new ExprString(data->symbols.create(string(yytext)));
+  yylval->str = {yytext, (size_t) yyleng};
   return STR;
 }
 <INPATH>{ANY} |
diff --git a/src/libexpr/nixexpr.hh b/src/libexpr/nixexpr.hh
index 0a60057e5..ae11560ea 100644
--- a/src/libexpr/nixexpr.hh
+++ b/src/libexpr/nixexpr.hh
@@ -117,13 +117,6 @@ struct ExprString : Expr
     Value * maybeThunk(EvalState & state, Env & env);
 };
 
-/* Temporary class used during parsing of indented strings. */
-struct ExprIndStr : Expr
-{
-    string s;
-    ExprIndStr(const string & s) : s(s) { };
-};
-
 struct ExprPath : Expr
 {
     string s;
diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y
index a3e713937..38b218156 100644
--- a/src/libexpr/parser.y
+++ b/src/libexpr/parser.y
@@ -16,6 +16,8 @@
 #ifndef BISON_HEADER
 #define BISON_HEADER
 
+#include <variant>
+
 #include "util.hh"
 
 #include "nixexpr.hh"
@@ -41,6 +43,15 @@ namespace nix {
 
 }
 
+// using C a struct allows us to avoid having to define the special
+// members that using string_view here would implicitly delete.
+struct StringToken {
+  const char * p;
+  size_t l;
+  bool hasIndentation;
+  operator std::string_view() const { return {p, l}; }
+};
+
 #define YY_DECL int yylex \
     (YYSTYPE * yylval_param, YYLTYPE * yylloc_param, yyscan_t yyscanner, nix::ParseData * data)
 
@@ -152,7 +163,8 @@ static void addFormal(const Pos & pos, Formals * formals, const Formal & formal)
 }
 
 
-static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<std::pair<Pos, Expr *> > & es)
+static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols,
+    vector<std::pair<Pos, std::variant<Expr *, StringToken> > > & es)
 {
     if (es.empty()) return new ExprString(symbols.create(""));
 
@@ -163,20 +175,20 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<st
     size_t minIndent = 1000000;
     size_t curIndent = 0;
     for (auto & [i_pos, i] : es) {
-        ExprIndStr * e = dynamic_cast<ExprIndStr *>(i);
-        if (!e) {
-            /* Anti-quotations end the current start-of-line whitespace. */
+        auto * str = std::get_if<StringToken>(&i);
+        if (!str || !str->hasIndentation) {
+            /* Anti-quotations and escaped characters end the current start-of-line whitespace. */
             if (atStartOfLine) {
                 atStartOfLine = false;
                 if (curIndent < minIndent) minIndent = curIndent;
             }
             continue;
         }
-        for (size_t j = 0; j < e->s.size(); ++j) {
+        for (size_t j = 0; j < str->l; ++j) {
             if (atStartOfLine) {
-                if (e->s[j] == ' ')
+                if (str->p[j] == ' ')
                     curIndent++;
-                else if (e->s[j] == '\n') {
+                else if (str->p[j] == '\n') {
                     /* Empty line, doesn't influence minimum
                        indentation. */
                     curIndent = 0;
@@ -184,7 +196,7 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<st
                     atStartOfLine = false;
                     if (curIndent < minIndent) minIndent = curIndent;
                 }
-            } else if (e->s[j] == '\n') {
+            } else if (str->p[j] == '\n') {
                 atStartOfLine = true;
                 curIndent = 0;
             }
@@ -196,33 +208,31 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<st
     atStartOfLine = true;
     size_t curDropped = 0;
     size_t n = es.size();
-    for (vector<std::pair<Pos, Expr *> >::iterator i = es.begin(); i != es.end(); ++i, --n) {
-        ExprIndStr * e = dynamic_cast<ExprIndStr *>(i->second);
-        if (!e) {
-            atStartOfLine = false;
-            curDropped = 0;
-            es2->push_back(*i);
-            continue;
-        }
-
+    auto i = es.begin();
+    const auto trimExpr = [&] (Expr * e) {
+        atStartOfLine = false;
+        curDropped = 0;
+        es2->emplace_back(i->first, e);
+    };
+    const auto trimString = [&] (const StringToken & t) {
         string s2;
-        for (size_t j = 0; j < e->s.size(); ++j) {
+        for (size_t j = 0; j < t.l; ++j) {
             if (atStartOfLine) {
-                if (e->s[j] == ' ') {
+                if (t.p[j] == ' ') {
                     if (curDropped++ >= minIndent)
-                        s2 += e->s[j];
+                        s2 += t.p[j];
                 }
-                else if (e->s[j] == '\n') {
+                else if (t.p[j] == '\n') {
                     curDropped = 0;
-                    s2 += e->s[j];
+                    s2 += t.p[j];
                 } else {
                     atStartOfLine = false;
                     curDropped = 0;
-                    s2 += e->s[j];
+                    s2 += t.p[j];
                 }
             } else {
-                s2 += e->s[j];
-                if (e->s[j] == '\n') atStartOfLine = true;
+                s2 += t.p[j];
+                if (t.p[j] == '\n') atStartOfLine = true;
             }
         }
 
@@ -235,6 +245,9 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<st
         }
 
         es2->emplace_back(i->first, new ExprString(symbols.create(s2)));
+    };
+    for (; i != es.end(); ++i, --n) {
+        std::visit(overloaded { trimExpr, trimString }, i->second);
     }
 
     /* If this is a single string, then don't do a concatenation. */
@@ -273,18 +286,13 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParseData * data, const char * err
   nix::Formal * formal;
   nix::NixInt n;
   nix::NixFloat nf;
-  // using C a struct allows us to avoid having to define the special
-  // members that using string_view here would implicitly delete.
-  struct StringToken {
-    const char * p;
-    size_t l;
-    operator std::string_view() const { return {p, l}; }
-  };
   StringToken id; // !!! -> Symbol
   StringToken path;
   StringToken uri;
+  StringToken str;
   std::vector<nix::AttrName> * attrNames;
   std::vector<std::pair<nix::Pos, nix::Expr *> > * string_parts;
+  std::vector<std::pair<nix::Pos, std::variant<nix::Expr *, StringToken> > > * ind_string_parts;
 }
 
 %type <e> start expr expr_function expr_if expr_op
@@ -294,11 +302,12 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParseData * data, const char * err
 %type <formals> formals
 %type <formal> formal
 %type <attrNames> attrs attrpath
-%type <string_parts> string_parts_interpolated ind_string_parts
+%type <string_parts> string_parts_interpolated
+%type <ind_string_parts> ind_string_parts
 %type <e> path_start string_parts string_attr
 %type <id> attr
 %token <id> ID ATTRPATH
-%token <e> STR IND_STR
+%token <str> STR IND_STR
 %token <n> INT
 %token <nf> FLOAT
 %token <path> PATH HPATH SPATH PATH_END
@@ -449,18 +458,19 @@ expr_simple
   ;
 
 string_parts
-  : STR
+  : STR { $$ = new ExprString(data->symbols.create($1)); }
   | string_parts_interpolated { $$ = new ExprConcatStrings(CUR_POS, true, $1); }
   | { $$ = new ExprString(data->symbols.create("")); }
   ;
 
 string_parts_interpolated
-  : string_parts_interpolated STR { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $2); }
+  : string_parts_interpolated STR
+  { $$ = $1; $1->emplace_back(makeCurPos(@2, data), new ExprString(data->symbols.create($2))); }
   | string_parts_interpolated DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $3); }
   | DOLLAR_CURLY expr '}' { $$ = new vector<std::pair<Pos, Expr *> >; $$->emplace_back(makeCurPos(@1, data), $2); }
   | STR DOLLAR_CURLY expr '}' {
       $$ = new vector<std::pair<Pos, Expr *> >;
-      $$->emplace_back(makeCurPos(@1, data), $1);
+      $$->emplace_back(makeCurPos(@1, data), new ExprString(data->symbols.create($1)));
       $$->emplace_back(makeCurPos(@2, data), $3);
     }
   ;
@@ -482,7 +492,7 @@ path_start
 ind_string_parts
   : ind_string_parts IND_STR { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $2); }
   | ind_string_parts DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $3); }
-  | { $$ = new vector<std::pair<Pos, Expr *> >; }
+  | { $$ = new vector<std::pair<Pos, std::variant<Expr *, StringToken> > >; }
   ;
 
 binds

From 9ac836d1d6d7e3480ad3bc4dbe23286d9052052b Mon Sep 17 00:00:00 2001
From: pennae <github@quasiparticle.net>
Date: Wed, 19 Jan 2022 14:31:30 +0100
Subject: [PATCH 2/3] don't use Symbols for strings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

string expressions by and large do not need the benefits a Symbol gives us,
instead they pollute the symbol table and cause unnecessary overhead for almost
all strings. the one place we can think of that benefits from them (attrpaths
with expressions) extracts the benefit in the parser, which we'll have to touch
anyway when changing ExprString to hold strings.

this gives a sizeable improvement on of 3-5% on all benchmarks we've run.

 before

  nix search --no-eval-cache --offline ../nixpkgs hello
    Time (mean ± σ):      8.844 s ±  0.045 s    [User: 6.750 s, System: 1.663 s]
    Range (min … max):    8.758 s …  8.922 s    20 runs

  nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
    Time (mean ± σ):     367.4 ms ±   3.3 ms    [User: 332.3 ms, System: 35.2 ms]
    Range (min … max):   364.0 ms … 375.2 ms    20 runs

  nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
    Time (mean ± σ):      2.810 s ±  0.030 s    [User: 2.517 s, System: 0.225 s]
    Range (min … max):    2.742 s …  2.854 s    20 runs

 after

  nix search --no-eval-cache --offline ../nixpkgs hello
    Time (mean ± σ):      8.533 s ±  0.068 s    [User: 6.485 s, System: 1.642 s]
    Range (min … max):    8.404 s …  8.657 s    20 runs

  nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
    Time (mean ± σ):     347.6 ms ±   3.1 ms    [User: 313.1 ms, System: 34.5 ms]
    Range (min … max):   343.3 ms … 354.6 ms    20 runs

  nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
    Time (mean ± σ):      2.709 s ±  0.032 s    [User: 2.414 s, System: 0.232 s]
    Range (min … max):    2.655 s …  2.788 s    20 runs
---
 src/libexpr/nixexpr.hh |  4 ++--
 src/libexpr/parser.y   | 22 +++++++++++-----------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/libexpr/nixexpr.hh b/src/libexpr/nixexpr.hh
index ae11560ea..a2070ff69 100644
--- a/src/libexpr/nixexpr.hh
+++ b/src/libexpr/nixexpr.hh
@@ -110,9 +110,9 @@ struct ExprFloat : Expr
 
 struct ExprString : Expr
 {
-    Symbol s;
+    string s;
     Value v;
-    ExprString(const Symbol & s) : s(s) { v.mkString(s); };
+    ExprString(std::string s) : s(std::move(s)) { v.mkString(this->s.data()); };
     COMMON_METHODS
     Value * maybeThunk(EvalState & state, Env & env);
 };
diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y
index 38b218156..88bdebcd6 100644
--- a/src/libexpr/parser.y
+++ b/src/libexpr/parser.y
@@ -166,7 +166,7 @@ static void addFormal(const Pos & pos, Formals * formals, const Formal & formal)
 static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols,
     vector<std::pair<Pos, std::variant<Expr *, StringToken> > > & es)
 {
-    if (es.empty()) return new ExprString(symbols.create(""));
+    if (es.empty()) return new ExprString("");
 
     /* Figure out the minimum indentation.  Note that by design
        whitespace-only final lines are not taken into account.  (So
@@ -244,7 +244,7 @@ static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols,
                 s2 = string(s2, 0, p + 1);
         }
 
-        es2->emplace_back(i->first, new ExprString(symbols.create(s2)));
+        es2->emplace_back(i->first, new ExprString(s2));
     };
     for (; i != es.end(); ++i, --n) {
         std::visit(overloaded { trimExpr, trimString }, i->second);
@@ -434,7 +434,7 @@ expr_simple
       $$ = new ExprCall(CUR_POS,
           new ExprVar(data->symbols.create("__findFile")),
           {new ExprVar(data->symbols.create("__nixPath")),
-           new ExprString(data->symbols.create(path))});
+           new ExprString(path)});
   }
   | URI {
       static bool noURLLiterals = settings.isExperimentalFeatureEnabled(Xp::NoUrlLiterals);
@@ -443,7 +443,7 @@ expr_simple
               .msg = hintfmt("URL literals are disabled"),
               .errPos = CUR_POS
           });
-      $$ = new ExprString(data->symbols.create($1));
+      $$ = new ExprString(string($1));
   }
   | '(' expr ')' { $$ = $2; }
   /* Let expressions `let {..., body = ...}' are just desugared
@@ -458,19 +458,19 @@ expr_simple
   ;
 
 string_parts
-  : STR { $$ = new ExprString(data->symbols.create($1)); }
+  : STR { $$ = new ExprString(string($1)); }
   | string_parts_interpolated { $$ = new ExprConcatStrings(CUR_POS, true, $1); }
-  | { $$ = new ExprString(data->symbols.create("")); }
+  | { $$ = new ExprString(""); }
   ;
 
 string_parts_interpolated
   : string_parts_interpolated STR
-  { $$ = $1; $1->emplace_back(makeCurPos(@2, data), new ExprString(data->symbols.create($2))); }
+  { $$ = $1; $1->emplace_back(makeCurPos(@2, data), new ExprString(string($2))); }
   | string_parts_interpolated DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(makeCurPos(@2, data), $3); }
   | DOLLAR_CURLY expr '}' { $$ = new vector<std::pair<Pos, Expr *> >; $$->emplace_back(makeCurPos(@1, data), $2); }
   | STR DOLLAR_CURLY expr '}' {
       $$ = new vector<std::pair<Pos, Expr *> >;
-      $$->emplace_back(makeCurPos(@1, data), new ExprString(data->symbols.create($1)));
+      $$->emplace_back(makeCurPos(@1, data), new ExprString(string($1)));
       $$->emplace_back(makeCurPos(@2, data), $3);
     }
   ;
@@ -524,7 +524,7 @@ attrs
     { $$ = $1;
       ExprString * str = dynamic_cast<ExprString *>($2);
       if (str) {
-          $$->push_back(AttrName(str->s));
+          $$->push_back(AttrName(data->symbols.create(str->s)));
           delete str;
       } else
           throw ParseError({
@@ -541,7 +541,7 @@ attrpath
     { $$ = $1;
       ExprString * str = dynamic_cast<ExprString *>($3);
       if (str) {
-          $$->push_back(AttrName(str->s));
+          $$->push_back(AttrName(data->symbols.create(str->s)));
           delete str;
       } else
           $$->push_back(AttrName($3));
@@ -551,7 +551,7 @@ attrpath
     { $$ = new vector<AttrName>;
       ExprString *str = dynamic_cast<ExprString *>($1);
       if (str) {
-          $$->push_back(AttrName(str->s));
+          $$->push_back(AttrName(data->symbols.create(str->s)));
           delete str;
       } else
           $$->push_back(AttrName($1));

From 7d4cc5515ce5e4972fd5b474ae18cf95f6ea257e Mon Sep 17 00:00:00 2001
From: pennae <github@quasiparticle.net>
Date: Wed, 19 Jan 2022 16:49:02 +0100
Subject: [PATCH 3/3] defer formals duplicate check for incresed efficiency all
 round
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

if we defer the duplicate argument check for lambda formals we can use more
efficient data structures for the formals set, and we can get rid of the
duplication of formals names to boot. instead of a list of formals we've seen
and a set of names we'll keep a vector instead and run a sort+dupcheck step
before moving the parsed formals into a newly created lambda. this improves
performance on search and rebuild by ~1%, pure parsing gains more (about 4%).

this does reorder lambda arguments in the xml output, but the output is still
stable. this shouldn't be a problem since argument order is not semantically
important anyway.

 before

  nix search --no-eval-cache --offline ../nixpkgs hello
    Time (mean ± σ):      8.550 s ±  0.060 s    [User: 6.470 s, System: 1.664 s]
    Range (min … max):    8.435 s …  8.666 s    20 runs

  nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
    Time (mean ± σ):     346.7 ms ±   2.1 ms    [User: 312.4 ms, System: 34.2 ms]
    Range (min … max):   343.8 ms … 353.4 ms    20 runs

  nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
    Time (mean ± σ):      2.720 s ±  0.031 s    [User: 2.415 s, System: 0.231 s]
    Range (min … max):    2.662 s …  2.780 s    20 runs

 after

  nix search --no-eval-cache --offline ../nixpkgs hello
    Time (mean ± σ):      8.462 s ±  0.063 s    [User: 6.398 s, System: 1.661 s]
    Range (min … max):    8.339 s …  8.542 s    20 runs

  nix eval -f ../nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix
    Time (mean ± σ):     329.1 ms ±   1.4 ms    [User: 296.8 ms, System: 32.3 ms]
    Range (min … max):   326.1 ms … 330.8 ms    20 runs

  nix eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system'
    Time (mean ± σ):      2.687 s ±  0.035 s    [User: 2.392 s, System: 0.228 s]
    Range (min … max):    2.626 s …  2.754 s    20 runs
---
 src/libexpr/eval.cc              |  2 +-
 src/libexpr/nixexpr.hh           | 24 +++++++++----
 src/libexpr/parser.y             | 61 +++++++++++++++++++++++++-------
 src/libexpr/value-to-xml.cc      |  2 +-
 src/nix/flake.cc                 |  4 +--
 tests/check.nix                  |  2 +-
 tests/lang/eval-okay-xml.exp.xml |  2 +-
 7 files changed, 71 insertions(+), 26 deletions(-)

diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc
index 94ffab175..73ed3a8a1 100644
--- a/src/libexpr/eval.cc
+++ b/src/libexpr/eval.cc
@@ -1368,7 +1368,7 @@ void EvalState::callFunction(Value & fun, size_t nrArgs, Value * * args, Value &
                     /* Nope, so show the first unexpected argument to the
                        user. */
                     for (auto & i : *args[0]->attrs)
-                        if (!lambda.formals->argNames.count(i.name))
+                        if (!lambda.formals->has(i.name))
                             throwTypeError(pos, "%1% called with unexpected argument '%2%'", lambda, i.name);
                     abort(); // can't happen
                 }
diff --git a/src/libexpr/nixexpr.hh b/src/libexpr/nixexpr.hh
index a2070ff69..4e923ac89 100644
--- a/src/libexpr/nixexpr.hh
+++ b/src/libexpr/nixexpr.hh
@@ -216,10 +216,25 @@ struct Formal
 
 struct Formals
 {
-    typedef std::list<Formal> Formals_;
+    typedef std::vector<Formal> Formals_;
     Formals_ formals;
-    std::set<Symbol> argNames; // used during parsing
     bool ellipsis;
+
+    bool has(Symbol arg) const {
+        auto it = std::lower_bound(formals.begin(), formals.end(), arg,
+            [] (const Formal & f, const Symbol & sym) { return f.name < sym; });
+        return it != formals.end() && it->name == arg;
+    }
+
+    std::vector<Formal> lexicographicOrder() const
+    {
+        std::vector<Formal> result(formals.begin(), formals.end());
+        std::sort(result.begin(), result.end(),
+            [] (const Formal & a, const Formal & b) {
+                return std::string_view(a.name) < std::string_view(b.name);
+            });
+        return result;
+    }
 };
 
 struct ExprLambda : Expr
@@ -232,11 +247,6 @@ struct ExprLambda : Expr
     ExprLambda(const Pos & pos, const Symbol & arg, Formals * formals, Expr * body)
         : pos(pos), arg(arg), formals(formals), body(body)
     {
-        if (!arg.empty() && formals && formals->argNames.find(arg) != formals->argNames.end())
-            throw ParseError({
-                .msg = hintfmt("duplicate formal function argument '%1%'", arg),
-                .errPos = pos
-            });
     };
     void setName(Symbol & name);
     string showNamePos() const;
diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y
index 88bdebcd6..1b0177e4a 100644
--- a/src/libexpr/parser.y
+++ b/src/libexpr/parser.y
@@ -41,6 +41,11 @@ namespace nix {
             { };
     };
 
+    struct ParserFormals {
+        std::vector<Formal> formals;
+        bool ellipsis = false;
+    };
+
 }
 
 // using C a struct allows us to avoid having to define the special
@@ -151,15 +156,39 @@ static void addAttr(ExprAttrs * attrs, AttrPath & attrPath,
 }
 
 
-static void addFormal(const Pos & pos, Formals * formals, const Formal & formal)
+static Formals * toFormals(ParseData & data, ParserFormals * formals,
+    Pos pos = noPos, Symbol arg = {})
 {
-    if (!formals->argNames.insert(formal.name).second)
+    std::sort(formals->formals.begin(), formals->formals.end(),
+        [] (const auto & a, const auto & b) {
+            return std::tie(a.name, a.pos) < std::tie(b.name, b.pos);
+        });
+
+    std::optional<std::pair<Symbol, Pos>> duplicate;
+    for (size_t i = 0; i + 1 < formals->formals.size(); i++) {
+        if (formals->formals[i].name != formals->formals[i + 1].name)
+            continue;
+        std::pair thisDup{formals->formals[i].name, formals->formals[i + 1].pos};
+        duplicate = std::min(thisDup, duplicate.value_or(thisDup));
+    }
+    if (duplicate)
         throw ParseError({
-            .msg = hintfmt("duplicate formal function argument '%1%'",
-                formal.name),
+            .msg = hintfmt("duplicate formal function argument '%1%'", duplicate->first),
+            .errPos = duplicate->second
+        });
+
+    Formals result;
+    result.ellipsis = formals->ellipsis;
+    result.formals = std::move(formals->formals);
+
+    if (arg.set() && result.has(arg))
+        throw ParseError({
+            .msg = hintfmt("duplicate formal function argument '%1%'", arg),
             .errPos = pos
         });
-    formals->formals.push_front(formal);
+
+    delete formals;
+    return new Formals(std::move(result));
 }
 
 
@@ -282,7 +311,7 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParseData * data, const char * err
   nix::Expr * e;
   nix::ExprList * list;
   nix::ExprAttrs * attrs;
-  nix::Formals * formals;
+  nix::ParserFormals * formals;
   nix::Formal * formal;
   nix::NixInt n;
   nix::NixFloat nf;
@@ -340,11 +369,17 @@ expr_function
   : ID ':' expr_function
     { $$ = new ExprLambda(CUR_POS, data->symbols.create($1), 0, $3); }
   | '{' formals '}' ':' expr_function
-    { $$ = new ExprLambda(CUR_POS, data->symbols.create(""), $2, $5); }
+    { $$ = new ExprLambda(CUR_POS, data->symbols.create(""), toFormals(*data, $2), $5); }
   | '{' formals '}' '@' ID ':' expr_function
-    { $$ = new ExprLambda(CUR_POS, data->symbols.create($5), $2, $7); }
+    {
+      Symbol arg = data->symbols.create($5);
+      $$ = new ExprLambda(CUR_POS, arg, toFormals(*data, $2, CUR_POS, arg), $7);
+    }
   | ID '@' '{' formals '}' ':' expr_function
-    { $$ = new ExprLambda(CUR_POS, data->symbols.create($1), $4, $7); }
+    {
+      Symbol arg = data->symbols.create($1);
+      $$ = new ExprLambda(CUR_POS, arg, toFormals(*data, $4, CUR_POS, arg), $7);
+    }
   | ASSERT expr ';' expr_function
     { $$ = new ExprAssert(CUR_POS, $2, $4); }
   | WITH expr ';' expr_function
@@ -575,13 +610,13 @@ expr_list
 
 formals
   : formal ',' formals
-    { $$ = $3; addFormal(CUR_POS, $$, *$1); }
+    { $$ = $3; $$->formals.push_back(*$1); }
   | formal
-    { $$ = new Formals; addFormal(CUR_POS, $$, *$1); $$->ellipsis = false; }
+    { $$ = new ParserFormals; $$->formals.push_back(*$1); $$->ellipsis = false; }
   |
-    { $$ = new Formals; $$->ellipsis = false; }
+    { $$ = new ParserFormals; $$->ellipsis = false; }
   | ELLIPSIS
-    { $$ = new Formals; $$->ellipsis = true; }
+    { $$ = new ParserFormals; $$->ellipsis = true; }
   ;
 
 formal
diff --git a/src/libexpr/value-to-xml.cc b/src/libexpr/value-to-xml.cc
index a875f82d7..a9fb60b0e 100644
--- a/src/libexpr/value-to-xml.cc
+++ b/src/libexpr/value-to-xml.cc
@@ -142,7 +142,7 @@ static void printValueAsXML(EvalState & state, bool strict, bool location,
                 if (!v.lambda.fun->arg.empty()) attrs["name"] = v.lambda.fun->arg;
                 if (v.lambda.fun->formals->ellipsis) attrs["ellipsis"] = "1";
                 XMLOpenElement _(doc, "attrspat", attrs);
-                for (auto & i : v.lambda.fun->formals->formals)
+                for (auto & i : v.lambda.fun->formals->lexicographicOrder())
                     doc.writeEmptyElement("attr", singletonAttrs("name", i.name));
             } else
                 doc.writeEmptyElement("varpat", singletonAttrs("name", v.lambda.fun->arg));
diff --git a/src/nix/flake.cc b/src/nix/flake.cc
index 97f4d911c..101d38c6c 100644
--- a/src/nix/flake.cc
+++ b/src/nix/flake.cc
@@ -476,8 +476,8 @@ struct CmdFlakeCheck : FlakeCommand
                 if (!v.isLambda())
                     throw Error("bundler must be a function");
                 if (!v.lambda.fun->formals ||
-                    !v.lambda.fun->formals->argNames.count(state->symbols.create("program")) ||
-                    !v.lambda.fun->formals->argNames.count(state->symbols.create("system")))
+                    !v.lambda.fun->formals->has(state->symbols.create("program")) ||
+                    !v.lambda.fun->formals->has(state->symbols.create("system")))
                     throw Error("bundler must take formal arguments 'program' and 'system'");
             } catch (Error & e) {
                 e.addTrace(pos, hintfmt("while checking the template '%s'", attrPath));
diff --git a/tests/check.nix b/tests/check.nix
index ec455ae2d..ed91ff845 100644
--- a/tests/check.nix
+++ b/tests/check.nix
@@ -50,6 +50,6 @@ with import ./config.nix;
 
   fetchurl = import <nix/fetchurl.nix> {
     url = "file://" + toString ./lang/eval-okay-xml.exp.xml;
-    sha256 = "0kg4sla7ihm8ijr8cb3117fhl99zrc2bwy1jrngsfmkh8bav4m0v";
+    sha256 = "sha256-behBlX+DQK/Pjvkuc8Tx68Jwi4E5v86wDq+ZLaHyhQE=";
   };
 }
diff --git a/tests/lang/eval-okay-xml.exp.xml b/tests/lang/eval-okay-xml.exp.xml
index 92b75e0b8..20099326c 100644
--- a/tests/lang/eval-okay-xml.exp.xml
+++ b/tests/lang/eval-okay-xml.exp.xml
@@ -31,9 +31,9 @@
     <attr name="f">
       <function>
         <attrspat>
-          <attr name="z" />
           <attr name="x" />
           <attr name="y" />
+          <attr name="z" />
         </attrspat>
       </function>
     </attr>