libmain: move ProgressBar implementation out of the header

Change-Id: Ib4b42ebea290ee575294df6b2f17a38a5d850b80
libstore/build: use an allowlist approach to syscall filtering
2024-06-26 09:19:19 +02:00 · 2024-06-26 09:19:19 +02:00 · 2024-06-26 09:19:14 +02:00 · 2024-06-25 22:18:26 +00:00 · 2024-06-25 22:16:04 +00:00 · 2024-06-25 20:42:46 +00:00
48 changed files with 1962 additions and 1268 deletions
--- a/.envrc
+++ b/.envrc
@ -1,9 +1,5 @@
 # shellcheck shell=bash
 source_env_if_exists .envrc.local
-# TODO: `use flake .#native-clangStdenvPackages` on macOS?
-use flake ".#${LIX_SHELL_VARIANT:-default}" "${LIX_SHELL_EXTRA_ARGS[@]}"
-export MAKEFLAGS="$MAKEFLAGS -e"
-if [[ -n "$NIX_BUILD_CORES" ]]; then
-    export MAKEFLAGS="$MAKEFLAGS -j $NIX_BUILD_CORES"
-fi
+# Use native-clangStdenvPackages to get clangd by default.
+use flake ".#${LIX_SHELL_VARIANT:-native-clangStdenvPackages}" "${LIX_SHELL_EXTRA_ARGS[@]}"
 export GTEST_BRIEF=1
--- a/.this-is-lix
+++ b/.this-is-lix
@ -0,0 +1 @@
+This is a file used by the dev shell shellHook in package.nix to check that this is actually a Lix repo before installing git hooks. Its contents have no meaning.
--- a/doc/manual/process-includes.sh
+++ b/doc/manual/process-includes.sh
@ -1,4 +1,4 @@
-#!/bin/sh
+#!/usr/bin/env bash

 set -euo pipefail

--- a/doc/manual/render-manpage.sh
+++ b/doc/manual/render-manpage.sh
@ -1,4 +1,4 @@
-#!/bin/sh
+#!/usr/bin/env bash

 set -euo pipefail

--- a/doc/manual/rl-next/build-dir.md
+++ b/doc/manual/rl-next/build-dir.md
@ -0,0 +1,12 @@
+---
+synopsis: "Add a `build-dir` setting to set the backing directory for builds"
+cls: [1514]
+credits: [roberth, tomberek]
+category: Improvements
+---
+
+`build-dir` can now be set in the Nix configuration to choose the backing directory for the build sandbox.
+This can be useful on systems with `/tmp` on tmpfs, or simply to relocate large builds to another disk.
+
+Also, `XDG_RUNTIME_DIR` is no longer considered when selecting the default temporary directory,
+as it's not intended to be used for large amounts of data.
--- a/doc/manual/rl-next/nix-copy-is-fast.md
+++ b/doc/manual/rl-next/nix-copy-is-fast.md
@ -0,0 +1,12 @@
+---
+synopsis: "`nix copy` is now several times faster at `querying info about /nix/store/...`"
+cls: [1462]
+issues: [fj#366]
+credits: [jade]
+category: Fixes
+---
+
+We fixed a locking bug that serialized `querying info about /nix/store/...`
+onto just one thread, so that it took `O(paths to copy * latency)` time
+while setting up to copy paths to S3 and other stores. It is now `nproc` times
+faster.
--- a/doc/manual/rl-next/old-protocol-removal.md
+++ b/doc/manual/rl-next/old-protocol-removal.md
@ -0,0 +1,21 @@
+---
+synopsis: "Lix no longer speaks the Nix remote-build worker protocol to clients or servers older than CppNix 2.3"
+cls: [1207, 1208, 1206, 1205, 1204, 1203, 1479]
+issues: [fj#325]
+credits: [jade]
+category: Breaking Changes
+---
+
+CppNix 2.3 was released in 2019, and is the new oldest supported version. We
+will increase our support baseline in the future up to a final version of CppNix
+2.18 (which may happen soon given that it is the only still-packaged and thus
+still-tested >2.3 version), but this step already removes a significant number
+of dead, untested code paths.
+
+Lix speaks the same version of the protocol as CppNix 2.18, and that fact will
+never change in the future; Lix's plans for evolving the protocol
+entail a complete, incompatible replacement that will be supported in
+parallel with the old protocol. Lix will thus retain remote build compatibility
+with CppNix as long as CppNix maintains protocol compatibility with 2.18, and
+as long as Lix retains legacy protocol support (which will likely be a long
+time given that we plan to convert it to a frozen-in-time shim).
--- a/doc/manual/rl-next/sanitizers.md
+++ b/doc/manual/rl-next/sanitizers.md
@ -0,0 +1,10 @@
+---
+synopsis: "Lix now supports building with UndefinedBehaviorSanitizer"
+cls: [1483]
+credits: [jade]
+category: Development
+---
+
+You can now build Lix with the configuration option `-Db_sanitize=undefined` and it will both work and pass tests. AddressSanitizer support is also coming soon.
+
+For a list of undefined behaviour fixed by sanitizer usage, see [the gerrit topic "undefined-behaviour"](https://gerrit.lix.systems/q/topic:%22undefined-behaviour%22).
--- a/flake.nix
+++ b/flake.nix
@ -196,6 +196,8 @@
            busybox-sandbox-shell = final.busybox-sandbox-shell or final.default-busybox-sandbox-shell;
          };

+          pegtl = final.callPackage ./misc/pegtl.nix { };
+
          # Export the patched version of boehmgc that Lix uses into the overlay
          # for consumers of this flake.
          boehmgc-nix = final.nix.boehmgc-nix;
@ -386,7 +388,7 @@
              nix = pkgs.callPackage ./package.nix {
                inherit stdenv officialRelease versionSuffix;
                busybox-sandbox-shell = pkgs.busybox-sandbox-shell or pkgs.default-busybox-sandbox;
-                internalApiDocs = true;
+                internalApiDocs = false;
              };
              pre-commit = self.hydraJobs.pre-commit.${pkgs.system} or { };
            in
--- a/justfile
+++ b/justfile
@ -9,8 +9,8 @@ clean:
    rm -rf build

 # Prepare meson for building
-setup:
-    meson setup build --prefix="$PWD/outputs/out" $mesonFlags
+setup *OPTIONS:
+    meson setup build --prefix="$PWD/outputs/out" $mesonFlags {{ OPTIONS }}

 # Build lix
 build *OPTIONS:
--- a/maintainers/buildtime_report.sh
+++ b/maintainers/buildtime_report.sh
@ -1,4 +1,4 @@
-#!/bin/sh
+#!/usr/bin/env bash

 # Generates a report of build time based on a meson build using -ftime-trace in
 # Clang.
--- a/meson.build
+++ b/meson.build
@ -287,6 +287,14 @@ gtest = [

 toml11 = dependency('toml11', version : '>=3.7.0', required : true, method : 'cmake')

+pegtl = dependency(
+  'pegtl',
+  version : '>=3.2.7',
+  required : true,
+  method : 'cmake',
+  modules : [ 'taocpp::pegtl' ],
+)
+
 nlohmann_json = dependency('nlohmann_json', required : true)

 # lix-doc is a Rust project provided via buildInputs and unfortunately doesn't have any way to be detected.
@ -335,8 +343,6 @@ endif
 # that busybox sh won't run busybox applets as builtins (which would break our sandbox).

 lsof = find_program('lsof', native : true)
-bison = find_program('bison', native : true)
-flex = find_program('flex', native : true)

 # This is how Nix does generated headers...
 # other instances of header generation use a very similar command.
--- a/meson/cleanup-install.bash
+++ b/meson/cleanup-install.bash
@ -1,50 +0,0 @@
-#!/usr/bin/env bash
-# Meson will call this with an absolute path to Bash.
-# The shebang is just for convenience.
-
-# The parser and lexer tab are generated via custom Meson targets in src/libexpr/meson.build,
-# but Meson doesn't support marking only part of a target for install. The generation creates
-# both headers (parser-tab.hh, lexer-tab.hh) and source files (parser-tab.cc, lexer-tab.cc),
-# and we definitely want the former installed, but not the latter. This script is added to
-# Meson's install steps to correct this, as the logic for it is just complex enough to
-# warrant separate and careful handling, because both Meson's configured include directory
-# may or may not be an absolute path, and DESTDIR may or may not be set at all, but can't be
-# manipulated in Meson logic.
-
-set -euo pipefail
-
-echo "cleanup-install: removing Meson-placed C++ sources from dest includedir"
-
-if [[ "${1/--help/}" != "$1" ]]; then
-	echo "cleanup-install: this script should only be called from the Meson build system"
-	exit 1
-fi
-
-# Ensure the includedir was passed as the first argument
-# (set -u will make this fail otherwise).
-includedir="$1"
-# And then ensure that first argument is a directory that exists.
-if ! [[ -d "$1" ]]; then
-	echo "cleanup-install: this script should only be called from the Meson build system"
-	echo "argv[1] (${1@Q}) is not a directory"
-	exit 2
-fi
-
-# If DESTDIR environment variable is set, prepend it to the include dir.
-# Unfortunately, we cannot do this on the Meson side. We do have an environment variable
-# `MESON_INSTALL_DESTDIR_PREFIX`, but that will not refer to the include directory if
-# includedir has been set separately, which Lix's split-output derivation does.
-# We also cannot simply do an inline bash conditional like "${DESTDIR:=}" or similar,
-# because we need to specifically *join* DESTDIR and includedir with a slash, and *not*
-# have a slash if DESTDIR isn't set at all, since $includedir could be a relative directory.
-# Finally, DESTDIR is only available to us as an environment variable in these install scripts,
-# not in Meson logic.
-# Therefore, our best option is to have Meson pass this script the configured includedir,
-# and perform this dance with it and $DESTDIR.
-if [[ -n "${DESTDIR:-}" ]]; then
-	includedir="$DESTDIR/$includedir"
-fi
-
-# Intentionally not using -f.
-# If these files don't exist then our assumptions have been violated and we should fail.
-rm -v "$includedir/lix/libexpr/parser-tab.cc" "$includedir/lix/libexpr/lexer-tab.cc"
--- a/misc/pegtl.nix
+++ b/misc/pegtl.nix
@ -0,0 +1,23 @@
+{
+  stdenv,
+  cmake,
+  ninja,
+  fetchFromGitHub,
+}:
+
+stdenv.mkDerivation {
+  pname = "pegtl";
+  version = "3.2.7";
+
+  src = fetchFromGitHub {
+    repo = "PEGTL";
+    owner = "taocpp";
+    rev = "refs/tags/3.2.7";
+    hash = "sha256-IV5YNGE4EWVrmg2Sia/rcU8jCuiBynQGJM6n3DCWTQU=";
+  };
+
+  nativeBuildInputs = [
+    cmake
+    ninja
+  ];
+}
--- a/package.nix
+++ b/package.nix
@ -10,7 +10,6 @@
  boehmgc-nix ? __forDefaults.boehmgc-nix,
  boehmgc,
  nlohmann_json,
-  bison,
  build-release-notes ? __forDefaults.build-release-notes,
  boost,
  brotli,
@ -20,7 +19,6 @@
  doxygen,
  editline-lix ? __forDefaults.editline-lix,
  editline,
-  flex,
  git,
  gtest,
  jq,
@ -36,6 +34,7 @@
  meson,
  ninja,
  openssl,
+  pegtl,
  pkg-config,
  python3,
  rapidcheck,
@ -210,8 +209,6 @@ stdenv.mkDerivation (finalAttrs: {

  nativeBuildInputs =
    [
-      bison
-      flex
      python3
      meson
      ninja
@ -250,6 +247,7 @@ stdenv.mkDerivation (finalAttrs: {
      libsodium
      toml11
      lix-doc
+      pegtl
    ]
    ++ lib.optionals hostPlatform.isLinux [
      libseccomp
@ -304,6 +302,9 @@ stdenv.mkDerivation (finalAttrs: {
      else
        appendToVar configureFlags "--disable-tests"
      fi
+
+      # Fix up /usr/bin/env shebangs relied on by the build
+      patchShebangs --build tests/ doc/manual/
    '';

  mesonBuildType = "debugoptimized";
@ -392,6 +393,7 @@ stdenv.mkDerivation (finalAttrs: {
        bashInteractive,
        clang-tools,
        clangbuildanalyzer,
+        doxygen,
        glibcLocales,
        just,
        llvmPackages,
@ -459,6 +461,10 @@ stdenv.mkDerivation (finalAttrs: {
              check-syscalls
              just
              nixfmt
+              # Included above when internalApiDocs is true, but we set that to
+              # false intentionally to save dev build time.
+              # To build them in a dev shell, you can set -Dinternal-api-docs=enabled when configuring.
+              doxygen
              # Load-bearing order. Must come before clang-unwrapped below, but after clang_tools above.
              stdenv.cc
            ]
@ -478,7 +484,7 @@ stdenv.mkDerivation (finalAttrs: {
              # https://git.lix.systems/lix-project/lix/src/commit/7575db522e9008685c4009423398f6900a16bcce/src/nix/develop.cc#L240-L241
              # this is, of course, absurd.
              if [[ $name != lix-shell-env && $name != lix-shell-env-env ]]; then
-                return;
+                return
              fi

              PATH=$prefix/bin:$PATH
@ -488,6 +494,11 @@ stdenv.mkDerivation (finalAttrs: {
              # Make bash completion work.
              XDG_DATA_DIRS+=:$out/share

+              if [[ ! -f ./.this-is-lix ]]; then
+                echo "Dev shell not started from inside a Lix repo, skipping repo setup" >&2
+                return
+              fi
+
              ${lib.optionalString (pre-commit-checks ? shellHook) pre-commit-checks.shellHook}
              # Allow `touch .nocontribmsg` to turn this notice off.
              if ! [[ -f .nocontribmsg ]]; then
--- a/src/libexpr/eval.cc
+++ b/src/libexpr/eval.cc
@ -18,7 +18,6 @@
 #include "gc-small-vector.hh"
 #include "fetch-to-store.hh"
 #include "flake/flakeref.hh"
-#include "parser-tab.hh"

 #include <algorithm>
 #include <chrono>
@ -2958,21 +2957,6 @@ std::optional<std::string> EvalState::resolveSearchPathPath(const SearchPath::Pa
 }


-Expr * EvalState::parse(
-    char * text,
-    size_t length,
-    Pos::Origin origin,
-    const SourcePath & basePath,
-    std::shared_ptr<StaticEnv> & staticEnv)
-{
-    auto result = parseExprFromBuf(text, length, origin, basePath, symbols, positions, exprSymbols);
-
-    result->bindVars(*this, staticEnv);
-
-    return result;
-}
-
-
 std::string ExternalValueBase::coerceToString(EvalState & state, const PosIdx & pos, NixStringContext & context, bool copyMore, bool copyToStore) const
 {
    state.error<TypeError>(
--- a/src/libexpr/flake/flakeref.cc
+++ b/src/libexpr/flake/flakeref.cc
@ -204,13 +204,7 @@ std::pair<FlakeRef, std::string> parseFlakeRefWithFragment(
        std::string fragment;
        std::swap(fragment, parsedURL.fragment);

-        // This has a special meaning for flakes and must not be passed to libfetchers.
-        // Of course this means that libfetchers cannot have fetchers
-        // expecting an argument `dir` 🫠
-        ParsedURL urlForFetchers(parsedURL);
-        urlForFetchers.query.erase("dir");
-
-        auto input = Input::fromURL(urlForFetchers, isFlake);
+        auto input = Input::fromURL(parsedURL, isFlake);
        input.parent = baseDir;

        return std::make_pair(
--- a/src/libexpr/lexer.l
+++ b/src/libexpr/lexer.l
@ -1,302 +0,0 @@
-%option reentrant bison-bridge bison-locations
-%option align
-%option noyywrap
-%option never-interactive
-%option stack
-%option nodefault
-%option nounput noyy_top_state
-
-
-%s DEFAULT
-%x STRING
-%x IND_STRING
-%x INPATH
-%x INPATH_SLASH
-%x PATH_START
-
-
-%{
-#ifdef __clang__
-#pragma clang diagnostic ignored "-Wunneeded-internal-declaration"
-#endif
-
-// yacc generates code that uses unannotated fallthrough.
-#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
-#ifdef __clang__
-#pragma clang diagnostic ignored "-Wimplicit-fallthrough"
-#endif
-
-#include "nixexpr.hh"
-#include "parser-tab.hh"
-#include "strings.hh"
-
-using namespace nix;
-
-#define THROW(...)                             \
-  do {                                         \
-    state->error.reset(new auto(__VA_ARGS__)); \
-    return YYerror;                            \
-  } while (0)
-
-namespace nix {
-
-#define CUR_POS state->at(*yylloc)
-
-static void initLoc(YYLTYPE * loc)
-{
-    loc->first_line = loc->last_line = 0;
-    loc->first_column = loc->last_column = 0;
-}
-
-static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
-{
-    loc->stash();
-
-    loc->first_column = loc->last_column;
-    loc->last_column += len;
-}
-
-
-// we make use of the fact that the parser receives a private copy of the input
-// string and can munge around in it.
-static StringToken unescapeStr(SymbolTable & symbols, char * s, size_t length)
-{
-    char * result = s;
-    char * t = s;
-    char c;
-    // the input string is terminated with *two* NULs, so we can safely take
-    // *one* character after the one being checked against.
-    while ((c = *s++)) {
-        if (c == '\\') {
-            c = *s++;
-            if (c == 'n') *t = '\n';
-            else if (c == 'r') *t = '\r';
-            else if (c == 't') *t = '\t';
-            else *t = c;
-        }
-        else if (c == '\r') {
-            /* Normalise CR and CR/LF into LF. */
-            *t = '\n';
-            if (*s == '\n') s++; /* cr/lf */
-        }
-        else *t = c;
-        t++;
-    }
-    return {result, size_t(t - result)};
-}
-
-
-}
-
-#define YY_USER_INIT initLoc(yylloc)
-#define YY_USER_ACTION adjustLoc(yylloc, yytext, yyleng);
-
-#define PUSH_STATE(state) yy_push_state(state, yyscanner)
-#define POP_STATE() yy_pop_state(yyscanner)
-
-%}
-
-
-ANY         .|\n
-ID          [a-zA-Z\_][a-zA-Z0-9\_\'\-]*
-INT         [0-9]+
-FLOAT       (([1-9][0-9]*\.[0-9]*)|(0?\.[0-9]+))([Ee][+-]?[0-9]+)?
-PATH_CHAR   [a-zA-Z0-9\.\_\-\+]
-PATH        {PATH_CHAR}*(\/{PATH_CHAR}+)+\/?
-PATH_SEG    {PATH_CHAR}*\/
-HPATH       \~(\/{PATH_CHAR}+)+\/?
-HPATH_START \~\/
-SPATH       \<{PATH_CHAR}+(\/{PATH_CHAR}+)*\>
-URI         [a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']+
-
-
-%%
-
-
-if          { return IF; }
-then        { return THEN; }
-else        { return ELSE; }
-assert      { return ASSERT; }
-with        { return WITH; }
-let         { return LET; }
-in          { return IN; }
-rec         { return REC; }
-inherit     { return INHERIT; }
-or          { return OR_KW; }
-\.\.\.      { return ELLIPSIS; }
-
-\=\=        { return EQ; }
-\!\=        { return NEQ; }
-\<\=        { return LEQ; }
-\>\=        { return GEQ; }
-\&\&        { return AND; }
-\|\|        { return OR; }
-\-\>        { return IMPL; }
-\/\/        { return UPDATE; }
-\+\+        { return CONCAT; }
-
-{ID}        { yylval->id = {yytext, (size_t) yyleng}; return ID; }
-{INT}       { errno = 0;
-              std::optional<int64_t> numMay = string2Int<int64_t>(yytext);
-              if (numMay.has_value()) {
-                  yylval->n = *numMay;
-              } else {
-                  THROW(ParseError(ErrorInfo{
-                      .msg = HintFmt("invalid integer '%1%'", yytext),
-                      .pos = state->positions[CUR_POS],
-                  }));
-              }
-              return INT;
-            }
-{FLOAT}     { errno = 0;
-              yylval->nf = strtod(yytext, 0);
-              if (errno != 0)
-                  THROW(ParseError(ErrorInfo{
-                      .msg = HintFmt("invalid float '%1%'", yytext),
-                      .pos = state->positions[CUR_POS],
-                  }));
-              return FLOAT;
-            }
-
-\$\{        { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
-
-\}          { /* State INITIAL only exists at the bottom of the stack and is
-                 used as a marker. DEFAULT replaces it everywhere else.
-                 Popping when in INITIAL state causes an empty stack exception,
-                 so don't */
-              if (YYSTATE != INITIAL)
-                POP_STATE();
-              return '}';
-            }
-\{          { PUSH_STATE(DEFAULT); return '{'; }
-
-\"          { PUSH_STATE(STRING); return '"'; }
-<STRING>([^\$\"\\]|\$[^\{\"\\]|\\{ANY}|\$\\{ANY})*\$/\" |
-<STRING>([^\$\"\\]|\$[^\{\"\\]|\\{ANY}|\$\\{ANY})+ {
-                /* It is impossible to match strings ending with '$' with one
-                   regex because trailing contexts are only valid at the end
-                   of a rule. (A sane but undocumented limitation.) */
-                yylval->str = unescapeStr(state->symbols, yytext, yyleng);
-                return STR;
-              }
-<STRING>\$\{  { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
-<STRING>\"    { POP_STATE(); return '"'; }
-<STRING>\$|\\|\$\\ {
-                /* This can only occur when we reach EOF, otherwise the above
-                   (...|\$[^\{\"\\]|\\.|\$\\.)+ would have triggered.
-                   This is technically invalid, but we leave the problem to the
-                   parser who fails with exact location. */
-                return EOF;
-              }
-
-\'\'(\ *\n)?     { PUSH_STATE(IND_STRING); return IND_STRING_OPEN; }
-<IND_STRING>([^\$\']|\$[^\{\']|\'[^\'\$])+ {
-                   yylval->str = {yytext, (size_t) yyleng, true};
-                   return IND_STR;
-                 }
-<IND_STRING>\'\'\$ |
-<IND_STRING>\$   {
-                   yylval->str = {"$", 1};
-                   return IND_STR;
-                 }
-<IND_STRING>\'\'\' {
-                   yylval->str = {"''", 2};
-                   return IND_STR;
-                 }
-<IND_STRING>\'\'\\{ANY} {
-                   yylval->str = unescapeStr(state->symbols, yytext + 2, yyleng - 2);
-                   return IND_STR;
-                 }
-<IND_STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
-<IND_STRING>\'\' { POP_STATE(); return IND_STRING_CLOSE; }
-<IND_STRING>\'   {
-                   yylval->str = {"'", 1};
-                   return IND_STR;
-                 }
-
-{PATH_SEG}\$\{ |
-{HPATH_START}\$\{ {
-  PUSH_STATE(PATH_START);
-  yyless(0);
-  yylloc->unstash();
-}
-
-<PATH_START>{PATH_SEG} {
-  POP_STATE();
-  PUSH_STATE(INPATH_SLASH);
-  yylval->path = {yytext, (size_t) yyleng};
-  return PATH;
-}
-
-<PATH_START>{HPATH_START} {
-  POP_STATE();
-  PUSH_STATE(INPATH_SLASH);
-  yylval->path = {yytext, (size_t) yyleng};
-  return HPATH;
-}
-
-{PATH} {
-  if (yytext[yyleng-1] == '/')
-    PUSH_STATE(INPATH_SLASH);
-  else
-    PUSH_STATE(INPATH);
-  yylval->path = {yytext, (size_t) yyleng};
-  return PATH;
-}
-{HPATH} {
-  if (yytext[yyleng-1] == '/')
-    PUSH_STATE(INPATH_SLASH);
-  else
-    PUSH_STATE(INPATH);
-  yylval->path = {yytext, (size_t) yyleng};
-  return HPATH;
-}
-
-<INPATH,INPATH_SLASH>\$\{ {
-  POP_STATE();
-  PUSH_STATE(INPATH);
-  PUSH_STATE(DEFAULT);
-  return DOLLAR_CURLY;
-}
-<INPATH,INPATH_SLASH>{PATH}|{PATH_SEG}|{PATH_CHAR}+ {
-  POP_STATE();
-  if (yytext[yyleng-1] == '/')
-      PUSH_STATE(INPATH_SLASH);
-  else
-      PUSH_STATE(INPATH);
-  yylval->str = {yytext, (size_t) yyleng};
-  return STR;
-}
-<INPATH>{ANY} |
-<INPATH><<EOF>> {
-  /* if we encounter a non-path character we inform the parser that the path has
-     ended with a PATH_END token and re-parse this character in the default
-     context (it may be ')', ';', or something of that sort) */
-  POP_STATE();
-  yyless(0);
-  yylloc->unstash();
-  return PATH_END;
-}
-
-<INPATH_SLASH>{ANY} |
-<INPATH_SLASH><<EOF>> {
-  THROW(ParseError(ErrorInfo{
-      .msg = HintFmt("path has a trailing slash"),
-      .pos = state->positions[CUR_POS],
-  }));
-}
-
-{SPATH}     { yylval->path = {yytext, (size_t) yyleng}; return SPATH; }
-{URI}       { yylval->uri = {yytext, (size_t) yyleng}; return URI; }
-
-[ \t\r\n]+    /* eat up whitespace */
-\#[^\r\n]*    /* single-line comments */
-\/\*([^*]|\*+[^*/])*\*+\/  /* long comments */
-
-{ANY}       {
-              /* Don't return a negative number, as this will cause
-                 Bison to stop parsing without an error. */
-              return (unsigned char) yytext[0];
-            }
-
-%%
--- a/src/libexpr/meson.build
+++ b/src/libexpr/meson.build
@ -1,54 +1,3 @@
-parser_tab = custom_target(
-  input : 'parser.y',
-  output : [
-    'parser-tab.cc',
-    'parser-tab.hh',
-  ],
-  command : [
-    'bison',
-    '-v',
-    '-o',
-    '@OUTPUT0@',
-    '@INPUT@',
-    '-d',
-  ],
-  # NOTE(Qyriad): Meson doesn't support installing only part of a custom target, so we add
-  # an install script below which removes parser-tab.cc.
-  install : true,
-  install_dir : includedir / 'lix/libexpr',
-)
-
-lexer_tab = custom_target(
-  input : [
-    'lexer.l',
-    parser_tab,
-  ],
-  output : [
-    'lexer-tab.cc',
-    'lexer-tab.hh',
-  ],
-  command : [
-    'flex',
-    '--outfile',
-    '@OUTPUT0@',
-    '--header-file=' + '@OUTPUT1@',
-    '@INPUT0@',
-  ],
-  # NOTE(Qyriad): Meson doesn't support installing only part of a custom target, so we add
-  # an install script below which removes lexer-tab.cc.
-  install : true,
-  install_dir : includedir / 'lix/libexpr',
-)
-
-# TODO(Qyriad): When the parser and lexer are rewritten this should be removed.
-# NOTE(Qyriad): We do this this way instead of an inline bash or rm command
-# due to subtleties in Meson. Check the comments in cleanup-install.bash for details.
-meson.add_install_script(
-  bash,
-  meson.project_source_root() / 'meson/cleanup-install.bash',
-  '@0@'.format(includedir),
-)
-
 libexpr_generated_headers = [
  gen_header.process('primops/derivation.nix', preserve_path_from : meson.current_source_dir()),
 ]
@ -75,6 +24,7 @@ libexpr_sources = files(
  'get-drvs.cc',
  'json-to-value.cc',
  'nixexpr.cc',
+  'parser/parser.cc',
  'paths.cc',
  'primops.cc',
  'print-ambiguous.cc',
@ -110,7 +60,9 @@ libexpr_headers = files(
  'get-drvs.hh',
  'json-to-value.hh',
  'nixexpr.hh',
-  'parser-state.hh',
+  'parser/change_head.hh',
+  'parser/grammar.hh',
+  'parser/state.hh',
  'pos-idx.hh',
  'pos-table.hh',
  'primops.hh',
@ -129,8 +81,6 @@ libexpr_headers = files(
 libexpr = library(
  'lixexpr',
  libexpr_sources,
-  parser_tab,
-  lexer_tab,
  libexpr_generated_headers,
  dependencies : [
    liblixutil,
--- a/src/libexpr/parser.y
+++ b/src/libexpr/parser.y
@ -1,503 +0,0 @@
-%glr-parser
-%define api.pure
-%locations
-%define parse.error verbose
-%defines
-/* %no-lines */
-%parse-param { void * scanner }
-%parse-param { nix::ParserState * state }
-%lex-param { void * scanner }
-%lex-param { nix::ParserState * state }
-%expect 1
-%expect-rr 1
-
-%code requires {
-
-#ifndef BISON_HEADER
-#define BISON_HEADER
-
-#include <variant>
-
-#include "finally.hh"
-#include "users.hh"
-
-#include "nixexpr.hh"
-#include "eval.hh"
-#include "eval-settings.hh"
-#include "globals.hh"
-#include "parser-state.hh"
-
-#define YYLTYPE ::nix::ParserLocation
-#define YY_DECL int yylex \
-    (YYSTYPE * yylval_param, YYLTYPE * yylloc_param, yyscan_t yyscanner, nix::ParserState * state)
-
-namespace nix {
-
-Expr * parseExprFromBuf(
-    char * text,
-    size_t length,
-    Pos::Origin origin,
-    const SourcePath & basePath,
-    SymbolTable & symbols,
-    PosTable & positions,
-    const Expr::AstSymbols & astSymbols);
-
-}
-
-#endif
-
-}
-
-%{
-
-#include "parser-tab.hh"
-#include "lexer-tab.hh"
-
-YY_DECL;
-
-using namespace nix;
-
-#define CUR_POS state->at(*yylocp)
-
-// otherwise destructors cause compiler errors
-#pragma GCC diagnostic ignored "-Wswitch-enum"
-
-#define THROW(err, ...)                              \
-  do {                                               \
-    state->error.reset(new auto(err));               \
-    [](auto... d) { (delete d, ...); }(__VA_ARGS__); \
-    YYABORT;                                         \
-  } while (0)
-
-void yyerror(YYLTYPE * loc, yyscan_t scanner, ParserState * state, const char * error)
-{
-    if (std::string_view(error).starts_with("syntax error, unexpected end of file")) {
-        loc->first_column = loc->last_column;
-        loc->first_line = loc->last_line;
-    }
-    throw ParseError({
-        .msg = HintFmt(error),
-        .pos = state->positions[state->at(*loc)]
-    });
-}
-
-template<typename T>
-static std::unique_ptr<T> unp(T * e)
-{
-  return std::unique_ptr<T>(e);
-}
-
-template<typename T = std::unique_ptr<nix::Expr>, typename... Args>
-static std::vector<T> vec(Args && ... args)
-{
-  std::vector<T> result;
-  result.reserve(sizeof...(Args));
-  (result.emplace_back(std::forward<Args>(args)), ...);
-  return result;
-}
-
-
-%}
-
-%union {
-  // !!! We're probably leaking stuff here.
-  nix::Expr * e;
-  nix::ExprList * list;
-  nix::ExprAttrs * attrs;
-  nix::Formals * formals;
-  nix::Formal * formal;
-  nix::NixInt n;
-  nix::NixFloat nf;
-  nix::StringToken id; // !!! -> Symbol
-  nix::StringToken path;
-  nix::StringToken uri;
-  nix::StringToken str;
-  std::vector<nix::AttrName> * attrNames;
-  std::vector<std::pair<nix::AttrName, nix::PosIdx>> * inheritAttrs;
-  std::vector<std::pair<nix::PosIdx, std::unique_ptr<nix::Expr>>> * string_parts;
-  std::vector<std::pair<nix::PosIdx, std::variant<std::unique_ptr<nix::Expr>, nix::StringToken>>> * ind_string_parts;
-}
-
-%destructor { delete $$; } <e>
-%destructor { delete $$; } <list>
-%destructor { delete $$; } <attrs>
-%destructor { delete $$; } <formals>
-%destructor { delete $$; } <formal>
-%destructor { delete $$; } <attrNames>
-%destructor { delete $$; } <inheritAttrs>
-%destructor { delete $$; } <string_parts>
-%destructor { delete $$; } <ind_string_parts>
-
-%type <e> start
-%type <e> expr expr_function expr_if expr_op
-%type <e> expr_select expr_simple expr_app
-%type <list> expr_list
-%type <attrs> binds
-%type <formals> formals
-%type <formal> formal
-%type <attrNames> attrpath
-%type <inheritAttrs> attrs
-%type <string_parts> string_parts_interpolated
-%type <ind_string_parts> ind_string_parts
-%type <e> path_start string_parts string_attr
-%type <id> attr
-%token <id> ID
-%token <str> STR IND_STR
-%token <n> INT
-%token <nf> FLOAT
-%token <path> PATH HPATH SPATH PATH_END
-%token <uri> URI
-%token IF THEN ELSE ASSERT WITH LET IN REC INHERIT EQ NEQ AND OR IMPL OR_KW
-%token DOLLAR_CURLY /* == ${ */
-%token IND_STRING_OPEN IND_STRING_CLOSE
-%token ELLIPSIS
-
-%right IMPL
-%left OR
-%left AND
-%nonassoc EQ NEQ
-%nonassoc '<' '>' LEQ GEQ
-%right UPDATE
-%left NOT
-%left '+' '-'
-%left '*' '/'
-%right CONCAT
-%nonassoc '?'
-%nonassoc NEGATE
-
-%%
-
-start: expr { state->result = $1; $$ = 0; };
-
-expr: expr_function;
-
-expr_function
-  : ID ':' expr_function
-    { $$ = new ExprLambda(CUR_POS, state->symbols.create($1), nullptr, unp($3)); }
-  | '{' formals '}' ':' expr_function
-    { if (auto e = state->validateFormals($2)) THROW(*e);
-      $$ = new ExprLambda(CUR_POS, unp($2), unp($5));
-    }
-  | '{' formals '}' '@' ID ':' expr_function
-    {
-      auto arg = state->symbols.create($5);
-      if (auto e = state->validateFormals($2, CUR_POS, arg)) THROW(*e, $2, $7);
-      $$ = new ExprLambda(CUR_POS, arg, unp($2), unp($7));
-    }
-  | ID '@' '{' formals '}' ':' expr_function
-    {
-      auto arg = state->symbols.create($1);
-      if (auto e = state->validateFormals($4, CUR_POS, arg)) THROW(*e, $4, $7);
-      $$ = new ExprLambda(CUR_POS, arg, unp($4), unp($7));
-    }
-  | ASSERT expr ';' expr_function
-    { $$ = new ExprAssert(CUR_POS, unp($2), unp($4)); }
-  | WITH expr ';' expr_function
-    { $$ = new ExprWith(CUR_POS, unp($2), unp($4)); }
-  | LET binds IN expr_function
-    { if (!$2->dynamicAttrs.empty())
-        THROW(ParseError({
-            .msg = HintFmt("dynamic attributes not allowed in let"),
-            .pos = state->positions[CUR_POS]
-        }), $2, $4);
-      $$ = new ExprLet(unp($2), unp($4));
-    }
-  | expr_if
-  ;
-
-expr_if
-  : IF expr THEN expr ELSE expr { $$ = new ExprIf(CUR_POS, unp($2), unp($4), unp($6)); }
-  | expr_op
-  ;
-
-expr_op
-  : '!' expr_op %prec NOT { $$ = new ExprOpNot(unp($2)); }
-  | '-' expr_op %prec NEGATE { $$ = new ExprCall(CUR_POS, std::make_unique<ExprVar>(state->s.sub), vec(std::make_unique<ExprInt>(0), unp($2))); }
-  | expr_op EQ expr_op { $$ = new ExprOpEq(unp($1), unp($3)); }
-  | expr_op NEQ expr_op { $$ = new ExprOpNEq(unp($1), unp($3)); }
-  | expr_op '<' expr_op { $$ = new ExprCall(state->at(@2), std::make_unique<ExprVar>(state->s.lessThan), vec($1, $3)); }
-  | expr_op LEQ expr_op { $$ = new ExprOpNot(std::make_unique<ExprCall>(state->at(@2), std::make_unique<ExprVar>(state->s.lessThan), vec($3, $1))); }
-  | expr_op '>' expr_op { $$ = new ExprCall(state->at(@2), std::make_unique<ExprVar>(state->s.lessThan), vec($3, $1)); }
-  | expr_op GEQ expr_op { $$ = new ExprOpNot(std::make_unique<ExprCall>(state->at(@2), std::make_unique<ExprVar>(state->s.lessThan), vec($1, $3))); }
-  | expr_op AND expr_op { $$ = new ExprOpAnd(state->at(@2), unp($1), unp($3)); }
-  | expr_op OR expr_op { $$ = new ExprOpOr(state->at(@2), unp($1), unp($3)); }
-  | expr_op IMPL expr_op { $$ = new ExprOpImpl(state->at(@2), unp($1), unp($3)); }
-  | expr_op UPDATE expr_op { $$ = new ExprOpUpdate(state->at(@2), unp($1), unp($3)); }
-  | expr_op '?' attrpath { $$ = new ExprOpHasAttr(unp($1), std::move(*$3)); delete $3; }
-  | expr_op '+' expr_op
-    { $$ = new ExprConcatStrings(state->at(@2), false, vec<std::pair<PosIdx, std::unique_ptr<Expr>>>(std::pair(state->at(@1), unp($1)), std::pair(state->at(@3), unp($3)))); }
-  | expr_op '-' expr_op { $$ = new ExprCall(state->at(@2), std::make_unique<ExprVar>(state->s.sub), vec($1, $3)); }
-  | expr_op '*' expr_op { $$ = new ExprCall(state->at(@2), std::make_unique<ExprVar>(state->s.mul), vec($1, $3)); }
-  | expr_op '/' expr_op { $$ = new ExprCall(state->at(@2), std::make_unique<ExprVar>(state->s.div), vec($1, $3)); }
-  | expr_op CONCAT expr_op { $$ = new ExprOpConcatLists(state->at(@2), unp($1), unp($3)); }
-  | expr_app
-  ;
-
-expr_app
-  : expr_app expr_select {
-      if (auto e2 = dynamic_cast<ExprCall *>($1)) {
-          e2->args.emplace_back($2);
-          $$ = $1;
-      } else
-          $$ = new ExprCall(CUR_POS, unp($1), vec(unp($2)));
-  }
-  | expr_select
-  ;
-
-expr_select
-  : expr_simple '.' attrpath
-    { $$ = new ExprSelect(CUR_POS, unp($1), std::move(*$3), nullptr); delete $3; }
-  | expr_simple '.' attrpath OR_KW expr_select
-    { $$ = new ExprSelect(CUR_POS, unp($1), std::move(*$3), unp($5)); delete $3; }
-  | /* Backwards compatibility: because Nixpkgs has a rarely used
-       function named ‘or’, allow stuff like ‘map or [...]’. */
-    expr_simple OR_KW
-    { $$ = new ExprCall(CUR_POS, unp($1), vec(std::make_unique<ExprVar>(CUR_POS, state->s.or_))); }
-  | expr_simple
-  ;
-
-expr_simple
-  : ID {
-      std::string_view s = "__curPos";
-      if ($1.l == s.size() && strncmp($1.p, s.data(), s.size()) == 0)
-          $$ = new ExprPos(CUR_POS);
-      else
-          $$ = new ExprVar(CUR_POS, state->symbols.create($1));
-  }
-  | INT { $$ = new ExprInt($1); }
-  | FLOAT { $$ = new ExprFloat($1); }
-  | '"' string_parts '"' { $$ = $2; }
-  | IND_STRING_OPEN ind_string_parts IND_STRING_CLOSE {
-      $$ = state->stripIndentation(CUR_POS, std::move(*$2)).release();
-      delete $2;
-  }
-  | path_start PATH_END
-  | path_start string_parts_interpolated PATH_END {
-      $2->emplace($2->begin(), state->at(@1), $1);
-      $$ = new ExprConcatStrings(CUR_POS, false, std::move(*$2));
-      delete $2;
-  }
-  | SPATH {
-      std::string path($1.p + 1, $1.l - 2);
-      $$ = new ExprCall(CUR_POS,
-          std::make_unique<ExprVar>(state->s.findFile),
-          vec(std::make_unique<ExprVar>(state->s.nixPath),
-              std::make_unique<ExprString>(std::move(path))));
-  }
-  | URI {
-      static bool noURLLiterals = experimentalFeatureSettings.isEnabled(Xp::NoUrlLiterals);
-      if (noURLLiterals)
-          THROW(ParseError({
-              .msg = HintFmt("URL literals are disabled"),
-              .pos = state->positions[CUR_POS]
-          }));
-      $$ = new ExprString(std::string($1));
-  }
-  | '(' expr ')' { $$ = $2; }
-  /* Let expressions `let {..., body = ...}' are just desugared
-     into `(rec {..., body = ...}).body'. */
-  | LET '{' binds '}'
-    { $3->recursive = true; $$ = new ExprSelect(noPos, unp($3), state->s.body); }
-  | REC '{' binds '}'
-    { $3->recursive = true; $$ = $3; }
-  | '{' binds '}'
-    { $$ = $2; }
-  | '[' expr_list ']' { $$ = $2; }
-  ;
-
-string_parts
-  : STR { $$ = new ExprString(std::string($1)); }
-  | string_parts_interpolated
-    { $$ = new ExprConcatStrings(CUR_POS, true, std::move(*$1));
-      delete $1;
-    }
-  | { $$ = new ExprString(""); }
-  ;
-
-string_parts_interpolated
-  : string_parts_interpolated STR
-  { $$ = $1; $1->emplace_back(state->at(@2), new ExprString(std::string($2))); }
-  | string_parts_interpolated DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(state->at(@2), $3); }
-  | DOLLAR_CURLY expr '}' { $$ = new std::vector<std::pair<PosIdx, std::unique_ptr<Expr>>>; $$->emplace_back(state->at(@1), $2); }
-  | STR DOLLAR_CURLY expr '}' {
-      $$ = new std::vector<std::pair<PosIdx, std::unique_ptr<Expr>>>;
-      $$->emplace_back(state->at(@1), new ExprString(std::string($1)));
-      $$->emplace_back(state->at(@2), $3);
-    }
-  ;
-
-path_start
-  : PATH {
-    Path path(absPath({$1.p, $1.l}, state->basePath.path.abs()));
-    /* add back in the trailing '/' to the first segment */
-    if ($1.p[$1.l-1] == '/' && $1.l > 1)
-      path += "/";
-    $$ = new ExprPath(path);
-  }
-  | HPATH {
-    if (evalSettings.pureEval) {
-        THROW(Error(
-            "the path '%s' can not be resolved in pure mode",
-            std::string_view($1.p, $1.l)
-        ));
-    }
-    Path path(getHome() + std::string($1.p + 1, $1.l - 1));
-    $$ = new ExprPath(path);
-  }
-  ;
-
-ind_string_parts
-  : ind_string_parts IND_STR { $$ = $1; $1->emplace_back(state->at(@2), $2); }
-  | ind_string_parts DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(state->at(@2), unp($3)); }
-  | { $$ = new std::vector<std::pair<PosIdx, std::variant<std::unique_ptr<Expr>, StringToken>>>; }
-  ;
-
-binds
-  : binds attrpath '=' expr ';'
-    { $$ = $1;
-      if (auto e = state->addAttr($$, std::move(*$2), unp($4), state->at(@2))) THROW(*e, $1, $2);
-      delete $2;
-    }
-  | binds INHERIT attrs ';'
-    { $$ = $1;
-      for (auto & [i, iPos] : *$3) {
-          if ($$->attrs.find(i.symbol) != $$->attrs.end())
-              THROW(state->dupAttr(i.symbol, iPos, $$->attrs[i.symbol].pos), $1);
-          $$->attrs.emplace(
-              i.symbol,
-              ExprAttrs::AttrDef(std::make_unique<ExprVar>(iPos, i.symbol), iPos, ExprAttrs::AttrDef::Kind::Inherited));
-      }
-      delete $3;
-    }
-  | binds INHERIT '(' expr ')' attrs ';'
-    { $$ = $1;
-      if (!$$->inheritFromExprs)
-          $$->inheritFromExprs = std::make_unique<std::vector<std::unique_ptr<Expr>>>();
-      $$->inheritFromExprs->push_back(unp($4));
-      for (auto & [i, iPos] : *$6) {
-          if ($$->attrs.find(i.symbol) != $$->attrs.end())
-              THROW(state->dupAttr(i.symbol, iPos, $$->attrs[i.symbol].pos), $1);
-          auto from = std::make_unique<nix::ExprInheritFrom>(state->at(@4), $$->inheritFromExprs->size() - 1);
-          $$->attrs.emplace(
-              i.symbol,
-              ExprAttrs::AttrDef(
-                  std::make_unique<ExprSelect>(iPos, std::move(from), i.symbol),
-                  iPos,
-                  ExprAttrs::AttrDef::Kind::InheritedFrom));
-      }
-      delete $6;
-    }
-  | { $$ = new ExprAttrs(state->at(@0)); }
-  ;
-
-attrs
-  : attrs attr { $$ = $1; $1->emplace_back(AttrName(state->symbols.create($2)), state->at(@2)); }
-  | attrs string_attr
-    { $$ = $1;
-      ExprString * str = dynamic_cast<ExprString *>($2);
-      if (str) {
-          $$->emplace_back(AttrName(state->symbols.create(str->s)), state->at(@2));
-          delete str;
-      } else
-          THROW(ParseError({
-              .msg = HintFmt("dynamic attributes not allowed in inherit"),
-              .pos = state->positions[state->at(@2)]
-          }), $1, $2);
-    }
-  | { $$ = new std::vector<std::pair<AttrName, PosIdx>>; }
-  ;
-
-attrpath
-  : attrpath '.' attr { $$ = $1; $1->push_back(AttrName(state->symbols.create($3))); }
-  | attrpath '.' string_attr
-    { $$ = $1;
-      ExprString * str = dynamic_cast<ExprString *>($3);
-      if (str) {
-          $$->push_back(AttrName(state->symbols.create(str->s)));
-          delete str;
-      } else
-          $$->emplace_back(unp($3));
-    }
-  | attr { $$ = new std::vector<AttrName>; $$->push_back(AttrName(state->symbols.create($1))); }
-  | string_attr
-    { $$ = new std::vector<AttrName>;
-      ExprString *str = dynamic_cast<ExprString *>($1);
-      if (str) {
-          $$->push_back(AttrName(state->symbols.create(str->s)));
-          delete str;
-      } else
-          $$->emplace_back(unp($1));
-    }
-  ;
-
-attr
-  : ID
-  | OR_KW { $$ = {"or", 2}; }
-  ;
-
-string_attr
-  : '"' string_parts '"' { $$ = $2; }
-  | DOLLAR_CURLY expr '}' { $$ = $2; }
-  ;
-
-expr_list
-  : expr_list expr_select { $$ = $1; $1->elems.emplace_back($2); /* !!! dangerous */ }
-  | { $$ = new ExprList; }
-  ;
-
-formals
-  : formal ',' formals
-    { $$ = $3; $$->formals.emplace_back(std::move(*$1)); delete $1; }
-  | formal
-    { $$ = new Formals; $$->formals.emplace_back(std::move(*$1)); $$->ellipsis = false; delete $1; }
-  |
-    { $$ = new Formals; $$->ellipsis = false; }
-  | ELLIPSIS
-    { $$ = new Formals; $$->ellipsis = true; }
-  ;
-
-formal
-  : ID { $$ = new Formal{CUR_POS, state->symbols.create($1), nullptr}; }
-  | ID '?' expr { $$ = new Formal{CUR_POS, state->symbols.create($1), unp($3)}; }
-  ;
-
-%%
-
-#include "eval.hh"
-
-
-namespace nix {
-
-Expr * parseExprFromBuf(
-    char * text,
-    size_t length,
-    Pos::Origin origin,
-    const SourcePath & basePath,
-    SymbolTable & symbols,
-    PosTable & positions,
-    const Expr::AstSymbols & astSymbols)
-{
-    yyscan_t scanner;
-    ParserState state {
-        .symbols = symbols,
-        .positions = positions,
-        .basePath = basePath,
-        .origin = positions.addOrigin(origin, length),
-        .s = astSymbols,
-    };
-
-    yylex_init(&scanner);
-    Finally _destroy([&] { yylex_destroy(scanner); });
-
-    yy_scan_buffer(text, length, scanner);
-    yyparse(scanner, &state);
-    if (state.error) {
-      delete state.result;
-      throw *state.error;
-    }
-
-    return state.result;
-}
-
-
-}
--- a/src/libexpr/parser/change_head.hh
+++ b/src/libexpr/parser/change_head.hh
@ -0,0 +1,66 @@
+#pragma once
+///@file
+
+#include <tao/pegtl.hpp>
+
+namespace nix::parser {
+
+// modified copy of change_state, as the manual suggests for more involved
+// state manipulation. we want to change only the first state parameter,
+// and we care about the *initial* position of a rule application (not the
+// past-the-end position as pegtl change_state provides)
+template<typename NewState>
+struct change_head : tao::pegtl::maybe_nothing
+{
+    template<
+        typename Rule,
+        tao::pegtl::apply_mode A,
+        tao::pegtl::rewind_mode M,
+        template<typename...> class Action,
+        template<typename...> class Control,
+        typename ParseInput,
+        typename State,
+        typename... States
+    >
+    [[nodiscard]] static bool match(ParseInput & in, State && st, States &&... sts) // matches Rule with a fresh NewState in place of `st`; returns whether Rule matched
+    {
+        const auto begin = in.iterator(); // remember where this rule application started
+
+        if constexpr (std::is_constructible_v<NewState, State, States...>) { // preferred: build the new head state from all current states
+            NewState s(st, sts...);
+            if (tao::pegtl::match<Rule, A, M, Action, Control>(in, s, sts...)) { // `s` replaces only the head state; the tail states pass through unchanged
+                if constexpr (A == tao::pegtl::apply_mode::action) {
+                    _success<Action<Rule>>(0, begin, in, s, st, sts...); // hands both the new and the old head state to the action
+                }
+                return true;
+            }
+            return false;
+        } else if constexpr (std::is_default_constructible_v<NewState>) { // fallback: default-construct the new head state
+            NewState s;
+            if (tao::pegtl::match<Rule, A, M, Action, Control>(in, s, sts...)) {
+                if constexpr (A == tao::pegtl::apply_mode::action) {
+                    _success<Action<Rule>>(0, begin, in, s, st, sts...);
+                }
+                return true;
+            }
+            return false;
+        } else {
+            static_assert(decltype(sizeof(NewState))(), "unable to instantiate new state"); // value-initialized size_t is 0: an always-false condition that still depends on NewState
+        }
+    }
+
+    template<typename Target, typename ParseInput, typename... S>
+    static void _success(void *, auto & begin, ParseInput & in, S & ... sts) // fallback overload: the literal 0 passed by match() converts to a null void *
+    {
+        const typename ParseInput::action_t at(begin, in); // action input constructed from the saved start iterator and the current input
+        Target::success(at, sts...);
+    }
+
+    template<typename Target, typename... S>
+    static void _success(decltype(Target::success0(std::declval<S &>()...), 0), auto &, auto &, S & ... sts) // viable only if Target::success0(sts...) is well-formed; its first parameter is int, an exact match for the literal 0
+    {
+        Target::success0(sts...); // success0 receives the states only, no input
+    }
+};
+
+}
--- a/src/libexpr/parser/grammar.hh
+++ b/src/libexpr/parser/grammar.hh
@ -0,0 +1,707 @@
+#pragma once
+///@file
+
+#include "tao/pegtl.hpp"
+#include <type_traits>
+#include <variant>
+
+#include <boost/container/small_vector.hpp>
+
+// NOTE
+// nix line endings are \n, \r\n, \r. the grammar does not use eol or
+// eolf rules in favor of reproducing the old flex lexer as faithfully as
+// possible, and deferring calculation of positions to downstream users.
+
+namespace nix::parser::grammar {
+
+using namespace tao::pegtl;
+namespace p = tao::pegtl;
+
+// character classes
+namespace c {
+
+struct path : sor<
+    ranges<'a', 'z', 'A', 'Z', '0', '9'>,
+    one<'.', '_', '-', '+'>
+> {};
+struct path_sep : one<'/'> {};
+
+struct id_first : ranges<'a', 'z', 'A', 'Z', '_'> {};
+struct id_rest : sor<
+    ranges<'a', 'z', 'A', 'Z', '0', '9'>,
+    one<'_', '\'', '-'>
+> {};
+
+struct uri_scheme_first : ranges<'a', 'z', 'A', 'Z'> {};
+struct uri_scheme_rest : sor<
+    ranges<'a', 'z', 'A', 'Z', '0', '9'>,
+    one<'+', '-', '.'>
+> {};
+struct uri_sep : one<':'> {};
+struct uri_rest : sor<
+    ranges<'a', 'z', 'A', 'Z', '0', '9'>,
+    one<'%', '/', '?', ':', '@', '&', '=', '+', '$', ',', '-', '_', '.', '!', '~', '*', '\''>
+> {};
+
+}
+
+// "tokens". PEGs don't really care about tokens, we merely use them as a convenient
+// way of writing down keywords and a couple complicated syntax rules.
+namespace t {
+
+struct _extend_as_path : seq<
+    star<c::path>,
+    not_at<TAO_PEGTL_STRING("/*")>,
+    not_at<TAO_PEGTL_STRING("//")>,
+    c::path_sep,
+    sor<c::path, TAO_PEGTL_STRING("${")>
+> {};
+struct _extend_as_uri : seq<
+    star<c::uri_scheme_rest>,
+    c::uri_sep,
+    c::uri_rest
+> {};
+
+// keywords might be extended to identifiers, paths, or uris.
+// NOTE this assumes that keywords are a-zA-Z only, otherwise uri schemes would never
+// match correctly.
+// NOTE not a simple seq<...> because this would report incorrect positions for
+// keywords used inside must<> if a prefix of the keyword matches.
+template<typename S>
+struct _keyword : sor<
+    seq<
+        S,
+        not_at<c::id_rest>,
+        not_at<_extend_as_path>,
+        not_at<_extend_as_uri>
+    >,
+    failure
+> {};
+
+struct kw_if      : _keyword<TAO_PEGTL_STRING("if")> {};
+struct kw_then    : _keyword<TAO_PEGTL_STRING("then")> {};
+struct kw_else    : _keyword<TAO_PEGTL_STRING("else")> {};
+struct kw_assert  : _keyword<TAO_PEGTL_STRING("assert")> {};
+struct kw_with    : _keyword<TAO_PEGTL_STRING("with")> {};
+struct kw_let     : _keyword<TAO_PEGTL_STRING("let")> {};
+struct kw_in      : _keyword<TAO_PEGTL_STRING("in")> {};
+struct kw_rec     : _keyword<TAO_PEGTL_STRING("rec")> {};
+struct kw_inherit : _keyword<TAO_PEGTL_STRING("inherit")> {};
+struct kw_or      : _keyword<TAO_PEGTL_STRING("or")> {};
+
+// `-` can be a unary prefix op, a binary infix op, or the first character
+// of a path or -> (ex 1->1--1)
+// `/` can be a path leader or an operator (ex a?a /a)
+struct op_minus : seq<one<'-'>, not_at<one<'>'>>, not_at<_extend_as_path>> {};
+struct op_div   : seq<one<'/'>, not_at<c::path>> {};
+
+// match a rule, making sure we are not matching it where a keyword would match.
+// using minus like this is a lot faster than flipping the order and using seq.
+template<typename... Rules>
+struct _not_at_any_keyword : minus<
+    seq<Rules...>,
+    sor<
+        TAO_PEGTL_STRING("inherit"),
+        TAO_PEGTL_STRING("assert"),
+        TAO_PEGTL_STRING("else"),
+        TAO_PEGTL_STRING("then"),
+        TAO_PEGTL_STRING("with"),
+        TAO_PEGTL_STRING("let"),
+        TAO_PEGTL_STRING("rec"),
+        TAO_PEGTL_STRING("if"),
+        TAO_PEGTL_STRING("in"),
+        TAO_PEGTL_STRING("or")
+    >
+> {};
+
+// identifiers are kind of horrid:
+//
+//   - uri_scheme_first ⊂ id_first
+//   - uri_scheme_first ⊂ uri_scheme_rest ⊂ path
+//   - id_first ⊂ id_rest ∖ { ' } ⊂ path
+//   - id_first ∩ (path ∖ uri_scheme_first) = { _ }
+//   - uri_sep ∉ ⋃ { id_first, id_rest, uri_scheme_first, uri_scheme_rest, path }
+//   - path_sep ∉ ⋃ { id_first, id_rest, uri_scheme_first, uri_scheme_rest }
+//
+// and we want, without reading the input more than once, a string that
+// matches (id_first id_rest*) and is not followed by any number of
+// characters such that the extended string matches path or uri rules.
+//
+// since the first character must be either _ or a uri scheme character
+// we can ignore path-like bits at the beginning. uri_sep cannot appear anywhere
+// in an identifier, so it's only needed in lookahead checks at the uri-like
+// prefix. likewise path_sep cannot appear anywhere in the identifier, so it's
+// only needed in lookahead checks in the path-like prefix.
+//
+// in total that gives us a decomposition of
+//
+//     (uri-scheme-like? (?! continues-as-uri) | _)
+//     (path-segment-like? (?! continues-as-path))
+//     id_rest*
+struct identifier : _not_at_any_keyword<
+    // we don't use (at<id_rest>, ...) matches here because identifiers are
+    // a really hot path and rewinding as needed by at<> isn't entirely free.
+    sor<
+        seq<
+            c::uri_scheme_first,
+            star<ranges<'a', 'z', 'A', 'Z', '0', '9', '-'>>,
+            not_at<_extend_as_uri>
+        >,
+        one<'_'>
+    >,
+    star<sor<ranges<'a', 'z', 'A', 'Z', '0', '9'>, one<'_', '-'>>>,
+    not_at<_extend_as_path>,
+    star<c::id_rest>
+> {};
+
+// floats may extend ints, thus these rules are very similar.
+struct integer : seq<
+    sor<
+        seq<range<'1', '9'>, star<digit>, not_at<one<'.'>>>, // nonzero leading digit; fails if a '.' follows (such input fits `floating`)
+        seq<one<'0'>, not_at<one<'.'>, digit>, star<digit>> // leading zero; fails only if '.' and a digit follow directly
+    >,
+    not_at<_extend_as_path> // reject digits that continue as a path
+> {};
+
+struct floating : seq<
+    sor<
+        seq<range<'1', '9'>, star<digit>, one<'.'>, star<digit>>, // nonzero leading digit, '.', optional fraction digits
+        seq<opt<one<'0'>>, one<'.'>, plus<digit>> // optional single leading zero, '.', at least one fraction digit
+    >,
+    opt<one<'E', 'e'>, opt<one<'+', '-'>>, plus<digit>>, // optional exponent with optional sign
+    not_at<_extend_as_path> // reject input that continues as a path
+> {};
+
+struct uri : seq<
+    c::uri_scheme_first,
+    star<c::uri_scheme_rest>,
+    c::uri_sep,
+    plus<c::uri_rest>
+> {};
+
+struct sep : sor<
+    plus<one<' ', '\t', '\r', '\n'>>, // run of whitespace characters
+    seq<one<'#'>, star<not_one<'\r', '\n'>>>, // line comment: '#' up to, but not including, the next \r or \n
+    seq<string<'/', '*'>, until<string<'*', '/'>>> // block comment: "/*" through the first following "*/"
+> {};
+
+}
+
+
+
+using seps = star<t::sep>;
+
+
+// marker for semantic rules. not handling one of these in an action that cares about
+// semantics is probably an error.
+struct semantic {};
+
+
+struct expr;
+
+struct _string {
+    template<typename... Inner>
+    struct literal : semantic, seq<Inner...> {};
+    struct cr_lf : semantic, seq<one<'\r'>, opt<one<'\n'>>> {};
+    struct interpolation : semantic, seq<
+        p::string<'$', '{'>, seps,
+        must<expr>, seps,
+        must<one<'}'>>
+    > {};
+    struct escape : semantic, must<any> {};
+};
+struct string : _string, seq<
+    one<'"'>,
+    star<
+        sor<
+            _string::literal<plus<not_one<'$', '"', '\\', '\r'>>>,
+            _string::cr_lf,
+            _string::interpolation,
+            _string::literal<one<'$'>, opt<one<'$'>>>,
+            seq<one<'\\'>, _string::escape>
+        >
+    >,
+    must<one<'"'>>
+> {};
+
+struct _ind_string {
+    template<bool Indented, typename... Inner>
+    struct literal : semantic, seq<Inner...> {};
+    struct interpolation : semantic, seq<
+        p::string<'$', '{'>, seps,
+        must<expr>, seps,
+        must<one<'}'>>
+    > {};
+    struct escape : semantic, must<any> {};
+};
+struct ind_string : _ind_string, seq<
+    TAO_PEGTL_STRING("''"),
+    opt<star<one<' '>>, one<'\n'>>,
+    star<
+        sor<
+            _ind_string::literal<
+                true,
+                plus<
+                    sor<
+                        not_one<'$', '\''>,
+                        seq<one<'$'>, not_one<'{', '\''>>,
+                        seq<one<'\''>, not_one<'\'', '$'>>
+                    >
+                >
+            >,
+            _ind_string::interpolation,
+            _ind_string::literal<false, one<'$'>>,
+            _ind_string::literal<false, one<'\''>, not_at<one<'\''>>>,
+            seq<one<'\''>, _ind_string::literal<false, p::string<'\'', '\''>>>,
+            seq<
+                p::string<'\'', '\''>,
+                sor<
+                    _ind_string::literal<false, one<'$'>>,
+                    seq<one<'\\'>, _ind_string::escape>
+                >
+            >
+        >
+    >,
+    must<TAO_PEGTL_STRING("''")>
+> {};
+
+struct _path {
+    // legacy lexer rules. extra l_ to avoid reserved c++ identifiers.
+    struct _l_PATH : seq<star<c::path>, plus<c::path_sep, plus<c::path>>, opt<c::path_sep>> {};
+    struct _l_PATH_SEG : seq<star<c::path>, c::path_sep> {};
+    struct _l_HPATH : seq<one<'~'>, plus<c::path_sep, plus<c::path>>, opt<c::path_sep>> {};
+    struct _l_HPATH_START : TAO_PEGTL_STRING("~/") {};
+    struct _path_str : sor<_l_PATH, _l_PATH_SEG, plus<c::path>> {};
+    // modern rules
+    template<typename... Inner>
+    struct literal : semantic, seq<Inner...> {};
+    struct interpolation : semantic, seq<
+        p::string<'$', '{'>, seps,
+        must<expr>, seps,
+        must<one<'}'>>
+    > {};
+    struct anchor : semantic, sor<
+        _l_PATH,
+        seq<_l_PATH_SEG, at<TAO_PEGTL_STRING("${")>>
+    > {};
+    struct home_anchor : semantic, sor<
+        _l_HPATH,
+        seq<_l_HPATH_START, at<TAO_PEGTL_STRING("${")>>
+    > {};
+    struct searched_path : semantic, list<plus<c::path>, c::path_sep> {};
+    struct forbid_prefix_triple_slash : sor<not_at<c::path_sep>, failure> {};
+    struct forbid_prefix_double_slash_no_interp : sor<
+        not_at<c::path_sep, star<c::path>, not_at<TAO_PEGTL_STRING("${")>>,
+        failure
+    > {};
+    // legacy parser rules
+    struct _str_rest : seq<
+        must<forbid_prefix_double_slash_no_interp>,
+        opt<literal<_path_str>>,
+        must<forbid_prefix_triple_slash>,
+        star<
+            sor<
+                literal<_path_str>,
+                interpolation
+            >
+        >
+    > {};
+};
+struct path : _path, sor<
+    seq<
+        sor<_path::anchor, _path::home_anchor>,
+        _path::_str_rest
+    >,
+    seq<one<'<'>, _path::searched_path, one<'>'>>
+> {};
+
+struct _formal {
+    struct name : semantic, t::identifier {};
+    struct default_value : semantic, must<expr> {};
+};
+struct formal : semantic, _formal, seq<
+    _formal::name,
+    opt<seps, one<'?'>, seps, _formal::default_value>
+> {};
+
+struct _formals {
+    struct ellipsis : semantic, p::ellipsis {};
+};
+struct formals : semantic, _formals, seq<
+    one<'{'>, seps,
+    // formals and attrsets share a two-token head sequence ('{' <id>).
+    // this rule unrolls the formals list a bit to provide better error messages than
+    // "expected '='" at the first ',' if formals are incorrect.
+    sor<
+        one<'}'>,
+        seq<_formals::ellipsis, seps, must<one<'}'>>>,
+        seq<
+            formal, seps,
+            if_then_else<
+                at<one<','>>,
+                seq<
+                    star<one<','>, seps, formal, seps>,
+                    opt<one<','>, seps, opt<_formals::ellipsis, seps>>,
+                    must<one<'}'>>
+                >,
+                one<'}'>
+            >
+        >
+    >
+> {};
+
+struct _attr {
+    struct simple : semantic, sor<t::identifier, t::kw_or> {};
+    struct string : semantic, seq<grammar::string> {};
+    struct expr : semantic, seq<
+        TAO_PEGTL_STRING("${"), seps,
+        must<grammar::expr>, seps,
+        must<one<'}'>>
+    > {};
+};
+struct attr : _attr, sor<
+    _attr::simple,
+    _attr::string,
+    _attr::expr
+> {};
+
+struct attrpath : list<attr, one<'.'>, t::sep> {};
+
+struct _inherit {
+    struct from : semantic, must<expr> {};
+    struct attrs : list<attr, seps> {};
+};
+struct inherit : _inherit, seq<
+    t::kw_inherit, seps,
+    opt<one<'('>, seps, _inherit::from, seps, must<one<')'>>, seps>,
+    opt<_inherit::attrs, seps>,
+    must<one<';'>>
+> {};
+
+struct _binding {
+    struct path : semantic, attrpath {};
+    struct equal : one<'='> {};
+    struct value : semantic, must<expr> {};
+};
+struct binding : _binding, seq<
+    _binding::path, seps,
+    must<_binding::equal>, seps,
+    _binding::value, seps,
+    must<one<';'>>
+> {};
+
+struct bindings : opt<list<sor<inherit, binding>, seps>> {};
+
+struct op {
+    enum class kind {
+        // NOTE non-associativity is *NOT* handled in the grammar structure.
+        // handling it in the grammar itself instead of in semantic actions
+        // slows down the parser significantly and makes the rules *much*
+        // harder to read. maybe this will be different at some point when
+        // ! does not sit between two binary precedence levels.
+        nonAssoc,
+        leftAssoc,
+        rightAssoc,
+        unary,
+    };
+    template<typename Rule, unsigned Precedence, kind Kind = kind::leftAssoc>
+    struct _op : Rule {
+        static constexpr unsigned precedence = Precedence;
+        static constexpr op::kind kind = Kind;
+    };
+
+    struct unary_minus : _op<t::op_minus,           3, kind::unary> {};
+
+    // treating this like a unary postfix operator is sketchy, but that's
+    // the most reasonable way to implement the operator precedence set forth
+    // by the language way back. it'd be much better if `.` and `?` had the same
+    // precedence, but alas.
+    struct has_attr   : _op<seq<one<'?'>, seps, must<attrpath>>, 4> {};
+
+    struct concat     : _op<TAO_PEGTL_STRING("++"),  5, kind::rightAssoc> {};
+    struct mul        : _op<one<'*'>,                6> {};
+    struct div        : _op<t::op_div,               6> {};
+    struct plus       : _op<one<'+'>,                7> {};
+    struct minus      : _op<t::op_minus,             7> {};
+    struct not_       : _op<one<'!'>,                8, kind::unary> {};
+    struct update     : _op<TAO_PEGTL_STRING("//"),  9, kind::rightAssoc> {};
+    struct less_eq    : _op<TAO_PEGTL_STRING("<="), 10, kind::nonAssoc> {};
+    struct greater_eq : _op<TAO_PEGTL_STRING(">="), 10, kind::nonAssoc> {};
+    struct less       : _op<one<'<'>,               10, kind::nonAssoc> {};
+    struct greater    : _op<one<'>'>,               10, kind::nonAssoc> {};
+    struct equals     : _op<TAO_PEGTL_STRING("=="), 11, kind::nonAssoc> {};
+    struct not_equals : _op<TAO_PEGTL_STRING("!="), 11, kind::nonAssoc> {};
+    struct and_       : _op<TAO_PEGTL_STRING("&&"), 12> {};
+    struct or_        : _op<TAO_PEGTL_STRING("||"), 13> {};
+    struct implies    : _op<TAO_PEGTL_STRING("->"), 14, kind::rightAssoc> {};
+};
+
+struct _expr {
+    template<template<typename...> class OpenMod = seq, typename... Init>
+    struct _attrset : seq<
+        Init...,
+        OpenMod<one<'{'>>, seps,
+        bindings, seps,
+        must<one<'}'>>
+    > {};
+
+    struct select;
+
+    struct id : semantic, t::identifier {};
+    struct int_ : semantic, t::integer {};
+    struct float_ : semantic, t::floating {};
+    struct string : semantic, seq<grammar::string> {};
+    struct ind_string : semantic, seq<grammar::ind_string> {};
+    struct path : semantic, seq<grammar::path> {};
+    struct uri : semantic, t::uri {};
+    struct ancient_let : semantic, _attrset<must, t::kw_let, seps> {};
+    struct rec_set : semantic, _attrset<must, t::kw_rec, seps> {};
+    struct set : semantic, _attrset<> {};
+
+    struct _list {
+        struct entry : semantic, seq<select> {};
+    };
+    struct list : semantic, _list, seq<
+        one<'['>, seps,
+        opt<p::list<_list::entry, seps>, seps>,
+        must<one<']'>>
+    > {};
+
+    struct _simple : sor<
+        id,
+        int_,
+        float_,
+        string,
+        ind_string,
+        path,
+        uri,
+        seq<one<'('>, seps, must<expr>, seps, must<one<')'>>>,
+        ancient_let,
+        rec_set,
+        set,
+        list
+    > {};
+
+    struct _select {
+        struct head : _simple {};
+        struct attr : semantic, seq<attrpath> {};
+        struct attr_or : semantic, must<select> {};
+        struct as_app_or : semantic, t::kw_or {};
+    };
+    struct _app {
+        struct first_arg : semantic, seq<select> {};
+        struct another_arg : semantic, seq<select> {};
+        // can be used to stash a position of the application head node
+        struct select_or_fn : seq<select> {};
+    };
+
+    struct select : _select, seq<
+        _select::head, seps,
+        opt<
+            sor<
+                seq<
+                    one<'.'>, seps, _select::attr,
+                    opt<seps, t::kw_or, seps, _select::attr_or>
+                >,
+                _select::as_app_or
+            >
+        >
+    > {};
+
+    struct app : _app, seq<
+        _app::select_or_fn,
+        opt<seps, _app::first_arg, star<seps, _app::another_arg>>
+    > {};
+
+    template<typename Op>
+    struct operator_ : semantic, Op {};
+
+    struct unary : seq<
+        star<sor<operator_<op::not_>, operator_<op::unary_minus>>, seps>,
+        app
+    > {};
+
+    struct _binary_operator : sor<
+        operator_<op::implies>,
+        operator_<op::update>,
+        operator_<op::concat>,
+        operator_<op::plus>,
+        operator_<op::minus>,
+        operator_<op::mul>,
+        operator_<op::div>,
+        operator_<op::less_eq>,
+        operator_<op::greater_eq>,
+        operator_<op::less>,
+        operator_<op::greater>,
+        operator_<op::equals>,
+        operator_<op::not_equals>,
+        operator_<op::or_>,
+        operator_<op::and_>
+    > {};
+
+    struct _binop : seq<
+        unary,
+        star<
+            seps,
+            sor<
+                seq<_binary_operator, seps, must<unary>>,
+                operator_<op::has_attr>
+            >
+        >
+    > {};
+
+    struct _lambda {
+        struct arg : semantic, t::identifier {};
+    };
+    struct lambda : semantic, _lambda, sor<
+        seq<
+            _lambda::arg, seps,
+            sor<
+                seq<one<':'>, seps, must<expr>>,
+                seq<one<'@'>, seps, must<formals, seps, one<':'>, seps, expr>>
+            >
+        >,
+        seq<
+            formals, seps,
+            sor<
+                seq<one<':'>, seps, must<expr>>,
+                seq<one<'@'>, seps, must<_lambda::arg, seps, one<':'>, seps, expr>>
+            >
+        >
+    > {};
+
+    struct assert_ : semantic, seq<
+        t::kw_assert, seps,
+        must<expr>, seps,
+        must<one<';'>>, seps,
+        must<expr>
+    > {};
+    struct with : semantic, seq<
+        t::kw_with, seps,
+        must<expr>, seps,
+        must<one<';'>>, seps,
+        must<expr>
+    > {};
+    struct let : seq<
+        t::kw_let, seps,
+        not_at<one<'{'>>, // exclude ancient_let so we can must<kw_in>
+        bindings, seps,
+        must<t::kw_in>, seps,
+        must<expr>
+    > {};
+    struct if_ : semantic, seq<
+        t::kw_if, seps,
+        must<expr>, seps,
+        must<t::kw_then>, seps,
+        must<expr>, seps,
+        must<t::kw_else>, seps,
+        must<expr>
+    > {};
+};
+struct expr : semantic, _expr, sor<
+    _expr::lambda,
+    _expr::assert_,
+    _expr::with,
+    _expr::let,
+    _expr::if_,
+    _expr::_binop
+> {};
+
+// legacy support: \0 terminates input if passed from flex to bison as a token
+struct eof : sor<p::eof, one<0>> {};
+
+struct root : must<seps, expr, seps, eof> {};
+
+
+
+template<typename Rule>
+struct nothing : p::nothing<Rule> { // derives from pegtl's `nothing`, the action base that does nothing for Rule
+    static_assert(!std::is_base_of_v<semantic, Rule>); // compile error if a rule marked `semantic` ends up with this do-nothing action
+};
+
+
+
+template<typename Self, typename OpCtx, typename AttrPathT, typename ExprT>
+struct operator_semantics { // operand and operator stacks for resolving operator precedence; Self is the derived class (reduce() casts *this to Self &)
+    struct has_attr : grammar::op::has_attr { // the `?` operator together with the attrpath it carries
+        AttrPathT path;
+    };
+
+    struct OpEntry { // one pending operator on the `ops` stack
+        OpCtx ctx; // context passed back to applyOp when the operator is applied
+        uint8_t prec; // copied from the operator's `precedence` constant in pushOp
+        grammar::op::kind assoc; // copied from the operator's `kind` constant in pushOp
+        std::variant<
+            grammar::op::not_,
+            grammar::op::unary_minus,
+            grammar::op::implies,
+            grammar::op::or_,
+            grammar::op::and_,
+            grammar::op::equals,
+            grammar::op::not_equals,
+            grammar::op::less_eq,
+            grammar::op::greater_eq,
+            grammar::op::update,
+            grammar::op::concat,
+            grammar::op::less,
+            grammar::op::greater,
+            grammar::op::plus,
+            grammar::op::minus,
+            grammar::op::mul,
+            grammar::op::div,
+            has_attr
+        > op;
+    };
+
+    // statistics here are taken from nixpkgs commit de502c4d0ba96261e5de803e4d1d1925afd3e22f.
+    // over 99.9% of contexts in nixpkgs need at most 4 slots, ~85% need only 1
+    boost::container::small_vector<ExprT, 4> exprs;
+    // over 99.9% of contexts in nixpkgs need at most 2 slots, ~85% need only 1
+    boost::container::small_vector<OpEntry, 2> ops;
+
+    // derived class is expected to define members:
+    //
+    // ExprT applyOp(OpCtx & pos, auto & op, auto &... args);
+    // [[noreturn]] static void badOperator(OpCtx & pos, auto &... args);
+
+    void reduce(uint8_t toPrecedence, auto &... args) { // applies stacked operators, top first, until one has to stay pending or the stack is empty
+        while (!ops.empty()) {
+            auto & [ctx, precedence, kind, op] = ops.back();
+            // NOTE this relies on associativity not being mixed within a precedence level.
+            if ((precedence > toPrecedence)
+                || (kind != grammar::op::kind::leftAssoc && precedence == toPrecedence))
+                break; // top operator has a numerically higher precedence value, or an equal one without being left-associative: leave it on the stack
+            std::visit([&, ctx=std::move(ctx)] (auto & op) { // moves the context out of the stack entry, which is popped right after
+                exprs.push_back(static_cast<Self &>(*this).applyOp(ctx, op, args...)); // the result becomes a new operand; NOTE(review): applyOp presumably takes its operands from `exprs` -- not visible here
+            }, op);
+            ops.pop_back();
+        }
+    }
+
+    ExprT popExpr() // removes and returns the operand on top of the `exprs` stack
+    {
+        auto r = std::move(exprs.back());
+        exprs.pop_back();
+        return r;
+    }
+
+    void pushOp(OpCtx ctx, auto o, auto &... args) // pushes operator `o`, first applying stacked operators as decided by reduce()
+    {
+        if (o.kind != grammar::op::kind::unary) // unary operators are pushed without reducing first
+            reduce(o.precedence, args...);
+        if (!ops.empty() && o.kind == grammar::op::kind::nonAssoc) { // a same-level non-associative operator still on top after reducing means two of them were chained
+            auto & [_pos, _prec, _kind, _o] = ops.back();
+            if (_kind == o.kind && _prec == o.precedence)
+                Self::badOperator(ctx, args...); // expected to be [[noreturn]] per the contract comment above
+        }
+        ops.emplace_back(ctx, o.precedence, o.kind, std::move(o));
+    }
+
+    ExprT finish(auto &... args) // applies every operator still pending and returns the operand left on top
+    {
+        reduce(255, args...); // 255 exceeds every precedence declared in grammar::op, so reduce() never breaks early
+        return popExpr();
+    }
+};
+
+}
--- a/src/libexpr/parser/parser.cc
+++ b/src/libexpr/parser/parser.cc
@ -0,0 +1,862 @@
+#include "attr-set.hh"
+#include "error.hh"
+#include "eval-settings.hh"
+#include "eval.hh"
+#include "finally.hh"
+#include "nixexpr.hh"
+#include "symbol-table.hh"
+#include "users.hh"
+
+#include "change_head.hh"
+#include "grammar.hh"
+#include "state.hh"
+
+#include <charconv>
+#include <clocale>
+#include <memory>
+
+// flip this define when doing parser development to enable some grammar checks.
+#if 0
+#include <tao/pegtl/contrib/analyze.hpp>
+#define ANALYZE_GRAMMAR \
+    ([] { \
+        const std::size_t issues = tao::pegtl::analyze<grammar::root>(); \
+        assert(issues == 0); \
+    })()
+#else
+#define ANALYZE_GRAMMAR ((void) 0)
+#endif
+
+namespace p = tao::pegtl;
+
+namespace nix::parser {
+namespace {
+
+// Per-rule syntax error message. The primary template yields nullptr, i.e.
+// "no dedicated message for this rule"; the specialisations below supply the
+// text for individual grammar rules.
+template<typename>
+inline constexpr const char * error_message = nullptr;
+
+// Opens the error_message specialisation for the given rule. The macro is
+// variadic so that rule names containing commas (template argument lists)
+// can be passed as a single argument.
+#define error_message_for(...) \
+    template<> inline constexpr auto error_message<__VA_ARGS__>
+
+error_message_for(p::one<'{'>) = "expecting '{'";
+error_message_for(p::one<'}'>) = "expecting '}'";
+error_message_for(p::one<'"'>) = "expecting '\"'";
+error_message_for(p::one<';'>) = "expecting ';'";
+error_message_for(p::one<')'>) = "expecting ')'";
+error_message_for(p::one<'='>) = "expecting '='";
+error_message_for(p::one<']'>) = "expecting ']'";
+error_message_for(p::one<':'>) = "expecting ':'";
+error_message_for(p::string<'\'', '\''>) = "expecting \"''\"";
+error_message_for(p::any) = "expecting any character";
+error_message_for(grammar::eof) = "expecting end of file";
+error_message_for(grammar::seps) = "expecting separators";
+error_message_for(grammar::path::forbid_prefix_triple_slash) = "too many slashes in path";
+error_message_for(grammar::path::forbid_prefix_double_slash_no_interp) = "path has a trailing slash";
+error_message_for(grammar::expr) = "expecting expression";
+error_message_for(grammar::expr::unary) = "expecting expression";
+error_message_for(grammar::binding::equal) = "expecting '='";
+error_message_for(grammar::expr::lambda::arg) = "expecting identifier";
+error_message_for(grammar::formals) = "expecting formals";
+error_message_for(grammar::attrpath) = "expecting attribute path";
+error_message_for(grammar::expr::select) = "expecting selection expression";
+error_message_for(grammar::t::kw_then) = "expecting 'then'";
+error_message_for(grammar::t::kw_else) = "expecting 'else'";
+error_message_for(grammar::t::kw_in) = "expecting 'in'";
+
+// Error policy handed to p::must_if (see Control): exposes error_message as
+// the per-rule message and sets raise_on_failure to false for every rule.
+// NOTE(review): the precise meaning of these two members is defined by PEGTL's
+// must_if; confirm against its documentation.
+struct SyntaxErrors
+{
+    template<typename Rule>
+    static constexpr auto message = error_message<Rule>;
+
+    template<typename Rule>
+    static constexpr bool raise_on_failure = false;
+};
+
+// Control class used for parsing: identical to the must_if<SyntaxErrors>
+// control except that a failure at the very end of the input is reported as
+// "unexpected end of file", followed by the rule's own message if it has one.
+template<typename Rule>
+struct Control : p::must_if<SyntaxErrors>::control<Rule>
+{
+    template<typename ParseInput, typename... States>
+    [[noreturn]] static void raise(const ParseInput & in, States &&... st)
+    {
+        // input left over: use the stock error reporting, which does not return.
+        if (!in.empty())
+            p::must_if<SyntaxErrors>::control<Rule>::raise(in, st...);
+
+        std::string suffix;
+        if constexpr (constexpr auto msg = error_message<Rule>)
+            suffix = fmt(", %s", msg);
+        throw p::parse_error("unexpected end of file" + suffix, in);
+    }
+};
+
+// Builder state for one expression: the operand stack (`exprs`) and operator
+// stack come from operator_semantics; this struct supplies applyOp and
+// badOperator as that base requires, plus helpers that turn operands popped
+// from the stack into AST nodes.
+struct ExprState
+    : grammar::
+          operator_semantics<ExprState, PosIdx, AttrPath, std::pair<PosIdx, std::unique_ptr<Expr>>>
+{
+    // Pops the top operand and drops its position.
+    std::unique_ptr<Expr> popExprOnly() {
+        return std::move(popExpr().second);
+    }
+
+    // Builds an `Op` node from the top operand followed by `args`.
+    template<typename Op, typename... Args>
+    std::unique_ptr<Expr> applyUnary(Args &&... args) {
+        return std::make_unique<Op>(popExprOnly(), std::forward<Args>(args)...);
+    }
+
+    // Builds a binary `Op` node; the right operand is on top of the stack,
+    // so it is popped first.
+    template<typename Op>
+    std::unique_ptr<Expr> applyBinary(PosIdx pos) {
+        auto right = popExprOnly(), left = popExprOnly();
+        return std::make_unique<Op>(pos, std::move(left), std::move(right));
+    }
+
+    // Builds a call of the variable `fn` with the two topmost operands as
+    // arguments. Without `flip` they are passed in stack order (left, right);
+    // with `flip` the two are swapped.
+    std::unique_ptr<Expr> call(PosIdx pos, Symbol fn, bool flip = false)
+    {
+        std::vector<std::unique_ptr<Expr>> args(2);
+        args[flip ? 0 : 1] = popExprOnly();
+        args[flip ? 1 : 0] = popExprOnly();
+        return std::make_unique<ExprCall>(pos, std::make_unique<ExprVar>(fn), std::move(args));
+    }
+
+    // Comparison via state.s.lessThan: `less` keeps the operand order, the
+    // other direction is expressed by swapping the operands.
+    std::unique_ptr<Expr> order(PosIdx pos, bool less, State & state)
+    {
+        return call(pos, state.s.lessThan, !less);
+    }
+
+    // Builds an ExprConcatStrings from the two topmost operands, keeping
+    // their positions.
+    std::unique_ptr<Expr> concatStrings(PosIdx pos)
+    {
+        std::vector<std::pair<PosIdx, std::unique_ptr<Expr>>> args(2);
+        args[1] = popExpr();
+        args[0] = popExpr();
+        return std::make_unique<ExprConcatStrings>(pos, false, std::move(args));
+    }
+
+    // Unary minus is built as a call of state.s.sub with 0 and the operand.
+    std::unique_ptr<Expr> negate(PosIdx pos, State & state)
+    {
+        std::vector<std::unique_ptr<Expr>> args(2);
+        args[0] = std::make_unique<ExprInt>(0);
+        args[1] = popExprOnly();
+        return std::make_unique<ExprCall>(pos, std::make_unique<ExprVar>(state.s.sub), std::move(args));
+    }
+
+    // Hook used by operator_semantics::reduce: consumes the operands of `op`
+    // from the stack and returns the resulting node paired with `pos`.
+    std::pair<PosIdx, std::unique_ptr<Expr>> applyOp(PosIdx pos, auto & op, State & state) {
+        using Op = grammar::op;
+
+        auto not_ = [] (auto e) {
+            return std::make_unique<ExprOpNot>(std::move(e));
+        };
+
+        return {
+            pos,
+            (overloaded {
+                [&] (Op::implies)     { return applyBinary<ExprOpImpl>(pos); },
+                [&] (Op::or_)         { return applyBinary<ExprOpOr>(pos); },
+                [&] (Op::and_)        { return applyBinary<ExprOpAnd>(pos); },
+                [&] (Op::equals)      { return applyBinary<ExprOpEq>(pos); },
+                [&] (Op::not_equals)  { return applyBinary<ExprOpNEq>(pos); },
+                [&] (Op::less)        { return order(pos, true, state); },
+                // a >= b is built as !(a < b), a <= b as !(b < a)
+                [&] (Op::greater_eq)  { return not_(order(pos, true, state)); },
+                [&] (Op::greater)     { return order(pos, false, state); },
+                [&] (Op::less_eq)     { return not_(order(pos, false, state)); },
+                [&] (Op::update)      { return applyBinary<ExprOpUpdate>(pos); },
+                [&] (Op::not_)        { return applyUnary<ExprOpNot>(); },
+                [&] (Op::plus)        { return concatStrings(pos); },
+                [&] (Op::minus)       { return call(pos, state.s.sub); },
+                [&] (Op::mul)         { return call(pos, state.s.mul); },
+                [&] (Op::div)         { return call(pos, state.s.div); },
+                [&] (Op::concat)      { return applyBinary<ExprOpConcatLists>(pos); },
+                [&] (has_attr & a)    { return applyUnary<ExprOpHasAttr>(std::move(a.path)); },
+                [&] (Op::unary_minus) { return negate(pos, state); },
+            })(op)
+        };
+    }
+
+    // Hook used by operator_semantics::pushOp for misplaced non-associative
+    // operators; throws a ParseError located at `pos`.
+    // always_inline is needed, otherwise pushOp slows down considerably
+    [[noreturn, gnu::always_inline]]
+    static void badOperator(PosIdx pos, State & state)
+    {
+        throw ParseError({
+            .msg = HintFmt("syntax error, unexpected operator"),
+            .pos = state.positions[pos]
+        });
+    }
+
+    // Constructs an `Expr` node from `args`, pushes it together with `pos`
+    // onto the operand stack and returns a reference to the new node.
+    template<typename Expr, typename... Args>
+    Expr & pushExpr(PosIdx pos, Args && ... args)
+    {
+        auto p = std::make_unique<Expr>(std::forward<Args>(args)...);
+        auto & result = *p;
+        exprs.emplace_back(pos, std::move(p));
+        return result;
+    }
+};
+
+// Base for the per-construct states below: remembers the enclosing ExprState
+// and gives access to it through conversion and operator->. Constructor
+// arguments after the first are accepted and ignored.
+struct SubexprState {
+    explicit SubexprState(ExprState & up, auto &...) : parent(&up) {}
+
+    operator ExprState &() { return *parent; }
+    ExprState * operator->() { return parent; }
+
+private:
+    ExprState * parent;
+};
+
+
+
+// Action template passed to p::parse in EvalState::parse. Rules without a
+// specialisation below inherit from grammar::nothing<Rule> (defined outside
+// this file; presumably a no-op action).
+template<typename Rule>
+struct BuildAST : grammar::nothing<Rule> {};
+
+// State collected while parsing a lambda: the plain argument name (set by
+// the lambda::arg action) and/or the formals (set by the formals action).
+struct LambdaState : SubexprState {
+    using SubexprState::SubexprState;
+
+    Symbol arg;
+    std::unique_ptr<Formals> formals;
+};
+
+// State collected while parsing a formals list: the formals gathered so far
+// and the single formal currently being built.
+struct FormalsState : SubexprState {
+    using SubexprState::SubexprState;
+
+    Formals formals{};
+    Formal formal{};
+};
+
+// Starts a new formal: replaces the in-progress formal with one carrying the
+// matched name and its position.
+template<> struct BuildAST<grammar::formal::name> {
+    static void apply(const auto & in, FormalsState & s, State & ps) {
+        s.formal = {
+            .pos = ps.at(in),
+            .name = ps.symbols.create(in.string_view()),
+        };
+    }
+};
+
+// A formal is complete: move the in-progress formal into the list.
+template<> struct BuildAST<grammar::formal> {
+    static void apply0(FormalsState & s, State &) {
+        s.formals.formals.emplace_back(std::move(s.formal));
+    }
+};
+
+// Uses the expression on top of the enclosing operand stack as the default
+// value of the in-progress formal.
+template<> struct BuildAST<grammar::formal::default_value> {
+    static void apply0(FormalsState & s, State & ps) {
+        s.formal.def = s->popExprOnly();
+    }
+};
+
+// Marks the formals list as containing an ellipsis.
+template<> struct BuildAST<grammar::formals::ellipsis> {
+    static void apply0(FormalsState & s, State &) {
+        s.formals.ellipsis = true;
+    }
+};
+
+// Moves the collected formals into the lambda state on success.
+// NOTE(review): change_head is defined in change_head.hh; judging by the
+// success0 signature it supplies a fresh FormalsState for this rule and
+// passes it here together with the previous head state. Confirm there.
+template<> struct BuildAST<grammar::formals> : change_head<FormalsState> {
+    static void success0(FormalsState & f, LambdaState & s, State &) {
+        s.formals = std::make_unique<Formals>(std::move(f.formals));
+    }
+};
+
+// State collected while parsing an attribute path: the attribute names in
+// order. pushAttr ignores the position it is given.
+struct AttrState : SubexprState {
+    using SubexprState::SubexprState;
+
+    std::vector<AttrName> attrs;
+
+    void pushAttr(auto && attr, PosIdx) { attrs.emplace_back(std::move(attr)); }
+};
+
+// Plain identifier attribute: intern the matched text and push it, with its
+// position, onto whichever state is current (anything providing pushAttr).
+template<> struct BuildAST<grammar::attr::simple> {
+    static void apply(const auto & in, auto & s, State & ps) {
+        s.pushAttr(ps.symbols.create(in.string_view()), ps.at(in));
+    }
+};
+
+// String-valued attribute name: the parsed string sits on top of the operand
+// stack. A plain ExprString is turned into a symbol; anything else is pushed
+// as an expression.
+template<> struct BuildAST<grammar::attr::string> {
+    static void apply(const auto & in, auto & s, State & ps) {
+        auto name = s->popExprOnly();
+        auto * literal = dynamic_cast<ExprString *>(name.get());
+        if (literal == nullptr) {
+            s.pushAttr(std::move(name), ps.at(in));
+            return;
+        }
+        s.pushAttr(ps.symbols.create(literal->s), ps.at(in));
+    }
+};
+
+// Expression attributes are handled exactly like string attributes.
+template<> struct BuildAST<grammar::attr::expr> : BuildAST<grammar::attr::string> {};
+
+// State collected while parsing a set of bindings: the attribute set being
+// filled, plus the path and value of the binding currently being parsed.
+struct BindingsState : SubexprState {
+    using SubexprState::SubexprState;
+
+    ExprAttrs attrs;
+    AttrPath path;
+    std::unique_ptr<Expr> value;
+};
+
+// State collected while parsing one `inherit` clause: the inherited names
+// with their positions, and the source expression (with position) if the
+// inherit::from action ran.
+struct InheritState : SubexprState {
+    using SubexprState::SubexprState;
+
+    std::vector<std::pair<AttrName, PosIdx>> attrs;
+    std::unique_ptr<Expr> from;
+    PosIdx fromPos;
+
+    void pushAttr(auto && attr, PosIdx pos) { attrs.emplace_back(std::move(attr), pos); }
+};
+
+// Records the source expression of the inherit clause (taken from the top of
+// the operand stack) and the position of the match.
+template<> struct BuildAST<grammar::inherit::from> {
+    static void apply(const auto & in, InheritState & s, State & ps) {
+        s.from = s->popExprOnly();
+        s.fromPos = ps.at(in);
+    }
+};
+
+// Finishes an `inherit` clause and adds its attributes to the enclosing
+// bindings.
+//
+// Names given as string expressions are first resolved to symbols; any other
+// dynamic name raises a ParseError. When the clause has a source expression,
+// that expression is appended to `inheritFromExprs` and every name becomes an
+// ExprSelect on an ExprInheritFrom referring to it by index; otherwise every
+// name becomes an ExprVar. A name that is already bound is reported through
+// State::dupAttr.
+template<> struct BuildAST<grammar::inherit> : change_head<InheritState> {
+    static void success0(InheritState & s, BindingsState & b, State & ps) {
+        auto & attrs = b.attrs.attrs;
+        // TODO this should not reuse generic attrpath rules.
+        for (auto & [i, iPos] : s.attrs) {
+            if (i.symbol)
+                continue;
+            if (auto str = dynamic_cast<ExprString *>(i.expr.get()))
+                i = AttrName(ps.symbols.create(str->s));
+            else {
+                throw ParseError({
+                    .msg = HintFmt("dynamic attributes not allowed in inherit"),
+                    .pos = ps.positions[iPos]
+                });
+            }
+        }
+        if (auto fromE = std::move(s.from)) {
+            if (!b.attrs.inheritFromExprs)
+                b.attrs.inheritFromExprs = std::make_unique<std::vector<std::unique_ptr<Expr>>>();
+            b.attrs.inheritFromExprs->push_back(std::move(fromE));
+            for (auto & [i, iPos] : s.attrs) {
+                // reuse the iterator instead of looking the key up a second time
+                if (auto prev = attrs.find(i.symbol); prev != attrs.end())
+                    ps.dupAttr(i.symbol, iPos, prev->second.pos);
+                auto from = std::make_unique<ExprInheritFrom>(s.fromPos, b.attrs.inheritFromExprs->size() - 1);
+                attrs.emplace(
+                    i.symbol,
+                    ExprAttrs::AttrDef(
+                        std::make_unique<ExprSelect>(iPos, std::move(from), i.symbol),
+                        iPos,
+                        ExprAttrs::AttrDef::Kind::InheritedFrom));
+            }
+        } else {
+            for (auto & [i, iPos] : s.attrs) {
+                if (auto prev = attrs.find(i.symbol); prev != attrs.end())
+                    ps.dupAttr(i.symbol, iPos, prev->second.pos);
+                attrs.emplace(
+                    i.symbol,
+                    ExprAttrs::AttrDef(
+                        std::make_unique<ExprVar>(iPos, i.symbol),
+                        iPos,
+                        ExprAttrs::AttrDef::Kind::Inherited));
+            }
+        }
+    }
+};
+
+// The attribute path of a binding is complete: hand it to the bindings state.
+template<> struct BuildAST<grammar::binding::path> : change_head<AttrState> {
+    static void success0(AttrState & a, BindingsState & s, State & ps) {
+        s.path = std::move(a.attrs);
+    }
+};
+
+// The value of a binding is on top of the operand stack: take it.
+template<> struct BuildAST<grammar::binding::value> {
+    static void apply0(BindingsState & s, State & ps) {
+        s.value = s->popExprOnly();
+    }
+};
+
+// A whole binding has been parsed: add path and value to the attribute set
+// via State::addAttr, using the position of the match.
+template<> struct BuildAST<grammar::binding> {
+    static void apply(const auto & in, BindingsState & s, State & ps) {
+        ps.addAttr(&s.attrs, std::move(s.path), std::move(s.value), ps.at(in));
+    }
+};
+
+// Identifier: `__curPos` becomes an ExprPos, every other identifier an
+// ExprVar for the interned name.
+template<> struct BuildAST<grammar::expr::id> {
+    static void apply(const auto & in, ExprState & s, State & ps) {
+        const auto name = in.string_view();
+        if (name != "__curPos") {
+            s.pushExpr<ExprVar>(ps.at(in), ps.at(in), ps.symbols.create(name));
+            return;
+        }
+        s.pushExpr<ExprPos>(ps.at(in), ps.at(in));
+    }
+};
+
+// Integer literal: converted with std::from_chars into an int64_t; any
+// conversion error is reported as an "invalid integer" ParseError at the
+// literal's position.
+template<> struct BuildAST<grammar::expr::int_> {
+    static void apply(const auto & in, ExprState & s, State & ps) {
+        int64_t value;
+        const auto parsed = std::from_chars(in.begin(), in.end(), value);
+        if (parsed.ec == std::errc{}) {
+            s.pushExpr<ExprInt>(noPos, value);
+            return;
+        }
+        throw ParseError({
+            .msg = HintFmt("invalid integer '%1%'", in.string_view()),
+            .pos = ps.positions[ps.at(in)],
+        });
+    }
+};
+
+// Float literal: converted with std::stod while the thread's locale is
+// temporarily switched to "POSIX"; any exception from the conversion is
+// reported as an "invalid float" ParseError at the literal's position.
+template<> struct BuildAST<grammar::expr::float_> {
+    static void apply(const auto & in, ExprState & s, State & ps) {
+        // copy the input into a temporary string so we can call stod.
+        // can't use from_chars because libc++ (thus darwin) does not have it,
+        // and floats are not performance-sensitive anyway. if they were you'd
+        // be in much bigger trouble than this.
+        //
+        // we also get to do a locale-save dance because stod is locale-aware and
+        // something (a plugin?) may have called setlocale or uselocale.
+        //
+        // the POSIX locale object is created once, on first use, and kept for
+        // the lifetime of the program.
+        static struct locale_hack {
+            locale_t posix;
+            locale_hack(): posix(newlocale(LC_ALL_MASK, "POSIX", 0))
+            {
+                if (posix == 0)
+                    throw SysError("could not get POSIX locale");
+            }
+        } locale;
+
+        auto tmp = in.string();
+        double v = [&] {
+            auto oldLocale = uselocale(locale.posix);
+            // restore the previous locale on every exit path, including throws
+            Finally resetLocale([=] { uselocale(oldLocale); });
+            try {
+                return std::stod(tmp);
+            } catch (...) {
+                throw ParseError({
+                    .msg = HintFmt("invalid float '%1%'", in.string_view()),
+                    .pos = ps.positions[ps.at(in)],
+                });
+            }
+        }();
+        s.pushExpr<ExprFloat>(noPos, v);
+    }
+};
+
+// State collected while parsing a string or path: the literal text gathered
+// since the last interpolation (`currentLiteral`, starting at `currentPos`)
+// and the finished parts, each with its position.
+struct StringState : SubexprState {
+    using SubexprState::SubexprState;
+
+    std::string currentLiteral;
+    PosIdx currentPos;
+    std::vector<std::pair<nix::PosIdx, std::unique_ptr<Expr>>> parts;
+
+    // Appends raw (still escaped) text to the current literal; the position
+    // of the first piece becomes the literal's position.
+    void append(PosIdx pos, std::string_view s)
+    {
+        if (currentLiteral.empty())
+            currentPos = pos;
+        currentLiteral += s;
+    }
+
+    // Resolves escapes in place: `\n`, `\r` and `\t` become the respective
+    // control characters, any other escaped character stands for itself, and
+    // a bare CR or CR/LF is normalised to LF. Processing stops at the first
+    // unescaped NUL byte and the string is cut there.
+    //
+    // FIXME this truncates strings on NUL for compat with the old parser. ideally
+    // we should use the decomposition the grammar gives us instead of iterating over
+    // the entire string again.
+    static void unescapeStr(std::string & str)
+    {
+        char * s = str.data();
+        char * t = s;
+        char c;
+        while ((c = *s++)) {
+            if (c == '\\') {
+                c = *s++;
+                if (c == 'n') *t = '\n';
+                else if (c == 'r') *t = '\r';
+                else if (c == 't') *t = '\t';
+                else *t = c;
+            }
+            else if (c == '\r') {
+                /* Normalise CR and CR/LF into LF. */
+                *t = '\n';
+                if (*s == '\n') s++; /* cr/lf */
+            }
+            else *t = c;
+            t++;
+        }
+        str.resize(t - str.data());
+    }
+
+    // Turns the pending literal text, if any, into an ExprString part and
+    // resets the literal buffer.
+    void endLiteral()
+    {
+        if (!currentLiteral.empty()) {
+            unescapeStr(currentLiteral);
+            parts.emplace_back(currentPos, std::make_unique<ExprString>(std::move(currentLiteral)));
+            // a moved-from std::string is only guaranteed to be in a valid but
+            // unspecified state; append() relies on the buffer being empty.
+            currentLiteral.clear();
+        }
+    }
+
+    // Produces the final expression: a single ExprString when no parts were
+    // recorded, otherwise an ExprConcatStrings over all parts, positioned at
+    // the first part.
+    std::unique_ptr<Expr> finish()
+    {
+        if (parts.empty()) {
+            unescapeStr(currentLiteral);
+            return std::make_unique<ExprString>(std::move(currentLiteral));
+        } else {
+            endLiteral();
+            auto pos = parts[0].first;
+            return std::make_unique<ExprConcatStrings>(pos, true, std::move(parts));
+        }
+    }
+};
+
+// Literal string text: appended verbatim to the current literal.
+template<typename... Content> struct BuildAST<grammar::string::literal<Content...>> {
+    static void apply(const auto & in, StringState & s, State & ps) {
+        s.append(ps.at(in), in.string_view());
+    }
+};
+
+// Line-ending match: appended verbatim; StringState::unescapeStr later
+// normalises CR and CR/LF to LF.
+template<> struct BuildAST<grammar::string::cr_lf> {
+    static void apply(const auto & in, StringState & s, State & ps) {
+        s.append(ps.at(in), in.string_view()); // FIXME compat with old parser
+    }
+};
+
+// Interpolation: closes the pending literal, then records the interpolated
+// expression (top of the operand stack) as the next part.
+template<> struct BuildAST<grammar::string::interpolation> {
+    static void apply(const auto & in, StringState & s, State & ps) {
+        s.endLiteral();
+        s.parts.emplace_back(ps.at(in), s->popExprOnly());
+    }
+};
+
+// Escape sequence: a backslash is put in front of the matched text so that
+// StringState::unescapeStr can resolve the escape later.
+template<> struct BuildAST<grammar::string::escape> {
+    static void apply(const auto & in, StringState & s, State & ps) {
+        s.append(ps.at(in), "\\"); // FIXME compat with old parser
+        s.append(ps.at(in), in.string_view());
+    }
+};
+
+// A whole string has been parsed: push the finished expression, without a
+// position, onto the enclosing operand stack.
+template<> struct BuildAST<grammar::string> : change_head<StringState> {
+    static void success0(StringState & s, ExprState & e, State &) {
+        e.exprs.emplace_back(noPos, s.finish());
+    }
+};
+
+// State collected while parsing an indented string: each part is either an
+// interpolated expression or a piece of literal text, with its position.
+struct IndStringState : SubexprState {
+    using SubexprState::SubexprState;
+
+    std::vector<std::pair<PosIdx, std::variant<std::unique_ptr<Expr>, StringToken>>> parts;
+};
+
+// Literal text of an indented string; `Indented` is stored as the token's
+// hasIndentation flag.
+template<bool Indented, typename... Content>
+struct BuildAST<grammar::ind_string::literal<Indented, Content...>> {
+    static void apply(const auto & in, IndStringState & s, State & ps) {
+        s.parts.emplace_back(ps.at(in), StringToken{in.string_view(), Indented});
+    }
+};
+
+// Interpolation inside an indented string: the expression on top of the
+// operand stack becomes the next part.
+template<> struct BuildAST<grammar::ind_string::interpolation> {
+    static void apply(const auto & in, IndStringState & s, State & ps) {
+        s.parts.emplace_back(ps.at(in), s->popExprOnly());
+    }
+};
+
+// Escape inside an indented string: a match starting with `n`, `r` or `t`
+// yields the corresponding control character, anything else is kept as
+// matched. The token never carries indentation.
+template<> struct BuildAST<grammar::ind_string::escape> {
+    static void apply(const auto & in, IndStringState & s, State & ps) {
+        const char escaped = *in.begin();
+        std::string_view text;
+        if (escaped == 'n')
+            text = "\n";
+        else if (escaped == 'r')
+            text = "\r";
+        else if (escaped == 't')
+            text = "\t";
+        else
+            text = in.string_view();
+        s.parts.emplace_back(ps.at(in), StringToken{text});
+    }
+};
+
+// A whole indented string has been parsed: State::stripIndentation turns the
+// parts into the final expression, which is pushed without a position.
+template<> struct BuildAST<grammar::ind_string> : change_head<IndStringState> {
+    static void success(const auto & in, IndStringState & s, ExprState & e, State & ps) {
+        e.exprs.emplace_back(noPos, ps.stripIndentation(ps.at(in), std::move(s.parts)));
+    }
+};
+
+// Literal segment of a path: appended and immediately closed, so each
+// literal segment becomes its own part.
+template<typename... Content> struct BuildAST<grammar::path::literal<Content...>> {
+    static void apply(const auto & in, StringState & s, State & ps) {
+        s.append(ps.at(in), in.string_view());
+        s.endLiteral();
+    }
+};
+
+// Interpolations in paths are handled exactly like those in strings.
+template<> struct BuildAST<grammar::path::interpolation> : BuildAST<grammar::string::interpolation> {};
+
+// First segment of a path: made absolute relative to the base path of the
+// file being parsed and recorded as an ExprPath part.
+template<> struct BuildAST<grammar::path::anchor> {
+    static void apply(const auto & in, StringState & s, State & ps) {
+        Path path(absPath(in.string(), ps.basePath.path.abs()));
+        /* add back in the trailing '/' to the first segment */
+        if (in.string_view().ends_with('/') && in.size() > 1)
+            path += "/";
+        // make_unique rather than a raw `new`: the node must not leak if
+        // growing `parts` throws.
+        s.parts.emplace_back(ps.at(in), std::make_unique<ExprPath>(std::move(path)));
+    }
+};
+
+// Home-relative first segment of a path: the first character of the match is
+// replaced by the result of getHome() and the result recorded as an ExprPath
+// part. Rejected with an Error when evalSettings.pureEval is set.
+template<> struct BuildAST<grammar::path::home_anchor> {
+    static void apply(const auto & in, StringState & s, State & ps) {
+        if (evalSettings.pureEval)
+            throw Error("the path '%s' can not be resolved in pure mode", in.string_view());
+        Path path(getHome() + in.string_view().substr(1));
+        // make_unique rather than a raw `new`: the node must not leak if
+        // growing `parts` throws.
+        s.parts.emplace_back(ps.at(in), std::make_unique<ExprPath>(std::move(path)));
+    }
+};
+
+// Searched path: recorded as a call of the variable ps.s.findFile with the
+// variable ps.s.nixPath and the matched text as arguments.
+template<> struct BuildAST<grammar::path::searched_path> {
+    static void apply(const auto & in, StringState & s, State & ps) {
+        // parentheses select the size constructor explicitly, as done for the
+        // other argument vectors in this file; braces only reach it through
+        // the initializer-list fallback.
+        std::vector<std::unique_ptr<Expr>> args(2);
+        args[0] = std::make_unique<ExprVar>(ps.s.nixPath);
+        args[1] = std::make_unique<ExprString>(in.string());
+        s.parts.emplace_back(
+            ps.at(in),
+            std::make_unique<ExprCall>(
+                ps.at(in),
+                std::make_unique<ExprVar>(ps.s.findFile),
+                std::move(args)));
+    }
+};
+
+// A whole path has been parsed: reject trailing slashes, then push either
+// the single part as-is or an ExprConcatStrings over all parts.
+template<> struct BuildAST<grammar::path> : change_head<StringState> {
+    // Throws if the last part is an `E` whose text ends in '/', unless that
+    // part is the only one and is exactly "/".
+    template<typename E>
+    static void check_slash(PosIdx end, StringState & s, State & ps) {
+        auto * last = dynamic_cast<E *>(s.parts.back().second.get());
+        const bool endsInSlash = last && last->s.ends_with('/');
+        if (endsInSlash && (s.parts.size() > 1 || last->s != "/"))
+            throw ParseError({
+                .msg = HintFmt("path has a trailing slash"),
+                .pos = ps.positions[end],
+            });
+    }
+
+    static void success(const auto & in, StringState & s, ExprState & e, State & ps) {
+        s.endLiteral();
+        check_slash<ExprPath>(ps.atEnd(in), s, ps);
+        check_slash<ExprString>(ps.atEnd(in), s, ps);
+        if (s.parts.size() != 1) {
+            e.pushExpr<ExprConcatStrings>(ps.at(in), ps.at(in), false, std::move(s.parts));
+            return;
+        }
+        e.exprs.emplace_back(noPos, std::move(s.parts.back().second));
+    }
+};
+
+// strings and paths are handled fully by the grammar-level rule for now
+template<> struct BuildAST<grammar::expr::string> : p::maybe_nothing {};
+template<> struct BuildAST<grammar::expr::ind_string> : p::maybe_nothing {};
+template<> struct BuildAST<grammar::expr::path> : p::maybe_nothing {};
+
+// URL literal: pushed as an ExprString, or rejected with a ParseError when
+// the NoUrlLiterals experimental feature is enabled. The feature flag is read
+// once, on first use, and cached for the rest of the process.
+template<> struct BuildAST<grammar::expr::uri> {
+    static void apply(const auto & in, ExprState & s, State & ps) {
+       static bool noURLLiterals = experimentalFeatureSettings.isEnabled(Xp::NoUrlLiterals);
+       if (noURLLiterals)
+           throw ParseError({
+               .msg = HintFmt("URL literals are disabled"),
+               .pos = ps.positions[ps.at(in)]
+           });
+       s.pushExpr<ExprString>(ps.at(in), in.string());
+    }
+};
+
+// Old-style let: the bindings become a recursive attribute set from which
+// the attribute ps.s.body is selected.
+template<> struct BuildAST<grammar::expr::ancient_let> : change_head<BindingsState> {
+    static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) {
+        // keep the position in a local: `b.attrs` is moved from while the
+        // arguments below are evaluated, so it must not be read afterwards.
+        PosIdx pos = ps.at(in);
+        b.attrs.pos = pos;
+        b.attrs.recursive = true;
+        s.pushExpr<ExprSelect>(pos, pos, std::make_unique<ExprAttrs>(std::move(b.attrs)), ps.s.body);
+    }
+};
+
+// Recursive attribute set: the bindings are marked recursive and pushed as
+// an ExprAttrs at the position of the match.
+template<> struct BuildAST<grammar::expr::rec_set> : change_head<BindingsState> {
+    static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) {
+        b.attrs.pos = ps.at(in);
+        b.attrs.recursive = true;
+        s.pushExpr<ExprAttrs>(b.attrs.pos, std::move(b.attrs));
+    }
+};
+
+// Attribute set: the bindings are pushed as an ExprAttrs at the position of
+// the match.
+template<> struct BuildAST<grammar::expr::set> : change_head<BindingsState> {
+    static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) {
+        b.attrs.pos = ps.at(in);
+        s.pushExpr<ExprAttrs>(b.attrs.pos, std::move(b.attrs));
+    }
+};
+
+using ListState = std::vector<std::unique_ptr<Expr>>;
+
+// List: the collected entries become the elements of a new ExprList, pushed
+// at the position of the match.
+template<> struct BuildAST<grammar::expr::list> : change_head<ListState> {
+    static void success(const auto & in, ListState & ls, ExprState & s, State & ps) {
+        auto e = std::make_unique<ExprList>();
+        e->elems = std::move(ls);
+        s.exprs.emplace_back(ps.at(in), std::move(e));
+    }
+};
+
+// List entry: parsed with its own ExprState; the finished expression (its
+// position is dropped) is appended to the list.
+template<> struct BuildAST<grammar::expr::list::entry> : change_head<ExprState> {
+    static void success0(ExprState & e, ListState & s, State & ps) {
+        s.emplace_back(e.finish(ps).second);
+    }
+};
+
+// State collected while parsing a selection: the position recorded by the
+// select::head action and the ExprSelect node once it has been created
+// (null until the select::attr action has run).
+struct SelectState : SubexprState {
+    using SubexprState::SubexprState;
+
+    PosIdx pos;
+    ExprSelect * e = nullptr;
+};
+
+// Remembers the position of the expression being selected from.
+template<> struct BuildAST<grammar::expr::select::head> {
+    static void apply(const auto & in, SelectState & s, State & ps) {
+        s.pos = ps.at(in);
+    }
+};
+
+// Attribute path of a selection: replaces the expression on top of the
+// operand stack with an ExprSelect on it (no default value yet) and keeps a
+// pointer to the new node for the attr_or action.
+template<> struct BuildAST<grammar::expr::select::attr> : change_head<AttrState> {
+    static void success0(AttrState & a, SelectState & s, State &) {
+        s.e = &s->pushExpr<ExprSelect>(s.pos, s.pos, s->popExprOnly(), std::move(a.attrs), nullptr);
+    }
+};
+
+// Default value of a selection: the expression on top of the operand stack
+// becomes the `def` of the ExprSelect created by the select::attr action.
+// Dereferences s.e, so it relies on that action having run first.
+template<> struct BuildAST<grammar::expr::select::attr_or> {
+    static void apply0(SelectState & s, State &) {
+        s.e->def = s->popExprOnly();
+    }
+};
+
+// `or` used as a function argument: replaces the expression on top of the
+// operand stack with a call of it, passing the variable ps.s.or_.
+template<> struct BuildAST<grammar::expr::select::as_app_or> {
+    static void apply(const auto & in, SelectState & s, State & ps) {
+        std::vector<std::unique_ptr<Expr>> args(1);
+        args[0] = std::make_unique<ExprVar>(ps.at(in), ps.s.or_);
+        s->pushExpr<ExprCall>(s.pos, s.pos, s->popExprOnly(), std::move(args));
+    }
+};
+
+// Runs the selection rule with a SelectState; nothing is left to do on
+// success because the nested actions already pushed the result.
+template<> struct BuildAST<grammar::expr::select> : change_head<SelectState> {
+    static void success0(const auto &...) {}
+};
+
+// State collected while parsing a function application: the position
+// recorded by the app::select_or_fn action and the ExprCall node once it
+// exists (null until the app::first_arg action has run).
+struct AppState : SubexprState {
+    using SubexprState::SubexprState;
+
+    PosIdx pos;
+    ExprCall * e = nullptr;
+};
+
+// Remembers the position of the function part of an application.
+template<> struct BuildAST<grammar::expr::app::select_or_fn> {
+    static void apply(const auto & in, AppState & s, State & ps) {
+        s.pos = ps.at(in);
+    }
+};
+
+// First argument of an application: the argument and the function are the
+// two topmost operands. If the function is itself an ExprCall the argument
+// is appended to that call; otherwise a new ExprCall is created. Either way
+// the call ends up on the operand stack and in `s.e`.
+template<> struct BuildAST<grammar::expr::app::first_arg> {
+    static void apply(const auto & in, AppState & s, State & ps) {
+        auto arg = s->popExprOnly(), fn = s->popExprOnly();
+        if ((s.e = dynamic_cast<ExprCall *>(fn.get()))) {
+            // TODO remove.
+            // AST compat with old parser, semantics are the same.
+            // this can happen on occasions such as `<p> <p>` or `a or b or`,
+            // neither of which are super worth optimizing.
+            s.e->args.push_back(std::move(arg));
+            s->exprs.emplace_back(noPos, std::move(fn));
+        } else {
+            // built with push_back rather than `args{1}`, which only means
+            // "one element" through the initializer-list fallback.
+            std::vector<std::unique_ptr<Expr>> args;
+            args.push_back(std::move(arg));
+            s.e = &s->pushExpr<ExprCall>(s.pos, s.pos, std::move(fn), std::move(args));
+        }
+    }
+};
+
+// Further argument of an application: appended to the call recorded by the
+// app::first_arg action. Dereferences s.e, so it relies on that action
+// having run first.
+template<> struct BuildAST<grammar::expr::app::another_arg> {
+    static void apply0(AppState & s, State & ps) {
+        s.e->args.push_back(s->popExprOnly());
+    }
+};
+
+// Runs the application rule with an AppState; nothing is left to do on
+// success because the nested actions already pushed the result.
+template<> struct BuildAST<grammar::expr::app> : change_head<AppState> {
+    static void success0(const auto &...) {}
+};
+
+// Operator token: pushes a default-constructed `Op` onto the operator stack
+// at the position of the match.
+template<typename Op> struct BuildAST<grammar::expr::operator_<Op>> {
+    static void apply(const auto & in, ExprState & s, State & ps) {
+        s.pushOp(ps.at(in), Op{}, ps);
+    }
+};
+// Has-attribute operator: parsed with an AttrState so that the attribute
+// path can be stored inside the operator pushed onto the operator stack.
+template<> struct BuildAST<grammar::expr::operator_<grammar::op::has_attr>> : change_head<AttrState> {
+    static void success(const auto & in, AttrState & a, ExprState & s, State & ps) {
+        s.pushOp(ps.at(in), ExprState::has_attr{{}, std::move(a.attrs)}, ps);
+    }
+};
+
+// Plain lambda argument: intern the matched name.
+template<> struct BuildAST<grammar::expr::lambda::arg> {
+    static void apply(const auto & in, LambdaState & s, State & ps) {
+        s.arg = ps.symbols.create(in.string_view());
+    }
+};
+
+// Lambda: formals, if present, go through State::validateFormals first; the
+// body is the expression on top of the operand stack.
+template<> struct BuildAST<grammar::expr::lambda> : change_head<LambdaState> {
+    static void success(const auto & in, LambdaState & l, ExprState & s, State & ps) {
+        if (l.formals)
+            l.formals = ps.validateFormals(std::move(l.formals), ps.at(in), l.arg);
+        s.pushExpr<ExprLambda>(ps.at(in), ps.at(in), l.arg, std::move(l.formals), l->popExprOnly());
+    }
+};
+
+// Assertion: replaces the two topmost operands (condition below body) with
+// an ExprAssert.
+template<> struct BuildAST<grammar::expr::assert_> {
+    static void apply(const auto & in, ExprState & s, State & ps) {
+        // the body was pushed last, so it comes off the stack first
+        std::unique_ptr<Expr> assertedBody = s.popExprOnly();
+        std::unique_ptr<Expr> condition = s.popExprOnly();
+        s.pushExpr<ExprAssert>(ps.at(in), ps.at(in), std::move(condition), std::move(assertedBody));
+    }
+};
+
+// `with`: replaces the two topmost operands (scope below body) with an
+// ExprWith.
+template<> struct BuildAST<grammar::expr::with> {
+    static void apply(const auto & in, ExprState & s, State & ps) {
+        // the body was pushed last, so it comes off the stack first
+        std::unique_ptr<Expr> withBody = s.popExprOnly();
+        std::unique_ptr<Expr> withScope = s.popExprOnly();
+        s.pushExpr<ExprWith>(ps.at(in), ps.at(in), std::move(withScope), std::move(withBody));
+    }
+};
+
+// `let`: the bindings must not contain dynamic attributes; the body is the
+// expression on top of the operand stack.
+template<> struct BuildAST<grammar::expr::let> : change_head<BindingsState> {
+    static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) {
+        if (!b.attrs.dynamicAttrs.empty())
+            throw ParseError({
+                .msg = HintFmt("dynamic attributes not allowed in let"),
+                .pos = ps.positions[ps.at(in)]
+            });
+
+        s.pushExpr<ExprLet>(ps.at(in), std::make_unique<ExprAttrs>(std::move(b.attrs)), b->popExprOnly());
+    }
+};
+
+// Conditional: replaces the three topmost operands (condition, then-branch,
+// else-branch, from bottom to top) with an ExprIf.
+template<> struct BuildAST<grammar::expr::if_> {
+    static void apply(const auto & in, ExprState & s, State & ps) {
+        // operands come off the stack in reverse order of parsing
+        std::unique_ptr<Expr> elseBranch = s.popExprOnly();
+        std::unique_ptr<Expr> thenBranch = s.popExprOnly();
+        std::unique_ptr<Expr> condition = s.popExprOnly();
+        s.pushExpr<ExprIf>(
+            ps.at(in), ps.at(in), std::move(condition), std::move(thenBranch), std::move(elseBranch));
+    }
+};
+
+// Nested expression: parsed with its own ExprState; its finished result is
+// pushed onto the enclosing operand stack.
+template<> struct BuildAST<grammar::expr> : change_head<ExprState> {
+    static void success0(ExprState & inner, ExprState & outer, State & ps) {
+        outer.exprs.push_back(inner.finish(ps));
+    }
+};
+
+}
+}
+
+namespace nix {
+
+// Parses `text` into an expression tree and binds its variables against
+// `staticEnv`.
+//
+// `text` must be at least two bytes long and end in two NUL bytes (asserted
+// below); those two bytes are not part of the parsed input. `origin` is
+// registered with the position table for the full `length`. PEGTL parse
+// errors are rethrown as ParseError with a "syntax error, " prefix, located
+// at the byte offset of the last position PEGTL reports. The caller receives
+// ownership of the returned expression.
+Expr * EvalState::parse(
+    char * text,
+    size_t length,
+    Pos::Origin origin,
+    const SourcePath & basePath,
+    std::shared_ptr<StaticEnv> & staticEnv)
+{
+    parser::State s = {
+        symbols,
+        positions,
+        basePath,
+        positions.addOrigin(origin, length),
+        exprSymbols,
+    };
+    parser::ExprState x;
+
+    assert(length >= 2);
+    assert(text[length - 1] == 0);
+    assert(text[length - 2] == 0);
+    length -= 2;
+
+    p::string_input<p::tracking_mode::lazy> inp{std::string_view{text, length}, "input"};
+    try {
+        p::parse<parser::grammar::root, parser::BuildAST, parser::Control>(inp, x, s);
+    } catch (p::parse_error & e) {
+        auto pos = e.positions().back();
+        throw ParseError({
+            .msg = HintFmt("syntax error, %s", e.message()),
+            .pos = positions[s.positions.add(s.origin, pos.byte)]
+        });
+    }
+
+    // the result stays owned by a unique_ptr until bindVars has succeeded
+    auto [_pos, result] = x.finish(s);
+    result->bindVars(*this, staticEnv);
+    return result.release();
+}
+
+}
--- a/src/libexpr/parser/state.hh
+++ b/src/libexpr/parser/state.hh
@ -3,77 +3,61 @@

 #include "eval.hh"

-namespace nix {
+namespace nix::parser {

-/**
- * @note Storing a C-style `char *` and `size_t` allows us to avoid
- * having to define the special members that using string_view here
- * would implicitly delete.
- */
+/**
+ * A piece of literal string text, held as a view rather than a copy, and
+ * convertible to std::string_view.
+ *
+ * NOTE(review): `s` is non-owning, so the viewed buffer presumably has to
+ * outlive the token — confirm against the code that creates tokens.
+ */
 struct StringToken
 {
-    const char * p;
-    size_t l;
+    std::string_view s;
    bool hasIndentation;
-    operator std::string_view() const { return {p, l}; }
+    operator std::string_view() const { return s; }
 };

-struct ParserLocation
-{
-    int first_line, first_column;
-    int last_line, last_column;
-
-    // backup to recover from yyless(0)
-    int stashed_first_column, stashed_last_column;
-
-    void stash() {
-        stashed_first_column = first_column;
-        stashed_last_column = last_column;
-    }
-
-    void unstash() {
-        first_column = stashed_first_column;
-        last_column = stashed_last_column;
-    }
-};
-
-struct ParserState
+/**
+ * State shared by the parser actions: the symbol and position tables, the
+ * base path and origin of the input being parsed, and a reference to the AST
+ * symbol set. The helpers below report errors by throwing ParseError.
+ */
+struct State
 {
    SymbolTable & symbols;
    PosTable & positions;
-    Expr * result;
    SourcePath basePath;
    PosTable::Origin origin;
    const Expr::AstSymbols & s;
-    std::unique_ptr<Error> error;

-    [[nodiscard]] ParseError dupAttr(const AttrPath & attrPath, const PosIdx pos, const PosIdx prevPos);
-    [[nodiscard]] ParseError dupAttr(Symbol attr, const PosIdx pos, const PosIdx prevPos);
-    [[nodiscard]] std::optional<ParseError> addAttr(ExprAttrs * attrs, AttrPath && attrPath, std::unique_ptr<Expr> e, const PosIdx pos);
-    [[nodiscard]] std::optional<ParseError> validateFormals(Formals * formals, PosIdx pos = noPos, Symbol arg = {});
+    // Both overloads unconditionally throw ParseError. They are marked
+    // [[noreturn]] so that compilers and analysers know code following a call
+    // is unreachable (callers rely on that, e.g. before using a pointer that
+    // was just found to be null).
+    [[noreturn]] void dupAttr(const AttrPath & attrPath, const PosIdx pos, const PosIdx prevPos);
+    [[noreturn]] void dupAttr(Symbol attr, const PosIdx pos, const PosIdx prevPos);
+    void addAttr(ExprAttrs * attrs, AttrPath && attrPath, std::unique_ptr<Expr> e, const PosIdx pos);
+    // Takes ownership of `formals` and hands it back once validated.
+    std::unique_ptr<Formals> validateFormals(std::unique_ptr<Formals> formals, PosIdx pos = noPos, Symbol arg = {});
    std::unique_ptr<Expr> stripIndentation(const PosIdx pos,
        std::vector<std::pair<PosIdx, std::variant<std::unique_ptr<Expr>, StringToken>>> && es);
-    PosIdx at(const ParserLocation & loc);
+
+    // lazy positioning means we don't get byte offsets directly, in.position() would work
+    // but also requires line and column (which is expensive)
+
+    /** Position of the first byte of the match `in`, as an offset into the input. */
+    PosIdx at(const auto & in)
+    {
+        return positions.add(origin, in.begin() - in.input().begin());
+    }
+
+    /** Position just past the last byte of the match `in`, as an offset into the input. */
+    PosIdx atEnd(const auto & in)
+    {
+        return positions.add(origin, in.end() - in.input().begin());
+    }
 };

-inline ParseError ParserState::dupAttr(const AttrPath & attrPath, const PosIdx pos, const PosIdx prevPos)
+// Reports a duplicate attribute path: always throws a ParseError located at
+// `pos` whose message names the path and the position of the earlier
+// definition (`prevPos`). Never returns normally.
+inline void State::dupAttr(const AttrPath & attrPath, const PosIdx pos, const PosIdx prevPos)
 {
-    return ParseError({
+    throw ParseError({
         .msg = HintFmt("attribute '%1%' already defined at %2%",
             showAttrPath(symbols, attrPath), positions[prevPos]),
         .pos = positions[pos]
    });
 }

-inline ParseError ParserState::dupAttr(Symbol attr, const PosIdx pos, const PosIdx prevPos)
+// Single-symbol variant: always throws a ParseError located at `pos` whose
+// message names the attribute and the position of the earlier definition
+// (`prevPos`). Never returns normally.
+inline void State::dupAttr(Symbol attr, const PosIdx pos, const PosIdx prevPos)
 {
-    return ParseError({
+    throw ParseError({
        .msg = HintFmt("attribute '%1%' already defined at %2%", symbols[attr], positions[prevPos]),
        .pos = positions[pos]
    });
 }

-inline std::optional<ParseError> ParserState::addAttr(ExprAttrs * attrs, AttrPath && attrPath, std::unique_ptr<Expr> e, const PosIdx pos)
+inline void State::addAttr(ExprAttrs * attrs, AttrPath && attrPath, std::unique_ptr<Expr> e, const PosIdx pos)
 {
    AttrPath::iterator i;
    // All attrpaths have at least one attr
@ -88,12 +72,12 @@ inline std::optional<ParseError> ParserState::addAttr(ExprAttrs * attrs, AttrPat
                    ExprAttrs * attrs2 = dynamic_cast<ExprAttrs *>(j->second.e.get());
                    if (!attrs2) {
                        attrPath.erase(i + 1, attrPath.end());
-                        return dupAttr(attrPath, pos, j->second.pos);
+                        dupAttr(attrPath, pos, j->second.pos);
                    }
                    attrs = attrs2;
                } else {
                    attrPath.erase(i + 1, attrPath.end());
-                    return dupAttr(attrPath, pos, j->second.pos);
+                    dupAttr(attrPath, pos, j->second.pos);
                }
            } else {
                auto next = attrs->attrs.emplace(std::piecewise_construct,
@ -135,7 +119,7 @@ inline std::optional<ParseError> ParserState::addAttr(ExprAttrs * attrs, AttrPat
                if (ae->inheritFromExprs)
                    std::ranges::move(*ae->inheritFromExprs, std::back_inserter(*jAttrs->inheritFromExprs));
            } else {
-                return dupAttr(attrPath, pos, j->second.pos);
+                dupAttr(attrPath, pos, j->second.pos);
            }
        } else {
            // This attr path is not defined. Let's create it.
@ -147,11 +131,9 @@ inline std::optional<ParseError> ParserState::addAttr(ExprAttrs * attrs, AttrPat
    } else {
        attrs->dynamicAttrs.emplace_back(std::move(i->expr), std::move(e), pos);
    }
-
-    return {};
 }

-inline std::optional<ParseError> ParserState::validateFormals(Formals * formals, PosIdx pos, Symbol arg)
+inline std::unique_ptr<Formals> State::validateFormals(std::unique_ptr<Formals> formals, PosIdx pos, Symbol arg)
 {
    std::sort(formals->formals.begin(), formals->formals.end(),
        [] (const auto & a, const auto & b) {
@ -166,21 +148,21 @@ inline std::optional<ParseError> ParserState::validateFormals(Formals * formals,
        duplicate = std::min(thisDup, duplicate.value_or(thisDup));
    }
    if (duplicate)
-        return ParseError({
+        throw ParseError({
            .msg = HintFmt("duplicate formal function argument '%1%'", symbols[duplicate->first]),
            .pos = positions[duplicate->second]
        });

    if (arg && formals->has(arg))
-        return ParseError({
+        throw ParseError({
            .msg = HintFmt("duplicate formal function argument '%1%'", symbols[arg]),
            .pos = positions[pos]
        });

-    return {};
+    return formals;
 }

-inline std::unique_ptr<Expr> ParserState::stripIndentation(const PosIdx pos,
+inline std::unique_ptr<Expr> State::stripIndentation(const PosIdx pos,
    std::vector<std::pair<PosIdx, std::variant<std::unique_ptr<Expr>, StringToken>>> && es)
 {
    if (es.empty()) return std::make_unique<ExprString>("");
@ -201,11 +183,11 @@ inline std::unique_ptr<Expr> ParserState::stripIndentation(const PosIdx pos,
            }
            continue;
        }
-        for (size_t j = 0; j < str->l; ++j) {
+        for (size_t j = 0; j < str->s.size(); ++j) {
            if (atStartOfLine) {
-                if (str->p[j] == ' ')
+                if (str->s[j] == ' ')
                    curIndent++;
-                else if (str->p[j] == '\n') {
+                else if (str->s[j] == '\n') {
                    /* Empty line, doesn't influence minimum
                       indentation. */
                    curIndent = 0;
@ -213,7 +195,7 @@ inline std::unique_ptr<Expr> ParserState::stripIndentation(const PosIdx pos,
                    atStartOfLine = false;
                    if (curIndent < minIndent) minIndent = curIndent;
                }
-            } else if (str->p[j] == '\n') {
+            } else if (str->s[j] == '\n') {
                atStartOfLine = true;
                curIndent = 0;
            }
@ -233,23 +215,23 @@ inline std::unique_ptr<Expr> ParserState::stripIndentation(const PosIdx pos,
    };
    const auto trimString = [&] (const StringToken t) {
        std::string s2;
-        for (size_t j = 0; j < t.l; ++j) {
+        for (size_t j = 0; j < t.s.size(); ++j) {
            if (atStartOfLine) {
-                if (t.p[j] == ' ') {
+                if (t.s[j] == ' ') {
                    if (curDropped++ >= minIndent)
-                        s2 += t.p[j];
+                        s2 += t.s[j];
                }
-                else if (t.p[j] == '\n') {
+                else if (t.s[j] == '\n') {
                    curDropped = 0;
-                    s2 += t.p[j];
+                    s2 += t.s[j];
                } else {
                    atStartOfLine = false;
                    curDropped = 0;
-                    s2 += t.p[j];
+                    s2 += t.s[j];
                }
            } else {
-                s2 += t.p[j];
-                if (t.p[j] == '\n') atStartOfLine = true;
+                s2 += t.s[j];
+                if (t.s[j] == '\n') atStartOfLine = true;
            }
        }

@ -274,9 +256,4 @@ inline std::unique_ptr<Expr> ParserState::stripIndentation(const PosIdx pos,
    return std::make_unique<ExprConcatStrings>(pos, true, std::move(es2));
 }

-inline PosIdx ParserState::at(const ParserLocation & loc)
-{
-    return positions.add(origin, loc.first_column);
-}
-
 }
--- a/src/libfetchers/fetchers.hh
+++ b/src/libfetchers/fetchers.hh
@ -159,37 +159,6 @@ struct InputScheme
        std::optional<std::string> commitMsg) const;

    virtual std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) = 0;
-
-protected:
-    void emplaceURLQueryIntoAttrs(
-        const ParsedURL & parsedURL,
-        Attrs & attrs,
-        const StringSet & numericParams,
-        const StringSet & booleanParams) const
-    {
-        for (auto &[name, value] : parsedURL.query) {
-            if (name == "url") {
-                throw BadURL(
-                    "URL '%s' must not override url via query param!",
-                    parsedURL.to_string()
-                );
-            } else if (numericParams.count(name) != 0) {
-                if (auto n = string2Int<uint64_t>(value)) {
-                    attrs.insert_or_assign(name, *n);
-                } else {
-                    throw BadURL(
-                        "URL '%s' has non-numeric parameter '%s'",
-                        parsedURL.to_string(),
-                        name
-                    );
-                }
-            } else if (booleanParams.count(name) != 0) {
-                attrs.emplace(name, Explicit<bool> { value == "1" });
-            } else {
-                attrs.emplace(name, value);
-            }
-        }
-    }
 };

 void registerInputScheme(std::shared_ptr<InputScheme> && fetcher);
--- a/src/libfetchers/git.cc
+++ b/src/libfetchers/git.cc
@ -273,14 +273,17 @@ struct GitInputScheme : InputScheme

        Attrs attrs;
        attrs.emplace("type", "git");
-        attrs.emplace("url", url2.to_string());

-        emplaceURLQueryIntoAttrs(
-            url,
-            attrs,
-            {"lastModified", "revCount"},
-            {"shallow", "submodules", "allRefs"}
-        );
+        for (auto & [name, value] : url.query) {
+            if (name == "rev" || name == "ref")
+                attrs.emplace(name, value);
+            else if (name == "shallow" || name == "submodules" || name == "allRefs")
+                attrs.emplace(name, Explicit<bool> { value == "1" });
+            else
+                url2.query.emplace(name, value);
+        }
+
+        attrs.emplace("url", url2.to_string());

        return inputFromAttrs(attrs);
    }
--- a/src/libfetchers/github.cc
+++ b/src/libfetchers/github.cc
@ -1,4 +1,3 @@
-#include "attrs.hh"
 #include "filetransfer.hh"
 #include "cache.hh"
 #include "globals.hh"
@ -37,11 +36,18 @@ struct GitArchiveInputScheme : InputScheme

        auto path = tokenizeString<std::vector<std::string>>(url.path, "/");

-        std::optional<std::string> refOrRev;
+        std::optional<Hash> rev;
+        std::optional<std::string> ref;
+        std::optional<std::string> host_url;

        auto size = path.size();
        if (size == 3) {
-            refOrRev = path[2];
+            if (std::regex_match(path[2], revRegex))
+                rev = Hash::parseAny(path[2], htSHA1);
+            else if (std::regex_match(path[2], refRegex))
+                ref = path[2];
+            else
+                throw BadURL("in URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[2]);
        } else if (size > 3) {
            std::string rs;
            for (auto i = std::next(path.begin(), 2); i != path.end(); i++) {
@ -52,91 +58,61 @@ struct GitArchiveInputScheme : InputScheme
            }

            if (std::regex_match(rs, refRegex)) {
-                refOrRev = rs;
+                ref = rs;
            } else {
                throw BadURL("in URL '%s', '%s' is not a branch/tag name", url.url, rs);
            }
        } else if (size < 2)
            throw BadURL("URL '%s' is invalid", url.url);

-        Attrs attrs;
-        attrs.emplace("type", type());
-        attrs.emplace("owner", path[0]);
-        attrs.emplace("repo", path[1]);
-
        for (auto &[name, value] : url.query) {
-            if (name == "rev" || name == "ref") {
-                if (refOrRev) {
-                    throw BadURL("URL '%s' already contains a ref or rev", url.url);
-                } else {
-                    refOrRev = value;
+            if (name == "rev") {
+                if (rev)
+                    throw BadURL("URL '%s' contains multiple commit hashes", url.url);
+                rev = Hash::parseAny(value, htSHA1);
            }
-            } else if (name == "lastModified") {
-                if (auto n = string2Int<uint64_t>(value)) {
-                    attrs.emplace(name, *n);
-                } else {
-                    throw Error(
-                        "Attribute 'lastModified' in URL '%s' must be an integer",
-                        url.to_string()
-                    );
+            else if (name == "ref") {
+                if (!std::regex_match(value, refRegex))
+                    throw BadURL("URL '%s' contains an invalid branch/tag name", url.url);
+                if (ref)
+                    throw BadURL("URL '%s' contains multiple branch/tag names", url.url);
+                ref = value;
            }
-            } else {
-                attrs.emplace(name, value);
+            else if (name == "host") {
+                if (!std::regex_match(value, hostRegex))
+                    throw BadURL("URL '%s' contains an invalid instance host", url.url);
+                host_url = value;
            }
+            // FIXME: barf on unsupported attributes
        }

-        if (refOrRev) attrs.emplace("refOrRev", *refOrRev);
+        if (ref && rev)
+            throw BadURL("URL '%s' contains both a commit hash and a branch/tag name %s %s", url.url, *ref, rev->gitRev());

-        return inputFromAttrs(attrs);
+        Input input;
+        input.attrs.insert_or_assign("type", type());
+        input.attrs.insert_or_assign("owner", path[0]);
+        input.attrs.insert_or_assign("repo", path[1]);
+        if (rev) input.attrs.insert_or_assign("rev", rev->gitRev());
+        if (ref) input.attrs.insert_or_assign("ref", *ref);
+        if (host_url) input.attrs.insert_or_assign("host", *host_url);
+
+        return input;
    }

    std::optional<Input> inputFromAttrs(const Attrs & attrs) const override
    {
-        // Attributes can contain refOrRev and it needs to be figured out
-        // which one it is (see inputFromURL for when that may happen).
-        // The correct one (ref or rev) will be written into finalAttrs and
-        // it needs to be mutable for that.
-        Attrs finalAttrs(attrs);
-        auto type_ = maybeGetStrAttr(finalAttrs, "type");
-        if (type_ != type()) return {};
+        // Returns an empty optional when the "type" attribute does not match
+        // this scheme, throws Error for any attribute outside the allowlist
+        // below, and otherwise copies `attrs` into the Input unchanged.
+        if (maybeGetStrAttr(attrs, "type") != type()) return {};

-        auto owner = getStrAttr(finalAttrs, "owner");
-        auto repo = getStrAttr(finalAttrs, "repo");
-
-        auto url = fmt("%s:%s/%s", *type_, owner, repo);
-        if (auto host = maybeGetStrAttr(finalAttrs, "host")) {
-            if (!std::regex_match(*host, hostRegex)) {
-                throw BadURL("URL '%s' contains an invalid instance host", url);
-            }
-        }
-
-        if (auto refOrRev = maybeGetStrAttr(finalAttrs, "refOrRev")) {
-            finalAttrs.erase("refOrRev");
-            if (std::regex_match(*refOrRev, revRegex)) {
-                finalAttrs.emplace("rev", *refOrRev);
-            } else if (std::regex_match(*refOrRev, refRegex)) {
-                finalAttrs.emplace("ref", *refOrRev);
-            } else {
-                throw Error(
-                    "in URL '%s', '%s' is not a commit hash or a branch/tag name",
-                    url,
-                    *refOrRev
-                );
-            }
-        } else if (auto ref = maybeGetStrAttr(finalAttrs, "ref")) {
-            if (!std::regex_match(*ref, refRegex)) {
-                throw BadURL("URL '%s' contains an invalid branch/tag name", url);
-            }
-        }
-
-        for (auto & [name, value] : finalAttrs) {
-            if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host") {
+        for (auto & [name, value] : attrs)
+            if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host")
                throw Error("unsupported input attribute '%s'", name);
-            }
-        }
+
+        // The results are discarded. NOTE(review): presumably these calls are
+        // kept because getStrAttr throws when the attribute is missing — confirm.
+        getStrAttr(attrs, "owner");
+        getStrAttr(attrs, "repo");

        Input input;
-        input.attrs = finalAttrs;
+        input.attrs = attrs;
        return input;
    }

--- a/src/libfetchers/indirect.cc
+++ b/src/libfetchers/indirect.cc
@ -17,8 +17,6 @@ struct IndirectInputScheme : InputScheme
        std::optional<Hash> rev;
        std::optional<std::string> ref;

-        Attrs attrs;
-
        if (path.size() == 1) {
        } else if (path.size() == 2) {
            if (std::regex_match(path[1], revRegex))
@ -28,21 +26,29 @@ struct IndirectInputScheme : InputScheme
            else
                throw BadURL("in flake URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[1]);
        } else if (path.size() == 3) {
+            if (!std::regex_match(path[1], refRegex))
+                throw BadURL("in flake URL '%s', '%s' is not a branch/tag name", url.url, path[1]);
            ref = path[1];
+            if (!std::regex_match(path[2], revRegex))
+                throw BadURL("in flake URL '%s', '%s' is not a commit hash", url.url, path[2]);
            rev = Hash::parseAny(path[2], htSHA1);
        } else
            throw BadURL("GitHub URL '%s' is invalid", url.url);

        std::string id = path[0];
+        if (!std::regex_match(id, flakeRegex))
+            throw BadURL("'%s' is not a valid flake ID", id);

-        attrs.emplace("type", "indirect");
-        attrs.emplace("id", id);
-        if (rev) attrs.emplace("rev", rev->gitRev());
-        if (ref) attrs.emplace("ref", *ref);
+        // FIXME: forbid query params?

-        emplaceURLQueryIntoAttrs(url, attrs, {}, {});
+        Input input;
+        input.direct = false;
+        input.attrs.insert_or_assign("type", "indirect");
+        input.attrs.insert_or_assign("id", id);
+        if (rev) input.attrs.insert_or_assign("rev", rev->gitRev());
+        if (ref) input.attrs.insert_or_assign("ref", *ref);

-        return inputFromAttrs(attrs);
+        return input;
    }

    std::optional<Input> inputFromAttrs(const Attrs & attrs) const override
@ -57,18 +63,6 @@ struct IndirectInputScheme : InputScheme
        if (!std::regex_match(id, flakeRegex))
            throw BadURL("'%s' is not a valid flake ID", id);

-        // TODO come up with a nicer error message for those two.
-        if (auto rev = maybeGetStrAttr(attrs, "rev")) {
-            if (!std::regex_match(*rev, revRegex)) {
-                throw BadURL("in flake '%s', '%s' is not a commit hash", id, *rev);
-            }
-        }
-        if (auto ref = maybeGetStrAttr(attrs, "ref")) {
-            if (!std::regex_match(*ref, refRegex)) {
-                throw BadURL("in flake '%s', '%s' is not a valid branch/tag name", id, *ref);
-            }
-        }
-
        Input input;
        input.direct = false;
        input.attrs = attrs;
--- a/src/libfetchers/mercurial.cc
+++ b/src/libfetchers/mercurial.cc
@ -56,7 +56,12 @@ struct MercurialInputScheme : InputScheme
        Attrs attrs;
        attrs.emplace("type", "hg");

-        emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {});
+        for (auto &[name, value] : url.query) {
+            if (name == "rev" || name == "ref")
+                attrs.emplace(name, value);
+            else
+                url2.query.emplace(name, value);
+        }

        attrs.emplace("url", url2.to_string());

--- a/src/libfetchers/tarball.cc
+++ b/src/libfetchers/tarball.cc
@ -201,17 +201,29 @@ struct CurlInputScheme : InputScheme
        if (!isValidURL(_url, requireTree))
            return std::nullopt;

-        auto url = _url;
+        Input input;

-        Attrs attrs;
-        attrs.emplace("type", inputType());
+        auto url = _url;

        url.scheme = parseUrlScheme(url.scheme).transport;

-        emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {});
+        auto narHash = url.query.find("narHash");
+        if (narHash != url.query.end())
+            input.attrs.insert_or_assign("narHash", narHash->second);

-        attrs.emplace("url", url.to_string());
-        return inputFromAttrs(attrs);
+        if (auto i = get(url.query, "rev"))
+            input.attrs.insert_or_assign("rev", *i);
+
+        if (auto i = get(url.query, "revCount"))
+            if (auto n = string2Int<uint64_t>(*i))
+                input.attrs.insert_or_assign("revCount", *n);
+
+        url.query.erase("rev");
+        url.query.erase("revCount");
+
+        input.attrs.insert_or_assign("type", inputType());
+        input.attrs.insert_or_assign("url", url.to_string());
+        return input;
    }

    std::optional<Input> inputFromAttrs(const Attrs & attrs) const override
@ -223,7 +235,7 @@ struct CurlInputScheme : InputScheme
        std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack", "rev", "revCount", "lastModified"};
        for (auto & [name, value] : attrs)
            if (!allowedNames.count(name))
-                throw Error("unsupported %s input attribute '%s'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }'", *type, name);
+                throw Error("unsupported %s input attribute '%s'", *type, name);

        Input input;
        input.attrs = attrs;
--- a/src/libstore/build/local-derivation-goal.cc
+++ b/src/libstore/build/local-derivation-goal.cc
@ -494,7 +494,7 @@ void LocalDerivationGoal::startBuilder()

    /* Create a temporary directory where the build will take
       place. */
-    tmpDir = createTempDir("", "nix-build-" + std::string(drvPath.name()), false, false, 0700);
+    tmpDir = createTempDir(settings.buildDir.get().value_or(""), "nix-build-" + std::string(drvPath.name()), false, false, 0700);

    chownToBuilder(tmpDir);

@ -2602,8 +2602,8 @@ void LocalDerivationGoal::runChild()
        bool allowLocalNetworking = parsedDrv->getBoolAttr("__darwinAllowLocalNetworking");

        /* The tmpDir in scope points at the temporary build directory for our derivation. Some packages try different mechanisms
-           to find temporary directories, so we want to open up a broader place for them to dump their files, if needed. */
-        Path globalTmpDir = canonPath(getEnvNonEmpty("TMPDIR").value_or("/tmp"), true);
+           to find temporary directories, so we want to open up a broader place for them to put their files, if needed. */
+        Path globalTmpDir = canonPath(defaultTempDir(), true);

        /* They don't like trailing slashes on subpath directives */
        if (globalTmpDir.back() == '/') globalTmpDir.pop_back();
--- a/src/libstore/globals.cc
+++ b/src/libstore/globals.cc
@ -429,7 +429,7 @@ void initLibStore() {
    /* On macOS, don't use the per-session TMPDIR (as set e.g. by
       sshd). This breaks build users because they don't have access
       to the TMPDIR, in particular in ‘nix-store --serve’. */
-    if (getEnv("TMPDIR").value_or("/tmp").starts_with("/var/folders/"))
+    if (defaultTempDir().starts_with("/var/folders/"))
        unsetenv("TMPDIR");
 #endif

--- a/src/libstore/globals.hh
+++ b/src/libstore/globals.hh
@ -582,6 +582,8 @@ public:
    Setting<std::string> sandboxShmSize{
        this, "50%", "sandbox-dev-shm-size",
        R"(
+            *Linux only*
+
            This option determines the maximum size of the `tmpfs` filesystem
            mounted on `/dev/shm` in Linux sandboxes. For the format, see the
            description of the `size` option of `tmpfs` in mount(8). The default
@ -589,9 +591,27 @@ public:
        )"};

    Setting<Path> sandboxBuildDir{this, "/build", "sandbox-build-dir",
-        "The build directory inside the sandbox."};
+        R"(
+            *Linux only*
+
+            The build directory inside the sandbox.
+
+            This directory is backed by [`build-dir`](#conf-build-dir) on the host.
+        )"};
 #endif

+    Setting<std::optional<Path>> buildDir{this, std::nullopt, "build-dir",
+        R"(
+            The directory on the host, in which derivations' temporary build directories are created.
+
+            If not set, Nix will use the system temporary directory indicated by the `TMPDIR` environment variable.
+            Note that builds are often performed by the Nix daemon, so its `TMPDIR` is used, and not that of the Nix command line interface.
+
+            This is also the location where [`--keep-failed`](@docroot@/command-ref/opt-common.md#opt-keep-failed) leaves its files.
+
+            If Nix runs without sandbox, or if the platform does not support sandboxing with bind mounts (e.g. macOS), then the [`builder`](@docroot@/language/derivations.md#attr-builder)'s environment will contain this directory, instead of the virtual location [`sandbox-build-dir`](#conf-sandbox-build-dir).
+        )"};
+
    Setting<PathSet> allowedImpureHostPrefixes{this, {}, "allowed-impure-host-deps",
        "Which prefixes to allow derivations to ask for access to (primarily for Darwin)."};

--- a/src/libutil/file-system.cc
+++ b/src/libutil/file-system.cc
@ -511,10 +511,14 @@ void AutoDelete::reset(const Path & p, bool recursive) {

 //////////////////////////////////////////////////////////////////////

+/**
+ * Return the value of `TMPDIR` when it is set and non-empty, and "/tmp"
+ * otherwise. The return type is spelled `Path` to match the declaration in
+ * file-system.hh.
+ */
+Path defaultTempDir() {
+    return getEnvNonEmpty("TMPDIR").value_or("/tmp");
+}
+
 static Path tempName(Path tmpRoot, const Path & prefix, bool includePid,
    std::atomic<unsigned int> & counter)
 {
-    tmpRoot = canonPath(tmpRoot.empty() ? getEnv("TMPDIR").value_or("/tmp") : tmpRoot, true);
+    tmpRoot = canonPath(tmpRoot.empty() ? defaultTempDir() : tmpRoot, true);
    if (includePid)
        return fmt("%1%/%2%-%3%-%4%", tmpRoot, prefix, getpid(), counter++);
    else
@ -554,7 +558,7 @@ Path createTempDir(const Path & tmpRoot, const Path & prefix,

 std::pair<AutoCloseFD, Path> createTempFile(const Path & prefix)
 {
-    Path tmpl(getEnv("TMPDIR").value_or("/tmp") + "/" + prefix + ".XXXXXX");
+    Path tmpl(defaultTempDir() + "/" + prefix + ".XXXXXX");
    // Strictly speaking, this is UB, but who cares...
    // FIXME: use O_TMPFILE.
    AutoCloseFD fd(mkstemp((char *) tmpl.c_str()));
--- a/src/libutil/file-system.hh
+++ b/src/libutil/file-system.hh
@ -258,6 +258,11 @@ Path createTempDir(const Path & tmpRoot = "", const Path & prefix = "nix",
 */
 std::pair<AutoCloseFD, Path> createTempFile(const Path & prefix = "nix");

+/**
+ * Return `TMPDIR`, or the default temporary directory if unset or empty.
+ */
+Path defaultTempDir();
+
 /**
 * Used in various places.
 */
--- a/src/nix-build/nix-build.cc
+++ b/src/nix-build/nix-build.cc
@ -414,8 +414,7 @@ static void main_nix_build(int argc, char * * argv)
        // Set the environment.
        auto env = getEnv();

-        auto tmp = getEnv("TMPDIR");
-        if (!tmp) tmp = getEnv("XDG_RUNTIME_DIR").value_or("/tmp");
+        auto tmp = defaultTempDir();

        if (pure) {
            decltype(env) newEnv;
@ -427,7 +426,7 @@ static void main_nix_build(int argc, char * * argv)
            env["__ETC_PROFILE_SOURCED"] = "1";
        }

-        env["NIX_BUILD_TOP"] = env["TMPDIR"] = env["TEMPDIR"] = env["TMP"] = env["TEMP"] = *tmp;
+        env["NIX_BUILD_TOP"] = env["TMPDIR"] = env["TEMPDIR"] = env["TMP"] = env["TEMP"] = tmp;
        env["NIX_STORE"] = store->storeDir;
        env["NIX_BUILD_CORES"] = std::to_string(settings.buildCores);

--- a/tests/functional/check.sh
+++ b/tests/functional/check.sh
@ -34,6 +34,21 @@ nix-build check.nix -A failed --argstr checkBuildId $checkBuildId \
 [ "$status" = "100" ]
 if checkBuildTempDirRemoved $TEST_ROOT/log; then false; fi

+# Check that `--option build-dir` relocates the build directory: a failing
+# build run with --keep-failed must leave exactly one nix-build-* directory
+# under the custom location, containing this run's checkBuildId.
+test_custom_build_dir() {
+  local customBuildDir="$TEST_ROOT/custom-build-dir"
+  # Start from a known value: `status` is only assigned when nix-build fails,
+  # so without this a leftover 100 from an earlier check would let the
+  # exit-code assertion pass even if this build unexpectedly succeeded.
+  local status=0
+
+  # Nix does not create the parent directories, and perhaps it shouldn't try to
+  # decide the permissions of build-dir.
+  mkdir "$customBuildDir"
+  nix-build check.nix -A failed --argstr checkBuildId "$checkBuildId" \
+      --no-out-link --keep-failed --option build-dir "$customBuildDir" 2> "$TEST_ROOT/log" || status=$?
+  [ "$status" = "100" ]
+
+  # Expand the glob once into an array, so that the count and the path used
+  # below refer to the same match.
+  local buildDirs=("$customBuildDir/nix-build-"*)
+  [[ 1 == "$(count "${buildDirs[@]}")" ]]
+  # An unmatched glob stays as the literal pattern and still counts as one
+  # argument, so also require that the single entry really is a directory.
+  [[ -d "${buildDirs[0]}" ]]
+  grep "$checkBuildId" "${buildDirs[0]}/checkBuildId"
+}
+test_custom_build_dir
+
 nix-build check.nix -A deterministic --argstr checkBuildId $checkBuildId \
    --no-out-link 2> $TEST_ROOT/log
 checkBuildTempDirRemoved $TEST_ROOT/log
--- a/tests/functional/common/vars-and-functions.sh.in
+++ b/tests/functional/common/vars-and-functions.sh.in
@ -274,6 +274,11 @@ grepQuietInverse() {
    ! grep "$@" > /dev/null
 }

+# Print how many arguments were passed (e.g. the number of matches of a glob)
+count() {
+  printf '%d\n' "$#"
+}
+
 trap onError ERR

 fi # COMMON_VARS_AND_FUNCTIONS_SH_SOURCED
--- a/tests/functional/fetchers.sh
+++ b/tests/functional/fetchers.sh
@ -1,91 +0,0 @@
-source common.sh
-
-requireGit
-
-clearStore
-
-testFetchTreeError() {
-    rawFetchTreeArg="${1?fetchTree arg missing}"
-    messageSubstring="${2?messageSubstring missing}"
-
-    output="$(nix eval --impure --raw --expr "(builtins.fetchTree $rawFetchTreeArg).outPath" 2>&1)" && status=0 || status=$?
-    grepQuiet "$messageSubstring" <<<"$output"
-    test "$status" -ne 0
-}
-
-# github/gitlab/sourcehut fetcher input validation
-for provider in github gitlab sourcehut; do
-    # ref/rev validation
-    testFetchTreeError \
-        "{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; ref = \",\"; }" \
-        "URL '$provider:foo/bar' contains an invalid branch/tag name"
-
-    testFetchTreeError \
-        "\"$provider://host/foo/bar/,\"" \
-        "URL '$provider:foo/bar', ',' is not a commit hash or a branch/tag name"
-
-    testFetchTreeError \
-        "\"$provider://host/foo/bar/f16d8f43dd0998cdb315a2cccf2e4d10027e7ca4?rev=abc\"" \
-        "URL '$provider://host/foo/bar/f16d8f43dd0998cdb315a2cccf2e4d10027e7ca4?rev=abc' already contains a ref or rev"
-
-    testFetchTreeError \
-        "\"$provider://host/foo/bar/ref?ref=ref2\"" \
-        "URL '$provider://host/foo/bar/ref?ref=ref2' already contains a ref or rev"
-
-    # host validation
-    testFetchTreeError \
-        "{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; host = \"git_hub.com\"; }" \
-        "URL '$provider:foo/bar' contains an invalid instance host"
-
-    testFetchTreeError \
-        "\"$provider://host/foo/bar/ref?host=git_hub.com\"" \
-        "URL '$provider:foo/bar' contains an invalid instance host"
-
-    # invalid attributes
-    testFetchTreeError \
-        "{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; wrong = true; }" \
-        "unsupported input attribute 'wrong'"
-
-    testFetchTreeError \
-        "\"$provider://host/foo/bar/ref?wrong=1\"" \
-        "unsupported input attribute 'wrong'"
-done
-
-# unsupported attributes w/ tarball fetcher
-testFetchTreeError \
-    "\"https://host/foo?wrong=1\"" \
-    "unsupported tarball input attribute 'wrong'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }"
-
-# test for unsupported attributes / validation in git fetcher
-testFetchTreeError \
-    "\"git+https://github.com/owner/repo?invalid=1\"" \
-    "unsupported Git input attribute 'invalid'"
-
-testFetchTreeError \
-    "\"git+https://github.com/owner/repo?url=foo\"" \
-    "URL 'git+https://github.com/owner/repo?url=foo' must not override url via query param!"
-
-testFetchTreeError \
-    "\"git+https://github.com/owner/repo?ref=foo.lock\"" \
-    "invalid Git branch/tag name 'foo.lock'"
-
-testFetchTreeError \
-    "{ type = \"git\"; url =\"https://github.com/owner/repo\"; ref = \"foo.lock\"; }" \
-    "invalid Git branch/tag name 'foo.lock'"
-
-# same for mercurial
-testFetchTreeError \
-    "\"hg+https://forge.tld/owner/repo?invalid=1\"" \
-    "unsupported Mercurial input attribute 'invalid'"
-
-testFetchTreeError \
-    "{ type = \"hg\"; url = \"https://forge.tld/owner/repo\"; invalid = 1; }" \
-    "unsupported Mercurial input attribute 'invalid'"
-
-testFetchTreeError \
-    "\"hg+https://forge.tld/owner/repo?ref=,\"" \
-    "invalid Mercurial branch/tag name ','"
-
-testFetchTreeError \
-    "{ type = \"hg\"; url = \"https://forge.tld/owner/repo\"; ref = \",\"; }" \
-    "invalid Mercurial branch/tag name ','"
--- a/tests/functional/install-darwin.sh
+++ b/tests/functional/install-darwin.sh
@ -1,4 +1,4 @@
-#!/bin/sh
+#!/usr/bin/env bash

 set -eux

--- a/tests/functional/lang/parse-fail-eof-pos.err.exp
+++ b/tests/functional/lang/parse-fail-eof-pos.err.exp
@ -1,4 +1,4 @@
-error: syntax error, unexpected end of file
+error: syntax error, unexpected end of file, expecting expression
       at «stdin»:3:1:
            2| # no content
            3|
--- a/tests/functional/lang/parse-fail-undef-var-2.err.exp
+++ b/tests/functional/lang/parse-fail-undef-var-2.err.exp
@ -1,4 +1,4 @@
-error: syntax error, unexpected ':', expecting '}'
+error: syntax error, expecting '}'
       at «stdin»:3:13:
            2|
            3|   f = {x, y : ["baz" "bar" z "bat"]}: x + y;
--- a/tests/functional/lang/parse-fail-utf8.err.exp
+++ b/tests/functional/lang/parse-fail-utf8.err.exp
@ -1,4 +1,4 @@
-error: syntax error, unexpected invalid token, expecting end of file
+error: syntax error, expecting end of file
       at «stdin»:1:5:
            1| 123 é 4
             |     ^
--- a/tests/functional/meson.build
+++ b/tests/functional/meson.build
@ -93,7 +93,6 @@ functional_tests_scripts = [
  'fetchGitRefs.sh',
  'gc-runtime.sh',
  'tarball.sh',
-  'fetchers.sh',
  'fetchGit.sh',
  'fetchurl.sh',
  'fetchPath.sh',
--- a/tests/functional/nix-daemon-untrusting.sh
+++ b/tests/functional/nix-daemon-untrusting.sh
@ -1,3 +1,3 @@
-#!/bin/sh
+#!/usr/bin/env bash

 exec nix-daemon --force-untrusted "$@"
--- a/tests/functional/push-to-store-old.sh
+++ b/tests/functional/push-to-store-old.sh
@ -1,4 +1,4 @@
-#!/bin/sh
+#!/usr/bin/env bash

 set -x
 set -e
--- a/tests/functional/push-to-store.sh
+++ b/tests/functional/push-to-store.sh
@ -1,4 +1,4 @@
-#!/bin/sh
+#!/usr/bin/env bash

 set -x
 set -e
--- a/tests/nixos/tarball-flakes.nix
+++ b/tests/nixos/tarball-flakes.nix
@ -69,7 +69,7 @@ in

    # Check that we got redirected to the immutable URL.
    locked_url = info["locked"]["url"]
-    assert locked_url == "http://localhost/stable/${nixpkgs.rev}.tar.gz?rev=${nixpkgs.rev}&revCount=1234", f"{locked_url=} != http://localhost/stable/${nixpkgs.rev}.tar.gz"
+    assert locked_url == "http://localhost/stable/${nixpkgs.rev}.tar.gz", f"{locked_url=} != http://localhost/stable/${nixpkgs.rev}.tar.gz"

    # Check that we got the rev and revCount attributes.
    revision = info["revision"]
Author	SHA1	Message	Date
alois31	8914953e2e	libmain: move ProgressBar implementation out of the header Change-Id: `Ib4b42ebea290ee575294df6b2f17a38a5d850b80`	2024-06-26 09:19:19 +02:00
alois31	ae0bb8e5e6	libstore/build: use an allowlist approach to syscall filtering Previously, system call filtering (to prevent builders from storing files with setuid/setgid permission bits or extended attributes) was performed using a blocklist. While this looks simple at first, it actually carries significant security and maintainability risks: after all, the kernel may add new syscalls to achieve the same functionality one is trying to block, and it can even be hard to actually add the syscall to the blocklist when building against a C library that doesn't know about it yet. For a recent demonstration of this happening in practice to Nix, see the introduction of fchmodat2 [0] [1]. The allowlist approach does not share the same drawback. While it does require a rather large list of harmless syscalls to be maintained in the codebase, failing to update this list (and roll out the update to all users) in time has rather benign effects; at worst, very recent programs that already rely on new syscalls will fail with an error the same way they would on a slightly older kernel that doesn't support them yet. Most importantly, no unintended new ways of performing dangerous operations will be silently allowed. Another possible drawback is reduced system call performance due to the larger filter created by the allowlist requiring more computation [2]. However, this issue has not convincingly been demonstrated yet in practice, for example in systemd or various browsers. This commit tries to keep the behavior as close to unchanged as possible. Only newer syscalls that are not supported by glibc 2.38 (as found in NixOS 23.11) are blocked. Since this includes fchmodat2, the compatibility code added for handling this syscall can be removed too. 
[0] https://github.com/NixOS/nixpkgs/issues/300635 [1] https://github.com/NixOS/nix/issues/10424 [2] https://github.com/flatpak/flatpak/pull/4462#issuecomment-1061690607 Change-Id: `I541be3ea9b249bcceddfed6a5a13ac10b11e16ad`	2024-06-26 09:19:19 +02:00
alois31	576b28bce1	libstore/build: always treat seccomp setup failures as fatal In `f047e4357b`, I missed the behavior that if building without a dedicated build user (i.e. in single-user setups), seccomp setup failures are silently ignored. This was introduced without explanation 7 years ago (`ff6becafa8`). Hopefully the only use-case nowadays is causing spurious test suite successes when messing up the seccomp filter during development. Let's try removing it. Change-Id: `Ibe51416d9c7a6dd635c2282990224861adf1ceab`	2024-06-26 09:19:14 +02:00
jade	4ac2c496d4	Merge "change shebangs of all .sh scripts to bash" into main	2024-06-25 22:18:26 +00:00
jade	aceef13682	Merge changes If0ddec6b,Iaa63ed18 into main * changes: Add some release notes for things we did packaging: don't build internal api docs by default in dev shells	2024-06-25 22:16:04 +00:00
Lunaphied	f170870ae7	Merge ".envrc: remove MAKEFLAGS and use clang environment by default" into main	2024-06-25 20:42:46 +00:00
Lunaphied	97c86908a4	.envrc: remove MAKEFLAGS and use clang environment by default MAKEFLAGS hasn't been relevant since we switched off the Make buildsystem and using the clang environment by default gives you clangd by default which most developers will want. Change-Id: `I9c11d0613577047e6c908f049c1ffaca5fb5ff67`	2024-06-25 12:36:18 -06:00
eldritch horrors	e6cd67591b	libexpr: rewrite the parser with pegtl instead of flex/bison this gives about 20% performance improvements on pure parsing. obviously it will be less on full eval, but depending on how much parsing is to be done (e.g. including hackage-packages.nix or not) it's more like 4%-10%. this has been tested (with thousands of core hours of fuzzing) to ensure that the ASTs produced by the new parser are exactly the same as the old one would have produced. error messages will change (sometimes by a lot) and are not yet perfect, but we would rather leave this as is for later. test results for running only the parser (excluding the variable binding code) in a tight loop with inputs and parameters as given are promising: - 40% faster on lix's package.nix at 10000 iterations - 1.3% faster on nixpkgs all-packages.nix at 1000 iterations - equivalent on all of nixpkgs concatenated at 100 iterations (excluding invalid files, each file surrounded with parens) more realistic benchmarks are somewhere in between the extremes, parsing once again getting the largest uplift. other realistic workloads improve by a few percentage points as well, notably system builds are 4% faster. Benchmarks summary (from ./bench/summarize.jq bench/bench-*.json) old/bin/nix --extra-experimental-features 'nix-command flakes' eval -f bench/nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix mean: 0.408s ± 0.025s user: 0.355s \| system: 0.033s median: 0.389s range: 0.388s ... 0.442s relative: 1 new/bin/nix --extra-experimental-features 'nix-command flakes' eval -f bench/nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix mean: 0.332s ± 0.024s user: 0.279s \| system: 0.033s median: 0.314s range: 0.313s ... 0.361s relative: 0.814 --- old/bin/nix --extra-experimental-features 'nix-command flakes' eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system' mean: 6.133s ± 0.022s user: 5.395s \| system: 0.437s median: 6.128s range: 6.099s ... 
6.183s relative: 1 new/bin/nix --extra-experimental-features 'nix-command flakes' eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system' mean: 5.925s ± 0.025s user: 5.176s \| system: 0.456s median: 5.934s range: 5.861s ... 5.943s relative: 0.966 --- GC_INITIAL_HEAP_SIZE=10g old/bin/nix eval --extra-experimental-features 'nix-command flakes' --raw --impure --expr 'with import <nixpkgs/nixos> {}; system' mean: 4.503s ± 0.027s user: 3.731s \| system: 0.547s median: 4.499s range: 4.478s ... 4.541s relative: 1 GC_INITIAL_HEAP_SIZE=10g new/bin/nix eval --extra-experimental-features 'nix-command flakes' --raw --impure --expr 'with import <nixpkgs/nixos> {}; system' mean: 4.285s ± 0.031s user: 3.504s \| system: 0.571s median: 4.281s range: 4.221s ... 4.328s relative: 0.951 --- old/bin/nix --extra-experimental-features 'nix-command flakes' search --no-eval-cache github:nixos/nixpkgs/e1fa12d4f6c6fe19ccb59cac54b5b3f25e160870 hello mean: 16.475s ± 0.07s user: 14.088s \| system: 1.572s median: 16.495s range: 16.351s ... 16.536s relative: 1 new/bin/nix --extra-experimental-features 'nix-command flakes' search --no-eval-cache github:nixos/nixpkgs/e1fa12d4f6c6fe19ccb59cac54b5b3f25e160870 hello mean: 15.973s ± 0.013s user: 13.558s \| system: 1.615s median: 15.973s range: 15.946s ... 15.99s relative: 0.97 --- Change-Id: `Ie66ec2d045dec964632c6541e25f8f0797319ee2`	2024-06-25 12:24:58 +00:00
jade	c097ebe66b	Merge "Revert "libfetchers: make attribute / URL query handling consistent"" into main	2024-06-25 10:19:52 +00:00
jade	e19f27917c	Add some release notes for things we did Change-Id: `If0ddec6b64a43c3d8f6cae39e0292863f3b49401`	2024-06-24 16:26:12 -07:00
jade	1245340e44	packaging: don't build internal api docs by default in dev shells These are totally available and you can just turn them on, but they have very bad dependency tracking and thus bloat incremental change times, which is not really ok. Change-Id: `Iaa63ed18a789e74fcb757248cd24c3b194afcc80`	2024-06-24 15:57:38 -07:00
jade	3e151d4d77	Revert "libfetchers: make attribute / URL query handling consistent" This reverts commit `35eec921af`. Reason for revert: Regressed nix-eval-jobs, and it appears to be this change is buggy/missing a case. It just needs another pass. Code causing the problem in n-e-j, when invoked with `nix-eval-jobs --flake '.#hydraJobs'`: ``` n-e-j/tests/assets » ../../build/src/nix-eval-jobs --meta --workers 1 --flake .#hydraJobs warning: unknown setting 'trusted-users' warning: `--gc-roots-dir' not specified error: unsupported Git input attribute 'dir' error: worker error: error: unsupported Git input attribute 'dir' ``` ``` nix::Value vRoot = [&]() { if (args.flake) { auto [flakeRef, fragment, outputSpec] = nix::parseFlakeRefWithFragmentAndExtendedOutputsSpec( args.releaseExpr, nix::absPath(".")); nix::InstallableFlake flake{ {}, state, std::move(flakeRef), fragment, outputSpec, {}, {}, args.lockFlags}; return flake.toValue(state).first; } else { return releaseExprTopLevelValue(*state, autoArgs, args); } }(); ``` Inspecting the program behaviour reveals that `dir` was in fact set in the URL going into the fetcher. This is in turn because unlike in the case changed in this commit, it was not erased before handing it to libfetchers, which is probably just a mistake. ``` (rr) up 3 0x00007ffff60262ae in nix::fetchers::Input::fromURL (url=..., requireTree=requireTree@entry=true) at src/libfetchers/fetchers.cc:39 warning: Source file is more recent than executable. 
39 auto res = inputScheme->inputFromURL(url, requireTree); (rr) p url $1 = (const nix::ParsedURL &) @0x7fffdc874190: {url = "git+file:///home/jade/lix/nix-eval-jobs", base = "git+file:///home/jade/lix/nix-eval-jobs", scheme = "git+file", authority = std::optional<std::string> = {[contained value] = ""}, path = "/home/jade/lix/nix-eval-jobs", query = std::map with 1 element = {["dir"] = "tests/assets"}, fragment = ""} (rr) up 4 0x00007ffff789d904 in nix::parseFlakeRefWithFragment (url=".#hydraJobs", baseDir=std::optional<std::string> = {...}, allowMissing=allowMissing@entry=false, isFlake=isFlake@entry=true) at src/libexpr/flake/flakeref.cc:179 warning: Source file is more recent than executable. 179 FlakeRef(Input::fromURL(parsedURL, isFlake), getOr(parsedURL.query, "dir", "")), (rr) p parsedURL $2 = {url = "git+file:///home/jade/lix/nix-eval-jobs", base = "git+file:///home/jade/lix/nix-eval-jobs", scheme = "git+file", authority = std::optional<std::string> = {[contained value] = ""}, path = "/home/jade/lix/nix-eval-jobs", query = std::map with 1 element = { ["dir"] = "tests/assets"}, fragment = ""} (rr) list 174 175 if (pathExists(flakeRoot + "/.git/shallow")) 176 parsedURL.query.insert_or_assign("shallow", "1"); 177 178 return std::make_pair( 179 FlakeRef(Input::fromURL(parsedURL, isFlake), getOr(parsedURL.query, "dir", "")), 180 fragment); 181 } ``` Change-Id: `Ib55a882eaeb3e59228857761dc1e3b2e366b0f5e`	2024-06-24 22:49:17 +00:00
vigress8	c7af89c797	change shebangs of all .sh scripts to bash On operating systems where /bin/sh is not Bash, some scripts are invalid because of bashisms, and building Lix fails with errors like this: `render-manpage.sh: 3: set: Illegal option -o pipefail` This modifies all scripts that use a `/bin/sh` shebang to `/usr/bin/env bash`, including currently POSIX-compliant ones, to prevent any future confusion. Change-Id: `Ia074cc6db42d40fc59a63726f6194ea0149ea5e0`	2024-06-24 14:00:43 -07:00
jade	d5637ee790	devShell: guard against running from another directory I was working on nix-eval-jobs with a dev shell with some shenanigans to run against a locally built Lix and it was getting really annoying when `nix develop ../lix#` was messing up my other git repo's hooks. This is a fix via blunt force, but it is at least obvious how it works. Change-Id: `Ia29eeb5be57ab6a2c88451c00ea18a51e4dfe65e`	2024-06-24 13:41:38 -07:00
jade	eb5de71adc	justfile: accept extra options to just setup and pass them to meson This lets you get the default options and still be able to add more. Change-Id: `Ife32c348b1498ff2ccdddf051a5bba520cfa36f0`	2024-06-24 13:37:01 -07:00
Robert Hensing	d86009bd76	Add build-dir setting, clean up default TMPDIR handling This is a squash of upstream PRs #10303, #10312 and #10883. fix: Treat empty TMPDIR as unset Fixes an instance of nix: src/libutil/util.cc:139: nix::Path nix::canonPath(PathView, bool): Assertion `path != ""' failed. ... which I've been getting in one of my shells for some reason. I have yet to find out why TMPDIR was empty, but it's no reason for Nix to break. (cherry picked from commit c3fb2aa1f9d1fa756dac38d3588c836c5a5395dc) fix: Treat empty XDG_RUNTIME_DIR as unset See preceding commit. Not observed in the wild, but is sensible and consistent with TMPDIR behavior. (cherry picked from commit b9e7f5aa2df3f0e223f5c44b8089cbf9b81be691) local-derivation-goal.cc: Reuse defaultTempDir() (cherry picked from commit fd31945742710984de22805ee8d97fbd83c3f8eb) fix: remove usage of XDG_RUNTIME_DIR for TMP (cherry picked from commit 1363f51bcb24ab9948b7b5093490a009947f7453) tests/functional: Add count() (cherry picked from commit 6221770c9de4d28137206bdcd1a67eea12e1e499) Remove uncalled for message (cherry picked from commit b1fe388d33530f0157dcf9f461348b61eda13228) Add build-dir setting (cherry picked from commit 8b16cced18925aa612049d08d5e78eccbf0530e4) Change-Id: `Ic7b75ff0b6a3b19e50a4ac8ff2d70f15c683c16a`	2024-06-24 11:30:32 +03:00
				`@ -0,0 +1 @@`
				`This is a file used by the dev shell shellHook in package.nix to check that this is actually a Lix repo before installing git hooks. Its contents have no meaning.`