Merge changes Ic890f781,I77b2de10,I3202aba0,I6ddc4296,I18776984, ... into main

* changes:
  doc: fix broken table in operators list
  terminal code eaters: implement OSC
  libexpr: significantly improve error messages for bad attr paths
  tests/functional2: add terminal code eater
  tests/functional2: fix occasional pytest haunting
  attr path parser: fix bug in not rejecting empty attr paths, add unparser
This commit is contained in:
jade 2024-12-11 02:28:11 +00:00 committed by Gerrit Code Review
commit cb44fe7c15
19 changed files with 417 additions and 38 deletions

View file

@ -0,0 +1,25 @@
---
synopsis: "Improved error messages for bad attr paths"
category: Improvements
cls: [2277, 2280]
credits: jade
---
Lix now includes much more detail when a bad attribute path is accessed at the command line:
```
» nix eval -f '<nixpkgs>' lixVersions.lix_2_92
error: attribute 'lix_2_92' in selection path 'lixVersions.lix_2_92' not found
Did you mean one of lix_2_90 or lix_2_91?
```
After:
```
» nix eval --impure -f '<nixpkgs>' lixVersions.lix_2_92
error: attribute 'lix_2_92' in selection path 'lixVersions.lix_2_92' not found inside path 'lixVersions', whose contents are: { __unfix__ = «lambda @ /nix/store/hfz1qqd0z8amlgn8qwich1dvkmldik36-source/lib/fixed-points.nix:
447:7»; buildLix = «thunk»; extend = «thunk»; latest = «thunk»; lix_2_90 = «thunk»; lix_2_91 = «thunk»; override = «thunk»; overrideDerivation = «thunk»; recurseForDerivations = true; stable = «thunk»; }
Did you mean one of lix_2_90 or lix_2_91?
```
This should avoid some unnecessary trips to the repl or to the debugger by giving some information about the value being selected on that was unexpected.

View file

@ -26,8 +26,8 @@
| Logical conjunction (`AND`) | *bool* `&&` *bool* | left | 12 |
| Logical disjunction (`OR`) | *bool* <code>\|\|</code> *bool* | left | 13 |
| [Logical implication] | *bool* `->` *bool* | none | 14 |
| \[Experimental\] [Function piping] | *expr* |> *func* | left | 15 |
| \[Experimental\] [Function piping] | *expr* <| *func* | right | 16 |
| \[Experimental\] [Function piping] | *expr* `\|>` *func* | left | 15 |
| \[Experimental\] [Function piping] | *expr* `<\|` *func* | right | 16 |
[string]: ./values.md#type-string
[path]: ./values.md#type-path

View file

@ -1,45 +1,84 @@
#include "lix/libexpr/attr-path.hh"
#include "lix/libexpr/eval-inline.hh"
#include "print-options.hh"
#include <algorithm>
#include <sstream>
namespace nix {
Strings parseAttrPath(std::string_view s)
std::vector<std::string> parseAttrPath(std::string_view const s)
{
Strings res;
std::vector<std::string> res;
std::string cur;
bool haveData = false;
auto i = s.begin();
while (i != s.end()) {
if (*i == '.') {
res.push_back(cur);
haveData = false;
cur.clear();
} else if (*i == '"') {
// If there is a quote there *will* be a named term even if it is empty.
++i;
haveData = true;
while (1) {
if (i == s.end())
throw ParseError("missing closing quote in selection path '%1%'", s);
if (*i == '"') break;
cur.push_back(*i++);
}
} else
} else {
cur.push_back(*i);
haveData = true;
}
++i;
}
if (!cur.empty()) res.push_back(cur);
if (haveData) res.push_back(cur);
return res;
}
std::string unparseAttrPath(std::vector<std::string> const & attrPath)
{
// FIXME(jade): can probably be rewritten with ranges once libc++ has a
// fully featured implementation
// https://github.com/llvm/llvm-project/pull/65536
auto ret = std::ostringstream{};
bool first = true;
for (auto const & part : attrPath) {
if (!first) {
ret << ".";
}
first = false;
bool mustQuote = std::ranges::any_of(part, [](char c) -> bool {
return c == '"' || c == '.' || c == ' ';
});
if (mustQuote || part.empty()) {
ret << '"' << part << '"';
} else {
ret << part;
}
}
return ret.str();
}
std::pair<Value *, PosIdx> findAlongAttrPath(EvalState & state, const std::string & attrPath,
Bindings & autoArgs, Value & vIn)
{
Strings tokens = parseAttrPath(attrPath);
auto tokens = parseAttrPath(attrPath);
Value * v = &vIn;
PosIdx pos = noPos;
for (auto & attr : tokens) {
for (auto [attrPathIdx, attr] : enumerate(tokens)) {
/* Is i an index (integer) or a normal attribute name? */
auto attrIndex = string2Int<unsigned int>(attr);
@ -54,15 +93,24 @@ std::pair<Value *, PosIdx> findAlongAttrPath(EvalState & state, const std::strin
according to what is specified in the attrPath. */
if (!attrIndex) {
if (v->type() != nAttrs)
state.ctx.errors.make<TypeError>(
"the expression selected by the selection path '%1%' should be a set but is %2%",
attrPath,
showType(*v)).debugThrow();
if (attr.empty())
throw Error("empty attribute name in selection path '%1%'", attrPath);
if (v->type() != nAttrs) {
auto pathPart =
std::vector<std::string>(tokens.begin(), tokens.begin() + attrPathIdx);
state.ctx.errors
.make<TypeError>(
"the value being indexed in the selection path '%1%' at '%2%' should be a "
"set but is %3%: %4%",
attrPath,
unparseAttrPath(pathPart),
showType(*v),
ValuePrinter(state, *v, errorPrintOptions)
)
.debugThrow();
}
Bindings::iterator a = v->attrs->find(state.ctx.symbols.create(attr));
if (a == v->attrs->end()) {
std::set<std::string> attrNames;
@ -70,21 +118,40 @@ std::pair<Value *, PosIdx> findAlongAttrPath(EvalState & state, const std::strin
attrNames.insert(state.ctx.symbols[attr.name]);
auto suggestions = Suggestions::bestMatches(attrNames, attr);
throw AttrPathNotFound(suggestions, "attribute '%1%' in selection path '%2%' not found", attr, attrPath);
auto pathPart =
std::vector<std::string>(tokens.begin(), tokens.begin() + attrPathIdx);
throw AttrPathNotFound(
suggestions,
"attribute '%1%' in selection path '%2%' not found inside path '%3%', whose "
"contents are: %4%",
attr,
attrPath,
unparseAttrPath(pathPart),
ValuePrinter(state, *v, errorPrintOptions)
);
}
v = &*a->value;
pos = a->pos;
}
else {
if (!v->isList())
state.ctx.errors.make<TypeError>(
"the expression selected by the selection path '%1%' should be a list but is %2%",
} else {
if (!v->isList()) {
state.ctx.errors
.make<TypeError>(
"the expression selected by the selection path '%1%' should be a list but "
"is %2%: %3%",
attrPath,
showType(*v),
ValuePrinter(state, *v, errorPrintOptions)
)
.debugThrow();
}
if (*attrIndex >= v->listSize()) {
throw AttrPathNotFound(
"list index %1% in selection path '%2%' is out of range for list %3%",
*attrIndex,
attrPath,
showType(*v)).debugThrow();
if (*attrIndex >= v->listSize())
throw AttrPathNotFound("list index %1% in selection path '%2%' is out of range", *attrIndex, attrPath);
ValuePrinter(state, *v, errorPrintOptions)
);
}
v = v->listElems()[*attrIndex];
pos = noPos;

View file

@ -22,6 +22,18 @@ std::pair<Value *, PosIdx> findAlongAttrPath(
*/
std::pair<SourcePath, uint32_t> findPackageFilename(EvalState & state, Value & v, std::string what);
Strings parseAttrPath(std::string_view s);
/**
* Parses an attr path (as used in nix-build -A foo.bar.baz) into a list of tokens.
*
* Such an attr path is a dot-separated sequence of attribute names, which are possibly quoted.
* No escaping is performed; attribute names containing double quotes are unrepresentable.
*/
std::vector<std::string> parseAttrPath(std::string_view const s);
/**
* Converts an attr path from a list of strings into a string once more.
* The result returned is an attr path and is *not necessarily valid nix syntax*.
*/
std::string unparseAttrPath(std::vector<std::string> const & attrPath);
}

View file

@ -528,7 +528,7 @@ ref<AttrCursor> AttrCursor::getAttr(EvalState & state, const std::string & name)
return ref(p);
}
OrSuggestions<ref<AttrCursor>> AttrCursor::findAlongAttrPath(EvalState & state, const Strings & attrPath)
OrSuggestions<ref<AttrCursor>> AttrCursor::findAlongAttrPath(EvalState & state, const std::vector<std::string> & attrPath)
{
auto res = shared_from_this();
for (auto & attr : attrPath) {

View file

@ -128,7 +128,7 @@ public:
* Get an attribute along a chain of attrsets. Note that this does
* not auto-call functors or functions.
*/
OrSuggestions<ref<AttrCursor>> findAlongAttrPath(EvalState & state, const Strings & attrPath);
OrSuggestions<ref<AttrCursor>> findAlongAttrPath(EvalState & state, const std::vector<std::string> & attrPath);
std::string getString(EvalState & state);

View file

@ -123,6 +123,9 @@ struct MaintainCount
* A Rust/Python-like enumerate() iterator adapter.
*
* Borrowed from http://reedbeta.com/blog/python-like-enumerate-in-cpp17.
*
* FIXME(jade): remove once P2164R9 is implemented in libc++ and replace with
* std::views::enumerate: https://libcxx.llvm.org/Status/Cxx23.html
*/
template <typename T,
typename TIter = decltype(std::begin(std::declval<T>())),

View file

@ -24,13 +24,6 @@ eval_stdin_res=$(echo 'let a = {} // a; in a.foo' | nix-instantiate --eval -E -
echo $eval_stdin_res | grep "at «stdin»:1:15:"
echo $eval_stdin_res | grep "infinite recursion encountered"
# Attribute path errors
expectStderr 1 nix-instantiate --eval -E '{}' -A '"x' | grepQuiet "missing closing quote in selection path"
expectStderr 1 nix-instantiate --eval -E '[]' -A 'x' | grepQuiet "should be a set"
expectStderr 1 nix-instantiate --eval -E '{}' -A '1' | grepQuiet "should be a list"
expectStderr 1 nix-instantiate --eval -E '{}' -A '.' | grepQuiet "empty attribute name"
expectStderr 1 nix-instantiate --eval -E '[]' -A '1' | grepQuiet "out of range"
# Unknown setting warning
# NOTE(cole-h): behavior is different depending on the order, which is why we test an unknown option
# before and after the `'{}'`!

View file

@ -0,0 +1,56 @@
from functional2.testlib.fixtures import Nix
import pytest
from typing import NamedTuple
from textwrap import dedent
class ShouldError(NamedTuple):
expr: str
attr: str
error: str
ERR_CASES: list[ShouldError] = [
# FIXME(jade): expect-test system for pytest that allows for updating these easily
ShouldError('{}', '"x',
"""error: missing closing quote in selection path '"x'"""),
ShouldError(
'[]', 'x',
"""error: the value being indexed in the selection path 'x' at '' should be a set but is a list: [ ]"""
),
ShouldError(
'{}', '1',
"""error: the expression selected by the selection path '1' should be a list but is a set: { }"""
),
ShouldError('{}', '.',
"""error: empty attribute name in selection path '.'"""),
ShouldError('{ x."" = 2; }', 'x.""',
"""error: empty attribute name in selection path 'x.""'"""),
ShouldError('{ x."".y = 2; }', 'x."".y',
"""error: empty attribute name in selection path 'x."".y'"""),
ShouldError(
'[]', '1',
"""error: list index 1 in selection path '1' is out of range for list [ ]"""
),
ShouldError(
'{ x.y = { z = 2; a = 3; }; }', 'x.y.c',
dedent("""\
error: attribute 'c' in selection path 'x.y.c' not found inside path 'x.y', whose contents are: { a = 3; z = 2; }
Did you mean one of a or z?""")
),
]
# I do not know why it does this, but I guess it makes sense as allowing a tool
# to pass -A unconditionally and then allow a blank attribute to mean the whole
# thing
def test_attrpath_accepts_empty_attr_as_no_attr(nix: Nix):
assert nix.nix_instantiate(['--eval', '--expr', '{}', '-A',
'']).run().ok().stdout_plain == '{ }'
@pytest.mark.parametrize(['expr', 'attr', 'error'], ERR_CASES)
def test_attrpath_error(nix: Nix, expr: str, attr: str, error: str):
res = nix.nix_instantiate(['--eval', '--expr', expr, '-A', attr]).run()
assert res.expect(1).stderr_plain == error

View file

@ -1,4 +1,4 @@
from .testlib.fixtures import Nix
from functional2.testlib.fixtures import Nix
def test_trivial_addition(nix: Nix):
assert nix.eval('1 + 1').json() == 2

View file

@ -0,0 +1,2 @@
[tool.pytest.ini_options]
addopts = "-p no:xonsh"

View file

@ -4,6 +4,7 @@ import subprocess
from typing import Any
from pathlib import Path
from functools import partial, partialmethod
from functional2.testlib.terminal_code_eater import eat_terminal_codes
import dataclasses
@ -33,6 +34,26 @@ class CommandResult:
output=self.stdout)
return self
@property
def stdout_s(self) -> str:
"""Command stdout as str"""
return self.stdout.decode('utf-8', errors='replace')
@property
def stderr_s(self) -> str:
"""Command stderr as str"""
return self.stderr.decode('utf-8', errors='replace')
@property
def stdout_plain(self) -> str:
"""Command stderr as str with terminal escape sequences eaten and whitespace stripped"""
return eat_terminal_codes(self.stdout).decode('utf-8', errors='replace').strip()
@property
def stderr_plain(self) -> str:
"""Command stderr as str with terminal escape sequences eaten and whitespace stripped"""
return eat_terminal_codes(self.stderr).decode('utf-8', errors='replace').strip()
def json(self) -> Any:
self.ok()
return json.loads(self.stdout)

View file

@ -0,0 +1,109 @@
# copy pasta from libutil-support's terminal-code-eater.cc
import enum
import dataclasses
class State(enum.Enum):
ExpectESC = 1
ExpectESCSeq = 2
InCSIParams = 3
InCSIIntermediates = 4
InOSCParams = 5
InOSCST = 6
@dataclasses.dataclass
class TerminalCodeEater:
state: State = State.ExpectESC
def feed(self, data: bytes) -> bytes:
is_param_char = lambda c: c >= 0x30 and c <= 0x3f
is_intermediate_char = lambda c: c >= 0x20 and c <= 0x2f
is_final_char = lambda c: c >= 0x40 and c <= 0x7e
ret = bytearray()
for c in data:
match self.state:
case State.ExpectESC:
match c:
case 0x1b: # \e
self._transition(State.ExpectESCSeq)
continue
case 0xd: # \r
continue
ret.append(c)
case State.ExpectESCSeq:
match c:
case 0x5b:
# CSI ('[')
self._transition(State.InCSIParams)
continue
case 0x5d:
# OSC (']')
self._transition(State.InOSCParams)
continue
# FIXME(jade): whatever this was, we do not know how to
# delimit it, so we just eat the next character and
# keep going. Should we actually eat it?
case _:
self._transition(State.ExpectESC)
continue
# https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_(Control_Sequence_Introducer)_sequences
# A CSI sequence is: CSI [\x30-\x3f]* [\x20-\x2f]* [\x40-\x7e]
# ^ params ^ intermediates ^ final byte
case State.InCSIParams:
if is_final_char(c):
self._transition(State.ExpectESC)
continue
elif is_intermediate_char(c):
self._transition(State.InCSIIntermediates)
continue
elif is_param_char(c):
continue
else:
raise ValueError(f'Corrupt escape sequence, at {c:x}')
case State.InCSIIntermediates:
if is_final_char(c):
self._transition(State.ExpectESC)
continue
elif is_intermediate_char(c):
continue
else:
raise ValueError(
f'Corrupt escape sequence in intermediates, at {c:x}'
)
# An OSC is OSC [\x20-\x7e]* ST per ECMA-48
# where OSC is \x1b ] and ST is \x1b \.
case State.InOSCParams:
# first part of ST
if c == 0x1b:
self._transition(State.InOSCST)
continue
# OSC sequences can be ended by BEL on old xterms
elif c == 0x07:
self._transition(State.ExpectESC)
continue
elif c < 0x20 or c == 0x7f:
raise ValueError(f'Corrupt OSC sequence, at {c:x}')
# either way, eat it
continue
case State.InOSCST:
# ST ends by \
if c == 0x5c: # \
self._transition(State.ExpectESC)
elif c < 0x20 or c > 0x7e:
raise ValueError(
f'Corrupt OSC sequence in ST, at {c:x}')
else:
self._transition(State.InOSCParams)
continue
return bytes(ret)
def _transition(self, new_state: State):
self.state = new_state
def eat_terminal_codes(s: bytes) -> bytes:
return TerminalCodeEater().feed(s)

View file

@ -0,0 +1,8 @@
from functional2.testlib.terminal_code_eater import eat_terminal_codes
def test_eats_color():
assert eat_terminal_codes(b'\x1b[7mfoo blah bar\x1b[0m') == b'foo blah bar'
def test_eats_osc():
assert eat_terminal_codes(b'\x1b]8;;http://example.com\x1b\\This is a link\x1b]8;;\x1b\\') == b'This is a link'

View file

@ -8,7 +8,6 @@
#include "lix/libexpr/nixexpr.hh"
#include "lix/libexpr/eval.hh"
#include "lix/libexpr/eval-inline.hh"
#include "lix/libstore/store-api.hh"
#include "tests/libstore.hh"
@ -20,6 +19,10 @@ namespace nix {
initLibExpr();
}
EvalState & evalState() {
return state;
}
protected:
LibExprTest()
: LibStoreTest()

View file

@ -0,0 +1,46 @@
#include "lix/libexpr/attr-path.hh"
#include "lix/libexpr/attr-set.hh"
#include "tests/libexpr.hh"
#include <gtest/gtest.h>
#include <rapidcheck/gen/Arbitrary.h>
#include <rapidcheck/gen/Container.h>
#include <rapidcheck/gen/Predicate.h>
#include <rapidcheck/gtest.h>
namespace nix {
class AttrPathEval : public LibExprTest
{
public:
std::pair<Value *, PosIdx> testFindAlongAttrPath(std::string expr, std::string path);
};
RC_GTEST_PROP(AttrPath, prop_round_trip, ())
{
auto strings = *rc::gen::container<std::vector<std::string>>(
rc::gen::container<std::string>(rc::gen::distinctFrom('"'))
);
auto const unparsed = unparseAttrPath(strings);
auto const unparsedReparsed = parseAttrPath(unparsed);
RC_ASSERT(strings == unparsedReparsed);
}
std::pair<Value *, PosIdx> AttrPathEval::testFindAlongAttrPath(std::string expr, std::string path)
{
auto v = eval(expr);
auto bindings = evalState().ctx.buildBindings(0).finish();
return findAlongAttrPath(state, path, *bindings, v);
}
// n.b. I do not know why we throw for empty attrs but they are apparently
// disallowed.
TEST_F(AttrPathEval, emptyAttrsThrows)
{
std::string expr = "{a.\"\".b = 2;}";
ASSERT_NO_THROW(testFindAlongAttrPath(expr, "a"));
ASSERT_THROW(testFindAlongAttrPath(expr, "a.\"\".b"), Error);
ASSERT_THROW(testFindAlongAttrPath(expr, "a.\"\""), Error);
}
}

View file

@ -1,3 +1,4 @@
// this file has a hissing snake twin in functional2/testlib/terminal_code_eater.py
#include "terminal-code-eater.hh"
#include "lix/libutil/escape-char.hh"
#include <assert.h>
@ -26,7 +27,8 @@ void TerminalCodeEater::feed(char c, std::function<void(char)> on_char)
// Just eat \r, since it is part of clearing a line
case '\r':
return;
default: break;
default:
break;
}
if constexpr (DEBUG_EATER) {
std::cerr << "eater uneat" << MaybeHexEscapedChar{c} << "\n";
@ -39,6 +41,11 @@ void TerminalCodeEater::feed(char c, std::function<void(char)> on_char)
case '[':
transition(State::InCSIParams);
return;
case ']':
transition(State::InOSCParams);
return;
// FIXME(jade): whatever this was, we do not know how to delimit it, so
// we just eat the next character and keep going
default:
transition(State::ExpectESC);
return;
@ -76,6 +83,30 @@ void TerminalCodeEater::feed(char c, std::function<void(char)> on_char)
return;
}
break;
// An OSC is OSC [\x20-\x7e]* ST
// where OSC is \x1b ] and ST is \x1b \.
case State::InOSCParams:
if (c == '\e') {
// first part of ST
transition(State::InOSCST);
} else if (c == '\a') {
// OSC sequences can be ended by BEL on old xterms
transition(State::ExpectESC);
} else if (c < 0x20 or c > 0x7e) {
assert(false && "Corrupt OSC sequence");
}
// either way, eat it
return;
case State::InOSCST:
// ST ends by \.
if (c == '\\') {
transition(State::ExpectESC);
} else if (c < 0x20 || c == 0x7f) {
assert(false && "Corrupt OSC sequence, in ST");
} else {
transition(State::InOSCParams);
}
return;
}
}

View file

@ -20,6 +20,8 @@ private:
ExpectESCSeq,
InCSIParams,
InCSIIntermediates,
InOSCParams,
InOSCST,
};
State state = State::ExpectESC;

View file

@ -193,6 +193,7 @@ liblixexpr_test_support = declare_dependency(
)
libexpr_tests_sources = files(
'libexpr/attr-path.cc',
'libexpr/derived-path.cc',
'libexpr/error_traces.cc',
'libexpr/flakeref.cc',