diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc
index bc93f75af..30794d5ab 100644
--- a/src/libexpr/eval.cc
+++ b/src/libexpr/eval.cc
@@ -922,12 +922,12 @@ void EvalState::mkThunk_(Value & v, Expr * expr)
 
 void EvalState::mkPos(Value & v, PosIdx p)
 {
-    auto pos = positions[p];
-    if (auto path = std::get_if<SourcePath>(&pos.origin)) {
+    auto origin = positions.originOf(p);
+    if (auto path = std::get_if<SourcePath>(&origin)) {
         auto attrs = buildBindings(3);
         attrs.alloc(sFile).mkString(path->path.abs());
-        attrs.alloc(sLine).mkInt(pos.line);
-        attrs.alloc(sColumn).mkInt(pos.column);
+        // line and column are not computed here; they are set up as thunks.
+        makePositionThunks(*this, p, attrs.alloc(sLine), attrs.alloc(sColumn));
         v.mkAttrs(attrs);
     } else
         v.mkNull();
diff --git a/src/libexpr/flake/flake.cc b/src/libexpr/flake/flake.cc
index 451780c89..4faafd505 100644
--- a/src/libexpr/flake/flake.cc
+++ b/src/libexpr/flake/flake.cc
@@ -234,11 +234,10 @@ static Flake getFlake(
         .storePath = storePath,
     };
 
+    // NOTE evalFile forces vInfo to be an attrset because mustBeTrivial is true.
     Value vInfo;
     state.evalFile(state.rootPath(CanonPath(flakeFile)), vInfo, true); // FIXME: symlink attack
 
-    expectType(state, nAttrs, vInfo, state.positions.add({state.rootPath(CanonPath(flakeFile))}, 1, 1));
-
     if (auto description = vInfo.attrs->get(state.sDescription)) {
         expectType(state, nString, *description->value, description->pos);
         flake.description = description->value->c_str();
diff --git a/src/libexpr/lexer.l b/src/libexpr/lexer.l
index 380048c77..05e754cf1 100644
--- a/src/libexpr/lexer.l
+++ b/src/libexpr/lexer.l
@@ -33,33 +33,16 @@ namespace nix {
 
 static void initLoc(YYLTYPE * loc)
 {
-    loc->first_line = loc->last_line = 1;
-    loc->first_column = loc->last_column = 1;
+    loc->first_line = loc->last_line = 0;
+    loc->first_column = loc->last_column = 0;
 }
 
 static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
 {
     loc->stash();
 
-    loc->first_line = loc->last_line;
     loc->first_column = loc->last_column;
-
-    for (size_t i = 0; i < len; i++) {
-        switch (*s++) {
-        case '\r':
-            if (*s == '\n') { /* cr/lf */
-                i++;
-                s++;
-            }
-            /* fall through */
-        case '\n':
-            ++loc->last_line;
-            loc->last_column = 1;
-            break;
-        default:
-            ++loc->last_column;
-        }
-    }
+    loc->last_column += len;
 }
 
 
diff --git a/src/libexpr/nixexpr.cc b/src/libexpr/nixexpr.cc
index 9a8b9616b..cf5f3e38c 100644
--- a/src/libexpr/nixexpr.cc
+++ b/src/libexpr/nixexpr.cc
@@ -583,6 +583,47 @@ std::string ExprLambda::showNamePos(const EvalState & state) const
 
 
 
+/* Position table. */
+
+/* Translate a PosIdx back into a full Pos (origin, 1-based line and column).
+   The table of line start offsets of an origin is built on first use and
+   kept in `lines`, so later lookups for the same origin can reuse it. */
+Pos PosTable::operator[](PosIdx p) const
+{
+    auto origin = resolve(p);
+    if (!origin)
+        return {};
+
+    const auto offset = origin->offsetOf(p);
+
+    Pos result{0, 0, origin->origin};
+    auto lines = this->lines.lock();
+    // bind by reference: a copy would be filled and thrown away on every
+    // call, leaving the cached entry permanently empty.
+    auto & linesForInput = (*lines)[origin->offset];
+
+    if (linesForInput.empty()) {
+        auto source = result.getSource().value_or("");
+        const char * begin = source.data();
+        for (Pos::LinesIterator it(source), end; it != end; it++)
+            linesForInput.push_back(it->data() - begin);
+        // if the source is empty or unavailable the loop may add nothing;
+        // make sure line 1 (byte 0) exists so std::prev below stays valid.
+        if (linesForInput.empty())
+            linesForInput.push_back(0);
+    }
+    // the first line starts at byte 0 and is always present, so upper_bound
+    // cannot return begin() and std::prev never steps before the table.
+    auto lineStartOffset = std::prev(
+        std::upper_bound(linesForInput.begin(), linesForInput.end(), offset));
+
+    result.line = 1 + (lineStartOffset - linesForInput.begin());
+    result.column = 1 + (offset - *lineStartOffset);
+    return result;
+}
+
+
+
 /* Symbol table.
*/
 
 size_t SymbolTable::totalSize() const
diff --git a/src/libexpr/nixexpr.hh b/src/libexpr/nixexpr.hh
index 2390c4286..5fe722f04 100644
--- a/src/libexpr/nixexpr.hh
+++ b/src/libexpr/nixexpr.hh
@@ -7,7 +7,6 @@
 #include "value.hh"
 #include "symbol-table.hh"
 #include "error.hh"
-#include "chunked-vector.hh"
 #include "position.hh"
 #include "eval-error.hh"
 #include "pos-idx.hh"
diff --git a/src/libexpr/parser-state.hh b/src/libexpr/parser-state.hh
index 34aef661f..024e79c43 100644
--- a/src/libexpr/parser-state.hh
+++ b/src/libexpr/parser-state.hh
@@ -24,20 +24,15 @@ struct ParserLocation
     int last_line, last_column;
 
     // backup to recover from yyless(0)
-    int stashed_first_line, stashed_first_column;
-    int stashed_last_line, stashed_last_column;
+    int stashed_first_column, stashed_last_column;
 
     void stash() {
-        stashed_first_line = first_line;
         stashed_first_column = first_column;
-        stashed_last_line = last_line;
         stashed_last_column = last_column;
     }
 
     void unstash() {
-        first_line = stashed_first_line;
         first_column = stashed_first_column;
-        last_line = stashed_last_line;
         last_column = stashed_last_column;
     }
 };
@@ -276,7 +271,7 @@ inline Expr * ParserState::stripIndentation(const PosIdx pos,
 
 inline PosIdx ParserState::at(const ParserLocation & loc)
 {
-    return positions.add(origin, loc.first_line, loc.first_column);
+    return positions.add(origin, loc.first_column);
 }
 
 }
diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y
index 59f088d53..bff066170 100644
--- a/src/libexpr/parser.y
+++ b/src/libexpr/parser.y
@@ -438,7 +438,7 @@ Expr * parseExprFromBuf(
         .symbols = symbols,
         .positions = positions,
         .basePath = basePath,
-        .origin = {origin},
+        .origin = positions.addOrigin(origin, length),
         .rootFS = rootFS,
         .s = astSymbols,
     };
diff --git a/src/libexpr/pos-idx.hh b/src/libexpr/pos-idx.hh
index 9949f1dc5..e94fd85c6 100644
--- a/src/libexpr/pos-idx.hh
+++ b/src/libexpr/pos-idx.hh
@@ -6,6 +6,7 @@ namespace nix {
 
 class PosIdx
 {
+    friend struct LazyPosAcessors;
     friend class PosTable;
 
 private:
diff --git a/src/libexpr/pos-table.hh b/src/libexpr/pos-table.hh
index 1decf3c85..8a0a3ba86 100644
--- a/src/libexpr/pos-table.hh
+++ b/src/libexpr/pos-table.hh
@@ -7,6 +7,7 @@
 #include "chunked-vector.hh"
 #include "pos-idx.hh"
 #include "position.hh"
+#include "sync.hh"
 
 namespace nix {
 
@@ -17,66 +18,83 @@ public:
     {
         friend PosTable;
     private:
-        // must always be invalid by default, add() replaces this with the actual value.
-        // subsequent add() calls use this index as a token to quickly check whether the
-        // current origins.back() can be reused or not.
-        mutable uint32_t idx = std::numeric_limits<uint32_t>::max();
+        // start of this origin's index range; also its key in `origins`.
+        uint32_t offset;
 
-        // Used for searching in PosTable::[].
-        explicit Origin(uint32_t idx)
-            : idx(idx)
-            , origin{std::monostate()}
-        {
-        }
+        Origin(Pos::Origin origin, uint32_t offset, size_t size):
+            offset(offset), origin(origin), size(size)
+        {}
 
     public:
         const Pos::Origin origin;
+        // size of the input as passed to addOrigin().
+        const size_t size;
 
-        Origin(Pos::Origin origin)
-            : origin(origin)
+        // offset of p relative to the start of this origin.
+        uint32_t offsetOf(PosIdx p) const
         {
+            return p.id - 1 - offset;
         }
     };
 
-    struct Offset
-    {
-        uint32_t line, column;
-    };
-
 private:
-    std::vector<Origin> origins;
-    ChunkedVector<Offset, 1024> offsets;
+    // line start offsets of one origin, ascending (built in operator[]).
+    using Lines = std::vector<uint32_t>;
 
-public:
-    PosTable()
-        : offsets(1024)
-    {
-        origins.reserve(1024);
-    }
+    std::map<uint32_t, Origin> origins;
+    // lazily filled line tables, keyed by origin offset.
+    mutable Sync<std::map<uint32_t, Lines>> lines;
 
-    PosIdx add(const Origin & origin, uint32_t line, uint32_t column)
+    // last origin starting at or before p; nullptr for the invalid index
+    // or while no origin has been registered.
+    const Origin * resolve(PosIdx p) const
     {
-        const auto idx = offsets.add({line, column}).second;
-        if (origins.empty() || origins.back().idx != origin.idx) {
-            origin.idx = idx;
-            origins.push_back(origin);
-        }
-        return PosIdx(idx + 1);
-    }
+        if (p.id == 0 || origins.empty())
+            return nullptr;
 
-    Pos operator[](PosIdx p) const
-    {
-        if (p.id == 0 || p.id > offsets.size())
-            return {};
         const auto idx = p.id - 1;
         /* we want the last key <= idx, so we'll take prev(first key > idx).
-           this is guaranteed to never rewind origin.begin because the first
-           key is always 0. */
-        const auto pastOrigin = std::upper_bound(
-            origins.begin(), origins.end(), Origin(idx), [](const auto & a, const auto & b) { return a.idx < b.idx; });
-        const auto origin = *std::prev(pastOrigin);
-        const auto offset = offsets[idx];
-        return {offset.line, offset.column, origin.origin};
+           this is guaranteed to never rewind origins.begin() because the
+           table is non-empty here and the first key is always 0. */
+        const auto pastOrigin = origins.upper_bound(idx);
+        return &std::prev(pastOrigin)->second;
+    }
+
+public:
+    // register an input of the given size and reserve its index range.
+    Origin addOrigin(Pos::Origin origin, size_t size)
+    {
+        uint32_t offset = 0;
+        if (auto it = origins.rbegin(); it != origins.rend())
+            offset = it->first + it->second.size;
+        // +1 because all PosIdx are offset by 1 to begin with, and
+        // another +1 to ensure that all origins can point to EOF, eg
+        // on (invalid) empty inputs. the bound is checked against the
+        // uint32_t range explicitly: `2 + offset + size` is evaluated in
+        // size_t and does not wrap where size_t is wider than 32 bits.
+        constexpr auto maxIdx = std::numeric_limits<uint32_t>::max();
+        if (offset > maxIdx - 2 || size > maxIdx - 2 - offset)
+            return Origin{origin, offset, 0};
+        return origins.emplace(offset, Origin{origin, offset, size}).first->second;
+    }
+
+    // index of the position `offset` into `origin`; a default-constructed
+    // PosIdx if that lies past the size of the origin.
+    PosIdx add(const Origin & origin, size_t offset)
+    {
+        if (offset > origin.size)
+            return PosIdx();
+        return PosIdx(1 + origin.offset + offset);
+    }
+
+    Pos operator[](PosIdx p) const;
+
+    // origin of p, or std::monostate if p cannot be resolved.
+    Pos::Origin originOf(PosIdx p) const
+    {
+        if (auto o = resolve(p))
+            return o->origin;
+        return std::monostate{};
     }
 };
 
diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc
index 8c6aeffac..d631d929c 100644
--- a/src/libexpr/primops.cc
+++ b/src/libexpr/primops.cc
@@ -2496,6 +2496,54 @@ static RegisterPrimOp primop_unsafeGetAttrPos(PrimOp {
     .fun = prim_unsafeGetAttrPos,
 });
 
+// access to exact position information (ie, line and column numbers) is deferred
+// due to the cost associated with calculating that information and how rarely
+// it is used in practice. this is achieved by creating thunks to otherwise
+// inaccessible primops that are not exposed as __op or under builtins to turn
+// the internal PosIdx back into a line and column number, respectively.
exposing
+// these primops in any way would at best be not useful and at worst create wildly
+// nondeterministic eval results depending on parse order of files.
+//
+// in a simpler world this would instead be implemented as another kind of thunk,
+// but each type of thunk has an associated runtime cost in the current evaluator.
+// as with black holes this cost is too high to justify another thunk type to check
+// for in the very hot path that is forceValue.
+static struct LazyPosAcessors { // sic: pos-idx.hh befriends the struct under this exact spelling
+    PrimOp primop_lineOfPos{
+        .arity = 1,
+        .fun = [] (EvalState & state, PosIdx pos, Value * * args, Value & v) {
+            v.mkInt(state.positions[PosIdx(args[0]->integer)].line); // args[0] carries the raw PosIdx id
+        }
+    };
+    PrimOp primop_columnOfPos{
+        .arity = 1,
+        .fun = [] (EvalState & state, PosIdx pos, Value * * args, Value & v) {
+            v.mkInt(state.positions[PosIdx(args[0]->integer)].column); // args[0] carries the raw PosIdx id
+        }
+    };
+
+    Value lineOfPos, columnOfPos; // primop values wrapping the two PrimOps above
+
+    LazyPosAcessors()
+    {
+        lineOfPos.mkPrimOp(&primop_lineOfPos);
+        columnOfPos.mkPrimOp(&primop_columnOfPos);
+    }
+
+    void operator()(EvalState & state, const PosIdx pos, Value & line, Value & column)
+    {
+        Value * posV = state.allocValue();
+        posV->mkInt(pos.id); // the index travels as a plain integer, shared by both applications
+        line.mkApp(&lineOfPos, posV); // application of the primop to posV; nothing is looked up here
+        column.mkApp(&columnOfPos, posV);
+    }
+} makeLazyPosAccessors;
+
+void makePositionThunks(EvalState & state, const PosIdx pos, Value & line, Value & column)
+{
+    makeLazyPosAccessors(state, pos, line, column);
+}
+
 /* Dynamic version of the `?' operator.
*/
 static void prim_hasAttr(EvalState & state, const PosIdx pos, Value * * args, Value & v)
 {
diff --git a/src/libexpr/primops.hh b/src/libexpr/primops.hh
index 45486608f..9f76975db 100644
--- a/src/libexpr/primops.hh
+++ b/src/libexpr/primops.hh
@@ -51,4 +51,6 @@ void prim_importNative(EvalState & state, const PosIdx pos, Value * * args, Valu
  */
 void prim_exec(EvalState & state, const PosIdx pos, Value * * args, Value & v);
 
+void makePositionThunks(EvalState & state, const PosIdx pos, Value & line, Value & column);
+
 }
diff --git a/tests/unit/libexpr/primops.cc b/tests/unit/libexpr/primops.cc
index 6d7649b3c..b1426edae 100644
--- a/tests/unit/libexpr/primops.cc
+++ b/tests/unit/libexpr/primops.cc
@@ -151,7 +151,8 @@ namespace nix {
     }
 
     TEST_F(PrimOpTest, unsafeGetAttrPos) {
-        state.corepkgsFS->addFile(CanonPath("foo.nix"), "{ y = \"x\"; }");
+        // three line terminators (LF, CRLF, CR) precede the attrset, so `y` is expected on line 4.
+        state.corepkgsFS->addFile(CanonPath("foo.nix"), "\n\r\n\r{ y = \"x\"; }");
 
         auto expr = "builtins.unsafeGetAttrPos \"y\" (import <nix/foo.nix>)";
         auto v = eval(expr);
@@ -165,10 +166,13 @@ namespace nix {
 
         auto line = v.attrs->find(createSymbol("line"));
         ASSERT_NE(line, nullptr);
-        ASSERT_THAT(*line->value, IsIntEq(1));
+        // line and column may be unevaluated thunks now, so force them before inspecting.
+        state.forceValue(*line->value, noPos);
+        ASSERT_THAT(*line->value, IsIntEq(4));
 
         auto column = v.attrs->find(createSymbol("column"));
         ASSERT_NE(column, nullptr);
+        state.forceValue(*column->value, noPos);
         ASSERT_THAT(*column->value, IsIntEq(3));
     }
 
diff --git a/tests/unit/libexpr/value/print.cc b/tests/unit/libexpr/value/print.cc
index c1de3a6a9..d6abf3917 100644
--- a/tests/unit/libexpr/value/print.cc
+++ b/tests/unit/libexpr/value/print.cc
@@ -110,8 +110,8 @@ TEST_F(ValuePrintingTests, vLambda)
         .up = nullptr,
         .values = { }
     };
-    PosTable::Origin origin((std::monostate()));
-    auto posIdx = state.positions.add(origin, 1, 1);
+    PosTable::Origin origin = state.positions.addOrigin(std::monostate(), 1);
+    auto posIdx = state.positions.add(origin, 0);
     auto body = ExprInt(0);
     auto formals = Formals {};
 
@@ -558,8 +558,8 @@ TEST_F(ValuePrintingTests, ansiColorsLambda)
         .up = nullptr,
         .values = { }
     };
-    PosTable::Origin origin((std::monostate()));
-    auto posIdx = state.positions.add(origin, 1, 1);
+    PosTable::Origin origin = state.positions.addOrigin(std::monostate(), 1);
+    auto posIdx = state.positions.add(origin, 0);
     auto body = ExprInt(0);
     auto formals = Formals {};
 