forked from lix-project/lix
a385e51a08
after #6218 `Symbol` no longer confers a uniqueness invariant on the string it wraps; it is now possible to create multiple symbols that compare equal but whose string contents have different addresses. this guarantee is now only provided by `SymbolIdx`, leaving `Symbol` only as a string wrapper that knows about the intricacies of how symbols need to be formatted for output. this change renames `SymbolIdx` to `Symbol` to restore the previous semantics of `Symbol` to that name. we also keep the wrapper type and rename it to `SymbolStr` instead of returning plain strings from lookups into the symbol table, because symbols are formatted for output in many places. theoretically we do not need `SymbolStr`, only a function that formats a string for output as a symbol, but having to wrap every symbol that appears in a message into e.g. `formatSymbol()` is error-prone and inconvenient.
120 lines
3 KiB
C++
120 lines
3 KiB
C++
#pragma once
|
|
|
|
#include <list>
|
|
#include <map>
|
|
#include <unordered_map>
|
|
|
|
#include "types.hh"
|
|
#include "chunked-vector.hh"
|
|
|
|
namespace nix {
|
|
|
|
/* Symbol table used by the parser and evaluator to represent and look
|
|
up identifiers and attributes efficiently. SymbolTable::create()
|
|
converts a string into a symbol. Symbols have the property that
|
|
they can be compared efficiently (using an equality test),
|
|
because the symbol table stores only one copy of each string. */
|
|
|
|
/* Non-owning handle to the text of a symbol. Its main reason to exist is the
   dedicated operator<< for ostreams declared below: handing out plain strings
   from SymbolTable would force every fmt()ed symbol to be wrapped by hand,
   which is inconvenient and error-prone. */
class SymbolStr
{
    friend class SymbolTable;

    /* Address of the wrapped string. The string is not copied, so it has to
       outlive this object. */
    const std::string * s;

    /* Private on purpose: only the friend SymbolTable can create instances. */
    explicit SymbolStr(const std::string & symbol)
        : s(&symbol)
    {
    }

public:
    /* Implicit view of the wrapped text. */
    operator const std::string_view () const
    {
        return std::string_view(*s);
    }

    /* Implicit access to the wrapped std::string itself. */
    operator const std::string & () const
    {
        return *s;
    }

    /* Compares the wrapped text (not its address) against s2. */
    bool operator == (std::string_view s2) const
    {
        return std::string_view(*s) == s2;
    }

    /* Stream output for symbols; defined out of line. */
    friend std::ostream & operator <<(std::ostream & os, const SymbolStr & symbol);
};
|
|
|
|
/* Small integer handle for a symbol. Comparisons and ordering work purely on
   the numeric id. The id 0 is what a default-constructed Symbol carries, and
   such a Symbol converts to false. */
class Symbol
{
    friend class SymbolTable;

    uint32_t id;

    /* Private on purpose: only the friend SymbolTable can pick an id. */
    explicit Symbol(uint32_t rawId)
        : id(rawId)
    {
    }

public:
    /* Creates the "no symbol" value (id 0). */
    Symbol()
        : id(0)
    {
    }

    /* True for every Symbol except the id-0 one. */
    explicit operator bool() const
    {
        return id != 0;
    }

    bool operator==(const Symbol other) const
    {
        return id == other.id;
    }

    bool operator!=(const Symbol other) const
    {
        return !(id == other.id);
    }

    /* Orders symbols by id, not by their text. */
    bool operator<(const Symbol other) const
    {
        return id < other.id;
    }
};
|
|
|
|
class SymbolTable
|
|
{
|
|
private:
|
|
std::unordered_map<std::string_view, std::pair<const std::string *, uint32_t>> symbols;
|
|
ChunkedVector<std::string, 8192> store{16};
|
|
|
|
public:
|
|
Symbol create(std::string_view s)
|
|
{
|
|
// Most symbols are looked up more than once, so we trade off insertion performance
|
|
// for lookup performance.
|
|
// TODO: could probably be done more efficiently with transparent Hash and Equals
|
|
// on the original implementation using unordered_set
|
|
auto it = symbols.find(s);
|
|
if (it != symbols.end()) return Symbol(it->second.second + 1);
|
|
|
|
const auto & [rawSym, idx] = store.add(std::string(s));
|
|
symbols.emplace(rawSym, std::make_pair(&rawSym, idx));
|
|
return Symbol(idx + 1);
|
|
}
|
|
|
|
std::vector<SymbolStr> resolve(const std::vector<Symbol> & symbols) const
|
|
{
|
|
std::vector<SymbolStr> result;
|
|
result.reserve(symbols.size());
|
|
for (auto sym : symbols)
|
|
result.push_back((*this)[sym]);
|
|
return result;
|
|
}
|
|
|
|
SymbolStr operator[](Symbol s) const
|
|
{
|
|
if (s.id == 0 || s.id > store.size())
|
|
abort();
|
|
return SymbolStr(store[s.id - 1]);
|
|
}
|
|
|
|
size_t size() const
|
|
{
|
|
return store.size();
|
|
}
|
|
|
|
size_t totalSize() const;
|
|
|
|
template<typename T>
|
|
void dump(T callback) const
|
|
{
|
|
store.forEach(callback);
|
|
}
|
|
};
|
|
|
|
}
|