From f1fac0b5c3b75efab781949fdff2b67ffdda2cb3 Mon Sep 17 00:00:00 2001 From: Yorick van Pelt Date: Thu, 9 Jan 2020 17:38:27 +0100 Subject: [PATCH] builtins.fromJSON: use nlohmann/json parser instead of custom parser --- src/libexpr/json-to-value.cc | 343 ++++++++++++++--------------------- 1 file changed, 138 insertions(+), 205 deletions(-) diff --git a/src/libexpr/json-to-value.cc b/src/libexpr/json-to-value.cc index 47cab2bb5..19d9a9b90 100644 --- a/src/libexpr/json-to-value.cc +++ b/src/libexpr/json-to-value.cc @@ -1,232 +1,165 @@ #include "json-to-value.hh" -#include +#include +#include +#include + +using json = nlohmann::json; namespace nix { +// for more information, refer to +// https://github.com/nlohmann/json/blob/master/include/nlohmann/detail/input/json_sax.hpp +class JSONSax : nlohmann::json_sax { + class JSONState { + protected: + JSONState* parent; + Value * v; + public: + virtual JSONState* resolve(EvalState &) + { + throw std::logic_error("tried to close toplevel json parser state"); + }; + explicit JSONState(JSONState* p) : parent(p), v(nullptr) {}; + explicit JSONState(Value* v) : v(v) {}; + JSONState(JSONState& p) = delete; + Value& value(EvalState & state) + { + if (v == nullptr) + v = state.allocValue(); + return *v; + }; + virtual ~JSONState() {}; + virtual void add() {}; + }; -static void skipWhitespace(const char * & s) -{ - while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r') s++; -} - - -/* - Parse an unicode escape sequence (4 hex characters following \u) in JSON string -*/ -static string parseUnicodeEscapeSequence(const char * & s) -{ - int codepoint = 0; - - const auto factors = { 12u, 8u, 4u, 0u }; - for (const auto factor : factors) - { - if (!*s) throw JSONParseError("got end-of-string in JSON string while parsing \\u sequence"); - - if (*s >= '0' and *s <= '9') { - codepoint += static_cast((static_cast(*s) - 0x30u) << factor); - } else if (*s >= 'A' and *s <= 'F') { - codepoint += static_cast((static_cast(*s) - 0x37u) << factor); - } else if (*s >= 'a' and *s <= 'f') { - codepoint += static_cast((static_cast(*s) - 0x57u) << factor); - } else { - throw JSONParseError(format("illegal character '%1%' in \\u escape sequence.") % *s); + class JSONObjectState : public JSONState { + using JSONState::JSONState; + ValueMap attrs = ValueMap(); + virtual JSONState* resolve(EvalState & state) override + { + Value& v = parent->value(state); + state.mkAttrs(v, attrs.size()); + for (auto & i : attrs) + v.attrs->push_back(Attr(i.first, i.second)); + return parent; } - s++; - } - - if ((codepoint > 0xd7ff && codepoint < 0xe000) || codepoint > 0x10ffff) { - throw JSONParseError("Unicode escape sequence is not a Unicode scalar value"); - } - - // taken from cpptoml.h - std::string result; - // See Table 3-6 of the Unicode standard - if (codepoint <= 0x7f) - { - // 1-byte codepoints: 00000000 0xxxxxxx - // repr: 0xxxxxxx - result += static_cast(codepoint & 0x7f); - } - else if (codepoint <= 0x7ff) - { - // 2-byte codepoints: 00000yyy yyxxxxxx - // repr: 110yyyyy 10xxxxxx - // - // 0x1f = 00011111 - // 0xc0 = 11000000 - // - result += static_cast(0xc0 | ((codepoint >> 6) & 0x1f)); - // - // 0x80 = 10000000 - // 0x3f = 00111111 - // - result += static_cast(0x80 | (codepoint & 0x3f)); - } - else if (codepoint <= 0xffff) - { - // 3-byte codepoints: zzzzyyyy yyxxxxxx - // repr: 1110zzzz 10yyyyyy 10xxxxxx - // - // 0xe0 = 11100000 - // 0x0f = 00001111 - // - result += static_cast(0xe0 | ((codepoint >> 12) & 0x0f)); - result += static_cast(0x80 | ((codepoint >> 6) & 0x1f)); - result += static_cast(0x80 | (codepoint & 0x3f)); - } - else - { - // 4-byte codepoints: 000uuuuu zzzzyyyy yyxxxxxx - // repr: 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx - // - // 0xf0 = 11110000 - // 0x07 = 00000111 - // - result += static_cast(0xf0 | ((codepoint >> 18) & 0x07)); - result += static_cast(0x80 | ((codepoint >> 12) & 0x3f)); - result += static_cast(0x80 | ((codepoint >> 6) & 0x3f)); - result += static_cast(0x80 | (codepoint & 0x3f)); - } - return result; -} - - -static string parseJSONString(const char * & s) -{ - string res; - if (*s++ != '"') throw JSONParseError("expected JSON string"); - while (*s != '"') { - if (!*s) throw JSONParseError("got end-of-string in JSON string"); - if (*s == '\\') { - s++; - if (*s == '"') res += '"'; - else if (*s == '\\') res += '\\'; - else if (*s == '/') res += '/'; - else if (*s == 'b') res += '\b'; - else if (*s == 'f') res += '\f'; - else if (*s == 'n') res += '\n'; - else if (*s == 'r') res += '\r'; - else if (*s == 't') res += '\t'; - else if (*s == 'u') { - res += parseUnicodeEscapeSequence(++s); - // to neuter the outside s++ - s--; - } else throw JSONParseError("invalid escaped character in JSON string"); - s++; - } else - res += *s++; - } - s++; - return res; -} - - -static void parseJSON(EvalState & state, const char * & s, Value & v) -{ - skipWhitespace(s); - - if (!*s) throw JSONParseError("expected JSON value"); - - if (*s == '[') { - s++; - ValueVector values; - values.reserve(128); - skipWhitespace(s); - while (1) { - if (values.empty() && *s == ']') break; - Value * v2 = state.allocValue(); - parseJSON(state, s, *v2); - values.push_back(v2); - skipWhitespace(s); - if (*s == ']') break; - if (*s != ',') throw JSONParseError("expected ',' or ']' after JSON array element"); - s++; + virtual void add() override { v = nullptr; }; + public: + void key(string_t& name, EvalState & state) + { + attrs[state.symbols.create(name)] = &value(state); } - s++; - state.mkList(v, values.size()); - for (size_t n = 0; n < values.size(); ++n) - v.listElems()[n] = values[n]; - } + }; - else if (*s == '{') { - s++; - ValueMap attrs; - while (1) { - skipWhitespace(s); - if (attrs.empty() && *s == '}') break; - string name = parseJSONString(s); - skipWhitespace(s); - if (*s != ':') throw JSONParseError("expected ':' in JSON object"); - s++; - Value * v2 = state.allocValue(); - parseJSON(state, s, *v2); - attrs[state.symbols.create(name)] = v2; - skipWhitespace(s); - if (*s == '}') break; - if (*s != ',') throw JSONParseError("expected ',' or '}' after JSON member"); - s++; + class JSONListState : public JSONState { + ValueVector values = ValueVector(); + virtual JSONState* resolve(EvalState & state) override + { + Value& v = parent->value(state); + state.mkList(v, values.size()); + for (size_t n = 0; n < values.size(); ++n) { + v.listElems()[n] = values[n]; + } + return parent; } - state.mkAttrs(v, attrs.size()); - for (auto & i : attrs) - v.attrs->push_back(Attr(i.first, i.second)); - v.attrs->sort(); - s++; - } - - else if (*s == '"') { - mkString(v, parseJSONString(s)); - } - - else if (isdigit(*s) || *s == '-' || *s == '.' ) { - // Buffer into a string first, then use built-in C++ conversions - std::string tmp_number; - ValueType number_type = tInt; - - while (isdigit(*s) || *s == '-' || *s == '.' || *s == 'e' || *s == 'E') { - if (*s == '.' || *s == 'e' || *s == 'E') - number_type = tFloat; - tmp_number += *s++; + virtual void add() override { + values.push_back(v); + v = nullptr; + }; + public: + JSONListState(JSONState* p, std::size_t reserve) : JSONState(p) + { + values.reserve(reserve); } + }; - try { - if (number_type == tFloat) - mkFloat(v, stod(tmp_number)); - else - mkInt(v, stol(tmp_number)); - } catch (std::invalid_argument & e) { - throw JSONParseError("invalid JSON number"); - } catch (std::out_of_range & e) { - throw JSONParseError("out-of-range JSON number"); - } + EvalState & state; + JSONState* rs; + + template inline bool handle_value(T f, Args... args) + { + f(rs->value(state), args...); + rs->add(); + return true; } - else if (strncmp(s, "true", 4) == 0) { - s += 4; - mkBool(v, true); +public: + JSONSax(EvalState & state, Value & v) : state(state), rs(new JSONState(&v)) {}; + ~JSONSax() { delete rs; }; + + bool null() + { + return handle_value(mkNull); } - else if (strncmp(s, "false", 5) == 0) { - s += 5; - mkBool(v, false); + bool boolean(bool val) + { + return handle_value(mkBool, val); } - else if (strncmp(s, "null", 4) == 0) { - s += 4; - mkNull(v); + bool number_integer(number_integer_t val) + { + return handle_value(mkInt, val); } - else throw JSONParseError("unrecognised JSON value"); -} + bool number_unsigned(number_unsigned_t val) + { + return handle_value(mkInt, val); + } + bool number_float(number_float_t val, const string_t& s) + { + return handle_value(mkFloat, val); + } + + bool string(string_t& val) + { + return handle_value(mkString, val.c_str()); + } + + bool start_object(std::size_t len) + { + JSONState* old = rs; + rs = new JSONObjectState(old); + return true; + } + + bool key(string_t& name) + { + dynamic_cast(rs)->key(name, state); + return true; + } + + bool end_object() { + JSONState* old = rs; + rs = old->resolve(state); + delete old; + rs->add(); + return true; + } + + bool end_array() { + return end_object(); + } + + bool start_array(size_t len) { + JSONState* old = rs; + rs = new JSONListState(old, len != std::numeric_limits::max() ? len : 128); + return true; + } + + bool parse_error(std::size_t, const std::string&, const nlohmann::detail::exception& ex) { + throw JSONParseError(ex.what()); + } +}; void parseJSON(EvalState & state, const string & s_, Value & v) { - const char * s = s_.c_str(); - parseJSON(state, s, v); - skipWhitespace(s); - if (*s) throw JSONParseError(format("expected end-of-string while parsing JSON value: %1%") % s); + JSONSax parser(state, v); + bool res = json::sax_parse(s_, &parser); + if (!res) + throw JSONParseError("Invalid JSON Value"); } - }