From 61a9d16d5c1d4088981f7d0ca08655f9155cc015 Mon Sep 17 00:00:00 2001 From: pennae Date: Tue, 21 Dec 2021 09:17:31 +0100 Subject: [PATCH] don't strdup tokens in the lexer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit every stringy token the lexer returns is turned into a Symbol and not used further, so we don't have to strdup. using a string_view is sufficient, but due to limitations of the current parser we have to use a POD type that holds the same information. gives ~2% on system build, 6% on search, 8% on parsing alone # before Benchmark 1: nix search --offline nixpkgs hello Time (mean ± σ): 610.6 ms ± 2.4 ms [User: 602.5 ms, System: 7.8 ms] Range (min … max): 606.6 ms … 617.3 ms 50 runs Benchmark 2: nix eval -f hackage-packages.nix Time (mean ± σ): 430.1 ms ± 1.4 ms [User: 393.1 ms, System: 36.7 ms] Range (min … max): 428.2 ms … 434.2 ms 50 runs Benchmark 3: nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 3.032 s ± 0.005 s [User: 2.808 s, System: 0.223 s] Range (min … max): 3.023 s … 3.041 s 50 runs # after Benchmark 1: nix search --offline nixpkgs hello Time (mean ± σ): 574.7 ms ± 2.8 ms [User: 566.3 ms, System: 8.0 ms] Range (min … max): 569.2 ms … 580.7 ms 50 runs Benchmark 2: nix eval -f hackage-packages.nix Time (mean ± σ): 394.4 ms ± 0.8 ms [User: 361.8 ms, System: 32.3 ms] Range (min … max): 392.7 ms … 395.7 ms 50 runs Benchmark 3: nix eval --raw --impure --expr 'with import {}; system' Time (mean ± σ): 2.976 s ± 0.005 s [User: 2.757 s, System: 0.218 s] Range (min … max): 2.966 s … 2.990 s 50 runs --- src/libexpr/lexer.l | 14 +++++++------- src/libexpr/parser.y | 25 ++++++++++++++++--------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/libexpr/lexer.l b/src/libexpr/lexer.l index c18877e29..70e99d2d2 100644 --- a/src/libexpr/lexer.l +++ b/src/libexpr/lexer.l @@ -139,7 +139,7 @@ or { return OR_KW; } \/\/ { return UPDATE; } \+\+ { return CONCAT; } -{ID} { yylval->id = strdup(yytext); return ID; } +{ID} { yylval->id = {yytext, (size_t) yyleng}; return ID; } {INT} { errno = 0; try { yylval->n = boost::lexical_cast(yytext); @@ -221,14 +221,14 @@ or { return OR_KW; } {PATH_SEG} { POP_STATE(); PUSH_STATE(INPATH_SLASH); - yylval->path = strdup(yytext); + yylval->path = {yytext, (size_t) yyleng}; return PATH; } {HPATH_START} { POP_STATE(); PUSH_STATE(INPATH_SLASH); - yylval->path = strdup(yytext); + yylval->path = {yytext, (size_t) yyleng}; return HPATH; } @@ -237,7 +237,7 @@ or { return OR_KW; } PUSH_STATE(INPATH_SLASH); else PUSH_STATE(INPATH); - yylval->path = strdup(yytext); + yylval->path = {yytext, (size_t) yyleng}; return PATH; } {HPATH} { @@ -245,7 +245,7 @@ or { return OR_KW; } PUSH_STATE(INPATH_SLASH); else PUSH_STATE(INPATH); - yylval->path = strdup(yytext); + yylval->path = {yytext, (size_t) yyleng}; return HPATH; } @@ -280,8 +280,8 @@ or { return OR_KW; } throw ParseError("path has a trailing slash"); } -{SPATH} { yylval->path = strdup(yytext); return SPATH; } -{URI} { yylval->uri = strdup(yytext); return URI; } +{SPATH} { yylval->path = {yytext, (size_t) yyleng}; return SPATH; } +{URI} { yylval->uri = {yytext, (size_t) yyleng}; return URI; } [ \t\r\n]+ /* eat up whitespace */ \#[^\r\n]* /* single-line comments */ diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y index f8aaea582..049a149cc 100644 --- a/src/libexpr/parser.y +++ b/src/libexpr/parser.y @@ -273,9 +273,16 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParseData * data, const char * err nix::Formal * formal; nix::NixInt n; nix::NixFloat nf; - const char * id; // !!! -> Symbol - char * path; - char * uri; + // using C a struct allows us to avoid having to define the special + // members that using string_view here would implicitly delete. + struct StringToken { + const char * p; + size_t l; + operator std::string_view() const { return {p, l}; } + }; + StringToken id; // !!! -> Symbol + StringToken path; + StringToken uri; std::vector * attrNames; std::vector > * string_parts; } @@ -397,7 +404,7 @@ expr_select expr_simple : ID { - if (strcmp($1, "__curPos") == 0) + if (strncmp($1.p, "__curPos", $1.l) == 0) $$ = new ExprPos(CUR_POS); else $$ = new ExprVar(CUR_POS, data->symbols.create($1)); @@ -414,7 +421,7 @@ expr_simple $$ = new ExprConcatStrings(CUR_POS, false, $2); } | SPATH { - string path($1 + 1, strlen($1) - 2); + string path($1.p + 1, $1.l - 2); $$ = new ExprCall(CUR_POS, new ExprVar(data->symbols.create("__findFile")), {new ExprVar(data->symbols.create("__nixPath")), @@ -460,14 +467,14 @@ string_parts_interpolated path_start : PATH { - Path path(absPath($1, data->basePath)); + Path path(absPath({$1.p, $1.l}, data->basePath)); /* add back in the trailing '/' to the first segment */ - if ($1[strlen($1)-1] == '/' && strlen($1) > 1) + if ($1.p[$1.l-1] == '/' && $1.l > 1) path += "/"; $$ = new ExprPath(path); } | HPATH { - Path path(getHome() + string($1 + 1)); + Path path(getHome() + string($1.p + 1, $1.l - 1)); $$ = new ExprPath(path); } ; @@ -543,7 +550,7 @@ attrpath attr : ID { $$ = $1; } - | OR_KW { $$ = "or"; } + | OR_KW { $$ = {"or", 2}; } ; string_attr