From 153ee460c59c67910476f7b1eadcabcd47206f2f Mon Sep 17 00:00:00 2001 From: Alex Ameen Date: Sun, 22 Jan 2023 13:45:02 -0600 Subject: [PATCH 1/2] primop: add readFileType, optimize readDir Allows checking directory entry type of a single file/directory. This was added to optimize the use of `builtins.readDir` on some filesystems and operating systems which cannot detect this information using POSIX's `readdir`. Previously `builtins.readDir` would eagerly use system calls to lookup these filetypes using other interfaces; this change makes these operations lazy in the attribute values for each file with application of `builtins.readFileType`. --- doc/manual/src/release-notes/rl-next.md | 8 ++++ src/libexpr/primops.cc | 64 ++++++++++++++++++++++--- tests/lang/eval-okay-readDir.exp | 2 +- tests/lang/eval-okay-readFileType.exp | 1 + tests/lang/eval-okay-readFileType.nix | 6 +++ tests/lang/readDir/ldir | 1 + tests/lang/readDir/linked | 1 + 7 files changed, 75 insertions(+), 8 deletions(-) create mode 100644 tests/lang/eval-okay-readFileType.exp create mode 100644 tests/lang/eval-okay-readFileType.nix create mode 120000 tests/lang/readDir/ldir create mode 120000 tests/lang/readDir/linked diff --git a/doc/manual/src/release-notes/rl-next.md b/doc/manual/src/release-notes/rl-next.md index 78ae99f4b..56507d731 100644 --- a/doc/manual/src/release-notes/rl-next.md +++ b/doc/manual/src/release-notes/rl-next.md @@ -1,2 +1,10 @@ # Release X.Y (202?-??-??) +* A new function `builtins.readFileType` is available. It is similar to + `builtins.readDir` but acts on a single file or directory. + +* The `builtins.readDir` function has been optimized when encountering unknown + file types from POSIX's `readdir`. In such cases the type of each file is/was + discovered by making multiple syscalls. This change makes these operations + lazy such that these lookups will only be performed if the attribute is used. + This optimization effects a minority of filesystems and operating systems. diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 080892cbd..24f83b932 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -1646,23 +1646,73 @@ static RegisterPrimOp primop_hashFile({ .fun = prim_hashFile, }); + +/* Stringize a directory entry enum. Used by `readFileType' and `readDir'. */ +static const char * dirEntTypeToString(unsigned char dtType) +{ + /* Enum DT_(DIR|LNK|REG|UNKNOWN) */ + switch(dtType) { + case DT_REG: return "regular"; break; + case DT_DIR: return "directory"; break; + case DT_LNK: return "symlink"; break; + default: return "unknown"; break; + } + return "unknown"; /* Unreachable */ +} + + +static void prim_readFileType(EvalState & state, const PosIdx pos, Value * * args, Value & v) +{ + auto path = realisePath(state, pos, *args[0]); + /* Retrieve the directory entry type and stringize it. */ + v.mkString(dirEntTypeToString(getFileType(path))); +} + +static RegisterPrimOp primop_readFileType({ + .name = "__readFileType", + .args = {"p"}, + .doc = R"( + Determine the directory entry type of a filesystem node, being + one of "directory", "regular", "symlink", or "unknown". + )", + .fun = prim_readFileType, +}); + /* Read a directory (without . or ..) */ static void prim_readDir(EvalState & state, const PosIdx pos, Value * * args, Value & v) { auto path = realisePath(state, pos, *args[0]); + // Retrieve directory entries for all nodes in a directory. + // This is similar to `getFileType` but is optimized to reduce system calls + // on many systems. DirEntries entries = readDirectory(path); auto attrs = state.buildBindings(entries.size()); + // If we hit unknown directory entry types we may need to fallback to + // using `getFileType` on some systems. + // In order to reduce system calls we make each lookup lazy by using + // `builtins.readFileType` application. + Value * readFileType = nullptr; + for (auto & ent : entries) { - if (ent.type == DT_UNKNOWN) - ent.type = getFileType(path + "/" + ent.name); - attrs.alloc(ent.name).mkString( - ent.type == DT_REG ? "regular" : - ent.type == DT_DIR ? "directory" : - ent.type == DT_LNK ? "symlink" : - "unknown"); + auto & attr = attrs.alloc(ent.name); + if (ent.type == DT_UNKNOWN) { + // Some filesystems or operating systems may not be able to return + // detailed node info quickly in this case we produce a thunk to + // query the file type lazily. + auto epath = state.allocValue(); + Path path2 = path + "/" + ent.name; + epath->mkString(path2); + if (!readFileType) + readFileType = &state.getBuiltin("readFileType"); + attr.mkApp(readFileType, epath); + } else { + // This branch of the conditional is much more likely. + // Here we just stringize the directory entry type. + attr.mkString(dirEntTypeToString(ent.type)); + } } v.mkAttrs(attrs); diff --git a/tests/lang/eval-okay-readDir.exp b/tests/lang/eval-okay-readDir.exp index bf8d2c14e..6413f6d4f 100644 --- a/tests/lang/eval-okay-readDir.exp +++ b/tests/lang/eval-okay-readDir.exp @@ -1 +1 @@ -{ bar = "regular"; foo = "directory"; } +{ bar = "regular"; foo = "directory"; ldir = "symlink"; linked = "symlink"; } diff --git a/tests/lang/eval-okay-readFileType.exp b/tests/lang/eval-okay-readFileType.exp new file mode 100644 index 000000000..6413f6d4f --- /dev/null +++ b/tests/lang/eval-okay-readFileType.exp @@ -0,0 +1 @@ +{ bar = "regular"; foo = "directory"; ldir = "symlink"; linked = "symlink"; } diff --git a/tests/lang/eval-okay-readFileType.nix b/tests/lang/eval-okay-readFileType.nix new file mode 100644 index 000000000..174fb6c3a --- /dev/null +++ b/tests/lang/eval-okay-readFileType.nix @@ -0,0 +1,6 @@ +{ + bar = builtins.readFileType ./readDir/bar; + foo = builtins.readFileType ./readDir/foo; + linked = builtins.readFileType ./readDir/linked; + ldir = builtins.readFileType ./readDir/ldir; +} diff --git a/tests/lang/readDir/ldir b/tests/lang/readDir/ldir new file mode 120000 index 000000000..191028156 --- /dev/null +++ b/tests/lang/readDir/ldir @@ -0,0 +1 @@ +foo \ No newline at end of file diff --git a/tests/lang/readDir/linked b/tests/lang/readDir/linked new file mode 120000 index 000000000..c503f86a0 --- /dev/null +++ b/tests/lang/readDir/linked @@ -0,0 +1 @@ +foo/git-hates-directories \ No newline at end of file From 37c533ed2730b26207434ffbc972cc4555820037 Mon Sep 17 00:00:00 2001 From: Robert Hensing Date: Mon, 23 Jan 2023 11:28:31 +0100 Subject: [PATCH 2/2] rl-next.md: Minor improvement --- doc/manual/src/release-notes/rl-next.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/manual/src/release-notes/rl-next.md b/doc/manual/src/release-notes/rl-next.md index 56507d731..8a79703ab 100644 --- a/doc/manual/src/release-notes/rl-next.md +++ b/doc/manual/src/release-notes/rl-next.md @@ -3,8 +3,8 @@ * A new function `builtins.readFileType` is available. It is similar to `builtins.readDir` but acts on a single file or directory. -* The `builtins.readDir` function has been optimized when encountering unknown +* The `builtins.readDir` function has been optimized when encountering not-yet-known file types from POSIX's `readdir`. In such cases the type of each file is/was discovered by making multiple syscalls. This change makes these operations lazy such that these lookups will only be performed if the attribute is used. - This optimization effects a minority of filesystems and operating systems. + This optimization affects a minority of filesystems and operating systems.