primop: add readFileType, optimize readDir

Allows checking directory entry type of a single file/directory.

This was added to optimize the use of `builtins.readDir` on some
filesystems and operating systems which cannot detect this information
using POSIX's `readdir`.

Previously `builtins.readDir` would eagerly use system calls to lookup
these filetypes using other interfaces; this change makes these
operations lazy in the attribute values for each file with application
of `builtins.readFileType`.
This commit is contained in:
Alex Ameen 2023-01-22 13:45:02 -06:00
parent 04de0dd0b4
commit 153ee460c5
7 changed files with 75 additions and 8 deletions

View file

@ -1,2 +1,10 @@
# Release X.Y (202?-??-??)
* A new function `builtins.readFileType` is available. It is similar to
`builtins.readDir` but acts on a single file or directory.
* The `builtins.readDir` function has been optimized when encountering unknown
file types from POSIX's `readdir`. In such cases the type of each file is/was
discovered by making multiple syscalls. This change makes these operations
lazy such that these lookups will only be performed if the attribute is used.
This optimization effects a minority of filesystems and operating systems.

View file

@ -1646,23 +1646,73 @@ static RegisterPrimOp primop_hashFile({
.fun = prim_hashFile,
});
/* Stringize a directory entry enum. Used by `readFileType' and `readDir'. */
static const char * dirEntTypeToString(unsigned char dtType)
{
/* Enum DT_(DIR|LNK|REG|UNKNOWN) */
switch(dtType) {
case DT_REG: return "regular"; break;
case DT_DIR: return "directory"; break;
case DT_LNK: return "symlink"; break;
default: return "unknown"; break;
}
return "unknown"; /* Unreachable */
}
static void prim_readFileType(EvalState & state, const PosIdx pos, Value * * args, Value & v)
{
auto path = realisePath(state, pos, *args[0]);
/* Retrieve the directory entry type and stringize it. */
v.mkString(dirEntTypeToString(getFileType(path)));
}
static RegisterPrimOp primop_readFileType({
.name = "__readFileType",
.args = {"p"},
.doc = R"(
Determine the directory entry type of a filesystem node, being
one of "directory", "regular", "symlink", or "unknown".
)",
.fun = prim_readFileType,
});
/* Read a directory (without . or ..) */
static void prim_readDir(EvalState & state, const PosIdx pos, Value * * args, Value & v)
{
auto path = realisePath(state, pos, *args[0]);
// Retrieve directory entries for all nodes in a directory.
// This is similar to `getFileType` but is optimized to reduce system calls
// on many systems.
DirEntries entries = readDirectory(path);
auto attrs = state.buildBindings(entries.size());
// If we hit unknown directory entry types we may need to fallback to
// using `getFileType` on some systems.
// In order to reduce system calls we make each lookup lazy by using
// `builtins.readFileType` application.
Value * readFileType = nullptr;
for (auto & ent : entries) {
if (ent.type == DT_UNKNOWN)
ent.type = getFileType(path + "/" + ent.name);
attrs.alloc(ent.name).mkString(
ent.type == DT_REG ? "regular" :
ent.type == DT_DIR ? "directory" :
ent.type == DT_LNK ? "symlink" :
"unknown");
auto & attr = attrs.alloc(ent.name);
if (ent.type == DT_UNKNOWN) {
// Some filesystems or operating systems may not be able to return
// detailed node info quickly in this case we produce a thunk to
// query the file type lazily.
auto epath = state.allocValue();
Path path2 = path + "/" + ent.name;
epath->mkString(path2);
if (!readFileType)
readFileType = &state.getBuiltin("readFileType");
attr.mkApp(readFileType, epath);
} else {
// This branch of the conditional is much more likely.
// Here we just stringize the directory entry type.
attr.mkString(dirEntTypeToString(ent.type));
}
}
v.mkAttrs(attrs);

View file

@ -1 +1 @@
{ bar = "regular"; foo = "directory"; }
{ bar = "regular"; foo = "directory"; ldir = "symlink"; linked = "symlink"; }

View file

@ -0,0 +1 @@
{ bar = "regular"; foo = "directory"; ldir = "symlink"; linked = "symlink"; }

View file

@ -0,0 +1,6 @@
{
bar = builtins.readFileType ./readDir/bar;
foo = builtins.readFileType ./readDir/foo;
linked = builtins.readFileType ./readDir/linked;
ldir = builtins.readFileType ./readDir/ldir;
}

1
tests/lang/readDir/ldir Symbolic link
View file

@ -0,0 +1 @@
foo

1
tests/lang/readDir/linked Symbolic link
View file

@ -0,0 +1 @@
foo/git-hates-directories