From 5a34a473dd14200ffeff496f0e8e4518f7992765 Mon Sep 17 00:00:00 2001 From: Guillaume Bouchard Date: Wed, 29 Apr 2020 13:10:03 +0200 Subject: [PATCH 1/3] builtins.readFile: do not truncate content This closes #3026 by allowing `builtins.readFile` to read a file with a wrongly reported file size, for example, files in `/proc` may report a file size of 0. Reading file in `/proc` is not a good enough motivation, however I do think it just makes nix more robust by allowing more file to be read. Especially, I do considerer the previous behavior to be dangerous because nix was previously reading truncated files. Examples of file system which incorrectly report file size may be network file system or dynamic file system (for performance reason, a dynamic file system such as FUSE may generate the content of the file on demand). ``` nix-repl> builtins.readFile "/proc/version" "" ``` With this commit: ``` nix-repl> builtins.readFile "/proc/version" "Linux version 5.6.7 (nixbld@localhost) (gcc version 9.3.0 (GCC)) #1-NixOS SMP Thu Apr 23 08:38:27 UTC 2020\n" ``` Here is a summary of the behavior changes: - If the reported size is smaller, previous implementation was silently returning a truncated file content. The new implementation is returning the correct file content. - If a file had a bigger reported file size, previous implementation was failing with an exception, but the new implementation is returning the correct file content. This change of behavior is coherent with this pull request. Open questions - The behavior is unchanged for correctly reported file size, however performances may vary because it uses the more complex sink interface. Considering that sink is used a lot, I don't think this impacts the performance a lot. - `builtins.readFile` on an infinite file, such as `/dev/random` may fill the memory. - it does not support adding file to store, such as `${/proc/version}`. --- src/libutil/util.cc | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/libutil/util.cc b/src/libutil/util.cc index 615a7656c..d001bc3b7 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -312,14 +312,7 @@ unsigned char getFileType(const Path & path) string readFile(int fd) { - struct stat st; - if (fstat(fd, &st) == -1) - throw SysError("statting file"); - - std::vector buf(st.st_size); - readFull(fd, buf.data(), st.st_size); - - return string((char *) buf.data(), st.st_size); + return drainFD(fd, true); } From 7afcb5af988eb0ce73c9916b809f8528dfb14c0f Mon Sep 17 00:00:00 2001 From: Guillaume Bouchard Date: Wed, 29 Apr 2020 18:42:19 +0200 Subject: [PATCH 2/3] Remove the `drain` argument from `readFile` Now it is always `drain` (see previous commit). --- src/libstore/gc.cc | 4 ++-- src/libutil/serialise.hh | 3 +++ src/libutil/util.cc | 4 ++-- src/libutil/util.hh | 2 +- src/nix/ls.cc | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/libstore/gc.cc b/src/libstore/gc.cc index 6bab1e37c..3cd35c945 100644 --- a/src/libstore/gc.cc +++ b/src/libstore/gc.cc @@ -419,7 +419,7 @@ void LocalStore::findRuntimeRoots(Roots & roots, bool censor) try { auto mapFile = fmt("/proc/%s/maps", ent->d_name); - auto mapLines = tokenizeString>(readFile(mapFile, true), "\n"); + auto mapLines = tokenizeString>(readFile(mapFile), "\n"); for (const auto & line : mapLines) { auto match = std::smatch{}; if (std::regex_match(line, match, mapRegex)) @@ -427,7 +427,7 @@ void LocalStore::findRuntimeRoots(Roots & roots, bool censor) } auto envFile = fmt("/proc/%s/environ", ent->d_name); - auto envString = readFile(envFile, true); + auto envString = readFile(envFile); auto env_end = std::sregex_iterator{}; for (auto i = std::sregex_iterator{envString.begin(), envString.end(), storePathRegex}; i != env_end; ++i) unchecked[i->str()].emplace(envFile); diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh index 5780c93a6..a04118512 100644 --- a/src/libutil/serialise.hh +++ b/src/libutil/serialise.hh @@ -148,6 +148,9 @@ struct StringSink : Sink { ref s; StringSink() : s(make_ref()) { }; + explicit StringSink(const size_t reservedSize) : s(make_ref()) { + s->reserve(reservedSize); + }; StringSink(ref s) : s(s) { }; void operator () (const unsigned char * data, size_t len) override; }; diff --git a/src/libutil/util.cc b/src/libutil/util.cc index d001bc3b7..bb4af747b 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -316,12 +316,12 @@ string readFile(int fd) } -string readFile(const Path & path, bool drain) +string readFile(const Path & path) { AutoCloseFD fd = open(path.c_str(), O_RDONLY | O_CLOEXEC); if (!fd) throw SysError(format("opening file '%1%'") % path); - return drain ? drainFD(fd.get()) : readFile(fd.get()); + return readFile(fd.get()); } diff --git a/src/libutil/util.hh b/src/libutil/util.hh index d7ee62bcc..32ef9a79a 100644 --- a/src/libutil/util.hh +++ b/src/libutil/util.hh @@ -103,7 +103,7 @@ unsigned char getFileType(const Path & path); /* Read the contents of a file into a string. */ string readFile(int fd); -string readFile(const Path & path, bool drain = false); +string readFile(const Path & path); void readFile(const Path & path, Sink & sink); /* Write a string to a file. */ diff --git a/src/nix/ls.cc b/src/nix/ls.cc index 8590199d7..ee308a6ec 100644 --- a/src/nix/ls.cc +++ b/src/nix/ls.cc @@ -136,7 +136,7 @@ struct CmdLsNar : Command, MixLs void run() override { - list(makeNarAccessor(make_ref(readFile(narPath, true)))); + list(makeNarAccessor(make_ref(readFile(narPath)))); } }; From 2e5be2a7495ac0b204454c74664e590a38d039d3 Mon Sep 17 00:00:00 2001 From: Guillaume Bouchard Date: Wed, 29 Apr 2020 18:44:01 +0200 Subject: [PATCH 3/3] StringSink pre allocate When used with `readFile`, we have a pretty good heuristic of the file size, so `reserve` this in the `string`. This will save some allocation / copy when the string is growing. --- src/libutil/util.cc | 10 +++++++--- src/libutil/util.hh | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/libutil/util.cc b/src/libutil/util.cc index bb4af747b..71db92d77 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -312,7 +312,11 @@ unsigned char getFileType(const Path & path) string readFile(int fd) { - return drainFD(fd, true); + struct stat st; + if (fstat(fd, &st) == -1) + throw SysError("statting file"); + + return drainFD(fd, true, st.st_size); } @@ -658,9 +662,9 @@ void writeFull(int fd, const string & s, bool allowInterrupts) } -string drainFD(int fd, bool block) +string drainFD(int fd, bool block, const size_t reserveSize) { - StringSink sink; + StringSink sink(reserveSize); drainFD(fd, sink, block); return std::move(*sink.s); } diff --git a/src/libutil/util.hh b/src/libutil/util.hh index 32ef9a79a..1b263abcc 100644 --- a/src/libutil/util.hh +++ b/src/libutil/util.hh @@ -162,7 +162,7 @@ MakeError(EndOfFile, Error); /* Read a file descriptor until EOF occurs. */ -string drainFD(int fd, bool block = true); +string drainFD(int fd, bool block = true, const size_t reserveSize=0); void drainFD(int fd, Sink & sink, bool block = true);