* Refactoring: move sink/source buffering into separate classes.

* Buffer the HashSink.  This speeds up hashing a bit because it
  prevents lots of calls to the hash update functions (e.g. nix-hash
  went from 9.3s to 8.7s of user time on the closure of my
  /var/run/current-system).
This commit is contained in:
Eelco Dolstra 2011-12-15 16:19:53 +00:00
parent a67b8ae224
commit 5a1b9ed0aa
8 changed files with 125 additions and 90 deletions

View file

@ -1103,16 +1103,14 @@ struct HashAndWriteSink : Sink
HashAndWriteSink(Sink & writeSink) : writeSink(writeSink), hashSink(htSHA256)
{
}
virtual void operator ()
(const unsigned char * data, unsigned int len)
virtual void operator () (const unsigned char * data, size_t len)
{
writeSink(data, len);
hashSink(data, len);
}
Hash currentHash()
{
HashSink hashSinkClone(hashSink);
return hashSinkClone.finish().first;
return hashSink.currentHash().first;
}
};
@ -1201,8 +1199,7 @@ struct HashAndReadSource : Source
{
hashing = true;
}
virtual void operator ()
(unsigned char * data, unsigned int len)
virtual void operator () (unsigned char * data, size_t len)
{
readSource(data, len);
if (hashing) hashSink(data, len);

View file

@ -57,11 +57,11 @@ struct RefScanSink : Sink
RefScanSink() : hashSink(htSHA256) { }
void operator () (const unsigned char * data, unsigned int len);
void operator () (const unsigned char * data, size_t len);
};
void RefScanSink::operator () (const unsigned char * data, unsigned int len)
void RefScanSink::operator () (const unsigned char * data, size_t len)
{
hashSink(data, len);

View file

@ -306,21 +306,13 @@ HashSink::HashSink(HashType ht) : ht(ht)
start(ht, *ctx);
}
HashSink::HashSink(const HashSink & h)
{
ht = h.ht;
bytes = h.bytes;
ctx = new Ctx;
*ctx = *h.ctx;
}
HashSink::~HashSink()
{
bufPos = 0;
delete ctx;
}
void HashSink::operator ()
(const unsigned char * data, unsigned int len)
void HashSink::write(const unsigned char * data, size_t len)
{
bytes += len;
update(ht, *ctx, data, len);
@ -328,11 +320,21 @@ void HashSink::operator ()
HashResult HashSink::finish()
{
flush();
Hash hash(ht);
nix::finish(ht, *ctx, hash.hash);
return HashResult(hash, bytes);
}
HashResult HashSink::currentHash()
{
flush();
Ctx ctx2 = *ctx;
Hash hash(ht);
nix::finish(ht, ctx2, hash.hash);
return HashResult(hash, bytes);
}
HashResult hashPath(
HashType ht, const Path & path, PathFilter & filter)

View file

@ -91,7 +91,7 @@ string printHashType(HashType ht);
union Ctx;
class HashSink : public Sink
class HashSink : public BufferedSink
{
private:
HashType ht;
@ -102,8 +102,9 @@ public:
HashSink(HashType ht);
HashSink(const HashSink & h);
~HashSink();
virtual void operator () (const unsigned char * data, unsigned int len);
void write(const unsigned char * data, size_t len);
HashResult finish();
HashResult currentHash();
};

View file

@ -8,7 +8,16 @@
namespace nix {
void FdSink::operator () (const unsigned char * data, unsigned int len)
BufferedSink::~BufferedSink()
{
/* We can't call flush() here, because C++ for some insane reason
doesn't allow you to call virtual methods from a destructor. */
assert(!bufPos);
if (buffer) delete[] buffer;
}
void BufferedSink::operator () (const unsigned char * data, size_t len)
{
if (!buffer) buffer = new unsigned char[bufSize];
@ -16,7 +25,7 @@ void FdSink::operator () (const unsigned char * data, unsigned int len)
/* Optimisation: bypass the buffer if the data exceeds the
buffer size and there is no unflushed data. */
if (bufPos == 0 && len >= bufSize) {
writeFull(fd, data, len);
write(data, len);
break;
}
/* Otherwise, copy the bytes to the buffer. Flush the buffer
@ -29,31 +38,32 @@ void FdSink::operator () (const unsigned char * data, unsigned int len)
}
void FdSink::flush()
void BufferedSink::flush()
{
if (fd == -1 || bufPos == 0) return;
writeFull(fd, buffer, bufPos);
if (bufPos == 0) return;
write(buffer, bufPos);
bufPos = 0;
}
void FdSource::operator () (unsigned char * data, unsigned int len)
void FdSink::write(const unsigned char * data, size_t len)
{
writeFull(fd, data, len);
}
BufferedSource::~BufferedSource()
{
if (buffer) delete[] buffer;
}
void BufferedSource::operator () (unsigned char * data, size_t len)
{
if (!buffer) buffer = new unsigned char[bufSize];
while (len) {
if (!bufPosIn) {
/* Read as much data as is available (up to the buffer
size). */
checkInterrupt();
ssize_t n = read(fd, (char *) buffer, bufSize);
if (n == -1) {
if (errno == EINTR) continue;
throw SysError("reading from file");
}
if (n == 0) throw EndOfFile("unexpected end-of-file");
bufPosIn = n;
}
if (!bufPosIn) bufPosIn = read(buffer, bufSize);
/* Copy out the data in the buffer. */
size_t n = len > bufPosIn - bufPosOut ? bufPosIn - bufPosOut : len;
@ -64,7 +74,20 @@ void FdSource::operator () (unsigned char * data, unsigned int len)
}
void writePadding(unsigned int len, Sink & sink)
size_t FdSource::read(unsigned char * data, size_t len)
{
ssize_t n;
do {
checkInterrupt();
n = ::read(fd, (char *) data, bufSize);
} while (n == -1 && errno == EINTR);
if (n == -1) throw SysError("reading from file");
if (n == 0) throw EndOfFile("unexpected end-of-file");
return n;
}
void writePadding(size_t len, Sink & sink)
{
if (len % 8) {
unsigned char zero[8];
@ -103,7 +126,7 @@ void writeLongLong(unsigned long long n, Sink & sink)
void writeString(const string & s, Sink & sink)
{
unsigned int len = s.length();
size_t len = s.length();
writeInt(len, sink);
sink((const unsigned char *) s.c_str(), len);
writePadding(len, sink);
@ -118,11 +141,11 @@ void writeStringSet(const StringSet & ss, Sink & sink)
}
void readPadding(unsigned int len, Source & source)
void readPadding(size_t len, Source & source)
{
if (len % 8) {
unsigned char zero[8];
unsigned int n = 8 - (len % 8);
size_t n = 8 - (len % 8);
source(zero, n);
for (unsigned int i = 0; i < n; i++)
if (zero[i]) throw SerialisationError("non-zero padding");
@ -162,7 +185,7 @@ unsigned long long readLongLong(Source & source)
string readString(Source & source)
{
unsigned int len = readInt(source);
size_t len = readInt(source);
unsigned char * buf = new unsigned char[len];
AutoDeleteArray<unsigned char> d(buf);
source(buf, len);

View file

@ -11,7 +11,25 @@ namespace nix {
struct Sink
{
virtual ~Sink() { }
virtual void operator () (const unsigned char * data, unsigned int len) = 0;
virtual void operator () (const unsigned char * data, size_t len) = 0;
};
/* A buffered abstract sink. */
struct BufferedSink : Sink
{
size_t bufSize, bufPos;
unsigned char * buffer;
BufferedSink(size_t bufSize = 32 * 1024)
: bufSize(bufSize), bufPos(0), buffer(0) { }
~BufferedSink();
void operator () (const unsigned char * data, size_t len);
void flush();
virtual void write(const unsigned char * data, size_t len) = 0;
};
@ -20,56 +38,52 @@ struct Source
{
virtual ~Source() { }
/* The callee should store exactly *len bytes in the buffer
pointed to by data. It should block if that much data is not
yet available, or throw an error if it is not going to be
available. */
virtual void operator () (unsigned char * data, unsigned int len) = 0;
/* Store exactly len bytes in the buffer pointed to by data.
It blocks if that much data is not yet available, or throws an
error if it is not going to be available. */
virtual void operator () (unsigned char * data, size_t len) = 0;
};
/* A sink that writes data to a file descriptor (using a buffer). */
struct FdSink : Sink
/* A buffered abstract source. */
struct BufferedSource : Source
{
int fd;
unsigned int bufSize, bufPos;
size_t bufSize, bufPosIn, bufPosOut;
unsigned char * buffer;
FdSink() : fd(-1), bufSize(32 * 1024), bufPos(0), buffer(0) { }
BufferedSource(size_t bufSize = 32 * 1024)
: bufSize(bufSize), bufPosIn(0), bufPosOut(0), buffer(0) { }
~BufferedSource();
FdSink(int fd, unsigned int bufSize = 32 * 1024)
: fd(fd), bufSize(bufSize), bufPos(0), buffer(0) { }
void operator () (unsigned char * data, size_t len);
~FdSink()
{
flush();
if (buffer) delete[] buffer;
}
/* Store up to len in the buffer pointed to by data, and
return the number of bytes stored. If should block until at
least one byte is available. */
virtual size_t read(unsigned char * data, size_t len) = 0;
};
void operator () (const unsigned char * data, unsigned int len);
void flush();
/* A sink that writes data to a file descriptor. */
struct FdSink : BufferedSink
{
int fd;
FdSink() : fd(-1) { }
FdSink(int fd) : fd(fd) { }
~FdSink() { flush(); }
void write(const unsigned char * data, size_t len);
};
/* A source that reads data from a file descriptor. */
struct FdSource : Source
struct FdSource : BufferedSource
{
int fd;
unsigned int bufSize, bufPosIn, bufPosOut;
unsigned char * buffer;
FdSource() : fd(-1), bufSize(32 * 1024), bufPosIn(0), bufPosOut(0), buffer(0) { }
FdSource(int fd, unsigned int bufSize = 32 * 1024)
: fd(fd), bufSize(bufSize), bufPosIn(0), bufPosOut(0), buffer(0) { }
~FdSource()
{
if (buffer) delete[] buffer;
}
void operator () (unsigned char * data, unsigned int len);
FdSource() : fd(-1) { }
FdSource(int fd) : fd(fd) { }
size_t read(unsigned char * data, size_t len);
};
@ -77,7 +91,7 @@ struct FdSource : Source
struct StringSink : Sink
{
string s;
virtual void operator () (const unsigned char * data, unsigned int len)
void operator () (const unsigned char * data, size_t len)
{
s.append((const char *) data, len);
}
@ -88,9 +102,9 @@ struct StringSink : Sink
struct StringSource : Source
{
const string & s;
unsigned int pos;
size_t pos;
StringSource(const string & _s) : s(_s), pos(0) { }
virtual void operator () (unsigned char * data, unsigned int len)
virtual void operator () (unsigned char * data, size_t len)
{
s.copy((char *) data, len, pos);
pos += len;
@ -100,13 +114,13 @@ struct StringSource : Source
};
void writePadding(unsigned int len, Sink & sink);
void writePadding(size_t len, Sink & sink);
void writeInt(unsigned int n, Sink & sink);
void writeLongLong(unsigned long long n, Sink & sink);
void writeString(const string & s, Sink & sink);
void writeStringSet(const StringSet & ss, Sink & sink);
void readPadding(unsigned int len, Source & source);
void readPadding(size_t len, Source & source);
unsigned int readInt(Source & source);
unsigned long long readLongLong(Source & source);
string readString(Source & source);

View file

@ -202,8 +202,7 @@ struct TunnelSink : Sink
{
Sink & to;
TunnelSink(Sink & to) : to(to) { }
virtual void operator ()
(const unsigned char * data, unsigned int len)
virtual void operator () (const unsigned char * data, size_t len)
{
writeInt(STDERR_WRITE, to);
writeString(string((const char *) data, len), to);
@ -215,8 +214,7 @@ struct TunnelSource : Source
{
Source & from;
TunnelSource(Source & from) : from(from) { }
virtual void operator ()
(unsigned char * data, unsigned int len)
virtual void operator () (unsigned char * data, size_t len)
{
/* Careful: we're going to receive data from the client now,
so we have to disable the SIGPOLL handler. */
@ -267,7 +265,7 @@ struct SavingSourceAdapter : Source
Source & orig;
string s;
SavingSourceAdapter(Source & orig) : orig(orig) { }
void operator () (unsigned char * data, unsigned int len)
void operator () (unsigned char * data, size_t len)
{
orig(data, len);
s.append((const char *) data, len);