lix/src/libutil/hash.hh
Eelco Dolstra 0abb3ad537 Allow content-addressable paths to have references
This adds a command 'nix make-content-addressable' that rewrites the
specified store paths into content-addressable paths. The advantage of
such paths is that 1) they can be imported without signatures; 2) they
can enable deduplication in cases where derivation changes do not
cause output changes (apart from store path hashes).

For example,

  $ nix make-content-addressable -r nixpkgs.cowsay
  rewrote '/nix/store/g1g31ah55xdia1jdqabv1imf6mcw0nb1-glibc-2.25-49' to '/nix/store/48jfj7bg78a8n4f2nhg269rgw1936vj4-glibc-2.25-49'
  ...
  rewrote '/nix/store/qbi6rzpk0bxjw8lw6azn2mc7ynnn455q-cowsay-3.03+dfsg1-16' to '/nix/store/iq6g2x4q62xp7y7493bibx0qn5w7xz67-cowsay-3.03+dfsg1-16'

We can then copy the resulting closure to another store without
signatures:

  $ nix copy --trusted-public-keys '' ---to ~/my-nix /nix/store/iq6g2x4q62xp7y7493bibx0qn5w7xz67-cowsay-3.03+dfsg1-16

In order to support self-references in content-addressable paths,
these paths are hashed "modulo" self-references, meaning that
self-references are zeroed out during hashing. Somewhat annoyingly,
this means that the NAR hash stored in the Nix database is no longer
necessarily equal to the output of "nix hash-path"; for
content-addressable paths, you need to pass the --modulo flag:

  $ nix path-info --json /nix/store/iq6g2x4q62xp7y7493bibx0qn5w7xz67-cowsay-3.03+dfsg1-16  | jq -r .[].narHash
  sha256:0ri611gdilz2c9rsibqhsipbfs9vwcqvs811a52i2bnkhv7w9mgw

  $ nix hash-path --type sha256 --base32 /nix/store/iq6g2x4q62xp7y7493bibx0qn5w7xz67-cowsay-3.03+dfsg1-16
  1ggznh07khq0hz6id09pqws3a8q9pn03ya3c03nwck1kwq8rclzs

  $ nix hash-path --type sha256 --base32 /nix/store/iq6g2x4q62xp7y7493bibx0qn5w7xz67-cowsay-3.03+dfsg1-16 --modulo iq6g2x4q62xp7y7493bibx0qn5w7xz67
  0ri611gdilz2c9rsibqhsipbfs9vwcqvs811a52i2bnkhv7w9mgw
2019-10-21 17:47:24 +02:00

137 lines
3.4 KiB
C++

#pragma once
#include "types.hh"
#include "serialise.hh"
namespace nix {
MakeError(BadHash, Error);
enum HashType : char { htUnknown, htMD5, htSHA1, htSHA256, htSHA512 };
const int md5HashSize = 16;
const int sha1HashSize = 20;
const int sha256HashSize = 32;
const int sha512HashSize = 64;
extern const string base32Chars;
enum Base : int { Base64, Base32, Base16, SRI };
struct Hash
{
static const unsigned int maxHashSize = 64;
unsigned int hashSize = 0;
unsigned char hash[maxHashSize] = {};
HashType type = htUnknown;
/* Create an unset hash object. */
Hash() { };
/* Create a zero-filled hash object. */
Hash(HashType type) : type(type) { init(); };
/* Initialize the hash from a string representation, in the format
"[<type>:]<base16|base32|base64>" or "<type>-<base64>" (a
Subresource Integrity hash expression). If the 'type' argument
is htUnknown, then the hash type must be specified in the
string. */
Hash(const std::string & s, HashType type = htUnknown);
void init();
/* Check whether a hash is set. */
operator bool () const { return type != htUnknown; }
/* Check whether two hash are equal. */
bool operator == (const Hash & h2) const;
/* Check whether two hash are not equal. */
bool operator != (const Hash & h2) const;
/* For sorting. */
bool operator < (const Hash & h) const;
/* Returns the length of a base-16 representation of this hash. */
size_t base16Len() const
{
return hashSize * 2;
}
/* Returns the length of a base-32 representation of this hash. */
size_t base32Len() const
{
return (hashSize * 8 - 1) / 5 + 1;
}
/* Returns the length of a base-64 representation of this hash. */
size_t base64Len() const
{
return ((4 * hashSize / 3) + 3) & ~3;
}
/* Return a string representation of the hash, in base-16, base-32
or base-64. By default, this is prefixed by the hash type
(e.g. "sha256:"). */
std::string to_string(Base base = Base32, bool includeType = true) const;
};
/* Print a hash in base-16 if it's MD5, or base-32 otherwise. */
string printHash16or32(const Hash & hash);
/* Compute the hash of the given string. */
Hash hashString(HashType ht, const string & s);
/* Compute the hash of the given file. */
Hash hashFile(HashType ht, const Path & path);
/* Compute the hash of the given path. The hash is defined as
(essentially) hashString(ht, dumpPath(path)). */
typedef std::pair<Hash, unsigned long long> HashResult;
HashResult hashPath(HashType ht, const Path & path,
PathFilter & filter = defaultPathFilter);
/* Compress a hash to the specified number of bytes by cyclically
XORing bytes together. */
Hash compressHash(const Hash & hash, unsigned int newSize);
/* Parse a string representing a hash type. */
HashType parseHashType(const string & s);
/* And the reverse. */
string printHashType(HashType ht);
union Ctx;
struct AbstractHashSink : virtual Sink
{
virtual HashResult finish() = 0;
};
class HashSink : public BufferedSink, public AbstractHashSink
{
private:
HashType ht;
Ctx * ctx;
unsigned long long bytes;
public:
HashSink(HashType ht);
HashSink(const HashSink & h);
~HashSink();
void write(const unsigned char * data, size_t len) override;
HashResult finish() override;
HashResult currentHash();
};
}