Yesterday, 22:32
---- cut ----
The current paq8px hash implementation has the following parts:
We have 14 large odd prime numbers. The first one is the golden one.
So Knuth's multiplicative hashing is there.
static HASHEXPR uint64_t hashes = {UINT64_C(0x9E3779B97F4A7C15), UINT64_C(0x993DDEFFB1462949), UINT64_C(0xE9C91DC159AB0D2D),
UINT64_C(0x83D6A14F1B0CED73), UINT64_C(0xA14F1B0CED5A841F), UINT64_C(0xC0E51314A614F4EF),
UINT64_C(0xDA9CC2600AE45A27), UINT64_C(0x826797AA04A65737), UINT64_C(0x2375BE54C41A08ED),
UINT64_C(0xD39104E950564B37), UINT64_C(0x3091697D5E685623), UINT64_C(0x20EB84EE04A3C7E1),
UINT64_C(0xF501F1D0944B2383), UINT64_C(0xE3E4E8AA829AB9B5)};
How did I get these large random primes numbers?
I used an online random number generator. Generated a couple of numbers, looked up the next prime with wolframalpha.
Then tried which ones in which combination works best. Really. :-) Didn't need to try for too long, the results were very similar.
Hashing a constant number of inputs: (1,2 .. 13):
static ALWAYS_INLINE
uint64_t hash(const uint64_t x0) {
return (x0 + 1) * PHI64;
}
static ALWAYS_INLINE
uint64_t hash(const uint64_t x0, const uint64_t x1) {
return (x0 + 1) * PHI64 + (x1 + 1) * MUL64_1;
}
static ALWAYS_INLINE
uint64_t hash(const uint64_t x0, const uint64_t x1, const uint64_t x2, const uint64_t x3, const uint64_t x4, const uint64_t x5, const uint64_t x6,
const uint64_t x7, const uint64_t x8, const uint64_t x9, const uint64_t x10, const uint64_t x11, const uint64_t x12) {
return (x0 + 1) * PHI64 + (x1 + 1) * MUL64_1 + (x2 + 1) * MUL64_2 + (x3 + 1) * MUL64_3 + (x4 + 1) * MUL64_4 + (x5 + 1) * MUL64_5 +
(x6 + 1) * MUL64_6 + (x7 + 1) * MUL64_7 + (x8 + 1) * MUL64_8 + (x9 + 1) * MUL64_9 + (x10 + 1) * MUL64_10 + (x11 + 1) * MUL64_11 +
(x12 + 1) * MUL64_12;
Or hashing a string:
static ALWAYS_INLINE
uint64_t combine64(const uint64_t seed, const uint64_t x) {
return hash(seed + x);
}
Then we finally finalize the result by keeping the most significant bits for hashtable lookup and the next couple of bits (usually 8 or 16) as checksums for collision detection:
static ALWAYS_INLINE
auto finalize64(const uint64_t hash, const int hashBits) -> uint32_t {
assert(uint32_t(hashBits) <= 32); // just a reasonable upper limit
return uint32_t(hash >> (64 - hashBits));
}
static ALWAYS_INLINE
uint64_t checksum64(const uint64_t hash, const int hashBits, const int checksumBits) {
assert(0 < checksumBits && uint32_t(checksumBits) <= 32); //32 is just a reasonable upper limit
return (hash >> (64 - hashBits - checksumBits)) & ((1 << checksumBits) - 1);
}