/*
 * Provenance: this file was introduced as src/postprocess/halfsiphash.c
 * (new file, blob d74d3be, 152 lines) by commit
 * 2e86c5fe02332189948672df1fec49b68584bfc3
 *   Author:  Dirk Engling
 *   Date:    Thu, 7 Mar 2019 15:25:31 +0100
 *   Subject: merge_entries uses siphash to identify common chunks
 * The text was recovered from a patch rendered by cgit v1.2.3.
 */

/*
   SipHash reference C implementation

   Copyright (c) 2016 Jean-Philippe Aumasson

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along
   with
   this software. If not, see
   <http://creativecommons.org/publicdomain/zero/1.0/>.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* default: SipHash-2-4 */
#define cROUNDS 2
#define dROUNDS 4

/* Rotate the 32-bit value x left by b bits; only valid for 0 < b < 32. */
#define ROTL(x, b) (uint32_t)(((x) << (b)) | ((x) >> (32 - (b))))

/*
 * Store the 32-bit value v at p[0..3], least significant byte first.
 * Both arguments are evaluated more than once, so pass side-effect-free
 * expressions only.
 */
#define U32TO8_LE(p, v)                                                        \
    do {                                                                       \
        (p)[0] = (uint8_t)((v));                                               \
        (p)[1] = (uint8_t)((v) >> 8);                                          \
        (p)[2] = (uint8_t)((v) >> 16);                                         \
        (p)[3] = (uint8_t)((v) >> 24);                                         \
    } while (0)

/* Load a 32-bit value from p[0..3], least significant byte first. */
#define U8TO32_LE(p)                                                           \
    (((uint32_t)((p)[0])) | ((uint32_t)((p)[1]) << 8) |                        \
     ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24))

/*
 * One mixing round over the four state words. Expects uint32_t variables
 * named v0, v1, v2 and v3 to be in scope at the point of use.
 */
#define SIPROUND                                                               \
    do {                                                                       \
        v0 += v1;                                                              \
        v1 = ROTL(v1, 5);                                                      \
        v1 ^= v0;                                                              \
        v0 = ROTL(v0, 16);                                                     \
        v2 += v3;                                                              \
        v3 = ROTL(v3, 8);                                                      \
        v3 ^= v2;                                                              \
        v0 += v3;                                                              \
        v3 = ROTL(v3, 7);                                                      \
        v3 ^= v0;                                                              \
        v2 += v1;                                                              \
        v1 = ROTL(v1, 13);                                                     \
        v1 ^= v2;                                                              \
        v2 = ROTL(v2, 16);                                                     \
    } while (0)

/*
 * With DEBUG defined, TRACE prints the four state words to stdout; it relies
 * on inlen and v0..v3 being in scope. Otherwise it expands to a no-op.
 */
#ifdef DEBUG
#define TRACE                                                                  \
    do {                                                                       \
        printf("(%3d) v0 %08x\n", (int)inlen, (unsigned)v0);                   \
        printf("(%3d) v1 %08x\n", (int)inlen, (unsigned)v1);                   \
        printf("(%3d) v2 %08x\n", (int)inlen, (unsigned)v2);                   \
        printf("(%3d) v3 %08x\n", (int)inlen, (unsigned)v3);                   \
    } while (0)
#else
#define TRACE                                                                  \
    do {                                                                       \
    } while (0)
#endif

/**
 * Compute a keyed HalfSipHash digest of a byte buffer.
 *
 * @param in     Message bytes; in[0..inlen-1] are read. Not dereferenced when
 *               inlen is 0.
 * @param inlen  Number of message bytes. Only its low 8 bits are folded into
 *               the final block (inlen << 24 in a 32-bit word).
 * @param k      Key; exactly 8 bytes (k[0..7]) are read as two little-endian
 *               32-bit words.
 * @param out    Destination for the digest; outlen bytes are written,
 *               little-endian per 32-bit word.
 * @param outlen Digest size in bytes; must be 4 or 8.
 *
 * @return 0 on success. An unsupported outlen trips the assert; when asserts
 *         are compiled out (NDEBUG) the function returns -1 without writing
 *         to out, instead of falling through to the 8-byte path.
 */
int halfsiphash(const uint8_t *in, const size_t inlen, const uint8_t *k,
                uint8_t *out, const size_t outlen) {

    assert((outlen == 4) || (outlen == 8));
    if (outlen != 4 && outlen != 8) {
        return -1;
    }

    const uint32_t k0 = U8TO32_LE(k);
    const uint32_t k1 = U8TO32_LE(k + 4);

    /* State initialisation: constants XORed with the two key words. */
    uint32_t v0 = k0;
    uint32_t v1 = k1;
    uint32_t v2 = UINT32_C(0x6c796765) ^ k0;
    uint32_t v3 = UINT32_C(0x74656462) ^ k1;

    /* Final block: message length in the top byte, tail bytes below it. */
    uint32_t b = ((uint32_t)inlen) << 24;

    /* The 8-byte variant is domain-separated from the 4-byte one. */
    if (outlen == 8) {
        v1 ^= 0xee;
    }

    /*
     * Absorb all complete 4-byte words. Counting blocks (rather than
     * computing an end pointer) avoids pointer arithmetic on `in` when the
     * message is empty.
     */
    for (size_t blocks = inlen / sizeof(uint32_t); blocks != 0; --blocks) {
        const uint32_t m = U8TO32_LE(in);
        v3 ^= m;

        TRACE;
        for (int i = 0; i < cROUNDS; ++i) {
            SIPROUND;
        }

        v0 ^= m;
        in += sizeof(uint32_t);
    }

    /* Fold the 0..3 leftover bytes into the final block. */
    switch (inlen & 3) {
    case 3:
        b |= ((uint32_t)in[2]) << 16;
        /* fallthrough */
    case 2:
        b |= ((uint32_t)in[1]) << 8;
        /* fallthrough */
    case 1:
        b |= ((uint32_t)in[0]);
        break;
    default:
        break;
    }

    v3 ^= b;

    TRACE;
    for (int i = 0; i < cROUNDS; ++i) {
        SIPROUND;
    }

    v0 ^= b;

    /* Finalisation constant depends on the requested digest size. */
    if (outlen == 8) {
        v2 ^= 0xee;
    } else {
        v2 ^= 0xff;
    }

    TRACE;
    for (int i = 0; i < dROUNDS; ++i) {
        SIPROUND;
    }

    b = v1 ^ v3;
    U32TO8_LE(out, b);

    if (outlen == 4) {
        return 0;
    }

    /* Second output word for the 8-byte digest. */
    v1 ^= 0xdd;

    TRACE;
    for (int i = 0; i < dROUNDS; ++i) {
        SIPROUND;
    }

    b = v1 ^ v3;
    U32TO8_LE(out + 4, b);

    return 0;
}