From 2e86c5fe02332189948672df1fec49b68584bfc3 Mon Sep 17 00:00:00 2001
From: Dirk Engling <erdgeist@erdgeist.org>
Date: Thu, 7 Mar 2019 15:25:31 +0100
Subject: merge_entries uses siphash to identify common chunks

---
 src/postprocess/halfsiphash.c | 152 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 152 insertions(+)
 create mode 100644 src/postprocess/halfsiphash.c

(limited to 'src')

diff --git a/src/postprocess/halfsiphash.c b/src/postprocess/halfsiphash.c
new file mode 100644
index 0000000..d74d3be
--- /dev/null
+++ b/src/postprocess/halfsiphash.c
@@ -0,0 +1,152 @@
+
+/*
+   SipHash reference C implementation
+
+   Copyright (c) 2016 Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
+
+   To the extent possible under law, the author(s) have dedicated all copyright
+   and related and neighboring rights to this software to the public domain
+   worldwide. This software is distributed without any warranty.
+
+   You should have received a copy of the CC0 Public Domain Dedication along
+   with
+   this software. If not, see
+   <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+/* default: SipHash-2-4 (cROUNDS per absorbed word, dROUNDS per finalization pass) */
+#define cROUNDS 2
+#define dROUNDS 4
+/* Rotate 32-bit x left by b bits; b must be in 1..31 (b == 0 would shift right by 32). */
+#define ROTL(x, b) (uint32_t)(((x) << (b)) | ((x) >> (32 - (b))))
+/* Store v into p[0..3], low byte first; expands to four bare statements (no do/while). */
+#define U32TO8_LE(p, v)                                                        \
+    (p)[0] = (uint8_t)((v));                                                   \
+    (p)[1] = (uint8_t)((v) >> 8);                                              \
+    (p)[2] = (uint8_t)((v) >> 16);                                             \
+    (p)[3] = (uint8_t)((v) >> 24);
+/* NOTE(review): identical repeat of the U32TO8_LE definition directly above. */
+#define U32TO8_LE(p, v)                                                        \
+    (p)[0] = (uint8_t)((v));                                                   \
+    (p)[1] = (uint8_t)((v) >> 8);                                              \
+    (p)[2] = (uint8_t)((v) >> 16);                                             \
+    (p)[3] = (uint8_t)((v) >> 24);
+/* Assemble a 32-bit value from p[0..3], treating p[0] as the least significant byte. */
+#define U8TO32_LE(p)                                                           \
+    (((uint32_t)((p)[0])) | ((uint32_t)((p)[1]) << 8) |                        \
+     ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24))
+/* One mixing round (add / rotate / xor) over v0..v3, which must exist in the caller's scope. */
+#define SIPROUND                                                               \
+    do {                                                                       \
+        v0 += v1;                                                              \
+        v1 = ROTL(v1, 5);                                                      \
+        v1 ^= v0;                                                              \
+        v0 = ROTL(v0, 16);                                                     \
+        v2 += v3;                                                              \
+        v3 = ROTL(v3, 8);                                                      \
+        v3 ^= v2;                                                              \
+        v0 += v3;                                                              \
+        v3 = ROTL(v3, 7);                                                      \
+        v3 ^= v0;                                                              \
+        v2 += v1;                                                              \
+        v1 = ROTL(v1, 13);                                                     \
+        v1 ^= v2;                                                              \
+        v2 = ROTL(v2, 16);                                                     \
+    } while (0)
+/* TRACE prints v0..v3 tagged with inlen when DEBUG is defined; otherwise it expands to nothing. */
+#ifdef DEBUG
+#define TRACE                                                                  \
+    do {                                                                       \
+        printf("(%3d) v0 %08x\n", (int)inlen, v0);                             \
+        printf("(%3d) v1 %08x\n", (int)inlen, v1);                             \
+        printf("(%3d) v2 %08x\n", (int)inlen, v2);                             \
+        printf("(%3d) v3 %08x\n", (int)inlen, v3);                             \
+    } while (0)
+#else /* !DEBUG */
+#define TRACE
+#endif /* DEBUG */
+
+int halfsiphash(const uint8_t *in, const size_t inlen, const uint8_t *k,
+                uint8_t *out, const size_t outlen) {
+    /* Hash inlen bytes at in under the 8-byte key k; write outlen (4 or 8) bytes to out; return 0. */
+    assert((outlen == 4) || (outlen == 8)); /* NOTE(review): gone under NDEBUG; any outlen != 4 then writes 8 bytes */
+    uint32_t v0 = 0; /* v0..v3: the four 32-bit state words */
+    uint32_t v1 = 0;
+    uint32_t v2 = 0x6c796765; /* fixed initial constant */
+    uint32_t v3 = 0x74656462; /* fixed initial constant */
+    uint32_t k0 = U8TO32_LE(k);     /* key bytes 0..3, little-endian */
+    uint32_t k1 = U8TO32_LE(k + 4); /* key bytes 4..7, little-endian */
+    uint32_t m; /* current 4-byte input word */
+    int i;
+    const uint8_t *end = in + inlen - (inlen % sizeof(uint32_t)); /* one past the last complete word */
+    const int left = inlen & 3; /* trailing bytes (0..3) after the last complete word */
+    uint32_t b = ((uint32_t)inlen) << 24; /* final block: low 8 bits of inlen in the top byte */
+    v3 ^= k1; /* fold the key into all four state words */
+    v2 ^= k0;
+    v1 ^= k1;
+    v0 ^= k0;
+    /* 8-byte output mode perturbs v1 before any input is absorbed. */
+    if (outlen == 8)
+        v1 ^= 0xee;
+    /* Absorb each complete 4-byte little-endian word: xor into v3, run cROUNDS, xor into v0. */
+    for (; in != end; in += 4) {
+        m = U8TO32_LE(in);
+        v3 ^= m;
+
+        TRACE;
+        for (i = 0; i < cROUNDS; ++i)
+            SIPROUND;
+
+        v0 ^= m;
+    }
+    /* in now points at the tail; pack the 0..3 leftover bytes into the low bytes of b. */
+    switch (left) {
+    case 3:
+        b |= ((uint32_t)in[2]) << 16; /* fallthrough */
+    case 2:
+        b |= ((uint32_t)in[1]) << 8; /* fallthrough */
+    case 1:
+        b |= ((uint32_t)in[0]);
+        break;
+    case 0:
+        break;
+    }
+    /* Absorb the final block b the same way as a full word. */
+    v3 ^= b;
+
+    TRACE;
+    for (i = 0; i < cROUNDS; ++i)
+        SIPROUND;
+
+    v0 ^= b;
+    /* Finalization: tag v2 according to the output length, then run dROUNDS. */
+    if (outlen == 8)
+        v2 ^= 0xee;
+    else
+        v2 ^= 0xff;
+
+    TRACE;
+    for (i = 0; i < dROUNDS; ++i)
+        SIPROUND;
+    /* First 4 output bytes: v1 ^ v3, stored little-endian. */
+    b = v1 ^ v3;
+    U32TO8_LE(out, b);
+
+    if (outlen == 4)
+        return 0;
+    /* 8-byte mode: perturb v1, run dROUNDS again, and emit a second word at out + 4. */
+    v1 ^= 0xdd;
+
+    TRACE;
+    for (i = 0; i < dROUNDS; ++i)
+        SIPROUND;
+
+    b = v1 ^ v3;
+    U32TO8_LE(out + 4, b);
+
+    return 0;
+}
-- 
cgit v1.2.3