#!venv/bin/python

# convert halfnarp's output into the correlation array that can later
# be served by halfnarp to show clusters of talks by your audience's
# preferences
#
# Run this script with a single parameter, a file containing the output
# of halfnarp2.py -e and place this script's output in a file that is
# being served statically, e.g. corr_array_38c3.json

import json
import sys
import itertools
from collections import defaultdict

def count_cooccurrences(preferences):
    """Count how many preference lists contain each pair of talks.

    `preferences` is an iterable of lists of talk ids, one list per
    attendee.  The result maps ``x -> y -> n`` for every ``x <= y`` where
    ``n`` is the number of lists that contain both ``x`` and ``y``; the
    diagonal entry ``counts[x][x]`` is the number of lists containing ``x``.
    """
    counts = defaultdict(lambda: defaultdict(int))
    for talks in preferences:
        # De-duplicate so a talk listed twice by one attendee counts once,
        # and sort so each unordered pair is always stored as (smaller, larger).
        unique_talks = sorted(set(talks))
        for x, y in itertools.combinations_with_replacement(unique_talks, 2):
            counts[x][y] += 1
    return counts


def correlation_digit(total, both, x_count, y_count):
    """Return the correlation score of one talk pair as an int in 0..9.

    `total` is the number of preference lists, `both` the number of lists
    containing both talks, `x_count` / `y_count` the number containing
    each talk.  Scores above 9 are clamped so each fits in one character.
    """
    if x_count * y_count <= 0:
        return 0
    score = (4.0 * total * both * both * (x_count + y_count)
             / (x_count * x_count * y_count * y_count))
    return int(min(score, 9))


def build_corr_array(preferences):
    """Build the dict that is serialised as the correlation array.

    Returns ``{'event_ids': [...], 'event_corrs': [...]}``.  ``event_ids``
    is the sorted list of all talk ids seen.  ``event_corrs`` holds one
    string per event except the last; character ``j`` of string ``i`` is
    the correlation digit for the pair
    ``(event_ids[i], event_ids[i + 1 + j])``.
    """
    total = len(preferences)
    counts = count_cooccurrences(preferences)
    # Every talk has a diagonal entry, so the outer keys are all events.
    event_ids = sorted(counts)

    digits = defaultdict(list)
    for x, y in itertools.combinations(event_ids, 2):
        digit = correlation_digit(
            total,
            counts[x].get(y, 0),
            counts[x].get(x, 0),
            counts[y].get(y, 0),
        )
        digits[x].append(str(digit))

    # The last event never appears as the first element of a pair, so it
    # has no row of its own.
    event_corrs = [''.join(digits[x]) for x in event_ids[:-1]]
    return {'event_ids': event_ids, 'event_corrs': event_corrs}


def main(argv=None):
    """Read the preference file named in `argv[1]` and print the JSON array.

    Returns a process exit status: 0 on success, 2 when the file argument
    is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print('usage: %s <halfnarp-export.json>' % argv[0], file=sys.stderr)
        return 2

    # load in list of talk preferences, which is list of lists of talk ids
    with open(argv[1], encoding='utf-8') as data_file:
        preferences = json.load(data_file)

    print(json.dumps(build_corr_array(preferences), separators=(',', ':')))
    return 0


if __name__ == '__main__':
    sys.exit(main())