#!venv/bin/python

# Convert halfnarp's output into the correlation array that can later
# be served by halfnarp to show clusters of talks by your audience's
# preferences.
#
# Run this script with a single parameter, a file containing the output
# of halfnarp2.py -e, and place this script's output in a file that is
# being served statically, e.g. corr_array_38c3.json

import itertools
import json
import sys
from collections import Counter, defaultdict

# Every score is written as exactly one decimal digit, so it is capped here.
MAX_SCORE = 9


def _count_cooccurrences(preferences):
    """Count how many preference lists contain each talk and each talk pair.

    Returns a mapping ``counts`` where, for talk ids ``x <= y``,
    ``counts[x][y]`` is the number of lists containing both talks and
    ``counts[x][x]`` is the number of lists containing ``x``.
    """
    counts = defaultdict(Counter)
    for talks in preferences:
        # De-duplicate so a list that repeats an id is still counted once;
        # sort so each pair is always stored under its smaller id.
        unique_talks = sorted(set(talks))
        for x, y in itertools.combinations_with_replacement(unique_talks, 2):
            # Every pair is counted, including the first one seen for x.
            counts[x][y] += 1
    return counts


def _score(total, both, x_count, y_count):
    """Return the single-digit (0..MAX_SCORE) correlation score of two talks.

    ``total`` is the number of preference lists, ``both`` the number of
    lists containing both talks, and ``x_count`` / ``y_count`` the number
    of lists containing each talk on its own.
    """
    if x_count * y_count <= 0:
        return 0
    raw = (4.0 * total * both * both * (x_count + y_count)
           / (x_count * x_count * y_count * y_count))
    return min(int(raw), MAX_SCORE)


def correlate(preferences):
    """Build the correlation structure for a list of talk preference lists.

    ``preferences`` is a list of lists of talk ids (one inner list per
    submitted preference set).

    Returns a dict with two keys:

    * ``'event_ids'``: all talk ids seen, sorted.
    * ``'event_corrs'``: one string per talk except the last one; the
      string for the talk at index ``i`` holds one digit for every talk at
      an index greater than ``i`` (an upper-triangular matrix without the
      diagonal).
    """
    counts = _count_cooccurrences(preferences)
    total = len(preferences)
    # Every talk is counted against itself, so the keys are all talk ids.
    event_ids = sorted(counts)

    rows = []
    for index, x in enumerate(event_ids[:-1]):
        x_counts = counts[x]
        rows.append(''.join(
            str(_score(total, x_counts[y], x_counts[x], counts[y][y]))
            for y in event_ids[index + 1:]
        ))

    return {'event_ids': event_ids, 'event_corrs': rows}


def main(argv=None):
    """Read the preference file named in ``argv[1]`` and print compact JSON.

    Returns the process exit status: 0 on success, 2 when the file
    argument is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else 'correlate.py'
        print(f'usage: {prog} <halfnarp-export.json>', file=sys.stderr)
        return 2

    # Load the list of talk preferences: a list of lists of talk ids.
    with open(argv[1], encoding='utf-8') as data_file:
        preferences = json.load(data_file)

    print(json.dumps(correlate(preferences), separators=(',', ':')))
    return 0


if __name__ == '__main__':
    sys.exit(main())