#!/usr/bin/env python3
"""Score the similarity of tab-separated string pairs read from stdin.

Each input line must hold exactly two fields separated by a single tab.
For every line one integer is printed to stdout: the mean of the
Ratcliff/Obershelp, Jaro-Winkler and cosine normalized similarities of the
two case-folded fields, scaled by 100 and truncated towards zero.

Provenance: introduced as src/postprocess/simi.py in commit b4bf8417
("Add code to lookup new zip codes for 1995 entries and fix up
streetnames").
"""

from sys import stdin

import textdistance

# Algorithms whose normalized similarities are averaged into one score.
_ALGORITHMS = (
    textdistance.ratcliff_obershelp,
    textdistance.jaro_winkler,
    textdistance.cosine,
)


def similarity_score(first, second):
    """Return the averaged similarity of two strings as an integer percentage.

    Both strings are case-folded before comparison, so the score is
    case-insensitive. The result is ``int(100 * mean)`` where ``mean`` is the
    arithmetic mean of the normalized similarities in ``_ALGORITHMS``.
    """
    first = first.casefold()
    second = second.casefold()
    total = sum(
        algorithm.normalized_similarity(first, second)
        for algorithm in _ALGORITHMS
    )
    return int(100 * (total / len(_ALGORITHMS)))


def main():
    """Read string pairs from stdin and print one score per input line.

    Raises:
        ValueError: if a line does not contain exactly two tab-separated
            fields; the message names the offending line number.
    """
    # Iterate the stream directly so scores are emitted as input arrives
    # instead of buffering the whole input first.
    for lineno, line in enumerate(stdin, start=1):
        # Drop the line terminator before splitting; otherwise it stays
        # attached to the second field and lowers every score (identical
        # fields would never reach 100).
        fields = line.rstrip("\r\n").split("\t")
        if len(fields) != 2:
            raise ValueError(
                "line {}: expected 2 tab-separated fields, got {}".format(
                    lineno, len(fields)
                )
            )
        print(similarity_score(*fields))


if __name__ == "__main__":
    main()