summaryrefslogtreecommitdiff
path: root/src/postprocess/simi.py
diff options
context:
space:
mode:
author Dirk Engling <erdgeist@erdgeist.org> 2019-03-20 04:30:29 +0100
committer Dirk Engling <erdgeist@erdgeist.org> 2019-03-20 04:30:29 +0100
commit b4bf8417af0d8ebff2c50570c70fdecaf6a53ed9 (patch)
tree 4b9341a67c6fc9fd48cae5eecee79ff10ee0fe2f /src/postprocess/simi.py
parent c4a8bd34b41b2be26426ea01aafc69d41260cce5 (diff)
Add code to lookup new zip codes for 1995 entries and fix up streetnames
Diffstat (limited to 'src/postprocess/simi.py')
-rwxr-xr-x src/postprocess/simi.py | 11
1 files changed, 11 insertions, 0 deletions
diff --git a/src/postprocess/simi.py b/src/postprocess/simi.py
new file mode 100755
index 0000000..62ff1ff
--- /dev/null
+++ b/src/postprocess/simi.py
@@ -0,0 +1,11 @@
#!python
"""Print one integer similarity score per tab-separated string pair on stdin.

Each input line must hold exactly two fields separated by a single tab.
For every line the script prints the truncated percentage of the mean of
three textdistance similarity measures, computed on the casefolded fields.
"""

import textdistance
from sys import stdin


def _similarity_percent(x, y):
    """Return the averaged similarity of *x* and *y* as a truncated percentage.

    Both strings are casefolded so the comparison ignores case. The result
    is the mean of the Ratcliff-Obershelp, Jaro-Winkler and cosine
    normalized similarities, scaled by 100 and truncated with ``int``.
    """
    x = x.casefold()
    y = y.casefold()
    v = (
        textdistance.ratcliff_obershelp.normalized_similarity(x, y)
        + textdistance.jaro_winkler.normalized_similarity(x, y)
        + textdistance.cosine.normalized_similarity(x, y)
    )
    return int(100 * (v / 3))


# Iterate the stream directly so scores are produced line by line instead of
# buffering all of stdin first.
for line in stdin:
    # Strip the line terminator before splitting; otherwise it stays attached
    # to the second field and makes otherwise identical strings compare as
    # different.
    # A line without exactly two tab-separated fields raises ValueError here.
    x, y = line.rstrip('\r\n').split('\t')
    print(_similarity_percent(x, y))