diff options
Diffstat (limited to 'src/postprocess/postprocess-1992.sh')
| -rw-r--r-- | src/postprocess/postprocess-1992.sh | 32 |
1 files changed, 32 insertions, 0 deletions
diff --git a/src/postprocess/postprocess-1992.sh b/src/postprocess/postprocess-1992.sh new file mode 100644 index 0000000..1e685d2 --- /dev/null +++ b/src/postprocess/postprocess-1992.sh | |||
| @@ -0,0 +1,32 @@ | |||
| 1 | # Generate a tab-separated file (paste default) with all relevant columns from the 1992_Q2 column files | ||
| 2 | paste 1992_Q2/{01_Flags,12_Vorwahl,12_Vorwahl_block,11_Ort,07_Strasse,08_Hausnummer,02_Nachname,03_Vorname,13_Rufnummer,10_Postleitzahl_West,10_Zustellamt_PLZOst} > 1992-fvvoshnvrpp.txt | ||
| 3 | |||
| 4 | # Generate lookup file from the 1995_Q0 column files; newlines and tabs become NUL bytes (relies on tr padding the shorter second set) | ||
| 5 | paste 1995_Q0/{12_Vorwahl,11_Ort,07_Strasse,08_Hausnummer,02_Nachname,03_Vorname,13_Rufnummer,10_Postleitzahl} | tr '\n\t' '\0' > 1995-voshnvrp.bin | ||
| 6 | |||
| 7 | # To debug in lldb: the next line is an lldb prompt command, not a shell command. NOTE(review): its .bin argument is named differently from the 1995-voshnvrp.bin generated above - confirm which file is meant | ||
| 8 | process launch -i 1992_testfile.txt -- 1995-vorwahl-ort-strasse-hnr-name-vorname-rufnummer-plz.bin | ||
| 9 | |||
| 10 | # Compile plz mapper (map_plz.c together with mystdlib.c from ../src/export/) | ||
| 11 | cc -O3 -o map_plz map_plz.c -I ../src/export/ ../src/export/mystdlib.c | ||
| 12 | |||
| 13 | # First pass: outputs mapped plz to 10_Postleitzahl, generates brutemap.txt; touch makes sure both auxiliary .bin inputs exist beforehand | ||
| 14 | touch brutemap_input.bin zip_simple_map.bin | ||
| 15 | ./map_plz 1995-voshnvrp.bin zip_simple_map.bin brutemap_input.bin < 1992-fvvoshnvrpp.txt > 10_Postleitzahl | ||
| 16 | |||
| 17 | # Old style (disabled, kept for reference): generate street name translation table from brutemap, | ||
| 18 | # only taking into account similar street names | ||
| 19 | # cut -f 3,4 brutemap.txt | tr '[:upper:]' '[:lower:]' | paste brutemap.txt - | cut -f 1-4,6,7 | ./jaro | cut -f 1-5 > brutemap_filtered.txt | ||
| 20 | |||
| 21 | # generate street name translation table from brutemap, | ||
| 22 | # only taking into account similar street names, new style (via simi.py) | ||
| 23 | cut -f 3,4 brutemap.txt | python simi.py | paste - brutemap.txt > brutemap_simifiltered.txt | ||
| 24 | |||
| 25 | # Sort and prepare similarity filtered files for the merge: count duplicate rows, put a tab after the count, convert newlines and tabs to NUL | ||
| 26 | cut -f 1-5 brutemap_simifiltered.txt | sort | uniq -c | sed -E $'s:^ *([[:digit:]]+) :\\1\t:' | tr '\n\t' '\0' > brutemap_input.bin | ||
| 27 | |||
| 28 | # compile zipmap into a binary format (sorted, deduplicated, NUL-terminated lines) | ||
| 29 | sort -u zip_mapfile.txt | tr '\n' '\0' > zip_simple_map.bin | ||
| 30 | |||
| 31 | # Redo the mapping with the data from brutemap and zipmap (overwrites 10_Postleitzahl from the first pass) | ||
| 32 | ./map_plz 1995-voshnvrp.bin zip_simple_map.bin brutemap_input.bin < 1992-fvvoshnvrpp.txt > 10_Postleitzahl | ||
