From 4c3a31b1b03e72e65e080bfcb017ceb9619847a4 Mon Sep 17 00:00:00 2001 From: Dirk Engling Date: Tue, 2 Jun 2015 19:44:12 +0200 Subject: Cleanup known broken input data, build join.c --- Makefile | 5 ++++- makecolumns.sh | 26 ++++++++++++++++++++++++-- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 22a1693..500c0ad 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -BINARIES=bin/extract_version_1 bin/extract_version_2 bin/extract_version_3 bin/extract_version_4 bin/split_version_2 bin/split_version_3 bin/map_coords bin/convert_coords +BINARIES=bin/extract_version_1 bin/extract_version_2 bin/extract_version_3 bin/extract_version_4 bin/split_version_2 bin/split_version_3 bin/map_coords bin/convert_coords bin/join CFLAGS += -W -Wall -Wextra -O3 # -Weverything -Wno-cast-align -Wno-padded all: $(BINARIES) @@ -27,6 +27,9 @@ bin/map_coords: src/export/map_coords.c src/export/mystdlib.c bin/convert_coords: src/export/convert_coords.c $(CC) $(CFLAGS) -o $@ -lm src/export/convert_coords.c +bin/join: src/postprocess/join.c src/export/mystdlib.c + $(CC) $(CFLAGS) -o $@ src/postprocess/join.c src/export/mystdlib.c -Isrc/export + .PHONY: clean clean: @rm -f $(BINARIES) diff --git a/makecolumns.sh b/makecolumns.sh index 8131379..0854b32 100755 --- a/makecolumns.sh +++ b/makecolumns.sh @@ -112,6 +112,7 @@ handle_format_version_1() { lam 04_Namenszusatz 05_Adresszusatz | tr '\t' ' ' | sed -E s/' +'/' '/g > 04_Zusaetze printf "done.\n" + tidy_streetnames 07_Strasse } handle_format_version_2() { @@ -158,6 +159,7 @@ handle_format_version_2() { lam 04_Namenszusatz 05_Adresszusatz | tr '\t' ' ' | sed -E s/' +'/' '/g > 04_Zusaetze printf "done.\n" + tidy_streetnames 07_Strasse } handle_format_version_3() { @@ -243,7 +245,7 @@ handle_format_version_3() { printf "done.\n" printf "Normalizing zusaetze ... " - lam 04_Namenszusatz 05_Adresszusatz | tr '\t' ' ' | sed -E s/' +'/' '/g > 04_Zusaetze + lam 04_Namenszusatz 05_Adresszusatz | tr '\t' ' ' | sed -E -e 's/ +/ /g' -e 's/^ +//g' -e 's/ +$//g' > 04_Zusaetze printf "done.\n" # If street names come in an extra file, extract @@ -260,10 +262,13 @@ handle_format_version_3() { # fix up known broken Strassennamen file [ `stat -f %z ${streets}` -eq 1642716 ] && printf '9. Str.\n91. Str.\n91er-Str.\n' >> 99_Strassenname + tidy_streetnames 99_Strassenname + cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse printf "done.\n" else mv 07_unknown 07_Strasse + tidy_streetnames 07_Strasse fi karto=$1/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt] @@ -306,7 +311,13 @@ handle_format_version_4() { find . -name file_\* -delete printf "done.\n" - mv column_0 01_Flags + # the 'did not object to inverse search' flag is insane and needs to be reversed + if grep -q ^40 column_0; then + awk '{ a=substr($0,1,1); printf "%x%x\n",index("5670123cdef89ab4",a)%16 ,substr($0,2,1) }' < column_0 > 01_Flags + rm column_0 + else + mv column_0 01_Flags + fi mv column_1 02_Nachname mv column_2 03_Vorname mv column_3 04_05_Namenszusatz_Addresszusatz @@ -318,6 +329,8 @@ handle_format_version_4() { mv column_9 13_Rufnummer mv column_10 14_15_Email_Webadresse + tidy_streetnames 99_Strassenname + printf "Looking up street names from indexes ... " cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse printf "done.\n" @@ -348,6 +361,15 @@ handle_format_version_4() { printf "done.\n" fi rm file_* + +} + +tidy_streetnames () { + streets=$1 + + # Replace any dots at end of line by a single one + # finish any str abbreviation without a period with a period + sed -Ei '' 's/\.+/./g;s/(S|s)tr( |:)?$/\1tr./' ${streets} } # After function definitions, main() can use them -- cgit v1.2.3