From fb3616e06ca20ffe78dfb00b962a2599a46b2a5f Mon Sep 17 00:00:00 2001 From: Dirk Engling Date: Sat, 29 Jun 2019 18:51:00 +0200 Subject: Clean up 1992 post processing --- makecolumns.sh | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'makecolumns.sh') diff --git a/makecolumns.sh b/makecolumns.sh index 3f05a61..a505d31 100755 --- a/makecolumns.sh +++ b/makecolumns.sh @@ -115,10 +115,17 @@ handle_format_version_1() { mv 11_unknown 12_Vorwahl mv 12_unknown 13_Rufnummer mv 13_unknown 11_Ort - mv 14_unknown 10_Postleitzahl + mv 14_unknown 10_Postleitzahl_West + mv 15_unknown 12_Vorwahl_block + + printf "Splitting appartement to zusaetze ... " + paste 07_Strasse 08_Hausnummer 09_unknown | sed -E $'s:^(.*)\;([0-9]+.*)\t(.*)\t.*$:\\1\t\\2\tWohnung \\3:;s:^(.*)tr(\t.*\t.*)$:\\1tr.\\2:' > tm_unknown + cut -f 1 tm_unknown > 07_Strasse + cut -f 2 tm_unknown > 08_Hausnummer + printf "done.\n" printf "Normalizing zusaetze ... " - sed -E -e 's:^, +:u. :' 09_unknown > 04_Namenszusatz + cut -f 3 tm_unknown | sed -E -e 's:^, +:u. :' > 04_Namenszusatz sed -E -e 's:^, +:u. :' 04_unknown > 05_Adresszusatz paste 04_Namenszusatz 05_Adresszusatz | awk '{$1=$1};1' > 04_Zusaetze printf "done.\n" @@ -501,7 +508,7 @@ tidy_columns () { printf "Tidying up streetnames ... " # Replace any dots at end of line by a single one # finish any str abbreviation without a period with a period - sed -E 's/\.+/./g;s/(S|s)tr( |:)?$/\1tr./;s/(.*)-(.*) -/\1-\2-Str./;s/ -$/ Str./;s/-$/str./' 07_Strasse | iconv -f iso-8859-15 -t utf-8 > 07_Strasse.new + sed -E 's/\.+/./g;s/(S|s)tr( |:)?$/\1tr./;s/(.*)-(.*) -/\1-\2-Str./;s/ -$/ Str./;s/-$/str./;s/^(.*-.*) Str\.?$/\1-Str./' 07_Strasse | iconv -f iso-8859-15 -t utf-8 > 07_Strasse.new mv 07_Strasse.new 07_Strasse printf "done.\n" -- cgit v1.2.3