summary | refs | log | tree | commit | diff
path: root/makecolumns.sh
diff options
context:
space:
mode:
author Dirk Engling <erdgeist@erdgeist.org> 2019-06-29 18:51:00 +0200
committer Dirk Engling <erdgeist@erdgeist.org> 2019-06-29 18:51:00 +0200
commit fb3616e06ca20ffe78dfb00b962a2599a46b2a5f (patch)
tree fe7ad54e8cb17174b3296b63f41ad1f5b1038f8e /makecolumns.sh
parent b4bf8417af0d8ebff2c50570c70fdecaf6a53ed9 (diff)
Clean up 1992 post processing
Diffstat (limited to 'makecolumns.sh')
-rwxr-xr-x makecolumns.sh 13
1 files changed, 10 insertions, 3 deletions
diff --git a/makecolumns.sh b/makecolumns.sh
index 3f05a61..a505d31 100755
--- a/makecolumns.sh
+++ b/makecolumns.sh
@@ -115,10 +115,17 @@ handle_format_version_1() {
115 mv 11_unknown 12_Vorwahl 115 mv 11_unknown 12_Vorwahl
116 mv 12_unknown 13_Rufnummer 116 mv 12_unknown 13_Rufnummer
117 mv 13_unknown 11_Ort 117 mv 13_unknown 11_Ort
118 mv 14_unknown 10_Postleitzahl 118 mv 14_unknown 10_Postleitzahl_West
119 mv 15_unknown 12_Vorwahl_block
120
121 printf "Splitting appartement to zusaetze ... "
122 paste 07_Strasse 08_Hausnummer 09_unknown | sed -E $'s:^(.*)\;([0-9]+.*)\t(.*)\t.*$:\\1\t\\2\tWohnung \\3:;s:^(.*)tr(\t.*\t.*)$:\\1tr.\\2:' > tm_unknown
123 cut -f 1 tm_unknown > 07_Strasse
124 cut -f 2 tm_unknown > 08_Hausnummer
125 printf "done.\n"
119 126
120 printf "Normalizing zusaetze ... " 127 printf "Normalizing zusaetze ... "
121 sed -E -e 's:^, +:u. :' 09_unknown > 04_Namenszusatz 128 cut -f 3 tm_unknown | sed -E -e 's:^, +:u. :' > 04_Namenszusatz
122 sed -E -e 's:^, +:u. :' 04_unknown > 05_Adresszusatz 129 sed -E -e 's:^, +:u. :' 04_unknown > 05_Adresszusatz
123 paste 04_Namenszusatz 05_Adresszusatz | awk '{$1=$1};1' > 04_Zusaetze 130 paste 04_Namenszusatz 05_Adresszusatz | awk '{$1=$1};1' > 04_Zusaetze
124 printf "done.\n" 131 printf "done.\n"
@@ -501,7 +508,7 @@ tidy_columns () {
501 printf "Tidying up streetnames ... " 508 printf "Tidying up streetnames ... "
502 # Replace any dots at end of line by a single one 509 # Replace any dots at end of line by a single one
503 # finish any str abbreviation without a period with a period 510 # finish any str abbreviation without a period with a period
504 sed -E 's/\.+/./g;s/(S|s)tr( |:)?$/\1tr./;s/(.*)-(.*) -/\1-\2-Str./;s/ -$/ Str./;s/-$/str./' 07_Strasse | iconv -f iso-8859-15 -t utf-8 > 07_Strasse.new 511 sed -E 's/\.+/./g;s/(S|s)tr( |:)?$/\1tr./;s/(.*)-(.*) -/\1-\2-Str./;s/ -$/ Str./;s/-$/str./;s/^(.*-.*) Str\.?$/\1-Str./' 07_Strasse | iconv -f iso-8859-15 -t utf-8 > 07_Strasse.new
505 mv 07_Strasse.new 07_Strasse 512 mv 07_Strasse.new 07_Strasse
506 printf "done.\n" 513 printf "done.\n"
507 514