summaryrefslogtreecommitdiff
path: root/makecolumns.sh
diff options
context:
space:
mode:
authorDirk Engling <erdgeist@erdgeist.org>2015-06-02 19:44:12 +0200
committerDirk Engling <erdgeist@erdgeist.org>2015-06-02 19:44:12 +0200
commit4c3a31b1b03e72e65e080bfcb017ceb9619847a4 (patch)
tree613a36c62789e01dc5ce314c10da603eaca64d95 /makecolumns.sh
parentb3a053c07a9f43b951196c62533d6dab0d3ccd3d (diff)
Clean up known broken input data, build join.c
Diffstat (limited to 'makecolumns.sh')
-rwxr-xr-xmakecolumns.sh26
1 files changed, 24 insertions, 2 deletions
diff --git a/makecolumns.sh b/makecolumns.sh
index 8131379..0854b32 100755
--- a/makecolumns.sh
+++ b/makecolumns.sh
@@ -112,6 +112,7 @@ handle_format_version_1() {
112 lam 04_Namenszusatz 05_Adresszusatz | tr '\t' ' ' | sed -E s/' +'/' '/g > 04_Zusaetze 112 lam 04_Namenszusatz 05_Adresszusatz | tr '\t' ' ' | sed -E s/' +'/' '/g > 04_Zusaetze
113 printf "done.\n" 113 printf "done.\n"
114 114
115 tidy_streetnames 07_Strasse
115} 116}
116 117
117handle_format_version_2() { 118handle_format_version_2() {
@@ -158,6 +159,7 @@ handle_format_version_2() {
158 lam 04_Namenszusatz 05_Adresszusatz | tr '\t' ' ' | sed -E s/' +'/' '/g > 04_Zusaetze 159 lam 04_Namenszusatz 05_Adresszusatz | tr '\t' ' ' | sed -E s/' +'/' '/g > 04_Zusaetze
159 printf "done.\n" 160 printf "done.\n"
160 161
162 tidy_streetnames 07_Strasse
161} 163}
162 164
163handle_format_version_3() { 165handle_format_version_3() {
@@ -243,7 +245,7 @@ handle_format_version_3() {
243 printf "done.\n" 245 printf "done.\n"
244 246
245 printf "Normalizing zusaetze ... " 247 printf "Normalizing zusaetze ... "
246 lam 04_Namenszusatz 05_Adresszusatz | tr '\t' ' ' | sed -E s/' +'/' '/g > 04_Zusaetze 248 lam 04_Namenszusatz 05_Adresszusatz | tr '\t' ' ' | sed -E -e 's/ +/ /g' -e 's/^ +//g' -e 's/ +$//g' > 04_Zusaetze
247 printf "done.\n" 249 printf "done.\n"
248 250
249 # If street names come in an extra file, extract 251 # If street names come in an extra file, extract
@@ -260,10 +262,13 @@ handle_format_version_3() {
260 # fix up known broken Strassennamen file 262 # fix up known broken Strassennamen file
261 [ `stat -f %z ${streets}` -eq 1642716 ] && printf '9. Str.\n91. Str.\n91er-Str.\n' >> 99_Strassenname 263 [ `stat -f %z ${streets}` -eq 1642716 ] && printf '9. Str.\n91. Str.\n91er-Str.\n' >> 99_Strassenname
262 264
265 tidy_streetnames 99_Strassenname
266
263 cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse 267 cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse
264 printf "done.\n" 268 printf "done.\n"
265 else 269 else
266 mv 07_unknown 07_Strasse 270 mv 07_unknown 07_Strasse
271 tidy_streetnames 07_Strasse
267 fi 272 fi
268 273
269 karto=$1/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt] 274 karto=$1/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt]
@@ -306,7 +311,13 @@ handle_format_version_4() {
306 find . -name file_\* -delete 311 find . -name file_\* -delete
307 printf "done.\n" 312 printf "done.\n"
308 313
309 mv column_0 01_Flags 314 # the 'did not object to inverse search' flag is insane and needs to be reversed
315 if grep -q ^40 column_0; then
316 awk '{ a=substr($0,1,1); printf "%x%x\n",index("5670123cdef89ab4",a)%16 ,substr($0,2,1) }' < column_0 > 01_Flags
317 rm column_0
318 else
319 mv column_0 01_Flags
320 fi
310 mv column_1 02_Nachname 321 mv column_1 02_Nachname
311 mv column_2 03_Vorname 322 mv column_2 03_Vorname
312 mv column_3 04_05_Namenszusatz_Addresszusatz 323 mv column_3 04_05_Namenszusatz_Addresszusatz
@@ -318,6 +329,8 @@ handle_format_version_4() {
318 mv column_9 13_Rufnummer 329 mv column_9 13_Rufnummer
319 mv column_10 14_15_Email_Webadresse 330 mv column_10 14_15_Email_Webadresse
320 331
332 tidy_streetnames 99_Strassenname
333
321 printf "Looking up street names from indexes ... " 334 printf "Looking up street names from indexes ... "
322 cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse 335 cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse
323 printf "done.\n" 336 printf "done.\n"
@@ -348,6 +361,15 @@ handle_format_version_4() {
348 printf "done.\n" 361 printf "done.\n"
349 fi 362 fi
350 rm file_* 363 rm file_*
364
365}
366
367tidy_streetnames () {
368 streets=$1
369
370 # Collapse every run of consecutive dots (anywhere in the line) into a single dot
371 # Terminate a trailing "Str"/"str" abbreviation that lacks a period (and may be followed by one space or colon) with a period
372 sed -Ei '' 's/\.+/./g;s/(S|s)tr( |:)?$/\1tr./' ${streets}
351} 373}
352 374
353# After function definitions, main() can use them 375# After function definitions, main() can use them