From 4e33872678d38319e3bb6bd98584dcb78aae5940 Mon Sep 17 00:00:00 2001
From: Dirk Engling <erdgeist@erdgeist.org>
Date: Sat, 18 Jan 2014 06:47:38 +0100
Subject: Implement street name extraction

---
 src/makecolumns.sh | 38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/makecolumns.sh b/src/makecolumns.sh
index c4b5d24..450cb5d 100644
--- a/src/makecolumns.sh
+++ b/src/makecolumns.sh
@@ -24,12 +24,15 @@ main() {
     mkdir -p ../work_`basename $1`
     cd ../work_`basename $1` || exit 1
 
+    # Optional separate street-name table; strassen stays unset when the file is absent
+    unset strassen; [ -f "$1/dat/strassen.dat" ] && strassen="$1/dat/strassen.dat"
     if [ -f "$1/phonebook.db" ]; then
         handle_new_format $1
     elif [ -f "$1/DAT/TEILN.DAT" ]; then
-        handle_old_format "$1/DAT/TEILN.DAT"
+        handle_old_format "$1/DAT/TEILN.DAT" "${strassen}"
     elif [ -f "$1/dat/teiln.dat" ]; then
-        handle_old_format "$1/dat/teiln.dat"
+        echo handle_old_format "$1/dat/teiln.dat" "${strassen}"
+        handle_old_format "$1/dat/teiln.dat" "${strassen}"
     else
         echo "Not a recognized Telefonbuch folder"
     fi
@@ -44,10 +47,27 @@ get_dword() {
 handle_old_format() {
     # Clear old files. Be very careful, we could
     # have ended up in an unexpected directory, after all.
-    find -depth 1 -regex ^[0-9]+$ -delete
+    find -E . -depth 1 -regex '^\./[0123456789]+' -delete
     rm ??_*
 
-    # First the simple stuff, extract teiln.dat
+    # If street names come in a separate file ($2), extract
+    # them first into 99_Strassenname
+    if [ "$2" ]; then
+        mkdir strassen
+        cd strassen/ || exit 1  # never unpack/delete outside strassen/
+
+        ../../bin/extractblocks "$2"
+
+        # This should leave us with a bunch of .lha files
+        for archive in *.lha; do lha x "${archive}"; done
+        find . -name \*.lha -delete
+        cd ..
+
+        cat strassen/* | tr '\n\0' '\t\n' > 99_Strassenname
+        rm -r strassen/
+    fi
+
+    # Then extract teiln.dat
     ../bin/extractblocks $1
 
     # This should leave us with a bunch of .lha files
@@ -115,7 +135,7 @@ handle_old_format() {
     done
 
     # wipe all temporary extracted files
-    find -depth 1 -regex ^[0-9]+$ -delete
+    find -E . -depth 1 -regex '^\./[0123456789]+' -delete
 
     # rename our columns extracted from the table file
     mv 04_unknown 04_Namenszusatz
@@ -130,8 +150,14 @@ handle_old_format() {
     [ -f 14_unknown ] && mv 14_unknown 14_Email
     [ -f 15_unknown ] && mv 15_unknown 15_Webadresse
 
-    # TODO: extract street names if 07_unknown contains street indexes
+    # extract street names if 07_unknown contains street indexes
     # instead of street names
+    if [ -f 99_Strassenname ]; then
+        mv 07_unknown 07_Strassenindex
+        cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse
+    else
+        mv 07_unknown 07_Strasse
+    fi
 }
 
 handle_new_format() {
-- 
cgit v1.2.3