From 4e33872678d38319e3bb6bd98584dcb78aae5940 Mon Sep 17 00:00:00 2001
From: Dirk Engling
Date: Sat, 18 Jan 2014 06:47:38 +0100
Subject: Implement street name extraction

---
 src/makecolumns.sh | 38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/src/makecolumns.sh b/src/makecolumns.sh
index c4b5d24..450cb5d 100644
--- a/src/makecolumns.sh
+++ b/src/makecolumns.sh
@@ -24,12 +24,15 @@ main() {
     mkdir -p ../work_`basename $1`
     cd ../work_`basename $1` || exit 1
 
+    unset strassen; [ -f $1/dat/strassen.dat ] && strassen=$1/dat/strassen.dat
+
     if [ -f "$1/phonebook.db" ]; then
         handle_new_format $1
     elif [ -f "$1/DAT/TEILN.DAT" ]; then
-        handle_old_format "$1/DAT/TEILN.DAT"
+        handle_old_format "$1/DAT/TEILN.DAT" "${strassen}"
     elif [ -f "$1/dat/teiln.dat" ]; then
-        handle_old_format "$1/dat/teiln.dat"
+        echo handle_old_format "$1/dat/teiln.dat" "${strassen}"
+        handle_old_format "$1/dat/teiln.dat" "${strassen}"
     else
         echo "Not a recognized Telefonbuch folder"
     fi
@@ -44,10 +47,27 @@ get_dword() {
 handle_old_format() {
     # Clear old files. Be very careful, we could
     # have ended up in an unexpected directory, after all.
-    find -depth 1 -regex ^[0-9]+$ -delete
+    find -E . -depth 1 -regex '^\./[0123456789]+' -delete
     rm ??_*
 
-    # First the simple stuff, extract teiln.dat
+    # If street names come in an extra file, extract
+    # street names first
+    if [ "$2" ]; then
+        mkdir strassen
+        cd strassen/
+
+        ../../bin/extractblocks $2
+
+        # This should leave us with a bunch of .lha files
+        for archive in *.lha; do lha x ${archive}; done
+        find . -name \*.lha -delete
+        cd ..
+
+        cat strassen/* | tr '\n\0' '\t\n' > 99_Strassenname
+        rm -r strassen/
+    fi
+
+    # Then extract teiln.dat
     ../bin/extractblocks $1
 
     # This should leave us with a bunch of .lha files
@@ -115,7 +135,7 @@ handle_old_format() {
     done
 
     # wipe all temporary extracted files
-    find -depth 1 -regex ^[0-9]+$ -delete
+    find -E . -depth 1 -regex '^\./[0123456789]+' -delete
 
     # rename our columns extracted from the table file
     mv 04_unknown 04_Namenszusatz
@@ -130,8 +150,14 @@ handle_old_format() {
     [ -f 14_unknown ] && mv 14_unknown 14_Email
     [ -f 15_unknown ] && mv 15_unknown 15_Webadresse
 
-    # TODO: extract street names if 07_unknown contains street indexes
+    # extract street names if 07_unknown contains street indexes
     # instead of street names
+    if [ -f 99_Strassenname ]; then
+        mv 07_unknown 07_Strassenindex
+        cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse
+    else
+        mv 07_unknown 07_Strasse
+    fi
 }
 
 handle_new_format() {
-- 
cgit v1.2.3