#!/bin/sh
#
# Export the contents of a "Telefonbuch" phone book folder into one text
# file per column (01_Flags, 02_Nachname, 03_Vorname, ...) inside a
# ../work_<name> directory.
#
# Usage: <this script> phonebookdirectory
#
# The script uses BSD userland tools (find -E, stat -f, jot, lam) as well as
# lha, xxd, bc, the "el" tool and the project binaries ../bin/extractblocks,
# ../bin/decompress and ../bin/mapcoords (built through "make all").
# It is expected to be started from the directory that holds the Makefile,
# because the work directory and ../bin are addressed relative to it.

export LANG=C
export LC_CTYPE=C
export LC_ALL=C

# Entry point.
# Arguments: $1 - phone book directory (absolute or relative)
# Globals:   EL (written) - path of the el binary
# Exits non-zero when el is missing, the argument count is wrong, the work
# directory cannot be entered or the folder layout is not recognized.
main() {
  # Resolve the script directory to an absolute path: ${EL} is used after
  # changing into the work directory, where a relative path would dangle.
  script_dir=$(CDPATH= cd -- "$(dirname "$0")" 2>/dev/null && pwd) \
    || script_dir=$(dirname "$0")

  # A script-local el takes precedence over the system wide one.
  [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el
  [ -f "${script_dir}/../bin/el" ] && EL="${script_dir}/../bin/el"

  if [ -z "${EL}" ]; then
    echo "el not found. Get it at 'cvs -d :pserver:anoncvs@cvs.erdgeist.org:/home/cvsroot co el'" >&2
    exit 1
  fi

  if [ $# -ne 1 ]; then
    echo "Syntax: $0 [phonebookdirectory]" >&2
    exit 1
  fi

  # Compile all the binaries
  make all

  # NOTE(review): the white_ prefix is stripped from the argument as given,
  # i.e. only when the argument itself starts with "white_". Kept unchanged
  # so that existing work directory names stay the same.
  work_dir=../work_$(basename "${1#white_}")

  # Make the phone book directory absolute before leaving the current
  # directory; otherwise relative arguments stop resolving after the cd
  # below (and again inside the strassen/ sub directory). If it cannot be
  # resolved, fall through with the original value so the layout detection
  # reports the problem.
  source_dir=$(CDPATH= cd -- "$1" 2>/dev/null && pwd) || source_dir=$1

  mkdir -p "${work_dir}"
  cd "${work_dir}" || exit 1

  # Some editions ship the street names in a separate file.
  strassen=
  if [ -f "${source_dir}/dat/strassen.dat" ]; then
    strassen=${source_dir}/dat/strassen.dat
  fi

  if [ -f "${source_dir}/phonebook.db" ]; then
    handle_new_format "${source_dir}"
  elif [ -f "${source_dir}/DAT/TEILN.DAT" ]; then
    handle_old_format "${source_dir}/DAT/TEILN.DAT" "${strassen}"
  elif [ -f "${source_dir}/dat/teiln.dat" ]; then
    echo handle_old_format "${source_dir}/dat/teiln.dat" "${strassen}"
    handle_old_format "${source_dir}/dat/teiln.dat" "${strassen}"
  else
    echo "Not a recognized Telefonbuch folder" >&2
    exit 1
  fi

  cd ..
}

# Print one unsigned 32 bit value read from a file, as od decodes it.
# Arguments: $1 - file
#            $2 - index of the dword to read (optional, defaults to 0)
# Outputs:   the value in decimal; 0 if od delivered no value
get_dword() {
  # od prints "<offset> <value>"; after set -- the value is $2.
  # Deliberately unquoted: the od output must be split into words.
  set -- $(od -tu4 -N4 -j"$(( 4 * ${2:-0} ))" "$1")
  printf '%d\n' "${2:-0}"
}

# Run the block extractor on a data file in the current directory, unpack
# every .lha archive found afterwards and remove the archives.
# Arguments: $1 - path of the extractblocks binary
#            $2 - data file to extract
unpack_blocks() {
  "$1" "$2"

  # This should leave us with a bunch of .lha files
  for archive in *.lha; do
    # Unmatched glob stays literal; do not hand that to lha.
    [ -e "${archive}" ] || continue
    lha x "${archive}"
  done
  find . -name '*.lha' -delete
}

# Print the name of the chunk file three positions after the given one.
# Arguments: $1 - current (zero padded) numeric file name
#            $2 - number of digits to pad the result to
next_chunk_name() {
  # bc is needed because builtin arithmetic treats numbers with leading
  # zeros as octals.
  printf "%0${2}d" "$(printf '%s + 3\n' "$1" | bc)"
}

# Export a phone book in the older teiln.dat layout.
# Arguments: $1 - path of the teiln.dat file
#            $2 - path of a separate strassen.dat file, or empty
# Globals:   EL (read)
# Writes the NN_<column> files into the current directory.
handle_old_format() {
  # Clear old files. Be very careful, we could
  # have ended up in an unexpected directory, after all.
  find -E . -depth 1 -regex '^\./[0123456789]+' -delete
  rm -f ??_*

  # If street names come in an extra file, extract
  # street names first
  if [ "$2" ]; then
    mkdir -p strassen
    # Never continue with the deletions below in the wrong directory.
    cd strassen/ || exit 1
    unpack_blocks ../../bin/extractblocks "$2"
    cd ..
    cat strassen/* | tr '\n\0' '\t\n' > 99_Strassenname
    rm -r strassen/
  fi

  # Then extract teiln.dat
  unpack_blocks ../bin/extractblocks "$1"

  # See how long each filename is
  filename_len=$(( $(ls | head -n 1 | wc -c) - 1 ))

  # from 2000F on file 0+3*n is table, so make it default
  table_file=$(printf "%0${filename_len}d" 0)
  vname_file=$(printf "%0${filename_len}d" 2)

  # if supposed vname file is larger than table file,
  # we're having a pre-2000F layout, so switch accordingly
  if [ "$(stat -f %z "${table_file}")" -lt "$(stat -f %z "${vname_file}")" ]; then
    table_file=$(printf "%0${filename_len}d" 2)
    nname_file=$(printf "%0${filename_len}d" 0)
    vname_file=$(printf "%0${filename_len}d" 1)
  else
    nname_file=$(printf "%0${filename_len}d" 1)
  fi

  # Table file has a table header with identical count
  # to nname file's header. Verify this
  if [ "$(get_dword "${nname_file}")" -ne "$(get_dword "${table_file}")" ]; then
    echo "Unknown layout." >&2
    exit 1
  fi

  # Now loop over all files and dump them
  while [ -f "${nname_file}" ]; do
    # Get number of entries in this round
    count=$(get_dword "${nname_file}")

    # Get offset into first nname (tail -c + is 1-based, hence the + 1)
    nname_off=$(( $(get_dword "${nname_file}" 1) + 1 ))

    # Now get the flags before the nnames: first character of every
    # record is the flag, the remainder is the name.
    tail -c +"${nname_off}" "${nname_file}" | tr '\n\0' '\t\n' \
      | head -n "${count}" | cut -c -1 >> 01_Flags
    tail -c +"${nname_off}" "${nname_file}" | tr '\n\0' '\t\n' \
      | head -n "${count}" | cut -c 2- >> 02_Nachname

    # Extract the vnames
    tr '\n\0' '\t\n' < "${vname_file}" | head -n "${count}" >> 03_Vorname

    # Offset into first table entry tells us how many
    # fields are in table file
    table_entries=$(( $(get_dword "${table_file}" 1) / 4 - 1 ))

    # Now iterate over all entries in the table file
    for table_index in $(jot "${table_entries}"); do
      table_off=$(get_dword "${table_file}" "${table_index}")
      tail -c +"$(( table_off + 1 ))" "${table_file}" | tr '\n\0' '\t\n' \
        | head -n "${count}" >> "$(printf '%02d_unknown' "$(( table_index + 3 ))")"
    done

    # Advance the filenames to the next group of three chunk files.
    nname_file=$(next_chunk_name "${nname_file}" "${filename_len}")
    vname_file=$(next_chunk_name "${vname_file}" "${filename_len}")
    table_file=$(next_chunk_name "${table_file}" "${filename_len}")
  done

  # wipe all temporary extracted files
  find -E . -depth 1 -regex '^\./[0123456789]+' -delete

  # rename our columns extracted from the table file
  mv 04_unknown 04_Namenszusatz
  mv 05_unknown 05_Adresszusatz
  mv 06_unknown 06_Ortszusatz
  mv 08_unknown 08_Hausnummer
  mv 09_unknown 09_Verweise
  mv 10_unknown 10_Postleitzahl
  mv 11_unknown 11_Ort
  mv 12_unknown 12_Vorwahl
  mv 13_unknown 13_Rufnummer
  [ -f 14_unknown ] && mv 14_unknown 14_Email
  [ -f 15_unknown ] && mv 15_unknown 15_Webadresse

  # extract street names if 07_unknown contains street indexes
  # instead of street names
  if [ -f 99_Strassenname ]; then
    mv 07_unknown 07_Strassenindex
    cut -d ';' -f 1 07_Strassenindex | "${EL}" -0x 99_Strassenname > 07_Strasse
  else
    mv 07_unknown 07_Strasse
  fi
}

# Export a phone book in the phonebook.db layout.
# Arguments: $1 - phone book directory containing phonebook.db and streets.tl
# Globals:   EL (read)
# Writes the NN_<column> files into the current directory; adds
# 16_Koordinaten when one of the zip-streets*-geo.tl files is present.
handle_new_format() {
  # Literal tab without relying on $'...' quoting, which not every /bin/sh
  # understands. Command substitution only strips trailing newlines.
  tab=$(printf '\t')

  echo "Working on $1. Detected post-2003 Telefonbuch version."

  printf "Extracting street names ... "
  ../bin/decompress "$1/streets.tl"
  cat file_* | tr '\n\0' '\t\n' > 99_Strassenname
  rm file_*
  printf "done.\n"

  printf "Extracting phonebook.db ... "
  ../bin/decompress "$1/phonebook.db" | grep -v appropriate
  # Arithmetic expansion strips the padding some wc implementations emit.
  numfiles=$(( $(find . -name 'file_*' | wc -l) ))
  printf "done.\nFound %d entries.\n" "${numfiles}"

  # The decompressed chunks are interleaved: chunk n belongs to column
  # n modulo 11. Column 0 is dumped as hex bytes, all others as text.
  printf "Splitting decompressed chunks into their columns ... "
  for column in $(jot - 0 10 1); do
    for file in $(jot - "${column}" "$(( numfiles - 1 ))" 11); do
      acton=$(printf 'file_%05X' "${file}")
      if [ "${column}" = 0 ]; then
        xxd -ps -c1 "${acton}" >> column_0
      else
        tr '\n\0' '\t\n' < "${acton}" >> "column_${column}"
      fi
    done
  done
  printf "done.\n"

  printf "Cleaning up decompressed chunks ... "
  find . -name 'file_*' -delete
  printf "done.\n"

  mv column_0 01_Flags
  mv column_1 02_Nachname
  mv column_2 03_Vorname
  mv column_3 04_05_Namenszusatz_Addresszusatz
  mv column_4 09_Verweise
  mv column_5 07_08_Strassenindex_Hausnummer
  mv column_6 12_Vorwahl
  mv column_7 10_Postleitzahl
  mv column_8 11_Ort
  mv column_9 13_Rufnummer
  mv column_10 14_15_Email_Webadresse

  printf "Looing up street names from indexes ... "
  cut -f 1 07_08_Strassenindex_Hausnummer | "${EL}" -0 99_Strassenname > 07_Strasse
  printf "done.\n"

  # Append a tab to every line so that cut -f 2 yields an empty field
  # (instead of the whole line) for entries without a house number.
  printf "Splitting house numbers ... "
  sed -E "s:\$:${tab}:" < 07_08_Strassenindex_Hausnummer | cut -f 2 > 08_Hausnummer
  printf "done.\n"

  if [ -f "$1/zip-streets-hn-geo.tl" ]; then
    printf "Extracting geo coordinates (precision: house number) ... "
    ../bin/decompress "$1/zip-streets-hn-geo.tl"
    cat file_* > 90_Geokoordinaten_hnr
    printf "done.\n"
    printf "Looking up geo coordinates for each phonebook entry ... "
    lam 10_Postleitzahl -s "${tab}" 07_Strasse -s "${tab}" 08_Hausnummer \
      | ../bin/mapcoords 90_Geokoordinaten_hnr > 16_Koordinaten
    printf "done.\n"
  elif [ -f "$1/zip-streets-geo.tl" ]; then
    printf "Extracting geo coordinates (precision: street) ... "
    ../bin/decompress "$1/zip-streets-geo.tl"
    cat file_* > 91_Geokoordinaten_str
    printf "done.\n"
    printf "Looking up geo coordinates for each phonebook entry ... "
    lam 10_Postleitzahl -s "${tab}" 07_Strasse \
      | ../bin/mapcoords 91_Geokoordinaten_str > 16_Koordinaten
    printf "done.\n"
  fi

  # No file_* chunks are left when neither geo file exists; -f keeps rm
  # from complaining in that case.
  rm -f file_*
}

# After function definitions, main() can use them
main "$@"