#!/bin/sh
#
# Splits the data files of a Telefonbuch directory into one text file per
# column inside ../work_<name>.
#
# Usage: run from the directory holding the Makefile:
#          <this script> phonebookdirectory
#
# The script runs `make all` in the current directory and appends
# `pwd`/../bin/ to PATH. It relies on these external commands:
# el, extractblocks, decompress, mapcoords, lha, xxd, lam, od, bc and the
# BSD flavours of find (-E, -depth 1) and stat (-f).
#
# Exit status: 0 on success, 1 on usage errors, a missing `el`, an
# unrecognized phonebook directory or an unknown old-format layout.

export LANG=C
export LC_CTYPE=C
export LC_ALL=C
export PATH="${PATH}:$(pwd)/../bin/"

# A literal tab; portable replacement for the $'\t' quoting extension.
TAB=$(printf '\t')

# Prints its first argument. Used to reduce the expansion of a glob to a
# single path; an unmatched glob yields the literal pattern, which then
# simply fails any following -f test.
first_match() {
  printf '%s\n' "$1"
}

# Entry point.
# $1 - phonebook directory
# Locates `el`, builds the helper binaries, recreates the working directory
# and dispatches to the handler matching the detected on-disk format.
# Returns 1 if the directory is not recognized.
main() {
  [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el
  script_dir=$(dirname "$0")
  [ -f "${script_dir}/../bin/el" ] && EL="${script_dir}/../bin/el"

  if [ -z "${EL}" ]; then
    echo "el not found. Get it at 'cvs -d :pserver:anoncvs@cvs.erdgeist.org:/home/cvsroot co el'"
    exit 1
  fi

  # An empty argument would make the cleanup below operate on "../work_".
  if [ $# -ne 1 ] || [ -z "$1" ]; then
    echo "Syntax: $0 [phonebookdirectory]"
    exit 1
  fi

  # Compile all the binaries
  make all

  # Resolve the phonebook directory before changing directories: the
  # handlers descend into sub-directories of the working directory, where
  # a relative path would no longer point at the same place. If it cannot
  # be resolved, keep the argument so the format check below reports it.
  source_dir=$(CDPATH= cd -- "$1" 2>/dev/null && pwd) || source_dir=$1

  work_dir=../work_$(basename "${1#white_}")

  printf "Cleaning up old working directory ... "
  rm -rf "${work_dir}"
  printf "done.\n"
  mkdir -p "${work_dir}"
  cd "${work_dir}" || exit 1

  status=0
  if [ -f "${source_dir}/phonebook.db" ]; then
    handle_new_format "${source_dir}"
  elif [ -f "$(first_match "${source_dir}"/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt])" ]; then
    handle_old_format "${source_dir}"
  else
    echo "Not a recognized Telefonbuch folder"
    status=1
  fi
  cd ..
  return "${status}"
}

# Prints one unsigned 32 bit value read from a file, as decimal.
# $1 - file
# $2 - index of the dword to read (optional, default 0); byte offset is 4*$2
# Prints 0 when od yields no value.
get_dword() {
  # od prints "<address> <value>"; after set -- the value is $2.
  # The expansion is left unquoted on purpose so it is split into fields.
  set -- $(od -tu4 -N4 -j$(( 4 * ${2:-0} )) "${1}")
  printf "%d\n" "${2:-0}"
}

# Extracts the chunks of an old-format data file into the current
# directory and unpacks every resulting *.lha archive, printing progress.
# $1 - data file handed to extractblocks
# $2 - name shown in the progress messages
# Uses the global variables number_of_files, reported and processed.
do_decompress_old() {
  printf 'Extracting %s chunks ... ' "$2"
  extractblocks "${1}"
  printf "done.\n"

  printf 'Decompressing %s chunks ... ' "$2"
  number_of_files=$(find . -name '*.lha' | wc -l)
  reported=0
  processed=0
  for archive in *.lha; do
    # Without any archive the pattern stays literal; skipping it also
    # avoids dividing by a zero number_of_files below.
    [ -e "${archive}" ] || continue
    lha x "${archive}" > /dev/null
    rm "${archive}"
    # Progress is reported in steps of 5%.
    if [ $(( processed * 20 / number_of_files )) -gt "${reported}" ]; then
      reported=$(( reported + 1 ))
      printf "%d%% " $(( reported * 5 ))
    fi
    processed=$(( processed + 1 ))
  done
  [ "${reported}" -lt 10 ] && printf "100%% "
  printf "done.\n"
}

# Decompresses one old-format data file in a scratch directory and
# concatenates all extracted pieces into a single output file.
# $1 - data file; must remain valid after changing into the scratch
#      directory (main passes absolute paths)
# $2 - name shown in the progress messages
# $3 - output file, relative to the current directory
# $4 - "convert_zeros" to translate newlines to tabs and NUL bytes to
#      newlines while combining (optional)
# Returns 1 if the scratch directory cannot be created or entered.
do_processfile_old() {
  working_on=$(basename "${1}")
  # Bail out instead of decompressing into, and later deleting from, the
  # wrong directory.
  if ! mkdir "${working_on}" || ! cd "${working_on}"; then
    echo "Cannot use scratch directory ${working_on}" >&2
    return 1
  fi
  do_decompress_old "${1}" "${2}"
  cd ..

  printf 'Combining %s into single file ... ' "$2"
  if [ "${4}" = "convert_zeros" ]; then
    cat "${working_on}"/* | tr '\n\0' '\t\n' > "$3"
  else
    cat "${working_on}"/* > "$3"
  fi
  printf "done.\n"

  rm -rf "${working_on}"
}

# Converts a pre-2004 phonebook directory into column files in the
# current directory.
# $1 - phonebook directory
# Exits with status 1 if the chunk layout is not recognized.
handle_old_format() {
  echo "Working on $1. Detected pre-2004 Telefonbuch version."

  # Extract teiln.dat
  do_decompress_old "$(first_match "$1"/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt])" "teiln.dat"

  # See how long each filename is
  filename_len=$(( $(ls | head -n 1 | wc -c) - 1 ))

  # Get total amount of files, for reporting progress
  number_of_files=$(find -E . -depth 1 -regex '^\./[0123456789]+' | wc -l)

  # from 2000F on file 0+3*n is table, so make it default
  table_file=$(printf "%0${filename_len}d" 0)
  vname_file=$(printf "%0${filename_len}d" 2)

  # if supposed vname file is larger than table file,
  # we're having a pre-2000F layout, so switch accordingly
  if [ "$(stat -f %z "${table_file}")" -lt "$(stat -f %z "${vname_file}")" ]; then
    table_file=$(printf "%0${filename_len}d" 2)
    nname_file=$(printf "%0${filename_len}d" 0)
    vname_file=$(printf "%0${filename_len}d" 1)
  else
    nname_file=$(printf "%0${filename_len}d" 1)
  fi

  # Table file has a table header with identical count
  # to nname file's header. Verify this
  if [ "$(get_dword "${nname_file}")" -ne "$(get_dword "${table_file}")" ]; then
    echo "Unknown layout."
    exit 1
  fi

  # Now loop over all files and dump them
  printf "Splitting decompressed chunks into their columns ... "
  reported=0
  while [ -f "${nname_file}" ]; do
    # Get number of entries in this round
    count=$(get_dword "${nname_file}")

    # Get offset into first nname
    nname_off=$(( $(get_dword "${nname_file}" 1) + 1 ))

    # Now get the flags before the nnames
    tail -c "+${nname_off}" "${nname_file}" | tr '\n\0' '\t\n' | head -n "${count}" | cut -c -1 >> 01_Flags
    tail -c "+${nname_off}" "${nname_file}" | tr '\n\0' '\t\n' | head -n "${count}" | cut -c 2- >> 02_Nachname

    # Extract the vnames
    tr '\n\0' '\t\n' < "${vname_file}" | head -n "${count}" >> 03_Vorname

    # Offset into first table entry tells us how many
    # fields are in table file
    table_entries=$(( $(get_dword "${table_file}" 1) / 4 - 1 ))

    # Now iterate over all entries in the table file. A plain loop is used
    # because jot is not a standard tool and treats a count of 0 as
    # "repeat forever".
    table_index=1
    while [ "${table_index}" -le "${table_entries}" ]; do
      table_off=$(get_dword "${table_file}" "${table_index}")
      tail -c "+$(( table_off + 1 ))" "${table_file}" | tr '\n\0' '\t\n' | head -n "${count}" >> "$(printf %02d_unknown $(( table_index + 3 )))"
      table_index=$(( table_index + 1 ))
    done

    # Advance the filenames. Note, that we need bc because
    # builtin arithmetic treats numbers with leading zeros as octals
    nname_file=$(printf "%s + 3\n" "${nname_file}" | bc)
    nname_file=$(printf "%0${filename_len}d" "${nname_file}")
    vname_file=$(printf "%s + 3\n" "${vname_file}" | bc)
    vname_file=$(printf "%0${filename_len}d" "${vname_file}")
    table_file=$(printf "%s + 3\n" "${table_file}" | bc)
    # table_file is a plain decimal number here, so it is safe to use it
    # for the progress computation before it is zero padded again.
    if [ $(( table_file * 20 / number_of_files )) -gt "${reported}" ]; then
      reported=$(( reported + 1 ))
      printf "%d%% " $(( reported * 5 ))
    fi
    table_file=$(printf "%0${filename_len}d" "${table_file}")
  done
  printf "done.\n"

  # wipe all temporary extracted files
  printf "Cleaning up decompressed chunks ... "
  find -E . -depth 1 -regex '^\./[0123456789]+' -delete
  printf "done.\n"

  # rename our columns extracted from the table file
  mv 04_unknown 04_Namenszusatz
  mv 05_unknown 05_Adresszusatz
  mv 06_unknown 06_Ortszusatz
  mv 08_unknown 08_Hausnummer
  mv 09_unknown 09_Verweise
  mv 10_unknown 10_Postleitzahl
  mv 11_unknown 11_Ort
  mv 12_unknown 12_Vorwahl
  mv 13_unknown 13_Rufnummer
  [ -f 14_unknown ] && mv 14_unknown 14_Email
  [ -f 15_unknown ] && mv 15_unknown 15_Webadresse

  # If street names come in an extra file, extract
  # street names first
  streets=$(first_match "$1"/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt])
  if [ -f "${streets}" ]; then
    do_processfile_old "${streets}" "street name" 99_Strassenname convert_zeros
  fi

  # extract street names if 07_unknown contains street indexes
  # instead of street names
  if [ -f 99_Strassenname ]; then
    mv 07_unknown 07_Strassenindex
    printf "Looking up street names from indexes ... "
    cut -d ';' -f 1 07_Strassenindex | "${EL}" -0x 99_Strassenname > 07_Strasse
    printf "done.\n"
  else
    mv 07_unknown 07_Strasse
  fi

  karto=$(first_match "$1"/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt])
  if [ -f "${karto}" ]; then
    do_processfile_old "${karto}" "geo coordinates" 90_Geokoordinaten_hnr
  fi
}

# Converts a post-2003 phonebook directory into column files in the
# current directory.
# $1 - phonebook directory
handle_new_format() {
  echo "Working on $1. Detected post-2003 Telefonbuch version."

  printf "Extracting street names ... "
  decompress "$1/streets.tl"
  cat file_* | tr '\n\0' '\t\n' > 99_Strassenname
  rm file_*
  printf "done.\n"

  printf "Extracting phonebook.db ... "
  decompress "$1/phonebook.db"
  rows=$(find . -name 'file_*' | wc -l)
  rows=$(( rows / 11 ))
  printf "done.\n"

  # Do enumerations with builtin shell tools. Unfortunately neither
  # jot nor seq are standards
  printf "Splitting decompressed chunks into their columns (11 total) ... 1, "
  # Chunk number column + 11 * row holds that column for that row block.
  # Column 0 is dumped as one hex byte per line, the others as text.
  f=0
  while [ "${f}" -lt "${rows}" ]; do
    printf "file_%05X " $(( f * 11 ))
    f=$(( f + 1 ))
  done | xargs cat | xxd -ps -c1 > column_0

  for column in 1 2 3 4 5 6 7 8 9 10; do
    printf "%d, " $(( column + 1 ))
    f=0
    while [ "${f}" -lt "${rows}" ]; do
      printf "file_%05X " $(( column + f * 11 ))
      f=$(( f + 1 ))
    done | xargs cat | tr '\n\0' '\t\n' > "column_${column}"
  done
  printf "done.\n"

  printf "Cleaning up decompressed chunks ... "
  find . -name 'file_*' -delete
  printf "done.\n"

  mv column_0 01_Flags
  mv column_1 02_Nachname
  mv column_2 03_Vorname
  mv column_3 04_05_Namenszusatz_Addresszusatz
  mv column_4 09_Verweise
  mv column_5 07_08_Strassenindex_Hausnummer
  mv column_6 12_Vorwahl
  mv column_7 10_Postleitzahl
  mv column_8 11_Ort
  mv column_9 13_Rufnummer
  mv column_10 14_15_Email_Webadresse

  printf "Looking up street names from indexes ... "
  cut -f 1 07_08_Strassenindex_Hausnummer | "${EL}" -0 99_Strassenname > 07_Strasse
  printf "done.\n"

  printf "Splitting house numbers ... "
  # Append a tab to every line so cut always finds a second field.
  sed -E "s:\$:${TAB}:" < 07_08_Strassenindex_Hausnummer | cut -f 2 > 08_Hausnummer
  printf "done.\n"

  if [ -f "$1/zip-streets-hn-geo.tl" ]; then
    printf "Extracting geo coordinates (precision: house number) ... "
    decompress "$1/zip-streets-hn-geo.tl"
    cat file_* > 90_Geokoordinaten_hnr
    printf "done.\n"
    printf "Looking up geo coordinates for each phonebook entry ... "
    lam 10_Postleitzahl -s "${TAB}" 07_Strasse -s "${TAB}" 08_Hausnummer | mapcoords 90_Geokoordinaten_hnr > 16_Koordinaten
    printf "done.\n"
  elif [ -f "$1/zip-streets-geo.tl" ]; then
    printf "Extracting geo coordinates (precision: street) ... "
    decompress "$1/zip-streets-geo.tl"
    cat file_* > 91_Geokoordinaten_str
    printf "done.\n"
    printf "Looking up geo coordinates for each phonebook entry ... "
    lam 10_Postleitzahl -s "${TAB}" 07_Strasse | mapcoords 91_Geokoordinaten_str > 16_Koordinaten
    printf "done.\n"
  fi
  # -f: without a geo file there are no chunks left to remove.
  rm -f file_*
}

# After function definitions, main() can use them
main "$@"