#!/bin/sh

# Pin the locale to plain "C" for this script and every command it runs.
# NOTE(review): presumably needed because the tr/cut/sed pipelines below
# operate on raw, non-UTF-8 bytes - confirm.
LANG=C
LC_CTYPE=C
LC_ALL=C
export LANG LC_CTYPE LC_ALL

# Entry point.
#
# Locates the `el` helper, builds the binaries with `make all`, creates and
# enters the work directory ../work_<name>, then dispatches to the extractor
# matching the layout found in the phonebook directory.
#
# Arguments:
#   $1  phonebook directory (relative or absolute)
# Globals written:
#   EL        absolute path of the `el` binary (an inherited EL is kept when
#             neither candidate location exists)
#   strassen  path of dat/strassen.dat when that file exists, unset otherwise
# Exits with status 1 when el is missing, the argument count is wrong, the
# build fails, the work directory cannot be entered or the folder layout is
# not recognized.
main() {
    script_dir=$(dirname "$0")

    # Both candidates are stored as absolute paths because the working
    # directory changes below; a path relative to $0 could dangle afterwards.
    # The copy next to the script takes precedence over the installed one.
    [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el
    [ -f "${script_dir}/../bin/el" ] && EL="$(cd "${script_dir}/../bin" && pwd)/el"

    if [ -z "${EL}" ]; then
        echo "el not found. Get it at 'cvs -d :pserver:anoncvs@cvs.erdgeist.org:/home/cvsroot co el'"
        exit 1
    fi

    if [ $# -ne 1 ]; then
        echo "Syntax: $0 [phonebookdirectory]"
        exit 1
    fi

    # Compile all the binaries; continuing with missing or stale tools
    # would only produce confusing follow-up errors.
    make all || exit 1

    # Anchor the phonebook directory before leaving the current directory,
    # otherwise a relative argument would be looked up inside the work dir.
    case "$1" in
        /*) book_dir=$1 ;;
        *)  book_dir="$(pwd)/$1" ;;
    esac

    # NOTE(review): the "white_" prefix is removed from the argument as
    # given, i.e. before basename runs, so it is only stripped when the
    # argument itself starts with "white_". Kept as is so existing work
    # directory names do not change.
    work_dir="../work_$(basename "${1#white_}")"
    mkdir -p "${work_dir}"
    cd "${work_dir}" || exit 1

    unset strassen
    [ -f "${book_dir}/dat/strassen.dat" ] && strassen="${book_dir}/dat/strassen.dat"

    if [ -f "${book_dir}/phonebook.db" ]; then
        handle_new_format "${book_dir}"
    elif [ -f "${book_dir}/DAT/TEILN.DAT" ]; then
        handle_old_format "${book_dir}/DAT/TEILN.DAT" "${strassen}"
    elif [ -f "${book_dir}/dat/teiln.dat" ]; then
        handle_old_format "${book_dir}/dat/teiln.dat" "${strassen}"
    else
        echo "Not a recognized Telefonbuch folder"
        exit 1
    fi
    cd ..
}

# Print one unsigned 32-bit integer read from a file, in decimal.
#
# Arguments:
#   $1  file to read from
#   $2  index of the 4-byte word to read (optional, defaults to 0)
# Outputs:
#   the value followed by a newline; 0 when od yields no data (for example
#   when the file is missing or shorter than the requested offset)
# The word is decoded by od in the byte order of the host.
get_dword() {
  # -An suppresses od's offset column, so the value is the only word left.
  # The command substitution is deliberately unquoted: word splitting
  # strips od's padding. The file name itself is quoted so that paths
  # containing whitespace work.
  set -- $(od -An -tu4 -N4 -j"$(( 4 * ${2:-0} ))" "${1}")
  printf '%d\n' "${1:-0}"
}

# Extract the older phonebook layout (teiln.dat, optionally strassen.dat)
# into one file per column in the current directory.
#
# Arguments:
#   $1  path to teiln.dat / TEILN.DAT
#   $2  path to strassen.dat, or empty when there is no separate street file
# Globals read:
#   EL  path of the `el` binary, used to map street indexes to names
# Relies on ../bin/extractblocks, lha, jot, bc and the BSD variants of find
# and stat. Exits with status 1 when the scratch directory cannot be entered
# or the extracted files do not match a known layout.
handle_old_format() {
    # Clear old files. Be very careful, we could
    # have ended up in an unexpected directory, after all.
    find -E . -depth 1 -regex '^\./[0123456789]+' -delete
    rm -f ??_*

    # If street names come in an extra file, extract
    # street names first
    if [ "$2" ]; then
        mkdir strassen
        # Abort if the scratch directory cannot be entered: the "cd .."
        # below would otherwise leave the work directory and every later
        # delete would hit the wrong place.
        cd strassen/ || exit 1

        ../../bin/extractblocks "$2"

        # This should leave us with a bunch of .lha files
        for archive in *.lha; do lha x "${archive}"; done
        find . -name \*.lha -delete
        cd ..

        # Newlines become tabs and NUL bytes become newlines, so every
        # NUL-terminated record ends up on a line of its own.
        cat strassen/* | tr '\n\0' '\t\n' > 99_Strassenname
        rm -r strassen/
    fi

    # Then extract teiln.dat
    ../bin/extractblocks "$1"

    # This should leave us with a bunch of .lha files
    for archive in *.lha; do lha x "${archive}"; done
    find . -name \*.lha -delete

    # See how long each filename is (wc -c also counts the newline)
    filename_len=$(( $(ls | head -n 1 | wc -c) - 1 ))

    # from 2000F on file 0+3*n is table, so make it default
    table_file=$(printf "%0${filename_len}d" 0)
    vname_file=$(printf "%0${filename_len}d" 2)

    # if supposed vname file is larger than table file,
    # we're having a pre-2000F layout, so switch accordingly
    if [ "$(stat -f %z "${table_file}")" -lt "$(stat -f %z "${vname_file}")" ]; then
        table_file=$(printf "%0${filename_len}d" 2)
        nname_file=$(printf "%0${filename_len}d" 0)
        vname_file=$(printf "%0${filename_len}d" 1)
    else
        nname_file=$(printf "%0${filename_len}d" 1)
    fi

    # Table file has a table header with identical count
    # to nname file's header. Verify this
    if [ "$(get_dword "${nname_file}")" -ne "$(get_dword "${table_file}")" ]; then
        echo "Unknown layout."
        # Explicit failure status; a bare exit would report echo's success.
        exit 1
    fi

    # Now loop over all files and dump them
    while [ -f "${nname_file}" ]; do
        # Get number of entries in this round
        count=$(get_dword "${nname_file}")

        # Get offset into first nname (tail -c +N counts from 1)
        nname_off=$(( $(get_dword "${nname_file}" 1) + 1 ))

        # Now get the flags before the nnames: the first character of each
        # record is the flag, the remainder is the name
        tail -c +"${nname_off}" "${nname_file}" | tr '\n\0' '\t\n' | head -n "${count}" | cut -c -1 >> 01_Flags
        tail -c +"${nname_off}" "${nname_file}" | tr '\n\0' '\t\n' | head -n "${count}" | cut -c 2- >> 02_Nachname

        # Extract the vnames
        tr '\n\0' '\t\n' < "${vname_file}" | head -n "${count}" >> 03_Vorname

        # Offset into first table entry tells us how many
        # fields are in table file
        table_entries=$(( $(get_dword "${table_file}" 1) / 4 - 1 ))

        # Now iterate over all entries in the table file; column N of the
        # table is appended to the file numbered N + 3
        for table_index in $(jot "${table_entries}"); do
            table_off=$(get_dword "${table_file}" "${table_index}")
            column_file=$(printf %02d_unknown "$(( table_index + 3 ))")
            tail -c +"$(( table_off + 1 ))" "${table_file}" | tr '\n\0' '\t\n' | head -n "${count}" >> "${column_file}"
        done

        # Advance the filenames. Note, that we need bc because
        # builtin arithmetic treats numbers with leading zeros as octals
        nname_file=$(printf "%s + 3\n" "${nname_file}" | bc)
        nname_file=$(printf "%0${filename_len}d" "${nname_file}")
        vname_file=$(printf "%s + 3\n" "${vname_file}" | bc)
        vname_file=$(printf "%0${filename_len}d" "${vname_file}")
        table_file=$(printf "%s + 3\n" "${table_file}" | bc)
        table_file=$(printf "%0${filename_len}d" "${table_file}")

    done

    # wipe all temporary extracted files
    find -E . -depth 1 -regex '^\./[0123456789]+' -delete

    # rename our columns extracted from the table file
    mv 04_unknown 04_Namenszusatz
    mv 05_unknown 05_Adresszusatz
    mv 06_unknown 06_Ortszusatz
    mv 08_unknown 08_Hausnummer
    mv 09_unknown 09_Verweise
    mv 10_unknown 10_Postleitzahl
    mv 11_unknown 11_Ort
    mv 12_unknown 12_Vorwahl
    mv 13_unknown 13_Rufnummer
    [ -f 14_unknown ] && mv 14_unknown 14_Email
    [ -f 15_unknown ] && mv 15_unknown 15_Webadresse

    # extract street names if 07_unknown contains street indexes
    # instead of street names
    if [ -f 99_Strassenname ]; then
        mv 07_unknown 07_Strassenindex
        # NOTE(review): el presumably prints the line of 99_Strassenname
        # selected by each index read from stdin - confirm -0x semantics.
        cut -d ';' -f 1 07_Strassenindex | "${EL}" -0x 99_Strassenname > 07_Strasse
    else
        mv 07_unknown 07_Strasse
    fi
}

# Extract the newer phonebook layout (phonebook.db plus streets.tl) into one
# file per column in the current directory and, when a geo table is present,
# attach coordinates to every entry.
#
# Arguments:
#   $1  phonebook directory containing phonebook.db and streets.tl
# Globals read:
#   EL  path of the `el` binary, used to map street indexes to names
# Relies on ../bin/decompress, ../bin/mapcoords, jot, lam and xxd.
handle_new_format() {
    # A literal tab for sed and lam. $'\t' is not part of POSIX sh, so the
    # character is produced with printf instead.
    tab=$(printf '\t')

    echo "Working on $1. Detected post-2003 Telefonbuch version."
    printf "Extracting street names ... "
    ../bin/decompress "$1/streets.tl"

    # Newlines become tabs and NUL bytes become newlines, so every
    # NUL-terminated record ends up on a line of its own.
    cat file_* | tr '\n\0' '\t\n' > 99_Strassenname
    rm file_*
    printf "done.\n"

    printf "Extracting phonebook.db ... "
    ../bin/decompress "$1/phonebook.db" | grep -v appropriate

    numfiles=$(find . -name file_\* | wc -l)
    # Unquoted on purpose: wc may pad its output with blanks.
    printf "done.\nFound %d entries.\n" $numfiles

    printf "Splitting decompressed chunks into their columns ... "
    # The chunks are interleaved: chunk number c, c+11, c+22, ... all belong
    # to column c, for c in 0..10.
    for column in $(jot - 0 10 1); do
      for file in $(jot - "${column}" "$(( numfiles - 1 ))" 11); do
        acton=$(printf file_%05X "${file}")
        if [ "${column}" = 0 ]; then
          # Column 0 is dumped as hex, one byte per line
          xxd -ps -c1 "${acton}" >> column_0
        else
          tr '\n\0' '\t\n' < "${acton}" >> "column_${column}"
        fi
      done
    done
    printf "done.\n"

    printf "Cleaning up decompressed chunks ... "
    find . -name file_\* -delete
    printf "done.\n"

    mv column_0 01_Flags
    mv column_1 02_Nachname
    mv column_2 03_Vorname
    mv column_3 04_05_Namenszusatz_Addresszusatz
    mv column_4 09_Verweise
    mv column_5 07_08_Strassenindex_Hausnummer
    mv column_6 12_Vorwahl
    mv column_7 10_Postleitzahl
    mv column_8 11_Ort
    mv column_9 13_Rufnummer
    mv column_10 14_15_Email_Webadresse

    printf "Looking up street names from indexes ... "
    # NOTE(review): el presumably prints the line of 99_Strassenname
    # selected by each index read from stdin - confirm -0 semantics.
    cut -f 1 07_08_Strassenindex_Hausnummer | "${EL}" -0 99_Strassenname > 07_Strasse
    printf "done.\n"

    printf "Splitting house numbers ... "
    # Append a tab to every line first: cut prints lines without any
    # delimiter unchanged, which would leak the street index into the
    # house number column for entries that have no house number.
    sed -E "s:\$:${tab}:" < 07_08_Strassenindex_Hausnummer | cut -f 2 > 08_Hausnummer
    printf "done.\n"

    if [ -f "$1/zip-streets-hn-geo.tl" ]; then
      printf "Extracting geo coordinates (precision: house number) ... "
      ../bin/decompress "$1/zip-streets-hn-geo.tl"
      cat file_* > 90_Geokoordinaten_hnr
      printf "done.\n"
      printf "Looking up geo coordinates for each phonebook entry ... "
      lam 10_Postleitzahl -s "${tab}" 07_Strasse -s "${tab}" 08_Hausnummer | ../bin/mapcoords 90_Geokoordinaten_hnr > 16_Koordinaten
      printf "done.\n"
    elif [ -f "$1/zip-streets-geo.tl" ]; then
      printf "Extracting geo coordinates (precision: street) ... "
      ../bin/decompress "$1/zip-streets-geo.tl"
      cat file_*  > 91_Geokoordinaten_str
      printf "done.\n"
      printf "Looking up geo coordinates for each phonebook entry ... "
      lam 10_Postleitzahl -s "${tab}" 07_Strasse | ../bin/mapcoords 91_Geokoordinaten_str > 16_Koordinaten
      printf "done.\n"
    fi
    # -f: when no geo table was extracted there are no chunks left to
    # remove, and that is not an error.
    rm -f file_*
}

# Call main last, once every function it uses has been defined;
# "$@" hands the script's command line arguments through unchanged.
main "$@"