#!/bin/sh

# Run every tool started by this script in the C locale.
# NOTE(review): presumably so that tr/cut/sed treat the phonebook data as
# plain bytes -- confirm.
LANG=C
LC_CTYPE=C
LC_ALL=C
# Make the helper binaries in ../bin (relative to the directory the script
# is started from) reachable. The value is quoted so that a working
# directory containing blanks is not field-split by shells that split the
# arguments of "export".
PATH="${PATH}:$(pwd)/../bin/"
export LANG LC_CTYPE LC_ALL PATH

# main <phonebookdirectory>
#
# Locates the "el" helper, builds the binaries, (re)creates the working
# directory ../work_<name> and dispatches to the handler matching the
# layout found in <phonebookdirectory>.
#
# Globals:  EL (written unless already set and no el binary is found)
# Exits:    1 if el is missing, the argument count is wrong, the working
#           directory cannot be entered or the folder layout is unknown.
main() {
    [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el
    [ -f "$(dirname "$0")/../bin/el" ] && EL="$(dirname "$0")/../bin/el"

    if [ -z "${EL}" ]; then
      echo "el not found. Get it at 'cvs -d :pserver:anoncvs@cvs.erdgeist.org:/home/cvsroot co el'"
      exit 1
    fi

    if [ $# -ne 1 ]; then
      echo "Syntax: $0 [phonebookdirectory]"
      exit 1
    fi

    # Compile all the binaries (a failing build is reported by make itself
    # and is not treated as fatal here)
    make all

    # Name of the working directory: a leading "white_" is stripped from
    # the argument before its basename is taken.
    work_dir=../work_$(basename "${1#white_}")

    # Turn the source directory into an absolute path *before* changing
    # into the working directory, otherwise a relative argument would be
    # looked up relative to the wrong place afterwards.
    src_dir=$1
    if [ -d "$1" ]; then
        src_dir=$(CDPATH= cd -- "$1" && pwd) || exit 1
    fi

    printf "Cleaning up old working directory ... "
    rm -rf "${work_dir}"
    printf "done.\n"
    mkdir -p "${work_dir}"
    cd "${work_dir}" || exit 1

    if [ -f "${src_dir}/phonebook.db" ]; then
        handle_new_format "${src_dir}"
    elif [ -f "${src_dir}"/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] ]; then
        handle_old_format "${src_dir}"
    else
        echo "Not a recognized Telefonbuch folder"
        cd ..
        # Signal the failure to the caller instead of exiting with 0
        exit 1
    fi
    cd ..
}

# get_dword <file> [index]
#
# Prints, as a decimal number followed by a newline, the 4 byte unsigned
# integer stored at byte offset 4*index of <file> (index defaults to 0).
# The value is decoded by od(1) in the byte order od uses on this host.
# Prints 0 when od produces no output (e.g. unreadable file).
get_dword() {
    # $1 file, $2 offset
    # "-A n" suppresses od's address column, so the first (and only)
    # field left after word splitting is the value itself.
    set -- $(od -A n -t u4 -N 4 -j $(( 4 * ${2:-0} )) "${1}")
    printf "%d\n" "${1:-0}"
}

# do_decompress_old <file> <label>
#
# Runs extractblocks on <file>, then unpacks every *.lha archive found in
# the current directory with "lha x" and removes the archive afterwards.
# <label> is only used in the progress messages written to stdout.
#
# Globals: number_of_files, reported, processed, archive (all written)
do_decompress_old() {
    printf "Extracting %s chunks ... " "${2}"
    extractblocks "${1}"
    printf "done.\n"

    printf "Decompressing %s chunks ... " "${2}"
    # $(( )) strips the padding some wc implementations put around the count
    number_of_files=$(( $(find . -name '*.lha' | wc -l) ))
    reported=0; processed=0
    for archive in *.lha; do
        # An unmatched glob stays literal; skip it instead of feeding a
        # non-existing file to lha and dividing by a zero file count below.
        [ -e "${archive}" ] || continue
        lha x "${archive}" > /dev/null
        rm -- "${archive}"
        # Emit one progress step of 5% whenever the share of archives
        # handled before this one exceeds the steps reported so far.
        if [ $(( processed * 20 / number_of_files )) -gt "${reported}" ]; then
            reported=$(( reported + 1 ))
            printf "%d%% " $(( reported * 5 ))
        fi
        processed=$(( processed + 1 ))
    done
    # NOTE(review): reported counts 5% steps, so 10 equals 50%; confirm
    # whether 20 was intended as the threshold here.
    [ "${reported}" -lt 10 ] && printf "100%% "
    printf "done.\n"
}

# do_processfile_old <file> <label> <output> [convert_zeros]
#
# Decompresses <file> inside a temporary sub directory (named after the
# file's basename) of the current directory, concatenates everything that
# was extracted into <output> and removes the sub directory again.
# With "convert_zeros" as 4th argument newlines become tabs and NUL bytes
# become newlines in <output>. <label> is used for progress messages only.
#
# Globals: source_file, working_on (written)
# Returns: 1 if the temporary sub directory cannot be created or entered.
do_processfile_old() {
    # The file is opened after changing the directory, so a relative
    # path has to be made absolute first.
    case "${1}" in
        /*) source_file=${1} ;;
        *)  source_file=$(pwd)/${1} ;;
    esac

    working_on=$(basename "${1}")
    # Never continue in the wrong directory: the "cd .." and "rm -rf"
    # below would otherwise act on the caller's directory.
    mkdir "${working_on}" || return 1
    cd "${working_on}" || return 1
    do_decompress_old "${source_file}" "${2}"
    cd ..

    printf "Combining %s into single file ... " "${2}"
    if [ "${4}" = "convert_zeros" ]; then
        cat "${working_on}"/* | tr '\n\0' '\t\n' > "${3}"
    else
        cat "${working_on}"/* > "${3}"
    fi
    printf "done.\n"

    rm -rf "${working_on}"
}

# handle_old_format <phonebookdirectory>
#
# Converts a pre-2004 phonebook into column files in the current
# directory: decompresses DAT/TEILN.DAT (any letter case), splits the
# numbered chunk files into 01_Flags, 02_Nachname, 03_Vorname and one file
# per table column, renames the table columns and, if present, processes
# the separate street name (STRASSEN.DAT) and geo coordinate (KARTO.DAT)
# files.
#
# Relies on BSD style "stat -f" and "find -E".
# Globals: EL (read); many work variables are written.
# Exits:   1 if the chunk headers do not match the expected layout.
handle_old_format() {
    echo "Working on $1. Detected pre-2004 Telefonbuch version."
    # Extract teiln.dat
    do_decompress_old "$1"/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat"

    # See how long each filename is
    filename_len=$(( $(ls | head -n 1 | wc -c) - 1 ))

    # Get total amount of files, for reporting progress
    number_of_files=$(( $(find -E . -depth 1 -regex '^\./[0123456789]+' | wc -l) ))

    # from 2000F on file 0+3*n is table, so make it default
    table_file=$(printf "%0${filename_len}d" 0)
    vname_file=$(printf "%0${filename_len}d" 2)

    # if supposed vname file is larger than table file,
    # we're having a pre-2000F layout, so switch accordingly
    if [ "$(stat -f %z "${table_file}")" -lt "$(stat -f %z "${vname_file}")" ]; then
        table_file=$(printf "%0${filename_len}d" 2)
        nname_file=$(printf "%0${filename_len}d" 0)
        vname_file=$(printf "%0${filename_len}d" 1)
    else
        nname_file=$(printf "%0${filename_len}d" 1)
    fi

    # Table file has a table header with identical count
    # to nname file's header. Verify this
    if [ "$(get_dword "${nname_file}")" -ne "$(get_dword "${table_file}")" ]; then
        echo "Unknown layout."
        # Report the failure through the exit status as well
        exit 1
    fi

    # Now loop over all files and dump them
    printf "Splitting decompressed chunks into their columns ... "
    reported=0
    while [ -f "${nname_file}" ]; do
        # Get number of entries in this round
        count=$(get_dword "${nname_file}")

        # Get offset into first nname
        nname_off=$(( $(get_dword "${nname_file}" 1) + 1 ))

        # Now get the flags before the nnames: the first character of each
        # record is the flag, the remainder is the name
        tail -c "+${nname_off}" "${nname_file}" | tr '\n\0' '\t\n' | head -n "${count}" | cut -c -1 >> 01_Flags
        tail -c "+${nname_off}" "${nname_file}" | tr '\n\0' '\t\n' | head -n "${count}" | cut -c 2- >> 02_Nachname

        # Extract the vnames
        tr '\n\0' '\t\n' < "${vname_file}" | head -n "${count}" >> 03_Vorname

        # Offset into first table entry tells us how many
        # fields are in table file
        table_entries=$(( $(get_dword "${table_file}" 1) / 4 - 1 ))

        # Now iterate over all entries in the table file (1..table_entries).
        # Counted with shell arithmetic, as jot is not a standard tool.
        table_index=1
        while [ "${table_index}" -le "${table_entries}" ]; do
            table_off=$(get_dword "${table_file}" "${table_index}")
            tail -c "+$(( table_off + 1 ))" "${table_file}" | tr '\n\0' '\t\n' | head -n "${count}" >> "$(printf %02d_unknown $(( table_index + 3 )))"
            table_index=$(( table_index + 1 ))
        done

        # Advance the filenames. Note, that we need bc because
        # builtin arithmetic treats numbers with leading zeros as octals
        nname_file=$(printf "%s + 3\n" "${nname_file}" | bc)
        nname_file=$(printf "%0${filename_len}d" "${nname_file}")
        vname_file=$(printf "%s + 3\n" "${vname_file}" | bc)
        vname_file=$(printf "%0${filename_len}d" "${vname_file}")
        table_file=$(printf "%s + 3\n" "${table_file}" | bc)
        # table_file is still unpadded here, so it is safe for arithmetic.
        # Report another 5% step once the processed share exceeds the
        # steps reported so far; skipped if the file count is unknown.
        if [ "${number_of_files}" -gt 0 ] &&
           [ $(( table_file * 20 / number_of_files )) -gt "${reported}" ]; then
            reported=$(( reported + 1 ))
            printf "%d%% " $(( reported * 5 ))
        fi
        table_file=$(printf "%0${filename_len}d" "${table_file}")
    done
    printf "done.\n"

    # wipe all temporary extracted files
    printf "Cleaning up decompressed chunks ... "
    find -E . -depth 1 -regex '^\./[0123456789]+' -delete
    printf "done.\n"

    # rename our columns extracted from the table file
    mv 04_unknown 04_Namenszusatz
    mv 05_unknown 05_Adresszusatz
    mv 06_unknown 06_Ortszusatz
    mv 08_unknown 08_Hausnummer
    mv 09_unknown 09_Verweise
    mv 10_unknown 10_Postleitzahl
    mv 11_unknown 11_Ort
    mv 12_unknown 12_Vorwahl
    mv 13_unknown 13_Rufnummer
    [ -f 14_unknown ] && mv 14_unknown 14_Email
    [ -f 15_unknown ] && mv 15_unknown 15_Webadresse

    # If street names come in an extra file, extract
    # street names first. The case-insensitive pattern is expanded here
    # so that the real file name (not the pattern) is handed on.
    streets=
    for candidate in "$1"/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt]; do
        if [ -f "${candidate}" ]; then
            streets=${candidate}
            break
        fi
    done
    if [ -n "${streets}" ]; then
        do_processfile_old "${streets}" "street name" 99_Strassenname convert_zeros
    fi

    # extract street names if 07_unknown contains street indexes
    # instead of street names
    if [ -f 99_Strassenname ]; then
        mv 07_unknown 07_Strassenindex
        printf "Looking up street names from indexes ... "
        cut -d ';' -f 1 07_Strassenindex | "${EL}" -0x 99_Strassenname > 07_Strasse
        printf "done.\n"
    else
        mv 07_unknown 07_Strasse
    fi

    # Optional geo coordinate file, located the same way as the streets
    karto=
    for candidate in "$1"/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt]; do
        if [ -f "${candidate}" ]; then
            karto=${candidate}
            break
        fi
    done
    if [ -n "${karto}" ]; then
        do_processfile_old "${karto}" "geo coordinates" 90_Geokoordinaten_hnr
    fi
}

# handle_new_format <phonebookdirectory>
#
# Converts a post-2003 phonebook into column files in the current
# directory: decompresses streets.tl into 99_Strassenname, splits the
# chunks of phonebook.db (11 consecutive chunks per row) into 11 column
# files, resolves street indexes to names through ${EL} and, if one of the
# zip-streets-*geo.tl files exists, maps geo coordinates to the entries.
#
# Globals: EL (read); tab, rows, f, column (written)
handle_new_format() {
    echo "Working on $1. Detected post-2003 Telefonbuch version."

    # A literal tab character, obtained without the non-portable $'\t'
    tab=$(printf '\t')

    printf "Extracting street names ... "
    decompress "$1/streets.tl"

    cat file_* | tr '\n\0' '\t\n' > 99_Strassenname
    rm file_*
    printf "done.\n"

    printf "Extracting phonebook.db ... "
    decompress "$1/phonebook.db"

    rows=$(find . -name 'file_*' | wc -l)
    rows=$(( rows / 11 ))
    printf "done.\n"

    # Do enumerations with builtin shell tools. Unfortunately neither
    # jot nor seq are standards
    # Column 0 is dumped as one hex byte per line, all other columns are
    # text with newlines turned into tabs and NUL bytes into newlines.
    printf "Splitting decompressed chunks into their columns (11 total) ... 1, "
    f=0
    while [ "${f}" -lt "${rows}" ]; do
        printf "file_%05X " $(( f * 11 ))
        f=$(( f + 1 ))
    done | xargs cat | xxd -ps -c1 > column_0

    for column in 1 2 3 4 5 6 7 8 9 10; do
      printf "%d, " $(( column + 1 ))
      f=0
      while [ "${f}" -lt "${rows}" ]; do
          printf "file_%05X " $(( column + f * 11 ))
          f=$(( f + 1 ))
      done | xargs cat | tr '\n\0' '\t\n' > "column_${column}"
    done
    printf "done.\n"

    printf "Cleaning up decompressed chunks ... "
    find . -name 'file_*' -delete
    printf "done.\n"

    mv column_0 01_Flags
    mv column_1 02_Nachname
    mv column_2 03_Vorname
    mv column_3 04_05_Namenszusatz_Addresszusatz
    mv column_4 09_Verweise
    mv column_5 07_08_Strassenindex_Hausnummer
    mv column_6 12_Vorwahl
    mv column_7 10_Postleitzahl
    mv column_8 11_Ort
    mv column_9 13_Rufnummer
    mv column_10 14_15_Email_Webadresse

    printf "Looking up street names from indexes ... "
    cut -f 1 07_08_Strassenindex_Hausnummer | "${EL}" -0 99_Strassenname > 07_Strasse
    printf "done.\n"

    printf "Splitting house numbers ... "
    # Append a tab to every line first: cut would otherwise print lines
    # without any delimiter unchanged instead of an empty second field.
    sed "s:\$:${tab}:" < 07_08_Strassenindex_Hausnummer | cut -f 2 > 08_Hausnummer
    printf "done.\n"

    if [ -f "$1/zip-streets-hn-geo.tl" ]; then
      printf "Extracting geo coordinates (precision: house number) ... "
      decompress "$1/zip-streets-hn-geo.tl"
      cat file_* > 90_Geokoordinaten_hnr
      printf "done.\n"
      printf "Looking up geo coordinates for each phonebook entry ... "
      lam 10_Postleitzahl -s "${tab}" 07_Strasse -s "${tab}" 08_Hausnummer | mapcoords 90_Geokoordinaten_hnr > 16_Koordinaten
      printf "done.\n"
    elif [ -f "$1/zip-streets-geo.tl" ]; then
      printf "Extracting geo coordinates (precision: street) ... "
      decompress "$1/zip-streets-geo.tl"
      cat file_*  > 91_Geokoordinaten_str
      printf "done.\n"
      printf "Looking up geo coordinates for each phonebook entry ... "
      lam 10_Postleitzahl -s "${tab}" 07_Strasse | mapcoords 91_Geokoordinaten_str > 16_Koordinaten
      printf "done.\n"
    fi
    # Without a geo file there is nothing left to remove; -f keeps that
    # case from being reported as an error.
    rm -f file_*
}

# Entry point. main() is invoked only here, after all function definitions
# above, so every function it calls already exists; the command line
# arguments are forwarded unchanged.
main "$@"