From ae1f17a7b0abbc425f33106c666f1bc71e1b4711 Mon Sep 17 00:00:00 2001 From: Dirk Engling Date: Mon, 10 Feb 2014 15:35:47 +0100 Subject: Use integers to count through files on old telefonbuch format. Only convert them to fixed width representation when needed --- src/makecolumns.sh | 56 ++++++++++++++++++++++++++---------------------------- 1 file changed, 27 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/makecolumns.sh b/src/makecolumns.sh index ab61c29..476ce90 100755 --- a/src/makecolumns.sh +++ b/src/makecolumns.sh @@ -38,12 +38,6 @@ main() { cd .. } -get_dword() { - # $1 file, $2 offset - set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}` - printf "%d\n" $2 -} - do_decompress_old() { printf "Extracting $2 chunks ... " extractblocks "${1}" @@ -78,29 +72,37 @@ do_processfile_old() { rm -rf ${working_on} } +size() { + stat -f %z `printf %0${filename_len}d $1` +} + +get_dword() { + # $1 file, $2 offset + file=`printf %0${filename_len}d ${1}` + set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${file}` + printf "%d\n" $2 +} + handle_old_format() { echo "Working on $1. Detected pre-2004 Telefonbuch version." # Extract teiln.dat do_decompress_old $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat" # See how long each filename is - filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) + export filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) # Get total amount of files, for reporting progress number_of_files=`find -E . 
-depth 1 -regex '^\./[0123456789]+' | wc -l` # from 2000F on file 0+3*n is table, so make it default - table_file=`printf %0${filename_len}d 0` - vname_file=`printf %0${filename_len}d 2` + table_file=0; vname_file=2 # if supposed vname file is larger than table file, # we're having a pre-2000F layout, so switch accordingly - if [ `stat -f %z ${table_file}` -lt `stat -f %z ${vname_file}` ]; then - table_file=`printf %0${filename_len}d 2` - nname_file=`printf %0${filename_len}d 0` - vname_file=`printf %0${filename_len}d 1` + if [ `size ${table_file}` -lt `size ${vname_file}` ]; then + table_file=2; nname_file=0; vname_file=1 else - nname_file=`printf %0${filename_len}d 1` + nname_file=1 fi # Table file has a table header with identical count @@ -113,7 +115,7 @@ handle_old_format() { # Now loop over all files and dump them printf "Splitting decompressed chunks into their columns ... " reported=0 - while [ -f ${nname_file} ]; do + while [ -f `printf %0${filename_len}d ${nname_file}` ]; do # Get number of entries in this round count=`get_dword ${nname_file}` @@ -121,11 +123,11 @@ handle_old_format() { nname_off=$(( `get_dword ${nname_file} 1` + 1 )) # Now get the flags before the nnames - tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c -1 >> 01_Flags - tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c 2- >> 02_Nachname + tail -c +${nname_off} `printf %0${filename_len}d ${nname_file}` | tr '\n\0' '\t\n' | head -n ${count} | cut -c -1 >> 01_Flags + tail -c +${nname_off} `printf %0${filename_len}d ${nname_file}` | tr '\n\0' '\t\n' | head -n ${count} | cut -c 2- >> 02_Nachname # Extract the vnames - tr '\n\0' '\t\n' < ${vname_file} | head -n ${count} >> 03_Vorname + tr '\n\0' '\t\n' < `printf %0${filename_len}d ${vname_file}` | head -n ${count} >> 03_Vorname # Offset into first table entry tells us how many # fields are in table file @@ -134,18 +136,14 @@ handle_old_format() { # Now iterate over all entries 
in the table file for table_index in `jot ${table_entries}`; do table_off=`get_dword ${table_file} ${table_index}` - tail -c +$(( table_off + 1 )) ${table_file} | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( table_index + 3 ))` + tail -c +$(( table_off + 1 )) `printf %0${filename_len}d ${table_file}` | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( table_index + 3 ))` done - # Advance the filenames. Note, that we need bc because - # builtin arithmetic treats numbers with leading zeros as octals - nname_file=`printf "%s + 3\n" ${nname_file} | bc` - nname_file=`printf %0${filename_len}d ${nname_file}` - vname_file=`printf "%s + 3\n" ${vname_file} | bc` - vname_file=`printf %0${filename_len}d ${vname_file}` - table_file=`printf "%s + 3\n" ${table_file} | bc` - [ 1 -eq $(( ( table_file * 20 ) / number_of_files > reported )) ] && printf "%d%% " $(( (reported+=1) * 5 )) - table_file=`printf %0${filename_len}d ${table_file}` + # Advance the filenames. + nname_file=$(( nname_file+3 )) + vname_file=$(( vname_file+3 )) + table_file=$(( table_file+3 )) + [ 1 -eq $(( ( ( table_file * 20 ) / number_of_files ) > reported )) ] && printf "%d%% " $(( (reported+=1) * 5 )) done printf "done.\n" @@ -170,7 +168,7 @@ handle_old_format() { # If street names come in an extra file, extract # street names first streets=$1/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt] - [ -f ${streets} ] && do_processfile_old "${streets}" "street name" 99_Strassenname convert_zeros + [ -f ${streets} ] && do_processfile_old ${streets} "street name" 99_Strassenname convert_zeros # extract street names if 07_unknown contains street indexes # instead of street names -- cgit v1.2.3