From 43a5ac139b552b23de78434a8ee3df8fc6651b38 Mon Sep 17 00:00:00 2001 From: Dirk Engling Date: Wed, 29 Apr 2015 12:44:47 +0200 Subject: We have a new format between the former version 1 and 2. So shift version numbers. Add README --- makecolumns.sh | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'makecolumns.sh') diff --git a/makecolumns.sh b/makecolumns.sh index b60cfd6..5d2f1aa 100755 --- a/makecolumns.sh +++ b/makecolumns.sh @@ -29,9 +29,9 @@ main() { cd work/`basename "${1#white_}"` || exit 1 if [ -f "$1/phonebook.db" ]; then - handle_format_version_3 "${1}" + handle_format_version_4 "${1}" elif [ -f ${1}/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] ]; then - handle_format_version_2 "${1}" + handle_format_version_3 "${1}" elif [ -n "`find "${1}" -name dpr00000.005 -ls -quit`" ]; then handle_format_version_1 "${1}" else @@ -40,9 +40,9 @@ main() { cd ../.. } -do_decompress_version_2() { +do_decompress_version_3() { printf "Extracting $2 chunks ... " - extract_version_2 "${1}" + extract_version_3 "${1}" printf "done.\n" printf "Decompressing $2 chunks ... " @@ -57,10 +57,10 @@ do_decompress_version_2() { printf "done.\n" } -do_processfile_version_2() { +do_processfile_version_3() { working_on=`basename ${1}` mkdir $working_on && cd ${working_on} - do_decompress_version_2 "${1}" "${2}" + do_decompress_version_3 "${1}" "${2}" cd .. printf "Combining $2 into single file ... " @@ -109,10 +109,10 @@ handle_format_version_1() { } -handle_format_version_2() { +handle_format_version_3() { echo "Working on $1. Detected pre-2004 Telefonbuch version." # Extract teiln.dat - do_decompress_version_2 $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat" + do_decompress_version_3 $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat" # See how long each filename is export filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) @@ -140,7 +140,7 @@ handle_format_version_2() { # Now loop over all files and dump them printf "Splitting decompressed nname chunks into their columns ... " - jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3 | split_version_2 1 1 + jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3 | split_version_3 1 1 # set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` # tail -c +$(( $2 + 1 )) ${file} # done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname @@ -154,7 +154,7 @@ handle_format_version_2() { printf "done.\n" printf "Splitting decompress table file chunks into their columns ... " - jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | split_version_2 4 0 + jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | split_version_3 4 0 # for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do # # Offset into first table entry tells us how many # # fields are in table file @@ -198,7 +198,7 @@ handle_format_version_2() { # If street names come in an extra file, extract # street names first streets=$1/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt] - [ -f ${streets} ] && do_processfile_version_2 ${streets} "street name" 99_Strassenname convert_zeros + [ -f ${streets} ] && do_processfile_version_3 ${streets} "street name" 99_Strassenname convert_zeros # extract street names if 07_unknown contains street indexes # instead of street names @@ -213,7 +213,7 @@ handle_format_version_2() { karto=$1/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt] if [ -f ${karto} ]; then - do_processfile_version_2 ${karto} "geo coordinates" 90_Geokoordinaten_hnr_raw + do_processfile_version_3 ${karto} "geo coordinates" 90_Geokoordinaten_hnr_raw printf "Looking up geo coordinates for each phonebook entry ... " tr '\0' '\n' < 90_Geokoordinaten_hnr_raw | tr ';' '\t' | cut -f "1,2,3,4,6,7" | tr '\n' '\0' > 90_Geokoordinaten_hnr @@ -223,17 +223,17 @@ handle_format_version_2() { fi } -handle_format_version_3() { +handle_format_version_4() { echo "Working on $1. Detected post-2003 Telefonbuch version." printf "Extracting street names ... " - extract_version_3 $1/streets.tl + extract_version_4 $1/streets.tl cat file_* | tr '\n\0' '\t\n' > 99_Strassenname rm file_* printf "done.\n" printf "Extracting phonebook.db ... " - extract_version_3 $1/phonebook.db + extract_version_4 $1/phonebook.db rows=`find . -name file_\* | wc -l` printf "done.\n" @@ -277,7 +277,7 @@ handle_format_version_3() { if [ -f $1/zip-streets-hn-geo.tl ]; then printf "Extracting geo coordinates (precision: house number) ... " - extract_version_3 $1/zip-streets-hn-geo.tl + extract_version_4 $1/zip-streets-hn-geo.tl cat file_* > 90_Geokoordinaten_hnr printf "done.\n" printf "Looking up geo coordinates for each phonebook entry ... " @@ -285,7 +285,7 @@ handle_format_version_3() { printf "done.\n" elif [ -f $1/zip-streets-geo.tl ]; then printf "Extracting geo coordinates (precision: street) ... " - extract_version_3 $1/zip-streets-geo.tl + extract_version_4 $1/zip-streets-geo.tl cat file_* > 91_Geokoordinaten_str printf "done.\n" printf "Looking up geo coordinates for each phonebook entry ... " -- cgit v1.2.3