From dd4cefb2ae4c0254e358a42ad7b8c732b07d93e2 Mon Sep 17 00:00:00 2001 From: Dirk Engling Date: Tue, 22 Jan 2019 22:51:38 +0100 Subject: Add support for yellow pages format after 2003 --- makecolumns.sh | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'makecolumns.sh') diff --git a/makecolumns.sh b/makecolumns.sh index 288e702..c38a791 100755 --- a/makecolumns.sh +++ b/makecolumns.sh @@ -285,7 +285,14 @@ handle_format_version_3() { } handle_format_version_4() { - echo "Working on $1. Detected post-2003 Telefonbuch version." + if [ -f $1/branchcodes.tl ]; then + is_yp=true + echo "Working on $1. Detected post-2003 Yellow Pages version." + else + unset is_yp + echo "Working on $1. Detected post-2003 Telefonbuch version." + fi + printf "Extracting street names ... " extract_version_4 $1/streets.tl @@ -314,8 +321,10 @@ handle_format_version_4() { # the 'did not object to inverse search' flag is insane and needs to be reversed if grep -q ^40 column_0; then + printf "Cleanung up inverted reverse search flags ... " awk '{ a=substr($0,1,1); printf "%x%x\n",index("5670123cdef89ab4",a)%16 ,substr($0,2,1) }' < column_0 > 01_Flags rm column_0 + printf "done\n" else mv column_0 01_Flags fi @@ -355,7 +364,7 @@ handle_format_version_4() { elif [ -f $1/zip-streets-geo.tl ]; then printf "Extracting geo coordinates (precision: street) ... " extract_version_4 $1/zip-streets-geo.tl - cat file_* > 91_Geokoordinaten_str + cat file_* > 91_Geokoordinaten_str printf "done.\n" printf "Looking up geo coordinates for each phonebook entry ... " paste 10_Postleitzahl 07_Strasse | map_coords 91_Geokoordinaten_str | convert_coords > 16_Koordinaten @@ -363,6 +372,25 @@ handle_format_version_4() { fi rm file_* + if [ "${is_yp}" ]; then + printf "Extracting branch names ... " + extract_version_4 $1/branchcodes.tl + cat file_* | tr '\n\0' '\t\n' > 97_Branchenname + rm file_* + printf "done.\n" + + printf "Generating branch name index ... " + mkdir branchcodes/ + while read index name; do + printf $name > branchcodes/${index} + done < 97_Branchenname + printf "done.\n" + + printf "Looking up branch names from codes ... " + map_branches 97_Branchenname < 09_Verweise > 09_Branchen + printf "done.\n" + rm -r branchcodes + fi } tidy_streetnames () { -- cgit v1.2.3