From a187241f4e4cf8a592e0a3cc0b61f949e6184a9e Mon Sep 17 00:00:00 2001 From: Dirk Engling Date: Wed, 30 Jan 2019 18:12:18 +0100 Subject: Add branch name mapper code for v3 --- makecolumns.sh | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) (limited to 'makecolumns.sh') diff --git a/makecolumns.sh b/makecolumns.sh index edd965c..4f4bebc 100755 --- a/makecolumns.sh +++ b/makecolumns.sh @@ -171,9 +171,21 @@ handle_format_version_2() { } handle_format_version_3() { - echo "Working on $1. Detected pre-2004 Telefonbuch version." + # glob + teiln=`printf "%s" "$1"/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt]` + braid=`printf "%s" "$1"/[Dd][Aa][Tt]/[Bb][Rr][Aa][Ii][Dd].[Dd][Aa][Tt]` + streets=`printf "%s" "$1"/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt]` + karto=`printf "%s" "$1"/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt]` + + if [ -f "${braid}" ]; then + echo "Working on $1. Detected pre-2004 Yellow Pages version." + is_yp=true + else + echo "Working on $1. Detected pre-2004 Telefonbuch version." + unset is_yp + fi # Extract teiln.dat - do_decompress_version_3 "$1"/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat" + do_decompress_version_3 "${teiln}" "teiln.dat" # See how long each filename is export filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) @@ -205,8 +217,14 @@ handle_format_version_3() { # set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` # tail -c +$(( $2 + 1 )) ${file} # done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname - cut -c 1 < 01_unknown > 01_Flags - cut -c 2- < 01_unknown > 02_Nachname + if [ "${is_yp}" ]; then + cut -c 1 < 01_unknown > 01_Flags + cut -c 2-7 < 01_unknown > 09_Branchenindex + cut -c 8- < 01_unknown > 02_Nachname + else + cut -c 1 < 01_unknown > 01_Flags + cut -c 2- < 01_unknown > 02_Nachname + fi rm 01_unknown printf "done.\n" @@ -259,7 +277,6 @@ handle_format_version_3() { # If street names come in an extra file, extract # street names first - streets="$1"/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt] [ -f "${streets}" ] && do_processfile_version_3 "${streets}" "street name" 99_Strassenname convert_zeros # extract street names if 07_unknown contains street indexes @@ -280,7 +297,6 @@ handle_format_version_3() { tidy_streetnames 07_Strasse fi - karto="$1"/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt] if [ -f "${karto}" ]; then do_processfile_version_3 "${karto}" "geo coordinates" 90_Geokoordinaten_hnr_raw @@ -290,6 +306,14 @@ handle_format_version_3() { paste 10_Postleitzahl 11_Ort 07_Strasse 08_Hausnummer | map_coords 90_Geokoordinaten_hnr | convert_coords > 16_Koordinaten printf "done.\n" fi + + if [ -f "${braid}" ]; then + do_processfile_version_3 "${braid}" "branchen name index" 97_Branchenname convert_zeros + + printf "Looking up branch names from codes ... " + map_branches_v3 97_Branchenname < 09_Branchenindex > 09_Branchen + printf "done.\n" + fi } handle_format_version_4() { @@ -387,17 +411,9 @@ handle_format_version_4() { rm file_* printf "done.\n" - printf "Generating branch name index ... " - mkdir branchcodes/ - while read index name; do - printf $name > branchcodes/${index} - done < 97_Branchenname - printf "done.\n" - printf "Looking up branch names from codes ... " map_branches 97_Branchenname < 09_Verweise > 09_Branchen printf "done.\n" - rm -r branchcodes fi } -- cgit v1.2.3