diff options
author | User Erdgeist <erdgeist@avon.ccc.de> | 2014-02-07 17:15:43 +0000 |
---|---|---|
committer | User Erdgeist <erdgeist@avon.ccc.de> | 2014-02-07 17:15:43 +0000 |
commit | 00a8ae93bc88d8cdf1aecc7f3c410359af987c3c (patch) | |
tree | 7b9c766b31e76e642e096b87f8ecf0df05e420cd | |
parent | 0d9bc8f4efd22460a3dfea679d4b84b2c5da7cfa (diff) |
Mute the invoked tools
Be more verbose about what we do when parsing the older format
Factor out common code to avoid redundant implementation
Export geo coordinates for the older format
-rwxr-xr-x[-rw-r--r--] | src/makecolumns.sh | 90 |
1 files changed, 57 insertions, 33 deletions
diff --git a/src/makecolumns.sh b/src/makecolumns.sh index 4986157..95c9d08 100644..100755 --- a/src/makecolumns.sh +++ b/src/makecolumns.sh | |||
@@ -3,6 +3,7 @@ | |||
3 | export LANG=C | 3 | export LANG=C |
4 | export LC_CTYPE=C | 4 | export LC_CTYPE=C |
5 | export LC_ALL=C | 5 | export LC_ALL=C |
6 | export PATH=${PATH}:`pwd`/../bin/ | ||
6 | 7 | ||
7 | main() { | 8 | main() { |
8 | [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el | 9 | [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el |
@@ -28,11 +29,8 @@ main() { | |||
28 | 29 | ||
29 | if [ -f "$1/phonebook.db" ]; then | 30 | if [ -f "$1/phonebook.db" ]; then |
30 | handle_new_format $1 | 31 | handle_new_format $1 |
31 | elif [ -f "$1/DAT/TEILN.DAT" ]; then | 32 | elif [ -f $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] ]; then |
32 | handle_old_format "$1/DAT/TEILN.DAT" "${strassen}" | 33 | handle_old_format $1 |
33 | elif [ -f "$1/dat/teiln.dat" ]; then | ||
34 | echo handle_old_format "$1/dat/teiln.dat" "${strassen}" | ||
35 | handle_old_format "$1/dat/teiln.dat" "${strassen}" | ||
36 | else | 34 | else |
37 | echo "Not a recognized Telefonbuch folder" | 35 | echo "Not a recognized Telefonbuch folder" |
38 | fi | 36 | fi |
@@ -40,40 +38,56 @@ main() { | |||
40 | } | 38 | } |
41 | 39 | ||
42 | get_dword() { | 40 | get_dword() { |
43 | # $1 file, $2 offset | 41 | # $1 file, $2 offset |
44 | set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}` | 42 | set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}` |
45 | printf "%d\n" $2 | 43 | printf "%d\n" $2 |
44 | } | ||
45 | |||
46 | do_decompress_old() { | ||
47 | printf "Extracting $2 chunks ... " | ||
48 | extractblocks "${1}" | grep -v appropriate | ||
49 | printf "done.\n" | ||
50 | |||
51 | printf "Decompressing $2 chunks ... " | ||
52 | for archive in *.lha; do | ||
53 | lha x ${archive} > /dev/null | ||
54 | rm ${archive} | ||
55 | done | ||
56 | printf "done.\n" | ||
57 | } | ||
58 | |||
59 | do_processfile_old() { | ||
60 | working_on=`basename ${1}` | ||
61 | mkdir $working_on && cd ${working_on} | ||
62 | do_decompress_old "${1}" "${2}" | ||
63 | cd .. | ||
64 | |||
65 | printf "Combining $2 into single file ... " | ||
66 | if [ "${4}" = "convert_zeros" ]; then | ||
67 | cat ${working_on}/* | tr '\n\0' '\t\n' > $3 | ||
68 | else | ||
69 | cat ${working_on}/* > $3 | ||
70 | fi | ||
71 | printf "done.\n" | ||
72 | |||
73 | rm -rf ${working_on} | ||
46 | } | 74 | } |
47 | 75 | ||
48 | handle_old_format() { | 76 | handle_old_format() { |
49 | # Clear old files. Be very careful, we could | 77 | # Clear old files. Be very careful, we could |
50 | # have ended up in an unexpected directory, after all. | 78 | # have ended up in an unexpected directory, after all. |
79 | printf "Cleaning up old temporary files ... " | ||
51 | find -E . -depth 1 -regex '^\./[0123456789]+' -delete | 80 | find -E . -depth 1 -regex '^\./[0123456789]+' -delete |
52 | rm -f ??_* | 81 | rm -rf ??_* coords strassen |
82 | printf "done.\n" | ||
53 | 83 | ||
54 | # If street names come in an extra file, extract | 84 | # If street names come in an extra file, extract |
55 | # street names first | 85 | # street names first |
56 | if [ "$2" ]; then | 86 | streets=$1/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt] |
57 | mkdir strassen | 87 | [ -f ${streets} ] && do_processfile_old "${streets}" "street name" 99_Strassenname convert_zeros |
58 | cd strassen/ | ||
59 | |||
60 | ../../bin/extractblocks $2 | ||
61 | |||
62 | # This should leave us with a bunch of .lha files | ||
63 | for archive in *.lha; do lha x ${archive}; done | ||
64 | find . -name \*.lha -delete | ||
65 | cd .. | ||
66 | |||
67 | cat strassen/* | tr '\n\0' '\t\n' > 99_Strassenname | ||
68 | rm -r strassen/ | ||
69 | fi | ||
70 | 88 | ||
71 | # Then extract teiln.dat | 89 | # Then extract teiln.dat |
72 | ../bin/extractblocks $1 | 90 | do_decompress_old $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat" |
73 | |||
74 | # This should leave us with a bunch of .lha files | ||
75 | for archive in *.lha; do lha x ${archive}; done | ||
76 | find . -name \*.lha -delete | ||
77 | 91 | ||
78 | # See how long each filename is | 92 | # See how long each filename is |
79 | filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) | 93 | filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) |
@@ -100,6 +114,7 @@ handle_old_format() { | |||
100 | fi | 114 | fi |
101 | 115 | ||
102 | # Now loop over all files and dump them | 116 | # Now loop over all files and dump them |
117 | printf "Splitting decompressed chunks into their columns ... " | ||
103 | while [ -f ${nname_file} ]; do | 118 | while [ -f ${nname_file} ]; do |
104 | # Get number of entries in this round | 119 | # Get number of entries in this round |
105 | count=`get_dword ${nname_file}` | 120 | count=`get_dword ${nname_file}` |
@@ -134,9 +149,12 @@ handle_old_format() { | |||
134 | table_file=`printf %0${filename_len}d ${table_file}` | 149 | table_file=`printf %0${filename_len}d ${table_file}` |
135 | 150 | ||
136 | done | 151 | done |
152 | printf "done.\n" | ||
137 | 153 | ||
138 | # wipe all temporary extracted files | 154 | # wipe all temporary extracted files |
155 | printf "Cleaning up decompressed chunks ... " | ||
139 | find -E . -depth 1 -regex '^\./[0123456789]+' -delete | 156 | find -E . -depth 1 -regex '^\./[0123456789]+' -delete |
157 | printf "done.\n" | ||
140 | 158 | ||
141 | # rename our columns extracted from the table file | 159 | # rename our columns extracted from the table file |
142 | mv 04_unknown 04_Namenszusatz | 160 | mv 04_unknown 04_Namenszusatz |
@@ -155,23 +173,29 @@ handle_old_format() { | |||
155 | # instead of street names | 173 | # instead of street names |
156 | if [ -f 99_Strassenname ]; then | 174 | if [ -f 99_Strassenname ]; then |
157 | mv 07_unknown 07_Strassenindex | 175 | mv 07_unknown 07_Strassenindex |
176 | printf "Looking up street names from indexes ... " | ||
158 | cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse | 177 | cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse |
178 | printf "done.\n" | ||
159 | else | 179 | else |
160 | mv 07_unknown 07_Strasse | 180 | mv 07_unknown 07_Strasse |
161 | fi | 181 | fi |
182 | |||
183 | karto=$1/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt] | ||
184 | printf "%s\n" $karto | ||
185 | [ -f ${karto} ] && do_processfile_old ${karto} "geo coordinates" 90_Geokoordinaten_hnr | ||
162 | } | 186 | } |
163 | 187 | ||
164 | handle_new_format() { | 188 | handle_new_format() { |
165 | echo "Working on $1. Detected post-2003 Telefonbuch version." | 189 | echo "Working on $1. Detected post-2003 Telefonbuch version." |
166 | printf "Extracting street names ... " | 190 | printf "Extracting street names ... " |
167 | ../bin/decompress $1/streets.tl | 191 | decompress $1/streets.tl |
168 | 192 | ||
169 | cat file_* | tr '\n\0' '\t\n' > 99_Strassenname | 193 | cat file_* | tr '\n\0' '\t\n' > 99_Strassenname |
170 | rm file_* | 194 | rm file_* |
171 | printf "done.\n" | 195 | printf "done.\n" |
172 | 196 | ||
173 | printf "Extracting phonebook.db ... " | 197 | printf "Extracting phonebook.db ... " |
174 | ../bin/decompress $1/phonebook.db | grep -v appropriate | 198 | decompress $1/phonebook.db | grep -v appropriate |
175 | 199 | ||
176 | numfiles=`find . -name file_\* | wc -l` | 200 | numfiles=`find . -name file_\* | wc -l` |
177 | printf "done.\n" | 201 | printf "done.\n" |
@@ -206,7 +230,7 @@ handle_new_format() { | |||
206 | mv column_9 13_Rufnummer | 230 | mv column_9 13_Rufnummer |
207 | mv column_10 14_15_Email_Webadresse | 231 | mv column_10 14_15_Email_Webadresse |
208 | 232 | ||
209 | printf "Looing up street names from indexes ... " | 233 | printf "Looking up street names from indexes ... " |
210 | cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse | 234 | cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse |
211 | printf "done.\n" | 235 | printf "done.\n" |
212 | 236 | ||
@@ -216,7 +240,7 @@ handle_new_format() { | |||
216 | 240 | ||
217 | if [ -f $1/zip-streets-hn-geo.tl ]; then | 241 | if [ -f $1/zip-streets-hn-geo.tl ]; then |
218 | printf "Extracting geo coordinates (precision: house number) ... " | 242 | printf "Extracting geo coordinates (precision: house number) ... " |
219 | ../bin/decompress $1/zip-streets-hn-geo.tl | 243 | decompress $1/zip-streets-hn-geo.tl |
220 | cat file_* > 90_Geokoordinaten_hnr | 244 | cat file_* > 90_Geokoordinaten_hnr |
221 | printf "done.\n" | 245 | printf "done.\n" |
222 | printf "Looking up geo coordinates for each phonebook entry ... " | 246 | printf "Looking up geo coordinates for each phonebook entry ... " |
@@ -224,7 +248,7 @@ handle_new_format() { | |||
224 | printf "done.\n" | 248 | printf "done.\n" |
225 | elif [ -f $1/zip-streets-geo.tl ]; then | 249 | elif [ -f $1/zip-streets-geo.tl ]; then |
226 | printf "Extracting geo coordinates (precision: street) ... " | 250 | printf "Extracting geo coordinates (precision: street) ... " |
227 | ../bin/decompress $1/zip-streets-geo.tl | 251 | decompress $1/zip-streets-geo.tl |
228 | cat file_* > 91_Geokoordinaten_str | 252 | cat file_* > 91_Geokoordinaten_str |
229 | printf "done.\n" | 253 | printf "done.\n" |
230 | printf "Looking up geo coordinates for each phonebook entry ... " | 254 | printf "Looking up geo coordinates for each phonebook entry ... " |