summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-x[-rw-r--r--]src/makecolumns.sh90
1 files changed, 57 insertions, 33 deletions
diff --git a/src/makecolumns.sh b/src/makecolumns.sh
index 4986157..95c9d08 100644..100755
--- a/src/makecolumns.sh
+++ b/src/makecolumns.sh
@@ -3,6 +3,7 @@
3export LANG=C 3export LANG=C
4export LC_CTYPE=C 4export LC_CTYPE=C
5export LC_ALL=C 5export LC_ALL=C
6export PATH=${PATH}:`pwd`/../bin/
6 7
7main() { 8main() {
8 [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el 9 [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el
@@ -28,11 +29,8 @@ main() {
28 29
29 if [ -f "$1/phonebook.db" ]; then 30 if [ -f "$1/phonebook.db" ]; then
30 handle_new_format $1 31 handle_new_format $1
31 elif [ -f "$1/DAT/TEILN.DAT" ]; then 32 elif [ -f $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] ]; then
32 handle_old_format "$1/DAT/TEILN.DAT" "${strassen}" 33 handle_old_format $1
33 elif [ -f "$1/dat/teiln.dat" ]; then
34 echo handle_old_format "$1/dat/teiln.dat" "${strassen}"
35 handle_old_format "$1/dat/teiln.dat" "${strassen}"
36 else 34 else
37 echo "Not a recognized Telefonbuch folder" 35 echo "Not a recognized Telefonbuch folder"
38 fi 36 fi
@@ -40,40 +38,56 @@ main() {
40} 38}
41 39
42get_dword() { 40get_dword() {
43 # $1 file, $2 offset 41 # $1 file, $2 offset
44 set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}` 42 set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}`
45 printf "%d\n" $2 43 printf "%d\n" $2
44}
45
46do_decompress_old() {
47 printf "Extracting $2 chunks ... "
48 extractblocks "${1}" | grep -v appropriate
49 printf "done.\n"
50
51 printf "Decompressing $2 chunks ... "
52 for archive in *.lha; do
53 lha x ${archive} > /dev/null
54 rm ${archive}
55 done
56 printf "done.\n"
57}
58
59do_processfile_old() {
60 working_on=`basename ${1}`
61 mkdir $working_on && cd ${working_on}
62 do_decompress_old "${1}" "${2}"
63 cd ..
64
65 printf "Combining $2 into single file ... "
66 if [ "${4}" = "convert_zeros" ]; then
67 cat ${working_on}/* | tr '\n\0' '\t\n' > $3
68 else
69 cat ${working_on}/* > $3
70 fi
71 printf "done.\n"
72
73 rm -rf ${working_on}
46} 74}
47 75
48handle_old_format() { 76handle_old_format() {
49 # Clear old files. Be very careful, we could 77 # Clear old files. Be very careful, we could
50 # have ended up in an unexpected directory, after all. 78 # have ended up in an unexpected directory, after all.
79 printf "Cleaning up old temporary files ... "
51 find -E . -depth 1 -regex '^\./[0123456789]+' -delete 80 find -E . -depth 1 -regex '^\./[0123456789]+' -delete
52 rm -f ??_* 81 rm -rf ??_* coords strassen
82 printf "done.\n"
53 83
54 # If street names come in an extra file, extract 84 # If street names come in an extra file, extract
55 # street names first 85 # street names first
56 if [ "$2" ]; then 86 streets=$1/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt]
57 mkdir strassen 87 [ -f ${streets} ] && do_processfile_old "${streets}" "street name" 99_Strassenname convert_zeros
58 cd strassen/
59
60 ../../bin/extractblocks $2
61
62 # This should leave us with a bunch of .lha files
63 for archive in *.lha; do lha x ${archive}; done
64 find . -name \*.lha -delete
65 cd ..
66
67 cat strassen/* | tr '\n\0' '\t\n' > 99_Strassenname
68 rm -r strassen/
69 fi
70 88
71 # Then extract teiln.dat 89 # Then extract teiln.dat
72 ../bin/extractblocks $1 90 do_decompress_old $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat"
73
74 # This should leave us with a bunch of .lha files
75 for archive in *.lha; do lha x ${archive}; done
76 find . -name \*.lha -delete
77 91
78 # See how long each filename is 92 # See how long each filename is
79 filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) 93 filename_len=$(( `ls | head -n 1 | wc -c` - 1 ))
@@ -100,6 +114,7 @@ handle_old_format() {
100 fi 114 fi
101 115
102 # Now loop over all files and dump them 116 # Now loop over all files and dump them
117 printf "Splitting decompressed chunks into their columns ... "
103 while [ -f ${nname_file} ]; do 118 while [ -f ${nname_file} ]; do
104 # Get number of entries in this round 119 # Get number of entries in this round
105 count=`get_dword ${nname_file}` 120 count=`get_dword ${nname_file}`
@@ -134,9 +149,12 @@ handle_old_format() {
134 table_file=`printf %0${filename_len}d ${table_file}` 149 table_file=`printf %0${filename_len}d ${table_file}`
135 150
136 done 151 done
152 printf "done.\n"
137 153
138 # wipe all temporary extracted files 154 # wipe all temporary extracted files
155 printf "Cleaning up decompressed chunks ... "
139 find -E . -depth 1 -regex '^\./[0123456789]+' -delete 156 find -E . -depth 1 -regex '^\./[0123456789]+' -delete
157 printf "done.\n"
140 158
141 # rename our columns extracted from the table file 159 # rename our columns extracted from the table file
142 mv 04_unknown 04_Namenszusatz 160 mv 04_unknown 04_Namenszusatz
@@ -155,23 +173,29 @@ handle_old_format() {
155 # instead of street names 173 # instead of street names
156 if [ -f 99_Strassenname ]; then 174 if [ -f 99_Strassenname ]; then
157 mv 07_unknown 07_Strassenindex 175 mv 07_unknown 07_Strassenindex
176 printf "Looking up street names from indexes ... "
158 cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse 177 cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse
178 printf "done.\n"
159 else 179 else
160 mv 07_unknown 07_Strasse 180 mv 07_unknown 07_Strasse
161 fi 181 fi
182
183 karto=$1/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt]
184 printf "%s\n" $karto
185 [ -f ${karto} ] && do_processfile_old ${karto} "geo coordinates" 90_Geokoordinaten_hnr
162} 186}
163 187
164handle_new_format() { 188handle_new_format() {
165 echo "Working on $1. Detected post-2003 Telefonbuch version." 189 echo "Working on $1. Detected post-2003 Telefonbuch version."
166 printf "Extracting street names ... " 190 printf "Extracting street names ... "
167 ../bin/decompress $1/streets.tl 191 decompress $1/streets.tl
168 192
169 cat file_* | tr '\n\0' '\t\n' > 99_Strassenname 193 cat file_* | tr '\n\0' '\t\n' > 99_Strassenname
170 rm file_* 194 rm file_*
171 printf "done.\n" 195 printf "done.\n"
172 196
173 printf "Extracting phonebook.db ... " 197 printf "Extracting phonebook.db ... "
174 ../bin/decompress $1/phonebook.db | grep -v appropriate 198 decompress $1/phonebook.db | grep -v appropriate
175 199
176 numfiles=`find . -name file_\* | wc -l` 200 numfiles=`find . -name file_\* | wc -l`
177 printf "done.\n" 201 printf "done.\n"
@@ -206,7 +230,7 @@ handle_new_format() {
206 mv column_9 13_Rufnummer 230 mv column_9 13_Rufnummer
207 mv column_10 14_15_Email_Webadresse 231 mv column_10 14_15_Email_Webadresse
208 232
209 printf "Looing up street names from indexes ... " 233 printf "Looking up street names from indexes ... "
210 cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse 234 cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse
211 printf "done.\n" 235 printf "done.\n"
212 236
@@ -216,7 +240,7 @@ handle_new_format() {
216 240
217 if [ -f $1/zip-streets-hn-geo.tl ]; then 241 if [ -f $1/zip-streets-hn-geo.tl ]; then
218 printf "Extracting geo coordinates (precision: house number) ... " 242 printf "Extracting geo coordinates (precision: house number) ... "
219 ../bin/decompress $1/zip-streets-hn-geo.tl 243 decompress $1/zip-streets-hn-geo.tl
220 cat file_* > 90_Geokoordinaten_hnr 244 cat file_* > 90_Geokoordinaten_hnr
221 printf "done.\n" 245 printf "done.\n"
222 printf "Looking up geo coordinates for each phonebook entry ... " 246 printf "Looking up geo coordinates for each phonebook entry ... "
@@ -224,7 +248,7 @@ handle_new_format() {
224 printf "done.\n" 248 printf "done.\n"
225 elif [ -f $1/zip-streets-geo.tl ]; then 249 elif [ -f $1/zip-streets-geo.tl ]; then
226 printf "Extracting geo coordinates (precision: street) ... " 250 printf "Extracting geo coordinates (precision: street) ... "
227 ../bin/decompress $1/zip-streets-geo.tl 251 decompress $1/zip-streets-geo.tl
228 cat file_* > 91_Geokoordinaten_str 252 cat file_* > 91_Geokoordinaten_str
229 printf "done.\n" 253 printf "done.\n"
230 printf "Looking up geo coordinates for each phonebook entry ... " 254 printf "Looking up geo coordinates for each phonebook entry ... "