summaryrefslogtreecommitdiff
path: root/makecolumns.sh
diff options
context:
space:
mode:
Diffstat (limited to 'makecolumns.sh')
-rwxr-xr-xmakecolumns.sh286
1 files changed, 286 insertions, 0 deletions
diff --git a/makecolumns.sh b/makecolumns.sh
new file mode 100755
index 0000000..0f9c5ba
--- /dev/null
+++ b/makecolumns.sh
@@ -0,0 +1,286 @@
#!/bin/sh

# Run every tool started from here on in the plain C locale, and append
# the ../bin/ directory (relative to the directory the script is started
# from) to the command search path.
LANG=C
LC_CTYPE=C
LC_ALL=C
export LANG LC_CTYPE LC_ALL

PATH="${PATH}:$(pwd)/../bin/"
export PATH
7
# Entry point: locate the el binary, validate the arguments, build the
# binaries, recreate the working directory and dispatch to the handler
# matching the detected phonebook format.
#
# Arguments: $1 - phonebook directory
# Globals:   EL (written) - path of the el binary, read by the handlers.
#            A value inherited from the environment is kept when el exists
#            in neither of the two probed locations.
# Exits:     1 when el is missing, the argument count is wrong or the
#            working directory cannot be entered
# Returns:   1 when $1 is not a recognized phonebook folder, 0 otherwise
#
# NOTE(review): $1 is still used after changing into ../work_<name>, so a
# relative path is resolved from there - confirm callers pass an absolute
# (or suitably ../-relative) path.
main() {
  script_dir=$(dirname "$0")
  [ -f /usr/local/bin/el ] && EL=/usr/local/bin/el
  [ -f "${script_dir}/../bin/el" ] && EL="${script_dir}/../bin/el"

  if [ -z "${EL}" ]; then
    echo "el not found. Get it at 'cvs -d :pserver:anoncvs@cvs.erdgeist.org:/home/cvsroot co el'" >&2
    exit 1
  fi

  if [ $# -ne 1 ]; then
    echo "Syntax: $0 [phonebookdirectory]" >&2
    exit 1
  fi

  # Compile all the binaries
  make binaries

  # Computed once and quoted, so a directory name containing blanks cannot
  # split into several arguments for rm -rf / mkdir / cd.
  work_dir=../work_$(basename "${1#white_}")

  printf "Cleaning up old working directory ... "
  rm -rf "${work_dir}"
  printf "done.\n"
  mkdir -p "${work_dir}"
  cd "${work_dir}" || exit 1

  # teiln.dat may be spelled in any letter case. The pattern has to stay
  # outside the quotes to be expanded; the loop copes with zero matches
  # (pattern stays literal) as well as with several.
  teiln_found=
  for candidate in "${1}"/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt]; do
    if [ -f "${candidate}" ]; then
      teiln_found=yes
      break
    fi
  done

  main_status=0
  if [ -f "${1}/phonebook.db" ]; then
    handle_format_version_3 "${1}"
  elif [ -n "${teiln_found}" ]; then
    handle_format_version_2 "${1}"
  elif [ -n "$(find "${1}" -name dpr00000.005 -ls -quit)" ]; then
    handle_format_version_1 "${1}"
  else
    echo "Not a recognized Telefonbuch folder" >&2
    main_status=1
  fi
  cd ..
  return "${main_status}"
}
42
# Extract the LHA chunks of a version 2 data file into the current
# directory, unpack each chunk and delete the archive, printing coarse
# progress while doing so.
#
# Arguments: $1 - data file handed to extract_version_2
#            $2 - label used in the progress messages
# Globals:   numfiles, reported, processed (written)
# Outputs:   progress messages on stdout
do_decompress_version_2() {
  # The label is passed as an argument, never as part of the format string.
  printf 'Extracting %s chunks ... ' "$2"
  extract_version_2 "${1}"
  printf "done.\n"

  printf 'Decompressing %s chunks ... ' "$2"
  numfiles=$(find . -name '*.lha' | wc -l)
  reported=0
  processed=0
  for archive in *.lha; do
    # Without any archive the pattern stays literal; skipping it also
    # avoids dividing by a zero numfiles below.
    [ -e "${archive}" ] || continue
    lha x "${archive}" > /dev/null
    rm "${archive}"
    processed=$(( processed + 1 ))
    # Progress is counted in steps of 5%; at most one step per archive.
    if [ $(( processed * 20 / numfiles )) -gt "${reported}" ]; then
      reported=$(( reported + 1 ))
      printf '%d%% ' $(( reported * 5 ))
    fi
  done
  # 100% corresponds to step 20, which the loop only reaches with at
  # least 20 archives; otherwise complete the progress line here.
  [ "${reported}" -lt 20 ] && printf '100%% '
  printf "done.\n"
}
59
# Decompress one version 2 data file inside a scratch directory named
# after the file, concatenate all resulting chunks into a single output
# file and remove the scratch directory again.
#
# Arguments: $1 - data file to process
#            $2 - label used in the progress messages
#            $3 - output file, created in the current directory
#            $4 - optional; the literal "convert_zeros" turns newlines into
#                 tabs and NUL bytes into newlines while combining
# Globals:   working_on (written)
# Returns:   1 when the scratch directory cannot be created or entered
do_processfile_version_2() {
  working_on=$(basename "${1}")
  # Bail out before extracting: otherwise the chunks would land in the
  # current directory and the "cd .." below would leave it.
  if ! mkdir "${working_on}" || ! cd "${working_on}"; then
    echo "Cannot set up scratch directory ${working_on}" >&2
    return 1
  fi
  do_decompress_version_2 "${1}" "${2}"
  cd ..

  printf 'Combining %s into single file ... ' "$2"
  if [ "${4}" = "convert_zeros" ]; then
    cat "${working_on}"/* | tr '\n\0' '\t\n' > "$3"
  else
    cat "${working_on}"/* > "$3"
  fi
  printf "done.\n"

  rm -rf -- "${working_on:?}"
}
76
# Print the size in bytes of a chunk file in the current directory.
#
# Arguments: $1 - numeric chunk index; zero-padded to ${filename_len}
#                 digits it forms the file name
# Globals:   filename_len (read)
# Outputs:   the byte count on stdout
# Returns:   1, printing nothing on stdout, when the file does not exist
size() {
  chunk_file=$(printf "%0${filename_len}d" "$1")
  if [ ! -f "${chunk_file}" ]; then
    echo "size: ${chunk_file}: no such file" >&2
    return 1
  fi
  # wc -c is available everywhere, unlike the BSD-only "stat -f %z"; the
  # arithmetic expansion strips the padding some wc implementations print.
  echo $(( $(wc -c < "${chunk_file}") ))
}
80
# Print the first four bytes of a chunk file, read as a single unsigned
# 4-byte integer, in decimal and preceded by one blank.
#
# Arguments: $1 - numeric chunk index; zero-padded to ${filename_len}
#                 digits it forms the file name
# Globals:   filename_len (read)
get_dword() {
  hexdump -v -n 4 -e '" " 1/4 "%u"' "$(printf "%0${filename_len}d" "${1}")"
}
85
# Handle a phonebook in the layout announced as "pre-02/1996": pipe the
# names of all dpr*.001 files below $1 into extract_version_1 and rename
# the NN_unknown files found in the current directory afterwards.
#
# Arguments: $1 - phonebook directory
# Returns:   the status of the last rename
handle_format_version_1() {
  echo "Working on $1. Detected pre-02/1996 Telefonbuch version."

  # Extract all dpr database files
  printf "Extracting dpr databases ... "
  find "$1" -name 'dpr*.001' | extract_version_1
  printf "done.\n"

  # Pairs of <extracted column number> <final column name>, processed in
  # this order. $1 is not needed any more, so the positional parameters
  # are reused as the work list.
  set -- \
    01 01_Flags \
    02 02_Nachname \
    03 03_Vorname \
    04 05_Adresszusatz \
    05 06_Ortszusatz \
    06 10_Zustellamt_PLZOst \
    07 07_Strasse \
    08 08_Hausnummer \
    09 04_Namenszusatz \
    10 09_Fax_Verweise \
    11 12_Vorwahl \
    12 13_Rufnummer \
    13 11_Ort \
    14 10_Postleitzahl

  rename_status=0
  while [ $# -gt 0 ]; do
    mv "${1}_unknown" "${2}"
    rename_status=$?
    shift 2
  done
  return "${rename_status}"
}
109
110
# Handle a phonebook in the layout announced as "pre-2004": decompress
# teiln.dat into numbered chunk files, split those into column files in
# the current directory, then resolve street names and - when a karto.dat
# exists - geo coordinates.
#
# Arguments: $1 - phonebook directory
# Globals:   EL (read); filename_len (exported), number_of_files,
#            table_file, vname_file, nname_file, streets, karto, tab
#            (written)
# Exits:     1 when the nname and table chunk headers disagree
handle_format_version_2() {
  echo "Working on $1. Detected pre-2004 Telefonbuch version."
  tab=$(printf '\t')

  # Extract teiln.dat; only the directory part is quoted so that the
  # case-insensitive pattern is still expanded.
  do_decompress_version_2 "$1"/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat"

  # See how long each filename is (wc -c also counts the newline)
  filename_len=$(( $(ls | head -n 1 | wc -c) - 1 ))
  export filename_len

  # Total number of all-digit chunk files; used below as the upper bound
  # when enumerating every third chunk
  number_of_files=$(find -E . -depth 1 -regex '^\./[0123456789]+' | wc -l)

  # from 2000F on file 0+3*n is table, so make it default
  table_file=0; vname_file=2

  # if supposed vname file is larger than table file,
  # we're having a pre-2000F layout, so switch accordingly
  if [ $(size "${table_file}") -lt $(size "${vname_file}") ]; then
    table_file=2; nname_file=0; vname_file=1
  else
    nname_file=1
  fi

  # Table file has a table header with identical count
  # to nname file's header. Verify this.
  # The substitutions stay unquoted on purpose: get_dword prints a leading
  # blank that word splitting removes.
  if [ $(get_dword "${nname_file}") -ne $(get_dword "${table_file}") ]; then
    echo "Unknown layout." >&2
    exit 1
  fi

  # Now loop over all files and dump them
  printf "Splitting decompressed nname chunks into their columns ... "
  jot -w "%0${filename_len}d" - "${nname_file}" $(( number_of_files - 1 )) 3 | split_version_2 1 1
  # Disabled shell variant kept for reference (presumably what
  # split_version_2 replaced - confirm before removing):
#  set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}`
#  tail -c +$(( $2 + 1 )) ${file}
# done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname
  # First character of each line is the flag, the remainder the surname
  cut -c 1 < 01_unknown > 01_Flags
  cut -c 2- < 01_unknown > 02_Nachname
  rm 01_unknown
  printf "done.\n"

  printf "Splitting decompress vname chunks into their columns ... "
  jot -w "%0${filename_len}d" - "${vname_file}" $(( number_of_files - 1 )) 3 | xargs cat | tr '\n\0' '\t\n' | tr -d '\377' > 03_Vorname
  printf "done.\n"

  printf "Splitting decompress table file chunks into their columns ... "
  jot -w "%0${filename_len}d" - "${table_file}" $(( number_of_files - 1 )) 3 | split_version_2 4 0
  # Disabled shell variant kept for reference (presumably what
  # split_version_2 replaced - confirm before removing):
# for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do
#   # Offset into first table entry tells us how many
#   # fields are in table file
#   set -- `hexdump -n 64 -v -e '" " 1/4 "%u"' ${file}`
#   count=$1; table_entries=$(( $2 / 4 - 1 )); shift
#
#   # Now iterate over all entries in the table file
#   for idx in `jot ${table_entries}`; do
#     tail -c +$(( $1 + 1 )) ${file} | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( idx + 3 ))`
#     shift
#   done
# done
  printf "done.\n"

  # wipe all temporary extracted files
  printf "Cleaning up decompressed chunks ... "
  find -E . -depth 1 -regex '^\./[0123456789]+' -delete
  printf "done.\n"

  # rename our columns extracted from the table file
  mv 04_unknown 04_Namenszusatz
  mv 05_unknown 05_Adresszusatz
  mv 06_unknown 06_Ortszusatz
  mv 08_unknown 08_Hausnummer
  mv 09_unknown 09_Verweise
  mv 10_unknown 10_Postleitzahl
  mv 11_unknown 11_Ort
  mv 12_unknown 12_Vorwahl
  mv 13_unknown 13_Rufnummer
  [ -f 14_unknown ] && mv 14_unknown 14_Email
  [ -f 15_unknown ] && mv 15_unknown 15_Webadresse

  # If street names come in an extra file, extract street names first.
  # The loop resolves the case-insensitive pattern; only the first
  # existing match is processed.
  for streets in "$1"/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt]; do
    [ -f "${streets}" ] || continue
    do_processfile_version_2 "${streets}" "street name" 99_Strassenname convert_zeros
    break
  done

  # extract street names if 07_unknown contains street indexes
  # instead of street names
  if [ -f 99_Strassenname ]; then
    mv 07_unknown 07_Strassenindex
    printf "Looking up street names from indexes ... "
    cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse
    printf "done.\n"
  else
    mv 07_unknown 07_Strasse
  fi

  # Optional geo coordinates, again located case-insensitively
  for karto in "$1"/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt]; do
    [ -f "${karto}" ] || continue
    do_processfile_version_2 "${karto}" "geo coordinates" 90_Geokoordinaten_hnr_raw

    printf "Looking up geo coordinates for each phonebook entry ... "
    # Keep fields 1-4, 6 and 7 of every ';'-separated, NUL-terminated record
    tr '\0' '\n' < 90_Geokoordinaten_hnr_raw | tr ';' '\t' | cut -f "1,2,3,4,6,7" | tr '\n' '\0' > 90_Geokoordinaten_hnr
    rm 90_Geokoordinaten_hnr_raw
    lam 10_Postleitzahl -s "${tab}" 11_Ort -s "${tab}" 07_Strasse -s "${tab}" 08_Hausnummer | map_coords 90_Geokoordinaten_hnr | convert_coords > 16_Koordinaten
    printf "done.\n"
    break
  done
}
216
# Handle a phonebook in the layout announced as "post-2003": extract
# streets.tl and phonebook.db into file_* chunks, turn the chunks into
# eleven column files in the current directory, resolve street names and
# house numbers and - when one of the geo tables exists - coordinates.
#
# Arguments: $1 - phonebook directory
# Globals:   EL (read); rows, col, tab (written)
handle_format_version_3() {
  echo "Working on $1. Detected post-2003 Telefonbuch version."
  tab=$(printf '\t')

  printf "Extracting street names ... "
  extract_version_3 "$1/streets.tl"

  # Newlines become tabs and NUL bytes become line ends
  cat file_* | tr '\n\0' '\t\n' > 99_Strassenname
  rm file_*
  printf "done.\n"

  printf "Extracting phonebook.db ... "
  extract_version_3 "$1/phonebook.db"

  # Despite its name this is the number of extracted chunk files
  rows=$(find . -name 'file_*' | wc -l)
  printf "done.\n"

  # Chunk n belongs to column n modulo 11. Column 0 is dumped as one hex
  # byte per line, all other columns are converted as text.
  printf "Splitting decompressed chunks into their columns (11 total) ... 1, "
  jot -w "file_%05X" - 0 $(( rows - 1 )) 11 | xargs cat | xxd -ps -c1 > column_0

  for col in 1 2 3 4 5 6 7 8 9 10; do
    printf "%d, " $(( col + 1 ))
    jot -w "file_%05X" - "${col}" $(( rows - 1 )) 11 | xargs cat | tr '\n\0' '\t\n' > "column_${col}"
  done
  printf "done.\n"

  printf "Cleaning up decompressed chunks ... "
  find . -name 'file_*' -delete
  printf "done.\n"

  mv column_0 01_Flags
  mv column_1 02_Nachname
  mv column_2 03_Vorname
  mv column_3 04_05_Namenszusatz_Addresszusatz
  mv column_4 09_Verweise
  mv column_5 07_08_Strassenindex_Hausnummer
  mv column_6 12_Vorwahl
  mv column_7 10_Postleitzahl
  mv column_8 11_Ort
  mv column_9 13_Rufnummer
  mv column_10 14_15_Email_Webadresse

  printf "Looking up street names from indexes ... "
  cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse
  printf "done.\n"

  printf "Splitting house numbers ... "
  # A tab is appended to every line first: cut passes lines without any
  # delimiter through unchanged, which would put the street index into
  # the house number column for entries that have no house number.
  sed -E "s:\$:${tab}:" < 07_08_Strassenindex_Hausnummer | cut -f 2 > 08_Hausnummer
  printf "done.\n"

  if [ -f "$1/zip-streets-hn-geo.tl" ]; then
    printf "Extracting geo coordinates (precision: house number) ... "
    extract_version_3 "$1/zip-streets-hn-geo.tl"
    cat file_* > 90_Geokoordinaten_hnr
    printf "done.\n"
    printf "Looking up geo coordinates for each phonebook entry ... "
    lam 10_Postleitzahl -s "${tab}" 07_Strasse -s "${tab}" 08_Hausnummer | map_coords 90_Geokoordinaten_hnr | convert_coords > 16_Koordinaten
    printf "done.\n"
  elif [ -f "$1/zip-streets-geo.tl" ]; then
    printf "Extracting geo coordinates (precision: street) ... "
    extract_version_3 "$1/zip-streets-geo.tl"
    cat file_* > 91_Geokoordinaten_str
    printf "done.\n"
    printf "Looking up geo coordinates for each phonebook entry ... "
    lam 10_Postleitzahl -s "${tab}" 07_Strasse | map_coords 91_Geokoordinaten_str | convert_coords > 16_Koordinaten
    printf "done.\n"
  fi
  # Without a geo table no chunk is left at this point; -f keeps that
  # case from ending the function with an error.
  rm -f file_*
}
284
# All functions are defined by now, so main may call any of them.
main "$@"