summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Dirk Engling <erdgeist@erdgeist.org> 2014-02-10 15:35:47 +0100
committer: Dirk Engling <erdgeist@erdgeist.org> 2014-02-10 15:35:47 +0100
commit: ae1f17a7b0abbc425f33106c666f1bc71e1b4711 (patch)
tree: be0d685293274bbaa1add147617861d050075856 /src
parent: 1e0ae2a4e3c0bab562d7f8c8ee9539a0613357b6 (diff)
Use integers to count through files on old telefonbuch format. Only convert them to fixed width representation when needed.
Diffstat (limited to 'src')
-rwxr-xr-x src/makecolumns.sh 56
1 files changed, 27 insertions, 29 deletions
diff --git a/src/makecolumns.sh b/src/makecolumns.sh
index ab61c29..476ce90 100755
--- a/src/makecolumns.sh
+++ b/src/makecolumns.sh
@@ -38,12 +38,6 @@ main() {
38 cd .. 38 cd ..
39} 39}
40 40
41get_dword() {
42 # $1 file, $2 offset
43 set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}`
44 printf "%d\n" $2
45}
46
47do_decompress_old() { 41do_decompress_old() {
48 printf "Extracting $2 chunks ... " 42 printf "Extracting $2 chunks ... "
49 extractblocks "${1}" 43 extractblocks "${1}"
@@ -78,29 +72,37 @@ do_processfile_old() {
78 rm -rf ${working_on} 72 rm -rf ${working_on}
79} 73}
80 74
75size() {
76 stat -f %z `printf %0${filename_len}d $1`
77}
78
79get_dword() {
80 # $1 file, $2 offset
81 file=`printf %0${filename_len}d ${1}`
82 set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${file}`
83 printf "%d\n" $2
84}
85
81handle_old_format() { 86handle_old_format() {
82 echo "Working on $1. Detected pre-2004 Telefonbuch version." 87 echo "Working on $1. Detected pre-2004 Telefonbuch version."
83 # Extract teiln.dat 88 # Extract teiln.dat
84 do_decompress_old $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat" 89 do_decompress_old $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat"
85 90
86 # See how long each filename is 91 # See how long each filename is
87 filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) 92 export filename_len=$(( `ls | head -n 1 | wc -c` - 1 ))
88 93
89 # Get total amount of files, for reporting progress 94 # Get total amount of files, for reporting progress
90 number_of_files=`find -E . -depth 1 -regex '^\./[0123456789]+' | wc -l` 95 number_of_files=`find -E . -depth 1 -regex '^\./[0123456789]+' | wc -l`
91 96
92 # from 2000F on file 0+3*n is table, so make it default 97 # from 2000F on file 0+3*n is table, so make it default
93 table_file=`printf %0${filename_len}d 0` 98 table_file=0; vname_file=2
94 vname_file=`printf %0${filename_len}d 2`
95 99
96 # if supposed vname file is larger than table file, 100 # if supposed vname file is larger than table file,
97 # we're having a pre-2000F layout, so switch accordingly 101 # we're having a pre-2000F layout, so switch accordingly
98 if [ `stat -f %z ${table_file}` -lt `stat -f %z ${vname_file}` ]; then 102 if [ `size ${table_file}` -lt `size ${vname_file}` ]; then
99 table_file=`printf %0${filename_len}d 2` 103 table_file=2; nname_file=0; vname_file=1
100 nname_file=`printf %0${filename_len}d 0`
101 vname_file=`printf %0${filename_len}d 1`
102 else 104 else
103 nname_file=`printf %0${filename_len}d 1` 105 nname_file=1
104 fi 106 fi
105 107
106 # Table file has a table header with identical count 108 # Table file has a table header with identical count
@@ -113,7 +115,7 @@ handle_old_format() {
113 # Now loop over all files and dump them 115 # Now loop over all files and dump them
114 printf "Splitting decompressed chunks into their columns ... " 116 printf "Splitting decompressed chunks into their columns ... "
115 reported=0 117 reported=0
116 while [ -f ${nname_file} ]; do 118 while [ -f `printf %0${filename_len}d ${nname_file}` ]; do
117 # Get number of entries in this round 119 # Get number of entries in this round
118 count=`get_dword ${nname_file}` 120 count=`get_dword ${nname_file}`
119 121
@@ -121,11 +123,11 @@ handle_old_format() {
121 nname_off=$(( `get_dword ${nname_file} 1` + 1 )) 123 nname_off=$(( `get_dword ${nname_file} 1` + 1 ))
122 124
123 # Now get the flags before the nnames 125 # Now get the flags before the nnames
124 tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c -1 >> 01_Flags 126 tail -c +${nname_off} `printf %0${filename_len}d ${nname_file}` | tr '\n\0' '\t\n' | head -n ${count} | cut -c -1 >> 01_Flags
125 tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c 2- >> 02_Nachname 127 tail -c +${nname_off} `printf %0${filename_len}d ${nname_file}` | tr '\n\0' '\t\n' | head -n ${count} | cut -c 2- >> 02_Nachname
126 128
127 # Extract the vnames 129 # Extract the vnames
128 tr '\n\0' '\t\n' < ${vname_file} | head -n ${count} >> 03_Vorname 130 tr '\n\0' '\t\n' < `printf %0${filename_len}d ${vname_file}` | head -n ${count} >> 03_Vorname
129 131
130 # Offset into first table entry tells us how many 132 # Offset into first table entry tells us how many
131 # fields are in table file 133 # fields are in table file
@@ -134,18 +136,14 @@ handle_old_format() {
134 # Now iterate over all entries in the table file 136 # Now iterate over all entries in the table file
135 for table_index in `jot ${table_entries}`; do 137 for table_index in `jot ${table_entries}`; do
136 table_off=`get_dword ${table_file} ${table_index}` 138 table_off=`get_dword ${table_file} ${table_index}`
137 tail -c +$(( table_off + 1 )) ${table_file} | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( table_index + 3 ))` 139 tail -c +$(( table_off + 1 )) `printf %0${filename_len}d ${table_file}` | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( table_index + 3 ))`
138 done 140 done
139 141
140 # Advance the filenames. Note, that we need bc because 142 # Advance the filenames.
141 # builtin arithmetic treats numbers with leading zeros as octals 143 nname_file=$(( nname_file+3 ))
142 nname_file=`printf "%s + 3\n" ${nname_file} | bc` 144 vname_file=$(( vname_file+3 ))
143 nname_file=`printf %0${filename_len}d ${nname_file}` 145 table_file=$(( table_file+3 ))
144 vname_file=`printf "%s + 3\n" ${vname_file} | bc` 146 [ 1 -eq $(( ( ( table_file * 20 ) / number_of_files ) > reported )) ] && printf "%d%% " $(( (reported+=1) * 5 ))
145 vname_file=`printf %0${filename_len}d ${vname_file}`
146 table_file=`printf "%s + 3\n" ${table_file} | bc`
147 [ 1 -eq $(( ( table_file * 20 ) / number_of_files > reported )) ] && printf "%d%% " $(( (reported+=1) * 5 ))
148 table_file=`printf %0${filename_len}d ${table_file}`
149 done 147 done
150 printf "done.\n" 148 printf "done.\n"
151 149
@@ -170,7 +168,7 @@ handle_old_format() {
170 # If street names come in an extra file, extract 168 # If street names come in an extra file, extract
171 # street names first 169 # street names first
172 streets=$1/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt] 170 streets=$1/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt]
173 [ -f ${streets} ] && do_processfile_old "${streets}" "street name" 99_Strassenname convert_zeros 171 [ -f ${streets} ] && do_processfile_old ${streets} "street name" 99_Strassenname convert_zeros
174 172
175 # extract street names if 07_unknown contains street indexes 173 # extract street names if 07_unknown contains street indexes
176 # instead of street names 174 # instead of street names