diff options
author | Dirk Engling <erdgeist@erdgeist.org> | 2014-02-10 15:35:47 +0100 |
---|---|---|
committer | Dirk Engling <erdgeist@erdgeist.org> | 2014-02-10 15:35:47 +0100 |
commit | ae1f17a7b0abbc425f33106c666f1bc71e1b4711 (patch) | |
tree | be0d685293274bbaa1add147617861d050075856 /src | |
parent | 1e0ae2a4e3c0bab562d7f8c8ee9539a0613357b6 (diff) |
Use integers to count through files in the old Telefonbuch format. Only convert them to their fixed-width representation when needed.
Diffstat (limited to 'src')
-rwxr-xr-x | src/makecolumns.sh | 56 |
1 files changed, 27 insertions, 29 deletions
diff --git a/src/makecolumns.sh b/src/makecolumns.sh index ab61c29..476ce90 100755 --- a/src/makecolumns.sh +++ b/src/makecolumns.sh | |||
@@ -38,12 +38,6 @@ main() { | |||
38 | cd .. | 38 | cd .. |
39 | } | 39 | } |
40 | 40 | ||
41 | get_dword() { | ||
42 | # $1 file, $2 offset | ||
43 | set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${1}` | ||
44 | printf "%d\n" $2 | ||
45 | } | ||
46 | |||
47 | do_decompress_old() { | 41 | do_decompress_old() { |
48 | printf "Extracting $2 chunks ... " | 42 | printf "Extracting $2 chunks ... " |
49 | extractblocks "${1}" | 43 | extractblocks "${1}" |
@@ -78,29 +72,37 @@ do_processfile_old() { | |||
78 | rm -rf ${working_on} | 72 | rm -rf ${working_on} |
79 | } | 73 | } |
80 | 74 | ||
75 | size() { | ||
76 | stat -f %z `printf %0${filename_len}d $1` | ||
77 | } | ||
78 | |||
79 | get_dword() { | ||
80 | # $1 file, $2 offset | ||
81 | file=`printf %0${filename_len}d ${1}` | ||
82 | set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${file}` | ||
83 | printf "%d\n" $2 | ||
84 | } | ||
85 | |||
81 | handle_old_format() { | 86 | handle_old_format() { |
82 | echo "Working on $1. Detected pre-2004 Telefonbuch version." | 87 | echo "Working on $1. Detected pre-2004 Telefonbuch version." |
83 | # Extract teiln.dat | 88 | # Extract teiln.dat |
84 | do_decompress_old $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat" | 89 | do_decompress_old $1/[Dd][Aa][Tt]/[Tt][Ee][Ii][Ll][Nn].[Dd][Aa][Tt] "teiln.dat" |
85 | 90 | ||
86 | # See how long each filename is | 91 | # See how long each filename is |
87 | filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) | 92 | export filename_len=$(( `ls | head -n 1 | wc -c` - 1 )) |
88 | 93 | ||
89 | # Get total amount of files, for reporting progress | 94 | # Get total amount of files, for reporting progress |
90 | number_of_files=`find -E . -depth 1 -regex '^\./[0123456789]+' | wc -l` | 95 | number_of_files=`find -E . -depth 1 -regex '^\./[0123456789]+' | wc -l` |
91 | 96 | ||
92 | # from 2000F on file 0+3*n is table, so make it default | 97 | # from 2000F on file 0+3*n is table, so make it default |
93 | table_file=`printf %0${filename_len}d 0` | 98 | table_file=0; vname_file=2 |
94 | vname_file=`printf %0${filename_len}d 2` | ||
95 | 99 | ||
96 | # if supposed vname file is larger than table file, | 100 | # if supposed vname file is larger than table file, |
97 | # we're having a pre-2000F layout, so switch accordingly | 101 | # we're having a pre-2000F layout, so switch accordingly |
98 | if [ `stat -f %z ${table_file}` -lt `stat -f %z ${vname_file}` ]; then | 102 | if [ `size ${table_file}` -lt `size ${vname_file}` ]; then |
99 | table_file=`printf %0${filename_len}d 2` | 103 | table_file=2; nname_file=0; vname_file=1 |
100 | nname_file=`printf %0${filename_len}d 0` | ||
101 | vname_file=`printf %0${filename_len}d 1` | ||
102 | else | 104 | else |
103 | nname_file=`printf %0${filename_len}d 1` | 105 | nname_file=1 |
104 | fi | 106 | fi |
105 | 107 | ||
106 | # Table file has a table header with identical count | 108 | # Table file has a table header with identical count |
@@ -113,7 +115,7 @@ handle_old_format() { | |||
113 | # Now loop over all files and dump them | 115 | # Now loop over all files and dump them |
114 | printf "Splitting decompressed chunks into their columns ... " | 116 | printf "Splitting decompressed chunks into their columns ... " |
115 | reported=0 | 117 | reported=0 |
116 | while [ -f ${nname_file} ]; do | 118 | while [ -f `printf %0${filename_len}d ${nname_file}` ]; do |
117 | # Get number of entries in this round | 119 | # Get number of entries in this round |
118 | count=`get_dword ${nname_file}` | 120 | count=`get_dword ${nname_file}` |
119 | 121 | ||
@@ -121,11 +123,11 @@ handle_old_format() { | |||
121 | nname_off=$(( `get_dword ${nname_file} 1` + 1 )) | 123 | nname_off=$(( `get_dword ${nname_file} 1` + 1 )) |
122 | 124 | ||
123 | # Now get the flags before the nnames | 125 | # Now get the flags before the nnames |
124 | tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c -1 >> 01_Flags | 126 | tail -c +${nname_off} `printf %0${filename_len}d ${nname_file}` | tr '\n\0' '\t\n' | head -n ${count} | cut -c -1 >> 01_Flags |
125 | tail -c +${nname_off} ${nname_file} | tr '\n\0' '\t\n' | head -n ${count} | cut -c 2- >> 02_Nachname | 127 | tail -c +${nname_off} `printf %0${filename_len}d ${nname_file}` | tr '\n\0' '\t\n' | head -n ${count} | cut -c 2- >> 02_Nachname |
126 | 128 | ||
127 | # Extract the vnames | 129 | # Extract the vnames |
128 | tr '\n\0' '\t\n' < ${vname_file} | head -n ${count} >> 03_Vorname | 130 | tr '\n\0' '\t\n' < `printf %0${filename_len}d ${vname_file}` | head -n ${count} >> 03_Vorname |
129 | 131 | ||
130 | # Offset into first table entry tells us how many | 132 | # Offset into first table entry tells us how many |
131 | # fields are in table file | 133 | # fields are in table file |
@@ -134,18 +136,14 @@ handle_old_format() { | |||
134 | # Now iterate over all entries in the table file | 136 | # Now iterate over all entries in the table file |
135 | for table_index in `jot ${table_entries}`; do | 137 | for table_index in `jot ${table_entries}`; do |
136 | table_off=`get_dword ${table_file} ${table_index}` | 138 | table_off=`get_dword ${table_file} ${table_index}` |
137 | tail -c +$(( table_off + 1 )) ${table_file} | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( table_index + 3 ))` | 139 | tail -c +$(( table_off + 1 )) `printf %0${filename_len}d ${table_file}` | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( table_index + 3 ))` |
138 | done | 140 | done |
139 | 141 | ||
140 | # Advance the filenames. Note, that we need bc because | 142 | # Advance the filenames. |
141 | # builtin arithmetic treats numbers with leading zeros as octals | 143 | nname_file=$(( nname_file+3 )) |
142 | nname_file=`printf "%s + 3\n" ${nname_file} | bc` | 144 | vname_file=$(( vname_file+3 )) |
143 | nname_file=`printf %0${filename_len}d ${nname_file}` | 145 | table_file=$(( table_file+3 )) |
144 | vname_file=`printf "%s + 3\n" ${vname_file} | bc` | 146 | [ 1 -eq $(( ( ( table_file * 20 ) / number_of_files ) > reported )) ] && printf "%d%% " $(( (reported+=1) * 5 )) |
145 | vname_file=`printf %0${filename_len}d ${vname_file}` | ||
146 | table_file=`printf "%s + 3\n" ${table_file} | bc` | ||
147 | [ 1 -eq $(( ( table_file * 20 ) / number_of_files > reported )) ] && printf "%d%% " $(( (reported+=1) * 5 )) | ||
148 | table_file=`printf %0${filename_len}d ${table_file}` | ||
149 | done | 147 | done |
150 | printf "done.\n" | 148 | printf "done.\n" |
151 | 149 | ||
@@ -170,7 +168,7 @@ handle_old_format() { | |||
170 | # If street names come in an extra file, extract | 168 | # If street names come in an extra file, extract |
171 | # street names first | 169 | # street names first |
172 | streets=$1/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt] | 170 | streets=$1/[Dd][Aa][Tt]/[Ss][Tt][Rr][Aa][Ss][Ss][Ee][Nn].[Dd][Aa][Tt] |
173 | [ -f ${streets} ] && do_processfile_old "${streets}" "street name" 99_Strassenname convert_zeros | 171 | [ -f ${streets} ] && do_processfile_old ${streets} "street name" 99_Strassenname convert_zeros |
174 | 172 | ||
175 | # extract street names if 07_unknown contains street indexes | 173 | # extract street names if 07_unknown contains street indexes |
176 | # instead of street names | 174 | # instead of street names |