diff options
author | User Erdgeist <erdgeist@avon.ccc.de> | 2014-02-13 21:16:42 +0000 |
---|---|---|
committer | User Erdgeist <erdgeist@avon.ccc.de> | 2014-02-13 21:16:42 +0000 |
commit | bf154653c1c49eafcf5c47dcded2bf5946aea3d7 (patch) | |
tree | 6a162296c435f9428bd0b26e4f9a09d1b06c1756 /src | |
parent | 28f818ad8313da4bec3d0bf1abfbc93da3df4f70 (diff) |
Make use of the new splitold tool to speed up extraction by a factor of 6
Diffstat (limited to 'src')
-rwxr-xr-x | src/makecolumns.sh | 46 |
1 file changed, 23 insertions, 23 deletions
diff --git a/src/makecolumns.sh b/src/makecolumns.sh index 5d2d90b..f5803c6 100755 --- a/src/makecolumns.sh +++ b/src/makecolumns.sh | |||
@@ -112,21 +112,21 @@ handle_old_format() { | |||
112 | 112 | ||
113 | # Now loop over all files and dump them | 113 | # Now loop over all files and dump them |
114 | printf "Splitting decompressed nname chunks into their columns ... " | 114 | printf "Splitting decompressed nname chunks into their columns ... " |
115 | for file in `jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3`; do | 115 | jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3 | splitold 1 1 |
116 | set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` | 116 | # set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` |
117 | tail -c +$(( $2 + 1 )) ${file} | 117 | # tail -c +$(( $2 + 1 )) ${file} |
118 | done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname | 118 | # done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname |
119 | cut -c 1 < 01_02_Flags_Nachname > 01_Flags | 119 | cut -c 1 < 01_unknown > 01_Flags |
120 | cut -c 2- < 01_02_Flags_Nachname > 02_Nachname | 120 | cut -c 2- < 01_unknown > 02_Nachname |
121 | rm 01_02_Flags_Nachname | 121 | rm 01_unknown |
122 | printf "done.\n" | 122 | printf "done.\n" |
123 | 123 | ||
124 | printf "Splitting decompress vname chunks into their columns ... " | 124 | printf "Splitting decompress vname chunks into their columns ... " |
125 | jot -w "%0${filename_len}d" - ${vname_file} $(( number_of_files - 1 )) 3 | xargs cat | tr '\n\0' '\t\n' > 03_Vorname | 125 | jot -w "%0${filename_len}d" - ${vname_file} $(( number_of_files - 1 )) 3 | xargs cat | tr '\n\0' '\t\n' | tr -d '\377' > 03_Vorname |
126 | printf "done.\n" | 126 | printf "done.\n" |
127 | 127 | ||
128 | printf "Splitting decompress table file chunks into their columns ... " | 128 | printf "Splitting decompress table file chunks into their columns ... " |
129 | jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | splitold | 129 | jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | splitold 4 0 |
130 | # for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do | 130 | # for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do |
131 | # # Offset into first table entry tells us how many | 131 | # # Offset into first table entry tells us how many |
132 | # # fields are in table file | 132 | # # fields are in table file |
@@ -148,17 +148,17 @@ handle_old_format() { | |||
148 | 148 | ||
149 | # rename our columns extracted from the table file | 149 | # rename our columns extracted from the table file |
150 | printf "Converting string terminators to line newlines ... " | 150 | printf "Converting string terminators to line newlines ... " |
151 | tr '\0' '\n' < 04_unknown > 04_Namenszusatz | 151 | mv 04_unknown 04_Namenszusatz |
152 | tr '\0' '\n' < 05_unknown > 05_Adresszusatz | 152 | mv 05_unknown 05_Adresszusatz |
153 | tr '\0' '\n' < 06_unknown > 06_Ortszusatz | 153 | mv 06_unknown 06_Ortszusatz |
154 | tr '\0' '\n' < 08_unknown > 08_Hausnummer | 154 | mv 08_unknown 08_Hausnummer |
155 | tr '\0' '\n' < 09_unknown > 09_Verweise | 155 | mv 09_unknown 09_Verweise |
156 | tr '\0' '\n' < 10_unknown > 10_Postleitzahl | 156 | mv 10_unknown 10_Postleitzahl |
157 | tr '\0' '\n' < 11_unknown > 11_Ort | 157 | mv 11_unknown 11_Ort |
158 | tr '\0' '\n' < 12_unknown > 12_Vorwahl | 158 | mv 12_unknown 12_Vorwahl |
159 | tr '\0' '\n' < 13_unknown > 13_Rufnummer | 159 | mv 13_unknown 13_Rufnummer |
160 | [ -f 14_unknown ] && tr '\0' '\n' < 14_unknown > 14_Email | 160 | [ -f 14_unknown ] && mv 14_unknown 14_Email |
161 | [ -f 15_unknown ] && tr '\0' '\n' < 15_unknown > 15_Webadresse | 161 | [ -f 15_unknown ] && mv 15_unknown 15_Webadresse |
162 | printf "done.\n" | 162 | printf "done.\n" |
163 | 163 | ||
164 | # If street names come in an extra file, extract | 164 | # If street names come in an extra file, extract |
@@ -169,12 +169,12 @@ handle_old_format() { | |||
169 | # extract street names if 07_unknown contains street indexes | 169 | # extract street names if 07_unknown contains street indexes |
170 | # instead of street names | 170 | # instead of street names |
171 | if [ -f 99_Strassenname ]; then | 171 | if [ -f 99_Strassenname ]; then |
172 | tr '\0' '\n' < 07_unknown > 07_Strassenindex | 172 | mv 07_unknown 07_Strassenindex |
173 | printf "Looking up street names from indexes ... " | 173 | printf "Looking up street names from indexes ... " |
174 | cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse | 174 | cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse |
175 | printf "done.\n" | 175 | printf "done.\n" |
176 | else | 176 | else |
177 | tr '\0' '\n' < 07_unknown > 07_Strasse | 177 | mv 07_unknown 07_Strasse |
178 | fi | 178 | fi |
179 | rm ??_unknown | 179 | rm ??_unknown |
180 | 180 | ||