diff options
| -rwxr-xr-x | src/makecolumns.sh | 104 |
1 files changed, 49 insertions, 55 deletions
diff --git a/src/makecolumns.sh b/src/makecolumns.sh index 2df65c9..5d2d90b 100755 --- a/src/makecolumns.sh +++ b/src/makecolumns.sh | |||
| @@ -77,10 +77,8 @@ size() { | |||
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | get_dword() { | 79 | get_dword() { |
| 80 | # $1 file, $2 offset | 80 | # $1 file |
| 81 | file=`printf %0${filename_len}d ${1}` | 81 | hexdump -n 4 -v -e '" " 1/4 "%u"' `printf %0${filename_len}d ${1}` |
| 82 | set -- `od -tu4 -N4 -j$(( 4*${2:-0} )) ${file}` | ||
| 83 | printf "%d\n" $2 | ||
| 84 | } | 82 | } |
| 85 | 83 | ||
| 86 | handle_old_format() { | 84 | handle_old_format() { |
| @@ -113,38 +111,34 @@ handle_old_format() { | |||
| 113 | fi | 111 | fi |
| 114 | 112 | ||
| 115 | # Now loop over all files and dump them | 113 | # Now loop over all files and dump them |
| 116 | printf "Splitting decompressed chunks into their columns ... " | 114 | printf "Splitting decompressed nname chunks into their columns ... " |
| 117 | reported=0 | 115 | for file in `jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3`; do |
| 118 | while [ -f `printf %0${filename_len}d ${nname_file}` ]; do | 116 | set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` |
| 119 | # Get number of entries in this round | 117 | tail -c +$(( $2 + 1 )) ${file} |
| 120 | count=`get_dword ${nname_file}` | 118 | done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname |
| 121 | 119 | cut -c 1 < 01_02_Flags_Nachname > 01_Flags | |
| 122 | # Get offset into first nname | 120 | cut -c 2- < 01_02_Flags_Nachname > 02_Nachname |
| 123 | nname_off=$(( `get_dword ${nname_file} 1` + 1 )) | 121 | rm 01_02_Flags_Nachname |
| 124 | 122 | printf "done.\n" | |
| 125 | # Now get the flags before the nnames | 123 | |
| 126 | tail -c +${nname_off} `printf %0${filename_len}d ${nname_file}` | tr '\n\0' '\t\n' | head -n ${count} | cut -c -1 >> 01_Flags | 124 | printf "Splitting decompress vname chunks into their columns ... " |
| 127 | tail -c +${nname_off} `printf %0${filename_len}d ${nname_file}` | tr '\n\0' '\t\n' | head -n ${count} | cut -c 2- >> 02_Nachname | 125 | jot -w "%0${filename_len}d" - ${vname_file} $(( number_of_files - 1 )) 3 | xargs cat | tr '\n\0' '\t\n' > 03_Vorname |
| 128 | 126 | printf "done.\n" | |
| 129 | # Extract the vnames | 127 | |
| 130 | tr '\n\0' '\t\n' < `printf %0${filename_len}d ${vname_file}` | head -n ${count} >> 03_Vorname | 128 | printf "Splitting decompress table file chunks into their columns ... " |
| 131 | 129 | jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | splitold | |
| 132 | # Offset into first table entry tells us how many | 130 | # for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do |
| 133 | # fields are in table file | 131 | # # Offset into first table entry tells us how many |
| 134 | table_entries=$(( `get_dword ${table_file} 1` / 4 - 1 )) | 132 | # # fields are in table file |
| 135 | 133 | # set -- `hexdump -n 64 -v -e '" " 1/4 "%u"' ${file}` | |
| 136 | # Now iterate over all entries in the table file | 134 | # count=$1; table_entries=$(( $2 / 4 - 1 )); shift |
| 137 | for table_index in `jot ${table_entries}`; do | 135 | # |
| 138 | table_off=`get_dword ${table_file} ${table_index}` | 136 | # # Now iterate over all entries in the table file |
| 139 | tail -c +$(( table_off + 1 )) `printf %0${filename_len}d ${table_file}` | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( table_index + 3 ))` | 137 | # for idx in `jot ${table_entries}`; do |
| 140 | done | 138 | # tail -c +$(( $1 + 1 )) ${file} | tr '\n\0' '\t\n' | head -n ${count} >> `printf %02d_unknown $(( idx + 3 ))` |
| 141 | 139 | # shift | |
| 142 | # Advance the filenames. | 140 | # done |
| 143 | nname_file=$(( nname_file+3 )) | 141 | # done |
| 144 | vname_file=$(( vname_file+3 )) | ||
| 145 | table_file=$(( table_file+3 )) | ||
| 146 | [ 1 -eq $(( ( ( table_file * 20 ) / number_of_files ) > reported )) ] && printf "%d%% " $(( (reported+=1) * 5 )) | ||
| 147 | done | ||
| 148 | printf "done.\n" | 142 | printf "done.\n" |
| 149 | 143 | ||
| 150 | # wipe all temporary extracted files | 144 | # wipe all temporary extracted files |
| @@ -153,17 +147,19 @@ handle_old_format() { | |||
| 153 | printf "done.\n" | 147 | printf "done.\n" |
| 154 | 148 | ||
| 155 | # rename our columns extracted from the table file | 149 | # rename our columns extracted from the table file |
| 156 | mv 04_unknown 04_Namenszusatz | 150 | printf "Converting string terminators to line newlines ... " |
| 157 | mv 05_unknown 05_Adresszusatz | 151 | tr '\0' '\n' < 04_unknown > 04_Namenszusatz |
| 158 | mv 06_unknown 06_Ortszusatz | 152 | tr '\0' '\n' < 05_unknown > 05_Adresszusatz |
| 159 | mv 08_unknown 08_Hausnummer | 153 | tr '\0' '\n' < 06_unknown > 06_Ortszusatz |
| 160 | mv 09_unknown 09_Verweise | 154 | tr '\0' '\n' < 08_unknown > 08_Hausnummer |
| 161 | mv 10_unknown 10_Postleitzahl | 155 | tr '\0' '\n' < 09_unknown > 09_Verweise |
| 162 | mv 11_unknown 11_Ort | 156 | tr '\0' '\n' < 10_unknown > 10_Postleitzahl |
| 163 | mv 12_unknown 12_Vorwahl | 157 | tr '\0' '\n' < 11_unknown > 11_Ort |
| 164 | mv 13_unknown 13_Rufnummer | 158 | tr '\0' '\n' < 12_unknown > 12_Vorwahl |
| 165 | [ -f 14_unknown ] && mv 14_unknown 14_Email | 159 | tr '\0' '\n' < 13_unknown > 13_Rufnummer |
| 166 | [ -f 15_unknown ] && mv 15_unknown 15_Webadresse | 160 | [ -f 14_unknown ] && tr '\0' '\n' < 14_unknown > 14_Email |
| 161 | [ -f 15_unknown ] && tr '\0' '\n' < 15_unknown > 15_Webadresse | ||
| 162 | printf "done.\n" | ||
| 167 | 163 | ||
| 168 | # If street names come in an extra file, extract | 164 | # If street names come in an extra file, extract |
| 169 | # street names first | 165 | # street names first |
| @@ -173,13 +169,14 @@ handle_old_format() { | |||
| 173 | # extract street names if 07_unknown contains street indexes | 169 | # extract street names if 07_unknown contains street indexes |
| 174 | # instead of street names | 170 | # instead of street names |
| 175 | if [ -f 99_Strassenname ]; then | 171 | if [ -f 99_Strassenname ]; then |
| 176 | mv 07_unknown 07_Strassenindex | 172 | tr '\0' '\n' < 07_unknown > 07_Strassenindex |
| 177 | printf "Looking up street names from indexes ... " | 173 | printf "Looking up street names from indexes ... " |
| 178 | cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse | 174 | cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse |
| 179 | printf "done.\n" | 175 | printf "done.\n" |
| 180 | else | 176 | else |
| 181 | mv 07_unknown 07_Strasse | 177 | tr '\0' '\n' < 07_unknown > 07_Strasse |
| 182 | fi | 178 | fi |
| 179 | rm ??_unknown | ||
| 183 | 180 | ||
| 184 | karto=$1/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt] | 181 | karto=$1/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt] |
| 185 | [ -f ${karto} ] && do_processfile_old ${karto} "geo coordinates" 90_Geokoordinaten_hnr_raw | 182 | [ -f ${karto} ] && do_processfile_old ${karto} "geo coordinates" 90_Geokoordinaten_hnr_raw |
| @@ -187,7 +184,7 @@ handle_old_format() { | |||
| 187 | printf "Looking up geo coordinates for each phonebook entry ... " | 184 | printf "Looking up geo coordinates for each phonebook entry ... " |
| 188 | tr '\0' '\n' < 90_Geokoordinaten_hnr_raw | tr ';' '\t' | cut -f "1,2,3,4,6,7" | tr '\n' '\0' > 90_Geokoordinaten_hnr | 185 | tr '\0' '\n' < 90_Geokoordinaten_hnr_raw | tr ';' '\t' | cut -f "1,2,3,4,6,7" | tr '\n' '\0' > 90_Geokoordinaten_hnr |
| 189 | rm 90_Geokoordinaten_hnr_raw | 186 | rm 90_Geokoordinaten_hnr_raw |
| 190 | lam 10_Postleitzahl -s $'\t' 07_Strasse -s $'\t' 08_Hausnummer | mapcoords 90_Geokoordinaten_hnr | convertcoords > 16_Koordinaten | 187 | lam 10_Postleitzahl -s $'\t' 11_Ort -s $'\t' 07_Strasse -s $'\t' 08_Hausnummer | mapcoords 90_Geokoordinaten_hnr | convertcoords > 16_Koordinaten |
| 191 | printf "done.\n" | 188 | printf "done.\n" |
| 192 | } | 189 | } |
| 193 | 190 | ||
| @@ -204,17 +201,14 @@ handle_new_format() { | |||
| 204 | decompress $1/phonebook.db | 201 | decompress $1/phonebook.db |
| 205 | 202 | ||
| 206 | rows=`find . -name file_\* | wc -l` | 203 | rows=`find . -name file_\* | wc -l` |
| 207 | rows=$(( rows / 11 )) | ||
| 208 | printf "done.\n" | 204 | printf "done.\n" |
| 209 | 205 | ||
| 210 | # Do enumerations with builtin shell tools. Unfortunally neither | ||
| 211 | # jot nor seq are standards | ||
| 212 | printf "Splitting decompressed chunks into their columns (11 total) ... 1, " | 206 | printf "Splitting decompressed chunks into their columns (11 total) ... 1, " |
| 213 | f=-1; while [ $f -lt $rows ]; do printf "file_%05X " $(( (f+=1) * 11)); done | xargs cat | xxd -ps -c1 > column_0 | 207 | jot -w "file_%05X" - 0 $rows 11 | xargs cat | xxd -ps -c1 > column_0 |
| 214 | 208 | ||
| 215 | for col in 1 2 3 4 5 6 7 8 9 10; do | 209 | for col in 1 2 3 4 5 6 7 8 9 10; do |
| 216 | printf "%d, " $(( col + 1 )) | 210 | printf "%d, " $(( col + 1 )) |
| 217 | f=-1; while [ $f -lt $rows ]; do printf "file_%05X " $(( col + (f+=1) * 11 )); done | xargs cat | tr '\n\0' '\t\n' > column_${col} | 211 | jot -w "file_%05X" - ${col} ${rows} 11 | xargs cat | tr '\n\0' '\t\n' > column_${col} |
| 218 | done | 212 | done |
| 219 | printf "done.\n" | 213 | printf "done.\n" |
| 220 | 214 | ||
