summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x src/makecolumns.sh 46
-rw-r--r-- src/splitold.c 27
2 files changed, 39 insertions, 34 deletions
diff --git a/src/makecolumns.sh b/src/makecolumns.sh
index d70f564..ff8a1e4 100755
--- a/src/makecolumns.sh
+++ b/src/makecolumns.sh
@@ -112,21 +112,21 @@ handle_old_format() {
112 112
113 # Now loop over all files and dump them 113 # Now loop over all files and dump them
114 printf "Splitting decompressed nname chunks into their columns ... " 114 printf "Splitting decompressed nname chunks into their columns ... "
115 for file in `jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3`; do 115 jot -w %0${filename_len}d - ${nname_file} $(( number_of_files - 1 )) 3 | splitold 1 1
116 set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}` 116# set -- `hexdump -n 8 -v -e '" " 1/4 "%u"' ${file}`
117 tail -c +$(( $2 + 1 )) ${file} 117# tail -c +$(( $2 + 1 )) ${file}
118 done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname 118# done | tr '\n\0' '\t\n' > 01_02_Flags_Nachname
119 cut -c 1 < 01_02_Flags_Nachname > 01_Flags 119 cut -c 1 < 01_unknown > 01_Flags
120 cut -c 2- < 01_02_Flags_Nachname > 02_Nachname 120 cut -c 2- < 01_unknown > 02_Nachname
121 rm 01_02_Flags_Nachname 121 rm 01_unknown
122 printf "done.\n" 122 printf "done.\n"
123 123
124 printf "Splitting decompress vname chunks into their columns ... " 124 printf "Splitting decompress vname chunks into their columns ... "
125 jot -w "%0${filename_len}d" - ${vname_file} $(( number_of_files - 1 )) 3 | xargs cat | tr '\n\0' '\t\n' > 03_Vorname 125 jot -w "%0${filename_len}d" - ${vname_file} $(( number_of_files - 1 )) 3 | xargs cat | tr '\n\0' '\t\n' | tr -d '\377' > 03_Vorname
126 printf "done.\n" 126 printf "done.\n"
127 127
128 printf "Splitting decompress table file chunks into their columns ... " 128 printf "Splitting decompress table file chunks into their columns ... "
129 jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | splitold 129 jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3 | splitold 4 0
130# for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do 130# for file in `jot -w %0${filename_len}d - ${table_file} $(( number_of_files - 1 )) 3`; do
131# # Offset into first table entry tells us how many 131# # Offset into first table entry tells us how many
132# # fields are in table file 132# # fields are in table file
@@ -148,17 +148,17 @@ handle_old_format() {
148 148
149 # rename our columns extracted from the table file 149 # rename our columns extracted from the table file
150 printf "Converting string terminators to line newlines ... " 150 printf "Converting string terminators to line newlines ... "
151 tr '\0' '\n' < 04_unknown > 04_Namenszusatz 151 mv 04_unknown 04_Namenszusatz
152 tr '\0' '\n' < 05_unknown > 05_Adresszusatz 152 mv 05_unknown 05_Adresszusatz
153 tr '\0' '\n' < 06_unknown > 06_Ortszusatz 153 mv 06_unknown 06_Ortszusatz
154 tr '\0' '\n' < 08_unknown > 08_Hausnummer 154 mv 08_unknown 08_Hausnummer
155 tr '\0' '\n' < 09_unknown > 09_Verweise 155 mv 09_unknown 09_Verweise
156 tr '\0' '\n' < 10_unknown > 10_Postleitzahl 156 mv 10_unknown 10_Postleitzahl
157 tr '\0' '\n' < 11_unknown > 11_Ort 157 mv 11_unknown 11_Ort
158 tr '\0' '\n' < 12_unknown > 12_Vorwahl 158 mv 12_unknown 12_Vorwahl
159 tr '\0' '\n' < 13_unknown > 13_Rufnummer 159 mv 13_unknown 13_Rufnummer
160 [ -f 14_unknown ] && tr '\0' '\n' < 14_unknown > 14_Email 160 [ -f 14_unknown ] && mv 14_unknown 14_Email
161 [ -f 15_unknown ] && tr '\0' '\n' < 15_unknown > 15_Webadresse 161 [ -f 15_unknown ] && mv 15_unknown 15_Webadresse
162 printf "done.\n" 162 printf "done.\n"
163 163
164 # If street names come in an extra file, extract 164 # If street names come in an extra file, extract
@@ -169,12 +169,12 @@ handle_old_format() {
169 # extract street names if 07_unknown contains street indexes 169 # extract street names if 07_unknown contains street indexes
170 # instead of street names 170 # instead of street names
171 if [ -f 99_Strassenname ]; then 171 if [ -f 99_Strassenname ]; then
172 tr '\0' '\n' < 07_unknown > 07_Strassenindex 172 mv 07_unknown 07_Strassenindex
173 printf "Looking up street names from indexes ... " 173 printf "Looking up street names from indexes ... "
174 cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse 174 cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse
175 printf "done.\n" 175 printf "done.\n"
176 else 176 else
177 tr '\0' '\n' < 07_unknown > 07_Strasse 177 mv 07_unknown 07_Strasse
178 fi 178 fi
179 rm ??_unknown 179 rm ??_unknown
180 180
diff --git a/src/splitold.c b/src/splitold.c
index 847eb2c..bd85775 100644
--- a/src/splitold.c
+++ b/src/splitold.c
@@ -1,4 +1,3 @@
1#include "mystdlib.h"
2#include <stdint.h> 1#include <stdint.h>
3#include <stdio.h> 2#include <stdio.h>
4#include <unistd.h> 3#include <unistd.h>
@@ -6,31 +5,37 @@
6#include <stdlib.h> 5#include <stdlib.h>
7#include <string.h> 6#include <string.h>
8 7
9int main() { 8int main( int argc, char **args ) {
10 char table[64]; 9 char table[64], f[1024*1024*16];
11 int outfiles[64], i, off; 10 int outfiles[64], i, off, base = 0;
11 uint32_t fixed_columns = 0;
12
13 if( argc > 1 ) base = atol( args[1] );
14 if( argc > 2 ) fixed_columns = atol( args[2] );
12 15
13 for( i=0; i<64; ++i ) outfiles[i] = -1; 16 for( i=0; i<64; ++i ) outfiles[i] = -1;
14 while( fgets( table, sizeof(table), stdin ) ) { 17 while( fgets( table, sizeof(table), stdin ) ) {
15 int off = ( table[strlen(table)-1] = 0 ); /* fgets sucks */ 18 int off = ( table[strlen(table)-1] = 0 ); /* fgets sucks */
16 MAP f = map_file( table, 1 ); 19 int f_in = open( table, O_RDONLY );
17 uint32_t *p = (uint32_t*)(f->addr); 20 size_t s_in = read( f_in, f, sizeof(f));
18 uint32_t count = p[0], columns = p[1] / 4 - 1; 21 uint32_t *p = (uint32_t*)f;
22 uint32_t count = p[0], columns = fixed_columns ? fixed_columns : p[1] / 4 - 1;
19 unsigned int file, strnr; 23 unsigned int file, strnr;
20 24
25 close(f_in);
26
21 for( file=0; file<columns; ++file ) { 27 for( file=0; file<columns; ++file ) {
22 /* Create outfile, if it is not yet there */ 28 /* Create outfile, if it is not yet there */
23 if( outfiles[file] == -1 ) { 29 if( outfiles[file] == -1 ) {
24 sprintf( table, "%02d_unknown", file+4 ); 30 sprintf( table, "%02d_unknown", file+base );
25 outfiles[file] = open( table, O_WRONLY | O_APPEND | O_CREAT, 0644 ); 31 outfiles[file] = open( table, O_WRONLY | O_APPEND | O_CREAT, 0644 );
26 if ( outfiles[file] == -1 ) exit(1); 32 if ( outfiles[file] == -1 ) exit(1);
27 } 33 }
28 off = p[file+1]; 34 off = p[file+1];
29 /* Look for end of this chunk, which is <count> strings long */ 35 /* Look for end of this chunk, which is <count> strings long */
30 for( strnr=0; strnr < count; ++strnr ) while( f->addr[off++] ); 36 for( strnr=0; strnr < count; ++strnr ) { while( f[off++] ) {}; f[off-1] = '\n'; }
31 write( outfiles[file], f->addr + p[file+1], off - p[file+1] ); 37 write( outfiles[file], f + p[file+1], off - p[file+1] );
32 } 38 }
33 unmap_file(&f);
34 } 39 }
35 for( i=0; i<64; ++i ) close( outfiles[i] ); 40 for( i=0; i<64; ++i ) close( outfiles[i] );
36 return 0; 41 return 0;