summary | refs | log | tree | commit | diff
path: root/src/export
diff options
context:
space:
mode:
author: User Erdgeist <erdgeist@avon.ccc.de> 2014-02-25 04:30:29 +0100
committer: User Erdgeist <erdgeist@avon.ccc.de> 2014-02-25 04:30:29 +0100
commit: 0acfd93d7d7f277618cfa9af34f7587878f51064 (patch)
tree: d4193d39cc8286a28de750ca20a29156025d2358 /src/export
parent: fcc827b1356781a96ba313baa501aad36abebfe5 (diff)
Massively speed up dump by buffering output before write()
Diffstat (limited to 'src/export')
-rw-r--r-- src/export/extract_version_1.c | 85
1 file changed, 55 insertions, 30 deletions
diff --git a/src/export/extract_version_1.c b/src/export/extract_version_1.c
index dc006e5..8ab3190 100644
--- a/src/export/extract_version_1.c
+++ b/src/export/extract_version_1.c
@@ -9,7 +9,7 @@
9#include "mystdlib.h" 9#include "mystdlib.h"
10 10
11static uint8_t xlat_table[] = { 11static uint8_t xlat_table[] = {
12 0x00, 0x09, 0x02, 0x03, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 12 0x00, 0x0a, 0x02, 0x03, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
13 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 13 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
14 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 14 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b,
15 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5f, 15 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5f,
@@ -37,13 +37,17 @@ static uint8_t cp437_to_iso8859_1_table[] = {
37}; 37};
38 38
39static struct { 39static struct {
40 int outfiles[15]; 40 int outfiles[15];
41 char * vorwahl; 41 uint8_t *outbuf[15];
42 char * ort; 42 size_t outfill[15];
43 char * zip; 43 char * vorwahl;
44 char ort[1024];
45 size_t ort_len;
46 char zip[32];
47 size_t zip_len;
44} g_state; 48} g_state;
45 49
46static void decode_7bit_string( uint8_t const *source, uint8_t *dest ) 50static uint8_t * decode_7bit_string( uint8_t const *source, uint8_t *dest )
47{ 51{
48 uint16_t acc = 0, acc_bits = 0; 52 uint16_t acc = 0, acc_bits = 0;
49 while( 1 ) 53 while( 1 )
@@ -51,7 +55,7 @@ static void decode_7bit_string( uint8_t const *source, uint8_t *dest )
51 acc = acc*256+*(source++); ++acc_bits; 55 acc = acc*256+*(source++); ++acc_bits;
52again: 56again:
53 *(dest++) = xlat_table[ 0x7f & ( acc >> acc_bits ) ]; 57 *(dest++) = xlat_table[ 0x7f & ( acc >> acc_bits ) ];
54 if( !dest[-1] ) return; 58 if( !dest[-1] ) return dest - 1;
55 if( acc_bits == 7 ) { acc_bits = 0; goto again; } 59 if( acc_bits == 7 ) { acc_bits = 0; goto again; }
56 } 60 }
57} 61}
@@ -64,28 +68,33 @@ static void split_to_files( uint8_t *entries, int num_entries )
64 for( entry = 0; entry < num_entries; ++entry ) 68 for( entry = 0; entry < num_entries; ++entry )
65 { 69 {
66 /* mimic flags from telefonbuch v3 */ 70 /* mimic flags from telefonbuch v3 */
67 write( g_state.outfiles[0], entry ? "02\n" : ( num_entries > 1 ? "01\n" : "00\n" ), 3 ); 71 memcpy( g_state.outbuf[0 ] + g_state.outfill[0 ], entry ? "02\n" : ( num_entries > 1 ? "01\n" : "00\n" ), 3 ); g_state.outfill[0] += 3;
68 dprintf( g_state.outfiles[12], "%s\n", g_state.ort ); 72 memcpy( g_state.outbuf[12] + g_state.outfill[12], g_state.ort, g_state.ort_len ); g_state.outfill[12] += g_state.ort_len;
69 /* Only part of zip code, Zustellpostamt or eastern German code in column 5 (06_unknown) */ 73 memcpy( g_state.outbuf[13] + g_state.outfill[13], g_state.zip, g_state.zip_len ); g_state.outfill[13] += g_state.zip_len;
70 dprintf( g_state.outfiles[13], "%s\n", g_state.zip );
71 74
72 for( column = 0; column < 11; ++column ) 75 for( column = 0; column < 11; ++column )
73 { 76 {
74 char * end = strchr( e, '\t' ); 77 char * end = strchr( e, '\n' );
75 if( end ) { 78 if( !end ) {
76 *end = '\n'; 79 fprintf( stderr, "Unexpected end of line in city: %s", g_state.ort );
77 if( column == 9 && *e >= '0' && *e <= '9' ) write( g_state.outfiles[column+1], "0", 1 ); /* Augment Vorwahl */ 80 /* fprintf( stderr, "Failing String, (%d of %d entries, column %d): ###%s\n", entry, num_entries, column, entries ); */
78 write( g_state.outfiles[column+1], e, (size_t)(end - e + 1) ); 81 if( column ) for( ; column < 11; ++column) {
79 e = end + 1; 82 memcpy( g_state.outbuf[column+1] + g_state.outfill[column+1], "#\n", 2 );
80 } else 83 g_state.outfill[column+1] += 2;
81 dprintf( g_state.outfiles[column+1], "%s\n", e); 84 }
85 return;
86 }
87
88 if( column == 9 && *e >= '0' && *e <= '9' ) *--e = '0'; /* Augment Vorwahl */
89 memcpy( g_state.outbuf[column+1] + g_state.outfill[column+1], e, (size_t)(++end - e) ); g_state.outfill[column+1] += end - e;
90 e = end;
82 } 91 }
83 } 92 }
84} 93}
85 94
86static void act_on_record( uint8_t *file, int flag, uint8_t *page, uint16_t record_off ) 95static void act_on_record( uint8_t *file, int flag, uint8_t *page, uint16_t record_off )
87{ 96{
88 uint8_t outbuf[8192], *out_dest = outbuf, *record; 97 uint8_t outbuf[8192*4], *out_dest = outbuf, *record;
89 int num_entries; 98 int num_entries;
90 99
91 if( record_off > 0x1fff ) 100 if( record_off > 0x1fff )
@@ -112,7 +121,8 @@ static void act_on_record( uint8_t *file, int flag, uint8_t *page, uint16_t reco
112 record += 2; 121 record += 2;
113 } 122 }
114 123
115 decode_7bit_string( record, out_dest ); 124 record = decode_7bit_string( record, out_dest );
125 *record++ = '\n'; *record = 0;
116 split_to_files( outbuf, num_entries ); 126 split_to_files( outbuf, num_entries );
117} 127}
118 128
@@ -134,24 +144,28 @@ static void act_on_page( uint8_t *file, uint8_t *page, int page_nr )
134 144
135static void act_on_file( uint8_t *file ) 145static void act_on_file( uint8_t *file )
136{ 146{
137 int page, num_pages = *(uint16_t*)(file+0x40); 147 int i, page, num_pages = *(uint16_t*)(file+0x40);
138 uint32_t num_records= *(uint32_t*)(file+0x42); 148 uint32_t num_records= *(uint32_t*)(file+0x42);
139 char *gasse = (char *)file + 0x8e; 149 char *gasse = (char *)file + 0x8e;
140 char *ort = gasse + 1 + strlen(gasse); 150 char *ort = gasse + 1 + strlen(gasse);
141 char *zip = ort + 1 + strlen(ort); 151 char *zip = ort + 1 + strlen(ort);
142 char *vorwahl = zip + 1 + strlen(zip); 152 char *vorwahl = zip + 1 + strlen(zip);
143 char ort_conv[1024]; int i;
144 153
145 for(i=0; ort[i]; ++i ) ort_conv[i] = (char)cp437_to_iso8859_1_table[((uint8_t*)ort)[i]]; ort_conv[i] = 0; 154 for(i=0; ort[i]; ++i )
146 /* printf( "Working on a %04d page and %06d records file, city: %4s %-32s with prefix %s\n", num_pages, num_records, zip, ort_conv, vorwahl ); */ 155 g_state.ort[i] = (char)cp437_to_iso8859_1_table[((uint8_t*)ort)[i]];
147 (void)num_records; /* silence warning about unused variable */ 156 g_state.ort[i++] = '\n';
157 g_state.ort[i] = 0;
158 g_state.ort_len = i;
148 159
149 g_state.ort = ort_conv; 160 g_state.zip_len = snprintf( g_state.zip, sizeof(g_state.zip), "%s\n", zip );
150 g_state.zip = zip;
151 g_state.vorwahl = vorwahl; 161 g_state.vorwahl = vorwahl;
152 162
163 /* printf( "Working on a %04d page and %06d records file, city: %4s %-32s with prefix %s\n", num_pages, num_records, zip, ort, vorwahl ); */
164 (void)num_records; /* silence warning about unused variable */
165
153 for( page = 0; page < num_pages; ++page ) 166 for( page = 0; page < num_pages; ++page )
154 act_on_page( file, file + 0x800 + 0x2000 * page, page ); 167 act_on_page( file, file + 0x800 + 0x2000 * page, page );
168
155} 169}
156 170
157int main( ) 171int main( )
@@ -164,6 +178,8 @@ int main( )
164 { 178 {
165 sprintf( filename, "%02d_unknown", i+1 ); 179 sprintf( filename, "%02d_unknown", i+1 );
166 g_state.outfiles[i] = open( filename, O_WRONLY | O_APPEND | O_CREAT, 0644 ); 180 g_state.outfiles[i] = open( filename, O_WRONLY | O_APPEND | O_CREAT, 0644 );
181 g_state.outbuf[i] = malloc(8192*4096);
182 g_state.outfill[i] = 0;
167 } 183 }
168 184
169 while( fgets( filename, sizeof(filename), stdin ) ) { 185 while( fgets( filename, sizeof(filename), stdin ) ) {
@@ -172,10 +188,19 @@ int main( )
172 188
173 act_on_file( f->addr ); 189 act_on_file( f->addr );
174 unmap_file( &f ); 190 unmap_file( &f );
191
192 /* Write out results */
193 for( i=0; i<14; ++i ) {
194 /* if( g_state.outfill[i] > 1024*1024*6 ) printf( "Large: %s %zd\n", g_state.ort, g_state.outfill[i] ); */
195 write( g_state.outfiles[i], g_state.outbuf[i], g_state.outfill[i] );
196 g_state.outfill[i] = 0;
197 }
175 } 198 }
176 199
177 for( i=0; i<14; ++i ) 200 for( i=0; i<14; ++i ) {
178 close( g_state.outfiles[i] ); 201 write( g_state.outfiles[i], g_state.outbuf[i], g_state.outfill[i] );
202 close( g_state.outfiles[i] );
203 }
179 204
180 return 0; 205 return 0;
181} 206}