summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dirk Engling <erdgeist@erdgeist.org> 2015-05-10 21:45:16 +0200
committer: Dirk Engling <erdgeist@erdgeist.org> 2015-05-10 21:46:06 +0200
commit: 08a9f406a0e18e0902bdf4f50b4f5ecad9fe2588 (patch)
tree: 682d7bfc7724c9bb94ff06b1099902a21cc2bdd9
parent: 051fb5a22d179610ad5892f5f455909506e2cfcd (diff)
Split entries into different column files. Done for non-continuation-entries
-rw-r--r-- src/export/split_version_2.c 185
1 files changed, 99 insertions, 86 deletions
diff --git a/src/export/split_version_2.c b/src/export/split_version_2.c
index 2b7a79f..7a6f04e 100644
--- a/src/export/split_version_2.c
+++ b/src/export/split_version_2.c
@@ -1,10 +1,20 @@
1#include <stdlib.h> 1#include <stdlib.h>
2#include <stdint.h> 2#include <stdint.h>
3#include <ctype.h> 3#include <ctype.h>
4#include <fcntl.h>
5#include <unistd.h>
4 6
5#include "mystdlib.h" 7#include "mystdlib.h"
6 8
7static int g_first_field_length; 9enum { g_outfiles = 17 };
10
11static struct {
12 int outfiles[g_outfiles];
13 uint8_t *outbuf [g_outfiles];
14 size_t outfill [g_outfiles];
15} g_state;
16
17static int g_northern_version;;
8static uint8_t cp437_to_iso8859_1_table[] = { 18static uint8_t cp437_to_iso8859_1_table[] = {
9 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 19 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
10 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 20 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
@@ -23,14 +33,24 @@ static uint8_t cp437_to_iso8859_1_table[] = {
23 0x2e, 0xdf, 0x2e, 0x2e, 0x2e, 0x2e, 0xb5, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 33 0x2e, 0xdf, 0x2e, 0x2e, 0x2e, 0x2e, 0xb5, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e,
24 0x2e, 0xb1, 0x2e, 0x2e, 0x2e, 0x2e, 0xf7, 0x2e, 0xb0, 0x2e, 0xb7, 0x2e, 0x2e, 0xb2, 0x2e, 0xa0 34 0x2e, 0xb1, 0x2e, 0x2e, 0x2e, 0x2e, 0xf7, 0x2e, 0xb0, 0x2e, 0xb7, 0x2e, 0x2e, 0xb2, 0x2e, 0xa0
25}; 35};
26static void dump_field( uint8_t **end) { 36
27 uint8_t *e = *end; 37static void dump_string( uint8_t *start, size_t len, int file ) {
28 uint8_t len = *--e; 38 uint8_t *dest = g_state.outbuf[file] + g_state.outfill[file];
29 *end = e - len; 39 g_state.outfill[file] += len;
30 printf( "_%02d_ ", len );
31 while( len-- ) 40 while( len-- )
32 putchar( cp437_to_iso8859_1_table[ e[-len-1] ] ); 41 *(dest++) = cp437_to_iso8859_1_table[ *(start++) ];
33 putchar( 9 ); 42}
43
44static void dump_field( uint8_t **end, int file ) {
45 uint8_t len = (*end)[-1];
46 *end -= len + 1;
47 dump_string( *end, len, file);
48}
49
50static void finish_record( ) {
51 int i;
52 for( i=0; i<g_outfiles; ++i )
53 *( g_state.outbuf[i] + g_state.outfill[i]++ ) = '\n';
34} 54}
35 55
36static uint16_t load_word( uint8_t *table_start, uint16_t base, int offset ) { 56static uint16_t load_word( uint8_t *table_start, uint16_t base, int offset ) {
@@ -48,79 +68,45 @@ static uint16_t load_word( uint8_t *table_start, uint16_t base, int offset ) {
48 68
49static void dump_primary( uint8_t *end, uint32_t flags ) { 69static void dump_primary( uint8_t *end, uint32_t flags ) {
50 /* First dump type of record (should be 1) */ 70 /* First dump type of record (should be 1) */
51 putchar( *end ); 71 dump_string( end, 1, 0 );
52 putchar( 9 );
53 72
54 /* Dump first 5 chars of zip, they're always there */ 73 /* Dump first 5 chars of zip, they're always there */
55 printf( "%c%c%c%c%c\t", end[-5], end[-4], end[-3], end[-2], end[-1] ); 74 dump_string( end -= 5, 5, 1 );
56 end -= 5;
57
58 if( g_first_field_length == 5 ) {
59 /* Dump first 5 chars of prefix, they're always there */
60 printf( "%c%c%c%c%c\t", end[-5], end[-4], end[-3], end[-2], end[-1] );
61 end -= 5;
62
63 /* There is another version of the zip code present, if this bit is set */
64 if( flags & 0x0080 ) {
65 printf( "%c%c%c%c%c\t", end[-5], end[-4], end[-3], end[-2], end[-1] );
66 end -= 5;
67 }
68 75
69 /* There is an unclear X present, if this bit is set */ 76 /* Dump first 5 chars of prefix, they're always here in s and w */
70 if( flags & 0x0040 ) { 77 if( !g_northern_version )
71 printf( "%c\t", end[-1] ); 78 dump_string( end -= 5, 5, 2 );
72 end--;
73 }
74 79
75 /* There is another version of the prefix present, if this bit is set */ 80 /* There is another version of the zip code present, if this bit is set */
76 if( flags & 0x0020 ) { 81 if( flags & 0x0080 ) dump_string( end -= 5, 5, 3 );
77 printf( "%c%c%c%c%c\t", end[-5], end[-4], end[-3], end[-2], end[-1] );
78 end -= 5;
79 }
80 82
81 if( flags & 0x0010 ) dump_field( &end ); 83 /* There is an unclear X present, if this bit is set */
82 if( flags & 0x0008 ) dump_field( &end ); 84 if( flags & 0x0040 ) dump_string( end -= 1, 1, 4 );
83 if( flags & 0x0004 ) dump_field( &end );
84 if( flags & 0x0002 ) dump_field( &end );
85 if( flags & 0x0001 ) dump_field( &end );
86 if( flags & 0x8000 ) dump_field( &end );
87 if( flags & 0x4000 ) dump_field( &end );
88 if( flags & 0x2000 ) dump_field( &end );
89 if( flags & 0x1000 ) dump_field( &end );
90
91 dump_field( &end );
92 dump_field( &end );
93 } else {
94
95 /* There is another version of the zip code present, if this bit is set */
96 if( flags & 0x0080 ) {
97 printf( "%c%c%c%c%c\t", end[-5], end[-4], end[-3], end[-2], end[-1] );
98 end -= 5;
99 }
100 85
101 /* There is an unclear X present, if this bit is set */ 86 /* There is another version of the prefix present, if this bit is set */
102 if( flags & 0x0040 ) { 87 if( flags & 0x0020 ) {
103 printf( "%c\t", end[-1] ); 88 if( !g_northern_version )
104 end--; 89 dump_string( end -= 5, 5, 5 );
105 } 90 else
106 91 dump_field( &end, 5 );
107 if( flags & 0x0010 ) dump_field( &end );
108 if( flags & 0x0008 ) dump_field( &end );
109 if( flags & 0x0004 ) dump_field( &end );
110 if( flags & 0x0002 ) dump_field( &end );
111 if( flags & 0x0001 ) dump_field( &end );
112 if( flags & 0x8000 ) dump_field( &end );
113 if( flags & 0x4000 ) dump_field( &end );
114 if( flags & 0x2000 ) dump_field( &end );
115 if( flags & 0x1000 ) dump_field( &end );
116
117 /* There is another version of the prefix present, if this bit is set */
118 if( flags & 0x0020 ) dump_field( &end );
119
120 dump_field( &end );
121 dump_field( &end );
122 dump_field( &end );
123 } 92 }
93
94 if( flags & 0x0010 ) dump_field( &end, 6 );
95 if( flags & 0x0008 ) dump_field( &end, 7 );
96 if( flags & 0x0004 ) dump_field( &end, 8 );
97 if( flags & 0x0002 ) dump_field( &end, 9 );
98 if( flags & 0x0001 ) dump_field( &end, 10 );
99 if( flags & 0x8000 ) dump_field( &end, 11 );
100 if( flags & 0x4000 ) dump_field( &end, 12 );
101 if( flags & 0x2000 ) dump_field( &end, 13 );
102 if( flags & 0x1000 ) dump_field( &end, 14 );
103
104 dump_field( &end, 15 );
105
106 /* Dump first 5 chars of prefix, they're always here in no */
107 if( g_northern_version )
108 dump_field( &end, 2 );
109 dump_field( &end, 16 );
124} 110}
125 111
126static void act_on_record( uint8_t * end, uint16_t base ) { 112static void act_on_record( uint8_t * end, uint16_t base ) {
@@ -129,55 +115,82 @@ static void act_on_record( uint8_t * end, uint16_t base ) {
129 uint16_t num_dwords = rec[0]; 115 uint16_t num_dwords = rec[0];
130 uint16_t flagbytes = rec[1]; 116 uint16_t flagbytes = rec[1];
131 uint32_t flags = 0; 117 uint32_t flags = 0;
132 int bold = 0, i; 118 int i;
133 119
134 for (i=0; i<flagbytes; ++i) flags = (flags<<8) | *--end; 120 for (i=0; i<flagbytes; ++i) flags = (flags<<8) | *--end;
135 121
136 /* 122// printf( "-------- %03d: %06X\n", num_dwords, flags );
137 putchar( 27 ) ; putchar( '[' ); putchar( '3' );
138 putchar( ( flags & test_me ) ? '1' : '2' );
139 putchar( 'm' ); */
140 (void)bold;
141
142 printf( "-------- %03d: %06X\n", num_dwords, flags );
143 for (i = 0; i < num_dwords; i++ ) { 123 for (i = 0; i < num_dwords; i++ ) {
144 uint16_t subflag = load_word( e, base, 2*i ); 124 uint16_t subflag = load_word( e, base, 2*i );
145 uint16_t t_off = load_word( e, base, 2*i+1 ); 125 uint16_t t_off = load_word( e, base, 2*i+1 );
146 uint8_t *rec_start = end - t_off; 126 uint8_t *rec_start = end - t_off;
147 127
148 printf( "%04x (%04x): ", subflag, t_off );
149 if (i == 0) 128 if (i == 0)
150 dump_primary( rec_start, flags ); 129 dump_primary( rec_start, flags );
151 else { 130 else {
152 uint8_t * rec_end = end - ( ( i+1 == num_dwords ) ? 0 : load_word( e, base, 2*i+3 ) ); 131 uint8_t * rec_end = end - ( ( i+1 == num_dwords ) ? 0 : load_word( e, base, 2*i+3 ) );
132 printf( "(%04X): ", subflag );
133// dump_string( rec_start, rec_end, file );
153 while ( rec_start < rec_end ) 134 while ( rec_start < rec_end )
154 putchar( cp437_to_iso8859_1_table[ *(rec_start++) ] ); 135 putchar( cp437_to_iso8859_1_table[ *(rec_start++) ] );
136 putchar(10);
155 } 137 }
156 putchar(10);
157 } 138 }
139 finish_record();
158} 140}
159 141
160int main( int args, char **argv ) { 142int main( int args, char **argv ) {
143 char filename[1024];
161 MAP data, index; 144 MAP data, index;
162 uint32_t * indoff; 145 uint32_t * indoff;
146 size_t limit;
147 int i;
163 148
164 if( args != 3 ) { 149 if( args != 3 ) {
165 fprintf( stderr, "Syntax: %s <dumpfile> <indexfile>\n", argv[0] ); 150 fprintf( stderr, "Syntax: %s <dumpfile> <indexfile>\n", argv[0] );
166 exit(1); 151 exit(1);
167 } 152 }
153
154 for( i=0; i<g_outfiles; ++i )
155 {
156 sprintf( filename, "%02d_unknown", i+1 );
157 g_state.outfiles[i] = open( filename, O_WRONLY | O_APPEND | O_CREAT, 0644 );
158 g_state.outbuf[i] = malloc(8192*4096);
159 g_state.outfill[i] = 0;
160 }
161
168 data = map_file( argv[1], 1 ); 162 data = map_file( argv[1], 1 );
169 index = map_file( argv[2], 1 ); 163 index = map_file( argv[2], 1 );
170 164
171 if( !data || !index ) 165 if( !data || !index )
172 exit( 1 ); 166 exit( 1 );
173 167
174 g_first_field_length = data->addr[0x21e]; 168 g_northern_version = data->addr[0x21e] != 5;
169
170 /* Each entry in outfile[0] is flag + \n, i.e. 3 bytes
171 We want to flush at every percent progress */
172 limit = 3 * ( ( index->size / 4 ) / 100 );
175 173
176 for( indoff = (uint32_t*)(index->addr + 8); 174 for( indoff = (uint32_t*)(index->addr + 8);
177 indoff < (uint32_t*)(index->addr + index->size) && *indoff; 175 indoff < (uint32_t*)(index->addr + index->size) && *indoff;
178 indoff++ ) { 176 indoff++ ) {
179// printf( "\nActing on record at off: %08X\n", *indoff ); 177// printf( "\nActing on record at off: %08X\n", *indoff );
180// if( indoff[0] >> 14 < indoff[1] >> 14 )
181 act_on_record( data->addr + *indoff, *indoff & 0x1fff ); 178 act_on_record( data->addr + *indoff, *indoff & 0x1fff );
179
180 /* Write out results */
181 if( g_state.outfill[0] > limit )
182 for( i=0; i<g_outfiles; ++i ) {
183 if( g_state.outfill[i] > 1024*1024*6 ) printf( "Large: %zd\n", g_state.outfill[i] );
184 write( g_state.outfiles[i], g_state.outbuf[i], g_state.outfill[i] );
185 g_state.outfill[i] = 0;
186 }
182 } 187 }
188
189 for( i=0; i<g_outfiles; ++i ) {
190 write( g_state.outfiles[i], g_state.outbuf[i], g_state.outfill[i] );
191 close( g_state.outfiles[i] );
192 }
193 unmap_file( &data );
194 unmap_file( &index );
195
183} 196}