From 08a9f406a0e18e0902bdf4f50b4f5ecad9fe2588 Mon Sep 17 00:00:00 2001 From: Dirk Engling <erdgeist@erdgeist.org> Date: Sun, 10 May 2015 21:45:16 +0200 Subject: Split entries into different column files. Done for non-continuation-entries --- src/export/split_version_2.c | 185 +++++++++++++++++++++++-------------------- 1 file changed, 99 insertions(+), 86 deletions(-) diff --git a/src/export/split_version_2.c b/src/export/split_version_2.c index 2b7a79f..7a6f04e 100644 --- a/src/export/split_version_2.c +++ b/src/export/split_version_2.c @@ -1,10 +1,20 @@ #include <stdlib.h> #include <stdint.h> #include <ctype.h> +#include <fcntl.h> +#include <unistd.h> #include "mystdlib.h" -static int g_first_field_length; +enum { g_outfiles = 17 }; + +static struct { + int outfiles[g_outfiles]; + uint8_t *outbuf [g_outfiles]; + size_t outfill [g_outfiles]; +} g_state; + +static int g_northern_version;; static uint8_t cp437_to_iso8859_1_table[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, @@ -23,14 +33,24 @@ static uint8_t cp437_to_iso8859_1_table[] = { 0x2e, 0xdf, 0x2e, 0x2e, 0x2e, 0x2e, 0xb5, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0xb1, 0x2e, 0x2e, 0x2e, 0x2e, 0xf7, 0x2e, 0xb0, 0x2e, 0xb7, 0x2e, 0x2e, 0xb2, 0x2e, 0xa0 }; -static void dump_field( uint8_t **end) { - uint8_t *e = *end; - uint8_t len = *--e; - *end = e - len; - printf( "_%02d_ ", len ); + +static void dump_string( uint8_t *start, size_t len, int file ) { + uint8_t *dest = g_state.outbuf[file] + g_state.outfill[file]; + g_state.outfill[file] += len; while( len-- ) - putchar( cp437_to_iso8859_1_table[ e[-len-1] ] ); - putchar( 9 ); + *(dest++) = cp437_to_iso8859_1_table[ *(start++) ]; +} + +static void dump_field( uint8_t **end, int file ) { + uint8_t len = (*end)[-1]; + *end -= len + 1; + dump_string( *end, len, file); +} + +static void 
finish_record( ) { + int i; + for( i=0; i<g_outfiles; ++i ) + *( g_state.outbuf[i] + g_state.outfill[i]++ ) = '\n'; } static uint16_t load_word( uint8_t *table_start, uint16_t base, int offset ) { @@ -48,79 +68,45 @@ static uint16_t load_word( uint8_t *table_start, uint16_t base, int offset ) { static void dump_primary( uint8_t *end, uint32_t flags ) { /* First dump type of record (should be 1) */ - putchar( *end ); - putchar( 9 ); + dump_string( end, 1, 0 ); /* Dump first 5 chars of zip, they're always there */ - printf( "%c%c%c%c%c\t", end[-5], end[-4], end[-3], end[-2], end[-1] ); - end -= 5; - - if( g_first_field_length == 5 ) { - /* Dump first 5 chars of prefix, they're always there */ - printf( "%c%c%c%c%c\t", end[-5], end[-4], end[-3], end[-2], end[-1] ); - end -= 5; - - /* There is another version of the zip code present, if this bit is set */ - if( flags & 0x0080 ) { - printf( "%c%c%c%c%c\t", end[-5], end[-4], end[-3], end[-2], end[-1] ); - end -= 5; - } + dump_string( end -= 5, 5, 1 ); - /* There is an unclear X present, if this bit is set */ - if( flags & 0x0040 ) { - printf( "%c\t", end[-1] ); - end--; - } + /* Dump first 5 chars of prefix, they're always here in s and w */ + if( !g_northern_version ) + dump_string( end -= 5, 5, 2 ); - /* There is another version of the prefix present, if this bit is set */ - if( flags & 0x0020 ) { - printf( "%c%c%c%c%c\t", end[-5], end[-4], end[-3], end[-2], end[-1] ); - end -= 5; - } + /* There is another version of the zip code present, if this bit is set */ + if( flags & 0x0080 ) dump_string( end -= 5, 5, 3 ); - if( flags & 0x0010 ) dump_field( &end ); - if( flags & 0x0008 ) dump_field( &end ); - if( flags & 0x0004 ) dump_field( &end ); - if( flags & 0x0002 ) dump_field( &end ); - if( flags & 0x0001 ) dump_field( &end ); - if( flags & 0x8000 ) dump_field( &end ); - if( flags & 0x4000 ) dump_field( &end ); - if( flags & 0x2000 ) dump_field( &end ); - if( flags & 0x1000 ) dump_field( &end ); - - dump_field( &end ); - 
dump_field( &end ); - } else { - - /* There is another version of the zip code present, if this bit is set */ - if( flags & 0x0080 ) { - printf( "%c%c%c%c%c\t", end[-5], end[-4], end[-3], end[-2], end[-1] ); - end -= 5; - } + /* There is an unclear X present, if this bit is set */ + if( flags & 0x0040 ) dump_string( end -= 1, 1, 4 ); - /* There is an unclear X present, if this bit is set */ - if( flags & 0x0040 ) { - printf( "%c\t", end[-1] ); - end--; - } - - if( flags & 0x0010 ) dump_field( &end ); - if( flags & 0x0008 ) dump_field( &end ); - if( flags & 0x0004 ) dump_field( &end ); - if( flags & 0x0002 ) dump_field( &end ); - if( flags & 0x0001 ) dump_field( &end ); - if( flags & 0x8000 ) dump_field( &end ); - if( flags & 0x4000 ) dump_field( &end ); - if( flags & 0x2000 ) dump_field( &end ); - if( flags & 0x1000 ) dump_field( &end ); - - /* There is another version of the prefix present, if this bit is set */ - if( flags & 0x0020 ) dump_field( &end ); - - dump_field( &end ); - dump_field( &end ); - dump_field( &end ); + /* There is another version of the prefix present, if this bit is set */ + if( flags & 0x0020 ) { + if( !g_northern_version ) + dump_string( end -= 5, 5, 5 ); + else + dump_field( &end, 5 ); } + + if( flags & 0x0010 ) dump_field( &end, 6 ); + if( flags & 0x0008 ) dump_field( &end, 7 ); + if( flags & 0x0004 ) dump_field( &end, 8 ); + if( flags & 0x0002 ) dump_field( &end, 9 ); + if( flags & 0x0001 ) dump_field( &end, 10 ); + if( flags & 0x8000 ) dump_field( &end, 11 ); + if( flags & 0x4000 ) dump_field( &end, 12 ); + if( flags & 0x2000 ) dump_field( &end, 13 ); + if( flags & 0x1000 ) dump_field( &end, 14 ); + + dump_field( &end, 15 ); + + /* Dump first 5 chars of prefix, they're always here in no */ + if( g_northern_version ) + dump_field( &end, 2 ); + dump_field( &end, 16 ); } static void act_on_record( uint8_t * end, uint16_t base ) { @@ -129,55 +115,82 @@ static void act_on_record( uint8_t * end, uint16_t base ) { uint16_t num_dwords = rec[0]; 
uint16_t flagbytes = rec[1]; uint32_t flags = 0; - int bold = 0, i; + int i; for (i=0; i<flagbytes; ++i) flags = (flags<<8) | *--end; - /* - putchar( 27 ) ; putchar( '[' ); putchar( '3' ); - putchar( ( flags & test_me ) ? '1' : '2' ); - putchar( 'm' ); */ - (void)bold; - - printf( "-------- %03d: %06X\n", num_dwords, flags ); +// printf( "-------- %03d: %06X\n", num_dwords, flags ); for (i = 0; i < num_dwords; i++ ) { uint16_t subflag = load_word( e, base, 2*i ); uint16_t t_off = load_word( e, base, 2*i+1 ); uint8_t *rec_start = end - t_off; - printf( "%04x (%04x): ", subflag, t_off ); if (i == 0) dump_primary( rec_start, flags ); else { uint8_t * rec_end = end - ( ( i+1 == num_dwords ) ? 0 : load_word( e, base, 2*i+3 ) ); + printf( "(%04X): ", subflag ); +// dump_string( rec_start, rec_end, file ); while ( rec_start < rec_end ) putchar( cp437_to_iso8859_1_table[ *(rec_start++) ] ); + putchar(10); } - putchar(10); } + finish_record(); } int main( int args, char **argv ) { + char filename[1024]; MAP data, index; uint32_t * indoff; + size_t limit; + int i; if( args != 3 ) { fprintf( stderr, "Syntax: %s <dumpfile> <indexfile>\n", argv[0] ); exit(1); } + + for( i=0; i<g_outfiles; ++i ) + { + sprintf( filename, "%02d_unknown", i+1 ); + g_state.outfiles[i] = open( filename, O_WRONLY | O_APPEND | O_CREAT, 0644 ); + g_state.outbuf[i] = malloc(8192*4096); + g_state.outfill[i] = 0; + } + data = map_file( argv[1], 1 ); index = map_file( argv[2], 1 ); if( !data || !index ) exit( 1 ); - g_first_field_length = data->addr[0x21e]; + g_northern_version = data->addr[0x21e] != 5; + + /* Each entry in outfile[0] is flag + \n, i.e. 
3 bytes + We want to flush at every percent progress */ + limit = 3 * ( ( index->size / 4 ) / 100 ); for( indoff = (uint32_t*)(index->addr + 8); indoff < (uint32_t*)(index->addr + index->size) && *indoff; indoff++ ) { // printf( "\nActing on record at off: %08X\n", *indoff ); -// if( indoff[0] >> 14 < indoff[1] >> 14 ) act_on_record( data->addr + *indoff, *indoff & 0x1fff ); + + /* Write out results */ + if( g_state.outfill[0] > limit ) + for( i=0; i<g_outfiles; ++i ) { + if( g_state.outfill[i] > 1024*1024*6 ) printf( "Large: %zd\n", g_state.outfill[i] ); + write( g_state.outfiles[i], g_state.outbuf[i], g_state.outfill[i] ); + g_state.outfill[i] = 0; + } } + + for( i=0; i<g_outfiles; ++i ) { + write( g_state.outfiles[i], g_state.outbuf[i], g_state.outfill[i] ); + close( g_state.outfiles[i] ); + } + unmap_file( &data ); + unmap_file( &index ); + } -- cgit v1.2.3