/*
 * path: root/src/postprocess/join.c
 * blob: e2c75eb8aed75841b7d2e123f6a685a06944c5eb
 */
#include <stdio.h>
#include <stdlib.h>
#include "mystdlib.h"

#define HUGEBLOCK (1024*1024*256)
#define ZIP_FIELD 3
#define STREET_FIELD 5

/*
 * Compare two newline-terminated byte strings for equality.
 *
 * Walks both strings in lock step. Returns 0 when both reach their '\n'
 * at the same position with all preceding bytes equal, and -1 as soon as
 * a differing byte is found (which includes one string ending before the
 * other). This is an equality test only; no ordering is reported.
 */
int rt_strcmp( uint8_t *a, uint8_t *b ) {
  size_t i;

  for( i = 0; ; ++i ) {
    if( a[i] != b[i] )
      return -1;
    if( a[i] == '\n' )
      return 0;
  }
}

/*
 * Copy a newline-terminated byte string from src to dest.
 *
 * Every byte up to and including the first '\n' is copied; nothing is
 * written after the newline (no NUL terminator). Returns the number of
 * bytes written, counting the newline.
 */
size_t rt_strcpy( uint8_t *dest, uint8_t *src ) {
  size_t n = 0;

  for( ; src[n] != '\n'; ++n )
    dest[n] = src[n];
  dest[n] = '\n';

  return n + 1;
}

/*
 * Length of a newline-terminated byte string, including the terminating
 * '\n' itself (so an empty line has length 1).
 */
size_t rt_strlen( uint8_t *str ) {
  size_t n = 0;

  while( str[n] != '\n' )
    ++n;

  return n + 1;
}

/*
 * Append one joined record to the output buffer: the first and last issue
 * number of the run (each printed with at least two digits), a BEL ('\a')
 * separator, then the newline-terminated text starting at `text`.
 * Returns the number of bytes stored at `out`.
 */
static size_t emit_run( uint8_t *out, int start, int end, uint8_t *text ) {
  size_t n = (size_t)sprintf( (char*)out, "%02d%02d\a", start, end );
  return n + rt_strcpy( out + n, text );
}

/*
 * Join consecutive records that carry the same text into issue ranges.
 *
 * The input file (argv[1]) is mapped into memory and read as a sequence of
 * records, each consisting of a two-digit issue number, one separator byte
 * and a newline-terminated text. As long as the text stays the same and
 * the issue number either repeats or increases by exactly one, the record
 * extends the current run. Any other record closes the run, which is then
 * written to stdout as "<start><end>\a<text>\n", and opens a new one.
 *
 * Output is collected in a HUGEBLOCK-sized buffer and written whenever
 * fewer than 16 KiB of headroom remain, and once more at end of input.
 *
 * Exits with status 1 when no file argument is given, the file cannot be
 * mapped, or the output buffer cannot be allocated; returns 0 otherwise.
 */
int main( int argc, char **argv ) {
  MAP file;
  uint8_t *out, *in;
  size_t last = 3, off = 0, out_off = 0;
  int start, end;

  if( argc < 2 ) {
    fputs( "usage: join <input-file>\n", stderr );
    exit(1);
  }

  file = map_file( argv[1], 1 );
  out = malloc( HUGEBLOCK );

  if( !file || !out )
    exit(1);

  /* A record needs two digits, a separator byte and at least the newline
     of its text; anything shorter holds no complete record and must not
     be indexed into. */
  if( file->size < 4 )
    return 0;

  in = file->addr;
  start = 10 * ( in[0] - '0' ) + in[1] - '0';
  /* One below the first issue, so the first record extends the run
     instead of closing an empty one. */
  end = start - 1;

  while( off < file->size ) {
    int issue = 10 * ( in[off] - '0' ) + in[off+1] - '0';
    int copy = 1;

    off += 3; /* skip issue digits and separator; off now points at the text */

    /* rt_strcmp only ever reports 0 (same text) or -1 (different). */
    if( rt_strcmp( in + last, in + off ) == 0 ) {
      if( issue == end || issue == end + 1 ) {
        /* duplicate of the last issue, or its direct successor */
        end = issue;
        copy = 0;
      }
    }

    if( copy ) {
      /* The run ended: emit it and start a new one at this record. */
      out_off += emit_run( out + out_off, start, end, in + last );
      start = end = issue;
      last = off;
    }

    off += rt_strlen( in + off );

    /* Keep 16 KiB of headroom for the next record.
       NOTE(review): assumes no single record exceeds that - confirm. */
    if( out_off + 8192 * 2 > HUGEBLOCK ) {
      (void)fwrite( out, out_off, 1, stdout );
      out_off = 0;
    }
  }

  /* The last run is still pending when the input ends: emit it and write
     out whatever is left in the buffer. The headroom check above leaves
     room for this final record. */
  out_off += emit_run( out + out_off, start, end, in + last );
  (void)fwrite( out, out_off, 1, stdout );

  return 0;
}