summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile5
-rwxr-xr-xmakecolumns.sh26
-rw-r--r--src/postprocess/join.c81
3 files changed, 109 insertions, 3 deletions
diff --git a/Makefile b/Makefile
index 3e64576..c2d04d4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
1BINARIES=bin/extract_version_1 bin/extract_version_2 bin/extract_version_3 bin/extract_version_4 bin/split_version_2 bin/split_version_3 bin/map_coords bin/convert_coords 1BINARIES=bin/extract_version_1 bin/extract_version_2 bin/extract_version_3 bin/extract_version_4 bin/split_version_2 bin/split_version_3 bin/map_coords bin/convert_coords bin/join
2CFLAGS += -W -Wall -Wextra -O3 # -Weverything -Wno-cast-align -Wno-padded 2CFLAGS += -W -Wall -Wextra -O3 # -Weverything -Wno-cast-align -Wno-padded
3 3
4all: $(BINARIES) 4all: $(BINARIES)
@@ -27,6 +27,9 @@ bin/map_coords: src/export/map_coords.c src/export/mystdlib.c
27bin/convert_coords: src/export/convert_coords.c 27bin/convert_coords: src/export/convert_coords.c
28 $(CC) $(CFLAGS) -o $@ src/export/convert_coords.c -lm 28 $(CC) $(CFLAGS) -o $@ src/export/convert_coords.c -lm
29 29
30bin/join: src/postprocess/join.c src/export/mystdlib.c
31 $(CC) $(CFLAGS) -o $@ src/postprocess/join.c src/export/mystdlib.c -Isrc/export
32
30.PHONY: clean 33.PHONY: clean
31clean: 34clean:
32 @rm -f $(BINARIES) 35 @rm -f $(BINARIES)
diff --git a/makecolumns.sh b/makecolumns.sh
index 8131379..0854b32 100755
--- a/makecolumns.sh
+++ b/makecolumns.sh
@@ -112,6 +112,7 @@ handle_format_version_1() {
112 lam 04_Namenszusatz 05_Adresszusatz | tr '\t' ' ' | sed -E s/' +'/' '/g > 04_Zusaetze 112 lam 04_Namenszusatz 05_Adresszusatz | tr '\t' ' ' | sed -E s/' +'/' '/g > 04_Zusaetze
113 printf "done.\n" 113 printf "done.\n"
114 114
115 tidy_streetnames 07_Strasse
115} 116}
116 117
117handle_format_version_2() { 118handle_format_version_2() {
@@ -158,6 +159,7 @@ handle_format_version_2() {
158 lam 04_Namenszusatz 05_Adresszusatz | tr '\t' ' ' | sed -E s/' +'/' '/g > 04_Zusaetze 159 lam 04_Namenszusatz 05_Adresszusatz | tr '\t' ' ' | sed -E s/' +'/' '/g > 04_Zusaetze
159 printf "done.\n" 160 printf "done.\n"
160 161
162 tidy_streetnames 07_Strasse
161} 163}
162 164
163handle_format_version_3() { 165handle_format_version_3() {
@@ -243,7 +245,7 @@ handle_format_version_3() {
243 printf "done.\n" 245 printf "done.\n"
244 246
245 printf "Normalizing zusaetze ... " 247 printf "Normalizing zusaetze ... "
246 lam 04_Namenszusatz 05_Adresszusatz | tr '\t' ' ' | sed -E s/' +'/' '/g > 04_Zusaetze 248 lam 04_Namenszusatz 05_Adresszusatz | tr '\t' ' ' | sed -E -e 's/ +/ /g' -e 's/^ +//g' -e 's/ +$//g' > 04_Zusaetze
247 printf "done.\n" 249 printf "done.\n"
248 250
249 # If street names come in an extra file, extract 251 # If street names come in an extra file, extract
@@ -260,10 +262,13 @@ handle_format_version_3() {
260 # fix up known broken Strassennamen file 262 # fix up known broken Strassennamen file
261 [ `stat -f %z ${streets}` -eq 1642716 ] && printf '9. Str.\n91. Str.\n91er-Str.\n' >> 99_Strassenname 263 [ `stat -f %z ${streets}` -eq 1642716 ] && printf '9. Str.\n91. Str.\n91er-Str.\n' >> 99_Strassenname
262 264
265 tidy_streetnames 99_Strassenname
266
263 cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse 267 cut -d ';' -f 1 07_Strassenindex | ${EL} -0x 99_Strassenname > 07_Strasse
264 printf "done.\n" 268 printf "done.\n"
265 else 269 else
266 mv 07_unknown 07_Strasse 270 mv 07_unknown 07_Strasse
271 tidy_streetnames 07_Strasse
267 fi 272 fi
268 273
269 karto=$1/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt] 274 karto=$1/[Dd][Aa][Tt]/[Kk][Aa][Rr][Tt][Oo].[Dd][Aa][Tt]
@@ -306,7 +311,13 @@ handle_format_version_4() {
306 find . -name file_\* -delete 311 find . -name file_\* -delete
307 printf "done.\n" 312 printf "done.\n"
308 313
309 mv column_0 01_Flags 314 # the 'did not object to inverse search' flag is insane and needs to be reversed
315 if grep -q ^40 column_0; then
316 awk '{ a=substr($0,1,1); printf "%x%x\n",index("5670123cdef89ab4",a)%16 ,substr($0,2,1) }' < column_0 > 01_Flags
317 rm column_0
318 else
319 mv column_0 01_Flags
320 fi
310 mv column_1 02_Nachname 321 mv column_1 02_Nachname
311 mv column_2 03_Vorname 322 mv column_2 03_Vorname
312 mv column_3 04_05_Namenszusatz_Addresszusatz 323 mv column_3 04_05_Namenszusatz_Addresszusatz
@@ -318,6 +329,8 @@ handle_format_version_4() {
318 mv column_9 13_Rufnummer 329 mv column_9 13_Rufnummer
319 mv column_10 14_15_Email_Webadresse 330 mv column_10 14_15_Email_Webadresse
320 331
332 tidy_streetnames 99_Strassenname
333
321 printf "Looking up street names from indexes ... " 334 printf "Looking up street names from indexes ... "
322 cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse 335 cut -f 1 07_08_Strassenindex_Hausnummer | ${EL} -0 99_Strassenname > 07_Strasse
323 printf "done.\n" 336 printf "done.\n"
@@ -348,6 +361,15 @@ handle_format_version_4() {
348 printf "done.\n" 361 printf "done.\n"
349 fi 362 fi
350 rm file_* 363 rm file_*
364
365}
366
# tidy_streetnames FILE
#
# Normalises the street names stored in FILE, editing the file in place:
#   - every run of consecutive dots (anywhere in a line) is squeezed into a
#     single dot
#   - a line ending in "Str" or "str", optionally followed by one space or
#     one colon, is rewritten to end in "Str." / "str."
#
# Uses the BSD form of in-place editing (sed -i ''), like the rest of the
# script uses BSD tools.
tidy_streetnames () {
    # Operate on "$1" directly instead of copying it into a variable:
    # shell variables are global, and an assignment to `streets` here would
    # overwrite the variable of the same name that callers already use.
    # The quotes keep file names containing spaces or glob characters intact.
    sed -Ei '' 's/\.+/./g;s/(S|s)tr( |:)?$/\1tr./' "$1"
}
352 374
353# After function definitions, main() can use them 375# After function definitions, main() can use them
diff --git a/src/postprocess/join.c b/src/postprocess/join.c
new file mode 100644
index 0000000..9782ec8
--- /dev/null
+++ b/src/postprocess/join.c
@@ -0,0 +1,81 @@
1#include <stdio.h>
2#include <stdlib.h>
3#include "mystdlib.h"
4
5#define HUGEBLOCK (1024*1024*256)
6#define ZIP_FIELD 3
7#define STREET_FIELD 5
8
/*
 * Equality test for two newline-terminated records.
 *
 * Walks both buffers in lockstep until the first differing byte or until a
 * '\n' has been matched in both.
 *
 * Returns 0 when the records are identical up to and including their
 * terminating '\n', and -1 otherwise. The result carries no ordering
 * information. Both buffers must contain a '\n'; no length limit is applied.
 */
int rt_strcmp( uint8_t *a, uint8_t *b ) {
  size_t i;

  for( i = 0; ; ++i ) {
    if( a[i] != b[i] )
      return -1;
    if( a[i] == '\n' )
      return 0;
  }
}
14
/*
 * Copy one newline-terminated record from src to dest.
 *
 * Bytes are copied front to back up to the first '\n' in src, and a '\n' is
 * then stored behind them. Nothing after the newline is copied and no NUL
 * terminator is written.
 *
 * Returns the number of bytes stored in dest, newline included. dest must
 * be large enough for the whole record; no length limit is applied.
 */
size_t rt_strcpy( uint8_t *dest, uint8_t *src ) {
  size_t n = 0;

  for( ; src[n] != '\n'; ++n )
    dest[n] = src[n];

  dest[n] = '\n';
  return n + 1;
}
22
/*
 * Length of a newline-terminated record.
 *
 * Returns the number of bytes from str up to and including the first '\n'
 * (so an empty record, a lone newline, has length 1). The buffer must
 * contain a '\n'; no length limit is applied.
 */
size_t rt_strlen( uint8_t *str ) {
  uint8_t *p;

  for( p = str; *p != '\n'; ++p )
    ;

  return (size_t)( p - str ) + 1;
}
28
29int main( int argc, char **argv ) {
30 MAP file = map_file( argv[1], 1 );
31 uint8_t *out, *in;
32 size_t last = 3, off = 0, out_off = 0;
33 int start, end, copy;
34
35 (void)argc;
36
37 out = malloc( HUGEBLOCK );
38
39 if( !file || !out )
40 exit(1);
41
42 in = file->addr;
43 start = 10 * ( in[off] - '0' ) + in[off+1] - '0';
44 end = start - 1;
45
46 while( off < file->size ) {
47 int issue = 10 * ( in[off] - '0' ) + in[off+1] - '0';
48 off += 3;
49 copy = 1;
50
51// fprintf( stderr, "issue: %02d start %02d end %02d last %08d off %08d", issue, start, end, last, off );
52 switch ( rt_strcmp( in + last, in + off ) ) {
53 case 1:
54 last = off;
55 case 0:
56 case 2:
57 if (issue == end + 1 ) copy = 0, end++;
58 if (issue == end ) copy = 0;
59 break;
60 default:
61 break;
62 }
63// fprintf( stderr, " copy: %d\n", copy );
64
65 if( copy) {
66 out_off += sprintf( (char*)out + out_off, "%02d%02d\a", start, end );
67 out_off += rt_strcpy( out + out_off, in + last );
68 start = issue; end = issue;
69 last = off;
70 }
71
72 off += rt_strlen( in + off );
73
74 if( out_off + 8192 * 2 > HUGEBLOCK ) {
75 fwrite( out, out_off, 1, stdout );
76 out_off = 0;
77 }
78 }
79
80 return 0;
81}