diff options
author | Dirk Engling <erdgeist@erdgeist.org> | 2019-06-29 18:51:00 +0200 |
---|---|---|
committer | Dirk Engling <erdgeist@erdgeist.org> | 2019-06-29 18:51:00 +0200 |
commit | fb3616e06ca20ffe78dfb00b962a2599a46b2a5f (patch) | |
tree | fe7ad54e8cb17174b3296b63f41ad1f5b1038f8e /src/postprocess | |
parent | b4bf8417af0d8ebff2c50570c70fdecaf6a53ed9 (diff) |
Clean up 1992 post processing
Diffstat (limited to 'src/postprocess')
-rw-r--r-- | src/postprocess/map_plz.c | 4 | ||||
-rw-r--r-- | src/postprocess/merge_entries.c | 5 | ||||
-rw-r--r-- | src/postprocess/postprocess-1992.sh | 15 |
3 files changed, 11 insertions, 13 deletions
diff --git a/src/postprocess/map_plz.c b/src/postprocess/map_plz.c
index ab0db71..9dec6bb 100644
--- a/src/postprocess/map_plz.c
+++ b/src/postprocess/map_plz.c
@@ -18,7 +18,7 @@ int main(int argc, char **args) { | |||
18 | FILE *bfile, *streetfile_out; | 18 | FILE *bfile, *streetfile_out; |
19 | char *ptr, *input = malloc(65335); | 19 | char *ptr, *input = malloc(65335); |
20 | char *ort = malloc(65335), vorwahl_block[16]; | 20 | char *ort = malloc(65335), vorwahl_block[16]; |
21 | int i, brutes_count = 0, report = 0; | 21 | unsigned int i, brutes_count = 0, report = 0; |
22 | brute_t *brutes = malloc(200000*sizeof(brute_t)); | 22 | brute_t *brutes = malloc(200000*sizeof(brute_t)); |
23 | 23 | ||
24 | /* prepare io */ | 24 | /* prepare io */ |
@@ -63,7 +63,7 @@ int main(int argc, char **args) { | |||
63 | g_book_by_name = (entry_t*)malloc(g_book_size * sizeof(entry_t)); | 63 | g_book_by_name = (entry_t*)malloc(g_book_size * sizeof(entry_t)); |
64 | 64 | ||
65 | /* Split pointers into input files into our arrays */ | 65 | /* Split pointers into input files into our arrays */ |
66 | for (i = 0, ptr = (char*)tbuch->addr; i < g_book_size; ++i) { | 66 | for (i=0, ptr=(char*)tbuch->addr; i<g_book_size; ++i) { |
67 | g_book[i].vorwahl = ptr; ptr += strlen(ptr) + 1; | 67 | g_book[i].vorwahl = ptr; ptr += strlen(ptr) + 1; |
68 | g_book[i].ort = ptr; ptr += strlen(ptr) + 1; | 68 | g_book[i].ort = ptr; ptr += strlen(ptr) + 1; |
69 | g_book[i].strasse = ptr; ptr += strlen(ptr) + 1; | 69 | g_book[i].strasse = ptr; ptr += strlen(ptr) + 1; |
diff --git a/src/postprocess/merge_entries.c b/src/postprocess/merge_entries.c
index f9ee67d..1dd7d50 100644
--- a/src/postprocess/merge_entries.c
+++ b/src/postprocess/merge_entries.c
@@ -148,9 +148,10 @@ static int sort_me(const void *f_a, const void *f_b) { | |||
148 | outvec_t *oa_row = oa + row * COLUMNS; | 148 | outvec_t *oa_row = oa + row * COLUMNS; |
149 | outvec_t *ob_row = ob + row * COLUMNS; | 149 | outvec_t *ob_row = ob + row * COLUMNS; |
150 | 150 | ||
151 | if ((res = STRCMP_n(oa_row[ 2].ptr, ob_row[ 2].ptr))) return res; /* PLZ */ | ||
152 | if ((res = STRCMP_n(oa_row[ 9].ptr, ob_row[ 9].ptr))) return res; /* Ort */ | ||
151 | if ((res = STRCMP_n(oa_row[10].ptr, ob_row[10].ptr))) return res; /* Vorwahl */ | 153 | if ((res = STRCMP_n(oa_row[10].ptr, ob_row[10].ptr))) return res; /* Vorwahl */ |
152 | if ((res = STRCMP_n(oa_row[11].ptr, ob_row[11].ptr))) return res; /* Rufnummer */ | 154 | if ((res = STRCMP_n(oa_row[11].ptr, ob_row[11].ptr))) return res; /* Rufnummer */ |
153 | if ((res = STRCMP_n(oa_row[ 2].ptr, ob_row[ 2].ptr))) return res; /* PLZ */ | ||
154 | if ((res = STRCMP_n(oa_row[ 6].ptr, ob_row[ 6].ptr))) return res; /* Strasse */ | 155 | if ((res = STRCMP_n(oa_row[ 6].ptr, ob_row[ 6].ptr))) return res; /* Strasse */ |
155 | if ((res = STRCMP_n(oa_row[ 7].ptr, ob_row[ 7].ptr))) return res; /* Hausnummer */ | 156 | if ((res = STRCMP_n(oa_row[ 7].ptr, ob_row[ 7].ptr))) return res; /* Hausnummer */ |
156 | if ((res = STRCMP_n(oa_row[ 3].ptr, ob_row[ 3].ptr))) return res; /* Nachname */ | 157 | if ((res = STRCMP_n(oa_row[ 3].ptr, ob_row[ 3].ptr))) return res; /* Nachname */ |
@@ -200,7 +201,7 @@ int main(int argc, char **args) { | |||
200 | unsigned long current = 0, i, flag; | 201 | unsigned long current = 0, i, flag; |
201 | uint64_t year_list = 0, revflag_list = 0, bizflag_list = 0; | 202 | uint64_t year_list = 0, revflag_list = 0, bizflag_list = 0; |
202 | 203 | ||
203 | if (argc != 1) exit(1); | 204 | if (argc != 2) exit(1); |
204 | tbuch = map_file(args[1], 1); | 205 | tbuch = map_file(args[1], 1); |
205 | 206 | ||
206 | /* Estimate upper bound for amount of lines */ | 207 | /* Estimate upper bound for amount of lines */ |
diff --git a/src/postprocess/postprocess-1992.sh b/src/postprocess/postprocess-1992.sh
index 1e685d2..6720991 100644
--- a/src/postprocess/postprocess-1992.sh
+++ b/src/postprocess/postprocess-1992.sh
@@ -4,19 +4,16 @@ paste 1992_Q2/{01_Flags,12_Vorwahl,12_Vorwahl_block,11_Ort,07_Strasse,08_Hausnum | |||
4 | # Generate lookup file from 1995 | 4 | # Generate lookup file from 1995 |
5 | paste 1995_Q0/{12_Vorwahl,11_Ort,07_Strasse,08_Hausnummer,02_Nachname,03_Vorname,13_Rufnummer,10_Postleitzahl} | tr '\n\t' '\0' > 1995-voshnvrp.bin | 5 | paste 1995_Q0/{12_Vorwahl,11_Ort,07_Strasse,08_Hausnummer,02_Nachname,03_Vorname,13_Rufnummer,10_Postleitzahl} | tr '\n\t' '\0' > 1995-voshnvrp.bin |
6 | 6 | ||
7 | # To debug in lldb | ||
8 | process launch -i 1992_testfile.txt -- 1995-vorwahl-ort-strasse-hnr-name-vorname-rufnummer-plz.bin | ||
9 | |||
10 | # Compile plz mapper | 7 | # Compile plz mapper |
11 | cc -O3 -o map_plz map_plz.c -I ../src/export/ ../src/export/mystdlib.c | 8 | cc -O3 -o map_plz map_plz.c -I ../src/export/ ../src/export/mystdlib.c |
12 | 9 | ||
10 | # To debug in lldb | ||
11 | # cc -O0 -g -o map_plz map_plz.c -I ../src/export/ ../src/export/mystdlib.c | ||
12 | # process launch -i 1992_testfile.txt -- 1995-vorwahl-ort-strasse-hnr-name-vorname-rufnummer-plz.bin | ||
13 | |||
13 | # outputs mapped plz, generates brutemap.txt | 14 | # outputs mapped plz, generates brutemap.txt |
14 | touch brutemap_input.bin zip_simple_map.bin | 15 | touch brutemap_input.bin zip_simple_map.bin |
15 | ./map_plz 1995-voshnvrp.bin zip_simple_map.bin brutemap_input.bin < 1992-fvvoshnvrpp.txt > 10_Postleitzahl | 16 | ./map_plz 1995-voshnvrp.bin zip_simple_map.bin brutemap_input.bin < 1992-fvvoshnvrpp.txt > 1992_Q2/10_Postleitzahl |
16 | |||
17 | # generate street name translation table from brutemap, | ||
18 | # only taking into account similar street names | ||
19 | # cut -f 3,4 brutemap.txt | tr '[:upper:]' '[:lower:]' | paste brutemap.txt - | cut -f 1-4,6,7 | ./jaro | cut -f 1-5 > brutemap_filtered.txt | ||
20 | 17 | ||
21 | # generate street name translation table from brutemap, | 18 | # generate street name translation table from brutemap, |
22 | # only taking into account similar street names, new style | 19 | # only taking into account similar street names, new style |
@@ -29,4 +26,4 @@ cut -f 1-5 brutemap_simifiltered.txt | sort | uniq -c | sed -E $'s:^ *([[:digit: | |||
29 | sort -u zip_mapfile.txt | tr '\n' '\0' > zip_simple_map.bin | 26 | sort -u zip_mapfile.txt | tr '\n' '\0' > zip_simple_map.bin |
30 | 27 | ||
31 | # Redo the mapping with the data from brutemap and zipmap | 28 | # Redo the mapping with the data from brutemap and zipmap |
32 | ./map_plz 1995-voshnvrp.bin zip_simple_map.bin brutemap_input.bin < 1992-fvvoshnvrpp.txt > 10_Postleitzahl | 29 | ./map_plz 1995-voshnvrp.bin zip_simple_map.bin brutemap_input.bin < 1992-fvvoshnvrpp.txt > 1992_Q2/10_Postleitzahl |