diff options
-rwxr-xr-x | makecolumns.sh | 32 | ||||
-rw-r--r-- | postprocess.sh | 5 | ||||
-rw-r--r-- | shopping.txt | 2 | ||||
-rw-r--r-- | src/export/extract_version_4.c | 40 | ||||
-rw-r--r-- | src/postprocess/merge_entries.c | 2 | ||||
-rw-r--r-- | src/postprocess/postgres.sql | 2 | ||||
-rw-r--r-- | src/postprocess/sort_plz.c | 3 |
7 files changed, 68 insertions, 18 deletions
diff --git a/makecolumns.sh b/makecolumns.sh index a505d31..cef88e0 100755 --- a/makecolumns.sh +++ b/makecolumns.sh | |||
@@ -373,6 +373,18 @@ handle_format_version_3() { | |||
373 | rm ??_unknown | 373 | rm ??_unknown |
374 | } | 374 | } |
375 | 375 | ||
376 | fix_broken_v4_file() { | ||
377 | name=$1 | ||
378 | offset=$2 | ||
379 | nullbytes=$3 | ||
380 | gap=$4 | ||
381 | |||
382 | head -c $(( offset - 1 )) file_${name} > file_${name}_ | ||
383 | [ "${nullbytes}" -gt 0 ] && printf '\x00' >> file_${name}_ | ||
384 | tail -c +$(( offset + gap )) file_${name} >> file_${name}_ | ||
385 | mv file_${name}_ file_${name} | ||
386 | } | ||
387 | |||
376 | handle_format_version_4() { | 388 | handle_format_version_4() { |
377 | if [ -f "$1/branchcodes.tl" ]; then | 389 | if [ -f "$1/branchcodes.tl" ]; then |
378 | is_yp=true | 390 | is_yp=true |
@@ -392,6 +404,26 @@ handle_format_version_4() { | |||
392 | printf "Extracting phonebook.db ... " | 404 | printf "Extracting phonebook.db ... " |
393 | extract_version_4 "$1/phonebook.db" | 405 | extract_version_4 "$1/phonebook.db" |
394 | 406 | ||
407 | case $1 in | ||
408 | *2023_Q1*) | ||
409 | # Fixup broken file in 2023_Q1 | ||
410 | fix_broken_v4_file 17F0A 0x01BB 1 93 | ||
411 | ;; | ||
412 | *2023_Q3*) | ||
413 | # Fixup broken file in 2023_Q3 | ||
414 | fix_broken_v4_file 15DB2 0x3D05 0 373 | ||
415 | fix_broken_v4_file 15DBD 0x8CA8 0 391 | ||
416 | fix_broken_v4_file 15DD3 0x72B7 0 120 | ||
417 | fix_broken_v4_file 15E2B 0x0549 0 2051 | ||
418 | fix_broken_v4_file 15EE6 0x0569 1 144 | ||
419 | fix_broken_v4_file 15F28 0x2EF8 0 103 | ||
420 | fix_broken_v4_file 15F3E 0x3C1C 0 164 | ||
421 | fix_broken_v4_file 16348 0x0942 0 221 | ||
422 | fix_broken_v4_file 16419 0x0CF8 0 140 | ||
423 | fix_broken_v4_file 16471 0x0681 0 106 | ||
424 | ;; | ||
425 | esac | ||
426 | |||
395 | rows=`find . -name file_\* | wc -l` | 427 | rows=`find . -name file_\* | wc -l` |
396 | printf "done.\n" | 428 | printf "done.\n" |
397 | 429 | ||
diff --git a/postprocess.sh b/postprocess.sh index bc70492..401a70f 100644 --- a/postprocess.sh +++ b/postprocess.sh | |||
@@ -26,4 +26,7 @@ fi | |||
26 | 26 | ||
27 | cd work || exit 1 | 27 | cd work || exit 1 |
28 | 28 | ||
29 | for a in ????_Q?; do ./sort_plz $a; done | 29 | for a in ????_Q?; do |
30 | printf "Sorting files from issue %s\n" $a | ||
31 | ./sort_plz $a | ||
32 | done | ||
diff --git a/shopping.txt b/shopping.txt index c98df05..72ba624 100644 --- a/shopping.txt +++ b/shopping.txt | |||
@@ -3,8 +3,10 @@ https://www.buecher-nach-isbn.info/3-934073/ | |||
3 | https://www.buecher-nach-isbn.info/3-938913/ | 3 | https://www.buecher-nach-isbn.info/3-938913/ |
4 | 4 | ||
5 | Erscheinungsjahr ISBN Autor(en) Titel | 5 | Erscheinungsjahr ISBN Autor(en) Titel |
6 | 1997 978-3-931424-46-4 Gelbe Seiten für Deutschland Herbst 1997 | ||
6 | 1998 978-3-931424-50-3 Telefonbuch für Deutschland Frühjahr 98. CD- ROM für Windows 3.1/95/NT 3.5.1, Mac ab 7.5 | 7 | 1998 978-3-931424-50-3 Telefonbuch für Deutschland Frühjahr 98. CD- ROM für Windows 3.1/95/NT 3.5.1, Mac ab 7.5 |
7 | 1999 978-3-931424-60-2 Telefonbuch für Deutschland Herbst 98. CD- ROM für Windows 3.1/95/98, Mac ab 7.5 | 8 | 1999 978-3-931424-60-2 Telefonbuch für Deutschland Herbst 98. CD- ROM für Windows 3.1/95/98, Mac ab 7.5 |
9 | 2000 978-3-931424-75-6 Gelbe Seiten für Deutschland Herbst 1999. CD- ROM für Windows ab 3.1/ MacOS ab 7.5. 3,8 Mio Kommunikationsnummern mit Branche und Branchenumfeld | ||
8 | 2001 978-3-931424-92-3 Gelbe Seiten Map & Route für Deutschland Herbst 2000. CD- ROM für Windows ab 95 | 10 | 2001 978-3-931424-92-3 Gelbe Seiten Map & Route für Deutschland Herbst 2000. CD- ROM für Windows ab 95 |
9 | '' 978-3-931424-98-5 Das Telefonbuch für Deutschland Frühjahr 2001. 2 CD- ROMs für Windows 3.1/95/98/ NT 4.0/2000/ ME/ MacOS ab 7.5/ Linux | 11 | '' 978-3-931424-98-5 Das Telefonbuch für Deutschland Frühjahr 2001. 2 CD- ROMs für Windows 3.1/95/98/ NT 4.0/2000/ ME/ MacOS ab 7.5/ Linux |
10 | 2001 978-3-934073-04-3 Telefonbuch für Deutschland - Herbst 2001 | 12 | 2001 978-3-934073-04-3 Telefonbuch für Deutschland - Herbst 2001 |
diff --git a/src/export/extract_version_4.c b/src/export/extract_version_4.c index 1dbaf40..e014e23 100644 --- a/src/export/extract_version_4.c +++ b/src/export/extract_version_4.c | |||
@@ -7,44 +7,56 @@ | |||
7 | #include "mystdlib.h" | 7 | #include "mystdlib.h" |
8 | 8 | ||
9 | #define XORLEN (29) | 9 | #define XORLEN (29) |
10 | #define HUGEINBLOCK (128*1024) | ||
10 | #define HUGEBLOCK (4*1024*1024) | 11 | #define HUGEBLOCK (4*1024*1024) |
11 | 12 | ||
12 | int main(int argc, char **argv) { | 13 | int main(int argc, char **argv) { |
13 | unsigned const char xorkey [XORLEN] = "Just for Fun. Linus Torvalds."; | 14 | unsigned const char xorkey [XORLEN] = "Just for Fun. Linus Torvalds."; |
14 | unsigned char input [XORLEN]; | 15 | unsigned char input [HUGEINBLOCK]; |
15 | unsigned char output [HUGEBLOCK]; | 16 | unsigned char output [HUGEBLOCK]; |
16 | char respath[32]; /* file_XXXXX\0 */ | 17 | char respath[32]; /* file_XXXXX\0 */ |
17 | int zres = 0, filenum = 0, resfile; | 18 | int zres = 0, filenum = 0, resfile; |
18 | size_t i, offs = 0, reported = 0; | 19 | size_t offs = 0, reported = 0; |
19 | ssize_t temp = 0; | 20 | ssize_t temp = 0; |
20 | MAP in; | 21 | MAP in; |
21 | 22 | ||
22 | if( argc != 2 ) exit(111); | 23 | if( argc != 2 ) exit(111); |
23 | in = map_file( argv[1], 1 ); | 24 | in = map_file( argv[1], 1 ); |
24 | 25 | ||
25 | z_stream z; memset( &z, 0, sizeof(z)); | 26 | z_stream z; |
26 | 27 | ||
27 | while( offs < in->size ) { | 28 | while( offs < in->size ) { |
28 | size_t inlen = offs + XORLEN < in->size ? XORLEN : in->size - offs; | 29 | /* std::min(remain, HUGEINBLOCK) */ |
29 | for( i=0; i<inlen; ++i ) input[i] = in->addr[offs+i] ^ xorkey[i]; | 30 | size_t inlen = in->size - offs; |
31 | if (inlen > sizeof(input)) | ||
32 | inlen = sizeof(input); | ||
33 | |||
34 | /* Copy in block large enough */ | ||
35 | memcpy(input, in->addr + offs, inlen); | ||
36 | |||
37 | /* De-"crypt" */ | ||
38 | for (size_t i = 0; i < sizeof(xorkey); ++i ) | ||
39 | input[i] ^= xorkey[i]; | ||
40 | |||
41 | /* Prepare decompression struct */ | ||
42 | memset( &z, 0, sizeof(z)); | ||
30 | z.next_in = input; z.avail_in = inlen; | 43 | z.next_in = input; z.avail_in = inlen; |
31 | z.next_out = output; z.avail_out = HUGEBLOCK; | 44 | z.next_out = output; z.avail_out = HUGEBLOCK; |
32 | inflateInit( &z ); zres = inflate( &z, Z_NO_FLUSH ); | 45 | inflateInit( &z ); |
33 | if( (zres != Z_OK) && (zres != Z_STREAM_END) ) | 46 | zres = Z_OK; |
34 | goto error_continue; | ||
35 | 47 | ||
36 | z.next_in = in->addr + offs + inlen; | 48 | while( zres == Z_OK ) |
37 | z.avail_in = (unsigned int)(in->size - offs - inlen); | 49 | zres = inflate( &z, Z_NO_FLUSH ); |
38 | while( zres == Z_OK ) zres = inflate( &z, Z_NO_FLUSH ); | ||
39 | 50 | ||
40 | if( zres != Z_STREAM_END ) { | 51 | if( zres != Z_STREAM_END ) { |
41 | error_continue: | 52 | inflateEnd(&z); |
42 | inflateEnd(&z); memset( &z, 0, sizeof(z)); | ||
43 | offs++; | 53 | offs++; |
44 | continue; | 54 | continue; |
45 | } | 55 | } |
46 | 56 | ||
47 | sprintf( respath, "file_%05X", filenum++ ); | 57 | // fprintf( stderr, "%08X\n", (unsigned int)(offs)); |
58 | //old_offs = offs; | ||
59 | snprintf( respath, sizeof(respath), "file_%05X", filenum++ ); | ||
48 | 60 | ||
49 | resfile = open( respath, O_RDWR | O_CREAT, 0644 ); | 61 | resfile = open( respath, O_RDWR | O_CREAT, 0644 ); |
50 | if( resfile < 0 ) { | 62 | if( resfile < 0 ) { |
diff --git a/src/postprocess/merge_entries.c b/src/postprocess/merge_entries.c index 1dd7d50..aed0aec 100644 --- a/src/postprocess/merge_entries.c +++ b/src/postprocess/merge_entries.c | |||
@@ -26,7 +26,7 @@ const char *g_year_map[] = { | |||
26 | "1992_Q2", "1995_Q0", "1996_Q0", "1996_Q1", "1997_Q1", "1997_Q3", "1998_Q1", "1998_Q3", "1999_Q1", "1999_Q3", "2000_Q1", "2000_Q3", "2001_Q1", "2001_Q2", "2001_Q3", "2001_Q4", "2002_Q1", | 26 | "1992_Q2", "1995_Q0", "1996_Q0", "1996_Q1", "1997_Q1", "1997_Q3", "1998_Q1", "1998_Q3", "1999_Q1", "1999_Q3", "2000_Q1", "2000_Q3", "2001_Q1", "2001_Q2", "2001_Q3", "2001_Q4", "2002_Q1", |
27 | "2002_Q3", "2003_Q1", "2003_Q3", "2004_Q1", "2004_Q3", "2005_Q1", "2005_Q3", "2006_Q1", "2006_Q3", "2007_Q1", "2007_Q3", "2008_Q1", "2008_Q3", "2009_Q1", "2009_Q3", "2010_Q1", "2010_Q3", | 27 | "2002_Q3", "2003_Q1", "2003_Q3", "2004_Q1", "2004_Q3", "2005_Q1", "2005_Q3", "2006_Q1", "2006_Q3", "2007_Q1", "2007_Q3", "2008_Q1", "2008_Q3", "2009_Q1", "2009_Q3", "2010_Q1", "2010_Q3", |
28 | "2011_Q1", "2011_Q3", "2012_Q1", "2012_Q3", "2013_Q1", "2013_Q3", "2014_Q1", "2014_Q3", "2015_Q1", "2015_Q3", "2016_Q1", "2016_Q3", "2017_Q1", "2017_Q3", "2018_Q1", "2018_Q3", "2019_Q1", | 28 | "2011_Q1", "2011_Q3", "2012_Q1", "2012_Q3", "2013_Q1", "2013_Q3", "2014_Q1", "2014_Q3", "2015_Q1", "2015_Q3", "2016_Q1", "2016_Q3", "2017_Q1", "2017_Q3", "2018_Q1", "2018_Q3", "2019_Q1", |
29 | 0 | 29 | "2019_Q3", "2020_Q1", "2020_Q3", "2021_Q1", "2021_Q3", "2022_Q1", "2022_Q3", "2023_Q1", "2023_Q3", "2024_Q1", "2024_Q3", "2025_Q1", NULL |
30 | }; | 30 | }; |
31 | 31 | ||
32 | static int year_to_offset(const char *year) { | 32 | static int year_to_offset(const char *year) { |
diff --git a/src/postprocess/postgres.sql b/src/postprocess/postgres.sql index 2d89d1c..3c891d4 100644 --- a/src/postprocess/postgres.sql +++ b/src/postprocess/postgres.sql | |||
@@ -74,7 +74,7 @@ CREATE INDEX idx_strasse ON table_strasse USING btree (value, telefonbuch | |||
74 | CREATE INDEX idx_hausnummer ON table_hausnummer USING btree (value, telefonbuch_id, offs); | 74 | CREATE INDEX idx_hausnummer ON table_hausnummer USING btree (value, telefonbuch_id, offs); |
75 | CREATE INDEX idx_zip ON table_zip USING btree (value, telefonbuch_id, offs); | 75 | CREATE INDEX idx_zip ON table_zip USING btree (value, telefonbuch_id, offs); |
76 | CREATE INDEX idx_ort ON table_ort USING btree (value, telefonbuch_id, offs); | 76 | CREATE INDEX idx_ort ON table_ort USING btree (value, telefonbuch_id, offs); |
77 | CREATE INDEX idx_verweise ON table_verweise USING btree (value, telefonbuch_id, offs); | 77 | CREATE INDEX idx_verweise ON table_verweise USING btree (value, telefonbuch_id, offs) WHERE LENGTH(value) < 256; |
78 | CREATE INDEX idx_vorwahl ON table_vorwahl USING btree (value, telefonbuch_id, offs); | 78 | CREATE INDEX idx_vorwahl ON table_vorwahl USING btree (value, telefonbuch_id, offs); |
79 | CREATE INDEX idx_rufnummer ON table_rufnummer USING btree (value, telefonbuch_id, offs); | 79 | CREATE INDEX idx_rufnummer ON table_rufnummer USING btree (value, telefonbuch_id, offs); |
80 | CREATE INDEX idx_web ON table_web USING btree (value, telefonbuch_id, offs); | 80 | CREATE INDEX idx_web ON table_web USING btree (value, telefonbuch_id, offs); |
diff --git a/src/postprocess/sort_plz.c b/src/postprocess/sort_plz.c index dc0b222..9c67ab7 100644 --- a/src/postprocess/sort_plz.c +++ b/src/postprocess/sort_plz.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <stdlib.h> | 13 | #include <stdlib.h> |
14 | #include <stdio.h> | 14 | #include <stdio.h> |
15 | #include <string.h> | 15 | #include <string.h> |
16 | #include <stdint.h> | ||
16 | #include <errno.h> | 17 | #include <errno.h> |
17 | #include <unistd.h> | 18 | #include <unistd.h> |
18 | #include <err.h> | 19 | #include <err.h> |
@@ -84,7 +85,7 @@ int main(int argc, char **args) { | |||
84 | char *input = malloc(1024); | 85 | char *input = malloc(1024); |
85 | size_t input_size = 1024; | 86 | size_t input_size = 1024; |
86 | 87 | ||
87 | if (argc != 1) exit(1); | 88 | if (argc != 2) exit(1); |
88 | 89 | ||
89 | /* First open all input files */ | 90 | /* First open all input files */ |
90 | for (i=F_01; i<F_COUNT; ++i) { | 91 | for (i=F_01; i<F_COUNT; ++i) { |