diff options
author | erdgeist <> | 2004-04-29 19:42:16 +0000 |
---|---|---|
committer | erdgeist <> | 2004-04-29 19:42:16 +0000 |
commit | c2011c5def9154c9a48f5e7e17d48d840aad675d (patch) | |
tree | fac07559c95ccb997c41c22f95b6fb05596855bc /src | |
parent | 9417862eda890b70185a5329e8657b327b8d859f (diff) |
Start
Diffstat (limited to 'src')
-rw-r--r-- | src/dumpindex2.c | 33 | ||||
-rw-r--r-- | src/dumppointers2.c | 127 | ||||
-rw-r--r-- | src/mystdlib.c | 54 | ||||
-rw-r--r-- | src/mystdlib.h | 32 | ||||
-rw-r--r-- | src/sortindex.c | 81 |
5 files changed, 327 insertions, 0 deletions
diff --git a/src/dumpindex2.c b/src/dumpindex2.c new file mode 100644 index 0000000..f49a329 --- /dev/null +++ b/src/dumpindex2.c | |||
@@ -0,0 +1,33 @@ | |||
1 | #include "mystdlib.h" | ||
2 | #include <sys/mman.h> | ||
3 | #include <fcntl.h> | ||
4 | |||
5 | static int indexed = -1; | ||
6 | |||
7 | int main( int argc, char **argv ) { | ||
8 | MAP index = NULL; | ||
9 | int i,j; | ||
10 | char out[50]; | ||
11 | |||
12 | if( argc != 3 ) | ||
13 | { fputs( "Syntax: sortindex <indexedfile> <indexfile>", stderr); exit( 1 ); } | ||
14 | |||
15 | if( ( indexed = open( argv[1], O_RDONLY ) ) == -1 ) | ||
16 | { fprintf( stderr, "Could not open file: %s\n", argv[1] ); exit( 1 ); } | ||
17 | |||
18 | if( !(index = map_file( argv[2], 0 ) ) ) exit( 1 ); | ||
19 | |||
20 | for( i = 0; i < index->size; i+= 16 ) { | ||
21 | unsigned char *x = i + (unsigned char*)index->addr; | ||
22 | unsigned long p = *(unsigned long*)x; | ||
23 | |||
24 | pread( indexed, out, 40, (off_t)p ); | ||
25 | for( j=0;j<40;++j) if( out[j] == '\t' || out[j] == '\n' ) out[j] = 0; out[j] = 0; | ||
26 | puts( out ); | ||
27 | } | ||
28 | |||
29 | unmap_file( &index ); | ||
30 | close( indexed ); | ||
31 | |||
32 | return 0; | ||
33 | } | ||
diff --git a/src/dumppointers2.c b/src/dumppointers2.c new file mode 100644 index 0000000..80a4b61 --- /dev/null +++ b/src/dumppointers2.c | |||
@@ -0,0 +1,127 @@ | |||
1 | #include <stdio.h> | ||
2 | #include <unistd.h> | ||
3 | #include <fcntl.h> | ||
4 | |||
/* Size of the sliding input window and the distance from its end at which
   the window is refilled. */
#define BUFSIZE 0x1000000
#define BUFLOW  0x400

/* Descriptor of the file being indexed; -1 until main() has opened it. */
static int infile = -1;

/* Backing store for the window. inblock starts 16 bytes into it, so the
   bytes just in front of the window are addressable as well (main() forms
   index records starting 4 bytes before the cursor). */
static unsigned char nblock[ BUFSIZE + 16 ];
static unsigned char*inblock = nblock + 16;

/* Cursor into inblock. */
static unsigned long inblockoffs = 0;

/* Size of infile, and the file offset that inblock[0] corresponds to. */
static unsigned long infilesize = 0;
static unsigned long infileoffs = 0;

/* Forward declaration of the byte folding table defined further down. The
   size is spelled out because a static tentative definition must not have
   an incomplete array type. */
static unsigned char to_lower[256];

/* Not referenced by the code visible in this file. */
static unsigned char run = 1;
16 | |||
17 | static void buffer_lowwatermark( void ) | ||
18 | { | ||
19 | int i; | ||
20 | if( infilesize - infileoffs < BUFSIZE ) | ||
21 | return; | ||
22 | memmove( inblock, inblock + inblockoffs, BUFSIZE - inblockoffs ); | ||
23 | read( infile, inblock + (BUFSIZE - inblockoffs), inblockoffs); | ||
24 | for ( i = BUFSIZE - inblockoffs; i<BUFSIZE; ++i) inblock[i] = to_lower[inblock[i]]; | ||
25 | infileoffs += inblockoffs; | ||
26 | inblockoffs = 0; | ||
27 | } | ||
28 | |||
29 | static void buffer_init( void ) | ||
30 | { | ||
31 | int i; | ||
32 | read( infile, inblock, BUFSIZE); | ||
33 | for ( i = 0; i<BUFSIZE; ++i) inblock[i] = to_lower[inblock[i]]; | ||
34 | } | ||
35 | |||
/*
 * Byte folding table applied to every input byte as it enters the buffer.
 *
 *  - 0x00 and '\n' (0x0A) become 0, which main() treats as end of row.
 *  - 0x01 and '\t' (0x09) become 1, which main() treats as end of column.
 *  - Every other byte below 0x10 becomes 2.
 *  - 'A'..'Z' become 'a'..'z'; 0xD6 and 0xDC become 0xF6 and 0xFC.
 *  - All remaining bytes map to themselves.
 *
 * NOTE(review): 0xC4 is left unfolded although 0xD6 and 0xDC are folded;
 * if the input is Latin-1 this looks like an omission - confirm before
 * changing, since index files built with the current table depend on it.
 */
static unsigned char to_lower[] = {
  /* 0x00 */ 0x00, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x01, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02,
  /* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
  /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
  /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
  /* 0x40 */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  /* 0x50 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
  /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
  /* 0x80 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
  /* 0x90 */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
  /* 0xA0 */ 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
  /* 0xB0 */ 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
  /* 0xC0 */ 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
  /* 0xD0 */ 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xF6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xFC, 0xDD, 0xDE, 0xDF,
  /* 0xE0 */ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  /* 0xF0 */ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
};
54 | |||
/*
 * Opens (creating it if necessary) the output file for column `currrow`
 * and the two leading bytes `a` and `b`.
 *
 * The path is "<currrow as two decimal digits>/<a><b>", where each byte is
 * printed with "%2X", i.e. as upper-case hex padded with a space (not a
 * zero) to two characters. The directory must already exist.
 *
 * Returns the open descriptor. Terminates the program with status 1 when
 * the path does not fit its buffer or the file cannot be opened.
 */
int makefile( int currrow, unsigned char a, unsigned char b)
{
  char fn[16];
  int  fid;
  int  len = snprintf( fn, sizeof fn, "%02d/%2X%2X", currrow, (unsigned)a, (unsigned)b );

  /* Bounded formatting: an unexpected currrow must not overflow fn. */
  if( len < 0 || (size_t)len >= sizeof fn ) {
    fprintf( stderr, "Unable to open: %02d/%2X%2X\n", currrow, (unsigned)a, (unsigned)b );
    exit( 1 );
  }

  fid = open( fn, O_RDWR | O_CREAT, 0644 );
  if( fid == -1 ) {
    fprintf( stderr, "Unable to open: %s\n", fn );
    exit( 1 );
  }
  return fid;
}
65 | |||
66 | int main( int args, char **argv ) | ||
67 | { | ||
68 | int currrow = 1, inrow = 0, nowfd, use, mycount = 0; | ||
69 | unsigned long row; | ||
70 | static int fd[0x10000][17]; | ||
71 | |||
72 | memset( fd, 0, sizeof(fd)); | ||
73 | setvbuf( stdout, NULL, _IONBF, 0); | ||
74 | |||
75 | if( (args != 3) || ((row = atol(argv[1]))==0)) | ||
76 | { | ||
77 | fprintf( stderr, "syntax: %s row toindex\n", *argv ); | ||
78 | exit( 1 ); | ||
79 | } | ||
80 | |||
81 | if( ( infile = open( argv[2], O_RDONLY ) ) == -1 ) | ||
82 | { | ||
83 | fprintf( stderr, "Could not open file %s.\n", argv[2] ); | ||
84 | exit( 1 ); | ||
85 | } | ||
86 | getfilesize( infile, &infilesize ); | ||
87 | |||
88 | buffer_init(); | ||
89 | |||
90 | row = 1 << row; | ||
91 | use = row & 1; | ||
92 | while( 1 ) | ||
93 | { | ||
94 | unsigned char a, b; | ||
95 | switch( a = inblock[inblockoffs++] ) | ||
96 | { | ||
97 | case 0 : | ||
98 | currrow = 0; | ||
99 | if( inblockoffs > BUFSIZE - BUFLOW ) | ||
100 | buffer_lowwatermark(); | ||
101 | if( infilesize <= infileoffs + inblockoffs) | ||
102 | return 0; | ||
103 | if( !(mycount++ & 255) ) | ||
104 | printf( "%cSo far: %010.6f%% (%010lu / %010lu)", 13, 100.00 * (double)(infileoffs+inblockoffs) / (double)infilesize, infileoffs+inblockoffs, infilesize ); | ||
105 | // Fall through | ||
106 | case 1 : | ||
107 | use = (1<<currrow) & row; currrow++; inrow = 0; | ||
108 | break; | ||
109 | default : | ||
110 | if( use && ( (b = inblock[inblockoffs] ) > 1 ) ) | ||
111 | { | ||
112 | if( !fd[ b | (a*256) ][currrow] ) | ||
113 | nowfd = fd[ b | (a*256) ][currrow] = makefile( currrow, a, b); | ||
114 | else | ||
115 | nowfd = fd[ b | (a*256) ][currrow]; | ||
116 | |||
117 | *(unsigned long*)(inblock+inblockoffs-4) = infileoffs + inblockoffs - 1 - inrow; | ||
118 | inblock[inblockoffs] = inrow++; | ||
119 | write( nowfd, inblock+inblockoffs-4, 16); | ||
120 | inblock[inblockoffs] = b; | ||
121 | break; | ||
122 | } | ||
123 | } | ||
124 | } | ||
125 | |||
126 | return 0; | ||
127 | } | ||
diff --git a/src/mystdlib.c b/src/mystdlib.c new file mode 100644 index 0000000..2deda22 --- /dev/null +++ b/src/mystdlib.c | |||
@@ -0,0 +1,54 @@ | |||
1 | #include <sys/types.h> | ||
2 | #include <sys/stat.h> | ||
3 | #include <sys/mman.h> | ||
4 | #include <unistd.h> | ||
5 | #include <fcntl.h> | ||
6 | #include <stdio.h> | ||
7 | |||
8 | #include "mystdlib.h" | ||
9 | |||
10 | MAP map_file( char *filename, int readonly ) | ||
11 | { | ||
12 | struct stat fstatus; | ||
13 | MAP map = (MAP)malloc( sizeof( *map )); | ||
14 | |||
15 | if( map ) | ||
16 | { | ||
17 | memset( map, 0, sizeof( *map )); | ||
18 | |||
19 | if( ( map->fh = open( filename, readonly ? O_RDONLY : O_RDWR ) ) >= 0 ) | ||
20 | { | ||
21 | fstat( map->fh, &fstatus ); | ||
22 | if( ( map->addr = mmap( NULL, map->size = (size_t)fstatus.st_size, | ||
23 | PROT_READ | ( readonly ? 0 : PROT_WRITE), MAP_NOCORE | (readonly ? 0 : MAP_SHARED), map->fh, 0) ) == MAP_FAILED ) | ||
24 | { | ||
25 | fprintf( stderr, "Mapping file '%s' failed\n", filename ); | ||
26 | close( map->fh ); free( map ); map = NULL; | ||
27 | } | ||
28 | } else { | ||
29 | fprintf( stderr, "Couldn't open file: '%s'\n", filename ); | ||
30 | free( map ); map = NULL; | ||
31 | } | ||
32 | } else { | ||
33 | fputs( "Couldn't allocate memory", stderr ); | ||
34 | } | ||
35 | |||
36 | return map; | ||
37 | } | ||
38 | |||
39 | void unmap_file ( MAP *pMap ) | ||
40 | { | ||
41 | if( !pMap || !*pMap ) return; | ||
42 | munmap( (*pMap)->addr, (*pMap)->size); | ||
43 | close( (*pMap)->fh); | ||
44 | free( *pMap ); *pMap = NULL; | ||
45 | } | ||
46 | |||
/*
 * Stores the size in bytes of the open file `fd` in *size.
 *
 * Returns 0 on success. Returns -1, leaving *size untouched, when `size`
 * is NULL or fstat() fails.
 */
int getfilesize( int fd, unsigned long *size)
{
  struct stat sb;

  if( !size || fstat( fd, &sb ) != 0 )
    return -1;

  *size = (unsigned long)sb.st_size;
  return 0;
}
diff --git a/src/mystdlib.h b/src/mystdlib.h new file mode 100644 index 0000000..2e9499f --- /dev/null +++ b/src/mystdlib.h | |||
@@ -0,0 +1,32 @@ | |||
#include <sys/types.h>

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
3 | |||
#ifndef MYSTDLIB_H
#define MYSTDLIB_H

/* Handle for a memory-mapped file: the open file descriptor, the address
   of the mapping and its size in bytes. */
typedef struct { int fh; unsigned char *addr; size_t size; } *MAP;

/* Maps a file into memory.

   Returns a pointer to the mapping struct, containing the file's size,
   the mapped address and its file handle.

   If readonly is true, the file is opened and mapped read only;
   otherwise it is opened and mapped writable.

   Returns NULL if memory could not be allocated or the file could not be
   opened or mapped. A diagnostic message is printed on stderr.
*/
MAP map_file( char *filename, int readonly );

/* Unmaps a file from memory and resets *pMap to NULL.

   NULL pointers are checked for, so this is safe to call from cleanup
   code without knowing whether there actually is a map.
*/
void unmap_file ( MAP *pMap );

/* Gets the size of an open file.
   Returns != 0 in case of error.

   Declared as a plain external function: its only definition lives in
   mystdlib.c, so an inline declaration without a body would be wrong here.
*/
int getfilesize( int fd, unsigned long *size );

#endif /* MYSTDLIB_H */
32 | |||
diff --git a/src/sortindex.c b/src/sortindex.c new file mode 100644 index 0000000..b3b3bfe --- /dev/null +++ b/src/sortindex.c | |||
@@ -0,0 +1,81 @@ | |||
1 | #include "mystdlib.h" | ||
2 | #include <sys/mman.h> | ||
3 | #include <fcntl.h> | ||
4 | |||
/* Descriptor of the indexed (data) file; -1 until main() has opened it. */
static int indexed = -1;

/*
 * Byte folding table used by the comparison functions.
 *
 *  - Every byte below 0x10 (including NUL, tab and newline) becomes 0 and
 *    therefore ends a string.
 *  - 'A'..'Z' become 'a'..'z'; 0xD6 and 0xDC become 0xF6 and 0xFC.
 *  - All remaining bytes map to themselves.
 *
 * NOTE(review): 0xC4 is left unfolded although 0xD6 and 0xDC are folded;
 * if the data is Latin-1 this looks like an omission - confirm before
 * changing, as it alters the sort order.
 */
static const unsigned char to_lower[256] = {
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
  0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
  0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
  0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
  0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
  0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
  0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
  0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
  0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xF6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xFC, 0xDD, 0xDE, 0xDF,
  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
};

/*
 * Compares two strings after folding every byte through to_lower. A byte
 * that folds to 0 ends a string.
 *
 * Returns 0 if the strings are equal, otherwise the difference of the
 * first pair of folded bytes that differ.
 */
int
mystrcasecmp(const unsigned char *s1, const unsigned char *s2) {
  for( ;; ++s1, ++s2 ) {
    unsigned char c1 = to_lower[*s1];
    unsigned char c2 = to_lower[*s2];

    if( c1 != c2 )
      return c1 - c2;
    if( c1 == '\0' )
      return 0;
  }
}

/*
 * Like mystrcasecmp(), but looks at no more than 11 bytes of each string.
 *
 * Returns the difference of the first differing pair of folded bytes, 0 if
 * both strings end (identically) within the 11 bytes, and 0x1000 if all 11
 * bytes match without either string ending - the caller then has to decide
 * on data beyond this window. 0x1000 cannot be confused with a byte
 * difference, which lies in -255..255.
 */
int
mystrcasecmp2(const unsigned char *s1, const unsigned char *s2) {
  int i;

  for( i = 0; i < 11; ++i ) {
    unsigned char c1 = to_lower[s1[i]];
    unsigned char c2 = to_lower[s2[i]];

    if( c1 != c2 )
      return c1 - c2;
    if( c1 == '\0' )
      return 0;
  }
  return 0x1000;
}

/*
 * Loads up to 1024 bytes of the indexed file, starting 16 bytes after the
 * offset stored in the first four bytes of `record`, into `dest` and
 * appends a 0 byte, so a comparison always finds an end. `dest` must hold
 * 1025 bytes. Terminates the program with status 1 if the file cannot be
 * read.
 *
 * NOTE(review): the "+ 16" is taken over unchanged from the original code;
 * whether it matches the record layout of every index writer should be
 * confirmed.
 */
static void load_continuation( const void *record, unsigned char *dest )
{
  uint32_t start;
  size_t   have = 0;

  /* The offset field is 4 bytes wide; the bytes after it belong to other
     fields of the record, so it must not be read as unsigned long. */
  memcpy( &start, record, sizeof start );

  if( lseek( indexed, (off_t)16 + (off_t)start, SEEK_SET ) == (off_t)-1 ) {
    fprintf( stderr, "Reading during sort failed.\n" );
    exit( 1 );
  }

  while( have < 1024 ) {
    ssize_t got = read( indexed, dest + have, 1024 - have );
    if( got < 0 ) {
      fprintf( stderr, "Reading during sort failed.\n" );
      exit( 1 );
    }
    if( got == 0 )
      break;
    have += (size_t)got;
  }
  dest[have] = 0;
}

/*
 * qsort() callback ordering two 16-byte index records.
 *
 * The 11 text bytes stored at offset 5 of each record are compared first.
 * Only if they are all equal and unterminated is the indexed file
 * consulted to break the tie.
 */
int cb_compare( const void* a, const void* b) {
  unsigned char tail_a[1025], tail_b[1025];
  int result;

  result = mystrcasecmp2( 5+(const unsigned char*)a, 5+(const unsigned char*)b );
  if( result != 0x1000 )
    return result;

  /* Plain reads replace the former mmap() calls, which passed an
     unaligned file offset and no sharing flag and tested the result
     against NULL instead of MAP_FAILED. */
  load_continuation( a, tail_a );
  load_continuation( b, tail_b );

  return mystrcasecmp( tail_a, tail_b );
}
63 | |||
64 | int main( int argc, char **argv ) { | ||
65 | MAP index = NULL; | ||
66 | |||
67 | if( argc != 3 ) | ||
68 | { fputs( "Syntax: sortindex <indexedfile> <indexfile>", stderr); exit( 1 ); } | ||
69 | |||
70 | if( ( indexed = open( argv[1], O_RDONLY ) ) == -1 ) | ||
71 | { fprintf( stderr, "Could not open file: %s\n", argv[1] ); exit( 1 ); } | ||
72 | |||
73 | if( !(index = map_file( argv[2], 0 ) ) ) exit( 1 ); | ||
74 | |||
75 | qsort( index->addr, index->size / 16, 16, cb_compare ); | ||
76 | |||
77 | unmap_file( &index ); | ||
78 | close( indexed ); | ||
79 | |||
80 | return 0; | ||
81 | } | ||