summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorerdgeist <>2004-04-29 19:42:16 +0000
committererdgeist <>2004-04-29 19:42:16 +0000
commitc2011c5def9154c9a48f5e7e17d48d840aad675d (patch)
treefac07559c95ccb997c41c22f95b6fb05596855bc /src
parent9417862eda890b70185a5329e8657b327b8d859f (diff)
Start
Diffstat (limited to 'src')
-rw-r--r--src/dumpindex2.c33
-rw-r--r--src/dumppointers2.c127
-rw-r--r--src/mystdlib.c54
-rw-r--r--src/mystdlib.h32
-rw-r--r--src/sortindex.c81
5 files changed, 327 insertions, 0 deletions
diff --git a/src/dumpindex2.c b/src/dumpindex2.c
new file mode 100644
index 0000000..f49a329
--- /dev/null
+++ b/src/dumpindex2.c
@@ -0,0 +1,33 @@
1#include "mystdlib.h"
2#include <sys/mman.h>
3#include <fcntl.h>
4
5static int indexed = -1;
6
7int main( int argc, char **argv ) {
8 MAP index = NULL;
9 int i,j;
10 char out[50];
11
12 if( argc != 3 )
13 { fputs( "Syntax: sortindex <indexedfile> <indexfile>", stderr); exit( 1 ); }
14
15 if( ( indexed = open( argv[1], O_RDONLY ) ) == -1 )
16 { fprintf( stderr, "Could not open file: %s\n", argv[1] ); exit( 1 ); }
17
18 if( !(index = map_file( argv[2], 0 ) ) ) exit( 1 );
19
20 for( i = 0; i < index->size; i+= 16 ) {
21 unsigned char *x = i + (unsigned char*)index->addr;
22 unsigned long p = *(unsigned long*)x;
23
24 pread( indexed, out, 40, (off_t)p );
25 for( j=0;j<40;++j) if( out[j] == '\t' || out[j] == '\n' ) out[j] = 0; out[j] = 0;
26 puts( out );
27 }
28
29 unmap_file( &index );
30 close( indexed );
31
32 return 0;
33}
diff --git a/src/dumppointers2.c b/src/dumppointers2.c
new file mode 100644
index 0000000..80a4b61
--- /dev/null
+++ b/src/dumppointers2.c
@@ -0,0 +1,127 @@
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
4
/* BUFSIZE is the size of the input window. main() requests a refill once
   the read position has moved to within BUFLOW bytes of the window's end. */
#define BUFSIZE 0x1000000
#define BUFLOW 0x400

/* Descriptor of the file being indexed; -1 until main() opens it. */
static int infile = -1;

/* Backing storage for the input window. The window itself (inblock) starts
   16 bytes in; main() stores a record header in the four bytes in front of
   the read position, so the spare room keeps that store inside the array
   even when the read position is near the window start. */
static unsigned char nblock[ BUFSIZE + 16 ];
static unsigned char*inblock = nblock + 16;
/* Read position inside inblock. */
static unsigned long inblockoffs = 0;

/* Size of the input file, and the file offset that inblock[0] maps to. */
static unsigned long infilesize = 0;
static unsigned long infileoffs = 0;

/* Forward declaration of the folding table defined further down. The size
   is spelled out because a tentative definition with internal linkage must
   not have an incomplete type. */
static unsigned char to_lower[256];
static unsigned char run = 1;
16
17static void buffer_lowwatermark( void )
18{
19 int i;
20 if( infilesize - infileoffs < BUFSIZE )
21 return;
22 memmove( inblock, inblock + inblockoffs, BUFSIZE - inblockoffs );
23 read( infile, inblock + (BUFSIZE - inblockoffs), inblockoffs);
24 for ( i = BUFSIZE - inblockoffs; i<BUFSIZE; ++i) inblock[i] = to_lower[inblock[i]];
25 infileoffs += inblockoffs;
26 inblockoffs = 0;
27}
28
29static void buffer_init( void )
30{
31 int i;
32 read( infile, inblock, BUFSIZE);
33 for ( i = 0; i<BUFSIZE; ++i) inblock[i] = to_lower[inblock[i]];
34}
35
/* Byte folding table applied to all input before it is parsed.

   - 0x0A (newline) becomes 0x00 and 0x09 (tab) becomes 0x01; these are the
     two values main() switches on as separators.
   - The remaining bytes below 0x10 (0x02-0x08, 0x0B-0x0F) all become 0x02.
   - 'A'-'Z' become 'a'-'z'; 0xD6 becomes 0xF6 and 0xDC becomes 0xFC.
   - Every other byte maps to itself.

   NOTE(review): 0xD6/0xDC look like Latin-1 O-umlaut/U-umlaut; 0xC4 is not
   folded to 0xE4 in the same way. Confirm whether that is intended before
   changing it, and keep any other copy of this table in step. */
static unsigned char to_lower[] = {
0x00, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x01, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xF6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xFC, 0xDD, 0xDE, 0xDF,
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
};
54
/* Opens (creating it if necessary) the output file for one byte pair.

   The file name is "<currrow as two decimal digits>/<a in hex><b in hex>",
   relative to the current directory; the directory is not created here.

   currrow  directory number
   a, b     the two bytes that select the file

   Returns the open descriptor (read/write, mode 0644). Terminates the
   program with status 1 when the name does not fit the name buffer or the
   file cannot be opened.

   NOTE(review): "%2X" pads values below 0x10 with a blank, not a zero, so
   such names contain spaces. Left as is because other tools may rely on
   the existing names; confirm whether "%02X" was meant. */
int makefile( int currrow, unsigned char a, unsigned char b)
{
  char fn[16];
  int  fid;
  int  len = snprintf( fn, sizeof fn, "%02d/%2X%2X", currrow, a, b );

  /* The bounded format cannot overrun fn; refuse a truncated name. */
  if( len < 0 || (size_t)len >= sizeof fn ) {
    fprintf( stderr, "File name too long for row %d\n", currrow );
    exit( 1 );
  }

  fid = open( fn, O_RDWR | O_CREAT, 0644 );
  if( fid == -1 ) {
    fprintf( stderr, "Unable to open: %s\n", fn );
    exit( 1 );
  }
  return fid;
}
65
66int main( int args, char **argv )
67{
68 int currrow = 1, inrow = 0, nowfd, use, mycount = 0;
69 unsigned long row;
70 static int fd[0x10000][17];
71
72 memset( fd, 0, sizeof(fd));
73 setvbuf( stdout, NULL, _IONBF, 0);
74
75 if( (args != 3) || ((row = atol(argv[1]))==0))
76 {
77 fprintf( stderr, "syntax: %s row toindex\n", *argv );
78 exit( 1 );
79 }
80
81 if( ( infile = open( argv[2], O_RDONLY ) ) == -1 )
82 {
83 fprintf( stderr, "Could not open file %s.\n", argv[2] );
84 exit( 1 );
85 }
86 getfilesize( infile, &infilesize );
87
88 buffer_init();
89
90 row = 1 << row;
91 use = row & 1;
92 while( 1 )
93 {
94 unsigned char a, b;
95 switch( a = inblock[inblockoffs++] )
96 {
97 case 0 :
98 currrow = 0;
99 if( inblockoffs > BUFSIZE - BUFLOW )
100 buffer_lowwatermark();
101 if( infilesize <= infileoffs + inblockoffs)
102 return 0;
103 if( !(mycount++ & 255) )
104 printf( "%cSo far: %010.6f%% (%010lu / %010lu)", 13, 100.00 * (double)(infileoffs+inblockoffs) / (double)infilesize, infileoffs+inblockoffs, infilesize );
105 // Fall through
106 case 1 :
107 use = (1<<currrow) & row; currrow++; inrow = 0;
108 break;
109 default :
110 if( use && ( (b = inblock[inblockoffs] ) > 1 ) )
111 {
112 if( !fd[ b | (a*256) ][currrow] )
113 nowfd = fd[ b | (a*256) ][currrow] = makefile( currrow, a, b);
114 else
115 nowfd = fd[ b | (a*256) ][currrow];
116
117 *(unsigned long*)(inblock+inblockoffs-4) = infileoffs + inblockoffs - 1 - inrow;
118 inblock[inblockoffs] = inrow++;
119 write( nowfd, inblock+inblockoffs-4, 16);
120 inblock[inblockoffs] = b;
121 break;
122 }
123 }
124 }
125
126 return 0;
127}
diff --git a/src/mystdlib.c b/src/mystdlib.c
new file mode 100644
index 0000000..2deda22
--- /dev/null
+++ b/src/mystdlib.c
@@ -0,0 +1,54 @@
1#include <sys/types.h>
2#include <sys/stat.h>
3#include <sys/mman.h>
4#include <unistd.h>
5#include <fcntl.h>
6#include <stdio.h>
7
8#include "mystdlib.h"
9
10MAP map_file( char *filename, int readonly )
11{
12 struct stat fstatus;
13 MAP map = (MAP)malloc( sizeof( *map ));
14
15 if( map )
16 {
17 memset( map, 0, sizeof( *map ));
18
19 if( ( map->fh = open( filename, readonly ? O_RDONLY : O_RDWR ) ) >= 0 )
20 {
21 fstat( map->fh, &fstatus );
22 if( ( map->addr = mmap( NULL, map->size = (size_t)fstatus.st_size,
23 PROT_READ | ( readonly ? 0 : PROT_WRITE), MAP_NOCORE | (readonly ? 0 : MAP_SHARED), map->fh, 0) ) == MAP_FAILED )
24 {
25 fprintf( stderr, "Mapping file '%s' failed\n", filename );
26 close( map->fh ); free( map ); map = NULL;
27 }
28 } else {
29 fprintf( stderr, "Couldn't open file: '%s'\n", filename );
30 free( map ); map = NULL;
31 }
32 } else {
33 fputs( "Couldn't allocate memory", stderr );
34 }
35
36 return map;
37}
38
39void unmap_file ( MAP *pMap )
40{
41 if( !pMap || !*pMap ) return;
42 munmap( (*pMap)->addr, (*pMap)->size);
43 close( (*pMap)->fh);
44 free( *pMap ); *pMap = NULL;
45}
46
/* Stores the size in bytes of the file open on fd in *size.

   fd    open file descriptor
   size  receives the size; left untouched on failure

   Returns 0 on success, -1 when size is NULL or fstat() fails. */
int getfilesize( int fd, unsigned long *size)
{
  struct stat sb;

  if( !size ) return -1;
  if( fstat( fd, &sb )) return -1;
  *size = sb.st_size;
  return 0;
}
diff --git a/src/mystdlib.h b/src/mystdlib.h
new file mode 100644
index 0000000..2e9499f
--- /dev/null
+++ b/src/mystdlib.h
@@ -0,0 +1,32 @@
#ifndef MYSTDLIB_H
#define MYSTDLIB_H

#include <sys/types.h>
#include <stdio.h>

/* Handle of a mapped file: its descriptor, the address it is mapped at and
   its size in bytes. */
typedef struct { int fh; unsigned char *addr; size_t size; } *MAP;

/* Maps a file into memory and
   returns a pointer to the mapping struct,
   containing the file's size, the mapped
   address and its file handle.

   If readonly is true, the file will be
   opened and mapped read only. If false,
   the file is opened and mapped writable.

   Returns NULL if memory could not be
   allocated or the file could not be
   opened or mapped. Prints a diagnostic
   message to stderr in that case.
*/
MAP map_file( char *filename, int readonly );

/* Unmaps a file from memory. NULL pointer
   checks are done, so this is safe to
   call from cleanup code without knowing
   whether there actually is a map.
*/
void unmap_file ( MAP *pMap );

/* Gets the size of an open file,
   returns != 0 in case of error.
   Declared without inline: the only
   definition is an ordinary external
   function, not an inline one. */
int getfilesize( int fd, unsigned long *size );

#endif /* MYSTDLIB_H */
diff --git a/src/sortindex.c b/src/sortindex.c
new file mode 100644
index 0000000..b3b3bfe
--- /dev/null
+++ b/src/sortindex.c
@@ -0,0 +1,81 @@
1#include "mystdlib.h"
2#include <sys/mman.h>
3#include <fcntl.h>
4
5static int indexed = -1;
/* Byte folding table used by the comparison functions below.

   - All bytes below 0x10 (including tab and newline) become 0x00, so they
     end a string just like NUL does.
   - 'A'-'Z' become 'a'-'z'; 0xD6 becomes 0xF6 and 0xDC becomes 0xFC.
   - Every other byte maps to itself.

   The table is only ever read, hence const. */
static const unsigned char to_lower[] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xF6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xFC, 0xDD, 0xDE, 0xDF,
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
};

/* Compares two strings after folding every byte through to_lower.

   Comparison stops at the first byte that folds to 0 in both strings.
   Returns 0 when the strings are equal up to that point, otherwise the
   difference of the first pair of folded bytes that differ (negative when
   s1 sorts first). */
int
mystrcasecmp(const unsigned char *s1, const unsigned char *s2) {
  for( ;; ++s1, ++s2 ) {
    unsigned char c1 = to_lower[*s1];
    unsigned char c2 = to_lower[*s2];

    if( c1 != c2 ) return (c1 - c2);
    if( c1 == '\0' ) return (0);
  }
}

/* Like mystrcasecmp(), but looks at no more than 11 bytes of each string.

   Returns the difference of the first pair of folded bytes that differ, or
   0 when both strings end (fold to 0) at the same position within the
   first 11 bytes. When all 11 bytes match without either string ending,
   the result is undecided and 0x1000 is returned; that value cannot be
   confused with a byte difference, and cb_compare() tests for it.

   No byte beyond the first 11 of either argument is read. */
int
mystrcasecmp2(const unsigned char *s1, const unsigned char *s2) {
  int i;

  for( i = 0; i < 11; ++i ) {
    unsigned char c1 = to_lower[s1[i]];
    unsigned char c2 = to_lower[s2[i]];

    if( c1 != c2 ) return (c1 - c2);
    if( c1 == '\0' ) return (0);
  }
  return 0x1000;
}
40
41int cb_compare( const void* a, const void* b) {
42 int result;
43
44 result = mystrcasecmp2( 5+(unsigned char*)a, 5+(unsigned char*)b );
45 if( result != 0x1000 )
46 return result;
47
48 {
49 void *myptr1 = mmap( NULL, 1024, PROT_READ, MAP_NOCORE, indexed, 16+(off_t)*(unsigned long*)a );
50 void *myptr2 = mmap( NULL, 1024, PROT_READ, MAP_NOCORE, indexed, 16+(off_t)*(unsigned long*)b );
51
52 if( myptr1 && myptr2)
53 result = mystrcasecmp( myptr1, myptr2 );
54 else {
55 fprintf( stderr, "Mapping during sort failed.\n" );
56 exit( 1 );
57 }
58 munmap( myptr1, 1024 );
59 munmap( myptr2, 1024 );
60 }
61 return result;
62}
63
64int main( int argc, char **argv ) {
65 MAP index = NULL;
66
67 if( argc != 3 )
68 { fputs( "Syntax: sortindex <indexedfile> <indexfile>", stderr); exit( 1 ); }
69
70 if( ( indexed = open( argv[1], O_RDONLY ) ) == -1 )
71 { fprintf( stderr, "Could not open file: %s\n", argv[1] ); exit( 1 ); }
72
73 if( !(index = map_file( argv[2], 0 ) ) ) exit( 1 );
74
75 qsort( index->addr, index->size / 16, 16, cb_compare );
76
77 unmap_file( &index );
78 close( indexed );
79
80 return 0;
81}