From cf193c66d988637d9ddfb0acee82608a04f96402 Mon Sep 17 00:00:00 2001 From: erdgeist <> Date: Thu, 19 Apr 2007 19:54:07 +0000 Subject: Kickoff --- el.c | 195 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 el.c (limited to 'el.c') diff --git a/el.c b/el.c new file mode 100644 index 0000000..a1994b6 --- /dev/null +++ b/el.c @@ -0,0 +1,195 @@ +/* Extract Lines + Usage: echo -x 10 2 7 100 | el data.txt + extracts lines 16, 2, 7, 256 from the file data.txt +*/ + +#include +#include "mystdlib.h" +#include + +// Our index into the already scanned lines. +// memory buffer is estimated to 1/32th of the +// indexed file's size, meaning each line +// containing approx 31 letters. If it hits the +// top, we increase in 1/32th steps +static unsigned char **index = NULL; + +// this denotes the number of fields, NOT the +// memory occupied (would be a size_t, then!!!) +static long indexsize = 0; + +// this specifies, how many lines have been indexed +// already, scanning always only happens to the +// last reqeusted line +static long indexfilled = 1; + +// If set, the stream of linenums specified calls +// the first line number 0... *shiver* +static int zerobased = 0; + +// This tells us, whether we need to scan for hex +// line numbers +static char *g_scanfmodifier = "%i"; + +// scans the text file for the requested line +// returns NULL, if line exceeds file's end +// Note: we will not extend the index too early +// to prevent huge numbers from stdin to steal our +// memory + +static unsigned char * scanforline( MAP file, const long lineno, long *size ) { + unsigned char *scanline, *const e_o_f = file->addr + file->size; + static int alllinesscanned = 0; + *size = 0; + + // lines start at 1 + if( lineno < 1 ) return NULL; + + // lines we already found can simply be returned + if( lineno < indexfilled ) { + *size = index[lineno] - index[lineno-1]; + return index[lineno-1]; + } + + // if alllines were scanned, were either on + // or behind last line + if( alllinesscanned ) { + if( lineno == indexfilled ) { + *size = e_o_f - index[lineno-1]; + return index[lineno-1]; + } + return NULL; + } + + // exploring undiscovered land... + scanline = index[indexfilled-1]; + + while( indexfilled <= lineno ) { + // scan for next line + while( ( scanline < e_o_f ) && ( *scanline++ != '\n' )); + + // store pointer + if( scanline == e_o_f ) { + alllinesscanned = 1; + if( indexfilled == lineno ) { + *size = e_o_f - index[lineno-1]; + return index[lineno-1]; + } else + return NULL; + } + + index[indexfilled++] = scanline; + + // reallocate some memory + if( indexfilled == indexsize ) { + unsigned char ** newblock = (unsigned char**) realloc( index, sizeof(char *) * ( indexsize + file->size / 32 ) ); + if( !newblock ) { + fputs( "Could not allocate memory, exiting.\n", stderr); + // unmap file and zero pointer + unmap_file( &file ); + exit ( 1 ); + } + indexsize += file->size / 32; + index = newblock; + } + + } + + *size = index[lineno] - index[lineno-1]; + return index[lineno-1]; +} + +// Reads all characters up to the next white space +// from stdin, eof is treated as a white space +// If more than 13 characters without white space +// occur, we assume a too large line number and return +// 0 which is an invalid line (these start at 1) and will +// result in an empty line being printed + +// Rationale: +// Since we wont support linenos greater 2^31 and this +// is 12 characters + leading 0 in octal, we stop parsing +// after 13 characters (technically, 001 is 01, but +// constructing such strings to annoy parser writers must +// be punished + +static int acquire_lineno( int c ) { + char input[15]; + int inputindex = 0, lineno; + + while ( (c != EOF) && !isspace(c)) { + if( inputindex < 14 ) + input[inputindex++] = (char)c; + c = getchar(); + } + + if( inputindex == 14 ) + return 0; + + input[inputindex] = 0; + + // Try to read + if( sscanf( input, g_scanfmodifier, &lineno ) != 1 ) + return 0; + + return lineno + zerobased; +} + +static void usage() { + fputs( "Usage: el [-0xh] filename < filenums\n", stderr); +} + +int main( int argc, char **argv ) { + // Our handle to the mapped text file + MAP textfile = NULL; + int c; + + while ((c = getopt(argc, argv, ":0x")) != -1) { + switch (c) { + case '0': + zerobased = 1; + break; + case 'x': + g_scanfmodifier = "%x"; + break; + case 'h': + case '?': + default: + usage(); + exit(1); + } + } + argc -= optind; + argv += optind; + + if( argc != 1 ) { usage(); exit(1); } + + // Map text file read only + if( (textfile = map_file( argv[0], 1 )) == NULL ) exit(1); + + indexsize = textfile->size < 32 ? 32 : textfile->size / 32; + if( (index = malloc( sizeof(char *) * indexsize )) == NULL ) { + fputs( "Could not allocate memory, exiting.\n", stderr); + // unmap file and zero pointer + unmap_file( &textfile ); + exit ( 1 ); + } + + // First line starts at begin of file. + index[0] = textfile->addr; + + while( (c = getchar()) != EOF ) { + // get linenumber, pass on eof test char + long slen, lineno = acquire_lineno(c); + unsigned char *line = scanforline( textfile, lineno, &slen ); + + if( line && slen ) + fwrite( line, slen, 1, stdout ); + else + putchar('\n'); + } + + // unmap file and zero pointer + unmap_file( &textfile ); + return 0; +} -- cgit v1.2.3