/* Extract Lines Usage: echo -x 10 2 7 100 | el -x data.txt extracts lines 16, 2, 7, 256 from the file data.txt */ #include #include "mystdlib.h" #include #include typedef struct { // Our mmap()ed file, from mystlib MAP map; // Our index into the already scanned lines. // memory buffer is estimated to 1/32th of the // indexed file's size, meaning each line // containing approx 31 letters. If it hits the // top, we increase in 1/32th steps uint8_t **index; // this denotes the number of fields, NOT the // memory occupied (would be a size_t, then!!!) long index_size; // this specifies, how many lines have been indexed // already, scanning always only happens to the // last reqeusted line long index_filled; // indicates that we've been through all lines in the file int all_lines_scanned; } EL_FILE; // If set, the stream of linenums specified calls // the first line number 0... *shiver* static int g_zerobased = 0; // If set, line numbers are prepended to each line // (the way grep does it, i.e. ^2342:$) static int g_echolinenr = 0; // This tells us, whether we need to scan for hex // line numbers static char *g_scanfmodifier = "%i"; // If user specifies some line numbers on command // line, store pointer here static char *g_immediatelinenums = (char *)0; // If input is guaranteed to come from "grep -n" // we will spare the user from "| cut -f 1 -d :" static int g_fromgrep = 0; static int g_fromgrepverbatim = 0; static int nextchar( void ) { if( !g_immediatelinenums ) return getchar(); if( *g_immediatelinenums ) return *g_immediatelinenums++; return EOF; } // scans the text file for the requested line // returns NULL, if line exceeds file's end // Note: we will not extend the index too early // to prevent huge numbers from stdin to steal our // memory static uint8_t * scanforline( EL_FILE *file, const long lineno, long *size ) { uint8_t *scanline, *const e_o_f = file->map->addr + file->map->size; *size = 0; // lines start at 1 if( lineno < 1 ) return NULL; // lines we already found can simply be returned if( lineno < file->index_filled ) goto return_line; // if all lines were scanned, were either on // or behind last line if( file->all_lines_scanned ) { if( lineno == file->index_filled ) goto return_line; return NULL; } // exploring undiscovered land... scanline = file->index[file->index_filled-1]; while( file->index_filled <= lineno ) { // scan for next line while( ( scanline < e_o_f ) && ( *scanline++ != '\n' )); file->index[file->index_filled++] = scanline; // store pointer if( scanline == e_o_f ) { file->all_lines_scanned = 1; if( file->index_filled == lineno + 1 ) goto return_line; return NULL; } // reallocate some memory if( file->index_filled == file->index_size ) { void *newblock = realloc( file->index, sizeof(uint8_t*) * ( file->index_size + file->map->size / 32 ) ); if( !newblock ) { fputs( "Could not allocate memory, exiting.\n", stderr); exit ( 1 ); } file->index_size += file->map->size / 32; file->index = newblock; } } return_line: *size = file->index[lineno] - file->index[lineno-1]; return file->index[lineno-1]; } // Reads all characters up to the next white space // from stdin, eof is treated as a white space // If more than 13 characters without white space // occur, we assume a too large line number and return // 0 which is an invalid line (these start at 1) and will // result in an empty line being printed // Rationale: // Since we wont support linenos greater 2^31 and this // is 12 characters + leading 0 in octal, we stop parsing // after 13 characters (technically, 001 is 01, but // constructing such strings to annoy parser writers must // be punished static int acquire_lineno( int c ) { char input[15]; int inputindex = 0, lineno; while ( (c != EOF) && !isspace(c)) { if( g_fromgrep && c == ':' ) { if( g_echolinenr ) putchar( ':' ); while( ( ( c = nextchar() ) != EOF ) && ( c != '\n' ) ) if( g_fromgrepverbatim ) putchar(c); if( g_fromgrepverbatim ) putchar( '\t' ); break; } if( g_echolinenr ) putchar( c ); if( inputindex < 14 ) input[inputindex++] = (char)c; c = nextchar(); } if( !g_fromgrep && g_echolinenr ) putchar( ':' ); if( inputindex == 14 ) return 0; input[inputindex] = 0; // Try to read if( sscanf( input, g_scanfmodifier, &lineno ) != 1 ) return 0; return lineno + g_zerobased; } static void usage() { fputs( "Usage: el [-0Ggnxh] [-i linenums] filename .. < linenum_file\n", stderr); } int main( int argc, char **argv ) { EL_FILE *textfiles; size_t i, textfiles_count; int c; while ((c = getopt(argc, argv, ":0i:gGnxh")) != -1) { switch (c) { case '0': g_zerobased = 1; break; case 'x': g_scanfmodifier = "%x"; break; case 'i': g_immediatelinenums = optarg; break; case 'n': g_echolinenr = 1; break; case 'G': g_fromgrepverbatim = 1; case 'g': g_fromgrep = 1; break; case 'h': case '?': default: usage(); exit(1); } } argc -= optind; argv += optind; if( argc < 1 ) { usage(); exit(1); } textfiles_count = argc; if( ( textfiles = malloc( textfiles_count * sizeof( EL_FILE ) ) ) == NULL ) { fputs( "Could not allocate memory, exiting.\n", stderr); exit(1); } for( i=0; isize < 32 ? 32 : textfiles[i].map->size / 32; if( (textfiles[i].index = malloc( sizeof(uint8_t*) * textfiles[i].index_size )) == NULL ) { fputs( "Could not allocate memory, exiting.\n", stderr); exit ( 1 ); } // First line starts at begin of file. textfiles[i].index[0] = textfiles[i].map->addr; textfiles[i].index_filled = 1; textfiles[i].all_lines_scanned = 0; } while( (c = nextchar()) != EOF ) { // get linenumber, pass on eof test char long lineno = acquire_lineno(c); for( i=0; i