summary | refs | log | tree | commit | diff
path: root/scan_urlencoded_query.c
diff options
context:
space:
mode:
author: erdgeist <> 2007-10-18 00:21:22 +0000
committer: erdgeist <> 2007-10-18 00:21:22 +0000
commit: 243d5961d0425b199319967e1c296c5d0124f3f2 (patch)
tree: 6b8c085b95398b1daac7e4ed9112770a729cea4a /scan_urlencoded_query.c
parent: be117f96a22e0d6d1052c2fea4991cb65dec731f (diff)
Fixed one performance bug: when "skipping values from a &param=values pair" was requested, the requestor ended up with "values" being parsed again.
Improved performance of fromhex. Improved performance of is_unreserved() by moving it all into a simple byte array. Improved performance of %41 => 'A' conversion by reordering variables.
Diffstat (limited to 'scan_urlencoded_query.c')
-rw-r--r-- scan_urlencoded_query.c 41
1 files changed, 26 insertions, 15 deletions
diff --git a/scan_urlencoded_query.c b/scan_urlencoded_query.c
index 296e829..f61d79e 100644
--- a/scan_urlencoded_query.c
+++ b/scan_urlencoded_query.c
@@ -12,39 +12,51 @@
12 we add '%' to the matrix to not stop at encoded chars. 12 we add '%' to the matrix to not stop at encoded chars.
13 After losing too many requests to being too strict, add the following characters to reserved matrix 13 After losing too many requests to being too strict, add the following characters to reserved matrix
14 relax = "+" | "," | "/" | ";" | "<" | ">" | ":" 14 relax = "+" | "," | "/" | ";" | "<" | ">" | ":"
15
16static const unsigned char reserved_matrix_strict[] = { 0xA2, 0x67, 0xFF, 0x03, 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x47};
17*/ 15*/
18static const unsigned char reserved_matrix[] = { 0xA2, 0xFF, 0xFF, 0x5F, 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x47};
19 16
20static int is_unreserved( unsigned char c ) { 17static const unsigned char is_unreserved[256] = {
21 if( ( c <= 32 ) || ( c >= 127 ) ) return 0; return 1&(reserved_matrix[(c-32)>>3]>>(c&7)); 18 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
19 0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,
20 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,
21 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,0,
22 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
23 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
24 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
25 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
26};
27
28static unsigned char fromhex(unsigned char c) {
29 if (c>='0' && c<='9') return c-'0';
30 c &= 0xdf; /* Toggle off lower case bit */
31 if (c>='A' && c<='F') return c-'A'+10;
32 return 0xff;
22} 33}
23 34
24ssize_t scan_urlencoded_query(char **string, char *deststring, int flags) { 35ssize_t scan_urlencoded_query(char **string, char *deststring, int flags) {
25 register const unsigned char* s=*(const unsigned char**) string; 36 const unsigned char* s=*(const unsigned char**) string;
26 unsigned char *d = (unsigned char*)deststring; 37 unsigned char *d = (unsigned char*)deststring;
27 register unsigned char b, c; 38 register unsigned char b, c;
28 39
29retry_parsing: 40retry_parsing:
30 while( is_unreserved( c = *s++) ) { 41 while( is_unreserved[ c = *s++ ] ) {
31 if( c=='%') { 42 if( c=='%') {
32 if( ( c = scan_fromhex(*s++) ) == 0xff ) return -1; 43 if( ( b = fromhex(*s++) ) == 0xff ) return -1;
33 if( ( b = scan_fromhex(*s++) ) == 0xff ) return -1; 44 if( ( c = fromhex(*s++) ) == 0xff ) return -1;
34 c=(c<<4)|b; 45 c|=(b<<4);
35 } 46 }
36 if( d ) *d++ = c; 47 if( d ) *d++ = c;
37 } 48 }
38 49
39 switch( c ) { 50 switch( c ) {
40 case 0: case '\r': case '\n': case ' ': 51 case 0: case '\r': case '\n': case ' ':
41 if( d == (unsigned char*)deststring ) return -2; 52 if( d && ( d == (unsigned char*)deststring ) ) return -2;
42 --s; 53 --s;
43 break; 54 break;
44 case '?': 55 case '?':
45 if( flags == SCAN_PATH ) goto found_terminator; 56 if( flags != SCAN_PATH ) {
46 if( d ) *d++ = c; 57 if( d ) *d++ = c;
47 goto retry_parsing; 58 goto retry_parsing;
59 }
48 break; 60 break;
49 case '=': 61 case '=':
50 if( flags != SCAN_SEARCHPATH_PARAM ) return -1; 62 if( flags != SCAN_SEARCHPATH_PARAM ) return -1;
@@ -57,7 +69,6 @@ retry_parsing:
57 return -1; 69 return -1;
58 } 70 }
59 71
60found_terminator:
61 *string = (char *)s; 72 *string = (char *)s;
62 return d - (unsigned char*)deststring; 73 return d - (unsigned char*)deststring;
63} 74}