summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorerdgeist <>2007-12-17 13:23:27 +0000
committererdgeist <>2007-12-17 13:23:27 +0000
commit0cfd1e575dae3a5705203b6b06b8a534a12ee652 (patch)
treead9c95204430bf86504725905dc794c2c0bf5763
parentac078bccf2bec2220233bb7ff40560da2131c10d (diff)
Add documentation to our uri scanner
-rw-r--r--scan_urlencoded_query.c32
-rw-r--r--scan_urlencoded_query.h6
2 files changed, 36 insertions, 2 deletions
diff --git a/scan_urlencoded_query.c b/scan_urlencoded_query.c
index ba4bbd8..e0c2e30 100644
--- a/scan_urlencoded_query.c
+++ b/scan_urlencoded_query.c
@@ -14,6 +14,16 @@
14 relax = "+" | "," | "/" | ";" | "<" | ">" | ":" 14 relax = "+" | "," | "/" | ";" | "<" | ">" | ":"
15*/ 15*/
16 16
17/* This matrix holds for each ascii character the information,
18 whether it is a non-terminating character for one of the three
19 scan states we are in, that is 'path', 'param' and 'value' from
20 /path?param=value&param=value, it is encoded in bit 0, 1 and 2
21 respectively
22
23 The top bit of lower nibble indicates, whether this character is
24 a hard terminator, i.e. \0, \n or \s, where the whole scanning
25 process should terminate
26 */
17static const unsigned char is_unreserved[256] = { 27static const unsigned char is_unreserved[256] = {
18 8,0,0,0,0,0,0,0,0,0,8,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 28 8,0,0,0,0,0,0,0,0,0,8,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
19 0,7,8,8,8,7,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,4,7,6, 29 0,7,8,8,8,7,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,4,7,6,
@@ -25,6 +35,7 @@ static const unsigned char is_unreserved[256] = {
25 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 35 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
26}; 36};
27 37
38/* Do a fast hex representation to nibble conversion; yields 0xff for non-hex input */
28static unsigned char fromhex(unsigned char x) { 39static unsigned char fromhex(unsigned char x) {
29 x-='0'; if( x<=9) return x; 40 x-='0'; if( x<=9) return x;
30 x&=~0x20; x-='A'-'0'; 41 x&=~0x20; x-='A'-'0';
@@ -32,12 +43,19 @@ static unsigned char fromhex(unsigned char x) {
32 return 0xff; 43 return 0xff;
33} 44}
34 45
46/* Skip the value of a param=value pair */
35void scan_urlencoded_skipvalue( char **string ) { 47void scan_urlencoded_skipvalue( char **string ) {
36 const unsigned char* s=*(const unsigned char**) string; 48 const unsigned char* s=*(const unsigned char**) string;
37 unsigned char f; 49 unsigned char f;
38 50
51 /* Since we are asked to skip the 'value', we assume to stop at
52 terminators for a 'value' string position */
39 while( ( f = is_unreserved[ *s++ ] ) & SCAN_SEARCHPATH_VALUE ); 53 while( ( f = is_unreserved[ *s++ ] ) & SCAN_SEARCHPATH_VALUE );
54
55 /* If we stopped at a hard terminator like \0 or \n, make the
56 next scan_urlencoded_query encounter it again */
40 if( f & SCAN_SEARCHPATH_TERMINATOR ) --s; 57 if( f & SCAN_SEARCHPATH_TERMINATOR ) --s;
58
41 *string = (char*)s; 59 *string = (char*)s;
42} 60}
43 61
@@ -46,21 +64,35 @@ ssize_t scan_urlencoded_query(char **string, char *deststring, SCAN_SEARCHPATH_F
46 unsigned char *d = (unsigned char*)deststring; 64 unsigned char *d = (unsigned char*)deststring;
47 unsigned char b, c, f; 65 unsigned char b, c, f;
48 66
67 /* This is the main decoding loop.
68 'flags' determines which characters are non-terminating in the current context
69 (ie. stop at '=' and '&' if scanning for a 'param'; stop at '?' if scanning for the path )
70 */
49 while( ( f = is_unreserved[ c = *s++ ] ) & flags ) { 71 while( ( f = is_unreserved[ c = *s++ ] ) & flags ) {
72
73 /* When encountering an url escaped character, try to decode */
50 if( c=='%') { 74 if( c=='%') {
51 if( ( b = fromhex(*s++) ) == 0xff ) return -1; 75 if( ( b = fromhex(*s++) ) == 0xff ) return -1;
52 if( ( c = fromhex(*s++) ) == 0xff ) return -1; 76 if( ( c = fromhex(*s++) ) == 0xff ) return -1;
53 c|=(b<<4); 77 c|=(b<<4);
54 } 78 }
79
80 /* Write (possibly decoded) character to output */
55 *d++ = c; 81 *d++ = c;
56 } 82 }
57 83
58 switch( c ) { 84 switch( c ) {
59 case 0: case '\r': case '\n': case ' ': 85 case 0: case '\r': case '\n': case ' ':
86 /* If we started scanning on a hard terminator, indicate we've finished */
60 if( d == (unsigned char*)deststring ) return -2; 87 if( d == (unsigned char*)deststring ) return -2;
88
89 /* Else make the next call to scan_urlencoded_query encounter it again */
61 --s; 90 --s;
62 break; 91 break;
63 case '?': 92 case '?':
93 /* XXX to help us parse path?param=value?param=value?... sent by µTorrent 1600
94 do not return an error but silently terminate
95 if( flags != SCAN_PATH ) return -1; */
64 break; 96 break;
65 case '=': 97 case '=':
66 if( flags != SCAN_SEARCHPATH_PARAM ) return -1; 98 if( flags != SCAN_SEARCHPATH_PARAM ) return -1;
diff --git a/scan_urlencoded_query.h b/scan_urlencoded_query.h
index 4fa35c4..f0ad781 100644
--- a/scan_urlencoded_query.h
+++ b/scan_urlencoded_query.h
@@ -11,7 +11,8 @@ typedef enum {
11 SCAN_SEARCHPATH_TERMINATOR = 8 11 SCAN_SEARCHPATH_TERMINATOR = 8
12} SCAN_SEARCHPATH_FLAG; 12} SCAN_SEARCHPATH_FLAG;
13 13
14/* string pointer to source, pointer to next scan position on return 14/* string in: pointer to source
15 out: pointer to next scan position
15 deststring pointer to destination 16 deststring pointer to destination
16 flags determines, what to parse 17 flags determines, what to parse
17 returns number of valid converted characters in deststring 18 returns number of valid converted characters in deststring
@@ -19,7 +20,8 @@ typedef enum {
19*/ 20*/
20ssize_t scan_urlencoded_query(char **string, char *deststring, SCAN_SEARCHPATH_FLAG flags); 21ssize_t scan_urlencoded_query(char **string, char *deststring, SCAN_SEARCHPATH_FLAG flags);
21 22
22/* string pointer to source, pointer to next scan position on return 23/* string in: pointer to value of a param=value pair to skip
24 out: pointer to next scan position on return
23*/ 25*/
24void scan_urlencoded_skipvalue( char **string ); 26void scan_urlencoded_skipvalue( char **string );
25 27