diff options
author | erdgeist <> | 2007-12-17 13:23:27 +0000 |
---|---|---|
committer | erdgeist <> | 2007-12-17 13:23:27 +0000 |
commit | 0cfd1e575dae3a5705203b6b06b8a534a12ee652 (patch) | |
tree | ad9c95204430bf86504725905dc794c2c0bf5763 | |
parent | ac078bccf2bec2220233bb7ff40560da2131c10d (diff) |
Add documentation to our uri scanner
-rw-r--r-- | scan_urlencoded_query.c | 32 | ||||
-rw-r--r-- | scan_urlencoded_query.h | 6 |
2 files changed, 36 insertions, 2 deletions
diff --git a/scan_urlencoded_query.c b/scan_urlencoded_query.c index ba4bbd8..e0c2e30 100644 --- a/scan_urlencoded_query.c +++ b/scan_urlencoded_query.c | |||
@@ -14,6 +14,16 @@ | |||
14 | relax = "+" | "," | "/" | ";" | "<" | ">" | ":" | 14 | relax = "+" | "," | "/" | ";" | "<" | ">" | ":" |
15 | */ | 15 | */ |
16 | 16 | ||
17 | /* This matrix holds for each ascii character the information, | ||
18 | whether it is a non-terminating character for one of the three | ||
19 | scan states we are in, that is 'path', 'param' and 'value' from | ||
20 | /path?param=value&param=value, it is encoded in bit 0, 1 and 2 | ||
21 | respectively | ||
22 | |||
23 | The top bit of lower nibble indicates, whether this character is | ||
24 | a hard terminator, i.e. \0, \n or \s, where the whole scanning | ||
25 | process should terminate | ||
26 | */ | ||
17 | static const unsigned char is_unreserved[256] = { | 27 | static const unsigned char is_unreserved[256] = { |
18 | 8,0,0,0,0,0,0,0,0,0,8,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | 28 | 8,0,0,0,0,0,0,0,0,0,8,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
19 | 0,7,8,8,8,7,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,4,7,6, | 29 | 0,7,8,8,8,7,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,4,7,6, |
@@ -25,6 +35,7 @@ static const unsigned char is_unreserved[256] = { | |||
25 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 | 35 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 |
26 | }; | 36 | }; |
27 | 37 | ||
38 | /* Do a fast hex digit to nibble conversion */ | ||
28 | static unsigned char fromhex(unsigned char x) { | 39 | static unsigned char fromhex(unsigned char x) { |
29 | x-='0'; if( x<=9) return x; | 40 | x-='0'; if( x<=9) return x; |
30 | x&=~0x20; x-='A'-'0'; | 41 | x&=~0x20; x-='A'-'0'; |
@@ -32,12 +43,19 @@ static unsigned char fromhex(unsigned char x) { | |||
32 | return 0xff; | 43 | return 0xff; |
33 | } | 44 | } |
34 | 45 | ||
46 | /* Skip the value of a param=value pair */ | ||
35 | void scan_urlencoded_skipvalue( char **string ) { | 47 | void scan_urlencoded_skipvalue( char **string ) { |
36 | const unsigned char* s=*(const unsigned char**) string; | 48 | const unsigned char* s=*(const unsigned char**) string; |
37 | unsigned char f; | 49 | unsigned char f; |
38 | 50 | ||
51 | /* Since we are asked to skip the 'value', we assume to stop at | ||
52 | terminators for a 'value' string position */ | ||
39 | while( ( f = is_unreserved[ *s++ ] ) & SCAN_SEARCHPATH_VALUE ); | 53 | while( ( f = is_unreserved[ *s++ ] ) & SCAN_SEARCHPATH_VALUE ); |
54 | |||
55 | /* If we stopped at a hard terminator like \0 or \n, make the | ||
56 | next scan_urlencoded_query encounter it again */ | ||
40 | if( f & SCAN_SEARCHPATH_TERMINATOR ) --s; | 57 | if( f & SCAN_SEARCHPATH_TERMINATOR ) --s; |
58 | |||
41 | *string = (char*)s; | 59 | *string = (char*)s; |
42 | } | 60 | } |
43 | 61 | ||
@@ -46,21 +64,35 @@ ssize_t scan_urlencoded_query(char **string, char *deststring, SCAN_SEARCHPATH_F | |||
46 | unsigned char *d = (unsigned char*)deststring; | 64 | unsigned char *d = (unsigned char*)deststring; |
47 | unsigned char b, c, f; | 65 | unsigned char b, c, f; |
48 | 66 | ||
67 | /* This is the main decoding loop. | ||
68 | 'flags' determines which characters are non-terminating in the current context | ||
69 | (ie. stop at '=' and '&' if scanning for a 'param'; stop at '?' if scanning for the path ) | ||
70 | */ | ||
49 | while( ( f = is_unreserved[ c = *s++ ] ) & flags ) { | 71 | while( ( f = is_unreserved[ c = *s++ ] ) & flags ) { |
72 | |||
73 | /* When encountering an url escaped character, try to decode */ | ||
50 | if( c=='%') { | 74 | if( c=='%') { |
51 | if( ( b = fromhex(*s++) ) == 0xff ) return -1; | 75 | if( ( b = fromhex(*s++) ) == 0xff ) return -1; |
52 | if( ( c = fromhex(*s++) ) == 0xff ) return -1; | 76 | if( ( c = fromhex(*s++) ) == 0xff ) return -1; |
53 | c|=(b<<4); | 77 | c|=(b<<4); |
54 | } | 78 | } |
79 | |||
80 | /* Write (possibly decoded) character to output */ | ||
55 | *d++ = c; | 81 | *d++ = c; |
56 | } | 82 | } |
57 | 83 | ||
58 | switch( c ) { | 84 | switch( c ) { |
59 | case 0: case '\r': case '\n': case ' ': | 85 | case 0: case '\r': case '\n': case ' ': |
86 | /* If we started scanning on a hard terminator, indicate we've finished */ | ||
60 | if( d == (unsigned char*)deststring ) return -2; | 87 | if( d == (unsigned char*)deststring ) return -2; |
88 | |||
89 | /* Else make the next call to scan_urlencoded_query encounter it again */ | ||
61 | --s; | 90 | --s; |
62 | break; | 91 | break; |
63 | case '?': | 92 | case '?': |
93 | /* XXX to help us parse path?param=value?param=value?... sent by µTorrent 1600 | ||
94 | do not return an error but silently terminate | ||
95 | if( flags != SCAN_PATH ) return -1; */ | ||
64 | break; | 96 | break; |
65 | case '=': | 97 | case '=': |
66 | if( flags != SCAN_SEARCHPATH_PARAM ) return -1; | 98 | if( flags != SCAN_SEARCHPATH_PARAM ) return -1; |
diff --git a/scan_urlencoded_query.h b/scan_urlencoded_query.h index 4fa35c4..f0ad781 100644 --- a/scan_urlencoded_query.h +++ b/scan_urlencoded_query.h | |||
@@ -11,7 +11,8 @@ typedef enum { | |||
11 | SCAN_SEARCHPATH_TERMINATOR = 8 | 11 | SCAN_SEARCHPATH_TERMINATOR = 8 |
12 | } SCAN_SEARCHPATH_FLAG; | 12 | } SCAN_SEARCHPATH_FLAG; |
13 | 13 | ||
14 | /* string pointer to source, pointer to next scan position on return | 14 | /* string in: pointer to source |
15 | out: pointer to next scan position | ||
15 | deststring pointer to destination | 16 | deststring pointer to destination |
16 | flags determines, what to parse | 17 | flags determines, what to parse |
17 | returns number of valid converted characters in deststring | 18 | returns number of valid converted characters in deststring |
@@ -19,7 +20,8 @@ typedef enum { | |||
19 | */ | 20 | */ |
20 | ssize_t scan_urlencoded_query(char **string, char *deststring, SCAN_SEARCHPATH_FLAG flags); | 21 | ssize_t scan_urlencoded_query(char **string, char *deststring, SCAN_SEARCHPATH_FLAG flags); |
21 | 22 | ||
22 | /* string pointer to source, pointer to next scan position on return | 23 | /* string in: pointer to value of a param=value pair to skip |
24 | out: pointer to next scan position on return | ||
23 | */ | 25 | */ |
24 | void scan_urlencoded_skipvalue( char **string ); | 26 | void scan_urlencoded_skipvalue( char **string ); |
25 | 27 | ||