| OLD | NEW |
| (Empty) | |
| 1 #!/usr/bin/env python |
| 2 |
| 3 """ |
| 4 shared HTTP infrastructure |
| 5 |
| 6 This module contains utility functions for nbhttp and a base class |
| 7 for the parsing portions of the client and server. |
| 8 """ |
| 9 |
| 10 __author__ = "Mark Nottingham <mnot@mnot.net>" |
| 11 __copyright__ = """\ |
| 12 Copyright (c) 2008-2009 Mark Nottingham |
| 13 |
| 14 Permission is hereby granted, free of charge, to any person obtaining a copy |
| 15 of this software and associated documentation files (the "Software"), to deal |
| 16 in the Software without restriction, including without limitation the rights |
| 17 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 18 copies of the Software, and to permit persons to whom the Software is |
| 19 furnished to do so, subject to the following conditions: |
| 20 |
| 21 The above copyright notice and this permission notice shall be included in |
| 22 all copies or substantial portions of the Software. |
| 23 |
| 24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 25 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 26 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 27 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 28 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 29 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| 30 THE SOFTWARE. |
| 31 """ |
| 32 |
| 33 import re |
# Pattern for folded header lines: a line break followed by indentation.
lws = re.compile("\r?\n[ \t]+", re.M)
# Pattern for the blank line that terminates a header block.
hdr_end = re.compile(r"\r?\n\r?\n", re.M)
# Line terminator used when serialising and when parsing chunk framing.
linesep = "\r\n"

# conn_modes: how the end of a message body is delimited
CLOSE = 'close'
COUNTED = 'counted'
CHUNKED = 'chunked'
NOBODY = 'nobody'

# states of the input / output state machines
WAITING = 1
HEADERS_DONE = 2

idempotent_methods = [
    'GET', 'HEAD', 'PUT', 'DELETE', 'OPTIONS', 'TRACE',
]
safe_methods = [
    'GET', 'HEAD', 'OPTIONS', 'TRACE',
]
no_body_status = [
    '100', '101', '204', '304',
]
hop_by_hop_hdrs = [
    'connection',
    'keep-alive',
    'proxy-authenticate',
    'proxy-authorization',
    'te',
    'trailers',
    'transfer-encoding',
    'upgrade',
    'proxy-connection',
]
| 50 |
| 51 |
| 52 from error import ERR_EXTRA_DATA, ERR_CHUNK, ERR_BODY_FORBIDDEN |
| 53 |
def dummy(*args, **kw):
    """
    No-op callable: accepts any arguments, does nothing and returns None.

    Useful as a stand-in wherever a callback should be ignored.
    """
    return None
| 57 |
def header_dict(header_tuple, strip=None):
    """
    Given a header tuple, return a dictionary keyed upon the lower-cased
    header names.

    header_tuple is an iterable of (name, value) pairs; names and values
    have surrounding whitespace removed.

    If strip is defined, each header listed (by lower-cased name) will not be
    returned in the dictionary.

    When a header name occurs more than once, only the last value is kept.
    """
    # TODO: return a list of values; currently destructive.
    if strip is None:
        strip = []
    result = {}
    for (n, v) in header_tuple:
        key = n.strip().lower()
        if key in strip:
            continue  # caller asked for this header to be left out
        result[key] = v.strip()
    return result
| 70 |
def get_hdr(hdr_tuples, name):
    """
    Given a list of (name, value) header tuples and a header name (lowercase),
    return a list of all values for that header.

    Header names are compared after removing surrounding whitespace and
    lower-casing them, so a name stored with stray whitespace still matches.

    This includes header lines with multiple values separated by a comma;
    such headers will be split into separate values. As a result, it is NOT
    safe to use this on headers whose values may include a comma (e.g.,
    Set-Cookie, or any value with a quoted string).

    Returns an empty list when no header matches.
    """
    # TODO: support quoted strings
    values = []
    for hdr in hdr_tuples:
        if hdr[0].strip().lower() != name:
            continue
        # extend() keeps this linear; repeated list concatenation is not.
        values.extend([v.strip() for v in hdr[1].split(',')])
    return values
| 87 |
| 88 |
class HttpMessageHandler:
    """
    This is a base class for something that has to parse and/or serialise
    HTTP messages, request or response.

    For parsing, it expects you to override _input_start, _input_body,
    _input_end and _input_error, and call _handle_input when you get bytes
    from the network.

    For serialising, it expects you to override _output.
    """

    def __init__(self):
        self._input_buffer = ""       # unparsed input held until more arrives
        self._input_state = WAITING
        self._input_delimit = None    # CLOSE, COUNTED, CHUNKED or NOBODY
        # Body bytes still expected; -1 means "waiting for a chunk-size line".
        self._input_body_left = 0
        self._output_state = WAITING
        self._output_delimit = None

    # input-related methods

    def _input_start(self, top_line, hdr_tuples, conn_tokens, transfer_codes,
                     content_length):
        """
        Take the top set of headers from the input stream, parse them
        and queue the request to be processed by the application.

        Returns boolean allows_body to indicate whether the message allows a
        body. Raising ValueError makes _parse_headers abandon the message.
        """
        raise NotImplementedError

    def _input_body(self, chunk):
        "Process a body chunk from the wire."
        raise NotImplementedError

    def _input_end(self):
        "Indicate that the response body is complete."
        raise NotImplementedError

    def _input_error(self, err, detail=None):
        "Indicate a parsing problem with the body."
        raise NotImplementedError

    def _handle_input(self, instr):
        """
        Given a chunk of input, figure out what state we're in and handle it,
        making the appropriate calls.

        Raises Exception if the input state or the body delimiter is unknown.
        """
        if self._input_buffer != "":
            # will need to move to a list if writev comes around
            instr = self._input_buffer + instr
            self._input_buffer = ""
        if self._input_state == WAITING:
            if hdr_end.search(instr):  # found a complete header block
                rest = self._parse_headers(instr)
                self._handle_input(rest)
            else:  # partial headers; store it and wait for more
                self._input_buffer = instr
        elif self._input_state == HEADERS_DONE:
            # Look the handler up separately from calling it, so that an
            # AttributeError raised inside the handler (or a callback it
            # invokes) is not misreported as an unknown delimiter.
            handler = getattr(self, '_handle_%s' % self._input_delimit, None)
            if handler is None:
                raise Exception(
                    "Unknown input delimiter %s" % self._input_delimit)
            handler(instr)
        else:
            raise Exception("Unknown state %s" % self._input_state)

    def _handle_nobody(self, instr):
        "Handle input that shouldn't have a body."
        if instr:
            # FIXME: will not work with pipelining
            self._input_error(ERR_BODY_FORBIDDEN, instr)
        else:
            self._input_end()
        self._input_state = WAITING

    def _handle_close(self, instr):
        "Handle input where the body is delimited by the connection closing."
        self._input_body(instr)

    def _handle_chunked(self, instr):
        "Handle input where the body is delimited by chunked encoding."
        # Each helper returns the unconsumed remainder, or None/"" when there
        # is nothing left to process in this call.
        while instr:
            if self._input_body_left < 0:  # new chunk
                instr = self._handle_chunk_new(instr)
            elif self._input_body_left > 0:  # in the middle of a chunk
                instr = self._handle_chunk_body(instr)
            else:  # last chunk seen; body is done
                instr = self._handle_chunk_done(instr)

    def _handle_chunk_new(self, instr):
        """
        Parse a chunk-size line from the start of instr.

        Returns the input following that line, or None when nothing more can
        be processed now (incomplete line buffered, or an error reported).
        """
        try:
            # they really need to use CRLF
            chunk_size, rest = instr.split(linesep, 1)
        except ValueError:
            # no CRLF yet, so the chunk-size line is incomplete.. wait a bit
            if len(instr) > 256:
                # OK, this is absurd...
                self._input_error(ERR_CHUNK, instr)
            else:
                self._input_buffer += instr
            return
        if chunk_size.strip() == "":  # ignore bare lines
            # Hand the remainder back to the _handle_chunked loop instead of
            # recursing, so a long run of bare lines cannot exhaust the stack.
            return rest
        if ";" in chunk_size:  # ignore chunk extensions
            chunk_size = chunk_size.split(";", 1)[0]
        try:
            size = int(chunk_size, 16)
            if size < 0:
                # a negative size would collide with the -1 sentinel
                raise ValueError(chunk_size)
        except ValueError:
            self._input_error(ERR_CHUNK, chunk_size)
            return  # blow up if we can't process a chunk.
        self._input_body_left = size
        return rest

    def _handle_chunk_body(self, instr):
        """
        Deliver chunk data from instr to _input_body.

        Returns the input that follows the chunk, or None when instr was
        consumed entirely.
        """
        if self._input_body_left < len(instr):  # got more than the chunk
            this_chunk = self._input_body_left
            self._input_body(instr[:this_chunk])
            self._input_body_left = -1
            return instr[this_chunk + 2:]  # +2 consumes the CRLF
        elif self._input_body_left == len(instr):  # got the chunk exactly
            self._input_body(instr)
            self._input_body_left = -1
        else:  # got partial chunk
            self._input_body(instr)
            self._input_body_left -= len(instr)

    def _handle_chunk_done(self, instr):
        """
        Handle what follows the zero-length last chunk: either the closing
        CRLF or a block of trailers. Input after the end of the message is
        dropped. Always returns None.
        """
        if len(instr) >= 2 and instr[:2] == linesep:
            self._input_state = WAITING
            self._input_end()
            # TODO: pipelining -- instr[2:] is currently discarded
        elif hdr_end.search(instr):  # trailers
            self._input_state = WAITING
            self._input_end()
            # TODO: process trailers; pipelining -- the rest is discarded
        else:  # don't have full trailers yet
            self._input_buffer = instr

    def _handle_counted(self, instr):
        "Handle input where the body is delimited by the Content-Length."
        assert self._input_body_left >= 0, \
            "message counting problem (%s)" % self._input_body_left
        # process body
        if self._input_body_left <= len(instr):  # got it all (and more?)
            self._input_body(instr[:self._input_body_left])
            self._input_state = WAITING
            extra = instr[self._input_body_left:]
            if extra:
                # This will catch extra input that isn't on packet boundaries.
                self._input_error(ERR_EXTRA_DATA, extra)
            else:
                self._input_end()
        else:  # got some of it
            self._input_body(instr)
            self._input_body_left -= len(instr)

    def _parse_headers(self, instr):
        """
        Given a string that we know contains a header block (possibly more),
        parse the headers out and return the rest. Calls self._input_start
        to kick off processing.

        Returns "" when the header block is empty or _input_start raises
        ValueError; in both cases the input state is left unchanged.
        """
        top, rest = hdr_end.split(instr, 1)
        hdr_lines = lws.sub(" ", top).splitlines()  # Fold LWS
        try:
            top_line = hdr_lines.pop(0)
        except IndexError:  # empty
            return ""
        hdr_tuples = []
        conn_tokens = []
        transfer_codes = []
        content_length = None
        for line in hdr_lines:
            if ":" not in line:
                continue  # TODO: flesh out bad header handling
            fn, fv = line.split(":", 1)
            hdr_tuples.append((fn, fv))
            f_name = fn.strip().lower()
            f_val = fv.strip()

            # parse connection-related headers
            if f_name == "connection":
                conn_tokens += [v.strip().lower() for v in f_val.split(',')]
            elif f_name == "transfer-encoding":  # FIXME: parameters
                transfer_codes += [
                    v.strip().lower() for v in f_val.split(',')]
            elif f_name == "content-length":
                if content_length is not None:
                    continue  # ignore any C-L past the first.
                try:
                    parsed_length = int(f_val)
                except ValueError:
                    continue
                # A negative length is as unusable as a non-numeric one, and
                # would otherwise trip the assertion in _handle_counted.
                if parsed_length >= 0:
                    content_length = parsed_length

        # FIXME: WSP between name and colon; request = 400, response = discard
        # TODO: remove *and* ignore conn tokens if the message was 1.0

        # ignore content-length if transfer-encoding is present
        if transfer_codes:
            content_length = None

        try:
            allows_body = self._input_start(top_line, hdr_tuples,
                conn_tokens, transfer_codes, content_length)
        except ValueError:  # parsing error of some kind; abort.
            return ""

        self._input_state = HEADERS_DONE
        if not allows_body:
            self._input_delimit = NOBODY
        elif transfer_codes:
            if 'chunked' in transfer_codes:
                self._input_delimit = CHUNKED
                self._input_body_left = -1  # flag that we don't know
            else:
                self._input_delimit = CLOSE
        elif content_length is not None:
            self._input_delimit = COUNTED
            self._input_body_left = content_length
        else:
            self._input_delimit = CLOSE
        return rest

    ### output-related methods

    def _output(self, out):
        "Write out to the wire; must be overridden."
        raise NotImplementedError

    def _handle_error(self, err):
        "Handle an error; must be overridden."
        raise NotImplementedError

    def _output_start(self, top_line, hdr_tuples, delimit):
        """
        Start outputting a HTTP message.

        Writes top_line followed by one "name: value" line per entry of
        hdr_tuples and a terminating blank line, and records delimit as the
        way the body will be delimited.
        """
        self._output_delimit = delimit
        # TODO: strip whitespace?
        out = linesep.join(
            [top_line] +
            ["%s: %s" % (k, v) for k, v in hdr_tuples] +
            ["", ""]
        )
        self._output(out)
        self._output_state = HEADERS_DONE

    def _output_body(self, chunk):
        """
        Output a part of a HTTP message.

        Empty chunks are ignored. When the output is chunked, the data is
        framed with its length in hexadecimal.
        """
        if not chunk:
            return
        if self._output_delimit == CHUNKED:
            # %x gives the bare hex digits directly.
            chunk = "%x\r\n%s\r\n" % (len(chunk), chunk)
        self._output(chunk)
        # FIXME: body counting
        # self._output_body_sent += len(chunk)
        # assert self._output_body_sent <= self._output_content_length, \
        #     "Too many body bytes sent"

    def _output_end(self, err):
        """
        Finish outputting a HTTP message.

        If err is true, the output callbacks are replaced with no-ops and the
        connection is closed. Otherwise the message is terminated according
        to the output delimiter.

        Raises AssertionError if the output delimiter is unknown.
        """
        # NOTE(review): _tcp_conn, output_body_cb and output_done_cb are not
        # set by this class; presumably subclasses provide them -- confirm.
        if err:
            self.output_body_cb, self.output_done_cb = dummy, dummy
            self._tcp_conn.close()
            self._tcp_conn = None
        elif self._output_delimit == NOBODY:
            pass  # didn't have a body at all.
        elif self._output_delimit == CHUNKED:
            self._output("0\r\n\r\n")
        elif self._output_delimit == COUNTED:
            pass  # TODO: double-check the length
        elif self._output_delimit == CLOSE:
            self._tcp_conn.close()  # FIXME: abstract out?
        else:
            raise AssertionError(
                "Unknown request delimiter %s" % self._output_delimit)
| OLD | NEW |