Tools/Scripts/webkitpy/thirdparty/webpagereplay/third_party/nbhttp/http_common.py - Issue 18418010: Check in the thirdparty libs needed for webkitpy.

Side by Side Diff: Tools/Scripts/webkitpy/thirdparty/webpagereplay/third_party/nbhttp/http_common.py

Issue 18418010: Check in the thirdparty libs needed for webkitpy. (Closed) Base URL: svn://svn.chromium.org/blink/trunk

Patch Set: Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « Tools/Scripts/webkitpy/thirdparty/webpagereplay/third_party/nbhttp/error.py ('k') | Tools/Scripts/webkitpy/thirdparty/webpagereplay/third_party/nbhttp/push_tcp.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 #!/usr/bin/env python

	2

	3 """

	4 shared HTTP infrastructure

	5

	6 This module contains utility functions for nbhttp and a base class

	7 for the parsing portions of the client and server.

	8 """

	9

	10 __author__ = "Mark Nottingham <mnot@mnot.net>"

	11 __copyright__ = """\

	12 Copyright (c) 2008-2009 Mark Nottingham

	13

	14 Permission is hereby granted, free of charge, to any person obtaining a copy

	15 of this software and associated documentation files (the "Software"), to deal

	16 in the Software without restriction, including without limitation the rights

	17 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell

	18 copies of the Software, and to permit persons to whom the Software is

	19 furnished to do so, subject to the following conditions:

	20

	21 The above copyright notice and this permission notice shall be included in

	22 all copies or substantial portions of the Software.

	23

	24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

	25 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

	26 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

	27 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

	28 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

	29 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN

	30 THE SOFTWARE.

	31 """

	32

	33 import re

	34 lws = re.compile("\r?\n[ \t]+", re.M)

	35 hdr_end = re.compile(r"\r?\n\r?\n", re.M)

	36 linesep = "\r\n"

	37

	38 # conn_modes

	39 CLOSE, COUNTED, CHUNKED, NOBODY = 'close', 'counted', 'chunked', 'nobody'

	40

	41 # states

	42 WAITING, HEADERS_DONE = 1, 2

	43

	44 idempotent_methods = ['GET', 'HEAD', 'PUT', 'DELETE', 'OPTIONS', 'TRACE']

	45 safe_methods = ['GET', 'HEAD', 'OPTIONS', 'TRACE']

	46 no_body_status = ['100', '101', '204', '304']

	47 hop_by_hop_hdrs = ['connection', 'keep-alive', 'proxy-authenticate',

	48 'proxy-authorization', 'te', 'trailers', 'transfer-encoding',

	49 'upgrade', 'proxy-connection']

	50

	51

	52 from error import ERR_EXTRA_DATA, ERR_CHUNK, ERR_BODY_FORBIDDEN

	53

	54 def dummy(args, *kw):

	55 "Dummy method that does nothing; useful to ignore a callback."

	56 pass

	57

	58 def header_dict(header_tuple, strip=None):

	59 """

	60 Given a header tuple, return a dictionary keyed upon the lower-cased

	61 header names.

	62

	63 If strip is defined, each header listed (by lower-cased name) will not be

	64 returned in the dictionary.

	65 """

	66 # TODO: return a list of values; currently destructive.

	67 if strip == None:

	68 strip = []

	69 return dict([(n.strip().lower(), v.strip()) for (n, v) in header_tuple])

	70

	71 def get_hdr(hdr_tuples, name):

	72 """

	73 Given a list of (name, value) header tuples and a header name (lowercase),

	74 return a list of all values for that header.

	75

	76 This includes header lines with multiple values separated by a comma;

	77 such headers will be split into separate values. As a result, it is NOT

	78 safe to use this on headers whose values may include a comma (e.g.,

	79 Set-Cookie, or any value with a quoted string).

	80 """

	81 # TODO: support quoted strings

	82 return [v.strip() for v in sum(

	83 [l.split(',') for l in

	84 [i[1] for i in hdr_tuples if i[0].lower() == name]

	85 ]

	86 , [])]

	87

	88

	89 class HttpMessageHandler:

	90 """

	91 This is a base class for something that has to parse and/or serialise

	92 HTTP messages, request or response.

	93

	94 For parsing, it expects you to override _input_start, _input_body and

	95 _input_end, and call _handle_input when you get bytes from the network.

	96

	97 For serialising, it expects you to override _output.

	98 """

	99

	100 def __init__(self):

	101 self._input_buffer = ""

	102 self._input_state = WAITING

	103 self._input_delimit = None

	104 self._input_body_left = 0

	105 self._output_state = WAITING

	106 self._output_delimit = None

	107

	108 # input-related methods

	109

	110 def _input_start(self, top_line, hdr_tuples, conn_tokens, transfer_codes, co ntent_length):

	111 """

	112 Take the top set of headers from the input stream, parse them

	113 and queue the request to be processed by the application.

	114

	115 Returns boolean allows_body to indicate whether the message allows a

	116 body.

	117 """

	118 raise NotImplementedError

	119

	120 def _input_body(self, chunk):

	121 "Process a body chunk from the wire."

	122 raise NotImplementedError

	123

	124 def _input_end(self):

	125 "Indicate that the response body is complete."

	126 raise NotImplementedError

	127

	128 def _input_error(self, err, detail=None):

	129 "Indicate a parsing problem with the body."

	130 raise NotImplementedError

	131

	132 def _handle_input(self, instr):

	133 """

	134 Given a chunk of input, figure out what state we're in and handle it,

	135 making the appropriate calls.

	136 """

	137 if self._input_buffer != "":

	138 instr = self._input_buffer + instr # will need to move to a list if writev comes around

	139 self._input_buffer = ""

	140 if self._input_state == WAITING:

	141 if hdr_end.search(instr): # found one

	142 rest = self._parse_headers(instr)

	143 self._handle_input(rest)

	144 else: # partial headers; store it and wait for more

	145 self._input_buffer = instr

	146 elif self._input_state == HEADERS_DONE:

	147 try:

	148 getattr(self, '_handle_%s' % self._input_delimit)(instr)

	149 except AttributeError:

	150 raise Exception, "Unknown input delimiter %s" % self._input_deli mit

	151 else:

	152 raise Exception, "Unknown state %s" % self._input_state

	153

	154 def _handle_nobody(self, instr):

	155 "Handle input that shouldn't have a body."

	156 if instr:

	157 self._input_error(ERR_BODY_FORBIDDEN, instr) # FIXME: will not work with pipelining

	158 else:

	159 self._input_end()

	160 self._input_state = WAITING

	161 # self._handle_input(instr)

	162

	163 def _handle_close(self, instr):

	164 "Handle input where the body is delimited by the connection closing."

	165 self._input_body(instr)

	166

	167 def _handle_chunked(self, instr):

	168 "Handle input where the body is delimited by chunked encoding."

	169 while instr:

	170 if self._input_body_left < 0: # new chunk

	171 instr = self._handle_chunk_new(instr)

	172 elif self._input_body_left > 0: # we're in the middle of reading a c hunk

	173 instr = self._handle_chunk_body(instr)

	174 elif self._input_body_left == 0: # body is done

	175 instr = self._handle_chunk_done(instr)

	176

	177 def _handle_chunk_new(self, instr):

	178 try:

	179 # they really need to use CRLF

	180 chunk_size, rest = instr.split(linesep, 1)

	181 except ValueError:

	182 # got a CRLF without anything behind it.. wait a bit

	183 if len(instr) > 256:

	184 # OK, this is absurd...

	185 self._input_error(ERR_CHUNK, instr)

	186 else:

	187 self._input_buffer += instr

	188 return

	189 if chunk_size.strip() == "": # ignore bare lines

	190 self._handle_chunked(rest) # FIXME: recursion

	191 return

	192 if ";" in chunk_size: # ignore chunk extensions

	193 chunk_size = chunk_size.split(";", 1)[0]

	194 try:

	195 self._input_body_left = int(chunk_size, 16)

	196 except ValueError:

	197 self._input_error(ERR_CHUNK, chunk_size)

	198 return # blow up if we can't process a chunk.

	199 return rest

	200

	201 def _handle_chunk_body(self, instr):

	202 if self._input_body_left < len(instr): # got more than the chunk

	203 this_chunk = self._input_body_left

	204 self._input_body(instr[:this_chunk])

	205 self._input_body_left = -1

	206 return instr[this_chunk+2:] # +2 consumes the CRLF

	207 elif self._input_body_left == len(instr): # got the whole chunk exactly

	208 self._input_body(instr)

	209 self._input_body_left = -1

	210 else: # got partial chunk

	211 self._input_body(instr)

	212 self._input_body_left -= len(instr)

	213

	214 def _handle_chunk_done(self, instr):

	215 if len(instr) >= 2 and instr[:2] == linesep:

	216 self._input_state = WAITING

	217 self._input_end()

	218 # self._handle_input(instr[2:]) # pipelining

	219 elif hdr_end.search(instr): # trailers

	220 self._input_state = WAITING

	221 self._input_end()

	222 trailers, rest = hdr_end.split(instr, 1) # TODO: process trailers

	223 # self._handle_input(rest) # pipelining

	224 else: # don't have full headers yet

	225 self._input_buffer = instr

	226

	227 def _handle_counted(self, instr):

	228 "Handle input where the body is delimited by the Content-Length."

	229 assert self._input_body_left >= 0, \

	230 "message counting problem (%s)" % self._input_body_left

	231 # process body

	232 if self._input_body_left <= len(instr): # got it all (and more?)

	233 self._input_body(instr[:self._input_body_left])

	234 self._input_state = WAITING

	235 if instr[self._input_body_left:]:

	236 # This will catch extra input that isn't on packet boundaries.

	237 self._input_error(ERR_EXTRA_DATA, instr[self._input_body_left:])

	238 else:

	239 self._input_end()

	240 else: # got some of it

	241 self._input_body(instr)

	242 self._input_body_left -= len(instr)

	243

	244 def _parse_headers(self, instr):

	245 """

	246 Given a string that we knows contains a header block (possibly more),

	247 parse the headers out and return the rest. Calls self._input_start

	248 to kick off processing.

	249 """

	250 top, rest = hdr_end.split(instr, 1)

	251 hdr_lines = lws.sub(" ", top).splitlines() # Fold LWS

	252 try:

	253 top_line = hdr_lines.pop(0)

	254 except IndexError: # empty

	255 return ""

	256 hdr_tuples = []

	257 conn_tokens = []

	258 transfer_codes = []

	259 content_length = None

	260 for line in hdr_lines:

	261 try:

	262 fn, fv = line.split(":", 1)

	263 hdr_tuples.append((fn, fv))

	264 except ValueError:

	265 continue # TODO: flesh out bad header handling

	266 f_name = fn.strip().lower()

	267 f_val = fv.strip()

	268

	269 # parse connection-related headers

	270 if f_name == "connection":

	271 conn_tokens += [v.strip().lower() for v in f_val.split(',')]

	272 elif f_name == "transfer-encoding": # FIXME: parameters

	273 transfer_codes += [v.strip().lower() for v in f_val.split(',')]

	274 elif f_name == "content-length":

	275 if content_length != None:

	276 continue # ignore any C-L past the first.

	277 try:

	278 content_length = int(f_val)

	279 except ValueError:

	280 continue

	281

	282 # FIXME: WSP between name and colon; request = 400, response = discard

	283 # TODO: remove and ignore conn tokens if the message was 1.0

	284

	285 # ignore content-length if transfer-encoding is present

	286 if transfer_codes != [] and content_length != None:

	287 content_length = None

	288

	289 try:

	290 allows_body = self._input_start(top_line, hdr_tuples,

	291 conn_tokens, transfer_codes, content_length)

	292 except ValueError: # parsing error of some kind; abort.

	293 return ""

	294

	295 self._input_state = HEADERS_DONE

	296 if not allows_body:

	297 self._input_delimit = NOBODY

	298 elif len(transfer_codes) > 0:

	299 if 'chunked' in transfer_codes:

	300 self._input_delimit = CHUNKED

	301 self._input_body_left = -1 # flag that we don't know

	302 else:

	303 self._input_delimit = CLOSE

	304 elif content_length != None:

	305 self._input_delimit = COUNTED

	306 self._input_body_left = content_length

	307 else:

	308 self._input_delimit = CLOSE

	309 return rest

	310

	311 ### output-related methods

	312

	313 def _output(self, out):

	314 raise NotImplementedError

	315

	316 def _handle_error(self, err):

	317 raise NotImplementedError

	318

	319 def _output_start(self, top_line, hdr_tuples, delimit):

	320 """

	321 Start ouputting a HTTP message.

	322 """

	323 self._output_delimit = delimit

	324 # TODO: strip whitespace?

	325 out = linesep.join(

	326 [top_line] +

	327 ["%s: %s" % (k, v) for k, v in hdr_tuples] +

	328 ["", ""]

	329 )

	330 self._output(out)

	331 self._output_state = HEADERS_DONE

	332

	333 def _output_body(self, chunk):

	334 """

	335 Output a part of a HTTP message.

	336 """

	337 if not chunk:

	338 return

	339 if self._output_delimit == CHUNKED:

	340 chunk = "%s\r\n%s\r\n" % (hex(len(chunk))[2:], chunk)

	341 self._output(chunk)

	342 #FIXME: body counting

	343 # self._output_body_sent += len(chunk)

	344 # assert self._output_body_sent <= self._output_content_length, \

	345 # "Too many body bytes sent"

	346

	347 def _output_end(self, err):

	348 """

	349 Finish outputting a HTTP message.

	350 """

	351 if err:

	352 self.output_body_cb, self.output_done_cb = dummy, dummy

	353 self._tcp_conn.close()

	354 self._tcp_conn = None

	355 elif self._output_delimit == NOBODY:

	356 pass # didn't have a body at all.

	357 elif self._output_delimit == CHUNKED:

	358 self._output("0\r\n\r\n")

	359 elif self._output_delimit == COUNTED:

	360 pass # TODO: double-check the length

	361 elif self._output_delimit == CLOSE:

	362 self._tcp_conn.close() # FIXME: abstract out?

	363 else:

	364 raise AssertionError, "Unknown request delimiter %s" % self._output_ delimit

OLD	NEW