Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(628)

Side by Side Diff: Tools/Scripts/webkitpy/thirdparty/webpagereplay/third_party/nbhttp/http_common.py

Issue 18418010: Check in the thirdparty libs needed for webkitpy. (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 #!/usr/bin/env python
2
3 """
4 shared HTTP infrastructure
5
6 This module contains utility functions for nbhttp and a base class
7 for the parsing portions of the client and server.
8 """
9
10 __author__ = "Mark Nottingham <mnot@mnot.net>"
11 __copyright__ = """\
12 Copyright (c) 2008-2009 Mark Nottingham
13
14 Permission is hereby granted, free of charge, to any person obtaining a copy
15 of this software and associated documentation files (the "Software"), to deal
16 in the Software without restriction, including without limitation the rights
17 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18 copies of the Software, and to permit persons to whom the Software is
19 furnished to do so, subject to the following conditions:
20
21 The above copyright notice and this permission notice shall be included in
22 all copies or substantial portions of the Software.
23
24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
30 THE SOFTWARE.
31 """
32
33 import re
# Folded-header whitespace: a line break followed by spaces/tabs.
lws = re.compile("\r?\n[ \t]+", re.M)
# Blank line terminating a header block (CRLF or bare LF flavoured).
hdr_end = re.compile(r"\r?\n\r?\n", re.M)
# Line terminator used when splitting chunk lines and serialising headers.
linesep = "\r\n"

# conn_modes: how the end of a message body is delimited
CLOSE = 'close'
COUNTED = 'counted'
CHUNKED = 'chunked'
NOBODY = 'nobody'

# states
WAITING = 1
HEADERS_DONE = 2

safe_methods = ['GET', 'HEAD', 'OPTIONS', 'TRACE']
idempotent_methods = ['GET', 'HEAD', 'PUT', 'DELETE', 'OPTIONS', 'TRACE']
no_body_status = ['100', '101', '204', '304']
hop_by_hop_hdrs = [
    'connection',
    'keep-alive',
    'proxy-authenticate',
    'proxy-authorization',
    'te',
    'trailers',
    'transfer-encoding',
    'upgrade',
    'proxy-connection',
]
50
51
52 from error import ERR_EXTRA_DATA, ERR_CHUNK, ERR_BODY_FORBIDDEN
53
def dummy(*args, **kw):
    """Accept any arguments and do nothing; useful to ignore a callback."""
    return None
57
def header_dict(header_tuple, strip=None):
    """
    Given a header tuple, return a dictionary keyed upon the lower-cased
    header names.

    Names and values are stripped of surrounding whitespace. When a header
    name occurs more than once, the last value wins.

    If strip is defined, each header listed (by lower-cased name) will not be
    returned in the dictionary.
    """
    # TODO: return a list of values; currently destructive.
    excluded = frozenset(strip or ())
    headers = {}
    for name, value in header_tuple:
        key = name.strip().lower()
        if key in excluded:
            continue  # caller asked for this header to be left out
        headers[key] = value.strip()
    return headers
70
def get_hdr(hdr_tuples, name):
    """
    Given a list of (name, value) header tuples and a header name (lowercase),
    return a list of all values for that header.

    This includes header lines with multiple values separated by a comma;
    such headers will be split into separate values. As a result, it is NOT
    safe to use this on headers whose values may include a comma (e.g.,
    Set-Cookie, or any value with a quoted string).

    Values are returned in the order they appear, each stripped of
    surrounding whitespace. An empty list is returned when no header matches.
    """
    # TODO: support quoted strings
    # A single nested comprehension flattens the per-line splits in one pass
    # instead of repeatedly concatenating lists.
    return [
        value.strip()
        for hdr in hdr_tuples
        if hdr[0].lower() == name
        for value in hdr[1].split(',')
    ]
87
88
class HttpMessageHandler:
    """
    This is a base class for something that has to parse and/or serialise
    HTTP messages, request or response.

    For parsing, it expects you to override _input_start, _input_body,
    _input_end and _input_error, and call _handle_input when you get bytes
    from the network.

    For serialising, it expects you to override _output.
    """

    def __init__(self):
        # Input that could not be processed yet; prepended to the next input.
        self._input_buffer = ""
        self._input_state = WAITING
        # One of the conn_modes, chosen once the headers have been parsed.
        self._input_delimit = None
        # Body bytes still expected; -1 flags "next chunk size not known yet".
        self._input_body_left = 0
        self._output_state = WAITING
        self._output_delimit = None

    # input-related methods

    def _input_start(self, top_line, hdr_tuples, conn_tokens, transfer_codes,
                     content_length):
        """
        Take the top set of headers from the input stream, parse them
        and queue the request to be processed by the application.

        Returns boolean allows_body to indicate whether the message allows a
        body.
        """
        raise NotImplementedError

    def _input_body(self, chunk):
        "Process a body chunk from the wire."
        raise NotImplementedError

    def _input_end(self):
        "Indicate that the response body is complete."
        raise NotImplementedError

    def _input_error(self, err, detail=None):
        "Indicate a parsing problem with the body."
        raise NotImplementedError

    def _handle_input(self, instr):
        """
        Given a chunk of input, figure out what state we're in and handle it,
        making the appropriate calls.

        Raises Exception if the input state or the body delimiter is unknown.
        """
        if self._input_buffer != "":
            # will need to move to a list if writev comes around
            instr = self._input_buffer + instr
            self._input_buffer = ""
        if self._input_state == WAITING:
            if hdr_end.search(instr):  # found one
                rest = self._parse_headers(instr)
                self._handle_input(rest)
            else:  # partial headers; store it and wait for more
                self._input_buffer = instr
        elif self._input_state == HEADERS_DONE:
            # Look the handler up first and call it outside any try block, so
            # an AttributeError raised *inside* the handler is not mistaken
            # for a missing handler.
            handler = getattr(self, '_handle_%s' % self._input_delimit, None)
            if handler is None:
                raise Exception(
                    "Unknown input delimiter %s" % self._input_delimit)
            handler(instr)
        else:
            raise Exception("Unknown state %s" % self._input_state)

    def _handle_nobody(self, instr):
        "Handle input that shouldn't have a body."
        if instr:
            # FIXME: will not work with pipelining
            self._input_error(ERR_BODY_FORBIDDEN, instr)
        else:
            self._input_end()
        self._input_state = WAITING

    def _handle_close(self, instr):
        "Handle input where the body is delimited by the connection closing."
        self._input_body(instr)

    def _handle_chunked(self, instr):
        "Handle input where the body is delimited by chunked encoding."
        # Each helper returns the input it did not consume, or None when it
        # has nothing further to hand back, which ends the loop.
        while instr:
            if self._input_body_left < 0:  # new chunk
                instr = self._handle_chunk_new(instr)
            elif self._input_body_left > 0:  # in the middle of a chunk
                instr = self._handle_chunk_body(instr)
            elif self._input_body_left == 0:  # body is done
                instr = self._handle_chunk_done(instr)

    def _handle_chunk_new(self, instr):
        """
        Parse a chunk-size line from the start of instr.

        Returns the input following that line, or None when the line is
        incomplete (it is buffered) or invalid (_input_error is called).
        """
        try:
            # they really need to use CRLF
            chunk_size, rest = instr.split(linesep, 1)
        except ValueError:
            # no complete line yet.. wait a bit
            if len(instr) > 256:
                # OK, this is absurd...
                self._input_error(ERR_CHUNK, instr)
            else:
                self._input_buffer += instr
            return
        if chunk_size.strip() == "":  # ignore bare lines
            # Hand the remainder back to the _handle_chunked loop, which
            # calls this method again; this avoids one level of recursion
            # per bare line.
            return rest
        if ";" in chunk_size:  # ignore chunk extensions
            chunk_size = chunk_size.split(";", 1)[0]
        try:
            self._input_body_left = int(chunk_size, 16)
        except ValueError:
            self._input_error(ERR_CHUNK, chunk_size)
            return  # blow up if we can't process a chunk.
        return rest

    def _handle_chunk_body(self, instr):
        """
        Deliver chunk data to _input_body.

        Returns the input following the chunk and its trailing CRLF when
        instr holds more than the chunk; otherwise returns None.
        """
        if self._input_body_left < len(instr):  # got more than the chunk
            this_chunk = self._input_body_left
            self._input_body(instr[:this_chunk])
            self._input_body_left = -1
            return instr[this_chunk + 2:]  # +2 consumes the CRLF
        elif self._input_body_left == len(instr):  # got the whole chunk exactly
            self._input_body(instr)
            self._input_body_left = -1
        else:  # got partial chunk
            self._input_body(instr)
            self._input_body_left -= len(instr)

    def _handle_chunk_done(self, instr):
        """
        Handle the input that follows the zero-length last chunk.

        Always returns None; anything after the end of the message is
        dropped (pipelining is not supported).
        """
        if instr.startswith(linesep) or hdr_end.search(instr):
            # Either the bare terminating CRLF or a complete trailer block.
            # TODO: process trailers
            self._input_state = WAITING
            self._input_end()
        else:  # don't have full trailers yet
            self._input_buffer = instr

    def _handle_counted(self, instr):
        "Handle input where the body is delimited by the Content-Length."
        assert self._input_body_left >= 0, \
            "message counting problem (%s)" % self._input_body_left
        # process body
        if self._input_body_left <= len(instr):  # got it all (and more?)
            self._input_body(instr[:self._input_body_left])
            self._input_state = WAITING
            if instr[self._input_body_left:]:
                # This will catch extra input that isn't on packet boundaries.
                self._input_error(ERR_EXTRA_DATA,
                                  instr[self._input_body_left:])
            else:
                self._input_end()
        else:  # got some of it
            self._input_body(instr)
            self._input_body_left -= len(instr)

    def _parse_headers(self, instr):
        """
        Given a string that we know contains a header block (possibly more),
        parse the headers out and return the rest. Calls self._input_start
        to kick off processing.

        Returns "" when the header block is empty or when _input_start
        raises ValueError.
        """
        top, rest = hdr_end.split(instr, 1)
        hdr_lines = lws.sub(" ", top).splitlines()  # Fold LWS
        try:
            top_line = hdr_lines.pop(0)
        except IndexError:  # empty
            return ""
        hdr_tuples = []
        conn_tokens = []
        transfer_codes = []
        content_length = None
        for line in hdr_lines:
            try:
                fn, fv = line.split(":", 1)
            except ValueError:
                continue  # TODO: flesh out bad header handling
            hdr_tuples.append((fn, fv))
            f_name = fn.strip().lower()
            f_val = fv.strip()

            # parse connection-related headers
            if f_name == "connection":
                conn_tokens += [v.strip().lower() for v in f_val.split(',')]
            elif f_name == "transfer-encoding":  # FIXME: parameters
                transfer_codes += [
                    v.strip().lower() for v in f_val.split(',')]
            elif f_name == "content-length":
                if content_length is not None:
                    continue  # ignore any C-L past the first.
                try:
                    parsed_length = int(f_val)
                except ValueError:
                    continue
                if parsed_length < 0:
                    # A negative length is as unusable as a non-numeric one;
                    # accepting it would trip the assertion in
                    # _handle_counted.
                    continue
                content_length = parsed_length

        # FIXME: WSP between name and colon; request = 400, response = discard
        # TODO: remove *and* ignore conn tokens if the message was 1.0

        # ignore content-length if transfer-encoding is present
        if transfer_codes:
            content_length = None

        try:
            allows_body = self._input_start(top_line, hdr_tuples,
                        conn_tokens, transfer_codes, content_length)
        except ValueError:  # parsing error of some kind; abort.
            return ""

        self._input_state = HEADERS_DONE
        if not allows_body:
            self._input_delimit = NOBODY
        elif transfer_codes:
            if 'chunked' in transfer_codes:
                self._input_delimit = CHUNKED
                self._input_body_left = -1  # flag that we don't know
            else:
                self._input_delimit = CLOSE
        elif content_length is not None:
            self._input_delimit = COUNTED
            self._input_body_left = content_length
        else:
            self._input_delimit = CLOSE
        return rest

    ### output-related methods

    def _output(self, out):
        raise NotImplementedError

    def _handle_error(self, err):
        raise NotImplementedError

    def _output_start(self, top_line, hdr_tuples, delimit):
        """
        Start outputting a HTTP message.

        Writes top_line and the headers, terminated by a blank line, through
        _output, and records delimit for _output_body and _output_end.
        """
        self._output_delimit = delimit
        # TODO: strip whitespace?
        out = linesep.join(
            [top_line] +
            ["%s: %s" % (k, v) for k, v in hdr_tuples] +
            ["", ""]
        )
        self._output(out)
        self._output_state = HEADERS_DONE

    def _output_body(self, chunk):
        """
        Output a part of a HTTP message.

        Empty chunks are skipped. When the output is chunked, the data is
        wrapped in a chunk header (hex length) and trailing CRLF.
        """
        if not chunk:
            return
        if self._output_delimit == CHUNKED:
            chunk = "%x\r\n%s\r\n" % (len(chunk), chunk)
        self._output(chunk)
        # FIXME: body counting

    def _output_end(self, err):
        """
        Finish outputting a HTTP message.

        Uses self._tcp_conn (and, on error, sets self.output_body_cb and
        self.output_done_cb); none of these are set by this class's __init__.

        Raises AssertionError if the output delimiter is unknown.
        """
        if err:
            self.output_body_cb, self.output_done_cb = dummy, dummy
            self._tcp_conn.close()
            self._tcp_conn = None
        elif self._output_delimit == NOBODY:
            pass  # didn't have a body at all.
        elif self._output_delimit == CHUNKED:
            self._output("0\r\n\r\n")
        elif self._output_delimit == COUNTED:
            pass  # TODO: double-check the length
        elif self._output_delimit == CLOSE:
            self._tcp_conn.close()  # FIXME: abstract out?
        else:
            raise AssertionError(
                "Unknown request delimiter %s" % self._output_delimit)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698