| (Empty) |
1 # -*- test-case-name: twisted.web.test.test_http -*- | |
2 | |
3 # Copyright (c) 2001-2007 Twisted Matrix Laboratories. | |
4 # See LICENSE for details. | |
5 | |
6 | |
7 """ | |
8 HyperText Transfer Protocol implementation. | |
9 | |
10 This is used by twisted.web. | |
11 | |
12 Future Plans: | |
13 - HTTP client support will at some point be refactored to support HTTP/1.1. | |
14 - Accept chunked data from clients in server. | |
15 - Other missing HTTP features from the RFC. | |
16 | |
17 Maintainer: U{Itamar Shtull-Trauring<mailto:twisted@itamarst.org>} | |
18 """ | |
19 | |
20 # system imports | |
21 from cStringIO import StringIO | |
22 import tempfile | |
23 import base64, binascii | |
24 import cgi | |
25 import socket | |
26 import math | |
27 import time | |
28 import calendar | |
29 import warnings | |
30 import os | |
31 from urlparse import urlparse as _urlparse | |
32 | |
33 from zope.interface import implements | |
34 | |
35 # twisted imports | |
36 from twisted.internet import interfaces, reactor, protocol, address | |
37 from twisted.protocols import policies, basic | |
38 from twisted.python import log | |
39 try: # try importing the fast, C version | |
40 from twisted.protocols._c_urlarg import unquote | |
41 except ImportError: | |
42 from urllib import unquote | |
43 | |
44 | |
# Protocol version string advertised by this module.
protocol_version = "HTTP/1.1"

# HTTP status codes, grouped by class.  Every name used as a key in
# RESPONSES below must be defined here, otherwise building the table fails
# with a NameError.

# 1xx: informational
_CONTINUE = 100
SWITCHING = 101

# 2xx: success
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207

# 3xx: redirection (306 is intentionally absent)
MULTIPLE_CHOICE = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# 4xx: client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTH_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417

# 5xx: server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE_SPACE = 507
NOT_EXTENDED = 510

# Mapping of status code to the reason phrase sent on the status line.
RESPONSES = {
    # 100
    _CONTINUE: "Continue",
    SWITCHING: "Switching Protocols",

    # 200
    OK: "OK",
    CREATED: "Created",
    ACCEPTED: "Accepted",
    NON_AUTHORITATIVE_INFORMATION: "Non-Authoritative Information",
    NO_CONTENT: "No Content",
    RESET_CONTENT: "Reset Content.",
    PARTIAL_CONTENT: "Partial Content",
    MULTI_STATUS: "Multi-Status",

    # 300
    MULTIPLE_CHOICE: "Multiple Choices",
    MOVED_PERMANENTLY: "Moved Permanently",
    FOUND: "Found",
    SEE_OTHER: "See Other",
    NOT_MODIFIED: "Not Modified",
    USE_PROXY: "Use Proxy",
    # 306 not defined??
    TEMPORARY_REDIRECT: "Temporary Redirect",

    # 400
    BAD_REQUEST: "Bad Request",
    UNAUTHORIZED: "Unauthorized",
    PAYMENT_REQUIRED: "Payment Required",
    FORBIDDEN: "Forbidden",
    NOT_FOUND: "Not Found",
    NOT_ALLOWED: "Method Not Allowed",
    NOT_ACCEPTABLE: "Not Acceptable",
    PROXY_AUTH_REQUIRED: "Proxy Authentication Required",
    REQUEST_TIMEOUT: "Request Time-out",
    CONFLICT: "Conflict",
    GONE: "Gone",
    LENGTH_REQUIRED: "Length Required",
    PRECONDITION_FAILED: "Precondition Failed",
    REQUEST_ENTITY_TOO_LARGE: "Request Entity Too Large",
    REQUEST_URI_TOO_LONG: "Request-URI Too Long",
    UNSUPPORTED_MEDIA_TYPE: "Unsupported Media Type",
    REQUESTED_RANGE_NOT_SATISFIABLE: "Requested Range not satisfiable",
    EXPECTATION_FAILED: "Expectation Failed",

    # 500
    INTERNAL_SERVER_ERROR: "Internal Server Error",
    NOT_IMPLEMENTED: "Not Implemented",
    BAD_GATEWAY: "Bad Gateway",
    SERVICE_UNAVAILABLE: "Service Unavailable",
    GATEWAY_TIMEOUT: "Gateway Time-out",
    HTTP_VERSION_NOT_SUPPORTED: "HTTP Version not supported",
    INSUFFICIENT_STORAGE_SPACE: "Insufficient Storage Space",
    NOT_EXTENDED: "Not Extended"
    }
150 | |
# Sentinel returned by Request.setLastModified and Request.setETag when a
# conditional request matched; the string doubles as its own documentation.
CACHED = """Magic constant returned by http.Request methods to set cache
validation headers when the request is conditional and the value fails
the condition."""

# backwards compatibility: older code refers to the table by its lowercase name
responses = RESPONSES
157 | |
158 | |
# datetime parsing and formatting
# weekdayname is indexed by the weekday field of time.gmtime() (Monday == 0).
weekdayname = 'Mon Tue Wed Thu Fri Sat Sun'.split()
# monthname is indexed by month number; slot 0 is a None placeholder so that
# January sits at index 1.
monthname = [None] + 'Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()
# Lower-cased copies used for case-insensitive lookups while parsing dates.
weekdayname_lower = [name.lower() for name in weekdayname]
monthname_lower = [None] + [name.lower() for name in monthname[1:]]
166 | |
def urlparse(url):
    """
    Parse an URL into six components.

    This is similar to L{urlparse.urlparse}, but rejects C{unicode} input
    and always produces C{str} output.

    @type url: C{str}

    @raise TypeError: The given url was a C{unicode} string instead of a
        C{str}.

    @rtype: six-tuple of str
    @return: The scheme, net location, path, params, query string, and fragment
        of the URL.
    """
    if isinstance(url, unicode):
        raise TypeError("url must be str, not unicode")
    scheme, netloc, path, params, query, fragment = _urlparse(url)
    if isinstance(scheme, unicode):
        # The underlying parser handed back unicode even though the input was
        # a str (presumably from its internal cache -- TODO confirm).  Encode
        # every component, including params, so the "always str" promise in
        # the docstring holds for the whole tuple.
        scheme = scheme.encode('ascii')
        netloc = netloc.encode('ascii')
        path = path.encode('ascii')
        params = params.encode('ascii')
        query = query.encode('ascii')
        fragment = fragment.encode('ascii')
    return scheme, netloc, path, params, query, fragment
193 | |
194 | |
def parse_qs(qs, keep_blank_values=0, strict_parsing=0, unquote=unquote):
    """
    Parse a query string like C{cgi.parse_qs}, with a pluggable unquote
    function.

    @param qs: the query string; pairs may be separated by C{&} or C{;}.
    @param keep_blank_values: if true, pairs with an empty value are kept.
    @param strict_parsing: if true, a pair without C{=} raises C{ValueError}
        instead of being skipped.
    @param unquote: callable used to decode each key and value after C{+}
        has been turned into a space.
    @return: a dict mapping each key to the list of its values, in order.
    """
    result = {}
    for ampersandPart in qs.split("&"):
        for pair in ampersandPart.split(";"):
            try:
                key, value = pair.split("=", 1)
            except ValueError:
                if strict_parsing:
                    raise
                continue
            if not (value or keep_blank_values):
                continue
            key = unquote(key.replace("+", " "))
            value = unquote(value.replace("+", " "))
            result.setdefault(key, []).append(value)
    return result
214 | |
def datetimeToString(msSinceEpoch=None):
    """
    Convert seconds since epoch to HTTP datetime string.

    @param msSinceEpoch: despite its name, a number of B{seconds} since the
        epoch (it is passed straight to C{time.gmtime}).  When C{None}, the
        current time is used.
    @return: a string such as C{'Sun, 06 Nov 1994 08:49:37 GMT'}.
    """
    if msSinceEpoch is None:
        msSinceEpoch = time.time()
    # Only the first seven fields (through the weekday) are needed.
    year, month, day, hh, mm, ss, wd = time.gmtime(msSinceEpoch)[:7]
    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
        weekdayname[wd],
        day, monthname[month], year,
        hh, mm, ss)
225 | |
def datetimeToLogString(msSinceEpoch=None):
    """
    Convert seconds since epoch to log datetime string.

    @param msSinceEpoch: despite its name, a number of B{seconds} since the
        epoch (it is passed straight to C{time.gmtime}).  When C{None}, the
        current time is used.
    @return: a string such as C{'[06/Nov/1994:08:49:37 +0000]'}.
    """
    if msSinceEpoch is None:
        msSinceEpoch = time.time()
    # The weekday and later fields are not part of the log format.
    year, month, day, hh, mm, ss = time.gmtime(msSinceEpoch)[:6]
    return "[%02d/%3s/%4d:%02d:%02d:%02d +0000]" % (
        day, monthname[month], year,
        hh, mm, ss)
235 | |
236 | |
# a hack so we don't need to recalculate log datetime every hit,
# at the price of a small, unimportant, inaccuracy.
_logDateTime = None          # cached log timestamp string
_logDateTimeUsers = 0        # count of _logDateTimeStart() calls not yet stopped
_resetLogDateTimeID = None   # pending delayed call that refreshes the cache

def _resetLogDateTime():
    """
    Refresh the cached log timestamp and schedule the next refresh.

    Recomputes C{_logDateTime} and re-arms itself through
    C{reactor.callLater} with a one second delay, keeping the resulting
    delayed call in C{_resetLogDateTimeID} so it can be cancelled later.
    """
    # Only the two names assigned here need a global declaration; the
    # function's own name is merely read when it is rescheduled.
    global _logDateTime
    global _resetLogDateTimeID
    _logDateTime = datetimeToLogString()
    _resetLogDateTimeID = reactor.callLater(1, _resetLogDateTime)
249 | |
def _logDateTimeStart():
    """
    Register one more user of the cached log timestamp.

    The first user triggers C{_resetLogDateTime}, which fills the cache and
    starts the periodic refresh.
    """
    global _logDateTimeUsers
    isFirstUser = not _logDateTimeUsers
    if isFirstUser:
        _resetLogDateTime()
    _logDateTimeUsers += 1
255 | |
def _logDateTimeStop():
    """
    Drop one user of the cached log timestamp.

    When the user count reaches zero and a refresh call is still pending and
    active, that call is cancelled.
    """
    global _logDateTimeUsers
    _logDateTimeUsers -= 1
    if _logDateTimeUsers:
        # somebody still needs the cache to be refreshed
        return
    pendingCall = _resetLogDateTimeID
    if pendingCall and pendingCall.active():
        pendingCall.cancel()
262 | |
def timegm(year, month, day, hour, minute, second):
    """
    Convert time tuple in GMT to seconds since epoch, GMT

    C{year} must not be earlier than 1970 and C{month} must be in the range
    1-12; both are checked with assertions.
    """
    EPOCH = 1970
    assert year >= EPOCH
    assert 1 <= month <= 12
    # whole years since the epoch, plus one day per leap year in between
    dayCount = 365*(year-EPOCH) + calendar.leapdays(EPOCH, year)
    # whole months already elapsed in the requested year
    dayCount = dayCount + sum(calendar.mdays[1:month])
    if month > 2 and calendar.isleap(year):
        # calendar.mdays holds the non-leap length of February
        dayCount = dayCount + 1
    dayCount = dayCount + day - 1
    return ((dayCount*24 + hour)*60 + minute)*60 + second
278 | |
def stringToDatetime(dateString):
    """
    Convert an HTTP date string (one of three formats) to seconds since epoch.

    The accepted formats are::

        Sun, 06 Nov 1994 08:49:37 GMT    (normal format, "GMT" optional)
        Sunday, 06-Nov-94 08:49:37 GMT   (two digit year, "GMT" optional)
        Sun Nov  6 08:49:37 1994         (ANSI C asctime())

    A leading weekday may be omitted.

    @param dateString: the date to parse.
    @return: whole seconds since the epoch, as an C{int}.
    @raise ValueError: if the string is empty or matches none of the formats.
    """
    parts = dateString.split()
    if not parts:
        # Nothing to index below; report it like any other bad date rather
        # than leaking an IndexError.
        raise ValueError("Unknown datetime format %r" % dateString)

    if parts[0][0:3].lower() not in weekdayname_lower:
        # Weekday is stupid. Might have been omitted.
        try:
            return stringToDatetime("Sun, "+dateString)
        except ValueError:
            # Guess not.
            pass

    partlen = len(parts)
    if (partlen == 5 or partlen == 6) and parts[1].isdigit():
        # 1st date format: Sun, 06 Nov 1994 08:49:37 GMT
        # (Note: "GMT" is literal, not a variable timezone)
        # (also handles without "GMT")
        # This is the normal format
        day = parts[1]
        month = parts[2]
        year = parts[3]
        timeOfDay = parts[4]
    elif (partlen == 3 or partlen == 4) and parts[1].find('-') != -1:
        # 2nd date format: Sunday, 06-Nov-94 08:49:37 GMT
        # (Note: "GMT" is literal, not a variable timezone)
        # (also handles without "GMT")
        # Two digit year, yucko.
        day, month, year = parts[1].split('-')
        timeOfDay = parts[2]
        year = int(year)
        # two digit years below 69 belong to the 2000s, the rest to the 1900s
        if year < 69:
            year = year + 2000
        elif year < 100:
            year = year + 1900
    elif partlen == 5:
        # 3rd date format: Sun Nov 6 08:49:37 1994
        # ANSI C asctime() format.
        day = parts[2]
        month = parts[1]
        year = parts[4]
        timeOfDay = parts[3]
    else:
        raise ValueError("Unknown datetime format %r" % dateString)

    day = int(day)
    # list.index raises ValueError for an unknown month name
    month = int(monthname_lower.index(month.lower()))
    year = int(year)
    # named so as not to shadow the time module or the min builtin
    hour, minute, sec = map(int, timeOfDay.split(':'))
    return int(timegm(year, month, day, hour, minute, sec))
328 | |
def toChunk(data):
    """
    Convert string to a chunk.

    @param data: the payload of the chunk.
    @returns: a tuple of strings representing the chunked encoding of data:
        the hexadecimal length line, the data itself and the closing CRLF.
    """
    sizeLine = "%x\r\n" % len(data)
    return (sizeLine, data, "\r\n")
334 | |
def fromChunk(data):
    """
    Convert chunk to string.

    @param data: a string starting with a chunk: hexadecimal length, CRLF,
        payload, CRLF, optionally followed by more data.
    @returns: tuple (result, remaining), may raise ValueError.
    @raise ValueError: if the length line is missing or not hexadecimal, the
        length is negative, or the payload is not followed by CRLF.
    """
    # Unpacking fails with ValueError when there is no CRLF at all.
    prefix, rest = data.split('\r\n', 1)
    length = int(prefix, 16)
    if length < 0:
        raise ValueError("Chunk length must be >= 0, not %d" % (length,))
    if rest[length:length + 2] != '\r\n':
        raise ValueError("chunk must end with CRLF")
    return rest[:length], rest[length + 2:]
347 | |
348 | |
def parseContentRange(header):
    """
    Parse a content-range header into (start, end, realLength).

    realLength might be None if real length is not known ('*').

    @param header: the header value, e.g. C{'bytes 0-499/1234'}.
    @return: a tuple C{(start, end, realLength)} of ints, where
        C{realLength} is C{None} for C{'*'}.
    @raise ValueError: if the range unit is not C{bytes} or the value does
        not have the expected shape.
    """
    kind, other = header.strip().split()
    if kind.lower() != "bytes":
        # interpolate the offending unit; the message used to be emitted with
        # a raw, unfilled %r placeholder
        raise ValueError("a range of type %r is not supported" % (kind,))
    startend, realLength = other.split("/")
    start, end = map(int, startend.split("-"))
    if realLength == "*":
        realLength = None
    else:
        realLength = int(realLength)
    return (start, end, realLength)
364 | |
365 | |
class StringTransport:
    """
    A thin wrapper around a StringIO buffer that can stand in for a
    transport.  On top of everything the buffer offers, it provides the
    `writeSequence' method.
    """

    def __init__(self):
        self.s = StringIO()

    def writeSequence(self, seq):
        """Write all the strings in C{seq} to the buffer, in order."""
        joined = ''.join(seq)
        self.s.write(joined)

    def __getattr__(self, attr):
        # Anything not defined here is delegated to the buffer.  The buffer
        # is fetched through __dict__ so that a missing 's' attribute does
        # not re-enter __getattr__.
        buf = self.__dict__['s']
        return getattr(buf, attr)
377 | |
378 | |
class HTTPClient(basic.LineReceiver):
    """
    A client for HTTP 1.0

    Notes:
    You probably want to send a 'Host' header with the name of
    the site you're connecting to, in order to not break name
    based virtual hosting.

    Subclasses receive the response through the C{handleStatus},
    C{handleHeader}, C{handleEndHeaders} and C{handleResponse} hooks.
    """
    length = None      # remaining body bytes, from Content-Length (None: unknown)
    firstLine = 1      # true until the status line has been consumed
    __buffer = None    # collects the response body once the headers are over

    # By default these hooks ignore what they are told.
    handleStatus = handleHeader = handleEndHeaders = lambda *args: None

    def connectionMade(self):
        pass

    def sendCommand(self, command, path):
        """Write the request line for C{command} on C{path}."""
        requestLine = '%s %s HTTP/1.0\r\n' % (command, path)
        self.transport.write(requestLine)

    def sendHeader(self, name, value):
        """Write a single request header."""
        headerLine = '%s: %s\r\n' % (name, value)
        self.transport.write(headerLine)

    def endHeaders(self):
        """Write the blank line that terminates the request headers."""
        self.transport.write('\r\n')

    def lineReceived(self, line):
        """
        Handle one line of the response head: the status line first, then
        headers, then the empty line that switches to raw (body) mode.
        """
        if self.firstLine:
            self.firstLine = 0
            fields = line.split(None, 2)
            version, status = fields[0], fields[1]
            if len(fields) > 2:
                message = fields[2]
            else:
                # sometimes there is no message
                message = ""
            self.handleStatus(version, status, message)
            return
        if not line:
            # end of headers: everything that follows is body data
            self.__buffer = StringIO()
            self.handleEndHeaders()
            self.setRawMode()
            return
        key, val = line.split(':', 1)
        val = val.lstrip()
        self.handleHeader(key, val)
        if key.lower() == 'content-length':
            self.length = int(val)

    def rawDataReceived(self, data):
        """
        Collect body data, honouring Content-Length when one was received;
        anything beyond the announced length is handed back to line mode.
        """
        remaining = self.length
        if remaining is None:
            rest = ''
        else:
            data, rest = data[:remaining], data[remaining:]
            self.length = remaining - len(data)
        self.handleResponsePart(data)
        if self.length == 0:
            self.handleResponseEnd()
            self.setLineMode(rest)

    def handleResponsePart(self, data):
        """Append a piece of the body to the buffer."""
        self.__buffer.write(data)

    def handleResponseEnd(self):
        """Deliver the buffered body to C{handleResponse}, at most once."""
        if self.__buffer is None:
            return
        body = self.__buffer.getvalue()
        self.__buffer = None
        self.handleResponse(body)

    def connectionLost(self, reason):
        self.handleResponseEnd()
451 | |
452 | |
453 # response codes that must have empty bodies | |
454 NO_BODY_CODES = (204, 304) | |
455 | |
456 class Request: | |
457 """A HTTP request. | |
458 | |
459 Subclasses should override the process() method to determine how | |
460 the request will be processed. | |
461 | |
462 @ivar method: The HTTP method that was used. | |
463 @ivar uri: The full URI that was requested (includes arguments). | |
464 @ivar path: The path only (arguments not included). | |
465 @ivar args: All of the arguments, including URL and POST arguments. | |
466 @type args: A mapping of strings (the argument names) to lists of values. | |
467 i.e., ?foo=bar&foo=baz&quux=spam results in | |
468 {'foo': ['bar', 'baz'], 'quux': ['spam']}. | |
469 @ivar received_headers: All received headers | |
470 """ | |
471 | |
472 implements(interfaces.IConsumer) | |
473 | |
474 producer = None | |
475 finished = 0 | |
476 code = OK | |
477 code_message = RESPONSES[OK] | |
478 method = "(no method yet)" | |
479 clientproto = "(no clientproto yet)" | |
480 uri = "(no uri yet)" | |
481 startedWriting = 0 | |
482 chunked = 0 | |
483 sentLength = 0 # content-length of response, or total bytes sent via chunkin
g | |
484 etag = None | |
485 lastModified = None | |
486 _forceSSL = 0 | |
487 | |
488 def __init__(self, channel, queued): | |
489 """ | |
490 @param channel: the channel we're connected to. | |
491 @param queued: are we in the request queue, or can we start writing to | |
492 the transport? | |
493 """ | |
494 self.channel = channel | |
495 self.queued = queued | |
496 self.received_headers = {} | |
497 self.received_cookies = {} | |
498 self.headers = {} # outgoing headers | |
499 self.cookies = [] # outgoing cookies | |
500 | |
501 if queued: | |
502 self.transport = StringTransport() | |
503 else: | |
504 self.transport = self.channel.transport | |
505 | |
506 def _cleanup(self): | |
507 """Called when have finished responding and are no longer queued.""" | |
508 if self.producer: | |
509 log.err(RuntimeError("Producer was not unregistered for %s" % self.u
ri)) | |
510 self.unregisterProducer() | |
511 self.channel.requestDone(self) | |
512 del self.channel | |
513 try: | |
514 self.content.close() | |
515 except OSError: | |
516 # win32 suckiness, no idea why it does this | |
517 pass | |
518 del self.content | |
519 | |
520 # methods for channel - end users should not use these | |
521 | |
522 def noLongerQueued(self): | |
523 """Notify the object that it is no longer queued. | |
524 | |
525 We start writing whatever data we have to the transport, etc. | |
526 | |
527 This method is not intended for users. | |
528 """ | |
529 if not self.queued: | |
530 raise RuntimeError, "noLongerQueued() got called unnecessarily." | |
531 | |
532 self.queued = 0 | |
533 | |
534 # set transport to real one and send any buffer data | |
535 data = self.transport.getvalue() | |
536 self.transport = self.channel.transport | |
537 if data: | |
538 self.transport.write(data) | |
539 | |
540 # if we have producer, register it with transport | |
541 if (self.producer is not None) and not self.finished: | |
542 self.transport.registerProducer(self.producer, self.streamingProduce
r) | |
543 | |
544 # if we're finished, clean up | |
545 if self.finished: | |
546 self._cleanup() | |
547 | |
548 def gotLength(self, length): | |
549 """Called when HTTP channel got length of content in this request. | |
550 | |
551 This method is not intended for users. | |
552 """ | |
553 if length < 100000: | |
554 self.content = StringIO() | |
555 else: | |
556 self.content = tempfile.TemporaryFile() | |
557 | |
558 def parseCookies(self): | |
559 """Parse cookie headers. | |
560 | |
561 This method is not intended for users.""" | |
562 cookietxt = self.getHeader("cookie") | |
563 if cookietxt: | |
564 for cook in cookietxt.split(';'): | |
565 cook = cook.lstrip() | |
566 try: | |
567 k, v = cook.split('=', 1) | |
568 self.received_cookies[k] = v | |
569 except ValueError: | |
570 pass | |
571 | |
572 def handleContentChunk(self, data): | |
573 """Write a chunk of data. | |
574 | |
575 This method is not intended for users. | |
576 """ | |
577 self.content.write(data) | |
578 | |
579 def requestReceived(self, command, path, version): | |
580 """Called by channel when all data has been received. | |
581 | |
582 This method is not intended for users. | |
583 """ | |
584 self.content.seek(0,0) | |
585 self.args = {} | |
586 self.stack = [] | |
587 | |
588 self.method, self.uri = command, path | |
589 self.clientproto = version | |
590 x = self.uri.split('?', 1) | |
591 | |
592 if len(x) == 1: | |
593 self.path = self.uri | |
594 else: | |
595 self.path, argstring = x | |
596 self.args = parse_qs(argstring, 1) | |
597 | |
598 # cache the client and server information, we'll need this later to be | |
599 # serialized and sent with the request so CGIs will work remotely | |
600 self.client = self.channel.transport.getPeer() | |
601 self.host = self.channel.transport.getHost() | |
602 | |
603 # Argument processing | |
604 args = self.args | |
605 ctype = self.getHeader('content-type') | |
606 if self.method == "POST" and ctype: | |
607 mfd = 'multipart/form-data' | |
608 key, pdict = cgi.parse_header(ctype) | |
609 if key == 'application/x-www-form-urlencoded': | |
610 args.update(parse_qs(self.content.read(), 1)) | |
611 elif key == mfd: | |
612 try: | |
613 args.update(cgi.parse_multipart(self.content, pdict)) | |
614 except KeyError, e: | |
615 if e.args[0] == 'content-disposition': | |
616 # Parse_multipart can't cope with missing | |
617 # content-dispostion headers in multipart/form-data | |
618 # parts, so we catch the exception and tell the client | |
619 # it was a bad request. | |
620 self.channel.transport.write( | |
621 "HTTP/1.1 400 Bad Request\r\n\r\n") | |
622 self.channel.transport.loseConnection() | |
623 return | |
624 raise | |
625 | |
626 self.process() | |
627 | |
628 def __repr__(self): | |
629 return '<%s %s %s>'% (self.method, self.uri, self.clientproto) | |
630 | |
631 def process(self): | |
632 """Override in subclasses. | |
633 | |
634 This method is not intended for users. | |
635 """ | |
636 pass | |
637 | |
638 | |
639 # consumer interface | |
640 | |
641 def registerProducer(self, producer, streaming): | |
642 """Register a producer.""" | |
643 if self.producer: | |
644 raise ValueError, "registering producer %s before previous one (%s)
was unregistered" % (producer, self.producer) | |
645 | |
646 self.streamingProducer = streaming | |
647 self.producer = producer | |
648 | |
649 if self.queued: | |
650 producer.pauseProducing() | |
651 else: | |
652 self.transport.registerProducer(producer, streaming) | |
653 | |
654 def unregisterProducer(self): | |
655 """Unregister the producer.""" | |
656 if not self.queued: | |
657 self.transport.unregisterProducer() | |
658 self.producer = None | |
659 | |
660 # private http response methods | |
661 | |
662 def _sendError(self, code, resp=''): | |
663 self.transport.write('%s %s %s\r\n\r\n' % (self.clientproto, code, resp)
) | |
664 | |
665 | |
666 # The following is the public interface that people should be | |
667 # writing to. | |
668 | |
669 def getHeader(self, key): | |
670 """Get a header that was sent from the network. | |
671 """ | |
672 return self.received_headers.get(key.lower()) | |
673 | |
674 def getCookie(self, key): | |
675 """Get a cookie that was sent from the network. | |
676 """ | |
677 return self.received_cookies.get(key) | |
678 | |
679 def finish(self): | |
680 """We are finished writing data.""" | |
681 if self.finished: | |
682 warnings.warn("Warning! request.finish called twice.", stacklevel=2) | |
683 return | |
684 | |
685 if not self.startedWriting: | |
686 # write headers | |
687 self.write('') | |
688 | |
689 if self.chunked: | |
690 # write last chunk and closing CRLF | |
691 self.transport.write("0\r\n\r\n") | |
692 | |
693 # log request | |
694 if hasattr(self.channel, "factory"): | |
695 self.channel.factory.log(self) | |
696 | |
697 self.finished = 1 | |
698 if not self.queued: | |
699 self._cleanup() | |
700 | |
701 def write(self, data): | |
702 """ | |
703 Write some data as a result of an HTTP request. The first | |
704 time this is called, it writes out response data. | |
705 """ | |
706 if not self.startedWriting: | |
707 self.startedWriting = 1 | |
708 version = self.clientproto | |
709 l = [] | |
710 l.append('%s %s %s\r\n' % (version, self.code, | |
711 self.code_message)) | |
712 # if we don't have a content length, we send data in | |
713 # chunked mode, so that we can support pipelining in | |
714 # persistent connections. | |
715 if ((version == "HTTP/1.1") and | |
716 (self.headers.get('content-length', None) is None) and | |
717 self.method != "HEAD" and self.code not in NO_BODY_CODES): | |
718 l.append("%s: %s\r\n" % ('Transfer-encoding', 'chunked')) | |
719 self.chunked = 1 | |
720 if self.lastModified is not None: | |
721 if self.headers.has_key('last-modified'): | |
722 log.msg("Warning: last-modified specified both in" | |
723 " header list and lastModified attribute.") | |
724 else: | |
725 self.setHeader('last-modified', | |
726 datetimeToString(self.lastModified)) | |
727 if self.etag is not None: | |
728 self.setHeader('ETag', self.etag) | |
729 for name, value in self.headers.items(): | |
730 l.append("%s: %s\r\n" % (name.capitalize(), value)) | |
731 for cookie in self.cookies: | |
732 l.append('%s: %s\r\n' % ("Set-Cookie", cookie)) | |
733 l.append("\r\n") | |
734 | |
735 self.transport.writeSequence(l) | |
736 | |
737 # if this is a "HEAD" request, we shouldn't return any data | |
738 if self.method == "HEAD": | |
739 self.write = lambda data: None | |
740 return | |
741 | |
742 # for certain result codes, we should never return any data | |
743 if self.code in NO_BODY_CODES: | |
744 self.write = lambda data: None | |
745 return | |
746 | |
747 self.sentLength = self.sentLength + len(data) | |
748 if data: | |
749 if self.chunked: | |
750 self.transport.writeSequence(toChunk(data)) | |
751 else: | |
752 self.transport.write(data) | |
753 | |
754 def addCookie(self, k, v, expires=None, domain=None, path=None, max_age=None
, comment=None, secure=None): | |
755 """Set an outgoing HTTP cookie. | |
756 | |
757 In general, you should consider using sessions instead of cookies, see | |
758 twisted.web.server.Request.getSession and the | |
759 twisted.web.server.Session class for details. | |
760 """ | |
761 cookie = '%s=%s' % (k, v) | |
762 if expires is not None: | |
763 cookie = cookie +"; Expires=%s" % expires | |
764 if domain is not None: | |
765 cookie = cookie +"; Domain=%s" % domain | |
766 if path is not None: | |
767 cookie = cookie +"; Path=%s" % path | |
768 if max_age is not None: | |
769 cookie = cookie +"; Max-Age=%s" % max_age | |
770 if comment is not None: | |
771 cookie = cookie +"; Comment=%s" % comment | |
772 if secure: | |
773 cookie = cookie +"; Secure" | |
774 self.cookies.append(cookie) | |
775 | |
776 def setResponseCode(self, code, message=None): | |
777 """Set the HTTP response code. | |
778 """ | |
779 self.code = code | |
780 if message: | |
781 self.code_message = message | |
782 else: | |
783 self.code_message = RESPONSES.get(code, "Unknown Status") | |
784 | |
785 def setHeader(self, k, v): | |
786 """Set an outgoing HTTP header. | |
787 """ | |
788 self.headers[k.lower()] = v | |
789 | |
790 def redirect(self, url): | |
791 """Utility function that does a redirect. | |
792 | |
793 The request should have finish() called after this. | |
794 """ | |
795 self.setResponseCode(FOUND) | |
796 self.setHeader("location", url) | |
797 | |
798 def setLastModified(self, when): | |
799 """Set the X{Last-Modified} time for the response to this request. | |
800 | |
801 If I am called more than once, I ignore attempts to set | |
802 Last-Modified earlier, only replacing the Last-Modified time | |
803 if it is to a later value. | |
804 | |
805 If I am a conditional request, I may modify my response code | |
806 to L{NOT_MODIFIED} if appropriate for the time given. | |
807 | |
808 @param when: The last time the resource being returned was | |
809 modified, in seconds since the epoch. | |
810 @type when: number | |
811 @return: If I am a X{If-Modified-Since} conditional request and | |
812 the time given is not newer than the condition, I return | |
813 L{http.CACHED<CACHED>} to indicate that you should write no | |
814 body. Otherwise, I return a false value. | |
815 """ | |
816 # time.time() may be a float, but the HTTP-date strings are | |
817 # only good for whole seconds. | |
818 when = long(math.ceil(when)) | |
819 if (not self.lastModified) or (self.lastModified < when): | |
820 self.lastModified = when | |
821 | |
822 modified_since = self.getHeader('if-modified-since') | |
823 if modified_since: | |
824 modified_since = stringToDatetime(modified_since.split(';', 1)[0]) | |
825 if modified_since >= when: | |
826 self.setResponseCode(NOT_MODIFIED) | |
827 return CACHED | |
828 return None | |
829 | |
830 def setETag(self, etag): | |
831 """Set an X{entity tag} for the outgoing response. | |
832 | |
833 That's \"entity tag\" as in the HTTP/1.1 X{ETag} header, \"used | |
834 for comparing two or more entities from the same requested | |
835 resource.\" | |
836 | |
837 If I am a conditional request, I may modify my response code | |
838 to L{NOT_MODIFIED} or L{PRECONDITION_FAILED}, if appropriate | |
839 for the tag given. | |
840 | |
841 @param etag: The entity tag for the resource being returned. | |
842 @type etag: string | |
843 @return: If I am a X{If-None-Match} conditional request and | |
844 the tag matches one in the request, I return | |
845 L{http.CACHED<CACHED>} to indicate that you should write | |
846 no body. Otherwise, I return a false value. | |
847 """ | |
848 if etag: | |
849 self.etag = etag | |
850 | |
851 tags = self.getHeader("if-none-match") | |
852 if tags: | |
853 tags = tags.split() | |
854 if (etag in tags) or ('*' in tags): | |
855 self.setResponseCode(((self.method in ("HEAD", "GET")) | |
856 and NOT_MODIFIED) | |
858 return CACHED | |
859 return None | |
860 | |
861 def getAllHeaders(self): | |
862 """Return dictionary of all headers the request received.""" | |
863 return self.received_headers | |
864 | |
865 def getRequestHostname(self): | |
866 """ | |
867 Get the hostname that the user passed in to the request. | |
868 | |
869 This will either use the Host: header (if it is available) or the | |
870 host we are listening on if the header is unavailable. | |
871 | |
872 @returns: the requested hostname | |
873 @rtype: C{str} | |
874 """ | |
875 return (self.getHeader('host') or | |
876 socket.gethostbyaddr(self.getHost()[1])[0] | |
877 ).split(':')[0] | |
878 | |
879 def getHost(self): | |
880 """Get my originally requesting transport's host. | |
881 | |
882 Don't rely on the 'transport' attribute, since Request objects may be | |
883 copied remotely. For information on this method's return value, see | |
884 twisted.internet.tcp.Port. | |
885 """ | |
886 return self.host | |
887 | |
888 def setHost(self, host, port, ssl=0): | |
889 """Change the host and port the request thinks it's using. | |
890 | |
891 This method is useful for working with reverse HTTP proxies (e.g. | |
892 both Squid and Apache's mod_proxy can do this), when the address | |
893 the HTTP client is using is different than the one we're listening on. | |
894 | |
895 For example, Apache may be listening on https://www.example.com, and the
n | |
896 forwarding requests to http://localhost:8080, but we don't want HTML pro
duced | |
897 by Twisted to say 'http://localhost:8080', they should say 'https://www.
example.com', | |
898 so we do:: | |
899 | |
900 request.setHost('www.example.com', 443, ssl=1) | |
901 | |
902 This method is experimental. | |
903 """ | |
904 self._forceSSL = ssl | |
905 self.received_headers["host"] = host | |
906 self.host = address.IPv4Address("TCP", host, port) | |
907 | |
908 def getClientIP(self): | |
909 """ | |
910 Return the IP address of the client who submitted this request. | |
911 | |
912 @returns: the client IP address | |
913 @rtype: C{str} | |
914 """ | |
915 if isinstance(self.client, address.IPv4Address): | |
916 return self.client.host | |
917 else: | |
918 return None | |
919 | |
920 def isSecure(self): | |
921 """ | |
922 Return True if this request is using a secure transport. | |
923 | |
924 Normally this method returns True if this request's HTTPChannel | |
925 instance is using a transport that implements ISSLTransport. | |
926 | |
927 This will also return True if setHost() has been called | |
928 with ssl=True. | |
929 | |
930 @returns: True if this request is secure | |
931 @rtype: C{bool} | |
932 """ | |
933 if self._forceSSL: | |
934 return True | |
935 transport = getattr(getattr(self, 'channel', None), 'transport', None) | |
936 if interfaces.ISSLTransport(transport, None) is not None: | |
937 return True | |
938 return False | |
939 | |
940 def _authorize(self): | |
941 # Authorization, (mostly) per the RFC | |
942 try: | |
943 authh = self.getHeader("Authorization") | |
944 if not authh: | |
945 self.user = self.password = '' | |
946 return | |
947 bas, upw = authh.split() | |
948 if bas.lower() != "basic": | |
949 raise ValueError | |
950 upw = base64.decodestring(upw) | |
951 self.user, self.password = upw.split(':', 1) | |
952 except (binascii.Error, ValueError): | |
953 self.user = self.password = "" | |
954 except: | |
955 log.err() | |
956 self.user = self.password = "" | |
957 | |
def getUser(self):
    """
    Return the HTTP user sent with this request, if any.

    If no user was supplied, return the empty string.

    The credentials are parsed lazily: the first call that finds no
    C{user} attribute runs C{self._authorize()}, which sets both
    C{self.user} and C{self.password}.

    @returns: the HTTP user, if any
    @rtype: C{str}
    """
    try:
        return self.user
    except AttributeError:
        # Not parsed yet.  Only a missing attribute is expected here, so
        # nothing else (e.g. KeyboardInterrupt) is swallowed.
        pass
    self._authorize()
    return self.user
973 | |
def getPassword(self):
    """
    Return the HTTP password sent with this request, if any.

    If no password was supplied, return the empty string.

    The credentials are parsed lazily: the first call that finds no
    C{password} attribute runs C{self._authorize()}, which sets both
    C{self.user} and C{self.password}.

    @returns: the HTTP password, if any
    @rtype: C{str}
    """
    try:
        return self.password
    except AttributeError:
        # Not parsed yet.  Only a missing attribute is expected here, so
        # nothing else (e.g. KeyboardInterrupt) is swallowed.
        pass
    self._authorize()
    return self.password
989 | |
def getClient(self):
    """
    Return a host name for the client who submitted this request.

    The client address is resolved with C{socket.gethostbyaddr}.  The
    first returned name containing a dot is preferred; if none does, the
    primary name is used.  If the lookup fails, the client's host string
    is returned unchanged.

    @returns: a host name or address for the client, or C{None} if the
        client is not connected over TCP
    """
    if self.client.type != 'TCP':
        return None
    clientHost = self.client.host
    try:
        primary, aliases, _ = socket.gethostbyaddr(clientHost)
    except socket.error:
        return clientHost
    # Consider the primary name first, then its aliases.
    candidates = [primary] + aliases
    for candidate in candidates:
        if '.' in candidate:
            return candidate
    return candidates[0]
1003 | |
def connectionLost(self, reason):
    """
    Notification that the connection was lost.

    This implementation deliberately does nothing.

    @param reason: the reason the connection was lost (unused here)
    """
1007 | |
class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin):
    """
    A receiver for HTTP requests.

    Lines are consumed one at a time: first the request line, then the
    headers (folded continuation lines are joined onto the header they
    continue), and finally, when a positive Content-Length was given, the
    raw request body.

    @ivar maxHeaders: maximum number of headers allowed per request.
    @ivar length: number of body bytes still expected for the request
        currently being read.
    @ivar persistent: true if the connection stays open after the current
        request, as decided by L{checkPersistence}.
    @ivar requestFactory: callable invoked as
        C{requestFactory(channel, queued)} to create the object for each
        incoming request.
    @ivar requests: queue of requests that have been received (at least
        partially) but are not finished yet.
    """

    maxHeaders = 500 # max number of headers allowed per request

    length = 0
    persistent = 1
    # Parser state.  __first_line is 1 while waiting for a request line,
    # 2 once a single leading blank line has been skipped, and 0 while
    # headers are being read.
    __header = ''
    __first_line = 1
    __content = None

    # set in instances or subclasses
    requestFactory = Request

    _savedTimeOut = None

    def __init__(self):
        # the request queue
        self.requests = []

    def connectionMade(self):
        """Start the idle timeout as soon as the connection is up."""
        self.setTimeout(self.timeOut)

    def _respondToBadRequestAndDisconnect(self):
        """Write a bare 400 response and drop the connection."""
        self.transport.write("HTTP/1.1 400 Bad Request\r\n\r\n")
        self.transport.loseConnection()

    def lineReceived(self, line):
        """
        Handle one line of the request head.

        @param line: the received line, without its delimiter.
        """
        self.resetTimeout()

        if self.__first_line:
            # if this connection is not persistent, drop any data which
            # the client (illegally) sent after the last request.
            if not self.persistent:
                self.dataReceived = self.lineReceived = lambda *args: None
                return

            # IE sends an extraneous empty line (\r\n) after a POST request;
            # eat up such a line, but only ONCE
            if not line and self.__first_line == 1:
                self.__first_line = 2
                return

            # create a new Request object
            request = self.requestFactory(self, len(self.requests))
            self.requests.append(request)

            self.__first_line = 0
            parts = line.split()
            if len(parts) != 3:
                self._respondToBadRequestAndDisconnect()
                return
            # Remember the request line until the whole request has
            # arrived; it is handed over in allContentReceived().
            command, path, version = parts
            self._command = command
            self._path = path
            self._version = version
        elif line == '':
            # Blank line: end of the headers.
            if self.__header:
                if self.headerReceived(self.__header) is False:
                    # Bad header: a 400 has been sent and the connection
                    # is closing, so do not dispatch the request.
                    return
            self.__header = ''
            self.allHeadersReceived()
            if self.length == 0:
                self.allContentReceived()
            else:
                self.setRawMode()
        elif line[0] in ' \t':
            # Continuation of a folded header.
            self.__header = self.__header+'\n'+line
        else:
            # A new header starts; the previous one is now complete.
            if self.__header:
                if self.headerReceived(self.__header) is False:
                    return
            self.__header = line

    def headerReceived(self, line):
        """
        Do pre-processing (for content-length) and store this header away.

        A header without a colon, a Content-Length that is not a
        non-negative integer, or more than C{maxHeaders} headers is
        answered with a 400 response and the connection is dropped.

        @param line: one complete header, including any folded
            continuation lines.
        @returns: C{False} if the header was rejected, C{True} otherwise.
        @rtype: C{bool}
        """
        try:
            header, data = line.split(':', 1)
        except ValueError:
            # No colon at all: not a valid header line.
            self._respondToBadRequestAndDisconnect()
            return False
        header = header.lower()
        data = data.strip()
        if header == 'content-length':
            try:
                length = int(data)
            except ValueError:
                self._respondToBadRequestAndDisconnect()
                return False
            if length < 0:
                # A negative length would corrupt the body slicing in
                # rawDataReceived().
                self._respondToBadRequestAndDisconnect()
                return False
            self.length = length
        reqHeaders = self.requests[-1].received_headers
        reqHeaders[header] = data
        if len(reqHeaders) > self.maxHeaders:
            self._respondToBadRequestAndDisconnect()
            return False
        return True

    def allContentReceived(self):
        """
        Hand the completed request over for processing and reset the
        parser state for the next request on this connection.
        """
        command = self._command
        path = self._path
        version = self._version

        # reset ALL state variables, so we don't interfere with next request
        self.length = 0
        self.__header = ''
        self.__first_line = 1
        del self._command, self._path, self._version

        # Disable the idle timeout, in case this request takes a long
        # time to finish generating output.
        if self.timeOut:
            self._savedTimeOut = self.setTimeout(None)

        req = self.requests[-1]
        req.requestReceived(command, path, version)

    def rawDataReceived(self, data):
        """
        Feed request body data to the current request.

        @param data: raw bytes received while in raw mode.
        """
        if len(data) < self.length:
            self.requests[-1].handleContentChunk(data)
            self.length = self.length - len(data)
        else:
            # The body is complete; whatever follows it is handed back to
            # line mode.
            self.requests[-1].handleContentChunk(data[:self.length])
            extraneous = data[self.length:]
            self.allContentReceived()
            self.setLineMode(extraneous)

    def allHeadersReceived(self):
        """
        Called once the blank line ending the headers has been read.
        """
        req = self.requests[-1]
        req.parseCookies()
        self.persistent = self.checkPersistence(req, self._version)
        req.gotLength(self.length)

    def checkPersistence(self, request, version):
        """
        Check if the channel should close or not.

        @param request: the request whose C{connection} header is
            inspected.
        @param version: the protocol version string from the request line.
        @returns: 1 if the connection should be kept open, 0 otherwise.
        """
        connection = request.getHeader('connection')
        if connection:
            # NOTE(review): tokens are split on single spaces only, so a
            # comma-separated list such as "close, TE" is not recognized
            # as containing "close" -- confirm whether that is intended.
            tokens = map(str.lower, connection.split(' '))
        else:
            tokens = []

        # HTTP 1.0 persistent connection support is currently disabled,
        # since we need a way to disable pipelining. HTTP 1.0 can't do
        # pipelining since we can't know in advance if we'll have a
        # content-length header, if we don't have the header we need to
        # close the connection. In HTTP 1.1 this is not an issue since we
        # use chunked encoding if content-length is not available.

        #if version == "HTTP/1.0":
        #    if 'keep-alive' in tokens:
        #        request.setHeader('connection', 'Keep-Alive')
        #        return 1
        #    else:
        #        return 0
        if version == "HTTP/1.1":
            if 'close' in tokens:
                request.setHeader('connection', 'close')
                return 0
            else:
                return 1
        else:
            return 0

    def requestDone(self, request):
        """
        Called by first request in queue when it is done.

        @param request: the finished request; it must be the first entry
            of C{self.requests}.
        @raises TypeError: if C{request} is not the first queued request.
        """
        if request != self.requests[0]: raise TypeError
        del self.requests[0]

        if self.persistent:
            # notify next request it can start writing
            if self.requests:
                self.requests[0].noLongerQueued()
            else:
                # Nothing left to do: restore the idle timeout that
                # allContentReceived() disabled.
                if self._savedTimeOut:
                    self.setTimeout(self._savedTimeOut)
        else:
            self.transport.loseConnection()

    def timeoutConnection(self):
        """Log the idle timeout, then let TimeoutMixin handle it."""
        log.msg("Timing out client: %s" % str(self.transport.getPeer()))
        policies.TimeoutMixin.timeoutConnection(self)

    def connectionLost(self, reason):
        """
        Cancel the timeout and tell every queued request that the
        connection is gone.

        @param reason: passed through to each request's C{connectionLost}.
        """
        self.setTimeout(None)
        for request in self.requests:
            request.connectionLost(reason)
1179 | |
1180 | |
class HTTPFactory(protocol.ServerFactory):
    """
    Factory for HTTP server.

    @ivar logPath: absolute path of the access log file, or C{None} to
        write access log lines to C{log.logfile}.
    @ivar timeOut: idle timeout in seconds, copied onto every protocol
        this factory builds.
    """

    protocol = HTTPChannel

    logPath = None

    timeOut = 60 * 60 * 12

    def __init__(self, logPath=None, timeout=60*60*12):
        """
        @param logPath: path of the access log file, or C{None}.
        @param timeout: idle timeout in seconds for built protocols.
        """
        if logPath is not None:
            logPath = os.path.abspath(logPath)
        self.logPath = logPath
        self.timeOut = timeout

    def buildProtocol(self, addr):
        """Build a protocol and give it this factory's timeout."""
        p = protocol.ServerFactory.buildProtocol(self, addr)
        # timeOut needs to be on the Protocol instance cause
        # TimeoutMixin expects it there
        p.timeOut = self.timeOut
        return p

    def startFactory(self):
        """Start the log timestamp machinery and open the access log."""
        _logDateTimeStart()
        if self.logPath:
            self.logFile = self._openLogFile(self.logPath)
        else:
            self.logFile = log.logfile

    def stopFactory(self):
        """Close the access log (if this factory opened it) and stop the
        log timestamp machinery."""
        if hasattr(self, "logFile"):
            # Never close the shared twisted log file, only our own.
            if self.logFile != log.logfile:
                self.logFile.close()
            del self.logFile
        _logDateTimeStop()

    def _openLogFile(self, path):
        """Override in subclasses, e.g. to use twisted.python.logfile."""
        # Append mode, line buffered.
        f = open(path, "a", 1)
        return f

    def _escape(self, s):
        """
        Return C{s} escaped like a python repr, but always as if the
        surrounding quotes were double quotes.
        """
        r = repr(s)
        if r[0] == "'":
            # repr chose single quotes: escape embedded double quotes and
            # un-escape embedded single quotes.
            return r[1:-1].replace('"', '\\"').replace("\\'", "'")
        return r[1:-1]

    def log(self, request):
        """
        Log a request's result to the logfile, by default in combined log
        format.

        Nothing is written if the factory has no open C{logFile}.  Fields
        with no value, including the client IP of a non-IPv4 peer, are
        written as C{-}.

        @param request: the finished request to log.
        """
        if hasattr(self, "logFile"):
            line = '%s - - %s "%s" %d %s "%s" "%s"\n' % (
                # getClientIP() returns None for non-IPv4 clients; log
                # "-" rather than the literal string "None".
                request.getClientIP() or "-",
                # request.getUser() or "-", # the remote user is almost never important
                _logDateTime,
                '%s %s %s' % (self._escape(request.method),
                              self._escape(request.uri),
                              self._escape(request.clientproto)),
                request.code,
                request.sentLength or "-",
                self._escape(request.getHeader("referer") or "-"),
                self._escape(request.getHeader("user-agent") or "-"))
            self.logFile.write(line)