Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(19)

Side by Side Diff: Tools/Scripts/webkitpy/thirdparty/webpagereplay/httpclient.py

Issue 18418010: Check in the thirdparty libs needed for webkitpy. (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright 2011 Google Inc. All Rights Reserved.
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 # http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15
16 """Retrieve web resources over http."""
17
18 import copy
19 import httparchive
20 import httplib
21 import logging
22 import os
23 import platformsettings
24 import re
25 import util
26
27
# Both patterns lazily skip at most 256 leading characters and then match the
# complete opening tag. DOTALL lets the skipped prefix and the tag attributes
# span newlines; IGNORECASE accepts <HTML>/<HEAD> as well.
HTML_RE = re.compile(r'^.{,256}?<html.*?>', re.DOTALL | re.IGNORECASE)
HEAD_RE = re.compile(r'^.{,256}?<head.*?>', re.DOTALL | re.IGNORECASE)

# Clock function supplied by the platform settings; used for all timing
# measurements in this module.
TIMER = platformsettings.get_platform_settings().timer
31
32
class HttpClientException(Exception):
  """Root of the exception hierarchy raised by the httpclient module."""
36
37
def GetInjectScript(scripts):
  """Loads |scripts| from disk and returns a string of their content.

  Each entry is first tried as a filesystem path and, failing that, as a
  resource looked up through util.

  Args:
    scripts: an iterable of script paths or resource names.
  Returns:
    The contents of all scripts concatenated in the given order ('' when
    |scripts| is empty).
  Raises:
    HttpClientException: if an entry is neither an existing file nor a
        known resource.
  """
  contents = []
  for script in scripts:
    if os.path.exists(script):
      # Close the file deterministically instead of leaking the handle.
      with open(script) as script_file:
        contents.append(script_file.read())
    elif util.resource_exists(script):
      contents.append(util.resource_string(script))
    else:
      # Interpolate here: passing |script| as a second constructor argument
      # would leave the '%s' placeholder unexpanded in the error message.
      raise HttpClientException('Script does not exist: %s' % script)
  return ''.join(contents)
49
50
51 def _InjectScripts(response, inject_script):
52 """Injects |inject_script| immediately after <head> or <html>.
53
54 Copies |response| if it is modified.
55
56 Args:
57 response: an ArchivedHttpResponse
58 inject_script: JavaScript string (e.g. "Math.random = function(){...}")
59 Returns:
60 an ArchivedHttpResponse
61 """
62 if type(response) == tuple:
63 logging.warn('tuple response: %s', response)
64 content_type = response.get_header('content-type')
65 if content_type and content_type.startswith('text/html'):
66 text = response.get_data_as_text()
67
68 def InsertScriptAfter(matchobj):
69 return '%s<script>%s</script>' % (matchobj.group(0), inject_script)
70
71 if text and not inject_script in text:
72 text, is_injected = HEAD_RE.subn(InsertScriptAfter, text, 1)
73 if not is_injected:
74 text, is_injected = HTML_RE.subn(InsertScriptAfter, text, 1)
75 if not is_injected:
76 logging.warning('Failed to inject scripts.')
77 logging.debug('Response content: %s', text)
78 else:
79 response = copy.deepcopy(response)
80 response.set_data(text)
81 return response
82
83
class DetailedHTTPResponse(httplib.HTTPResponse):
  """Preserve details relevant to replaying responses.

  WARNING: This code uses attributes and methods of HTTPResponse
  that are not part of the public interface.
  """

  def read_chunks(self):
    """Return the response body content and timing data.

    The returned chunks have the chunk size and CRLFs stripped off.
    If the response was compressed, the returned data is still compressed.

    Returns:
      (chunks, delays)
        chunks:
          [response_body]                  # non-chunked responses
          [chunk_1, chunk_2, ...]          # chunked responses
        delays:
          [0]                              # non-chunked responses
          [chunk_1_first_byte_delay, ...]  # chunked responses

      The delay for the first body item should be recorded by the caller.

    Raises:
      httplib.IncompleteRead: if a chunk-size line cannot be parsed; the
          exception carries the chunk data read so far.
    """
    chunks = []
    delays = []
    if not self.chunked:
      chunks.append(self.read())
      delays.append(0)
      return chunks, delays

    start = TIMER()
    try:
      while True:
        line = self.fp.readline()
        chunk_size = self._read_chunk_size(line)
        if chunk_size is None:
          raise httplib.IncompleteRead(''.join(chunks))
        if chunk_size == 0:
          break  # a zero-sized chunk terminates the body
        # The delay is taken once the size line has arrived, so it excludes
        # the time spent reading the chunk data itself.
        delays.append(TIMER() - start)
        chunks.append(self._safe_read(chunk_size))
        self._safe_read(2)  # skip the CRLF at the end of the chunk
        start = TIMER()

      # Ignore any trailers.
      while True:
        line = self.fp.readline()
        if not line or line == '\r\n':
          break
    finally:
      self.close()
    return chunks, delays

  @classmethod
  def _read_chunk_size(cls, line):
    """Parse the hexadecimal chunk size from a chunk header line.

    Args:
      line: the chunk-size line, optionally followed by ';'-separated
          chunk-extensions.
    Returns:
      The chunk size as an int, or None if |line| does not start with a
      valid hexadecimal number (e.g. an empty line at end of stream).
    """
    chunk_extensions_pos = line.find(';')
    if chunk_extensions_pos != -1:
      line = line[:chunk_extensions_pos]  # strip chunk-extensions
    try:
      return int(line, 16)
    except ValueError:
      return None
148
149
class DetailedHTTPConnection(httplib.HTTPConnection):
  """HTTP connection whose responses are DetailedHTTPResponse objects."""

  # httplib instantiates this class for every response on the connection.
  response_class = DetailedHTTPResponse
153
154
class DetailedHTTPSResponse(DetailedHTTPResponse):
  """DetailedHTTPResponse variant used for responses received over SSL."""
158
class DetailedHTTPSConnection(httplib.HTTPSConnection):
  """HTTPS connection whose responses are DetailedHTTPSResponse objects."""

  # httplib instantiates this class for every response on the connection.
  response_class = DetailedHTTPSResponse
162
163
class RealHttpFetch(object):
  """Fetch requests from the real server and capture timing details."""

  def __init__(self, real_dns_lookup, get_server_rtt):
    """Initialize RealHttpFetch.

    Args:
      real_dns_lookup: a function that resolves a host to an IP.
      get_server_rtt: a function that returns the round-trip time of a host.
    """
    self._real_dns_lookup = real_dns_lookup
    self._get_server_rtt = get_server_rtt

  def __call__(self, request):
    """Fetch an HTTP request.

    A failed attempt is retried up to three times; every attempt opens a
    fresh connection, which is closed again before the next attempt or
    before returning.

    Args:
      request: an ArchivedHttpRequest
    Returns:
      an ArchivedHttpResponse, or None if the host cannot be resolved or
      every attempt failed.
    """
    logging.debug('RealHttpFetch: %s %s', request.host, request.path)
    host_ip = self._real_dns_lookup(request.host)
    if not host_ip:
      logging.critical('Unable to find host ip for name: %s', request.host)
      return None
    retries = 3
    while True:
      connection = None
      try:
        if request.is_ssl:
          connection = DetailedHTTPSConnection(host_ip)
        else:
          connection = DetailedHTTPConnection(host_ip)
        start = TIMER()
        connection.request(
            request.command,
            request.path,
            request.request_body,
            request.headers)
        response = connection.getresponse()
        # Time until the response headers arrived, scaled by 1000
        # (presumably seconds -> milliseconds; confirm TIMER's unit), minus
        # the server round-trip time.
        headers_delay = int((TIMER() - start) * 1000)
        headers_delay -= self._get_server_rtt(request.host)

        chunks, chunk_delays = response.read_chunks()
        delays = {
            'headers': headers_delay,
            'data': chunk_delays
            }
        return httparchive.ArchivedHttpResponse(
            response.version,
            response.status,
            response.reason,
            response.getheaders(),
            chunks,
            delays)
      except Exception as e:
        # Deliberately broad: any failure of an attempt triggers a retry.
        if retries:
          retries -= 1
          logging.warning('Retrying fetch %s: %s', request, e)
          continue
        logging.critical('Could not fetch %s: %s', request, e)
        return None
      finally:
        # The body has been fully read (or the attempt failed), so release
        # the socket instead of leaking one connection per attempt.
        if connection is not None:
          connection.close()
225
226
class RecordHttpArchiveFetch(object):
  """Fetch over the real network and store responses in an HttpArchive."""

  def __init__(self, http_archive, real_dns_lookup, inject_script,
               cache_misses=None):
    """Initialize RecordHttpArchiveFetch.

    Args:
      http_archive: an instance of a HttpArchive; it also supplies the
          get_server_rtt function handed to RealHttpFetch.
      real_dns_lookup: a function that resolves a host to an IP.
      inject_script: script string to inject in all pages
      cache_misses: instance of CacheMissArchive
    """
    self.cache_misses = cache_misses
    self.inject_script = inject_script
    self.http_archive = http_archive
    self.real_http_fetch = RealHttpFetch(
        real_dns_lookup, http_archive.get_server_rtt)

  def __call__(self, request):
    """Return the response for |request|, recording it if it is new.

    Args:
      request: an ArchivedHttpRequest.
    Returns:
      an ArchivedHttpResponse, or None when the real fetch failed. The
      archive keeps the response as fetched; script injection is applied
      only to the returned value.
    """
    if self.cache_misses:
      self.cache_misses.record_request(
          request, is_record_mode=True, is_cache_miss=False)

    archive = self.http_archive
    if request not in archive:
      fetched = self.real_http_fetch(request)
      if fetched is None:
        return None
      archive[request] = fetched
      response = fetched
    else:
      # Seen before: serve the archived response instead of refetching.
      logging.debug('Repeated request found: %s', request)
      response = archive[request]

    if self.inject_script:
      response = _InjectScripts(response, self.inject_script)
    logging.debug('Recorded: %s', request)
    return response
271
272
class ReplayHttpArchiveFetch(object):
  """Serve responses from the given HttpArchive."""

  def __init__(self, http_archive, inject_script,
               use_diff_on_unknown_requests=False, cache_misses=None,
               use_closest_match=False):
    """Initialize ReplayHttpArchiveFetch.

    Args:
      http_archive: an instance of a HttpArchive
      inject_script: script string to inject in all pages
      use_diff_on_unknown_requests: If True, log unknown requests
        with a diff to requests that look similar.
      cache_misses: Instance of CacheMissArchive.
        Callback updates archive on cache misses
      use_closest_match: If True, on replay mode, serve the closest match
        in the archive instead of giving a 404.
    """
    self.http_archive = http_archive
    self.inject_script = inject_script
    self.use_diff_on_unknown_requests = use_diff_on_unknown_requests
    self.cache_misses = cache_misses
    self.use_closest_match = use_closest_match

  def __call__(self, request):
    """Fetch the request and return the response.

    Args:
      request: an instance of an ArchivedHttpRequest.
    Returns:
      Instance of ArchivedHttpResponse (if found) or None
    """
    response = self.http_archive.get(request)

    if self.use_closest_match and not response:
      closest_request = self.http_archive.find_closest_request(
          request, use_path=True)
      if closest_request:
        response = self.http_archive.get(closest_request)
        if response:
          logging.info('Request not found: %s\nUsing closest match: %s',
                       request, closest_request)

    # Recorded after the closest-match lookup, so a request served by a
    # closest match does not count as a cache miss.
    if self.cache_misses:
      self.cache_misses.record_request(
          request, is_record_mode=False, is_cache_miss=not response)

    if not response:
      reason = str(request)
      if self.use_diff_on_unknown_requests:
        diff = self.http_archive.diff(request)
        if diff:
          reason += (
              "\nNearest request diff "
              "('-' for archived request, '+' for current request):\n%s" % diff)
      logging.warning('Could not replay: %s', reason)
    elif self.inject_script:
      # Same guard as RecordHttpArchiveFetch: skip injection entirely when
      # no script was configured (inject_script may be None).
      response = _InjectScripts(response, self.inject_script)
    return response
332
333
class ControllableHttpArchiveFetch(object):
  """Fetch callable that can be switched between record and replay mode."""

  def __init__(self, http_archive, real_dns_lookup,
               inject_script, use_diff_on_unknown_requests,
               use_record_mode, cache_misses, use_closest_match):
    """Initialize HttpArchiveFetch.

    Builds one record fetcher and one replay fetcher over the same archive
    and selects the initial mode.

    Args:
      http_archive: an instance of a HttpArchive
      real_dns_lookup: a function that resolves a host to an IP.
      inject_script: script string to inject in all pages.
      use_diff_on_unknown_requests: If True, log unknown requests
        with a diff to requests that look similar.
      use_record_mode: If True, start the server in record mode.
      cache_misses: Instance of CacheMissArchive.
      use_closest_match: If True, on replay mode, serve the closest match
        in the archive instead of giving a 404.
    """
    self.record_fetch = RecordHttpArchiveFetch(
        http_archive, real_dns_lookup, inject_script, cache_misses)
    self.replay_fetch = ReplayHttpArchiveFetch(
        http_archive, inject_script, use_diff_on_unknown_requests,
        cache_misses, use_closest_match)
    choose_mode = self.SetRecordMode if use_record_mode else self.SetReplayMode
    choose_mode()

  def SetRecordMode(self):
    """Route subsequent calls to the record fetcher."""
    self.is_record_mode = True
    self.fetch = self.record_fetch

  def SetReplayMode(self):
    """Route subsequent calls to the replay fetcher."""
    self.is_record_mode = False
    self.fetch = self.replay_fetch

  def __call__(self, *args, **kwargs):
    """Forward calls to Replay/Record fetch functions depending on mode."""
    active_fetch = self.fetch
    return active_fetch(*args, **kwargs)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698