Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Unified Diff: Tools/Scripts/webkitpy/thirdparty/webpagereplay/httpclient.py

Issue 18418010: Check in the thirdparty libs needed for webkitpy. (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: Tools/Scripts/webkitpy/thirdparty/webpagereplay/httpclient.py
diff --git a/Tools/Scripts/webkitpy/thirdparty/webpagereplay/httpclient.py b/Tools/Scripts/webkitpy/thirdparty/webpagereplay/httpclient.py
new file mode 100644
index 0000000000000000000000000000000000000000..7bcc3a58aa4283df64e217e3b7df2bdd78bb0373
--- /dev/null
+++ b/Tools/Scripts/webkitpy/thirdparty/webpagereplay/httpclient.py
@@ -0,0 +1,374 @@
+#!/usr/bin/env python
+# Copyright 2011 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Retrieve web resources over http."""
+
+import copy
+import httparchive
+import httplib
+import logging
+import os
+import platformsettings
+import re
+import util
+
+
# Locate an opening <html> or <head> tag near the start of a document; used to
# pick the script-injection point.  The lazy '.{,256}?' limits the search to
# roughly the first 256 characters so huge non-HTML-ish bodies bail out fast.
HTML_RE = re.compile(r'^.{,256}?<html.*?>', re.IGNORECASE | re.DOTALL)
HEAD_RE = re.compile(r'^.{,256}?<head.*?>', re.IGNORECASE | re.DOTALL)
# Platform-specific clock used for response timing measurements
# (see platformsettings for the per-OS timer selection).
TIMER = platformsettings.get_platform_settings().timer
+
+
class HttpClientException(Exception):
  """Root of the exception hierarchy raised by this module."""
+
+
def GetInjectScript(scripts):
  """Loads |scripts| from disk and returns a string of their content.

  Args:
    scripts: iterable of script paths; each must be either a file on disk
        or a package resource reachable via util.resource_string.
  Returns:
    The concatenated contents of all the scripts, in order.
  Raises:
    HttpClientException: if a script is neither a file nor a resource.
  """
  lines = []
  for script in scripts:
    if os.path.exists(script):
      # Close the file handle promptly instead of leaking it.
      with open(script) as script_file:
        lines.append(script_file.read())
    elif util.resource_exists(script):
      lines.append(util.resource_string(script))
    else:
      # Interpolate eagerly: passing extra positional args to an exception
      # constructor does NOT format the message.
      raise HttpClientException('Script does not exist: %s' % script)
  return ''.join(lines)
+
+
+def _InjectScripts(response, inject_script):
+ """Injects |inject_script| immediately after <head> or <html>.
+
+ Copies |response| if it is modified.
+
+ Args:
+ response: an ArchivedHttpResponse
+ inject_script: JavaScript string (e.g. "Math.random = function(){...}")
+ Returns:
+ an ArchivedHttpResponse
+ """
+ if type(response) == tuple:
+ logging.warn('tuple response: %s', response)
+ content_type = response.get_header('content-type')
+ if content_type and content_type.startswith('text/html'):
+ text = response.get_data_as_text()
+
+ def InsertScriptAfter(matchobj):
+ return '%s<script>%s</script>' % (matchobj.group(0), inject_script)
+
+ if text and not inject_script in text:
+ text, is_injected = HEAD_RE.subn(InsertScriptAfter, text, 1)
+ if not is_injected:
+ text, is_injected = HTML_RE.subn(InsertScriptAfter, text, 1)
+ if not is_injected:
+ logging.warning('Failed to inject scripts.')
+ logging.debug('Response content: %s', text)
+ else:
+ response = copy.deepcopy(response)
+ response.set_data(text)
+ return response
+
+
class DetailedHTTPResponse(httplib.HTTPResponse):
  """Preserve details relevant to replaying responses.

  WARNING: This code uses attributes and methods of HTTPResponse
  that are not part of the public interface.
  """

  def read_chunks(self):
    """Return the response body content and timing data.

    The returned chunks have the chunk size and CRLFs stripped off.
    If the response was compressed, the returned data is still compressed.

    Returns:
      (chunks, delays)
        chunks:
          [response_body]                  # non-chunked responses
          [chunk_1, chunk_2, ...]         # chunked responses
        delays:
          [0]                              # non-chunked responses
          [chunk_1_first_byte_delay, ...]  # chunked responses

      The delay for the first body item should be recorded by the caller.
    """
    chunks = []
    delays = []
    if not self.chunked:
      chunks.append(self.read())
      delays.append(0)
    else:
      start = TIMER()
      try:
        while True:
          line = self.fp.readline()
          chunk_size = self._read_chunk_size(line)
          if chunk_size is None:
            raise httplib.IncompleteRead(''.join(chunks))
          if chunk_size == 0:
            break  # zero-sized chunk marks the end of the body
          delays.append(TIMER() - start)
          chunks.append(self._safe_read(chunk_size))
          self._safe_read(2)  # skip the CRLF at the end of the chunk
          start = TIMER()

        # Ignore any trailers.
        while True:
          line = self.fp.readline()
          if not line or line == '\r\n':
            break
      finally:
        self.close()
    return chunks, delays

  @classmethod
  def _read_chunk_size(cls, line):
    """Parse the chunk-size line of a chunked body.

    Args:
      line: e.g. '1a\\r\\n' or '1a;chunk-extension\\r\\n'
    Returns:
      The chunk size as an int, or None if the line is malformed.
    """
    chunk_extensions_pos = line.find(';')
    if chunk_extensions_pos != -1:
      # Strip chunk-extensions before parsing the hex size.
      # (Bug fix: this slice previously used the undefined name
      # 'extention_pos', raising NameError for any chunk with extensions.)
      line = line[:chunk_extensions_pos]
    try:
      chunk_size = int(line, 16)
    except ValueError:
      return None
    return chunk_size
+
+
class DetailedHTTPConnection(httplib.HTTPConnection):
  """HTTP connection whose responses capture replay-relevant details."""

  # Hand out DetailedHTTPResponse objects so chunk timings get recorded.
  response_class = DetailedHTTPResponse
+
+
class DetailedHTTPSResponse(DetailedHTTPResponse):
  """SSL variant of DetailedHTTPResponse; adds no behavior of its own."""
+
class DetailedHTTPSConnection(httplib.HTTPSConnection):
  """HTTPS connection whose responses capture replay-relevant details."""

  # Hand out DetailedHTTPSResponse objects so chunk timings get recorded.
  response_class = DetailedHTTPSResponse
+
+
class RealHttpFetch(object):
  """Fetches a live HTTP(S) response and packages it for the archive."""

  def __init__(self, real_dns_lookup, get_server_rtt):
    """Initialize RealHttpFetch.

    Args:
      real_dns_lookup: a function that resolves a host to an IP.
      get_server_rtt: a function that returns the round-trip time of a host.
    """
    self._real_dns_lookup = real_dns_lookup
    self._get_server_rtt = get_server_rtt

  def __call__(self, request):
    """Fetch an HTTP request.

    Args:
      request: an ArchivedHttpRequest
    Returns:
      an ArchivedHttpResponse, or None if DNS resolution failed or the
      fetch still failed after exhausting the retries.
    """
    logging.debug('RealHttpFetch: %s %s', request.host, request.path)
    host_ip = self._real_dns_lookup(request.host)
    if not host_ip:
      logging.critical('Unable to find host ip for name: %s', request.host)
      return None
    # Retry transient failures; up to 3 retries (4 attempts total).
    retries = 3
    while True:
      try:
        # Connect to the resolved IP directly — presumably so the fetch
        # bypasses whatever DNS interception replay has set up (TODO confirm).
        if request.is_ssl:
          connection = DetailedHTTPSConnection(host_ip)
        else:
          connection = DetailedHTTPConnection(host_ip)
        start = TIMER()
        connection.request(
            request.command,
            request.path,
            request.request_body,
            request.headers)
        response = connection.getresponse()
        # Time-to-first-headers in milliseconds, minus the host's round-trip
        # time so the delay reflects server think-time only.
        # NOTE(review): assumes get_server_rtt returns milliseconds — confirm.
        headers_delay = int((TIMER() - start) * 1000)
        headers_delay -= self._get_server_rtt(request.host)

        chunks, chunk_delays = response.read_chunks()
        delays = {
            'headers': headers_delay,
            'data': chunk_delays
            }
        archived_http_response = httparchive.ArchivedHttpResponse(
            response.version,
            response.status,
            response.reason,
            response.getheaders(),
            chunks,
            delays)
        return archived_http_response
      except Exception, e:
        if retries:
          retries -= 1
          logging.warning('Retrying fetch %s: %s', request, e)
          continue
        logging.critical('Could not fetch %s: %s', request, e)
        return None
+
+
class RecordHttpArchiveFetch(object):
  """Make real HTTP fetches and save responses in the given HttpArchive."""

  def __init__(self, http_archive, real_dns_lookup, inject_script,
               cache_misses=None):
    """Initialize RecordHttpArchiveFetch.

    Args:
      http_archive: an instance of a HttpArchive
      real_dns_lookup: a function that resolves a host to an IP.
      inject_script: script string to inject in all pages
      cache_misses: instance of CacheMissArchive
    """
    self.http_archive = http_archive
    self.inject_script = inject_script
    self.cache_misses = cache_misses
    self.real_http_fetch = RealHttpFetch(real_dns_lookup,
                                         http_archive.get_server_rtt)

  def __call__(self, request):
    """Record (or re-serve) the response for |request|.

    Args:
      request: an ArchivedHttpRequest.
    Returns:
      an ArchivedHttpResponse, or None if the real fetch failed.
    """
    if self.cache_misses:
      # In record mode every request is, by definition, not a cache miss.
      self.cache_misses.record_request(
          request, is_record_mode=True, is_cache_miss=False)

    if request in self.http_archive:
      # Serve the stored copy rather than re-fetching a repeated request.
      logging.debug('Repeated request found: %s', request)
      response = self.http_archive[request]
    else:
      response = self.real_http_fetch(request)
      if response is None:
        return None
      # Archive the pristine (un-injected) response.
      self.http_archive[request] = response
    if self.inject_script:
      response = _InjectScripts(response, self.inject_script)
    logging.debug('Recorded: %s', request)
    return response
+
+
class ReplayHttpArchiveFetch(object):
  """Serve responses from the given HttpArchive."""

  def __init__(self, http_archive, inject_script,
               use_diff_on_unknown_requests=False, cache_misses=None,
               use_closest_match=False):
    """Initialize ReplayHttpArchiveFetch.

    Args:
      http_archive: an instance of a HttpArchive
      inject_script: script string to inject in all pages
      use_diff_on_unknown_requests: If True, log unknown requests
          with a diff to requests that look similar.
      cache_misses: Instance of CacheMissArchive.
          Callback updates archive on cache misses
      use_closest_match: If True, on replay mode, serve the closest match
          in the archive instead of giving a 404.
    """
    self.http_archive = http_archive
    self.inject_script = inject_script
    self.cache_misses = cache_misses
    self.use_diff_on_unknown_requests = use_diff_on_unknown_requests
    self.use_closest_match = use_closest_match

  def __call__(self, request):
    """Fetch the request and return the response.

    Args:
      request: an instance of an ArchivedHttpRequest.
    Returns:
      Instance of ArchivedHttpResponse (if found) or None
    """
    response = self.http_archive.get(request)

    # Optionally substitute the nearest archived request for a miss.
    if self.use_closest_match and not response:
      closest_request = self.http_archive.find_closest_request(
          request, use_path=True)
      if closest_request:
        response = self.http_archive.get(closest_request)
        if response:
          logging.info('Request not found: %s\nUsing closest match: %s',
                       request, closest_request)

    if self.cache_misses:
      self.cache_misses.record_request(
          request, is_record_mode=False, is_cache_miss=not response)

    if response:
      return _InjectScripts(response, self.inject_script)

    # Miss: log enough detail to debug why the archive had no match.
    reason = str(request)
    if self.use_diff_on_unknown_requests:
      diff = self.http_archive.diff(request)
      if diff:
        reason += (
            "\nNearest request diff "
            "('-' for archived request, '+' for current request):\n%s" % diff)
    logging.warning('Could not replay: %s', reason)
    return response
+
+
class ControllableHttpArchiveFetch(object):
  """Controllable fetch function that can swap between record and replay."""

  def __init__(self, http_archive, real_dns_lookup,
               inject_script, use_diff_on_unknown_requests,
               use_record_mode, cache_misses, use_closest_match):
    """Initialize HttpArchiveFetch.

    Args:
      http_archive: an instance of a HttpArchive
      real_dns_lookup: a function that resolves a host to an IP.
      inject_script: script string to inject in all pages.
      use_diff_on_unknown_requests: If True, log unknown requests
          with a diff to requests that look similar.
      use_record_mode: If True, start the server in record mode.
      cache_misses: Instance of CacheMissArchive.
      use_closest_match: If True, on replay mode, serve the closest match
          in the archive instead of giving a 404.
    """
    # Build both fetchers up front; mode switching just swaps self.fetch.
    self.record_fetch = RecordHttpArchiveFetch(
        http_archive, real_dns_lookup, inject_script,
        cache_misses)
    self.replay_fetch = ReplayHttpArchiveFetch(
        http_archive, inject_script, use_diff_on_unknown_requests, cache_misses,
        use_closest_match)
    # Select the initial mode.
    if use_record_mode:
      self.SetRecordMode()
    else:
      self.SetReplayMode()

  def SetRecordMode(self):
    """Route subsequent calls to the recording fetcher."""
    self.is_record_mode = True
    self.fetch = self.record_fetch

  def SetReplayMode(self):
    """Route subsequent calls to the replaying fetcher."""
    self.is_record_mode = False
    self.fetch = self.replay_fetch

  def __call__(self, *args, **kwargs):
    """Forward calls to whichever fetcher the current mode selects."""
    return self.fetch(*args, **kwargs)

Powered by Google App Engine
This is Rietveld 408576698