| Index: tools/telemetry/third_party/webpagereplay/httpclient.py
|
| diff --git a/tools/telemetry/third_party/webpagereplay/httpclient.py b/tools/telemetry/third_party/webpagereplay/httpclient.py
|
| deleted file mode 100644
|
| index 9159a48132c3d35e171e8b13ae8ce55da2a924e1..0000000000000000000000000000000000000000
|
| --- a/tools/telemetry/third_party/webpagereplay/httpclient.py
|
| +++ /dev/null
|
| @@ -1,492 +0,0 @@
|
| -#!/usr/bin/env python
|
| -# Copyright 2012 Google Inc. All Rights Reserved.
|
| -#
|
| -# Licensed under the Apache License, Version 2.0 (the "License");
|
| -# you may not use this file except in compliance with the License.
|
| -# You may obtain a copy of the License at
|
| -#
|
| -# http://www.apache.org/licenses/LICENSE-2.0
|
| -#
|
| -# Unless required by applicable law or agreed to in writing, software
|
| -# distributed under the License is distributed on an "AS IS" BASIS,
|
| -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| -# See the License for the specific language governing permissions and
|
| -# limitations under the License.
|
| -
|
| -"""Retrieve web resources over http."""
|
| -
|
| -import copy
|
| -import httplib
|
| -import logging
|
| -import random
|
| -import StringIO
|
| -
|
| -import httparchive
|
| -import platformsettings
|
| -import script_injector
|
| -
|
| -
|
| -# PIL isn't always available, but we still want to be able to run without
|
| -# the image scrambling functionality in this case.
|
| -try:
|
| - import Image
|
| -except ImportError:
|
| - Image = None
|
| -
|
| -TIMER = platformsettings.timer
|
| -
|
| -
|
| -class HttpClientException(Exception):
|
| - """Base class for all exceptions in httpclient."""
|
| - pass
|
| -
|
| -
|
| -def _InjectScripts(response, inject_script):
|
| - """Injects |inject_script| immediately after <head> or <html>.
|
| -
|
| - Copies |response| if it is modified.
|
| -
|
| - Args:
|
| - response: an ArchivedHttpResponse
|
| - inject_script: JavaScript string (e.g. "Math.random = function(){...}")
|
| - Returns:
|
| - an ArchivedHttpResponse
|
| - """
|
| - if type(response) == tuple:
|
| - logging.warn('tuple response: %s', response)
|
| - content_type = response.get_header('content-type')
|
| - if content_type and content_type.startswith('text/html'):
|
| - text = response.get_data_as_text()
|
| - text, already_injected = script_injector.InjectScript(
|
| - text, 'text/html', inject_script)
|
| - if not already_injected:
|
| - response = copy.deepcopy(response)
|
| - response.set_data(text)
|
| - return response
|
| -
|
| -
|
| -def _ScrambleImages(response):
|
| - """If the |response| is an image, attempt to scramble it.
|
| -
|
| - Copies |response| if it is modified.
|
| -
|
| - Args:
|
| - response: an ArchivedHttpResponse
|
| - Returns:
|
| - an ArchivedHttpResponse
|
| - """
|
| -
|
| - assert Image, '--scramble_images requires the PIL module to be installed.'
|
| -
|
| - content_type = response.get_header('content-type')
|
| - if content_type and content_type.startswith('image/'):
|
| - try:
|
| - image_data = response.response_data[0]
|
| - image_data.decode(encoding='base64')
|
| - im = Image.open(StringIO.StringIO(image_data))
|
| -
|
| - pixel_data = list(im.getdata())
|
| - random.shuffle(pixel_data)
|
| -
|
| - scrambled_image = im.copy()
|
| - scrambled_image.putdata(pixel_data)
|
| -
|
| - output_image_io = StringIO.StringIO()
|
| - scrambled_image.save(output_image_io, im.format)
|
| - output_image_data = output_image_io.getvalue()
|
| - output_image_data.encode(encoding='base64')
|
| -
|
| - response = copy.deepcopy(response)
|
| - response.set_data(output_image_data)
|
| - except Exception:
|
| - pass
|
| -
|
| - return response
|
| -
|
| -
|
| -class DetailedHTTPResponse(httplib.HTTPResponse):
|
| - """Preserve details relevant to replaying responses.
|
| -
|
| - WARNING: This code uses attributes and methods of HTTPResponse
|
| - that are not part of the public interface.
|
| - """
|
| -
|
| - def read_chunks(self):
|
| - """Return the response body content and timing data.
|
| -
|
| - The returned chunks have the chunk size and CRLFs stripped off.
|
| - If the response was compressed, the returned data is still compressed.
|
| -
|
| - Returns:
|
| - (chunks, delays)
|
| - chunks:
|
| - [response_body] # non-chunked responses
|
| - [chunk_1, chunk_2, ...] # chunked responses
|
| - delays:
|
| - [0] # non-chunked responses
|
| - [chunk_1_first_byte_delay, ...] # chunked responses
|
| -
|
| - The delay for the first body item should be recorded by the caller.
|
| - """
|
| - buf = []
|
| - chunks = []
|
| - delays = []
|
| - if not self.chunked:
|
| - chunks.append(self.read())
|
| - delays.append(0)
|
| - else:
|
| - start = TIMER()
|
| - try:
|
| - while True:
|
| - line = self.fp.readline()
|
| - chunk_size = self._read_chunk_size(line)
|
| - if chunk_size is None:
|
| - raise httplib.IncompleteRead(''.join(chunks))
|
| - if chunk_size == 0:
|
| - break
|
| - delays.append(TIMER() - start)
|
| - chunks.append(self._safe_read(chunk_size))
|
| - self._safe_read(2) # skip the CRLF at the end of the chunk
|
| - start = TIMER()
|
| -
|
| - # Ignore any trailers.
|
| - while True:
|
| - line = self.fp.readline()
|
| - if not line or line == '\r\n':
|
| - break
|
| - finally:
|
| - self.close()
|
| - return chunks, delays
|
| -
|
| - @classmethod
|
| - def _read_chunk_size(cls, line):
|
| - chunk_extensions_pos = line.find(';')
|
| - if chunk_extensions_pos != -1:
|
| - line = line[:chunk_extensions_pos] # strip chunk-extensions
|
| - try:
|
| - chunk_size = int(line, 16)
|
| - except ValueError:
|
| - return None
|
| - return chunk_size
|
| -
|
| -
|
| -class DetailedHTTPConnection(httplib.HTTPConnection):
|
| - """Preserve details relevant to replaying connections."""
|
| - response_class = DetailedHTTPResponse
|
| -
|
| -
|
| -class DetailedHTTPSResponse(DetailedHTTPResponse):
|
| - """Preserve details relevant to replaying SSL responses."""
|
| - pass
|
| -
|
| -
|
| -class DetailedHTTPSConnection(httplib.HTTPSConnection):
|
| - """Preserve details relevant to replaying SSL connections."""
|
| - response_class = DetailedHTTPSResponse
|
| -
|
| -
|
| -class RealHttpFetch(object):
|
| -
|
| - def __init__(self, real_dns_lookup):
|
| - """Initialize RealHttpFetch.
|
| -
|
| - Args:
|
| - real_dns_lookup: a function that resolves a host to an IP.
|
| - """
|
| - self._real_dns_lookup = real_dns_lookup
|
| -
|
| - @staticmethod
|
| - def _GetHeaderNameValue(header):
|
| - """Parse the header line and return a name/value tuple.
|
| -
|
| - Args:
|
| - header: a string for a header such as "Content-Length: 314".
|
| - Returns:
|
| - A tuple (header_name, header_value) on success or None if the header
|
| - is not in expected format. header_name is in lowercase.
|
| - """
|
| - i = header.find(':')
|
| - if i > 0:
|
| - return (header[:i].lower(), header[i+1:].strip())
|
| - return None
|
| -
|
| - @staticmethod
|
| - def _ToTuples(headers):
|
| - """Parse headers and save them to a list of tuples.
|
| -
|
| - This method takes HttpResponse.msg.headers as input and convert it
|
| - to a list of (header_name, header_value) tuples.
|
| - HttpResponse.msg.headers is a list of strings where each string
|
| - represents either a header or a continuation line of a header.
|
| - 1. a normal header consists of two parts which are separated by colon :
|
| - "header_name:header_value..."
|
| - 2. a continuation line is a string starting with whitespace
|
| - "[whitespace]continued_header_value..."
|
| - If a header is not in good shape or an unexpected continuation line is
|
| - seen, it will be ignored.
|
| -
|
| - Should avoid using response.getheaders() directly
|
| - because response.getheaders() can't handle multiple headers
|
| - with the same name properly. Instead, parse the
|
| - response.msg.headers using this method to get all headers.
|
| -
|
| - Args:
|
| - headers: an instance of HttpResponse.msg.headers.
|
| - Returns:
|
| - A list of tuples which looks like:
|
| - [(header_name, header_value), (header_name2, header_value2)...]
|
| - """
|
| - all_headers = []
|
| - for line in headers:
|
| - if line[0] in '\t ':
|
| - if not all_headers:
|
| - logging.warning(
|
| - 'Unexpected response header continuation line [%s]', line)
|
| - continue
|
| - name, value = all_headers.pop()
|
| - value += '\n ' + line.strip()
|
| - else:
|
| - name_value = RealHttpFetch._GetHeaderNameValue(line)
|
| - if not name_value:
|
| - logging.warning(
|
| - 'Response header in wrong format [%s]', line)
|
| - continue
|
| - name, value = name_value # pylint: disable=unpacking-non-sequence
|
| - all_headers.append((name, value))
|
| - return all_headers
|
| -
|
| - @staticmethod
|
| - def _get_request_host_port(request):
|
| - host_parts = request.host.split(':')
|
| - host = host_parts[0]
|
| - port = int(host_parts[1]) if len(host_parts) == 2 else None
|
| - return host, port
|
| -
|
| - @staticmethod
|
| - def _get_system_proxy(is_ssl):
|
| - return platformsettings.get_system_proxy(is_ssl)
|
| -
|
| - def _get_connection(self, request_host, request_port, is_ssl):
|
| - """Return a detailed connection object for host/port pair.
|
| -
|
| - If a system proxy is defined (see platformsettings.py), it will be used.
|
| -
|
| - Args:
|
| - request_host: a host string (e.g. "www.example.com").
|
| - request_port: a port integer (e.g. 8080) or None (for the default port).
|
| - is_ssl: True if HTTPS connection is needed.
|
| - Returns:
|
| - A DetailedHTTPSConnection or DetailedHTTPConnection instance.
|
| - """
|
| - connection_host = request_host
|
| - connection_port = request_port
|
| - system_proxy = self._get_system_proxy(is_ssl)
|
| - if system_proxy:
|
| - connection_host = system_proxy.host
|
| - connection_port = system_proxy.port
|
| -
|
| - # Use an IP address because WPR may override DNS settings.
|
| - connection_ip = self._real_dns_lookup(connection_host)
|
| - if not connection_ip:
|
| - logging.critical('Unable to find host ip for name: %s', connection_host)
|
| - return None
|
| -
|
| - if is_ssl:
|
| - connection = DetailedHTTPSConnection(connection_ip, connection_port)
|
| - if system_proxy:
|
| - connection.set_tunnel(request_host, request_port)
|
| - else:
|
| - connection = DetailedHTTPConnection(connection_ip, connection_port)
|
| - return connection
|
| -
|
| - def __call__(self, request):
|
| - """Fetch an HTTP request.
|
| -
|
| - Args:
|
| - request: an ArchivedHttpRequest
|
| - Returns:
|
| - an ArchivedHttpResponse
|
| - """
|
| - logging.debug('RealHttpFetch: %s %s', request.host, request.full_path)
|
| - request_host, request_port = self._get_request_host_port(request)
|
| - retries = 3
|
| - while True:
|
| - try:
|
| - connection = self._get_connection(
|
| - request_host, request_port, request.is_ssl)
|
| - connect_start = TIMER()
|
| - connection.connect()
|
| - connect_delay = int((TIMER() - connect_start) * 1000)
|
| - start = TIMER()
|
| - connection.request(
|
| - request.command,
|
| - request.full_path,
|
| - request.request_body,
|
| - request.headers)
|
| - response = connection.getresponse()
|
| - headers_delay = int((TIMER() - start) * 1000)
|
| -
|
| - chunks, chunk_delays = response.read_chunks()
|
| - delays = {
|
| - 'connect': connect_delay,
|
| - 'headers': headers_delay,
|
| - 'data': chunk_delays
|
| - }
|
| - archived_http_response = httparchive.ArchivedHttpResponse(
|
| - response.version,
|
| - response.status,
|
| - response.reason,
|
| - RealHttpFetch._ToTuples(response.msg.headers),
|
| - chunks,
|
| - delays)
|
| - return archived_http_response
|
| - except Exception, e:
|
| - if retries:
|
| - retries -= 1
|
| - logging.warning('Retrying fetch %s: %s', request, repr(e))
|
| - continue
|
| - logging.critical('Could not fetch %s: %s', request, repr(e))
|
| - return None
|
| -
|
| -
|
| -class RecordHttpArchiveFetch(object):
|
| - """Make real HTTP fetches and save responses in the given HttpArchive."""
|
| -
|
| - def __init__(self, http_archive, real_dns_lookup, inject_script):
|
| - """Initialize RecordHttpArchiveFetch.
|
| -
|
| - Args:
|
| - http_archive: an instance of a HttpArchive
|
| - real_dns_lookup: a function that resolves a host to an IP.
|
| - inject_script: script string to inject in all pages
|
| - """
|
| - self.http_archive = http_archive
|
| - self.real_http_fetch = RealHttpFetch(real_dns_lookup)
|
| - self.inject_script = inject_script
|
| -
|
| - def __call__(self, request):
|
| - """Fetch the request and return the response.
|
| -
|
| - Args:
|
| - request: an ArchivedHttpRequest.
|
| - Returns:
|
| - an ArchivedHttpResponse
|
| - """
|
| - # If request is already in the archive, return the archived response.
|
| - if request in self.http_archive:
|
| - logging.debug('Repeated request found: %s', request)
|
| - response = self.http_archive[request]
|
| - else:
|
| - response = self.real_http_fetch(request)
|
| - if response is None:
|
| - return None
|
| - self.http_archive[request] = response
|
| - if self.inject_script:
|
| - response = _InjectScripts(response, self.inject_script)
|
| - logging.debug('Recorded: %s', request)
|
| - return response
|
| -
|
| -
|
| -class ReplayHttpArchiveFetch(object):
|
| - """Serve responses from the given HttpArchive."""
|
| -
|
| - def __init__(self, http_archive, real_dns_lookup, inject_script,
|
| - use_diff_on_unknown_requests=False,
|
| - use_closest_match=False, scramble_images=False):
|
| - """Initialize ReplayHttpArchiveFetch.
|
| -
|
| - Args:
|
| - http_archive: an instance of a HttpArchive
|
| - real_dns_lookup: a function that resolves a host to an IP.
|
| - inject_script: script string to inject in all pages
|
| - use_diff_on_unknown_requests: If True, log unknown requests
|
| - with a diff to requests that look similar.
|
| - use_closest_match: If True, on replay mode, serve the closest match
|
| - in the archive instead of giving a 404.
|
| - """
|
| - self.http_archive = http_archive
|
| - self.inject_script = inject_script
|
| - self.use_diff_on_unknown_requests = use_diff_on_unknown_requests
|
| - self.use_closest_match = use_closest_match
|
| - self.scramble_images = scramble_images
|
| - self.real_http_fetch = RealHttpFetch(real_dns_lookup)
|
| -
|
| - def __call__(self, request):
|
| - """Fetch the request and return the response.
|
| -
|
| - Args:
|
| - request: an instance of an ArchivedHttpRequest.
|
| - Returns:
|
| - Instance of ArchivedHttpResponse (if found) or None
|
| - """
|
| - if request.host.startswith('127.0.0.1:'):
|
| - return self.real_http_fetch(request)
|
| -
|
| - response = self.http_archive.get(request)
|
| -
|
| - if self.use_closest_match and not response:
|
| - closest_request = self.http_archive.find_closest_request(
|
| - request, use_path=True)
|
| - if closest_request:
|
| - response = self.http_archive.get(closest_request)
|
| - if response:
|
| - logging.info('Request not found: %s\nUsing closest match: %s',
|
| - request, closest_request)
|
| -
|
| - if not response:
|
| - reason = str(request)
|
| - if self.use_diff_on_unknown_requests:
|
| - diff = self.http_archive.diff(request)
|
| - if diff:
|
| - reason += (
|
| - "\nNearest request diff "
|
| - "('-' for archived request, '+' for current request):\n%s" % diff)
|
| - logging.warning('Could not replay: %s', reason)
|
| - else:
|
| - if self.inject_script:
|
| - response = _InjectScripts(response, self.inject_script)
|
| - if self.scramble_images:
|
| - response = _ScrambleImages(response)
|
| - return response
|
| -
|
| -
|
| -class ControllableHttpArchiveFetch(object):
|
| - """Controllable fetch function that can swap between record and replay."""
|
| -
|
| - def __init__(self, http_archive, real_dns_lookup,
|
| - inject_script, use_diff_on_unknown_requests,
|
| - use_record_mode, use_closest_match, scramble_images):
|
| - """Initialize HttpArchiveFetch.
|
| -
|
| - Args:
|
| - http_archive: an instance of a HttpArchive
|
| - real_dns_lookup: a function that resolves a host to an IP.
|
| - inject_script: script string to inject in all pages.
|
| - use_diff_on_unknown_requests: If True, log unknown requests
|
| - with a diff to requests that look similar.
|
| - use_record_mode: If True, start in server in record mode.
|
| - use_closest_match: If True, on replay mode, serve the closest match
|
| - in the archive instead of giving a 404.
|
| - """
|
| - self.http_archive = http_archive
|
| - self.record_fetch = RecordHttpArchiveFetch(
|
| - http_archive, real_dns_lookup, inject_script)
|
| - self.replay_fetch = ReplayHttpArchiveFetch(
|
| - http_archive, real_dns_lookup, inject_script,
|
| - use_diff_on_unknown_requests, use_closest_match, scramble_images)
|
| - if use_record_mode:
|
| - self.SetRecordMode()
|
| - else:
|
| - self.SetReplayMode()
|
| -
|
| - def SetRecordMode(self):
|
| - self.fetch = self.record_fetch
|
| - self.is_record_mode = True
|
| -
|
| - def SetReplayMode(self):
|
| - self.fetch = self.replay_fetch
|
| - self.is_record_mode = False
|
| -
|
| - def __call__(self, *args, **kwargs):
|
| - """Forward calls to Replay/Record fetch functions depending on mode."""
|
| - return self.fetch(*args, **kwargs)
|
|
|