Chromium Code Reviews

Unified Diff: tools/telemetry/third_party/webpagereplay/httpclient.py

Issue 1647513002: Delete tools/telemetry. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 11 months ago
Index: tools/telemetry/third_party/webpagereplay/httpclient.py
diff --git a/tools/telemetry/third_party/webpagereplay/httpclient.py b/tools/telemetry/third_party/webpagereplay/httpclient.py
deleted file mode 100644
index 9159a48132c3d35e171e8b13ae8ce55da2a924e1..0000000000000000000000000000000000000000
--- a/tools/telemetry/third_party/webpagereplay/httpclient.py
+++ /dev/null
@@ -1,492 +0,0 @@
-#!/usr/bin/env python
-# Copyright 2012 Google Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Retrieve web resources over http."""
-
-import copy
-import httplib
-import logging
-import random
-import StringIO
-
-import httparchive
-import platformsettings
-import script_injector
-
-
-# PIL isn't always available, but we still want to be able to run without
-# the image scrambling functionality in this case.
-try:
- import Image
-except ImportError:
- Image = None
-
-TIMER = platformsettings.timer
-
-
-class HttpClientException(Exception):
- """Base class for all exceptions in httpclient."""
- pass
-
-
-def _InjectScripts(response, inject_script):
- """Injects |inject_script| immediately after <head> or <html>.
-
- Copies |response| if it is modified.
-
- Args:
- response: an ArchivedHttpResponse
- inject_script: JavaScript string (e.g. "Math.random = function(){...}")
- Returns:
- an ArchivedHttpResponse
- """
-  if isinstance(response, tuple):
-    logging.warning('tuple response: %s', response)
- content_type = response.get_header('content-type')
- if content_type and content_type.startswith('text/html'):
- text = response.get_data_as_text()
- text, already_injected = script_injector.InjectScript(
- text, 'text/html', inject_script)
- if not already_injected:
- response = copy.deepcopy(response)
- response.set_data(text)
- return response
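
Note for readers: script_injector.InjectScript is defined elsewhere in this package, so only its contract is visible here. Below is a minimal sketch of that contract with a hypothetical helper name, assuming the injection point is right after <head>, with <html> as the fallback:

import re

def inject_after_head(html, script):
  """Hypothetical stand-in for script_injector.InjectScript."""
  tag = '<script>%s</script>' % script
  if tag in html:
    return html, True  # already injected; the caller skips the deepcopy
  for pattern in (r'<head[^>]*>', r'<html[^>]*>'):
    match = re.search(pattern, html, re.IGNORECASE)
    if match:
      pos = match.end()
      return html[:pos] + tag + html[pos:], False
  return html, False  # no anchor tag found; leave the page untouched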
-
-
-def _ScrambleImages(response):
- """If the |response| is an image, attempt to scramble it.
-
- Copies |response| if it is modified.
-
- Args:
- response: an ArchivedHttpResponse
- Returns:
- an ArchivedHttpResponse
- """
-
- assert Image, '--scramble_images requires the PIL module to be installed.'
-
- content_type = response.get_header('content-type')
- if content_type and content_type.startswith('image/'):
- try:
-      image_data = response.response_data[0]
-      # Keep the decoded result: str is immutable, so a bare decode() call
-      # would discard the base64-decoded bytes.
-      image_data = image_data.decode('base64')
- im = Image.open(StringIO.StringIO(image_data))
-
- pixel_data = list(im.getdata())
- random.shuffle(pixel_data)
-
- scrambled_image = im.copy()
- scrambled_image.putdata(pixel_data)
-
- output_image_io = StringIO.StringIO()
- scrambled_image.save(output_image_io, im.format)
- output_image_data = output_image_io.getvalue()
-      # Keep the encoded result, for the same reason as the decode above.
-      output_image_data = output_image_data.encode('base64')
-
- response = copy.deepcopy(response)
- response.set_data(output_image_data)
- except Exception:
- pass
-
- return response
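
The image transform above, reduced to a standalone sketch. PIL and the input file name are assumptions here; in the real code, the surrounding try/except swallows any decode or re-encode failure:

import random
import StringIO

import Image  # PIL; newer installs use "from PIL import Image"

im = Image.open('input.png')
pixels = list(im.getdata())
random.shuffle(pixels)  # same pixels, random order: size and format kept, content destroyed

scrambled = im.copy()
scrambled.putdata(pixels)

out = StringIO.StringIO()
scrambled.save(out, im.format)  # re-encode in the original format
scrambled_bytes = out.getvalue()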
-
-
-class DetailedHTTPResponse(httplib.HTTPResponse):
- """Preserve details relevant to replaying responses.
-
- WARNING: This code uses attributes and methods of HTTPResponse
- that are not part of the public interface.
- """
-
- def read_chunks(self):
- """Return the response body content and timing data.
-
- The returned chunks have the chunk size and CRLFs stripped off.
- If the response was compressed, the returned data is still compressed.
-
- Returns:
- (chunks, delays)
- chunks:
- [response_body] # non-chunked responses
- [chunk_1, chunk_2, ...] # chunked responses
- delays:
- [0] # non-chunked responses
- [chunk_1_first_byte_delay, ...] # chunked responses
-
- The delay for the first body item should be recorded by the caller.
- """
- buf = []
- chunks = []
- delays = []
- if not self.chunked:
- chunks.append(self.read())
- delays.append(0)
- else:
- start = TIMER()
- try:
- while True:
- line = self.fp.readline()
- chunk_size = self._read_chunk_size(line)
- if chunk_size is None:
- raise httplib.IncompleteRead(''.join(chunks))
- if chunk_size == 0:
- break
- delays.append(TIMER() - start)
- chunks.append(self._safe_read(chunk_size))
- self._safe_read(2) # skip the CRLF at the end of the chunk
- start = TIMER()
-
- # Ignore any trailers.
- while True:
- line = self.fp.readline()
- if not line or line == '\r\n':
- break
- finally:
- self.close()
- return chunks, delays
-
- @classmethod
- def _read_chunk_size(cls, line):
- chunk_extensions_pos = line.find(';')
- if chunk_extensions_pos != -1:
- line = line[:chunk_extensions_pos] # strip chunk-extensions
- try:
- chunk_size = int(line, 16)
- except ValueError:
- return None
- return chunk_size
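
The chunk-size line being parsed is a hexadecimal size, optionally followed by ';'-separated chunk extensions, with a trailing CRLF; int(line, 16) tolerates the CRLF because int() strips surrounding whitespace. For example:

assert DetailedHTTPResponse._read_chunk_size('1a\r\n') == 26
assert DetailedHTTPResponse._read_chunk_size('1A;name=value\r\n') == 26
assert DetailedHTTPResponse._read_chunk_size('0\r\n') == 0         # last-chunk marker
assert DetailedHTTPResponse._read_chunk_size('bogus\r\n') is None  # triggers IncompleteRead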
-
-
-class DetailedHTTPConnection(httplib.HTTPConnection):
- """Preserve details relevant to replaying connections."""
- response_class = DetailedHTTPResponse
-
-
-class DetailedHTTPSResponse(DetailedHTTPResponse):
- """Preserve details relevant to replaying SSL responses."""
- pass
-
-
-class DetailedHTTPSConnection(httplib.HTTPSConnection):
- """Preserve details relevant to replaying SSL connections."""
- response_class = DetailedHTTPSResponse
-
-
-class RealHttpFetch(object):
-
- def __init__(self, real_dns_lookup):
- """Initialize RealHttpFetch.
-
- Args:
- real_dns_lookup: a function that resolves a host to an IP.
- """
- self._real_dns_lookup = real_dns_lookup
-
- @staticmethod
- def _GetHeaderNameValue(header):
- """Parse the header line and return a name/value tuple.
-
- Args:
- header: a string for a header such as "Content-Length: 314".
- Returns:
-      A tuple (header_name, header_value) on success, or None if the header
-      is not in the expected format. header_name is lowercased.
- """
- i = header.find(':')
- if i > 0:
- return (header[:i].lower(), header[i+1:].strip())
- return None
-
- @staticmethod
- def _ToTuples(headers):
- """Parse headers and save them to a list of tuples.
-
-    This method takes HttpResponse.msg.headers as input and converts it
-    to a list of (header_name, header_value) tuples.
-    HttpResponse.msg.headers is a list of strings where each string
-    represents either a header or a continuation line of a header:
-    1. a normal header consists of two parts separated by a colon:
-       "header_name:header_value..."
-    2. a continuation line is a string starting with whitespace:
-       "[whitespace]continued_header_value..."
-    Malformed headers and unexpected continuation lines are ignored.
-
-    Avoid calling response.getheaders() directly: it does not handle
-    multiple headers with the same name properly. Instead, parse
-    response.msg.headers with this method to get all headers.
-
- Args:
- headers: an instance of HttpResponse.msg.headers.
- Returns:
- A list of tuples which looks like:
- [(header_name, header_value), (header_name2, header_value2)...]
- """
- all_headers = []
- for line in headers:
- if line[0] in '\t ':
- if not all_headers:
- logging.warning(
- 'Unexpected response header continuation line [%s]', line)
- continue
- name, value = all_headers.pop()
- value += '\n ' + line.strip()
- else:
- name_value = RealHttpFetch._GetHeaderNameValue(line)
- if not name_value:
- logging.warning(
- 'Response header in wrong format [%s]', line)
- continue
- name, value = name_value # pylint: disable=unpacking-non-sequence
- all_headers.append((name, value))
- return all_headers
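
A worked example of the folding behaviour, using raw lines as they would appear in response.msg.headers; note that the duplicate Set-Cookie headers survive as separate tuples:

raw_lines = [
  'Content-Length: 314\r\n',
  'Set-Cookie: a=1\r\n',
  'Set-Cookie: b=2\r\n',
  'X-Long-Header: first part\r\n',
  '  second part\r\n',  # continuation line, folded into X-Long-Header
]
assert RealHttpFetch._ToTuples(raw_lines) == [
  ('content-length', '314'),
  ('set-cookie', 'a=1'),
  ('set-cookie', 'b=2'),
  ('x-long-header', 'first part\n second part'),
]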
-
- @staticmethod
- def _get_request_host_port(request):
- host_parts = request.host.split(':')
- host = host_parts[0]
- port = int(host_parts[1]) if len(host_parts) == 2 else None
- return host, port
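
A quick illustration, using a hypothetical request stub (only the .host attribute is consulted):

class _StubRequest(object):  # hypothetical; stands in for ArchivedHttpRequest
  def __init__(self, host):
    self.host = host

assert RealHttpFetch._get_request_host_port(
    _StubRequest('example.com:8080')) == ('example.com', 8080)
assert RealHttpFetch._get_request_host_port(
    _StubRequest('example.com')) == ('example.com', None)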
-
- @staticmethod
- def _get_system_proxy(is_ssl):
- return platformsettings.get_system_proxy(is_ssl)
-
- def _get_connection(self, request_host, request_port, is_ssl):
- """Return a detailed connection object for host/port pair.
-
- If a system proxy is defined (see platformsettings.py), it will be used.
-
- Args:
- request_host: a host string (e.g. "www.example.com").
- request_port: a port integer (e.g. 8080) or None (for the default port).
- is_ssl: True if HTTPS connection is needed.
- Returns:
- A DetailedHTTPSConnection or DetailedHTTPConnection instance.
- """
- connection_host = request_host
- connection_port = request_port
- system_proxy = self._get_system_proxy(is_ssl)
- if system_proxy:
- connection_host = system_proxy.host
- connection_port = system_proxy.port
-
- # Use an IP address because WPR may override DNS settings.
- connection_ip = self._real_dns_lookup(connection_host)
- if not connection_ip:
- logging.critical('Unable to find host ip for name: %s', connection_host)
- return None
-
- if is_ssl:
- connection = DetailedHTTPSConnection(connection_ip, connection_port)
- if system_proxy:
- connection.set_tunnel(request_host, request_port)
- else:
- connection = DetailedHTTPConnection(connection_ip, connection_port)
- return connection
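
Hypothetical wiring for the DNS requirement above: the resolver handed to RealHttpFetch must return the host's real IP rather than the replay server's address. An illustrative resolver built on the system one (WPR's actual resolver lives elsewhere in this package):

import socket

def real_dns_lookup(host):  # illustrative only
  try:
    return socket.gethostbyname(host)
  except socket.gaierror:
    return None

fetch = RealHttpFetch(real_dns_lookup)
connection = fetch._get_connection('www.example.com', None, is_ssl=False)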
-
- def __call__(self, request):
- """Fetch an HTTP request.
-
- Args:
- request: an ArchivedHttpRequest
- Returns:
- an ArchivedHttpResponse
- """
- logging.debug('RealHttpFetch: %s %s', request.host, request.full_path)
- request_host, request_port = self._get_request_host_port(request)
- retries = 3
- while True:
- try:
- connection = self._get_connection(
- request_host, request_port, request.is_ssl)
- connect_start = TIMER()
- connection.connect()
- connect_delay = int((TIMER() - connect_start) * 1000)
- start = TIMER()
- connection.request(
- request.command,
- request.full_path,
- request.request_body,
- request.headers)
- response = connection.getresponse()
- headers_delay = int((TIMER() - start) * 1000)
-
- chunks, chunk_delays = response.read_chunks()
- delays = {
- 'connect': connect_delay,
- 'headers': headers_delay,
- 'data': chunk_delays
- }
- archived_http_response = httparchive.ArchivedHttpResponse(
- response.version,
- response.status,
- response.reason,
- RealHttpFetch._ToTuples(response.msg.headers),
- chunks,
- delays)
- return archived_http_response
- except Exception, e:
- if retries:
- retries -= 1
- logging.warning('Retrying fetch %s: %s', request, repr(e))
- continue
- logging.critical('Could not fetch %s: %s', request, repr(e))
- return None
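
For reference, the shape of the timing data recorded with each response. Note the unit mismatch inherited from read_chunks(): 'connect' and 'headers' are integer milliseconds, while 'data' holds raw timer deltas in float seconds. The values below are made up:

delays = {
  'connect': 12,           # int milliseconds: TCP (and, via a proxy, tunnel) setup
  'headers': 87,           # int milliseconds: request sent until headers received
  'data': [0.003, 0.015],  # per-chunk first-byte delays, raw TIMER() seconds
}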
-
-
-class RecordHttpArchiveFetch(object):
- """Make real HTTP fetches and save responses in the given HttpArchive."""
-
- def __init__(self, http_archive, real_dns_lookup, inject_script):
- """Initialize RecordHttpArchiveFetch.
-
- Args:
- http_archive: an instance of a HttpArchive
- real_dns_lookup: a function that resolves a host to an IP.
- inject_script: script string to inject in all pages
- """
- self.http_archive = http_archive
- self.real_http_fetch = RealHttpFetch(real_dns_lookup)
- self.inject_script = inject_script
-
- def __call__(self, request):
- """Fetch the request and return the response.
-
- Args:
- request: an ArchivedHttpRequest.
- Returns:
- an ArchivedHttpResponse
- """
- # If request is already in the archive, return the archived response.
- if request in self.http_archive:
- logging.debug('Repeated request found: %s', request)
- response = self.http_archive[request]
- else:
- response = self.real_http_fetch(request)
- if response is None:
- return None
- self.http_archive[request] = response
- if self.inject_script:
- response = _InjectScripts(response, self.inject_script)
- logging.debug('Recorded: %s', request)
- return response
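
Hypothetical record-mode wiring. The bare HttpArchive() construction and the inject_script value are illustrative; in practice the archive is loaded from or persisted to a .wpr file, and real_dns_lookup/request are as in the sketches above:

import httparchive

archive = httparchive.HttpArchive()
record_fetch = RecordHttpArchiveFetch(
    archive, real_dns_lookup, inject_script='window.__wpr = 1;')

response = record_fetch(request)  # fetched live, archived, then script-injected
assert request in archive         # identical requests now come from the archive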
-
-
-class ReplayHttpArchiveFetch(object):
- """Serve responses from the given HttpArchive."""
-
- def __init__(self, http_archive, real_dns_lookup, inject_script,
- use_diff_on_unknown_requests=False,
- use_closest_match=False, scramble_images=False):
- """Initialize ReplayHttpArchiveFetch.
-
- Args:
- http_archive: an instance of a HttpArchive
- real_dns_lookup: a function that resolves a host to an IP.
- inject_script: script string to inject in all pages
- use_diff_on_unknown_requests: If True, log unknown requests
- with a diff to requests that look similar.
-      use_closest_match: If True, in replay mode, serve the closest match
-        in the archive instead of giving a 404.
-      scramble_images: If True, scramble image responses before serving.
-    """
- self.http_archive = http_archive
- self.inject_script = inject_script
- self.use_diff_on_unknown_requests = use_diff_on_unknown_requests
- self.use_closest_match = use_closest_match
- self.scramble_images = scramble_images
- self.real_http_fetch = RealHttpFetch(real_dns_lookup)
-
- def __call__(self, request):
- """Fetch the request and return the response.
-
- Args:
- request: an instance of an ArchivedHttpRequest.
- Returns:
- Instance of ArchivedHttpResponse (if found) or None
- """
- if request.host.startswith('127.0.0.1:'):
- return self.real_http_fetch(request)
-
- response = self.http_archive.get(request)
-
- if self.use_closest_match and not response:
- closest_request = self.http_archive.find_closest_request(
- request, use_path=True)
- if closest_request:
- response = self.http_archive.get(closest_request)
- if response:
- logging.info('Request not found: %s\nUsing closest match: %s',
- request, closest_request)
-
- if not response:
- reason = str(request)
- if self.use_diff_on_unknown_requests:
- diff = self.http_archive.diff(request)
- if diff:
- reason += (
- "\nNearest request diff "
- "('-' for archived request, '+' for current request):\n%s" % diff)
- logging.warning('Could not replay: %s', reason)
- else:
- if self.inject_script:
- response = _InjectScripts(response, self.inject_script)
- if self.scramble_images:
- response = _ScrambleImages(response)
- return response
-
-
-class ControllableHttpArchiveFetch(object):
- """Controllable fetch function that can swap between record and replay."""
-
- def __init__(self, http_archive, real_dns_lookup,
- inject_script, use_diff_on_unknown_requests,
- use_record_mode, use_closest_match, scramble_images):
- """Initialize HttpArchiveFetch.
-
- Args:
- http_archive: an instance of a HttpArchive
- real_dns_lookup: a function that resolves a host to an IP.
- inject_script: script string to inject in all pages.
- use_diff_on_unknown_requests: If True, log unknown requests
- with a diff to requests that look similar.
-      use_record_mode: If True, start the server in record mode.
-      use_closest_match: If True, in replay mode, serve the closest match
-        in the archive instead of giving a 404.
-      scramble_images: If True, scramble image responses before serving.
-    """
- self.http_archive = http_archive
- self.record_fetch = RecordHttpArchiveFetch(
- http_archive, real_dns_lookup, inject_script)
- self.replay_fetch = ReplayHttpArchiveFetch(
- http_archive, real_dns_lookup, inject_script,
- use_diff_on_unknown_requests, use_closest_match, scramble_images)
- if use_record_mode:
- self.SetRecordMode()
- else:
- self.SetReplayMode()
-
- def SetRecordMode(self):
- self.fetch = self.record_fetch
- self.is_record_mode = True
-
- def SetReplayMode(self):
- self.fetch = self.replay_fetch
- self.is_record_mode = False
-
- def __call__(self, *args, **kwargs):
- """Forward calls to Replay/Record fetch functions depending on mode."""
- return self.fetch(*args, **kwargs)
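
Finally, a sketch of how the controllable fetch is meant to be driven; the argument values are illustrative, and archive, real_dns_lookup, and request are as in the sketches above:

fetch = ControllableHttpArchiveFetch(
    archive, real_dns_lookup,
    inject_script='', use_diff_on_unknown_requests=True,
    use_record_mode=True, use_closest_match=False, scramble_images=False)

response = fetch(request)  # record mode: fetched live and archived
fetch.SetReplayMode()
response = fetch(request)  # replay mode: served from the archive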
