tools/telemetry/third_party/webpagereplay/httpclient.py - Issue 1647513002: Delete tools/telemetry.

Unified Diff: tools/telemetry/third_party/webpagereplay/httpclient.py

Issue 1647513002: Delete tools/telemetry. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « tools/telemetry/third_party/webpagereplay/httparchive_test.py ('k') | tools/telemetry/third_party/webpagereplay/httpclient_test.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: tools/telemetry/third_party/webpagereplay/httpclient.py

diff --git a/tools/telemetry/third_party/webpagereplay/httpclient.py b/tools/telemetry/third_party/webpagereplay/httpclient.py

deleted file mode 100644

index 9159a48132c3d35e171e8b13ae8ce55da2a924e1..0000000000000000000000000000000000000000

--- a/tools/telemetry/third_party/webpagereplay/httpclient.py

+++ /dev/null

@@ -1,492 +0,0 @@

-#!/usr/bin/env python

-# Licensed under the Apache License, Version 2.0 (the "License");

-# you may not use this file except in compliance with the License.

-# You may obtain a copy of the License at

-# http://www.apache.org/licenses/LICENSE-2.0

-# Unless required by applicable law or agreed to in writing, software

-# distributed under the License is distributed on an "AS IS" BASIS,

-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

-# See the License for the specific language governing permissions and

-# limitations under the License.

-"""Retrieve web resources over http."""

-import copy

-import httplib

-import logging

-import random

-import StringIO

-import httparchive

-import platformsettings

-import script_injector

-# PIL isn't always available, but we still want to be able to run without

-# the image scrambling functionality in this case.

-try:

- import Image

-except ImportError:

- Image = None

-TIMER = platformsettings.timer

-class HttpClientException(Exception):

- """Base class for all exceptions in httpclient."""

- pass

-def _InjectScripts(response, inject_script):

- """Injects |inject_script| immediately after <head> or <html>.

- Copies |response| if it is modified.

- Args:

- response: an ArchivedHttpResponse

- inject_script: JavaScript string (e.g. "Math.random = function(){...}")

- Returns:

- an ArchivedHttpResponse

- """

- if type(response) == tuple:

- logging.warn('tuple response: %s', response)

- content_type = response.get_header('content-type')

- if content_type and content_type.startswith('text/html'):

- text = response.get_data_as_text()

- text, already_injected = script_injector.InjectScript(

- text, 'text/html', inject_script)

- if not already_injected:

- response = copy.deepcopy(response)

- response.set_data(text)

- return response

-def _ScrambleImages(response):

- """If the |response| is an image, attempt to scramble it.

- Copies |response| if it is modified.

- Args:

- response: an ArchivedHttpResponse

- Returns:

- an ArchivedHttpResponse

- """

- assert Image, '--scramble_images requires the PIL module to be installed.'

- content_type = response.get_header('content-type')

- if content_type and content_type.startswith('image/'):

- try:

- image_data = response.response_data[0]

- image_data.decode(encoding='base64')

- im = Image.open(StringIO.StringIO(image_data))

- pixel_data = list(im.getdata())

- random.shuffle(pixel_data)

- scrambled_image = im.copy()

- scrambled_image.putdata(pixel_data)

- output_image_io = StringIO.StringIO()

- scrambled_image.save(output_image_io, im.format)

- output_image_data = output_image_io.getvalue()

- output_image_data.encode(encoding='base64')

- response = copy.deepcopy(response)

- response.set_data(output_image_data)

- except Exception:

- pass

- return response

-class DetailedHTTPResponse(httplib.HTTPResponse):

- """Preserve details relevant to replaying responses.

- WARNING: This code uses attributes and methods of HTTPResponse

- that are not part of the public interface.

- """

- def read_chunks(self):

- """Return the response body content and timing data.

- The returned chunks have the chunk size and CRLFs stripped off.

- If the response was compressed, the returned data is still compressed.

- Returns:

- (chunks, delays)

- chunks:

- [response_body] # non-chunked responses

- [chunk_1, chunk_2, ...] # chunked responses

- delays:

- [0] # non-chunked responses

- [chunk_1_first_byte_delay, ...] # chunked responses

- The delay for the first body item should be recorded by the caller.

- """

- buf = []

- chunks = []

- delays = []

- if not self.chunked:

- chunks.append(self.read())

- delays.append(0)

- else:

- start = TIMER()

- try:

- while True:

- line = self.fp.readline()

- chunk_size = self._read_chunk_size(line)

- if chunk_size is None:

- raise httplib.IncompleteRead(''.join(chunks))

- if chunk_size == 0:

- break

- delays.append(TIMER() - start)

- chunks.append(self._safe_read(chunk_size))

- self._safe_read(2) # skip the CRLF at the end of the chunk

- start = TIMER()

- # Ignore any trailers.

- while True:

- line = self.fp.readline()

- if not line or line == '\r\n':

- break

- finally:

- self.close()

- return chunks, delays

- @classmethod

- def _read_chunk_size(cls, line):

- chunk_extensions_pos = line.find(';')

- if chunk_extensions_pos != -1:

- line = line[:chunk_extensions_pos] # strip chunk-extensions

- try:

- chunk_size = int(line, 16)

- except ValueError:

- return None

- return chunk_size

-class DetailedHTTPConnection(httplib.HTTPConnection):

- """Preserve details relevant to replaying connections."""

- response_class = DetailedHTTPResponse

-class DetailedHTTPSResponse(DetailedHTTPResponse):

- """Preserve details relevant to replaying SSL responses."""

- pass

-class DetailedHTTPSConnection(httplib.HTTPSConnection):

- """Preserve details relevant to replaying SSL connections."""

- response_class = DetailedHTTPSResponse

-class RealHttpFetch(object):

- def __init__(self, real_dns_lookup):

- """Initialize RealHttpFetch.

- Args:

- real_dns_lookup: a function that resolves a host to an IP.

- """

- self._real_dns_lookup = real_dns_lookup

- @staticmethod

- def _GetHeaderNameValue(header):

- """Parse the header line and return a name/value tuple.

- Args:

- header: a string for a header such as "Content-Length: 314".

- Returns:

- A tuple (header_name, header_value) on success or None if the header

- is not in expected format. header_name is in lowercase.

- """

- i = header.find(':')

- if i > 0:

- return (header[:i].lower(), header[i+1:].strip())

- return None

- @staticmethod

- def _ToTuples(headers):

- """Parse headers and save them to a list of tuples.

- This method takes HttpResponse.msg.headers as input and convert it

- to a list of (header_name, header_value) tuples.

- HttpResponse.msg.headers is a list of strings where each string

- represents either a header or a continuation line of a header.

- 1. a normal header consists of two parts which are separated by colon :

- "header_name:header_value..."

- 2. a continuation line is a string starting with whitespace

- "[whitespace]continued_header_value..."

- If a header is not in good shape or an unexpected continuation line is

- seen, it will be ignored.

- Should avoid using response.getheaders() directly

- because response.getheaders() can't handle multiple headers

- with the same name properly. Instead, parse the

- response.msg.headers using this method to get all headers.

- Args:

- headers: an instance of HttpResponse.msg.headers.

- Returns:

- A list of tuples which looks like:

- [(header_name, header_value), (header_name2, header_value2)...]

- """

- all_headers = []

- for line in headers:

- if line[0] in '\t ':

- if not all_headers:

- logging.warning(

- 'Unexpected response header continuation line [%s]', line)

- continue

- name, value = all_headers.pop()

- value += '\n ' + line.strip()

- else:

- name_value = RealHttpFetch._GetHeaderNameValue(line)

- if not name_value:

- logging.warning(

- 'Response header in wrong format [%s]', line)

- continue

- name, value = name_value # pylint: disable=unpacking-non-sequence

- all_headers.append((name, value))

- return all_headers

- @staticmethod

- def _get_request_host_port(request):

- host_parts = request.host.split(':')

- host = host_parts[0]

- port = int(host_parts[1]) if len(host_parts) == 2 else None

- return host, port

- @staticmethod

- def _get_system_proxy(is_ssl):

- return platformsettings.get_system_proxy(is_ssl)

- def _get_connection(self, request_host, request_port, is_ssl):

- """Return a detailed connection object for host/port pair.

- If a system proxy is defined (see platformsettings.py), it will be used.

- Args:

- request_host: a host string (e.g. "www.example.com").

- request_port: a port integer (e.g. 8080) or None (for the default port).

- is_ssl: True if HTTPS connection is needed.

- Returns:

- A DetailedHTTPSConnection or DetailedHTTPConnection instance.

- """

- connection_host = request_host

- connection_port = request_port

- system_proxy = self._get_system_proxy(is_ssl)

- if system_proxy:

- connection_host = system_proxy.host

- connection_port = system_proxy.port

- # Use an IP address because WPR may override DNS settings.

- connection_ip = self._real_dns_lookup(connection_host)

- if not connection_ip:

- logging.critical('Unable to find host ip for name: %s', connection_host)

- return None

- if is_ssl:

- connection = DetailedHTTPSConnection(connection_ip, connection_port)

- if system_proxy:

- connection.set_tunnel(request_host, request_port)

- else:

- connection = DetailedHTTPConnection(connection_ip, connection_port)

- return connection

- def __call__(self, request):

- """Fetch an HTTP request.

- Args:

- request: an ArchivedHttpRequest

- Returns:

- an ArchivedHttpResponse

- """

- logging.debug('RealHttpFetch: %s %s', request.host, request.full_path)

- request_host, request_port = self._get_request_host_port(request)

- retries = 3

- while True:

- try:

- connection = self._get_connection(

- request_host, request_port, request.is_ssl)

- connect_start = TIMER()

- connection.connect()

- connect_delay = int((TIMER() - connect_start) * 1000)

- start = TIMER()

- connection.request(

- request.command,

- request.full_path,

- request.request_body,

- request.headers)

- response = connection.getresponse()

- headers_delay = int((TIMER() - start) * 1000)

- chunks, chunk_delays = response.read_chunks()

- delays = {

- 'connect': connect_delay,

- 'headers': headers_delay,

- 'data': chunk_delays

- }

- archived_http_response = httparchive.ArchivedHttpResponse(

- response.version,

- response.status,

- response.reason,

- RealHttpFetch._ToTuples(response.msg.headers),

- chunks,

- delays)

- return archived_http_response

- except Exception, e:

- if retries:

- retries -= 1

- logging.warning('Retrying fetch %s: %s', request, repr(e))

- continue

- logging.critical('Could not fetch %s: %s', request, repr(e))

- return None

-class RecordHttpArchiveFetch(object):

- """Make real HTTP fetches and save responses in the given HttpArchive."""

- def __init__(self, http_archive, real_dns_lookup, inject_script):

- """Initialize RecordHttpArchiveFetch.

- Args:

- http_archive: an instance of a HttpArchive

- real_dns_lookup: a function that resolves a host to an IP.

- inject_script: script string to inject in all pages

- """

- self.http_archive = http_archive

- self.real_http_fetch = RealHttpFetch(real_dns_lookup)

- self.inject_script = inject_script

- def __call__(self, request):

- """Fetch the request and return the response.

- Args:

- request: an ArchivedHttpRequest.

- Returns:

- an ArchivedHttpResponse

- """

- # If request is already in the archive, return the archived response.

- if request in self.http_archive:

- logging.debug('Repeated request found: %s', request)

- response = self.http_archive[request]

- else:

- response = self.real_http_fetch(request)

- if response is None:

- return None

- self.http_archive[request] = response

- if self.inject_script:

- response = _InjectScripts(response, self.inject_script)

- logging.debug('Recorded: %s', request)

- return response

-class ReplayHttpArchiveFetch(object):

- """Serve responses from the given HttpArchive."""

- def __init__(self, http_archive, real_dns_lookup, inject_script,

- use_diff_on_unknown_requests=False,

- use_closest_match=False, scramble_images=False):

- """Initialize ReplayHttpArchiveFetch.

- Args:

- http_archive: an instance of a HttpArchive

- real_dns_lookup: a function that resolves a host to an IP.

- inject_script: script string to inject in all pages

- use_diff_on_unknown_requests: If True, log unknown requests

- with a diff to requests that look similar.

- use_closest_match: If True, on replay mode, serve the closest match

- in the archive instead of giving a 404.

- """

- self.http_archive = http_archive

- self.inject_script = inject_script

- self.use_diff_on_unknown_requests = use_diff_on_unknown_requests

- self.use_closest_match = use_closest_match

- self.scramble_images = scramble_images

- self.real_http_fetch = RealHttpFetch(real_dns_lookup)

- def __call__(self, request):

- """Fetch the request and return the response.

- Args:

- request: an instance of an ArchivedHttpRequest.

- Returns:

- Instance of ArchivedHttpResponse (if found) or None

- """

- if request.host.startswith('127.0.0.1:'):

- return self.real_http_fetch(request)

- response = self.http_archive.get(request)

- if self.use_closest_match and not response:

- closest_request = self.http_archive.find_closest_request(

- request, use_path=True)

- if closest_request:

- response = self.http_archive.get(closest_request)

- if response:

- logging.info('Request not found: %s\nUsing closest match: %s',

- request, closest_request)

- if not response:

- reason = str(request)

- if self.use_diff_on_unknown_requests:

- diff = self.http_archive.diff(request)

- if diff:

- reason += (

- "\nNearest request diff "

- "('-' for archived request, '+' for current request):\n%s" % diff)

- logging.warning('Could not replay: %s', reason)

- else:

- if self.inject_script:

- response = _InjectScripts(response, self.inject_script)

- if self.scramble_images:

- response = _ScrambleImages(response)

- return response

-class ControllableHttpArchiveFetch(object):

- """Controllable fetch function that can swap between record and replay."""

- def __init__(self, http_archive, real_dns_lookup,

- inject_script, use_diff_on_unknown_requests,

- use_record_mode, use_closest_match, scramble_images):

- """Initialize HttpArchiveFetch.

- Args:

- http_archive: an instance of a HttpArchive

- real_dns_lookup: a function that resolves a host to an IP.

- inject_script: script string to inject in all pages.

- use_diff_on_unknown_requests: If True, log unknown requests

- with a diff to requests that look similar.

- use_record_mode: If True, start in server in record mode.

- use_closest_match: If True, on replay mode, serve the closest match

- in the archive instead of giving a 404.

- """

- self.http_archive = http_archive

- self.record_fetch = RecordHttpArchiveFetch(

- http_archive, real_dns_lookup, inject_script)

- self.replay_fetch = ReplayHttpArchiveFetch(

- http_archive, real_dns_lookup, inject_script,

- use_diff_on_unknown_requests, use_closest_match, scramble_images)

- if use_record_mode:

- self.SetRecordMode()

- else:

- self.SetReplayMode()

- def SetRecordMode(self):

- self.fetch = self.record_fetch

- self.is_record_mode = True

- def SetReplayMode(self):

- self.fetch = self.replay_fetch

- self.is_record_mode = False

- def __call__(self, *args, **kwargs):

- """Forward calls to Replay/Record fetch functions depending on mode."""

- return self.fetch(*args, **kwargs)