Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Unified Diff: Tools/Scripts/webkitpy/thirdparty/webpagereplay/httpclient.py

Issue 18418010: Check in the thirdparty libs needed for webkitpy. (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: Tools/Scripts/webkitpy/thirdparty/webpagereplay/httpclient.py
diff --git a/Tools/Scripts/webkitpy/thirdparty/webpagereplay/httpclient.py b/Tools/Scripts/webkitpy/thirdparty/webpagereplay/httpclient.py
new file mode 100644
index 0000000000000000000000000000000000000000..7bcc3a58aa4283df64e217e3b7df2bdd78bb0373
--- /dev/null
+++ b/Tools/Scripts/webkitpy/thirdparty/webpagereplay/httpclient.py
@@ -0,0 +1,374 @@
+#!/usr/bin/env python
+# Copyright 2011 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Retrieve web resources over http."""
+
+import copy
+import httparchive
+import httplib
+import logging
+import os
+import platformsettings
+import re
+import util
+
+
# Locate an opening <html> or <head> tag near the start of a document; used to
# pick the script-injection point.  The lazy '.{,256}?' limits the search to
# roughly the first 256 characters so huge non-HTML-ish bodies bail out fast.
HTML_RE = re.compile(r'^.{,256}?<html.*?>', re.IGNORECASE | re.DOTALL)
HEAD_RE = re.compile(r'^.{,256}?<head.*?>', re.IGNORECASE | re.DOTALL)
# Platform-specific clock used for response timing measurements
# (see platformsettings for the per-OS timer selection).
TIMER = platformsettings.get_platform_settings().timer
+
+
class HttpClientException(Exception):
  """Root of the exception hierarchy raised by this module."""
+
+
def GetInjectScript(scripts):
  """Loads |scripts| from disk and returns a string of their content.

  Args:
    scripts: iterable of script paths; each must be either a file on disk
        or a package resource reachable via util.resource_string.
  Returns:
    The concatenated contents of all the scripts, in order.
  Raises:
    HttpClientException: if a script is neither a file nor a resource.
  """
  lines = []
  for script in scripts:
    if os.path.exists(script):
      # Close the file handle promptly instead of leaking it.
      with open(script) as script_file:
        lines.append(script_file.read())
    elif util.resource_exists(script):
      lines.append(util.resource_string(script))
    else:
      # Interpolate eagerly: passing extra positional args to an exception
      # constructor does NOT format the message.
      raise HttpClientException('Script does not exist: %s' % script)
  return ''.join(lines)
+
+
+def _InjectScripts(response, inject_script):
+ """Injects |inject_script| immediately after <head> or <html>.
+
+ Copies |response| if it is modified.
+
+ Args:
+ response: an ArchivedHttpResponse
+ inject_script: JavaScript string (e.g. "Math.random = function(){...}")
+ Returns:
+ an ArchivedHttpResponse
+ """
+ if type(response) == tuple:
+ logging.warn('tuple response: %s', response)
+ content_type = response.get_header('content-type')
+ if content_type and content_type.startswith('text/html'):
+ text = response.get_data_as_text()
+
+ def InsertScriptAfter(matchobj):
+ return '%s<script>%s</script>' % (matchobj.group(0), inject_script)
+
+ if text and not inject_script in text:
+ text, is_injected = HEAD_RE.subn(InsertScriptAfter, text, 1)
+ if not is_injected:
+ text, is_injected = HTML_RE.subn(InsertScriptAfter, text, 1)
+ if not is_injected:
+ logging.warning('Failed to inject scripts.')
+ logging.debug('Response content: %s', text)
+ else:
+ response = copy.deepcopy(response)
+ response.set_data(text)
+ return response
+
+
class DetailedHTTPResponse(httplib.HTTPResponse):
  """Preserve details relevant to replaying responses.

  WARNING: This code uses attributes and methods of HTTPResponse
  that are not part of the public interface.
  """

  def read_chunks(self):
    """Return the response body content and timing data.

    The returned chunks have the chunk size and CRLFs stripped off.
    If the response was compressed, the returned data is still compressed.

    Returns:
      (chunks, delays)
        chunks:
          [response_body]                  # non-chunked responses
          [chunk_1, chunk_2, ...]         # chunked responses
        delays:
          [0]                              # non-chunked responses
          [chunk_1_first_byte_delay, ...]  # chunked responses

      The delay for the first body item should be recorded by the caller.
    """
    chunks = []
    delays = []
    if not self.chunked:
      chunks.append(self.read())
      delays.append(0)
    else:
      start = TIMER()
      try:
        while True:
          line = self.fp.readline()
          chunk_size = self._read_chunk_size(line)
          if chunk_size is None:
            raise httplib.IncompleteRead(''.join(chunks))
          if chunk_size == 0:
            break  # zero-sized chunk marks the end of the body
          delays.append(TIMER() - start)
          chunks.append(self._safe_read(chunk_size))
          self._safe_read(2)  # skip the CRLF at the end of the chunk
          start = TIMER()

        # Ignore any trailers.
        while True:
          line = self.fp.readline()
          if not line or line == '\r\n':
            break
      finally:
        self.close()
    return chunks, delays

  @classmethod
  def _read_chunk_size(cls, line):
    """Parse the chunk-size line of a chunked body.

    Args:
      line: e.g. '1a\\r\\n' or '1a;chunk-extension\\r\\n'
    Returns:
      The chunk size as an int, or None if the line is malformed.
    """
    chunk_extensions_pos = line.find(';')
    if chunk_extensions_pos != -1:
      # Strip chunk-extensions before parsing the hex size.
      # (Bug fix: this slice previously used the undefined name
      # 'extention_pos', raising NameError for any chunk with extensions.)
      line = line[:chunk_extensions_pos]
    try:
      chunk_size = int(line, 16)
    except ValueError:
      return None
    return chunk_size
+
+
class DetailedHTTPConnection(httplib.HTTPConnection):
  """HTTP connection whose responses capture replay-relevant details."""

  # Hand out DetailedHTTPResponse objects so chunk timings get recorded.
  response_class = DetailedHTTPResponse
+
+
class DetailedHTTPSResponse(DetailedHTTPResponse):
  """SSL variant of DetailedHTTPResponse; adds no behavior of its own."""
+
class DetailedHTTPSConnection(httplib.HTTPSConnection):
  """HTTPS connection whose responses capture replay-relevant details."""

  # Hand out DetailedHTTPSResponse objects so chunk timings get recorded.
  response_class = DetailedHTTPSResponse
+
+
class RealHttpFetch(object):
  """Fetches a live HTTP(S) response and packages it for the archive."""

  def __init__(self, real_dns_lookup, get_server_rtt):
    """Initialize RealHttpFetch.

    Args:
      real_dns_lookup: a function that resolves a host to an IP.
      get_server_rtt: a function that returns the round-trip time of a host.
    """
    self._real_dns_lookup = real_dns_lookup
    self._get_server_rtt = get_server_rtt

  def __call__(self, request):
    """Fetch an HTTP request.

    Args:
      request: an ArchivedHttpRequest
    Returns:
      an ArchivedHttpResponse, or None if DNS resolution failed or the
      fetch still failed after exhausting the retries.
    """
    logging.debug('RealHttpFetch: %s %s', request.host, request.path)
    host_ip = self._real_dns_lookup(request.host)
    if not host_ip:
      logging.critical('Unable to find host ip for name: %s', request.host)
      return None
    # Retry transient failures; up to 3 retries (4 attempts total).
    retries = 3
    while True:
      try:
        # Connect to the resolved IP directly — presumably so the fetch
        # bypasses whatever DNS interception replay has set up (TODO confirm).
        if request.is_ssl:
          connection = DetailedHTTPSConnection(host_ip)
        else:
          connection = DetailedHTTPConnection(host_ip)
        start = TIMER()
        connection.request(
            request.command,
            request.path,
            request.request_body,
            request.headers)
        response = connection.getresponse()
        # Time-to-first-headers in milliseconds, minus the host's round-trip
        # time so the delay reflects server think-time only.
        # NOTE(review): assumes get_server_rtt returns milliseconds — confirm.
        headers_delay = int((TIMER() - start) * 1000)
        headers_delay -= self._get_server_rtt(request.host)

        chunks, chunk_delays = response.read_chunks()
        delays = {
            'headers': headers_delay,
            'data': chunk_delays
            }
        archived_http_response = httparchive.ArchivedHttpResponse(
            response.version,
            response.status,
            response.reason,
            response.getheaders(),
            chunks,
            delays)
        return archived_http_response
      except Exception, e:
        if retries:
          retries -= 1
          logging.warning('Retrying fetch %s: %s', request, e)
          continue
        logging.critical('Could not fetch %s: %s', request, e)
        return None
+
+
class RecordHttpArchiveFetch(object):
  """Make real HTTP fetches and save responses in the given HttpArchive."""

  def __init__(self, http_archive, real_dns_lookup, inject_script,
               cache_misses=None):
    """Initialize RecordHttpArchiveFetch.

    Args:
      http_archive: an instance of a HttpArchive
      real_dns_lookup: a function that resolves a host to an IP.
      inject_script: script string to inject in all pages
      cache_misses: instance of CacheMissArchive
    """
    self.http_archive = http_archive
    self.inject_script = inject_script
    self.cache_misses = cache_misses
    self.real_http_fetch = RealHttpFetch(real_dns_lookup,
                                         http_archive.get_server_rtt)

  def __call__(self, request):
    """Record (or re-serve) the response for |request|.

    Args:
      request: an ArchivedHttpRequest.
    Returns:
      an ArchivedHttpResponse, or None if the real fetch failed.
    """
    if self.cache_misses:
      # In record mode every request is, by definition, not a cache miss.
      self.cache_misses.record_request(
          request, is_record_mode=True, is_cache_miss=False)

    if request in self.http_archive:
      # Serve the stored copy rather than re-fetching a repeated request.
      logging.debug('Repeated request found: %s', request)
      response = self.http_archive[request]
    else:
      response = self.real_http_fetch(request)
      if response is None:
        return None
      # Archive the pristine (un-injected) response.
      self.http_archive[request] = response
    if self.inject_script:
      response = _InjectScripts(response, self.inject_script)
    logging.debug('Recorded: %s', request)
    return response
+
+
class ReplayHttpArchiveFetch(object):
  """Serve responses from the given HttpArchive."""

  def __init__(self, http_archive, inject_script,
               use_diff_on_unknown_requests=False, cache_misses=None,
               use_closest_match=False):
    """Initialize ReplayHttpArchiveFetch.

    Args:
      http_archive: an instance of a HttpArchive
      inject_script: script string to inject in all pages
      use_diff_on_unknown_requests: If True, log unknown requests
          with a diff to requests that look similar.
      cache_misses: Instance of CacheMissArchive.
          Callback updates archive on cache misses
      use_closest_match: If True, on replay mode, serve the closest match
          in the archive instead of giving a 404.
    """
    self.http_archive = http_archive
    self.inject_script = inject_script
    self.cache_misses = cache_misses
    self.use_diff_on_unknown_requests = use_diff_on_unknown_requests
    self.use_closest_match = use_closest_match

  def __call__(self, request):
    """Fetch the request and return the response.

    Args:
      request: an instance of an ArchivedHttpRequest.
    Returns:
      Instance of ArchivedHttpResponse (if found) or None
    """
    response = self.http_archive.get(request)

    # Optionally substitute the nearest archived request for a miss.
    if self.use_closest_match and not response:
      closest_request = self.http_archive.find_closest_request(
          request, use_path=True)
      if closest_request:
        response = self.http_archive.get(closest_request)
        if response:
          logging.info('Request not found: %s\nUsing closest match: %s',
                       request, closest_request)

    if self.cache_misses:
      self.cache_misses.record_request(
          request, is_record_mode=False, is_cache_miss=not response)

    if response:
      return _InjectScripts(response, self.inject_script)

    # Miss: log enough detail to debug why the archive had no match.
    reason = str(request)
    if self.use_diff_on_unknown_requests:
      diff = self.http_archive.diff(request)
      if diff:
        reason += (
            "\nNearest request diff "
            "('-' for archived request, '+' for current request):\n%s" % diff)
    logging.warning('Could not replay: %s', reason)
    return response
+
+
class ControllableHttpArchiveFetch(object):
  """Controllable fetch function that can swap between record and replay."""

  def __init__(self, http_archive, real_dns_lookup,
               inject_script, use_diff_on_unknown_requests,
               use_record_mode, cache_misses, use_closest_match):
    """Initialize HttpArchiveFetch.

    Args:
      http_archive: an instance of a HttpArchive
      real_dns_lookup: a function that resolves a host to an IP.
      inject_script: script string to inject in all pages.
      use_diff_on_unknown_requests: If True, log unknown requests
          with a diff to requests that look similar.
      use_record_mode: If True, start the server in record mode.
      cache_misses: Instance of CacheMissArchive.
      use_closest_match: If True, on replay mode, serve the closest match
          in the archive instead of giving a 404.
    """
    # Build both fetchers up front; mode switching just swaps self.fetch.
    self.record_fetch = RecordHttpArchiveFetch(
        http_archive, real_dns_lookup, inject_script,
        cache_misses)
    self.replay_fetch = ReplayHttpArchiveFetch(
        http_archive, inject_script, use_diff_on_unknown_requests, cache_misses,
        use_closest_match)
    # Select the initial mode.
    if use_record_mode:
      self.SetRecordMode()
    else:
      self.SetReplayMode()

  def SetRecordMode(self):
    """Route subsequent calls to the recording fetcher."""
    self.is_record_mode = True
    self.fetch = self.record_fetch

  def SetReplayMode(self):
    """Route subsequent calls to the replaying fetcher."""
    self.is_record_mode = False
    self.fetch = self.replay_fetch

  def __call__(self, *args, **kwargs):
    """Forward calls to whichever fetcher the current mode selects."""
    return self.fetch(*args, **kwargs)

Powered by Google App Engine
This is Rietveld 408576698