| OLD | NEW |
| (Empty) |
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 """Parses a JSON request log created by log_requests.py.""" | |
| 6 | |
| 7 import collections | |
| 8 import json | |
| 9 import operator | |
| 10 import urlparse | |
| 11 | |
# Timing information attached to a request. RequestData builds an instance with
# Timing(**timing), so the field names have to match the keys of the logged
# 'timing' dict exactly.
Timing = collections.namedtuple(
    'Timing',
    [
        'connectEnd',
        'connectStart',
        'dnsEnd',
        'dnsStart',
        'proxyEnd',
        'proxyStart',
        'receiveHeadersEnd',
        'requestTime',
        'sendEnd',
        'sendStart',
        'sslEnd',
        'sslStart',
        'workerReady',
        'workerStart',
        'loadingFinished',
    ])
| 17 | |
| 18 | |
class Resource(object):
  """Describes a resource, identified by its URL and its Content-Type.

  Two resources compare equal (and hash identically) when both fields match,
  so instances can be used as dict keys and set members.
  """

  def __init__(self, url, content_type):
    """Creates an instance of Resource.

    Args:
      url: URL of the resource
      content_type: Content-Type of the resources.
    """
    self.url = url
    self.content_type = content_type

  def GetShortName(self):
    """Returns either the hostname of the resource, or the filename,
    or the end of the path. Tries to include the domain as much as possible.

    Returns:
      The hostname alone when the URL path is empty or '/'. Otherwise the
      hostname followed by the whole path (short paths), the last 10
      characters of the path (long path, short last component), the first 5
      characters of the last component (component longer than 10 characters)
      or the last component itself (exactly 10 characters).
    """
    parsed = urlparse.urlparse(self.url)
    path = parsed.path
    if path == '' or path == '/':
      return parsed.hostname

    last_path = path.split('/')[-1]
    if len(last_path) < 10:
      if len(path) < 10:
        # The whole path fits; it is appended after an explicit '/'.
        return parsed.hostname + '/' + path
      # Keep the tail of the path so that the file name stays visible.
      return parsed.hostname + '/..' + path[-10:]
    if len(last_path) > 10:
      # Long file name: only keep its beginning.
      return parsed.hostname + '/..' + last_path[:5]
    return parsed.hostname + '/..' + last_path

  def GetContentType(self):
    """Returns a coarse category derived from the Content-Type.

    Returns:
      One of 'magic-debug-content', 'html', 'css', 'script', 'json',
      'gif_image', 'image' or 'other'.
    """
    mime = self.content_type
    if 'magic-debug-content' in mime:
      # A silly hack to make the unittesting easier.
      return 'magic-debug-content'
    elif mime == 'text/html':
      return 'html'
    elif mime == 'text/css':
      return 'css'
    elif mime in ('application/x-javascript', 'text/javascript',
                  'application/javascript'):
      return 'script'
    elif mime == 'application/json':
      return 'json'
    elif mime == 'image/gif':
      # Must be tested before the generic 'image/' prefix below.
      return 'gif_image'
    elif mime.startswith('image/'):
      return 'image'
    else:
      return 'other'

  @classmethod
  def FromRequest(cls, request):
    """Creates a Resource from an instance of RequestData.

    Args:
      request: Object exposing `url` and `GetContentType()`.

    Returns:
      An instance of `cls`, so that subclasses get instances of themselves.
    """
    return cls(request.url, request.GetContentType())

  def __Fields(self):
    """Returns the tuple of fields that defines the identity of a resource."""
    return (self.url, self.content_type)

  def __eq__(self, o):
    # Let Python fall back to its default handling for foreign types instead
    # of failing with AttributeError on the missing __Fields().
    if not isinstance(o, Resource):
      return NotImplemented
    return self.__Fields() == o.__Fields()

  def __ne__(self, o):
    # Python 2 does not derive != from __eq__, so it is spelled out to keep
    # both operators consistent.
    result = self.__eq__(o)
    if result is NotImplemented:
      return result
    return not result

  def __hash__(self):
    return hash(self.__Fields())
| 86 | |
| 87 | |
class RequestData(object):
  """Represents a request, as dumped by log_requests.py."""

  def __init__(self, status, headers, request_headers, timestamp, timing, url,
               served_from_cache, initiator):
    """Creates an instance of RequestData.

    Args:
      status: Status of the request, stored as given.
      headers: (dict) Response headers.
      request_headers: Request headers, stored as given.
      timestamp: Timestamp of the request, stored as given.
      timing: (dict) Keyword arguments for Timing, or a falsy value when no
              timing is available (self.timing is then None).
      url: (str) URL of the request.
      served_from_cache: Whether the response came from the cache.
      initiator: Initiator of the request, stored as given.
    """
    self.status = status
    self.headers = headers
    self.request_headers = request_headers
    self.timestamp = timestamp
    self.timing = Timing(**timing) if timing else None
    self.url = url
    self.served_from_cache = served_from_cache
    self.initiator = initiator

  def IsDataUrl(self):
    """Returns True if the URL uses the 'data:' scheme."""
    return self.url.startswith('data:')

  def GetContentType(self):
    """Returns the media type found in the 'Content-Type' response header.

    Everything from the first ';' on (e.g. the charset parameter) is dropped,
    as well as the whitespace surrounding the media type, so that
    'text/html ; charset=utf-8' yields 'text/html'.

    Raises:
      KeyError: The response has no 'Content-Type' header.
    """
    content_type = self.headers['Content-Type']
    return content_type.partition(';')[0].strip()

  @classmethod
  def FromDict(cls, r):
    """Creates a RequestData object from a dict.

    Args:
      r: (dict) Must provide the keys 'status', 'headers', 'request_headers',
         'timestamp', 'timing', 'url', 'served_from_cache' and 'initiator'.

    Returns:
      An instance of `cls`, so that subclasses get instances of themselves.
    """
    return cls(r['status'], r['headers'], r['request_headers'],
               r['timestamp'], r['timing'], r['url'],
               r['served_from_cache'], r['initiator'])
| 118 | |
| 119 | |
def ParseJsonFile(filename):
  """Loads a JSON request log and wraps each entry in a RequestData.

  Args:
    filename: Path of the JSON file to read.

  Returns:
    [RequestData, ...], one per entry of the file, in file order.
  """
  with open(filename) as log_file:
    entries = json.load(log_file)
  parsed_requests = []
  for entry in entries:
    parsed_requests.append(RequestData.FromDict(entry))
  return parsed_requests
| 125 | |
| 126 | |
def FilterRequests(requests):
  """Drops the requests that are not relevant.

  Args:
    requests: [RequestData, ...]

  Returns:
    A new list holding, in their original order, the requests that are not
    data URLs, carry a 'Content-Type' response header and were not served
    from the cache.
  """
  kept = []
  for request in requests:
    if request.IsDataUrl():
      continue
    if 'Content-Type' not in request.headers:
      continue
    if request.served_from_cache:
      continue
    kept.append(request)
  return kept
| 139 | |
| 140 | |
def ResourceToRequestMap(requests):
  """Maps every distinct resource to the request that fetched it.

  A resource can be requested several times in a single page load. The first
  request of the list is the one kept in this case.

  Args:
    requests: [RequestData, ...]

  Returns:
    {Resource: RequestData, ...}
  """
  resource_to_request = {}
  # Walk the list backwards: an earlier request then overwrites any later one
  # made for the same resource, so the first one wins.
  for request in reversed(requests):
    resource_to_request[Resource.FromRequest(request)] = request
  return resource_to_request
| 155 | |
| 156 | |
def GetResources(requests):
  """Returns an ordered list of resources from a list of requests.

  The same resource can be requested several time for a single page load. This
  keeps only the first request.

  Args:
    requests: [RequestData]

  Returns:
    [Resource], without duplicates, ordered by first appearance in `requests`.
  """
  resources = []
  known_resources = set()
  for request in requests:
    resource = Resource.FromRequest(request)
    # Deduplicate on the Resource, not on the request: the set holds Resource
    # objects, so testing the request itself would never match and every
    # duplicate would be kept.
    if resource in known_resources:
      continue
    known_resources.add(resource)
    resources.append(resource)
  return resources
| 178 | |
| 179 | |
def ParseCacheControl(headers):
  """Parses the "Cache-Control" header and returns a dict representing it.

  Args:
    headers: (dict) Response headers.

  Returns:
    {Directive: Value, ...} where Value is the string following the first '='
    of the directive, or True for a directive without a value. The dict is
    empty when the header is absent or holds no directive.
  """
  # TODO(lizeb): Handle the "Expires" header as well.
  result = {}
  cache_control = headers.get('Cache-Control', None)
  if cache_control is None:
    return result
  for directive in cache_control.split(','):
    directive = directive.strip()
    if not directive:
      # Empty header or stray comma: there is no directive to record.
      continue
    # Only split on the first '=' so that values containing '=' stay whole.
    name, separator, value = directive.partition('=')
    result[name.strip()] = value.strip() if separator else True
  return result
| 202 | |
| 203 | |
def MaxAge(request):
  """Returns the max-age of a resource, or -1.

  Args:
    request: Object whose `headers` attribute holds the response headers.

  Returns:
    The value of the "max-age" directive as an int, or -1 when there is no
    "Cache-Control" directive at all, when "no-store" or "no-cache" is set, or
    when "max-age" is absent, has no value or is not an integer.
  """
  cache_control = ParseCacheControl(request.headers)
  if (not cache_control
      or 'no-store' in cache_control
      or 'no-cache' in cache_control):
    return -1
  max_age = cache_control.get('max-age')
  # ParseCacheControl() maps a directive without a value to True, which int()
  # would silently turn into 1.
  if max_age is None or max_age is True:
    return -1
  try:
    return int(max_age)
  except ValueError:
    # Malformed value: treat it the same way as a missing directive.
    return -1
| 214 | |
| 215 | |
def SortedByCompletion(requests):
  """Returns a new list of the requests, ordered by increasing `timestamp`.

  The input is left untouched, and requests sharing the same timestamp keep
  their relative order.
  """
  ordered = list(requests)
  ordered.sort(key=lambda request: request.timestamp)
  return ordered
| OLD | NEW |