OLD | NEW |
| (Empty) |
1 # Copyright 2015 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 """Parses a JSON request log created by log_requests.py.""" | |
6 | |
7 import collections | |
8 import json | |
9 import operator | |
10 import urlparse | |
11 | |
# Timing values recorded for a request. The field order below is part of the
# tuple's positional interface and must not change.
Timing = collections.namedtuple('Timing', [
    'connectEnd',
    'connectStart',
    'dnsEnd',
    'dnsStart',
    'proxyEnd',
    'proxyStart',
    'receiveHeadersEnd',
    'requestTime',
    'sendEnd',
    'sendStart',
    'sslEnd',
    'sslStart',
    'workerReady',
    'workerStart',
    'loadingFinished',
])
17 | |
18 | |
class Resource(object):
  """Describes a resource, identified by its URL and its Content-Type.

  Two instances are equal (and hash alike) when both fields match, which
  allows resources to be used as dict keys and set members.
  """

  def __init__(self, url, content_type):
    """Creates an instance of Resource.

    Args:
      url: URL of the resource.
      content_type: Content-Type of the resource.
    """
    self.url = url
    self.content_type = content_type

  def GetShortName(self):
    """Returns a short name built from the hostname and the end of the path.

    - Empty or '/' path: the hostname alone.
    - Last path component shorter than 10 characters: the hostname followed
      by the whole path when the path itself is shorter than 10 characters,
      otherwise by '/..' and the last 10 characters of the path.
    - Last path component longer than 10 characters: the hostname, '/..' and
      the first 5 characters of that component.
    - Last path component of exactly 10 characters: the hostname, '/..' and
      that component.
    """
    parsed = urlparse.urlparse(self.url)
    path = parsed.path
    if path in ('', '/'):
      return parsed.hostname

    last_path = path.split('/')[-1]
    if len(last_path) < 10:
      if len(path) < 10:
        # NOTE: |path| keeps its leading '/', so the result contains '//'.
        # Kept as is, since callers may rely on the exact string.
        return parsed.hostname + '/' + path
      return parsed.hostname + '/..' + path[-10:]
    if len(last_path) > 10:
      return parsed.hostname + '/..' + last_path[:5]
    return parsed.hostname + '/..' + last_path

  def GetContentType(self):
    """Maps the MIME type of the resource to a coarse category name.

    Returns:
      One of 'magic-debug-content', 'html', 'css', 'script', 'json',
      'gif_image', 'image' or 'other'.
    """
    mime = self.content_type
    if 'magic-debug-content' in mime:
      # A silly hack to make the unittesting easier.
      return 'magic-debug-content'
    if mime == 'text/html':
      return 'html'
    if mime == 'text/css':
      return 'css'
    if mime in ('application/x-javascript', 'text/javascript',
                'application/javascript'):
      return 'script'
    if mime == 'application/json':
      return 'json'
    if mime == 'image/gif':
      return 'gif_image'
    if mime.startswith('image/'):
      return 'image'
    return 'other'

  @classmethod
  def FromRequest(cls, request):
    """Creates a Resource from an instance of RequestData.

    Uses |cls| so that subclasses get instances of their own type.
    """
    return cls(request.url, request.GetContentType())

  def __Fields(self):
    """Returns the tuple of fields that defines identity for this class."""
    return (self.url, self.content_type)

  def __eq__(self, o):
    if not isinstance(o, Resource):
      # Let Python try the reflected comparison instead of failing with an
      # AttributeError on objects that are not resources.
      return NotImplemented
    return self.__Fields() == o.__Fields()

  def __ne__(self, o):
    # Python 2 does not derive != from ==, so define it explicitly to keep
    # both operators consistent.
    equal = self.__eq__(o)
    if equal is NotImplemented:
      return equal
    return not equal

  def __hash__(self):
    return hash(self.__Fields())
86 | |
87 | |
class RequestData(object):
  """Represents a request, as dumped by log_requests.py."""

  def __init__(self, status, headers, request_headers, timestamp, timing, url,
               served_from_cache, initiator):
    """Creates an instance of RequestData.

    Args:
      status: stored as |status|.
      headers: (dict) Response headers.
      request_headers: stored as |request_headers|.
      timestamp: stored as |timestamp|.
      timing: (dict) Keyword arguments for Timing, or a falsy value, in which
        case |timing| is set to None.
      url: (str) URL of the request.
      served_from_cache: stored as |served_from_cache|.
      initiator: stored as |initiator|.
    """
    self.status = status
    self.headers = headers
    self.request_headers = request_headers
    self.timestamp = timestamp
    self.timing = Timing(**timing) if timing else None
    self.url = url
    self.served_from_cache = served_from_cache
    self.initiator = initiator

  def IsDataUrl(self):
    """Returns True if the URL uses the "data:" scheme."""
    return self.url.startswith('data:')

  def GetContentType(self):
    """Returns the Content-Type response header without its parameters.

    Everything from the first ';' on is dropped, and surrounding whitespace
    is removed so that "text/html ; charset=utf-8" yields "text/html".

    Raises:
      KeyError: if the response has no 'Content-Type' header.
    """
    content_type = self.headers['Content-Type']
    return content_type.split(';', 1)[0].strip()

  @classmethod
  def FromDict(cls, r):
    """Creates a RequestData object from a dict.

    Args:
      r: (dict) Must have the keys 'status', 'headers', 'request_headers',
        'timestamp', 'timing', 'url', 'served_from_cache' and 'initiator'.

    Returns:
      An instance of |cls|, so that subclasses get their own type.
    """
    return cls(r['status'], r['headers'], r['request_headers'],
               r['timestamp'], r['timing'], r['url'],
               r['served_from_cache'], r['initiator'])
118 | |
119 | |
def ParseJsonFile(filename):
  """Loads a JSON request log and converts each entry to a RequestData.

  Args:
    filename: Path of the JSON file to read.

  Returns:
    [RequestData, ...], in the order the entries appear in the file.
  """
  with open(filename) as json_file:
    entries = json.load(json_file)
  parsed_requests = []
  for entry in entries:
    parsed_requests.append(RequestData.FromDict(entry))
  return parsed_requests
125 | |
126 | |
def FilterRequests(requests):
  """Drops the requests that are not relevant.

  Args:
    requests: [RequestData, ...]

  Returns:
    A new list, in the original order, holding the requests that are not
    data URLs, have a 'Content-Type' response header, and were not served
    from the cache.
  """
  kept = []
  for request in requests:
    if request.IsDataUrl():
      continue
    if 'Content-Type' not in request.headers:
      continue
    if request.served_from_cache:
      continue
    kept.append(request)
  return kept
139 | |
140 | |
def ResourceToRequestMap(requests):
  """Returns a Resource -> Request map.

  A resource can be requested several times in a single page load. Keeps the
  first request in this case.

  Args:
    requests: [RequestData, ...]

  Returns:
    {Resource: RequestData, ...}
  """
  # A later entry overwrites an earlier one with the same key, so iterate in
  # reverse to let the first request for each resource win.
  return {Resource.FromRequest(r): r for r in reversed(requests)}
155 | |
156 | |
def GetResources(requests):
  """Returns an ordered list of resources from a list of requests.

  The same resource can be requested several times for a single page load.
  Only the first occurrence is kept, in request order.

  Args:
    requests: [RequestData]

  Returns:
    [Resource]
  """
  resources = []
  known_resources = set()
  for r in requests:
    resource = Resource.FromRequest(r)
    # The set holds Resource objects, so membership must be tested with the
    # resource; testing the request never matches and lets duplicates through.
    if resource in known_resources:
      continue
    known_resources.add(resource)
    resources.append(resource)
  return resources
178 | |
179 | |
def ParseCacheControl(headers):
  """Parses the "Cache-Control" header and returns a dict representing it.

  Directive names are lower-cased, since they are case-insensitive. A
  directive without a value maps to True; otherwise it maps to the text
  after the first '=', stripped of surrounding whitespace. Empty list
  elements (e.g. from a trailing comma) are ignored.

  NOTE: Quoted values containing ',' are not supported; they are split like
  any other comma.

  Args:
    headers: (dict) Response headers.

  Returns:
    {Directive: Value, ...}, empty if there is no "Cache-Control" header.
  """
  # TODO(lizeb): Handle the "Expires" header as well.
  result = {}
  cache_control = headers.get('Cache-Control')
  if cache_control is None:
    return result
  for directive in cache_control.split(','):
    # Split on the first '=' only, so values containing '=' stay intact.
    name, separator, value = directive.partition('=')
    name = name.strip().lower()
    if not name:
      continue
    result[name] = value.strip() if separator else True
  return result
202 | |
203 | |
def MaxAge(request):
  """Returns the max-age of a resource, or -1.

  -1 is returned when the response has no Cache-Control directives, when it
  carries 'no-store' or 'no-cache', or when 'max-age' is absent, has no
  value, or is not an integer.

  Args:
    request: Object whose |headers| attribute is the response headers dict.

  Returns:
    (int) The max-age value, or -1.
  """
  cache_control = ParseCacheControl(request.headers)
  if not cache_control:
    return -1
  if 'no-store' in cache_control or 'no-cache' in cache_control:
    return -1
  max_age = cache_control.get('max-age')
  # A directive without a value is stored as True; int(True) would silently
  # yield 1, so treat it as missing.
  if max_age is None or max_age is True:
    return -1
  try:
    return int(max_age)
  except ValueError:
    # Malformed value, e.g. "max-age=abc" or "max-age=".
    return -1
214 | |
215 | |
def SortedByCompletion(requests):
  """Returns a new list of the requests, ordered by increasing timestamp.

  The input is left untouched, and requests with equal timestamps keep their
  relative order.
  """
  ordered = list(requests)
  ordered.sort(key=lambda request: request.timestamp)
  return ordered
OLD | NEW |