Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(75)

Side by Side Diff: tools/android/loading/log_parser.py

Issue 1619713002: Upgrade analyze.py and related scripts to new world order. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: comments Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « tools/android/loading/loading_model_unittest.py ('k') | tools/android/loading/log_requests.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Parses a JSON request log created by log_requests.py."""
6
7 import collections
8 import json
9 import operator
10 import urlparse
11
# Timing record of a request. RequestData builds it with Timing(**timing), so
# the field names below have to match the keys of the logged 'timing' dict.
Timing = collections.namedtuple(
    'Timing',
    [
        'connectEnd',
        'connectStart',
        'dnsEnd',
        'dnsStart',
        'proxyEnd',
        'proxyStart',
        'receiveHeadersEnd',
        'requestTime',
        'sendEnd',
        'sendStart',
        'sslEnd',
        'sslStart',
        'workerReady',
        'workerStart',
        'loadingFinished',
    ])
17
18
class Resource(object):
  """Describes a resource, identified by its URL and its content type.

  Two resources compare equal, and hash identically, when both their URL and
  their content type match, so instances can be used as dict keys and set
  members.
  """

  def __init__(self, url, content_type):
    """Creates an instance of Resource.

    Args:
      url: URL of the resource.
      content_type: Content-Type of the resource.
    """
    self.url = url
    self.content_type = content_type

  def GetShortName(self):
    """Returns a short display name for the resource.

    The name is either the hostname alone (empty or '/' path), or the hostname
    followed by the whole path, the end of the path, or the (possibly
    truncated) last path component. Tries to include the domain as much as
    possible.

    Returns:
      The short name. When the path is empty or '/', this is the parsed
      hostname, which is None for URLs without a network location.
    """
    parsed = urlparse.urlparse(self.url)
    path = parsed.path
    if path == '' or path == '/':
      return parsed.hostname
    # URLs without a network location (e.g. "about:blank") have no hostname.
    # Use an empty string so that the concatenations below do not fail with
    # a TypeError on None + str.
    hostname = parsed.hostname or ''
    last_path = path.split('/')[-1]
    if len(last_path) < 10:
      if len(path) < 10:
        # The path keeps its own leading '/', so the result contains '//'.
        return hostname + '/' + path
      return hostname + '/..' + path[-10:]
    if len(last_path) > 10:
      # Long file names are cut down to their first five characters.
      return hostname + '/..' + last_path[:5]
    return hostname + '/..' + last_path

  def GetContentType(self):
    """Returns a coarse category name derived from the content type.

    Returns:
      One of 'magic-debug-content', 'html', 'css', 'script', 'json',
      'gif_image', 'image' or 'other'.
    """
    mime = self.content_type
    if 'magic-debug-content' in mime:
      # A silly hack to make the unittesting easier.
      return 'magic-debug-content'
    elif mime == 'text/html':
      return 'html'
    elif mime == 'text/css':
      return 'css'
    elif mime in ('application/x-javascript', 'text/javascript',
                  'application/javascript'):
      return 'script'
    elif mime == 'application/json':
      return 'json'
    elif mime == 'image/gif':
      # Must be tested before the generic 'image/' prefix below.
      return 'gif_image'
    elif mime.startswith('image/'):
      return 'image'
    else:
      return 'other'

  @classmethod
  def FromRequest(cls, request):
    """Creates a Resource from an instance of RequestData.

    Args:
      request: Object providing a `url` attribute and a GetContentType()
        method.

    Returns:
      An instance of the class this method is called on.
    """
    # Build through cls rather than Resource so that subclasses get instances
    # of their own type.
    return cls(request.url, request.GetContentType())

  def __Fields(self):
    """Returns the tuple of values that define the identity of a resource."""
    return (self.url, self.content_type)

  def __eq__(self, o):
    if not isinstance(o, Resource):
      # Let Python fall back to its default handling instead of raising
      # AttributeError on objects that have no __Fields().
      return NotImplemented
    return self.__Fields() == o.__Fields()

  def __ne__(self, o):
    # Python 2 does not derive != from ==, so it has to be spelled out to stay
    # consistent with __eq__.
    result = self.__eq__(o)
    if result is NotImplemented:
      return result
    return not result

  def __hash__(self):
    return hash(self.__Fields())
86
87
class RequestData(object):
  """Represents a request, as dumped by log_requests.py."""

  def __init__(self, status, headers, request_headers, timestamp, timing, url,
               served_from_cache, initiator):
    """Creates an instance of RequestData.

    Args:
      status: Status of the response, stored as given.
      headers: (dict) Response headers.
      request_headers: Headers of the request, stored as given.
      timestamp: Timestamp of the request, stored as given.
      timing: (dict) Keyword arguments for Timing, or a falsy value when no
        timing is available.
      url: (str) URL of the request.
      served_from_cache: Whether the response came from the cache.
      initiator: Initiator of the request, stored as given.
    """
    self.status = status
    self.headers = headers
    self.request_headers = request_headers
    self.timestamp = timestamp
    # Missing or empty timing data is represented by None.
    self.timing = Timing(**timing) if timing else None
    self.url = url
    self.served_from_cache = served_from_cache
    self.initiator = initiator

  def IsDataUrl(self):
    """Returns True if the URL of the request starts with 'data:'."""
    return self.url.startswith('data:')

  def GetContentType(self):
    """Returns the Content-Type of the response, without its parameters.

    Everything from the first ';' on (e.g. "; charset=utf-8") is dropped, and
    surrounding whitespace is removed so that values such as
    "text/html ; charset=utf-8" still yield exactly "text/html".

    Raises:
      KeyError: The response has no 'Content-Type' header.
    """
    return self.headers['Content-Type'].split(';', 1)[0].strip()

  @classmethod
  def FromDict(cls, r):
    """Creates a RequestData object from a dict.

    Args:
      r: (dict) Must contain the keys 'status', 'headers', 'request_headers',
        'timestamp', 'timing', 'url', 'served_from_cache' and 'initiator'.

    Returns:
      An instance of the class this method is called on.
    """
    # Build through cls rather than RequestData so that subclasses get
    # instances of their own type.
    return cls(r['status'], r['headers'], r['request_headers'],
               r['timestamp'], r['timing'], r['url'],
               r['served_from_cache'], r['initiator'])
118
119
def ParseJsonFile(filename):
  """Loads a JSON request log and builds a RequestData for each of its entries.

  Args:
    filename: Path of the JSON file to read.

  Returns:
    [RequestData, ...], in the order the entries are iterated from the file.
  """
  with open(filename) as json_file:
    entries = json.load(json_file)
  parsed_requests = []
  for entry in entries:
    parsed_requests.append(RequestData.FromDict(entry))
  return parsed_requests
125
126
def FilterRequests(requests):
  """Drops the requests that are not relevant from a list of requests.

  Args:
    requests: [RequestData, ...]

  Returns:
    A new list, in the original order, holding only the requests that are not
    data URLs, have a 'Content-Type' response header, and were not served from
    the cache.
  """
  kept = []
  for request in requests:
    if request.IsDataUrl():
      continue
    if 'Content-Type' not in request.headers:
      continue
    if request.served_from_cache:
      continue
    kept.append(request)
  return kept
139
140
def ResourceToRequestMap(requests):
  """Returns a Resource -> Request map.

  A resource can be requested several times in a single page load. Keeps the
  first request in this case.

  Args:
    requests: [RequestData, ...]

  Returns:
    {Resource: RequestData, ...}
  """
  # Iterate over reversed(requests) because we want the first one to win:
  # later insertions overwrite earlier ones, so the earliest request has to be
  # inserted last.
  return dict((Resource.FromRequest(r), r) for r in reversed(requests))
155
156
def GetResources(requests):
  """Returns an ordered list of resources from a list of requests.

  The same resource can be requested several times for a single page load.
  Only its first occurrence is kept, so the returned list has no duplicates
  and follows the order in which resources are first requested.

  Args:
    requests: [RequestData]

  Returns:
    [Resource]
  """
  resources = []
  known_resources = set()
  for request in requests:
    resource = Resource.FromRequest(request)
    # The set holds Resource objects, so membership has to be tested with the
    # resource, not with the request it was built from.
    if resource in known_resources:
      continue
    known_resources.add(resource)
    resources.append(resource)
  return resources
178
179
def ParseCacheControl(headers):
  """Parses the "Cache-Control" header and returns a dict representing it.

  Directive names are lowercased, since they are case-insensitive. Empty
  directives (e.g. produced by a trailing comma) are ignored.

  Args:
    headers: (dict) Response headers.

  Returns:
    {Directive: Value, ...} where Value is the string following the first '='
    of the directive, or True for directives that have no value. Empty when
    there is no "Cache-Control" header.
  """
  # TODO(lizeb): Handle the "Expires" header as well.
  result = {}
  cache_control = headers.get('Cache-Control', None)
  if cache_control is None:
    return result
  for directive in cache_control.split(','):
    # Split on the first '=' only, so that values containing '=' are kept
    # whole.
    parts = [s.strip() for s in directive.split('=', 1)]
    name = parts[0].lower()
    if not name:
      continue
    if len(parts) == 1:
      result[name] = True
    else:
      result[name] = parts[1]
  return result
202
203
def MaxAge(request):
  """Returns the max-age of a resource, or -1.

  Args:
    request: Object whose `headers` attribute holds the response headers.

  Returns:
    The value of the "max-age" Cache-Control directive as an int, or -1 when
    the response has a "no-store" or "no-cache" directive, has no "max-age"
    directive, or has one whose value is missing or not an integer.
  """
  cache_control = ParseCacheControl(request.headers)
  if 'no-store' in cache_control or 'no-cache' in cache_control:
    return -1
  max_age = cache_control.get('max-age')
  # A directive without a value is stored as True by ParseCacheControl;
  # int(True) would silently report a max-age of 1.
  if max_age is None or max_age is True:
    return -1
  try:
    return int(max_age)
  except ValueError:
    # Malformed value: treat the resource as having no usable max-age.
    return -1
214
215
def SortedByCompletion(requests):
  """Returns a new list of the requests, ordered by increasing `timestamp`.

  The input is left untouched; requests with equal timestamps keep their
  relative order.
  """
  ordered = list(requests)
  ordered.sort(key=lambda request: request.timestamp)
  return ordered
OLDNEW
« no previous file with comments | « tools/android/loading/loading_model_unittest.py ('k') | tools/android/loading/log_requests.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698