tools/android/loading/log_parser.py - Issue 1619713002: Upgrade analyze.py and related scripts to new world order.

Side by Side Diff: tools/android/loading/log_parser.py

Issue 1619713002: Upgrade analyze.py and related scripts to new world order. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 # Copyright 2015 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.

4

5 """Parses a JSON request log created by log_requests.py."""

6

7 import collections

8 import json

9 import operator

10 import urlparse

11

# Record type holding the timing fields of a request. RequestData builds it
# with Timing(**timing), so every field name must be supplied as a keyword.
Timing = collections.namedtuple(
    'Timing',
    'connectEnd connectStart dnsEnd dnsStart proxyEnd proxyStart '
    'receiveHeadersEnd requestTime sendEnd sendStart sslEnd sslStart '
    'workerReady workerStart loadingFinished')

17

18

class Resource(object):
  """Describes a resource, identified by its URL and its content type.

  Two resources compare equal, and hash identically, when both their URL and
  their content type are equal.
  """

  def __init__(self, url, content_type):
    """Creates an instance of Resource.

    Args:
      url: URL of the resource
      content_type: Content-Type of the resources.
    """
    self.url = url
    self.content_type = content_type

  def GetShortName(self):
    """Returns either the hostname of the resource, or the filename,
    or the end of the path. Tries to include the domain as much as possible.

    URLs without a hostname (e.g. "about:blank") use the placeholder
    '?.?.?' in place of the hostname.
    """
    parsed = urlparse.urlparse(self.url)
    # parsed.hostname is None when the URL has no network location; fall back
    # to a placeholder instead of failing on None + str below.
    hostname = parsed.hostname if parsed.hostname else '?.?.?'
    path = parsed.path
    if path == '' or path == '/':
      return hostname
    last_path = path.split('/')[-1]
    if len(last_path) < 10:
      if len(path) < 10:
        # NOTE: |path| keeps its leading '/', so this yields "host//path".
        # The exact output is preserved since callers may rely on it.
        return hostname + '/' + path
      return hostname + '/..' + path[-10:]
    if len(last_path) > 10:
      return hostname + '/..' + last_path[:5]
    return hostname + '/..' + last_path

  def GetContentType(self):
    """Returns a coarse category name derived from the content type.

    Returns:
      One of 'magic-debug-content', 'html', 'css', 'script', 'json',
      'gif_image', 'image' or 'other'.
    """
    mime = self.content_type
    if 'magic-debug-content' in mime:
      # A silly hack to make the unittesting easier.
      return 'magic-debug-content'
    elif mime == 'text/html':
      return 'html'
    elif mime == 'text/css':
      return 'css'
    elif mime in ('application/x-javascript', 'text/javascript',
                  'application/javascript'):
      return 'script'
    elif mime == 'application/json':
      return 'json'
    elif mime == 'image/gif':
      return 'gif_image'
    elif mime.startswith('image/'):
      return 'image'
    else:
      return 'other'

  @classmethod
  def FromRequest(cls, request):
    """Creates a Resource from an instance of RequestData."""
    # Use |cls| so that subclasses get instances of their own type.
    return cls(request.url, request.GetContentType())

  def __Fields(self):
    """Returns the tuple of fields used for equality and hashing."""
    return (self.url, self.content_type)

  def __eq__(self, o):
    # Let Python try the reflected comparison (and eventually answer False)
    # rather than raising AttributeError on unrelated types.
    if not isinstance(o, Resource):
      return NotImplemented
    return self.__Fields() == o.__Fields()

  def __ne__(self, o):
    # Python 2 does not derive != from __eq__, so define it explicitly.
    result = self.__eq__(o)
    if result is NotImplemented:
      return result
    return not result

  def __hash__(self):
    return hash(self.__Fields())

86

87

class RequestData(object):
  """Represents a request, as dumped by log_requests.py."""

  def __init__(self, status, headers, request_headers, timestamp, timing, url,
               served_from_cache, initiator):
    """Creates an instance of RequestData.

    Args:
      status: Status of the request.
      headers: (dict) Response headers.
      request_headers: Request headers.
      timestamp: Timestamp of the request, used to order requests.
      timing: (dict) Keyword arguments for Timing, or a falsy value when no
              timing data is available.
      url: URL of the request.
      served_from_cache: Whether the request was served from the cache.
      initiator: Initiator of the request.
    """
    self.status = status
    self.headers = headers
    self.request_headers = request_headers
    self.timestamp = timestamp
    # Timing(**timing) requires exactly the Timing field names as keys.
    self.timing = Timing(**timing) if timing else None
    self.url = url
    self.served_from_cache = served_from_cache
    self.initiator = initiator

  def IsDataUrl(self):
    """Returns True if the URL uses the "data:" scheme."""
    return self.url.startswith('data:')

  def GetContentType(self):
    """Returns the media type of the response, without its parameters.

    For example "text/html; charset=utf-8" yields "text/html".

    Raises:
      KeyError: if the response has no 'Content-Type' header.
    """
    content_type = self.headers['Content-Type']
    # Whitespace is allowed around the ';' separating the media type from its
    # parameters, so strip it from the returned type.
    return content_type.split(';', 1)[0].strip()

  @classmethod
  def FromDict(cls, r):
    """Creates a RequestData object from a dict."""
    # Use |cls| so that subclasses get instances of their own type.
    return cls(r['status'], r['headers'], r['request_headers'],
               r['timestamp'], r['timing'], r['url'],
               r['served_from_cache'], r['initiator'])

118

119

def ParseJsonFile(filename):
  """Loads a JSON file and returns its entries as a list of RequestData.

  Args:
    filename: Path to a JSON file containing a sequence of request dicts.

  Returns:
    [RequestData, ...]
  """
  with open(filename) as json_file:
    entries = json.load(json_file)
  parsed_requests = []
  for entry in entries:
    parsed_requests.append(RequestData.FromDict(entry))
  return parsed_requests

125

126

def FilterRequests(requests):
  """Drops the requests that are not relevant for analysis.

  Args:
    requests: [RequestData, ...]

  Returns:
    The requests, in their original order, that are not data URLs, have a
    Content-Type header, and were not served from the cache.
  """
  kept = []
  for request in requests:
    if request.IsDataUrl():
      continue
    if 'Content-Type' not in request.headers:
      continue
    if request.served_from_cache:
      continue
    kept.append(request)
  return kept

139

140

def ResourceToRequestMap(requests):
  """Builds a mapping from each Resource to the request that fetched it.

  The same resource may be requested more than once during a page load; in
  that case the earliest request in |requests| is the one kept.

  Args:
    requests: [RequestData, ...]

  Returns:
    {Resource: RequestData, ...}
  """
  resource_to_request = {}
  # Walk the requests backwards so that earlier requests overwrite later ones,
  # which leaves the first request for each resource in the map.
  for request in reversed(requests):
    resource_to_request[Resource.FromRequest(request)] = request
  return resource_to_request

155

156

def GetResources(requests):
  """Returns an ordered list of resources from a list of requests.

  The same resource can be requested several times for a single page load.
  This keeps only the first occurrence of each resource, in request order.

  Args:
    requests: [RequestData]

  Returns:
    [Resource]
  """
  resources = []
  known_resources = set()
  for r in requests:
    resource = Resource.FromRequest(r)
    # Deduplicate on the Resource, not on the request: the set only ever
    # contains Resource instances, so testing the request never matches.
    if resource in known_resources:
      continue
    known_resources.add(resource)
    resources.append(resource)
  return resources

178

179

def ParseCacheControl(headers):
  """Parses the "Cache-Control" header and returns a dict representing it.

  Directives without a value (e.g. "no-cache") map to True; directives with a
  value (e.g. "max-age=3600") map to the value as a string. Empty directives,
  such as the one produced by a trailing comma, are ignored.

  Note that quoted values containing a comma are not handled: the header is
  split on every comma.

  Args:
    headers: (dict) Response headers.

  Returns:
    {Directive: Value, ...}
  """
  # TODO(lizeb): Handle the "Expires" header as well.
  result = {}
  cache_control = headers.get('Cache-Control', None)
  if cache_control is None:
    return result
  for directive in cache_control.split(','):
    directive = directive.strip()
    if not directive:
      # Stray or trailing commas would otherwise create a '' directive.
      continue
    # Split on the first '=' only, so values containing '=' are kept whole.
    parts = [s.strip() for s in directive.split('=', 1)]
    if len(parts) == 1:
      result[parts[0]] = True
    else:
      result[parts[0]] = parts[1]
  return result

202

203

def MaxAge(request):
  """Returns the max-age of a resource, or -1.

  -1 is returned when the response has no Cache-Control directives, when it
  contains "no-store" or "no-cache", or when it has no usable integer
  "max-age" value.

  Args:
    request: An object whose |headers| attribute holds the response headers.

  Returns:
    (int) The max-age value, or -1.
  """
  cache_control = ParseCacheControl(request.headers)
  if not cache_control:
    return -1
  if 'no-store' in cache_control or 'no-cache' in cache_control:
    return -1
  max_age = cache_control.get('max-age')
  # A "max-age" directive without a value is parsed as True, and int(True)
  # would silently report a max-age of 1.
  if max_age is None or max_age is True:
    return -1
  try:
    return int(max_age)
  except (TypeError, ValueError):
    # Malformed value (e.g. quoted or non-numeric): treat as not cacheable.
    return -1

214

215

def SortedByCompletion(requests):
  """Returns a new list of the requests, ordered by increasing timestamp."""
  by_timestamp = lambda request: request.timestamp
  return sorted(requests, key=by_timestamp)

OLD	NEW