Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3)

Side by Side Diff: tools/android/loading/loading_model.py

Issue 1610273002: Upgrade loading_model to use the new request_track and loading_trace. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 """Models for loading in chrome. 5 """Models for loading in chrome.
6 6
7 (Redirect the following to the general model module once we have one) 7 (Redirect the following to the general model module once we have one)
8 A model is an object with the following methods. 8 A model is an object with the following methods.
9 CostMs(): return the cost of the cost in milliseconds. 9 CostMs(): return the cost of the cost in milliseconds.
blundell 2016/01/21 09:22:53 the cost of the cost?
mattcary 2016/01/21 10:04:22 https://memegen.googleplex.com/5342822322929664
10 Set(): set model-specifical parameters. 10 Set(): set model-specifical parameters.
blundell 2016/01/21 09:22:53 nit: specific
mattcary 2016/01/21 10:04:22 https://memegen.googleplex.com/5322689026195456
11 11
12 ResourceGraph 12 ResourceGraph
13 This creates a DAG of resource dependancies from loading.log_requests to model 13 This creates a DAG of resource dependancies from loading.log_requests to model
blundell 2016/01/21 09:22:53 nit: dependencies
mattcary 2016/01/21 10:04:22 Done.
14 loading time. The model may be parameterized by changing the loading time of 14 loading time. The model may be parameterized by changing the loading time of
15 a particular or all resources. 15 a particular or all resources.
16 """ 16 """
17 17
18 import logging 18 import logging
19 import os 19 import os
20 import urlparse 20 import urlparse
21 import sys 21 import sys
22 22
23 import dag 23 import dag
24 import log_parser
25 24
26 class ResourceGraph(object): 25 class ResourceGraph(object):
27 """A model of loading by a DAG (tree?) of resource dependancies. 26 """A model of loading by a DAG (tree?) of resource dependancies.
28 27
29 Set parameters: 28 Set parameters:
30 cache_all: if true, assume zero loading time for all resources. 29 cache_all: if true, assume zero loading time for all resources.
31 """ 30 """
31 REQUEST_LENS = None
Benoit L 2016/01/21 09:09:26 nit: Can you add a comment saying that this a a cl
mattcary 2016/01/21 10:04:22 Oops, I meant to remove this after you landed your
32 32
33 def __init__(self, requests): 33 def __init__(self, trace):
34 """Create from a parsed request set. 34 """Create from a LoadingTrace.
35 35
36 Args: 36 Args:
37 requests: [RequestData, ...] filtered RequestData from loading.log_parser. 37 trace: (LoadingTrace) Loading trace.
38 """ 38 """
39 self._BuildDag(requests) 39 self._BuildDag(trace)
40 self._global_start = min([n.StartTime() for n in self._node_info]) 40 self._global_start = min([n.StartTime() for n in self._node_info])
41 # Sort before splitting children so that we can correctly dectect if a 41 # Sort before splitting children so that we can correctly dectect if a
42 # reparented child is actually a dependency for a child of its new parent. 42 # reparented child is actually a dependency for a child of its new parent.
43 try: 43 try:
44 for n in dag.TopologicalSort(self._nodes): 44 for n in dag.TopologicalSort(self._nodes):
45 self._SplitChildrenByTime(self._node_info[n.Index()]) 45 self._SplitChildrenByTime(self._node_info[n.Index()])
46 except AssertionError as exc: 46 except AssertionError as exc:
47 sys.stderr.write('Bad topological sort: %s\n' 47 sys.stderr.write('Bad topological sort: %s\n'
48 'Skipping child split\n' % str(exc)) 48 'Skipping child split\n' % str(exc))
49 self._cache_all = False 49 self._cache_all = False
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after
175 costs[n.Index()] = cost 175 costs[n.Index()] = cost
176 max_cost = max(costs) 176 max_cost = max(costs)
177 assert max_cost > 0 # Otherwise probably the filter went awry. 177 assert max_cost > 0 # Otherwise probably the filter went awry.
178 if path_list is not None: 178 if path_list is not None:
179 del path_list[:-1] 179 del path_list[:-1]
180 n = (i for i in self._nodes if costs[i.Index()] == max_cost).next() 180 n = (i for i in self._nodes if costs[i.Index()] == max_cost).next()
181 path_list.append(self._node_info[n.Index()]) 181 path_list.append(self._node_info[n.Index()])
182 while n.Predecessors(): 182 while n.Predecessors():
183 n = reduce(lambda costliest, next: 183 n = reduce(lambda costliest, next:
184 next if (self._node_filter(next) and 184 next if (self._node_filter(next) and
185 cost[next.Index()] > cost[costliest.Index()]) 185 costs[next.Index()] > costs[costliest.Index()])
blundell 2016/01/21 09:22:53 hmm, was this a bug?
mattcary 2016/01/21 10:04:22 Yes, I'm not sure how that got introduced, because
186 else costliest, 186 else costliest,
187 n.Predecessors()) 187 n.Predecessors())
188 path_list.insert(0, self._node_info[n.Index()]) 188 path_list.insert(0, self._node_info[n.Index()])
189 return max_cost 189 return max_cost
190 190
191 def FilterAds(self, node): 191 def FilterAds(self, node):
192 """A filter for use in eg, Cost, to remove advertising nodes. 192 """A filter for use in eg, Cost, to remove advertising nodes.
193 193
194 Args: 194 Args:
195 node: A dag.Node. 195 node: A dag.Node.
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after
315 """Create a new node info. 315 """Create a new node info.
316 316
317 Args: 317 Args:
318 node: The node to augment. 318 node: The node to augment.
319 request: The request associated with this node. 319 request: The request associated with this node.
320 """ 320 """
321 self._request = request 321 self._request = request
322 self._node = node 322 self._node = node
323 self._edge_costs = {} 323 self._edge_costs = {}
324 self._edge_annotations = {} 324 self._edge_annotations = {}
325 # All fields in timing are millis relative to requestTime, which is epoch 325 # All fields in timing are millis relative to request_time, which is epoch
326 # seconds. 326 # seconds.
327 self._node_cost = max([t for f, t in request.timing._asdict().iteritems() 327 self._node_cost = max([t for f, t in request.timing._asdict().iteritems()
328 if f != 'requestTime']) 328 if f != 'request_time'])
329 329
330 def __str__(self): 330 def __str__(self):
331 return self.ShortName() 331 return self.ShortName()
332 332
333 def Node(self): 333 def Node(self):
334 return self._node 334 return self._node
335 335
336 def Index(self): 336 def Index(self):
337 return self._node.Index() 337 return self._node.Index()
338 338
339 def Request(self): 339 def Request(self):
340 return self._request 340 return self._request
341 341
342 def NodeCost(self): 342 def NodeCost(self):
343 return self._node_cost 343 return self._node_cost
344 344
345 def EdgeCost(self, s): 345 def EdgeCost(self, s):
346 return self._edge_costs[s] 346 return self._edge_costs[s]
347 347
348 def StartTime(self): 348 def StartTime(self):
349 return self._request.timing.requestTime * 1000 349 return self._request.timing.request_time * 1000
350 350
351 def EndTime(self): 351 def EndTime(self):
352 return self._request.timing.requestTime * 1000 + self._node_cost 352 return self._request.timing.request_time * 1000 + self._node_cost
353 353
354 def EdgeAnnotation(self, s): 354 def EdgeAnnotation(self, s):
355 assert s.Node() in self.Node().Successors() 355 assert s.Node() in self.Node().Successors()
356 return self._edge_annotations.get(s, []) 356 return self._edge_annotations.get(s, [])
357 357
358 def ContentType(self): 358 def ContentType(self):
359 return log_parser.Resource.FromRequest(self._request).GetContentType() 359 return self._request.GetContentType()
360 360
361 def ShortName(self): 361 def ShortName(self):
Benoit L 2016/01/21 09:09:26 Might be better in "Request", but I don't have a s
mattcary 2016/01/21 10:04:22 I wasn't sure what you intended either. I tweaked
Benoit L 2016/01/21 10:38:01 Yes, you're right.
362 return log_parser.Resource.FromRequest(self._request).GetShortName() 362 """Returns either the hostname of the resource, or the filename,
363 or the end of the path. Tries to include the domain as much as possible.
364 """
365 parsed = urlparse.urlparse(self._request.url)
366 path = parsed.path
367 if path != '' and path != '/':
368 last_path = parsed.path.split('/')[-1]
369 if len(last_path) < 10:
370 if len(path) < 10:
371 return parsed.hostname + '/' + path
372 else:
373 return parsed.hostname + '/..' + parsed.path[-10:]
374 elif len(last_path) > 10:
375 return parsed.hostname + '/..' + last_path[:5]
376 else:
377 return parsed.hostname + '/..' + last_path
378 else:
379 return parsed.hostname
363 380
364 def Url(self): 381 def Url(self):
365 return self._request.url 382 return self._request.url
366 383
367 def SetEdgeCost(self, child, cost): 384 def SetEdgeCost(self, child, cost):
368 assert child.Node() in self._node.Successors() 385 assert child.Node() in self._node.Successors()
369 self._edge_costs[child] = cost 386 self._edge_costs[child] = cost
370 387
371 def AddEdgeAnnotation(self, s, annotation): 388 def AddEdgeAnnotation(self, s, annotation):
372 assert s.Node() in self._node.Successors() 389 assert s.Node() in self._node.Successors()
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
415 def _EdgeCost(self, parent, child): 432 def _EdgeCost(self, parent, child):
416 """Convenience function for redirecting Nodes to _NodeInfo.""" 433 """Convenience function for redirecting Nodes to _NodeInfo."""
417 return self._node_info[parent.Index()].EdgeCost( 434 return self._node_info[parent.Index()].EdgeCost(
418 self._node_info[child.Index()]) 435 self._node_info[child.Index()])
419 436
420 def _EdgeAnnotation(self, parent, child): 437 def _EdgeAnnotation(self, parent, child):
421 """Convenience function for redirecting Nodes to _NodeInfo.""" 438 """Convenience function for redirecting Nodes to _NodeInfo."""
422 return self._node_info[parent.Index()].EdgeAnnotation( 439 return self._node_info[parent.Index()].EdgeAnnotation(
423 self._node_info[child.Index()]) 440 self._node_info[child.Index()])
424 441
425 def _BuildDag(self, requests): 442 def _BuildDag(self, trace):
426 """Build DAG of resources. 443 """Build DAG of resources.
427 444
428 Build a DAG from our requests and augment with _NodeInfo (see above) in a 445 Build a DAG from our requests and augment with _NodeInfo (see above) in a
429 parallel array indexed by Node.Index(). 446 parallel array indexed by Node.Index().
430 447
431 Creates self._nodes and self._node_info. 448 Creates self._nodes and self._node_info.
432 449
433 Args: 450 Args:
434 requests: [Request, ...] Requests from loading.log_parser. 451 trace: A LoadingTrace.
435 """ 452 """
436 self._nodes = [] 453 self._nodes = []
437 self._node_info = [] 454 self._node_info = []
438 indicies_by_url = {} 455 index_by_request = {}
Benoit L 2016/01/21 09:09:26 Since we now identify everything by the request_id
mattcary 2016/01/21 10:04:22 DAG nodes don't have a request id, so I think we h
Benoit L 2016/01/21 10:38:01 Acknowledged.
439 requests_by_completion = log_parser.SortedByCompletion(requests) 456 for request in trace.request_track.GetEvents():
440 for request in requests:
441 next_index = len(self._nodes) 457 next_index = len(self._nodes)
442 indicies_by_url.setdefault(request.url, []).append(next_index) 458 assert request not in index_by_request
459 index_by_request[request] = next_index
443 node = dag.Node(next_index) 460 node = dag.Node(next_index)
444 node_info = self._NodeInfo(node, request) 461 node_info = self._NodeInfo(node, request)
445 self._nodes.append(node) 462 self._nodes.append(node)
446 self._node_info.append(node_info) 463 self._node_info.append(node_info)
447 for url, indicies in indicies_by_url.iteritems(): 464
448 if len(indicies) > 1: 465 dependencies = self.REQUEST_LENS(trace).GetRequestDependencies()
449 logging.warning('Multiple loads (%d) for url: %s' % 466 for child_rq, parent_rq, reason in dependencies:
450 (len(indicies), url)) 467 parent = self._node_info[index_by_request[parent_rq]]
451 for i in xrange(len(requests)): 468 child = self._node_info[index_by_request[child_rq]]
452 request = requests[i] 469 edge_cost = child.StartTime() - parent.EndTime()
453 current_node_info = self._node_info[i] 470 if edge_cost < 0:
454 resource = log_parser.Resource.FromRequest(current_node_info.Request()) 471 edge_cost = 0
455 initiator = request.initiator 472 if child.StartTime() < parent.StartTime():
456 initiator_type = initiator['type']
457 predecessor_url = None
458 predecessor_type = None
459 # Classify & infer the predecessor. If a candidate url we identify as the
460 # predecessor is not in index_by_url, then we haven't seen it in our
461 # requests and we will try to find a better predecessor.
462 if initiator_type == 'parser':
463 url = initiator['url']
464 if url in indicies_by_url:
465 predecessor_url = url
466 predecessor_type = 'parser'
467 elif initiator_type == 'script' and 'stackTrace' in initiator:
468 for frame in initiator['stackTrace']:
469 url = frame['url']
470 if url in indicies_by_url:
471 predecessor_url = url
472 predecessor_type = 'stack'
473 break
474 elif initiator_type == 'script':
475 # When the initiator is a script without a stackTrace, infer that it
476 # comes from the most recent script from the same hostname. TLD+1 might
477 # be better, but finding what is a TLD requires a database.
478 request_hostname = urlparse.urlparse(request.url).hostname
479 sorted_script_requests_from_hostname = [
480 r for r in requests_by_completion
481 if (resource.GetContentType() in ('script', 'html', 'json')
482 and urlparse.urlparse(r.url).hostname == request_hostname)]
483 most_recent = None
484 # Linear search is bad, but this shouldn't matter here.
485 for r in sorted_script_requests_from_hostname:
486 if r.timestamp < request.timing.requestTime:
487 most_recent = r
488 else:
489 break
490 if most_recent is not None:
491 url = most_recent.url
492 if url in indicies_by_url:
493 predecessor_url = url
494 predecessor_type = 'script_inferred'
495 # TODO(mattcary): we skip initiator type other, is that correct?
496 if predecessor_url is not None:
497 predecessor = self._FindBestPredecessor(
498 current_node_info, indicies_by_url[predecessor_url])
499 edge_cost = current_node_info.StartTime() - predecessor.EndTime()
500 if edge_cost < 0:
501 edge_cost = 0
502 if current_node_info.StartTime() < predecessor.StartTime():
503 logging.error('Inverted dependency: %s->%s', 473 logging.error('Inverted dependency: %s->%s',
504 predecessor.ShortName(), current_node_info.ShortName()) 474 parent.ShortName(), child.ShortName())
505 # Note that current.StartTime() < predecessor.EndTime() appears to 475 # Note that child.StartTime() < parent.EndTime() appears to happen a
506 # happen a fair amount in practice. 476 # fair amount in practice.
Benoit L 2016/01/21 09:09:26 Especially for "parser" dependencies, I suppose? (
mattcary 2016/01/21 10:04:22 Ah, yeah, that would make sense.
507 predecessor.Node().AddSuccessor(current_node_info.Node()) 477 parent.Node().AddSuccessor(child.Node())
508 predecessor.SetEdgeCost(current_node_info, edge_cost) 478 parent.SetEdgeCost(child, edge_cost)
509 predecessor.AddEdgeAnnotation(current_node_info, predecessor_type) 479 parent.AddEdgeAnnotation(child, reason)
510
511 def _FindBestPredecessor(self, node_info, candidate_indicies):
512 """Find best predecessor for node_info
513
514 If there is only one candidate, we use it regardless of timings. We will
515 later warn about inverted dependencies. If there are more than one, we use
516 the latest whose end time is before node_info's start time. If there is no
517 such candidate, we throw up our hands and return an arbitrary one.
518
519 Args:
520 node_info: _NodeInfo of interest.
521 candidate_indicies: indicies of candidate predecessors.
522
523 Returns:
524 _NodeInfo of best predecessor.
525 """
526 assert candidate_indicies
527 if len(candidate_indicies) == 1:
528 return self._node_info[candidate_indicies[0]]
529 candidate = self._node_info[candidate_indicies[0]]
530 for i in xrange(1, len(candidate_indicies)):
531 next_candidate = self._node_info[candidate_indicies[i]]
532 if (next_candidate.EndTime() < node_info.StartTime() and
533 next_candidate.StartTime() > candidate.StartTime()):
534 candidate = next_candidate
535 if candidate.EndTime() > node_info.StartTime():
536 logging.warning('Multiple candidates but all inverted for ' +
537 node_info.ShortName())
538 return candidate
539
540 480
541 def _SplitChildrenByTime(self, parent): 481 def _SplitChildrenByTime(self, parent):
542 """Split children of a node by request times. 482 """Split children of a node by request times.
543 483
544 The initiator of a request may not be the true dependency of a request. For 484 The initiator of a request may not be the true dependency of a request. For
545 example, a script may appear to load several resources independently, but in 485 example, a script may appear to load several resources independently, but in
546 fact one of them may be a JSON data file, and the remaining resources assets 486 fact one of them may be a JSON data file, and the remaining resources assets
547 described in the JSON. The assets should be dependent upon the JSON data 487 described in the JSON. The assets should be dependent upon the JSON data
548 file, and not the original script. 488 file, and not the original script.
549 489
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
617 highlight: a list of node items to emphasize. Any resource url which 557 highlight: a list of node items to emphasize. Any resource url which
618 contains any highlight text will be distinguished in the output. 558 contains any highlight text will be distinguished in the output.
619 559
620 Returns: 560 Returns:
621 A string describing the resource in graphviz format. 561 A string describing the resource in graphviz format.
622 The resource is color-coded according to its content type, and its shape 562 The resource is color-coded according to its content type, and its shape
623 is oval if its max-age is less than 300s (or if it's not cacheable). 563 is oval if its max-age is less than 300s (or if it's not cacheable).
624 """ 564 """
625 node_info = self._node_info[index] 565 node_info = self._node_info[index]
626 color = self._CONTENT_TYPE_TO_COLOR[node_info.ContentType()] 566 color = self._CONTENT_TYPE_TO_COLOR[node_info.ContentType()]
627 max_age = log_parser.MaxAge(node_info.Request()) 567 max_age = node_info.Request().MaxAge()
628 shape = 'polygon' if max_age > 300 else 'oval' 568 shape = 'polygon' if max_age > 300 else 'oval'
629 styles = ['filled'] 569 styles = ['filled']
630 if highlight: 570 if highlight:
631 for fragment in highlight: 571 for fragment in highlight:
632 if fragment in node_info.Url(): 572 if fragment in node_info.Url():
633 styles.append('dotted') 573 styles.append('dotted')
634 break 574 break
635 return ('%d [label = "%s\\n%.2f->%.2f (%.2f)"; style = "%s"; ' 575 return ('%d [label = "%s\\n%.2f->%.2f (%.2f)"; style = "%s"; '
636 'fillcolor = %s; shape = %s];\n' 576 'fillcolor = %s; shape = %s];\n'
637 % (index, node_info.ShortName(), 577 % (index, node_info.ShortName(),
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after
726 Dict of image url + short name to NodeInfo. 666 Dict of image url + short name to NodeInfo.
727 """ 667 """
728 image_to_info = {} 668 image_to_info = {}
729 for n in self._node_info: 669 for n in self._node_info:
730 if (n.ContentType().startswith('image') and 670 if (n.ContentType().startswith('image') and
731 not self._IsAdUrl(n.Url())): 671 not self._IsAdUrl(n.Url())):
732 key = str((n.Url(), n.ShortName(), n.StartTime())) 672 key = str((n.Url(), n.ShortName(), n.StartTime()))
733 assert key not in image_to_info, n.Url() 673 assert key not in image_to_info, n.Url()
734 image_to_info[key] = n 674 image_to_info[key] = n
735 return image_to_info 675 return image_to_info
OLDNEW
« no previous file with comments | « no previous file | tools/android/loading/loading_model_unittest.py » ('j') | tools/android/loading/loading_model_unittest.py » ('J')

Powered by Google App Engine
This is Rietveld 408576698