OLD | NEW |
1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """Models for loading in chrome. | 5 """Models for loading in chrome. |
6 | 6 |
7 (Redirect the following to the general model module once we have one) | 7 (Redirect the following to the general model module once we have one) |
8 A model is an object with the following methods. | 8 A model is an object with the following methods. |
9 CostMs(): return the cost of the cost in milliseconds. | 9 CostMs(): return the cost of the model in milliseconds. |
10 Set(): set model-specifical parameters. | 10 Set(): set model-specific parameters. |
11 | 11 |
12 ResourceGraph | 12 ResourceGraph |
13 This creates a DAG of resource dependancies from loading.log_requests to model | 13 This creates a DAG of resource dependencies from loading.log_requests to model |
14 loading time. The model may be parameterized by changing the loading time of | 14 loading time. The model may be parameterized by changing the loading time of |
15 a particular or all resources. | 15 a particular or all resources. |
16 """ | 16 """ |
17 | 17 |
18 import logging | 18 import logging |
19 import os | 19 import os |
20 import urlparse | 20 import urlparse |
21 import sys | 21 import sys |
22 | 22 |
23 import dag | 23 import dag |
24 import log_parser | 24 import request_dependencies_lens |
25 | 25 |
26 class ResourceGraph(object): | 26 class ResourceGraph(object): |
27 """A model of loading by a DAG (tree?) of resource dependancies. | 27 """A model of loading by a DAG (tree?) of resource dependencies. |
28 | 28 |
29 Set parameters: | 29 Set parameters: |
30 cache_all: if true, assume zero loading time for all resources. | 30 cache_all: if true, assume zero loading time for all resources. |
31 """ | 31 """ |
32 | 32 def __init__(self, trace): |
33 def __init__(self, requests): | 33 """Create from a LoadingTrace. |
34 """Create from a parsed request set. | |
35 | 34 |
36 Args: | 35 Args: |
37 requests: [RequestData, ...] filtered RequestData from loading.log_parser. | 36 trace: (LoadingTrace) Loading trace. |
38 """ | 37 """ |
39 self._BuildDag(requests) | 38 self._BuildDag(trace) |
40 self._global_start = min([n.StartTime() for n in self._node_info]) | 39 self._global_start = min([n.StartTime() for n in self._node_info]) |
41 # Sort before splitting children so that we can correctly dectect if a | 40 # Sort before splitting children so that we can correctly detect if a |
42 # reparented child is actually a dependency for a child of its new parent. | 41 # reparented child is actually a dependency for a child of its new parent. |
43 try: | 42 try: |
44 for n in dag.TopologicalSort(self._nodes): | 43 for n in dag.TopologicalSort(self._nodes): |
45 self._SplitChildrenByTime(self._node_info[n.Index()]) | 44 self._SplitChildrenByTime(self._node_info[n.Index()]) |
46 except AssertionError as exc: | 45 except AssertionError as exc: |
47 sys.stderr.write('Bad topological sort: %s\n' | 46 sys.stderr.write('Bad topological sort: %s\n' |
48 'Skipping child split\n' % str(exc)) | 47 'Skipping child split\n' % str(exc)) |
49 self._cache_all = False | 48 self._cache_all = False |
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
175 costs[n.Index()] = cost | 174 costs[n.Index()] = cost |
176 max_cost = max(costs) | 175 max_cost = max(costs) |
177 assert max_cost > 0 # Otherwise probably the filter went awry. | 176 assert max_cost > 0 # Otherwise probably the filter went awry. |
178 if path_list is not None: | 177 if path_list is not None: |
179 del path_list[:-1] | 178 del path_list[:-1] |
180 n = (i for i in self._nodes if costs[i.Index()] == max_cost).next() | 179 n = (i for i in self._nodes if costs[i.Index()] == max_cost).next() |
181 path_list.append(self._node_info[n.Index()]) | 180 path_list.append(self._node_info[n.Index()]) |
182 while n.Predecessors(): | 181 while n.Predecessors(): |
183 n = reduce(lambda costliest, next: | 182 n = reduce(lambda costliest, next: |
184 next if (self._node_filter(next) and | 183 next if (self._node_filter(next) and |
185 cost[next.Index()] > cost[costliest.Index()]) | 184 costs[next.Index()] > costs[costliest.Index()]) |
186 else costliest, | 185 else costliest, |
187 n.Predecessors()) | 186 n.Predecessors()) |
188 path_list.insert(0, self._node_info[n.Index()]) | 187 path_list.insert(0, self._node_info[n.Index()]) |
189 return max_cost | 188 return max_cost |
190 | 189 |
191 def FilterAds(self, node): | 190 def FilterAds(self, node): |
192 """A filter for use in eg, Cost, to remove advertising nodes. | 191 """A filter for use in eg, Cost, to remove advertising nodes. |
193 | 192 |
194 Args: | 193 Args: |
195 node: A dag.Node. | 194 node: A dag.Node. |
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
315 """Create a new node info. | 314 """Create a new node info. |
316 | 315 |
317 Args: | 316 Args: |
318 node: The node to augment. | 317 node: The node to augment. |
319 request: The request associated with this node. | 318 request: The request associated with this node. |
320 """ | 319 """ |
321 self._request = request | 320 self._request = request |
322 self._node = node | 321 self._node = node |
323 self._edge_costs = {} | 322 self._edge_costs = {} |
324 self._edge_annotations = {} | 323 self._edge_annotations = {} |
325 # All fields in timing are millis relative to requestTime, which is epoch | 324 # All fields in timing are millis relative to request_time, which is epoch |
326 # seconds. | 325 # seconds. |
327 self._node_cost = max([t for f, t in request.timing._asdict().iteritems() | 326 self._node_cost = max([t for f, t in request.timing._asdict().iteritems() |
328 if f != 'requestTime']) | 327 if f != 'request_time']) |
329 | 328 |
330 def __str__(self): | 329 def __str__(self): |
331 return self.ShortName() | 330 return self.ShortName() |
332 | 331 |
333 def Node(self): | 332 def Node(self): |
334 return self._node | 333 return self._node |
335 | 334 |
336 def Index(self): | 335 def Index(self): |
337 return self._node.Index() | 336 return self._node.Index() |
338 | 337 |
339 def Request(self): | 338 def Request(self): |
340 return self._request | 339 return self._request |
341 | 340 |
342 def NodeCost(self): | 341 def NodeCost(self): |
343 return self._node_cost | 342 return self._node_cost |
344 | 343 |
345 def EdgeCost(self, s): | 344 def EdgeCost(self, s): |
346 return self._edge_costs[s] | 345 return self._edge_costs[s] |
347 | 346 |
348 def StartTime(self): | 347 def StartTime(self): |
349 return self._request.timing.requestTime * 1000 | 348 return self._request.timing.request_time * 1000 |
350 | 349 |
351 def EndTime(self): | 350 def EndTime(self): |
352 return self._request.timing.requestTime * 1000 + self._node_cost | 351 return self._request.timing.request_time * 1000 + self._node_cost |
353 | 352 |
354 def EdgeAnnotation(self, s): | 353 def EdgeAnnotation(self, s): |
355 assert s.Node() in self.Node().Successors() | 354 assert s.Node() in self.Node().Successors() |
356 return self._edge_annotations.get(s, []) | 355 return self._edge_annotations.get(s, []) |
357 | 356 |
358 def ContentType(self): | 357 def ContentType(self): |
359 return log_parser.Resource.FromRequest(self._request).GetContentType() | 358 return self._request.GetContentType() |
360 | 359 |
361 def ShortName(self): | 360 def ShortName(self): |
362 return log_parser.Resource.FromRequest(self._request).GetShortName() | 361 """Returns either the hostname of the resource, or the filename, |
| 362 or the end of the path. Tries to include the domain as much as possible. |
| 363 """ |
| 364 parsed = urlparse.urlparse(self._request.url) |
| 365 path = parsed.path |
| 366 if path != '' and path != '/': |
| 367 last_path = parsed.path.split('/')[-1] |
| 368 if len(last_path) < 10: |
| 369 if len(path) < 10: |
| 370 return parsed.hostname + '/' + path |
| 371 else: |
| 372 return parsed.hostname + '/..' + parsed.path[-10:] |
| 373 elif len(last_path) > 10: |
| 374 return parsed.hostname + '/..' + last_path[:5] |
| 375 else: |
| 376 return parsed.hostname + '/..' + last_path |
| 377 else: |
| 378 return parsed.hostname |
363 | 379 |
364 def Url(self): | 380 def Url(self): |
365 return self._request.url | 381 return self._request.url |
366 | 382 |
367 def SetEdgeCost(self, child, cost): | 383 def SetEdgeCost(self, child, cost): |
368 assert child.Node() in self._node.Successors() | 384 assert child.Node() in self._node.Successors() |
369 self._edge_costs[child] = cost | 385 self._edge_costs[child] = cost |
370 | 386 |
371 def AddEdgeAnnotation(self, s, annotation): | 387 def AddEdgeAnnotation(self, s, annotation): |
372 assert s.Node() in self._node.Successors() | 388 assert s.Node() in self._node.Successors() |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
415 def _EdgeCost(self, parent, child): | 431 def _EdgeCost(self, parent, child): |
416 """Convenience function for redirecting Nodes to _NodeInfo.""" | 432 """Convenience function for redirecting Nodes to _NodeInfo.""" |
417 return self._node_info[parent.Index()].EdgeCost( | 433 return self._node_info[parent.Index()].EdgeCost( |
418 self._node_info[child.Index()]) | 434 self._node_info[child.Index()]) |
419 | 435 |
420 def _EdgeAnnotation(self, parent, child): | 436 def _EdgeAnnotation(self, parent, child): |
421 """Convenience function for redirecting Nodes to _NodeInfo.""" | 437 """Convenience function for redirecting Nodes to _NodeInfo.""" |
422 return self._node_info[parent.Index()].EdgeAnnotation( | 438 return self._node_info[parent.Index()].EdgeAnnotation( |
423 self._node_info[child.Index()]) | 439 self._node_info[child.Index()]) |
424 | 440 |
425 def _BuildDag(self, requests): | 441 def _BuildDag(self, trace): |
426 """Build DAG of resources. | 442 """Build DAG of resources. |
427 | 443 |
428 Build a DAG from our requests and augment with _NodeInfo (see above) in a | 444 Build a DAG from our requests and augment with _NodeInfo (see above) in a |
429 parallel array indexed by Node.Index(). | 445 parallel array indexed by Node.Index(). |
430 | 446 |
431 Creates self._nodes and self._node_info. | 447 Creates self._nodes and self._node_info. |
432 | 448 |
433 Args: | 449 Args: |
434 requests: [Request, ...] Requests from loading.log_parser. | 450 trace: A LoadingTrace. |
435 """ | 451 """ |
436 self._nodes = [] | 452 self._nodes = [] |
437 self._node_info = [] | 453 self._node_info = [] |
438 indicies_by_url = {} | 454 index_by_request = {} |
439 requests_by_completion = log_parser.SortedByCompletion(requests) | 455 for request in trace.request_track.GetEvents(): |
440 for request in requests: | |
441 next_index = len(self._nodes) | 456 next_index = len(self._nodes) |
442 indicies_by_url.setdefault(request.url, []).append(next_index) | 457 assert request not in index_by_request |
| 458 index_by_request[request] = next_index |
443 node = dag.Node(next_index) | 459 node = dag.Node(next_index) |
444 node_info = self._NodeInfo(node, request) | 460 node_info = self._NodeInfo(node, request) |
445 self._nodes.append(node) | 461 self._nodes.append(node) |
446 self._node_info.append(node_info) | 462 self._node_info.append(node_info) |
447 for url, indicies in indicies_by_url.iteritems(): | 463 |
448 if len(indicies) > 1: | 464 dependencies = request_dependencies_lens.RequestDependencyLens( |
449 logging.warning('Multiple loads (%d) for url: %s' % | 465 trace).GetRequestDependencies() |
450 (len(indicies), url)) | 466 for child_rq, parent_rq, reason in dependencies: |
451 for i in xrange(len(requests)): | 467 parent = self._node_info[index_by_request[parent_rq]] |
452 request = requests[i] | 468 child = self._node_info[index_by_request[child_rq]] |
453 current_node_info = self._node_info[i] | 469 edge_cost = child.StartTime() - parent.EndTime() |
454 resource = log_parser.Resource.FromRequest(current_node_info.Request()) | 470 if edge_cost < 0: |
455 initiator = request.initiator | 471 edge_cost = 0 |
456 initiator_type = initiator['type'] | 472 if child.StartTime() < parent.StartTime(): |
457 predecessor_url = None | |
458 predecessor_type = None | |
459 # Classify & infer the predecessor. If a candidate url we identify as the | |
460 # predecessor is not in index_by_url, then we haven't seen it in our | |
461 # requests and we will try to find a better predecessor. | |
462 if initiator_type == 'parser': | |
463 url = initiator['url'] | |
464 if url in indicies_by_url: | |
465 predecessor_url = url | |
466 predecessor_type = 'parser' | |
467 elif initiator_type == 'script' and 'stackTrace' in initiator: | |
468 for frame in initiator['stackTrace']: | |
469 url = frame['url'] | |
470 if url in indicies_by_url: | |
471 predecessor_url = url | |
472 predecessor_type = 'stack' | |
473 break | |
474 elif initiator_type == 'script': | |
475 # When the initiator is a script without a stackTrace, infer that it | |
476 # comes from the most recent script from the same hostname. TLD+1 might | |
477 # be better, but finding what is a TLD requires a database. | |
478 request_hostname = urlparse.urlparse(request.url).hostname | |
479 sorted_script_requests_from_hostname = [ | |
480 r for r in requests_by_completion | |
481 if (resource.GetContentType() in ('script', 'html', 'json') | |
482 and urlparse.urlparse(r.url).hostname == request_hostname)] | |
483 most_recent = None | |
484 # Linear search is bad, but this shouldn't matter here. | |
485 for r in sorted_script_requests_from_hostname: | |
486 if r.timestamp < request.timing.requestTime: | |
487 most_recent = r | |
488 else: | |
489 break | |
490 if most_recent is not None: | |
491 url = most_recent.url | |
492 if url in indicies_by_url: | |
493 predecessor_url = url | |
494 predecessor_type = 'script_inferred' | |
495 # TODO(mattcary): we skip initiator type other, is that correct? | |
496 if predecessor_url is not None: | |
497 predecessor = self._FindBestPredecessor( | |
498 current_node_info, indicies_by_url[predecessor_url]) | |
499 edge_cost = current_node_info.StartTime() - predecessor.EndTime() | |
500 if edge_cost < 0: | |
501 edge_cost = 0 | |
502 if current_node_info.StartTime() < predecessor.StartTime(): | |
503 logging.error('Inverted dependency: %s->%s', | 473 logging.error('Inverted dependency: %s->%s', |
504 predecessor.ShortName(), current_node_info.ShortName()) | 474 parent.ShortName(), child.ShortName()) |
505 # Note that current.StartTime() < predecessor.EndTime() appears to | 475 # Note that child.StartTime() < parent.EndTime() appears to happen a |
506 # happen a fair amount in practice. | 476 # fair amount in practice. |
507 predecessor.Node().AddSuccessor(current_node_info.Node()) | 477 parent.Node().AddSuccessor(child.Node()) |
508 predecessor.SetEdgeCost(current_node_info, edge_cost) | 478 parent.SetEdgeCost(child, edge_cost) |
509 predecessor.AddEdgeAnnotation(current_node_info, predecessor_type) | 479 parent.AddEdgeAnnotation(child, reason) |
510 | |
511 def _FindBestPredecessor(self, node_info, candidate_indicies): | |
512 """Find best predecessor for node_info | |
513 | |
514 If there is only one candidate, we use it regardless of timings. We will | |
515 later warn about inverted dependencies. If there are more than one, we use | |
516 the latest whose end time is before node_info's start time. If there is no | |
517 such candidate, we throw up our hands and return an arbitrary one. | |
518 | |
519 Args: | |
520 node_info: _NodeInfo of interest. | |
521 candidate_indicies: indicies of candidate predecessors. | |
522 | |
523 Returns: | |
524 _NodeInfo of best predecessor. | |
525 """ | |
526 assert candidate_indicies | |
527 if len(candidate_indicies) == 1: | |
528 return self._node_info[candidate_indicies[0]] | |
529 candidate = self._node_info[candidate_indicies[0]] | |
530 for i in xrange(1, len(candidate_indicies)): | |
531 next_candidate = self._node_info[candidate_indicies[i]] | |
532 if (next_candidate.EndTime() < node_info.StartTime() and | |
533 next_candidate.StartTime() > candidate.StartTime()): | |
534 candidate = next_candidate | |
535 if candidate.EndTime() > node_info.StartTime(): | |
536 logging.warning('Multiple candidates but all inverted for ' + | |
537 node_info.ShortName()) | |
538 return candidate | |
539 | |
540 | 480 |
541 def _SplitChildrenByTime(self, parent): | 481 def _SplitChildrenByTime(self, parent): |
542 """Split children of a node by request times. | 482 """Split children of a node by request times. |
543 | 483 |
544 The initiator of a request may not be the true dependency of a request. For | 484 The initiator of a request may not be the true dependency of a request. For |
545 example, a script may appear to load several resources independently, but in | 485 example, a script may appear to load several resources independently, but in |
546 fact one of them may be a JSON data file, and the remaining resources assets | 486 fact one of them may be a JSON data file, and the remaining resources assets |
547 described in the JSON. The assets should be dependent upon the JSON data | 487 described in the JSON. The assets should be dependent upon the JSON data |
548 file, and not the original script. | 488 file, and not the original script. |
549 | 489 |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
617 highlight: a list of node items to emphasize. Any resource url which | 557 highlight: a list of node items to emphasize. Any resource url which |
618 contains any highlight text will be distinguished in the output. | 558 contains any highlight text will be distinguished in the output. |
619 | 559 |
620 Returns: | 560 Returns: |
621 A string describing the resource in graphviz format. | 561 A string describing the resource in graphviz format. |
622 The resource is color-coded according to its content type, and its shape | 562 The resource is color-coded according to its content type, and its shape |
623 is oval if its max-age is less than 300s (or if it's not cacheable). | 563 is oval if its max-age is less than 300s (or if it's not cacheable). |
624 """ | 564 """ |
625 node_info = self._node_info[index] | 565 node_info = self._node_info[index] |
626 color = self._CONTENT_TYPE_TO_COLOR[node_info.ContentType()] | 566 color = self._CONTENT_TYPE_TO_COLOR[node_info.ContentType()] |
627 max_age = log_parser.MaxAge(node_info.Request()) | 567 max_age = node_info.Request().MaxAge() |
628 shape = 'polygon' if max_age > 300 else 'oval' | 568 shape = 'polygon' if max_age > 300 else 'oval' |
629 styles = ['filled'] | 569 styles = ['filled'] |
630 if highlight: | 570 if highlight: |
631 for fragment in highlight: | 571 for fragment in highlight: |
632 if fragment in node_info.Url(): | 572 if fragment in node_info.Url(): |
633 styles.append('dotted') | 573 styles.append('dotted') |
634 break | 574 break |
635 return ('%d [label = "%s\\n%.2f->%.2f (%.2f)"; style = "%s"; ' | 575 return ('%d [label = "%s\\n%.2f->%.2f (%.2f)"; style = "%s"; ' |
636 'fillcolor = %s; shape = %s];\n' | 576 'fillcolor = %s; shape = %s];\n' |
637 % (index, node_info.ShortName(), | 577 % (index, node_info.ShortName(), |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
726 Dict of image url + short name to NodeInfo. | 666 Dict of image url + short name to NodeInfo. |
727 """ | 667 """ |
728 image_to_info = {} | 668 image_to_info = {} |
729 for n in self._node_info: | 669 for n in self._node_info: |
730 if (n.ContentType().startswith('image') and | 670 if (n.ContentType().startswith('image') and |
731 not self._IsAdUrl(n.Url())): | 671 not self._IsAdUrl(n.Url())): |
732 key = str((n.Url(), n.ShortName(), n.StartTime())) | 672 key = str((n.Url(), n.ShortName(), n.StartTime())) |
733 assert key not in image_to_info, n.Url() | 673 assert key not in image_to_info, n.Url() |
734 image_to_info[key] = n | 674 image_to_info[key] = n |
735 return image_to_info | 675 return image_to_info |
OLD | NEW |