OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef BASE_TRACKED_OBJECTS_H_ | 5 #ifndef BASE_TRACKED_OBJECTS_H_ |
6 #define BASE_TRACKED_OBJECTS_H_ | 6 #define BASE_TRACKED_OBJECTS_H_ |
7 | 7 |
8 #include <map> | 8 #include <map> |
9 #include <set> | 9 #include <set> |
10 #include <stack> | 10 #include <stack> |
(...skipping 25 matching lines...) |
36 // | 36 // |
37 // These classes serve as the basis of a profiler of sorts for the Tasks system. | 37 // These classes serve as the basis of a profiler of sorts for the Tasks system. |
38 // As a result, design decisions were made to maximize speed, by minimizing | 38 // As a result, design decisions were made to maximize speed, by minimizing |
39 // recurring allocation/deallocation, lock contention and data copying. In the | 39 // recurring allocation/deallocation, lock contention and data copying. In the |
40 // "stable" state, which is reached relatively quickly, there is no separate | 40 // "stable" state, which is reached relatively quickly, there is no separate |
41 // marginal allocation cost associated with construction or destruction of | 41 // marginal allocation cost associated with construction or destruction of |
42 // tracked objects, no locks are generally employed, and probably the largest | 42 // tracked objects, no locks are generally employed, and probably the largest |
43 // computational cost is associated with obtaining start and stop times for | 43 // computational cost is associated with obtaining start and stop times for |
44 // instances as they are created and destroyed. | 44 // instances as they are created and destroyed. |
45 // | 45 // |
46 // The following describes the lifecycle of tracking an instance. | 46 // The following describes the life cycle of tracking an instance. |
47 // | 47 // |
48 // First off, when the instance is created, the FROM_HERE macro is expanded | 48 // First off, when the instance is created, the FROM_HERE macro is expanded |
49 // to specify the birth place (file, line, function) where the instance was | 49 // to specify the birth place (file, line, function) where the instance was |
50 // created. That data is used to create a transient Location instance | 50 // created. That data is used to create a transient Location instance |
51 // encapsulating the above triple of information. The strings (like __FILE__) | 51 // encapsulating the above triple of information. The strings (like __FILE__) |
52 // are passed around by reference, with the assumption that they are static, and | 52 // are passed around by reference, with the assumption that they are static, and |
53 // will never go away. This ensures that the strings can be dealt with as atoms | 53 // will never go away. This ensures that the strings can be dealt with as atoms |
54 // with great efficiency (i.e., copying of strings is never needed, and | 54 // with great efficiency (i.e., copying of strings is never needed, and |
55 // comparisons for equality can be based on pointer comparisons). | 55 // comparisons for equality can be based on pointer comparisons). |
56 // | 56 // |
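As an illustration of the "atoms" idea above, a minimal sketch (not part of the file under review; all names are hypothetical) of a call-site triple that is compared purely by pointer identity of its static strings:

    struct SketchLocation {
      const char* file_name;      // assumed to be a static literal such as __FILE__
      const char* function_name;  // assumed to be a static literal as well
      int line_number;

      // Each call site always supplies the same literal objects, so comparing
      // the pointers is equivalent to, and far cheaper than, comparing strings.
      bool operator==(const SketchLocation& other) const {
        return file_name == other.file_name &&
               function_name == other.function_name &&
               line_number == other.line_number;
      }
    };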
(...skipping 32 matching lines...) |
89 // Lastly, when an instance is deleted, the final tallies of statistics are | 89 // Lastly, when an instance is deleted, the final tallies of statistics are |
90 // carefully accumulated. That tallying writes into slots (members) in a | 90 // carefully accumulated. That tallying writes into slots (members) in a |
91 // collection of DeathData instances. For each birth place Location that is | 91 // collection of DeathData instances. For each birth place Location that is |
92 // destroyed on a thread, there is a DeathData instance to record the additional | 92 // destroyed on a thread, there is a DeathData instance to record the additional |
93 // death count, as well as accumulate the run-time and queue-time durations for | 93 // death count, as well as accumulate the run-time and queue-time durations for |
94 // the instance as it is destroyed (dies). By maintaining a single place to | 94 // the instance as it is destroyed (dies). By maintaining a single place to |
95 // aggregate this running sum *only* for the given thread, we avoid the need to | 95 // aggregate this running sum *only* for the given thread, we avoid the need to |
96 // lock such DeathData instances. (i.e., these accumulated stats in a DeathData | 96 // lock such DeathData instances. (i.e., these accumulated stats in a DeathData |
97 // instance are exclusively updated by the singular owning thread). | 97 // instance are exclusively updated by the singular owning thread). |
98 // | 98 // |
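To make the single-writer idea concrete, a small sketch (hypothetical names, not the real DeathData): because only the owning thread ever calls RecordDeath(), none of these updates need a lock.

    #include <cstdint>

    struct SketchDeathData {
      int32_t death_count = 0;
      int32_t total_run_ms = 0;    // accumulated run-time duration
      int32_t total_queue_ms = 0;  // accumulated queue-time duration

      // Called only from the thread that owns this instance.
      void RecordDeath(int32_t run_ms, int32_t queue_ms) {
        ++death_count;
        total_run_ms += run_ms;
        total_queue_ms += queue_ms;
      }
    };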
99 // With the above lifecycle description complete, the major remaining detail is | 99 // With the above life cycle description complete, the major remaining detail |
100 // explaining how each thread maintains a list of DeathData instances, and of | 100 // is explaining how each thread maintains a list of DeathData instances, and |
101 // Births instances, and is able to avoid additional (redundant/unnecessary) | 101 // of Births instances, and is able to avoid additional (redundant/unnecessary) |
102 // allocations. | 102 // allocations. |
103 // | 103 // |
104 // Each thread maintains a list of data items specific to that thread in a | 104 // Each thread maintains a list of data items specific to that thread in a |
105 // ThreadData instance (for that specific thread only). The two critical items | 105 // ThreadData instance (for that specific thread only). The two critical items |
106 // are lists of DeathData and Births instances. These lists are maintained in | 106 // are lists of DeathData and Births instances. These lists are maintained in |
107 // STL maps, which are indexed by Location. As noted earlier, we can compare | 107 // STL maps, which are indexed by Location. As noted earlier, we can compare |
108 // locations very efficiently as we consider the underlying data (file, | 108 // locations very efficiently as we consider the underlying data (file, |
109 // function, line) to be atoms, and hence pointer comparison is used rather than | 109 // function, line) to be atoms, and hence pointer comparison is used rather than |
110 // (slow) string comparisons. | 110 // (slow) string comparisons. |
111 // | 111 // |
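A sketch of the two per-thread maps just described (hypothetical names; the call site is reduced to a single static string): std::map's default std::less over pointers gives exactly the cheap atom comparison the text relies on.

    #include <map>

    struct SketchBirths { int birth_count = 0; };
    struct SketchDeaths { int death_count = 0; };

    class SketchThreadData {
     public:
      // Both methods run only on the owning thread, so no lock is taken.
      SketchBirths* TallyBirth(const char* call_site) {
        SketchBirths* births = &birth_map_[call_site];  // find or create
        ++births->birth_count;
        return births;
      }
      void TallyDeath(const SketchBirths* births) {
        ++death_map_[births].death_count;
      }

     private:
      std::map<const char*, SketchBirths> birth_map_;         // keyed by pointer
      std::map<const SketchBirths*, SketchDeaths> death_map_;
    };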
112 // To provide a mechanism for iterating over all "known threads," which means | 112 // To provide a mechanism for iterating over all "known threads," which means |
113 // threads that have recorded a birth or a death, we create a singly linked list | 113 // threads that have recorded a birth or a death, we create a singly linked list |
114 // of ThreadData instances. Each such instance maintains a pointer to the next | 114 // of ThreadData instances. Each such instance maintains a pointer to the next |
115 // one. A static member of ThreadData provides a pointer to the first item on | 115 // one. A static member of ThreadData provides a pointer to the first item on |
116 // this global list, and access via that all_thread_data_list_head_ item | 116 // this global list, and access via that all_thread_data_list_head_ item |
117 // requires the use of the list_lock_. | 117 // requires the use of the list_lock_. |
118 // When a new ThreadData instance is added to the global list, it is pre-pended, | 118 // When a new ThreadData instance is added to the global list, it is pre-pended, |
119 // which ensures that any prior acquisition of the list is valid (i.e., the | 119 // which ensures that any prior acquisition of the list is valid (i.e., the |
120 // holder can iterate over it without fear of it changing, or the necessity of | 120 // holder can iterate over it without fear of it changing, or the necessity of |
121 // using an additional lock). Iterations are actually pretty rare (used | 121 // using an additional lock). Iterations are actually pretty rare (used |
122 // primarilly for cleanup, or snapshotting data for display), so this lock has | 122 // primarily for cleanup, or snapshotting data for display), so this lock has |
123 // very little global performance impact. | 123 // very little global performance impact. |
124 // | 124 // |
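A sketch of the prepend-only registration described above (std::mutex standing in for base::Lock; names hypothetical): because nodes are only ever added at the head, a thread that has already read the head pointer can keep walking the list it saw.

    #include <mutex>

    struct SketchThreadData {
      SketchThreadData* next = nullptr;  // singly linked list of all instances
    };

    std::mutex g_list_lock;                        // stands in for list_lock_
    SketchThreadData* g_all_thread_data_list_head = nullptr;

    void RegisterThreadData(SketchThreadData* data) {
      std::lock_guard<std::mutex> guard(g_list_lock);
      data->next = g_all_thread_data_list_head;  // pre-pend; old nodes untouched
      g_all_thread_data_list_head = data;
    }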
125 // The above description tries to define the high performance (run time) | 125 // The above description tries to define the high performance (run time) |
126 // portions of these classes. After gathering statistics, calls instigated | 126 // portions of these classes. After gathering statistics, calls instigated |
127 // by visiting about:profiler will assemble and aggregate data for display. The | 127 // by visiting about:profiler will assemble and aggregate data for display. The |
128 // following data structures are used for producing such displays. They are | 128 // following data structures are used for producing such displays. They are |
129 // not performance critical, and their only major constraint is that they should | 129 // not performance critical, and their only major constraint is that they should |
130 // be able to run concurrently with ongoing augmentation of the birth and death | 130 // be able to run concurrently with ongoing augmentation of the birth and death |
131 // data. | 131 // data. |
132 // | 132 // |
(...skipping 16 matching lines...) |
149 // The ProcessDataSnapshot struct is a serialized representation of the list | 149 // The ProcessDataSnapshot struct is a serialized representation of the list |
150 // of ThreadData objects for a process. It holds a set of TaskSnapshots | 150 // of ThreadData objects for a process. It holds a set of TaskSnapshots |
151 // and tracks parent/child relationships for the executed tasks. The statistics | 151 // and tracks parent/child relationships for the executed tasks. The statistics |
152 // in a snapshot are gathered asynchronously relative to their ongoing updates. | 152 // in a snapshot are gathered asynchronously relative to their ongoing updates. |
153 // It is possible, though highly unlikely, that stats could be incorrectly | 153 // It is possible, though highly unlikely, that stats could be incorrectly |
154 // recorded by this process (all data is held in 32 bit ints, but we are not | 154 // recorded by this process (all data is held in 32 bit ints, but we are not |
155 // atomically collecting all data, so we could have a count that does not, for | 155 // atomically collecting all data, so we could have a count that does not, for |
156 // example, match with the number of durations we accumulated). The advantage | 156 // example, match with the number of durations we accumulated). The advantage |
157 // to having fast (non-atomic) updates of the data outweighs the minimal risk of | 157 // to having fast (non-atomic) updates of the data outweighs the minimal risk of |
158 // a singular corrupt statistic snapshot (only the snapshot could be corrupt, | 158 // a singular corrupt statistic snapshot (only the snapshot could be corrupt, |
159 // not the underlying and ongoing statistic). In constrast, pointer data that | 159 // not the underlying and ongoing statistic). In contrast, pointer data that |
160 // is accessed during snapshotting is completely invariant, and hence is | 160 // is accessed during snapshotting is completely invariant, and hence is |
161 // perfectly acquired (i.e., no potential corruption, and no risk of a bad | 161 // perfectly acquired (i.e., no potential corruption, and no risk of a bad |
162 // memory reference). | 162 // memory reference). |
163 // | 163 // |
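To make that trade-off concrete, a hedged sketch (hypothetical names): the snapshot simply copies the 32-bit tallies while the owning thread may still be bumping them, so the copied count and the copied duration can be momentarily out of step, but no pointer is ever followed into freed memory.

    #include <cstdint>

    struct SketchTally {
      int32_t count = 0;
      int32_t total_run_ms = 0;
    };

    // Copies the live tally without synchronization, accepting that the two
    // fields may be read at slightly different moments; only this copy can be
    // inconsistent, never the live data owned by the updating thread.
    SketchTally SnapshotTally(const SketchTally& live) {
      SketchTally copy;
      copy.count = live.count;
      copy.total_run_ms = live.total_run_ms;
      return copy;
    }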
164 // TODO(jar): We can implement a Snapshot system that *tries* to grab the | 164 // TODO(jar): We can implement a Snapshot system that *tries* to grab the |
165 // snapshots on the source threads *when* they have MessageLoops available | 165 // snapshots on the source threads *when* they have MessageLoops available |
166 // (worker threads don't have message loops generally, and hence gathering from | 166 // (worker threads don't have message loops generally, and hence gathering from |
167 // them will continue to be asynchronous). We had an implementation of this in | 167 // them will continue to be asynchronous). We had an implementation of this in |
168 // the past, but the difficulty is dealing with message loops being terminated. | 168 // the past, but the difficulty is dealing with message loops being terminated. |
169 // We can *try* to spam the available threads via some message loop proxy to | 169 // We can *try* to spam the available threads via some message loop proxy to |
170 // achieve this feat, and it *might* be valuable when we are colecting data for | 170 // achieve this feat, and it *might* be valuable when we are collecting data |
171 // upload via UMA (where correctness of data may be more significant than for a | 171 // for upload via UMA (where correctness of data may be more significant than |
172 // single screen of about:profiler). | 172 // for a single screen of about:profiler). |
173 // | 173 // |
174 // TODO(jar): We should support (optionally) the recording of parent-child | 174 // TODO(jar): We should support (optionally) the recording of parent-child |
175 // relationships for tasks. This should be done by detecting what tasks are | 175 // relationships for tasks. This should be done by detecting what tasks are |
176 // Born during the running of a parent task. The resulting data can be used by | 176 // Born during the running of a parent task. The resulting data can be used by |
177 // a smarter profiler to aggregate the cost of a series of child tasks into | 177 // a smarter profiler to aggregate the cost of a series of child tasks into |
178 // the ancestor task. It can also be used to illuminate what child or parent is | 178 // the ancestor task. It can also be used to illuminate what child or parent is |
179 // related to each task. | 179 // related to each task. |
180 // | 180 // |
181 // TODO(jar): We need to store DataCollections, and provide facilities for | 181 // TODO(jar): We need to store DataCollections, and provide facilities for |
182 // taking the difference between two gathered DataCollections. For now, we're | 182 // taking the difference between two gathered DataCollections. For now, we're |
183 // just adding a hack that Reset()s to zero all counts and stats. This is also | 183 // just adding a hack that Reset()s to zero all counts and stats. This is also |
184 // done in a slighly thread-unsafe fashion, as the resetting is done | 184 // done in a slightly thread-unsafe fashion, as the resetting is done |
185 // asynchronously relative to ongoing updates (but all data is 32 bit in size). | 185 // asynchronously relative to ongoing updates (but all data is 32 bit in size). |
186 // For basic profiling, this will work "most of the time," and should be | 186 // For basic profiling, this will work "most of the time," and should be |
187 // sufficient... but storing away DataCollections is the "right way" to do this. | 187 // sufficient... but storing away DataCollections is the "right way" to do this. |
188 // We'll accomplish this via JavaScript storage of snapshots, and then we'll | 188 // We'll accomplish this via JavaScript storage of snapshots, and then we'll |
189 // remove the Reset() methods. We may also need a short-term-max value in | 189 // remove the Reset() methods. We may also need a short-term-max value in |
190 // DeathData that is reset (as synchronously as possible) during each snapshot. | 190 // DeathData that is reset (as synchronously as possible) during each snapshot. |
191 // This will facilitate displaying a max value for each snapshot period. | 191 // This will facilitate displaying a max value for each snapshot period. |
192 | 192 |
193 namespace tracked_objects { | 193 namespace tracked_objects { |
194 | 194 |
(...skipping 159 matching lines...) |
354 struct ProcessDataSnapshot; | 354 struct ProcessDataSnapshot; |
355 class BASE_EXPORT TaskStopwatch; | 355 class BASE_EXPORT TaskStopwatch; |
356 | 356 |
357 class BASE_EXPORT ThreadData { | 357 class BASE_EXPORT ThreadData { |
358 public: | 358 public: |
359 // Current allowable states of the tracking system. The states can vary | 359 // Current allowable states of the tracking system. The states can vary |
360 // between ACTIVE and DEACTIVATED, but can never go back to UNINITIALIZED. | 360 // between ACTIVE and DEACTIVATED, but can never go back to UNINITIALIZED. |
361 enum Status { | 361 enum Status { |
362 UNINITIALIZED, // Pristine, link-time state before running. | 362 UNINITIALIZED, // Pristine, link-time state before running. |
363 DORMANT_DURING_TESTS, // Only used during testing. | 363 DORMANT_DURING_TESTS, // Only used during testing. |
364 DEACTIVATED, // No longer recording profling. | 364 DEACTIVATED, // No longer recording profiling. |
365 PROFILING_ACTIVE, // Recording profiles (no parent-child links). | 365 PROFILING_ACTIVE, // Recording profiles (no parent-child links). |
366 PROFILING_CHILDREN_ACTIVE, // Fully active, recording parent-child links. | 366 PROFILING_CHILDREN_ACTIVE, // Fully active, recording parent-child links. |
367 STATUS_LAST = PROFILING_CHILDREN_ACTIVE | 367 STATUS_LAST = PROFILING_CHILDREN_ACTIVE |
368 }; | 368 }; |
369 | 369 |
370 typedef std::map<Location, Births*> BirthMap; | 370 typedef std::map<Location, Births*> BirthMap; |
371 typedef std::map<const Births*, DeathData> DeathMap; | 371 typedef std::map<const Births*, DeathData> DeathMap; |
372 typedef std::pair<const Births*, const Births*> ParentChildPair; | 372 typedef std::pair<const Births*, const Births*> ParentChildPair; |
373 typedef std::set<ParentChildPair> ParentChildSet; | 373 typedef std::set<ParentChildPair> ParentChildSet; |
374 typedef std::stack<const Births*> ParentStack; | 374 typedef std::stack<const Births*> ParentStack; |
(...skipping 169 matching lines...) |
544 // living instances of the task -- that is, each task maps to the number of | 544 // living instances of the task -- that is, each task maps to the number of |
545 // births for the task that have not yet been balanced by a death. If | 545 // births for the task that have not yet been balanced by a death. If |
546 // |reset_max| is true, then the max values in each DeathData instance are | 546 // |reset_max| is true, then the max values in each DeathData instance are |
547 // reset during the scan. | 547 // reset during the scan. |
548 void SnapshotExecutedTasks(bool reset_max, | 548 void SnapshotExecutedTasks(bool reset_max, |
549 ProcessDataSnapshot* process_data, | 549 ProcessDataSnapshot* process_data, |
550 BirthCountMap* birth_counts); | 550 BirthCountMap* birth_counts); |
551 | 551 |
552 // Using our lock, make a copy of the specified maps. This call may be made | 552 // Using our lock, make a copy of the specified maps. This call may be made |
553 // on non-local threads, which necessitates the use of the lock to prevent | 553 // on non-local threads, which necessitates the use of the lock to prevent |
554 // the map(s) from being reallocaed while they are copied. If |reset_max| is | 554 // the map(s) from being reallocated while they are copied. If |reset_max| is |
555 // true, then, just after we copy the DeathMap, we will set the max values to | 555 // true, then, just after we copy the DeathMap, we will set the max values to |
556 // zero in the active DeathMap (not the snapshot). | 556 // zero in the active DeathMap (not the snapshot). |
557 void SnapshotMaps(bool reset_max, | 557 void SnapshotMaps(bool reset_max, |
558 BirthMap* birth_map, | 558 BirthMap* birth_map, |
559 DeathMap* death_map, | 559 DeathMap* death_map, |
560 ParentChildSet* parent_child_set); | 560 ParentChildSet* parent_child_set); |
561 | 561 |
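A sketch of the copy-under-lock step that SnapshotMaps() describes (std::mutex in place of base::Lock; names hypothetical): holding the lock while the copy is made keeps the live map from being restructured under a reader that may be on a different thread.

    #include <map>
    #include <mutex>

    class SketchThreadData {
     public:
      // May be called from any thread; the lock stops the owning thread from
      // inserting into (and thereby restructuring) the map during the copy.
      std::map<const void*, int> SnapshotDeathMap() const {
        std::lock_guard<std::mutex> guard(map_lock_);
        return death_map_;  // full copy made while the lock is held
      }

     private:
      mutable std::mutex map_lock_;
      std::map<const void*, int> death_map_;  // stands in for the real DeathMap
    };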
562 // Using our lock to protect the iteration, clear all birth and death data. | 562 // Using our lock to protect the iteration, clear all birth and death data. |
563 void Reset(); | 563 void Reset(); |
564 | 564 |
(...skipping 21 matching lines...) |
586 static NowFunction* now_function_; | 586 static NowFunction* now_function_; |
587 | 587 |
588 // If true, now_function_ returns values that can be used to calculate queue | 588 // If true, now_function_ returns values that can be used to calculate queue |
589 // time. | 589 // time. |
590 static bool now_function_is_time_; | 590 static bool now_function_is_time_; |
591 | 591 |
592 // We use thread local store to identify which ThreadData to interact with. | 592 // We use thread local store to identify which ThreadData to interact with. |
593 static base::ThreadLocalStorage::StaticSlot tls_index_; | 593 static base::ThreadLocalStorage::StaticSlot tls_index_; |
594 | 594 |
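A sketch of that thread-local lookup, using C++11 thread_local in place of base::ThreadLocalStorage (names hypothetical): the hot path reaches its own ThreadData without a lock or a map search.

    struct SketchThreadData {
      // Per-thread birth/death tallies would live here.
    };

    thread_local SketchThreadData* tls_thread_data = nullptr;

    SketchThreadData* CurrentThreadData() {
      if (!tls_thread_data)
        tls_thread_data = new SketchThreadData();  // first use on this thread
      return tls_thread_data;
    }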
595 // List of ThreadData instances for use with worker threads. When a worker | 595 // List of ThreadData instances for use with worker threads. When a worker |
596 // thread is done (terminated), we push it onto this llist. When a new worker | 596 // thread is done (terminated), we push it onto this list. When a new worker |
597 // thread is created, we first try to re-use a ThreadData instance from the | 597 // thread is created, we first try to re-use a ThreadData instance from the |
598 // list, and if none are available, construct a new one. | 598 // list, and if none are available, construct a new one. |
599 // This is only accessed while list_lock_ is held. | 599 // This is only accessed while list_lock_ is held. |
600 static ThreadData* first_retired_worker_; | 600 static ThreadData* first_retired_worker_; |
601 | 601 |
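A sketch of that retirement pool (hypothetical names; std::mutex for the list lock): a finished worker parks its ThreadData on a free list, and a new worker pops from that list before allocating, which is what keeps the stable state allocation-free.

    #include <mutex>

    struct SketchThreadData {
      SketchThreadData* next_retired = nullptr;
    };

    std::mutex g_list_lock;
    SketchThreadData* g_first_retired_worker = nullptr;

    SketchThreadData* AcquireWorkerData() {
      std::lock_guard<std::mutex> guard(g_list_lock);
      if (g_first_retired_worker) {
        SketchThreadData* reused = g_first_retired_worker;
        g_first_retired_worker = reused->next_retired;
        return reused;                    // re-use a parked instance
      }
      return new SketchThreadData();      // none available: allocate fresh
    }

    void RetireWorkerData(SketchThreadData* data) {
      std::lock_guard<std::mutex> guard(g_list_lock);
      data->next_retired = g_first_retired_worker;
      g_first_retired_worker = data;
    }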
602 // Link to the most recently created instance (starts a null terminated list). | 602 // Link to the most recently created instance (starts a null terminated list). |
603 // The list is traversed by about:profiler when it needs to snapshot data. | 603 // The list is traversed by about:profiler when it needs to snapshot data. |
604 // This is only accessed while list_lock_ is held. | 604 // This is only accessed while list_lock_ is held. |
605 static ThreadData* all_thread_data_list_head_; | 605 static ThreadData* all_thread_data_list_head_; |
606 | 606 |
(...skipping 62 matching lines...) |
669 // thread, or reading from another thread. For reading from this thread we | 669 // thread, or reading from another thread. For reading from this thread we |
670 // don't need a lock, as there is no potential for a conflict since the | 670 // don't need a lock, as there is no potential for a conflict since the |
671 // writing is only done from this thread. | 671 // writing is only done from this thread. |
672 mutable base::Lock map_lock_; | 672 mutable base::Lock map_lock_; |
673 | 673 |
674 // The stack of parents that are currently being profiled. This includes only | 674 // The stack of parents that are currently being profiled. This includes only |
675 // tasks that have started a timer recently via PrepareForStartOfRun(), but | 675 // tasks that have started a timer recently via PrepareForStartOfRun(), but |
676 // not yet concluded with a NowForEndOfRun(). Usually this stack is one deep, | 676 // not yet concluded with a NowForEndOfRun(). Usually this stack is one deep, |
677 // but if a scoped region is profiled, or <sigh> a task runs a nested-message | 677 // but if a scoped region is profiled, or <sigh> a task runs a nested-message |
678 // loop, then the stack can grow larger. Note that we don't try to deduct | 678 // loop, then the stack can grow larger. Note that we don't try to deduct |
679 // time in nested porfiles, as our current timer is based on wall-clock time, | 679 // time in nested profiles, as our current timer is based on wall-clock time, |
680 // and not CPU time (and we're hopeful that nested timing won't be a | 680 // and not CPU time (and we're hopeful that nested timing won't be a |
681 // significant additional cost). | 681 // significant additional cost). |
682 ParentStack parent_stack_; | 682 ParentStack parent_stack_; |
683 | 683 |
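A sketch of how such a parent stack can be used (hypothetical free functions rather than members): whatever is on top of the per-thread stack when a task starts running is recorded as its parent, and the task then becomes the parent of anything nested inside its run.

    #include <set>
    #include <stack>
    #include <utility>

    struct SketchBirths { int birth_count = 0; };   // stands in for Births

    std::stack<const SketchBirths*> parent_stack;   // one per thread in reality
    std::set<std::pair<const SketchBirths*, const SketchBirths*> > parent_child_set;

    void BeginRun(const SketchBirths* task) {
      if (!parent_stack.empty())
        parent_child_set.insert(std::make_pair(parent_stack.top(), task));
      parent_stack.push(task);  // nested work started now will see us as parent
    }

    void EndRun() {
      parent_stack.pop();  // must pair with the BeginRun() above
    }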
684 // A random number that we use to decide which sample to keep as a | 684 // A random number that we use to decide which sample to keep as a |
685 // representative sample in each DeathData instance. We can't start off with | 685 // representative sample in each DeathData instance. We can't start off with |
686 // much randomness (because we can't call RandInt() on all our threads), so | 686 // much randomness (because we can't call RandInt() on all our threads), so |
687 // we stir in more and more as we go. | 687 // we stir in more and more as we go. |
688 uint32 random_number_; | 688 uint32 random_number_; |
689 | 689 |
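A sketch of that sampling idea (hypothetical names): each death stirs fresh bits into the thread's running random word, and the stored sample is replaced with probability 1/count, so after N deaths every observation had an equal 1/N chance of being the one kept.

    #include <cstdint>

    struct SketchSampler {
      uint32_t random_number = 0;  // stirred a little more on every death
      int32_t death_count = 0;
      int32_t sampled_run_ms = 0;  // the one representative sample kept

      void RecordDeath(int32_t run_ms, uint32_t stir) {
        ++death_count;
        random_number = random_number * 1664525u + 1013904223u + stir;
        if (random_number % death_count == 0)  // replace with probability 1/count
          sampled_run_ms = run_ms;             // classic reservoir sampling, k = 1
      }
    };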
(...skipping 94 matching lines...) |
784 ~ProcessDataSnapshot(); | 784 ~ProcessDataSnapshot(); |
785 | 785 |
786 std::vector<TaskSnapshot> tasks; | 786 std::vector<TaskSnapshot> tasks; |
787 std::vector<ParentChildPairSnapshot> descendants; | 787 std::vector<ParentChildPairSnapshot> descendants; |
788 int process_id; | 788 int process_id; |
789 }; | 789 }; |
790 | 790 |
791 } // namespace tracked_objects | 791 } // namespace tracked_objects |
792 | 792 |
793 #endif // BASE_TRACKED_OBJECTS_H_ | 793 #endif // BASE_TRACKED_OBJECTS_H_ |