| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "base/tracked_objects.h" | |
| 6 | |
| 7 #include <limits.h> | |
| 8 #include <stdlib.h> | |
| 9 | |
| 10 #include "base/atomicops.h" | |
| 11 #include "base/base_switches.h" | |
| 12 #include "base/command_line.h" | |
| 13 #include "base/compiler_specific.h" | |
| 14 #include "base/debug/leak_annotations.h" | |
| 15 #include "base/logging.h" | |
| 16 #include "base/process/process_handle.h" | |
| 17 #include "base/profiler/alternate_timer.h" | |
| 18 #include "base/strings/stringprintf.h" | |
| 19 #include "base/third_party/valgrind/memcheck.h" | |
| 20 #include "base/tracking_info.h" | |
| 21 | |
| 22 using base::TimeDelta; | |
| 23 | |
| 24 namespace base { | |
| 25 class TimeDelta; | |
| 26 } | |
| 27 | |
| 28 namespace tracked_objects { | |
| 29 | |
| 30 namespace { | |
| 31 // When ThreadData is first initialized, should we start in an ACTIVE state to | |
| 32 // record all of the startup-time tasks, or should we start up DEACTIVATED, so | |
| 33 // that we only record after parsing the command line flag --enable-tracking. | |
| 34 // Note that the flag may force either state, so this really controls only the | |
| 35 // period of time up until that flag is parsed. If there is no flag seen, then | |
| 36 // this state may prevail for much or all of the process lifetime. | |
| 37 const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE; | |
| 38 | |
| 39 // Control whether an alternate time source (Now() function) is supported by | |
| 40 // the ThreadData class. This compile time flag should be set to true if we | |
| 41 // want other modules (such as a memory allocator, or a thread-specific CPU time | |
| 42 // clock) to be able to provide a thread-specific Now() function. Without this | |
| 43 // compile-time flag, the code will only support the wall-clock time. This flag | |
| 44 // can be flipped to efficiently disable this path (if there is a performance | |
| 45 // problem with its presence). | |
| 46 static const bool kAllowAlternateTimeSourceHandling = true; | |
| 47 | |
| 48 // Possible states of the profiler timing enabledness. | |
| 49 enum { | |
| 50 UNDEFINED_TIMING, | |
| 51 ENABLED_TIMING, | |
| 52 DISABLED_TIMING, | |
| 53 }; | |
| 54 | |
| 55 // State of the profiler timing enabledness. | |
| 56 base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING; | |
| 57 | |
| 58 // Returns whether profiler timing is enabled. The default is true, but this | |
| 59 // may be overridden by a command-line flag. Some platforms may | |
| 60 // programmatically set this command-line flag to the "off" value if it's not | |
| 61 // specified. | |
| 62 // This in turn can be overridden by explicitly calling | |
| 63 // ThreadData::EnableProfilerTiming, say, based on a field trial. | |
| 64 inline bool IsProfilerTimingEnabled() { | |
| 65 // Reading |g_profiler_timing_enabled| is done without barrier because | |
| 66 // multiple initialization is not an issue while the barrier can be relatively | |
| 67 // costly given that this method is sometimes called in a tight loop. | |
| 68 base::subtle::Atomic32 current_timing_enabled = | |
| 69 base::subtle::NoBarrier_Load(&g_profiler_timing_enabled); | |
| 70 if (current_timing_enabled == UNDEFINED_TIMING) { | |
| 71 if (!base::CommandLine::InitializedForCurrentProcess()) | |
| 72 return true; | |
| 73 current_timing_enabled = | |
| 74 (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII( | |
| 75 switches::kProfilerTiming) == | |
| 76 switches::kProfilerTimingDisabledValue) | |
| 77 ? DISABLED_TIMING | |
| 78 : ENABLED_TIMING; | |
| 79 base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, | |
| 80 current_timing_enabled); | |
| 81 } | |
| 82 return current_timing_enabled == ENABLED_TIMING; | |
| 83 } | |
| 84 | |
| 85 } // namespace | |
| 86 | |
| 87 //------------------------------------------------------------------------------ | |
| 88 // DeathData tallies durations when a death takes place. | |
| 89 | |
| 90 DeathData::DeathData() | |
| 91 : count_(0), | |
| 92 sample_probability_count_(0), | |
| 93 run_duration_sum_(0), | |
| 94 queue_duration_sum_(0), | |
| 95 run_duration_max_(0), | |
| 96 queue_duration_max_(0), | |
| 97 run_duration_sample_(0), | |
| 98 queue_duration_sample_(0), | |
| 99 last_phase_snapshot_(nullptr) { | |
| 100 } | |
| 101 | |
| 102 DeathData::DeathData(const DeathData& other) | |
| 103 : count_(other.count_), | |
| 104 sample_probability_count_(other.sample_probability_count_), | |
| 105 run_duration_sum_(other.run_duration_sum_), | |
| 106 queue_duration_sum_(other.queue_duration_sum_), | |
| 107 run_duration_max_(other.run_duration_max_), | |
| 108 queue_duration_max_(other.queue_duration_max_), | |
| 109 run_duration_sample_(other.run_duration_sample_), | |
| 110 queue_duration_sample_(other.queue_duration_sample_), | |
| 111 last_phase_snapshot_(nullptr) { | |
| 112 // This constructor will be used by std::map when adding new DeathData values | |
| 113 // to the map. At that point, last_phase_snapshot_ is still NULL, so we don't | |
| 114 // need to worry about ownership transfer. | |
| 115 DCHECK(other.last_phase_snapshot_ == nullptr); | |
| 116 } | |
| 117 | |
| 118 DeathData::~DeathData() { | |
| 119 while (last_phase_snapshot_) { | |
| 120 const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_; | |
| 121 last_phase_snapshot_ = snapshot->prev; | |
| 122 delete snapshot; | |
| 123 } | |
| 124 } | |
| 125 | |
| 126 // TODO(jar): I need to see if this macro to optimize branching is worth using. | |
| 127 // | |
| 128 // This macro has no branching, so it is surely fast, and is equivalent to: | |
| 129 // if (assign_it) | |
| 130 // target = source; | |
| 131 // We use a macro rather than a template to force this to inline. | |
| 132 // Related code for calculating max is discussed on the web. | |
| 133 #define CONDITIONAL_ASSIGN(assign_it, target, source) \ | |
| 134 ((target) ^= ((target) ^ (source)) & -static_cast<int32>(assign_it)) | |
| 135 | |
| 136 void DeathData::RecordDeath(const int32 queue_duration, | |
| 137 const int32 run_duration, | |
| 138 const uint32 random_number) { | |
| 139 // We'll just clamp at INT_MAX, but we should note this in the UI as such. | |
| 140 if (count_ < INT_MAX) | |
| 141 ++count_; | |
| 142 | |
| 143 int sample_probability_count = sample_probability_count_; | |
| 144 if (sample_probability_count < INT_MAX) | |
| 145 ++sample_probability_count; | |
| 146 sample_probability_count_ = sample_probability_count; | |
| 147 | |
| 148 queue_duration_sum_ += queue_duration; | |
| 149 run_duration_sum_ += run_duration; | |
| 150 | |
| 151 if (queue_duration_max_ < queue_duration) | |
| 152 queue_duration_max_ = queue_duration; | |
| 153 if (run_duration_max_ < run_duration) | |
| 154 run_duration_max_ = run_duration; | |
| 155 | |
| 156 // Take a uniformly distributed sample over all durations ever supplied during | |
| 157 // the current profiling phase. | |
| 158 // The probability that we (instead) use this new sample is | |
| 159 // 1/sample_probability_count_. This results in a completely uniform selection | |
| 160 // of the sample (at least when we don't clamp sample_probability_count_... | |
| 161 // but that should be inconsequentially likely). We ignore the fact that we | |
| 162 // correlated our selection of a sample to the run and queue times (i.e., we | |
| 163 // used them to generate random_number). | |
| 164 CHECK_GT(sample_probability_count, 0); | |
| 165 if (0 == (random_number % sample_probability_count)) { | |
| 166 queue_duration_sample_ = queue_duration; | |
| 167 run_duration_sample_ = run_duration; | |
| 168 } | |
| 169 } | |
| 170 | |
| 171 void DeathData::OnProfilingPhaseCompleted(int profiling_phase) { | |
| 172 // Snapshotting and storing current state. | |
| 173 last_phase_snapshot_ = new DeathDataPhaseSnapshot( | |
| 174 profiling_phase, count_, run_duration_sum_, run_duration_max_, | |
| 175 run_duration_sample_, queue_duration_sum_, queue_duration_max_, | |
| 176 queue_duration_sample_, last_phase_snapshot_); | |
| 177 | |
| 178 // Not touching fields for which a delta can be computed by comparing with a | |
| 179 // snapshot from the previous phase. Resetting other fields. Sample values | |
| 180 // will be reset upon next death recording because sample_probability_count_ | |
| 181 // is set to 0. | |
| 182 // We avoid resetting to 0 in favor of deltas whenever possible. The reason | |
| 183 // is that for incrementable fields, resetting to 0 from the snapshot thread | |
| 184 // potentially in parallel with incrementing in the death thread may result in | |
| 185 // significant data corruption that has a potential to grow with time. Not | |
| 186 // resetting incrementable fields and using deltas will cause any | |
| 187 // off-by-little corruptions to be likely fixed at the next snapshot. | |
| 188 // The max values are not incrementable, and cannot be deduced using deltas | |
| 189 // for a given phase. Hence, we have to reset them to 0. But the potential | |
| 190 // damage is limited to getting the previous phase's max to apply for the next | |
| 191 // phase, and the error doesn't have a potential to keep growing with new | |
| 192 // resets. | |
| 193 // sample_probability_count_ is incrementable, but must be reset to 0 at the | |
| 194 // phase end, so that we start a new uniformly randomized sample selection | |
| 195 // after the reset. Corruptions due to race conditions are possible, but the | |
| 196 // damage is limited to selecting a wrong sample, which is not something that | |
| 197 // can cause accumulating or cascading effects. | |
| 198 // If there were no corruptions caused by race conditions, we never send a | |
| 199 // sample for the previous phase in the next phase's snapshot because | |
| 200 // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count. | |
| 201 sample_probability_count_ = 0; | |
| 202 run_duration_max_ = 0; | |
| 203 queue_duration_max_ = 0; | |
| 204 } | |
| 205 | |
| 206 //------------------------------------------------------------------------------ | |
| 207 DeathDataSnapshot::DeathDataSnapshot() | |
| 208 : count(-1), | |
| 209 run_duration_sum(-1), | |
| 210 run_duration_max(-1), | |
| 211 run_duration_sample(-1), | |
| 212 queue_duration_sum(-1), | |
| 213 queue_duration_max(-1), | |
| 214 queue_duration_sample(-1) { | |
| 215 } | |
| 216 | |
| 217 DeathDataSnapshot::DeathDataSnapshot(int count, | |
| 218 int32 run_duration_sum, | |
| 219 int32 run_duration_max, | |
| 220 int32 run_duration_sample, | |
| 221 int32 queue_duration_sum, | |
| 222 int32 queue_duration_max, | |
| 223 int32 queue_duration_sample) | |
| 224 : count(count), | |
| 225 run_duration_sum(run_duration_sum), | |
| 226 run_duration_max(run_duration_max), | |
| 227 run_duration_sample(run_duration_sample), | |
| 228 queue_duration_sum(queue_duration_sum), | |
| 229 queue_duration_max(queue_duration_max), | |
| 230 queue_duration_sample(queue_duration_sample) { | |
| 231 } | |
| 232 | |
| 233 DeathDataSnapshot::~DeathDataSnapshot() { | |
| 234 } | |
| 235 | |
| 236 DeathDataSnapshot DeathDataSnapshot::Delta( | |
| 237 const DeathDataSnapshot& older) const { | |
| 238 return DeathDataSnapshot(count - older.count, | |
| 239 run_duration_sum - older.run_duration_sum, | |
| 240 run_duration_max, run_duration_sample, | |
| 241 queue_duration_sum - older.queue_duration_sum, | |
| 242 queue_duration_max, queue_duration_sample); | |
| 243 } | |
| 244 | |
| 245 //------------------------------------------------------------------------------ | |
| 246 BirthOnThread::BirthOnThread(const Location& location, | |
| 247 const ThreadData& current) | |
| 248 : location_(location), | |
| 249 birth_thread_(¤t) { | |
| 250 } | |
| 251 | |
| 252 //------------------------------------------------------------------------------ | |
| 253 BirthOnThreadSnapshot::BirthOnThreadSnapshot() { | |
| 254 } | |
| 255 | |
| 256 BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth) | |
| 257 : location(birth.location()), | |
| 258 thread_name(birth.birth_thread()->thread_name()) { | |
| 259 } | |
| 260 | |
| 261 BirthOnThreadSnapshot::~BirthOnThreadSnapshot() { | |
| 262 } | |
| 263 | |
| 264 //------------------------------------------------------------------------------ | |
| 265 Births::Births(const Location& location, const ThreadData& current) | |
| 266 : BirthOnThread(location, current), | |
| 267 birth_count_(1) { } | |
| 268 | |
| 269 int Births::birth_count() const { return birth_count_; } | |
| 270 | |
| 271 void Births::RecordBirth() { ++birth_count_; } | |
| 272 | |
| 273 //------------------------------------------------------------------------------ | |
| 274 // ThreadData maintains the central data for all births and deaths on a single | |
| 275 // thread. | |
| 276 | |
| 277 // TODO(jar): We should pull all these static vars together, into a struct, and | |
| 278 // optimize layout so that we benefit from locality of reference during accesses | |
| 279 // to them. | |
| 280 | |
| 281 // static | |
| 282 NowFunction* ThreadData::now_function_ = NULL; | |
| 283 | |
| 284 // static | |
| 285 bool ThreadData::now_function_is_time_ = false; | |
| 286 | |
| 287 // A TLS slot which points to the ThreadData instance for the current thread. | |
| 288 // We do a fake initialization here (zeroing out data), and then the real | |
| 289 // in-place construction happens when we call tls_index_.Initialize(). | |
| 290 // static | |
| 291 base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER; | |
| 292 | |
| 293 // static | |
| 294 int ThreadData::worker_thread_data_creation_count_ = 0; | |
| 295 | |
| 296 // static | |
| 297 int ThreadData::cleanup_count_ = 0; | |
| 298 | |
| 299 // static | |
| 300 int ThreadData::incarnation_counter_ = 0; | |
| 301 | |
| 302 // static | |
| 303 ThreadData* ThreadData::all_thread_data_list_head_ = NULL; | |
| 304 | |
| 305 // static | |
| 306 ThreadData* ThreadData::first_retired_worker_ = NULL; | |
| 307 | |
| 308 // static | |
| 309 base::LazyInstance<base::Lock>::Leaky | |
| 310 ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER; | |
| 311 | |
| 312 // static | |
| 313 ThreadData::Status ThreadData::status_ = ThreadData::UNINITIALIZED; | |
| 314 | |
| 315 ThreadData::ThreadData(const std::string& suggested_name) | |
| 316 : next_(NULL), | |
| 317 next_retired_worker_(NULL), | |
| 318 worker_thread_number_(0), | |
| 319 incarnation_count_for_pool_(-1), | |
| 320 current_stopwatch_(NULL) { | |
| 321 DCHECK_GE(suggested_name.size(), 0u); | |
| 322 thread_name_ = suggested_name; | |
| 323 PushToHeadOfList(); // Which sets real incarnation_count_for_pool_. | |
| 324 } | |
| 325 | |
| 326 ThreadData::ThreadData(int thread_number) | |
| 327 : next_(NULL), | |
| 328 next_retired_worker_(NULL), | |
| 329 worker_thread_number_(thread_number), | |
| 330 incarnation_count_for_pool_(-1), | |
| 331 current_stopwatch_(NULL) { | |
| 332 CHECK_GT(thread_number, 0); | |
| 333 base::StringAppendF(&thread_name_, "WorkerThread-%d", thread_number); | |
| 334 PushToHeadOfList(); // Which sets real incarnation_count_for_pool_. | |
| 335 } | |
| 336 | |
| 337 ThreadData::~ThreadData() { | |
| 338 } | |
| 339 | |
| 340 void ThreadData::PushToHeadOfList() { | |
| 341 // Toss in a hint of randomness (atop the uniniitalized value). | |
| 342 (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_, | |
| 343 sizeof(random_number_)); | |
| 344 MSAN_UNPOISON(&random_number_, sizeof(random_number_)); | |
| 345 random_number_ += static_cast<uint32>(this - static_cast<ThreadData*>(0)); | |
| 346 random_number_ ^= (Now() - TrackedTime()).InMilliseconds(); | |
| 347 | |
| 348 DCHECK(!next_); | |
| 349 base::AutoLock lock(*list_lock_.Pointer()); | |
| 350 incarnation_count_for_pool_ = incarnation_counter_; | |
| 351 next_ = all_thread_data_list_head_; | |
| 352 all_thread_data_list_head_ = this; | |
| 353 } | |
| 354 | |
| 355 // static | |
| 356 ThreadData* ThreadData::first() { | |
| 357 base::AutoLock lock(*list_lock_.Pointer()); | |
| 358 return all_thread_data_list_head_; | |
| 359 } | |
| 360 | |
| 361 ThreadData* ThreadData::next() const { return next_; } | |
| 362 | |
| 363 // static | |
| 364 void ThreadData::InitializeThreadContext(const std::string& suggested_name) { | |
| 365 Initialize(); | |
| 366 ThreadData* current_thread_data = | |
| 367 reinterpret_cast<ThreadData*>(tls_index_.Get()); | |
| 368 if (current_thread_data) | |
| 369 return; // Browser tests instigate this. | |
| 370 current_thread_data = new ThreadData(suggested_name); | |
| 371 tls_index_.Set(current_thread_data); | |
| 372 } | |
| 373 | |
| 374 // static | |
| 375 ThreadData* ThreadData::Get() { | |
| 376 if (!tls_index_.initialized()) | |
| 377 return NULL; // For unittests only. | |
| 378 ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get()); | |
| 379 if (registered) | |
| 380 return registered; | |
| 381 | |
| 382 // We must be a worker thread, since we didn't pre-register. | |
| 383 ThreadData* worker_thread_data = NULL; | |
| 384 int worker_thread_number = 0; | |
| 385 { | |
| 386 base::AutoLock lock(*list_lock_.Pointer()); | |
| 387 if (first_retired_worker_) { | |
| 388 worker_thread_data = first_retired_worker_; | |
| 389 first_retired_worker_ = first_retired_worker_->next_retired_worker_; | |
| 390 worker_thread_data->next_retired_worker_ = NULL; | |
| 391 } else { | |
| 392 worker_thread_number = ++worker_thread_data_creation_count_; | |
| 393 } | |
| 394 } | |
| 395 | |
| 396 // If we can't find a previously used instance, then we have to create one. | |
| 397 if (!worker_thread_data) { | |
| 398 DCHECK_GT(worker_thread_number, 0); | |
| 399 worker_thread_data = new ThreadData(worker_thread_number); | |
| 400 } | |
| 401 DCHECK_GT(worker_thread_data->worker_thread_number_, 0); | |
| 402 | |
| 403 tls_index_.Set(worker_thread_data); | |
| 404 return worker_thread_data; | |
| 405 } | |
| 406 | |
| 407 // static | |
| 408 void ThreadData::OnThreadTermination(void* thread_data) { | |
| 409 DCHECK(thread_data); // TLS should *never* call us with a NULL. | |
| 410 // We must NOT do any allocations during this callback. There is a chance | |
| 411 // that the allocator is no longer active on this thread. | |
| 412 reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup(); | |
| 413 } | |
| 414 | |
| 415 void ThreadData::OnThreadTerminationCleanup() { | |
| 416 // The list_lock_ was created when we registered the callback, so it won't be | |
| 417 // allocated here despite the lazy reference. | |
| 418 base::AutoLock lock(*list_lock_.Pointer()); | |
| 419 if (incarnation_counter_ != incarnation_count_for_pool_) | |
| 420 return; // ThreadData was constructed in an earlier unit test. | |
| 421 ++cleanup_count_; | |
| 422 // Only worker threads need to be retired and reused. | |
| 423 if (!worker_thread_number_) { | |
| 424 return; | |
| 425 } | |
| 426 // We must NOT do any allocations during this callback. | |
| 427 // Using the simple linked lists avoids all allocations. | |
| 428 DCHECK_EQ(this->next_retired_worker_, reinterpret_cast<ThreadData*>(NULL)); | |
| 429 this->next_retired_worker_ = first_retired_worker_; | |
| 430 first_retired_worker_ = this; | |
| 431 } | |
| 432 | |
| 433 // static | |
| 434 void ThreadData::Snapshot(int current_profiling_phase, | |
| 435 ProcessDataSnapshot* process_data_snapshot) { | |
| 436 // Get an unchanging copy of a ThreadData list. | |
| 437 ThreadData* my_list = ThreadData::first(); | |
| 438 | |
| 439 // Gather data serially. | |
| 440 // This hackish approach *can* get some slightly corrupt tallies, as we are | |
| 441 // grabbing values without the protection of a lock, but it has the advantage | |
| 442 // of working even with threads that don't have message loops. If a user | |
| 443 // sees any strangeness, they can always just run their stats gathering a | |
| 444 // second time. | |
| 445 BirthCountMap birth_counts; | |
| 446 for (ThreadData* thread_data = my_list; thread_data; | |
| 447 thread_data = thread_data->next()) { | |
| 448 thread_data->SnapshotExecutedTasks(current_profiling_phase, | |
| 449 &process_data_snapshot->phased_snapshots, | |
| 450 &birth_counts); | |
| 451 } | |
| 452 | |
| 453 // Add births that are still active -- i.e. objects that have tallied a birth, | |
| 454 // but have not yet tallied a matching death, and hence must be either | |
| 455 // running, queued up, or being held in limbo for future posting. | |
| 456 auto* current_phase_tasks = | |
| 457 &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks; | |
| 458 for (const auto& birth_count : birth_counts) { | |
| 459 if (birth_count.second > 0) { | |
| 460 current_phase_tasks->push_back( | |
| 461 TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first), | |
| 462 DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0), | |
| 463 "Still_Alive")); | |
| 464 } | |
| 465 } | |
| 466 } | |
| 467 | |
| 468 // static | |
| 469 void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) { | |
| 470 // Get an unchanging copy of a ThreadData list. | |
| 471 ThreadData* my_list = ThreadData::first(); | |
| 472 | |
| 473 // Add snapshots for all instances of death data in all threads serially. | |
| 474 // This hackish approach *can* get some slightly corrupt tallies, as we are | |
| 475 // grabbing values without the protection of a lock, but it has the advantage | |
| 476 // of working even with threads that don't have message loops. Any corruption | |
| 477 // shouldn't cause "cascading damage" to anything else (in later phases). | |
| 478 for (ThreadData* thread_data = my_list; thread_data; | |
| 479 thread_data = thread_data->next()) { | |
| 480 thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase); | |
| 481 } | |
| 482 } | |
| 483 | |
| 484 Births* ThreadData::TallyABirth(const Location& location) { | |
| 485 BirthMap::iterator it = birth_map_.find(location); | |
| 486 Births* child; | |
| 487 if (it != birth_map_.end()) { | |
| 488 child = it->second; | |
| 489 child->RecordBirth(); | |
| 490 } else { | |
| 491 child = new Births(location, *this); // Leak this. | |
| 492 // Lock since the map may get relocated now, and other threads sometimes | |
| 493 // snapshot it (but they lock before copying it). | |
| 494 base::AutoLock lock(map_lock_); | |
| 495 birth_map_[location] = child; | |
| 496 } | |
| 497 | |
| 498 return child; | |
| 499 } | |
| 500 | |
| 501 void ThreadData::TallyADeath(const Births& births, | |
| 502 int32 queue_duration, | |
| 503 const TaskStopwatch& stopwatch) { | |
| 504 int32 run_duration = stopwatch.RunDurationMs(); | |
| 505 | |
| 506 // Stir in some randomness, plus add constant in case durations are zero. | |
| 507 const uint32 kSomePrimeNumber = 2147483647; | |
| 508 random_number_ += queue_duration + run_duration + kSomePrimeNumber; | |
| 509 // An address is going to have some randomness to it as well ;-). | |
| 510 random_number_ ^= static_cast<uint32>(&births - reinterpret_cast<Births*>(0)); | |
| 511 | |
| 512 // We don't have queue durations without OS timer. OS timer is automatically | |
| 513 // used for task-post-timing, so the use of an alternate timer implies all | |
| 514 // queue times are invalid, unless it was explicitly said that we can trust | |
| 515 // the alternate timer. | |
| 516 if (kAllowAlternateTimeSourceHandling && | |
| 517 now_function_ && | |
| 518 !now_function_is_time_) { | |
| 519 queue_duration = 0; | |
| 520 } | |
| 521 | |
| 522 DeathMap::iterator it = death_map_.find(&births); | |
| 523 DeathData* death_data; | |
| 524 if (it != death_map_.end()) { | |
| 525 death_data = &it->second; | |
| 526 } else { | |
| 527 base::AutoLock lock(map_lock_); // Lock as the map may get relocated now. | |
| 528 death_data = &death_map_[&births]; | |
| 529 } // Release lock ASAP. | |
| 530 death_data->RecordDeath(queue_duration, run_duration, random_number_); | |
| 531 } | |
| 532 | |
| 533 // static | |
| 534 Births* ThreadData::TallyABirthIfActive(const Location& location) { | |
| 535 if (!TrackingStatus()) | |
| 536 return NULL; | |
| 537 ThreadData* current_thread_data = Get(); | |
| 538 if (!current_thread_data) | |
| 539 return NULL; | |
| 540 return current_thread_data->TallyABirth(location); | |
| 541 } | |
| 542 | |
| 543 // static | |
| 544 void ThreadData::TallyRunOnNamedThreadIfTracking( | |
| 545 const base::TrackingInfo& completed_task, | |
| 546 const TaskStopwatch& stopwatch) { | |
| 547 // Even if we have been DEACTIVATED, we will process any pending births so | |
| 548 // that our data structures (which counted the outstanding births) remain | |
| 549 // consistent. | |
| 550 const Births* births = completed_task.birth_tally; | |
| 551 if (!births) | |
| 552 return; | |
| 553 ThreadData* current_thread_data = stopwatch.GetThreadData(); | |
| 554 if (!current_thread_data) | |
| 555 return; | |
| 556 | |
| 557 // Watch out for a race where status_ is changing, and hence one or both | |
| 558 // of start_of_run or end_of_run is zero. In that case, we didn't bother to | |
| 559 // get a time value since we "weren't tracking" and we were trying to be | |
| 560 // efficient by not calling for a genuine time value. For simplicity, we'll | |
| 561 // use a default zero duration when we can't calculate a true value. | |
| 562 TrackedTime start_of_run = stopwatch.StartTime(); | |
| 563 int32 queue_duration = 0; | |
| 564 if (!start_of_run.is_null()) { | |
| 565 queue_duration = (start_of_run - completed_task.EffectiveTimePosted()) | |
| 566 .InMilliseconds(); | |
| 567 } | |
| 568 current_thread_data->TallyADeath(*births, queue_duration, stopwatch); | |
| 569 } | |
| 570 | |
| 571 // static | |
| 572 void ThreadData::TallyRunOnWorkerThreadIfTracking( | |
| 573 const Births* births, | |
| 574 const TrackedTime& time_posted, | |
| 575 const TaskStopwatch& stopwatch) { | |
| 576 // Even if we have been DEACTIVATED, we will process any pending births so | |
| 577 // that our data structures (which counted the outstanding births) remain | |
| 578 // consistent. | |
| 579 if (!births) | |
| 580 return; | |
| 581 | |
| 582 // TODO(jar): Support the option to coalesce all worker-thread activity under | |
| 583 // one ThreadData instance that uses locks to protect *all* access. This will | |
| 584 // reduce memory (making it provably bounded), but run incrementally slower | |
| 585 // (since we'll use locks on TallyABirth and TallyADeath). The good news is | |
| 586 // that the locks on TallyADeath will be *after* the worker thread has run, | |
| 587 // and hence nothing will be waiting for the completion (... besides some | |
| 588 // other thread that might like to run). Also, the worker threads tasks are | |
| 589 // generally longer, and hence the cost of the lock may perchance be amortized | |
| 590 // over the long task's lifetime. | |
| 591 ThreadData* current_thread_data = stopwatch.GetThreadData(); | |
| 592 if (!current_thread_data) | |
| 593 return; | |
| 594 | |
| 595 TrackedTime start_of_run = stopwatch.StartTime(); | |
| 596 int32 queue_duration = 0; | |
| 597 if (!start_of_run.is_null()) { | |
| 598 queue_duration = (start_of_run - time_posted).InMilliseconds(); | |
| 599 } | |
| 600 current_thread_data->TallyADeath(*births, queue_duration, stopwatch); | |
| 601 } | |
| 602 | |
| 603 // static | |
| 604 void ThreadData::TallyRunInAScopedRegionIfTracking( | |
| 605 const Births* births, | |
| 606 const TaskStopwatch& stopwatch) { | |
| 607 // Even if we have been DEACTIVATED, we will process any pending births so | |
| 608 // that our data structures (which counted the outstanding births) remain | |
| 609 // consistent. | |
| 610 if (!births) | |
| 611 return; | |
| 612 | |
| 613 ThreadData* current_thread_data = stopwatch.GetThreadData(); | |
| 614 if (!current_thread_data) | |
| 615 return; | |
| 616 | |
| 617 int32 queue_duration = 0; | |
| 618 current_thread_data->TallyADeath(*births, queue_duration, stopwatch); | |
| 619 } | |
| 620 | |
| 621 void ThreadData::SnapshotExecutedTasks( | |
| 622 int current_profiling_phase, | |
| 623 PhasedProcessDataSnapshotMap* phased_snapshots, | |
| 624 BirthCountMap* birth_counts) { | |
| 625 // Get copy of data, so that the data will not change during the iterations | |
| 626 // and processing. | |
| 627 BirthMap birth_map; | |
| 628 DeathsSnapshot deaths; | |
| 629 SnapshotMaps(current_profiling_phase, &birth_map, &deaths); | |
| 630 | |
| 631 for (const auto& birth : birth_map) { | |
| 632 (*birth_counts)[birth.second] += birth.second->birth_count(); | |
| 633 } | |
| 634 | |
| 635 for (const auto& death : deaths) { | |
| 636 (*birth_counts)[death.first] -= death.first->birth_count(); | |
| 637 | |
| 638 // For the current death data, walk through all its snapshots, starting from | |
| 639 // the current one, then from the previous profiling phase etc., and for | |
| 640 // each snapshot calculate the delta between the snapshot and the previous | |
| 641 // phase, if any. Store the deltas in the result. | |
| 642 for (const DeathDataPhaseSnapshot* phase = &death.second; phase; | |
| 643 phase = phase->prev) { | |
| 644 const DeathDataSnapshot& death_data = | |
| 645 phase->prev ? phase->death_data.Delta(phase->prev->death_data) | |
| 646 : phase->death_data; | |
| 647 | |
| 648 if (death_data.count > 0) { | |
| 649 (*phased_snapshots)[phase->profiling_phase].tasks.push_back( | |
| 650 TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data, | |
| 651 thread_name())); | |
| 652 } | |
| 653 } | |
| 654 } | |
| 655 } | |
| 656 | |
| 657 // This may be called from another thread. | |
| 658 void ThreadData::SnapshotMaps(int profiling_phase, | |
| 659 BirthMap* birth_map, | |
| 660 DeathsSnapshot* deaths) { | |
| 661 base::AutoLock lock(map_lock_); | |
| 662 | |
| 663 for (const auto& birth : birth_map_) | |
| 664 (*birth_map)[birth.first] = birth.second; | |
| 665 | |
| 666 for (const auto& death : death_map_) { | |
| 667 deaths->push_back(std::make_pair( | |
| 668 death.first, | |
| 669 DeathDataPhaseSnapshot(profiling_phase, death.second.count(), | |
| 670 death.second.run_duration_sum(), | |
| 671 death.second.run_duration_max(), | |
| 672 death.second.run_duration_sample(), | |
| 673 death.second.queue_duration_sum(), | |
| 674 death.second.queue_duration_max(), | |
| 675 death.second.queue_duration_sample(), | |
| 676 death.second.last_phase_snapshot()))); | |
| 677 } | |
| 678 } | |
| 679 | |
| 680 void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) { | |
| 681 base::AutoLock lock(map_lock_); | |
| 682 | |
| 683 for (auto& death : death_map_) { | |
| 684 death.second.OnProfilingPhaseCompleted(profiling_phase); | |
| 685 } | |
| 686 } | |
| 687 | |
| 688 static void OptionallyInitializeAlternateTimer() { | |
| 689 NowFunction* alternate_time_source = GetAlternateTimeSource(); | |
| 690 if (alternate_time_source) | |
| 691 ThreadData::SetAlternateTimeSource(alternate_time_source); | |
| 692 } | |
| 693 | |
| 694 void ThreadData::Initialize() { | |
| 695 if (status_ >= DEACTIVATED) | |
| 696 return; // Someone else did the initialization. | |
| 697 // Due to racy lazy initialization in tests, we'll need to recheck status_ | |
| 698 // after we acquire the lock. | |
| 699 | |
| 700 // Ensure that we don't double initialize tls. We are called when single | |
| 701 // threaded in the product, but some tests may be racy and lazy about our | |
| 702 // initialization. | |
| 703 base::AutoLock lock(*list_lock_.Pointer()); | |
| 704 if (status_ >= DEACTIVATED) | |
| 705 return; // Someone raced in here and beat us. | |
| 706 | |
| 707 // Put an alternate timer in place if the environment calls for it, such as | |
| 708 // for tracking TCMalloc allocations. This insertion is idempotent, so we | |
| 709 // don't mind if there is a race, and we'd prefer not to be in a lock while | |
| 710 // doing this work. | |
| 711 if (kAllowAlternateTimeSourceHandling) | |
| 712 OptionallyInitializeAlternateTimer(); | |
| 713 | |
| 714 // Perform the "real" TLS initialization now, and leave it intact through | |
| 715 // process termination. | |
| 716 if (!tls_index_.initialized()) { // Testing may have initialized this. | |
| 717 DCHECK_EQ(status_, UNINITIALIZED); | |
| 718 tls_index_.Initialize(&ThreadData::OnThreadTermination); | |
| 719 DCHECK(tls_index_.initialized()); | |
| 720 } else { | |
| 721 // TLS was initialzed for us earlier. | |
| 722 DCHECK_EQ(status_, DORMANT_DURING_TESTS); | |
| 723 } | |
| 724 | |
| 725 // Incarnation counter is only significant to testing, as it otherwise will | |
| 726 // never again change in this process. | |
| 727 ++incarnation_counter_; | |
| 728 | |
| 729 // The lock is not critical for setting status_, but it doesn't hurt. It also | |
| 730 // ensures that if we have a racy initialization, that we'll bail as soon as | |
| 731 // we get the lock earlier in this method. | |
| 732 status_ = kInitialStartupState; | |
| 733 DCHECK(status_ != UNINITIALIZED); | |
| 734 } | |
| 735 | |
| 736 // static | |
| 737 void ThreadData::InitializeAndSetTrackingStatus(Status status) { | |
| 738 DCHECK_GE(status, DEACTIVATED); | |
| 739 DCHECK_LE(status, PROFILING_ACTIVE); | |
| 740 | |
| 741 Initialize(); // No-op if already initialized. | |
| 742 | |
| 743 if (status > DEACTIVATED) | |
| 744 status = PROFILING_ACTIVE; | |
| 745 status_ = status; | |
| 746 } | |
| 747 | |
| 748 // static | |
| 749 ThreadData::Status ThreadData::status() { | |
| 750 return status_; | |
| 751 } | |
| 752 | |
| 753 // static | |
| 754 bool ThreadData::TrackingStatus() { | |
| 755 return status_ > DEACTIVATED; | |
| 756 } | |
| 757 | |
| 758 // static | |
| 759 void ThreadData::SetAlternateTimeSource(NowFunction* now_function) { | |
| 760 DCHECK(now_function); | |
| 761 if (kAllowAlternateTimeSourceHandling) | |
| 762 now_function_ = now_function; | |
| 763 } | |
| 764 | |
| 765 // static | |
| 766 void ThreadData::EnableProfilerTiming() { | |
| 767 base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING); | |
| 768 } | |
| 769 | |
| 770 // static | |
| 771 TrackedTime ThreadData::Now() { | |
| 772 if (kAllowAlternateTimeSourceHandling && now_function_) | |
| 773 return TrackedTime::FromMilliseconds((*now_function_)()); | |
| 774 if (IsProfilerTimingEnabled() && TrackingStatus()) | |
| 775 return TrackedTime::Now(); | |
| 776 return TrackedTime(); // Super fast when disabled, or not compiled. | |
| 777 } | |
| 778 | |
| 779 // static | |
| 780 void ThreadData::EnsureCleanupWasCalled(int major_threads_shutdown_count) { | |
| 781 base::AutoLock lock(*list_lock_.Pointer()); | |
| 782 if (worker_thread_data_creation_count_ == 0) | |
| 783 return; // We haven't really run much, and couldn't have leaked. | |
| 784 | |
| 785 // TODO(jar): until this is working on XP, don't run the real test. | |
| 786 #if 0 | |
| 787 // Verify that we've at least shutdown/cleanup the major namesd threads. The | |
| 788 // caller should tell us how many thread shutdowns should have taken place by | |
| 789 // now. | |
| 790 CHECK_GT(cleanup_count_, major_threads_shutdown_count); | |
| 791 #endif | |
| 792 } | |
| 793 | |
| 794 // static | |
| 795 void ThreadData::ShutdownSingleThreadedCleanup(bool leak) { | |
| 796 // This is only called from test code, where we need to cleanup so that | |
| 797 // additional tests can be run. | |
| 798 // We must be single threaded... but be careful anyway. | |
| 799 InitializeAndSetTrackingStatus(DEACTIVATED); | |
| 800 | |
| 801 ThreadData* thread_data_list; | |
| 802 { | |
| 803 base::AutoLock lock(*list_lock_.Pointer()); | |
| 804 thread_data_list = all_thread_data_list_head_; | |
| 805 all_thread_data_list_head_ = NULL; | |
| 806 ++incarnation_counter_; | |
| 807 // To be clean, break apart the retired worker list (though we leak them). | |
| 808 while (first_retired_worker_) { | |
| 809 ThreadData* worker = first_retired_worker_; | |
| 810 CHECK_GT(worker->worker_thread_number_, 0); | |
| 811 first_retired_worker_ = worker->next_retired_worker_; | |
| 812 worker->next_retired_worker_ = NULL; | |
| 813 } | |
| 814 } | |
| 815 | |
| 816 // Put most global static back in pristine shape. | |
| 817 worker_thread_data_creation_count_ = 0; | |
| 818 cleanup_count_ = 0; | |
| 819 tls_index_.Set(NULL); | |
| 820 status_ = DORMANT_DURING_TESTS; // Almost UNINITIALIZED. | |
| 821 | |
| 822 // To avoid any chance of racing in unit tests, which is the only place we | |
| 823 // call this function, we may sometimes leak all the data structures we | |
| 824 // recovered, as they may still be in use on threads from prior tests! | |
| 825 if (leak) { | |
| 826 ThreadData* thread_data = thread_data_list; | |
| 827 while (thread_data) { | |
| 828 ANNOTATE_LEAKING_OBJECT_PTR(thread_data); | |
| 829 thread_data = thread_data->next(); | |
| 830 } | |
| 831 return; | |
| 832 } | |
| 833 | |
| 834 // When we want to cleanup (on a single thread), here is what we do. | |
| 835 | |
| 836 // Do actual recursive delete in all ThreadData instances. | |
| 837 while (thread_data_list) { | |
| 838 ThreadData* next_thread_data = thread_data_list; | |
| 839 thread_data_list = thread_data_list->next(); | |
| 840 | |
| 841 for (BirthMap::iterator it = next_thread_data->birth_map_.begin(); | |
| 842 next_thread_data->birth_map_.end() != it; ++it) | |
| 843 delete it->second; // Delete the Birth Records. | |
| 844 delete next_thread_data; // Includes all Death Records. | |
| 845 } | |
| 846 } | |
| 847 | |
| 848 //------------------------------------------------------------------------------ | |
| 849 TaskStopwatch::TaskStopwatch() | |
| 850 : wallclock_duration_ms_(0), | |
| 851 current_thread_data_(NULL), | |
| 852 excluded_duration_ms_(0), | |
| 853 parent_(NULL) { | |
| 854 #if DCHECK_IS_ON() | |
| 855 state_ = CREATED; | |
| 856 child_ = NULL; | |
| 857 #endif | |
| 858 } | |
| 859 | |
| 860 TaskStopwatch::~TaskStopwatch() { | |
| 861 #if DCHECK_IS_ON() | |
| 862 DCHECK(state_ != RUNNING); | |
| 863 DCHECK(child_ == NULL); | |
| 864 #endif | |
| 865 } | |
| 866 | |
| 867 void TaskStopwatch::Start() { | |
| 868 #if DCHECK_IS_ON() | |
| 869 DCHECK(state_ == CREATED); | |
| 870 state_ = RUNNING; | |
| 871 #endif | |
| 872 | |
| 873 start_time_ = ThreadData::Now(); | |
| 874 | |
| 875 current_thread_data_ = ThreadData::Get(); | |
| 876 if (!current_thread_data_) | |
| 877 return; | |
| 878 | |
| 879 parent_ = current_thread_data_->current_stopwatch_; | |
| 880 #if DCHECK_IS_ON() | |
| 881 if (parent_) { | |
| 882 DCHECK(parent_->state_ == RUNNING); | |
| 883 DCHECK(parent_->child_ == NULL); | |
| 884 parent_->child_ = this; | |
| 885 } | |
| 886 #endif | |
| 887 current_thread_data_->current_stopwatch_ = this; | |
| 888 } | |
| 889 | |
| 890 void TaskStopwatch::Stop() { | |
| 891 const TrackedTime end_time = ThreadData::Now(); | |
| 892 #if DCHECK_IS_ON() | |
| 893 DCHECK(state_ == RUNNING); | |
| 894 state_ = STOPPED; | |
| 895 DCHECK(child_ == NULL); | |
| 896 #endif | |
| 897 | |
| 898 if (!start_time_.is_null() && !end_time.is_null()) { | |
| 899 wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds(); | |
| 900 } | |
| 901 | |
| 902 if (!current_thread_data_) | |
| 903 return; | |
| 904 | |
| 905 DCHECK(current_thread_data_->current_stopwatch_ == this); | |
| 906 current_thread_data_->current_stopwatch_ = parent_; | |
| 907 if (!parent_) | |
| 908 return; | |
| 909 | |
| 910 #if DCHECK_IS_ON() | |
| 911 DCHECK(parent_->state_ == RUNNING); | |
| 912 DCHECK(parent_->child_ == this); | |
| 913 parent_->child_ = NULL; | |
| 914 #endif | |
| 915 parent_->excluded_duration_ms_ += wallclock_duration_ms_; | |
| 916 parent_ = NULL; | |
| 917 } | |
| 918 | |
| 919 TrackedTime TaskStopwatch::StartTime() const { | |
| 920 #if DCHECK_IS_ON() | |
| 921 DCHECK(state_ != CREATED); | |
| 922 #endif | |
| 923 | |
| 924 return start_time_; | |
| 925 } | |
| 926 | |
| 927 int32 TaskStopwatch::RunDurationMs() const { | |
| 928 #if DCHECK_IS_ON() | |
| 929 DCHECK(state_ == STOPPED); | |
| 930 #endif | |
| 931 | |
| 932 return wallclock_duration_ms_ - excluded_duration_ms_; | |
| 933 } | |
| 934 | |
| 935 ThreadData* TaskStopwatch::GetThreadData() const { | |
| 936 #if DCHECK_IS_ON() | |
| 937 DCHECK(state_ != CREATED); | |
| 938 #endif | |
| 939 | |
| 940 return current_thread_data_; | |
| 941 } | |
| 942 | |
| 943 //------------------------------------------------------------------------------ | |
| 944 // DeathDataPhaseSnapshot | |
| 945 | |
| 946 DeathDataPhaseSnapshot::DeathDataPhaseSnapshot( | |
| 947 int profiling_phase, | |
| 948 int count, | |
| 949 int32 run_duration_sum, | |
| 950 int32 run_duration_max, | |
| 951 int32 run_duration_sample, | |
| 952 int32 queue_duration_sum, | |
| 953 int32 queue_duration_max, | |
| 954 int32 queue_duration_sample, | |
| 955 const DeathDataPhaseSnapshot* prev) | |
| 956 : profiling_phase(profiling_phase), | |
| 957 death_data(count, | |
| 958 run_duration_sum, | |
| 959 run_duration_max, | |
| 960 run_duration_sample, | |
| 961 queue_duration_sum, | |
| 962 queue_duration_max, | |
| 963 queue_duration_sample), | |
| 964 prev(prev) { | |
| 965 } | |
| 966 | |
| 967 //------------------------------------------------------------------------------ | |
| 968 // TaskSnapshot | |
| 969 | |
| 970 TaskSnapshot::TaskSnapshot() { | |
| 971 } | |
| 972 | |
| 973 TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth, | |
| 974 const DeathDataSnapshot& death_data, | |
| 975 const std::string& death_thread_name) | |
| 976 : birth(birth), | |
| 977 death_data(death_data), | |
| 978 death_thread_name(death_thread_name) { | |
| 979 } | |
| 980 | |
| 981 TaskSnapshot::~TaskSnapshot() { | |
| 982 } | |
| 983 | |
| 984 //------------------------------------------------------------------------------ | |
| 985 // ProcessDataPhaseSnapshot | |
| 986 | |
| 987 ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() { | |
| 988 } | |
| 989 | |
| 990 ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() { | |
| 991 } | |
| 992 | |
| 993 //------------------------------------------------------------------------------ | |
| 994 // ProcessDataPhaseSnapshot | |
| 995 | |
| 996 ProcessDataSnapshot::ProcessDataSnapshot() | |
| 997 #if !defined(OS_NACL) | |
| 998 : process_id(base::GetCurrentProcId()) { | |
| 999 #else | |
| 1000 : process_id(base::kNullProcessId) { | |
| 1001 #endif | |
| 1002 } | |
| 1003 | |
| 1004 ProcessDataSnapshot::~ProcessDataSnapshot() { | |
| 1005 } | |
| 1006 | |
| 1007 } // namespace tracked_objects | |
| OLD | NEW |