Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(236)

Side by Side Diff: chrome/browser/task_management/sampling/shared_sampler_win.cc

Issue 2178733002: Task manager should support Idle Wakeups on Windows (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Fixed build error on win_clang. Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/task_management/sampling/shared_sampler.h"
6
7 #include <windows.h>
8 #include <winternl.h>
9
10 #include <algorithm>
11
12 #include "base/bind.h"
13 #include "base/command_line.h"
14 #include "base/time/time.h"
15 #include "chrome/browser/task_management/task_manager_observer.h"
16 #include "content/public/browser/browser_thread.h"
17
18 namespace task_management {
19
20 namespace {
21
22 const wchar_t kNacl64Exe[] = L"nacl64.exe";
23
24 // From <wdm.h>
25 typedef LONG KPRIORITY;
26 typedef LONG KWAIT_REASON; // Full definition is in wdm.h
27
28 // From ntddk.h
// Local copy of the process virtual-memory counters structure. It is embedded
// by value in SYSTEM_PROCESS_INFORMATION (declared later in this file), whose
// total size is pinned by a static_assert, so the order and width of these
// fields must not change.
29 typedef struct _VM_COUNTERS {
30 SIZE_T PeakVirtualSize;
31 SIZE_T VirtualSize;
32 ULONG PageFaultCount;
33 // Padding here in 64-bit
34 SIZE_T PeakWorkingSetSize;
35 SIZE_T WorkingSetSize;
36 SIZE_T QuotaPeakPagedPoolUsage;
37 SIZE_T QuotaPagedPoolUsage;
38 SIZE_T QuotaPeakNonPagedPoolUsage;
39 SIZE_T QuotaNonPagedPoolUsage;
40 SIZE_T PagefileUsage;
41 SIZE_T PeakPagefileUsage;
42 } VM_COUNTERS;
43
44 // Two possibilities available from here:
45 // http://stackoverflow.com/questions/28858849/where-is-system-information-class-defined
46
// Only the single information class that QuerySystemProcessInformation()
// passes to NtQuerySystemInformation is declared here.
47 typedef enum _SYSTEM_INFORMATION_CLASS {
48 SystemProcessInformation = 5, // This is the number that we need.
49 } SYSTEM_INFORMATION_CLASS;
50
51 // https://msdn.microsoft.com/en-us/library/gg750647.aspx?f=255&MSPPError=-2147217396
// Process/thread identifier pair stored in SYSTEM_THREAD_INFORMATION. The
// members are HANDLE-typed but hold numeric IDs; CaptureSnapshot() converts
// them via uintptr_t.
52 typedef struct {
53 HANDLE UniqueProcess; // Actually process ID
54 HANDLE UniqueThread; // Actually thread ID
55 } CLIENT_ID;
56
57 // From http://alax.info/blog/1182, with corrections and modifications
58 // Originally from
59 // http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FSystem%20Information%2FStructures%2FSYSTEM_THREAD.html
// Per-thread record; an array of these trails each SYSTEM_PROCESS_INFORMATION
// entry. This file reads only ClientId and ContextSwitchCount. The
// static_asserts below pin the expected size for 64-bit and 32-bit builds, so
// fields must not be added, removed or reordered.
60 struct SYSTEM_THREAD_INFORMATION {
61 ULONGLONG KernelTime;
62 ULONGLONG UserTime;
63 ULONGLONG CreateTime;
64 ULONG WaitTime;
65 // Padding here in 64-bit
66 PVOID StartAddress;
67 CLIENT_ID ClientId;
68 KPRIORITY Priority;
69 LONG BasePriority;
70 ULONG ContextSwitchCount;
71 ULONG State;
72 KWAIT_REASON WaitReason;
73 };
74 #if _M_X64
75 static_assert(sizeof(SYSTEM_THREAD_INFORMATION) == 80,
76 "Structure size mismatch");
77 #else
78 static_assert(sizeof(SYSTEM_THREAD_INFORMATION) == 64,
79 "Structure size mismatch");
80 #endif
81
82 // From http://alax.info/blog/1182, with corrections and modifications
83 // Originally from
84 // http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FSystem%20Information%2FStructures%2FSYSTEM_THREAD.html
// Per-process record in the buffer filled by NtQuerySystemInformation for
// SystemProcessInformation. CaptureSnapshot() walks the records using
// NextEntryOffset and reads NumberOfThreads, ImageName, ProcessId and the
// trailing Threads array. The static_asserts below pin the expected size for
// 64-bit and 32-bit builds, so the layout must not change.
85 struct SYSTEM_PROCESS_INFORMATION {
ncarter (slow) 2016/07/28 21:37:26 This is also declared (slightly differently) in:
stanisc 2016/07/28 22:38:44 I saw the process_structs.h version and slight dif
86 ULONG NextEntryOffset;
87 ULONG NumberOfThreads;
88 // http://processhacker.sourceforge.net/doc/struct___s_y_s_t_e_m___p_r_o_c_e_s_s___i_n_f_o_r_m_a_t_i_o_n.html
89 ULONGLONG WorkingSetPrivateSize;
90 ULONG HardFaultCount;
91 ULONG Reserved1;
92 ULONGLONG CycleTime;
93 ULONGLONG CreateTime;
94 ULONGLONG UserTime;
95 ULONGLONG KernelTime;
96 UNICODE_STRING ImageName;
97 KPRIORITY BasePriority;
98 HANDLE ProcessId;
99 HANDLE ParentProcessId;
100 ULONG HandleCount;
101 ULONG Reserved2[2];
102 // Padding here in 64-bit
103 VM_COUNTERS VirtualMemoryCounters;
104 size_t Reserved3;
105 IO_COUNTERS IoCounters;
// Declared with one element; CaptureSnapshot() indexes it up to
// NumberOfThreads after bounds-checking against the returned data size.
106 SYSTEM_THREAD_INFORMATION Threads[1];
107 };
108 #if _M_X64
109 static_assert(sizeof(SYSTEM_PROCESS_INFORMATION) == 336,
110 "Structure size mismatch");
111 #else
112 static_assert(sizeof(SYSTEM_PROCESS_INFORMATION) == 248,
113 "Structure size mismatch");
114 #endif
115
116 // ntstatus.h conflicts with windows.h so define this locally.
// These are the NTSTATUS values QuerySystemProcessInformation() compares the
// NtQuerySystemInformation result against: success, and the two results it
// treats as "buffer too small, grow and retry".
117 #define STATUS_SUCCESS ((NTSTATUS)0x00000000L)
118 #define STATUS_BUFFER_TOO_SMALL ((NTSTATUS)0xC0000023L)
119 #define STATUS_INFO_LENGTH_MISMATCH ((NTSTATUS)0xC0000004L)
120
121 // Wrapper for NtQuerySystemProcessInformation with buffer reallocation logic.
122 bool QuerySystemProcessInformation(std::vector<BYTE>* data_buffer,
123 ULONG* data_size) {
124 typedef NTSTATUS(WINAPI * NTQUERYSYSTEMINFORMATION)(
125 SYSTEM_INFORMATION_CLASS SystemInformationClass, PVOID SystemInformation,
126 ULONG SystemInformationLength, PULONG ReturnLength);
127
128 HMODULE ntdll = ::GetModuleHandle(L"ntdll.dll");
129 if (!ntdll) {
130 NOTREACHED();
131 return false;
132 }
133
134 NTQUERYSYSTEMINFORMATION nt_query_system_information_ptr =
135 reinterpret_cast<NTQUERYSYSTEMINFORMATION>(
136 ::GetProcAddress(ntdll, "NtQuerySystemInformation"));
137 if (!nt_query_system_information_ptr) {
138 NOTREACHED();
139 return false;
140 }
141
142 ULONG buffer_size = data_buffer->size();
143
144 NTSTATUS result;
145
146 // There is a potential race condition between growing the buffer and new
147 // processes being creating. Try a few times before giving up.
148 for (int i = 0; i < 10; i++) {
149 *data_size = 0;
150 result = nt_query_system_information_ptr(
151 SystemProcessInformation,
152 buffer_size > 0 ? data_buffer->data() : nullptr,
153 buffer_size, data_size);
154
155 if (result == STATUS_INFO_LENGTH_MISMATCH ||
156 result == STATUS_BUFFER_TOO_SMALL) {
157 // Insufficient buffer. Resize to the returned |data_size| plus 10% extra
158 // to avoid frequent reallocations and try again.
159 DCHECK_GT(*data_size, buffer_size);
160 buffer_size = static_cast<ULONG>(*data_size * 1.1);
161 data_buffer->resize(buffer_size);
162 } else {
163 // Either STATUS_SUCCESS or an error other than the two above.
164 break;
165 }
166 }
167
168 return result == STATUS_SUCCESS;
169 }
170
171 } // namespace
172
173 // Per-thread data extracted from SYSTEM_THREAD_INFORMATION
174 // and stored in a snapshot.
175 // This structure is accessed only on the worker thread.
176 struct ThreadData {
177 base::PlatformThreadId thread_id;
178 ULONG context_switches;
179 };
180
181 // Per-process data extracted from SYSTEM_PROCESS_INFORMATION
182 // and stored in a snapshot.
183 // This structure is accessed only on the worker thread.
184 struct ProcessData {
185 ProcessData() = default;
186 ProcessData(ProcessData&&) = default;
187
188 std::vector<ThreadData> threads;
189
190 private:
191 DISALLOW_COPY_AND_ASSIGN(ProcessData);
192 };
193
194 typedef std::map<base::ProcessId, ProcessData> ProcessDataMap;
195
196 // ProcessDataSnapshot gets created and accessed only on the worker thread.
197 // This is used to calculate metrics like Idle Wakeups / sec that require
198 // a delta between two snapshots.
199 struct ProcessDataSnapshot {
200 ProcessDataMap processes;
201 base::TimeTicks timestamp;
202 };
203
204 ULONG CountContextSwitches(const ProcessData& process_data) {
205 ULONG context_switches = 0;
206 for (auto& thread_data : process_data.threads) {
207 context_switches += thread_data.context_switches;
208 }
209
210 return context_switches;
211 }
212
213 ULONG CountContextSwitchesDelta(const ProcessData& prev_process_data,
214 const ProcessData& new_process_data) {
215 ULONG delta = 0;
216 // This one pass algorithm relies on the threads vectors to be
217 // ordered by thread_id.
218 size_t prev_index = 0;
219 size_t new_index = 0;
220 while (new_index < new_process_data.threads.size()) {
221 auto& new_thread = new_process_data.threads[new_index];
222
223 if (prev_index < prev_process_data.threads.size()) {
224 auto& prev_thread = prev_process_data.threads[prev_index];
225
226 if (prev_thread.thread_id < new_thread.thread_id) {
227 // Thread exists in previous snapshot only - skip it and don't count
228 // its context switches towards the delta.
229 prev_index++;
230 } else if (prev_thread.thread_id > new_thread.thread_id) {
231 // A new thread that didn't exist in prev_process_data.
232 // Add its entire number of context switches to the delta (since it
233 // started with zero).
234 delta = new_thread.context_switches;
235 new_index++;
236 } else {
237 // Threads match between two snapshots - add the difference between
238 // context switches to the delta.
239 delta += new_thread.context_switches - prev_thread.context_switches;
240 prev_index++;
241 new_index++;
242 }
243 } else {
244 // prev_index reached the end of |prev_process_data.threads| size, so this
245 // is a new thread that didn't exist in prev_process_data.
246 // Add its entire number of context switches to the delta (since it
247 // started with zero).
248 delta = new_thread.context_switches;
249 new_index++;
250 }
251 }
252
253 return delta;
254 }
255
256 SharedSampler::SharedSampler(
257 const scoped_refptr<base::SequencedTaskRunner>& blocking_pool_runner)
258 : previous_buffer_size_(0),
259 current_process_image_name_(
260 base::CommandLine::ForCurrentProcess()->GetProgram().BaseName()),
261 blocking_pool_runner_(blocking_pool_runner) {
262 DCHECK(blocking_pool_runner.get());
263
264 // This object will be created on the UI thread, however the sequenced checker
265 // will be used to assert we're running the expensive operations on one of the
266 // blocking pool threads.
267 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
268 worker_pool_sequenced_checker_.DetachFromSequence();
269 }
270
271 SharedSampler::~SharedSampler() {}
272
273 int64_t SharedSampler::SupportsFlags() const {
274 return REFRESH_TYPE_IDLE_WAKEUPS;
275 }
276
277 SharedSampler::Callbacks::Callbacks() {}
278
279 SharedSampler::Callbacks::~Callbacks() {}
280
281 SharedSampler::Callbacks::Callbacks(Callbacks&& other) {
282 on_idle_wakeups = std::move(other.on_idle_wakeups);
283 }
284
285 void SharedSampler::RegisterCallbacks(
286 base::ProcessId process_id,
287 const OnIdleWakeupsCallback& on_idle_wakeups) {
288 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
289 Callbacks callbacks;
290 callbacks.on_idle_wakeups = on_idle_wakeups;
291 bool result = callbacks_map_.insert(
292 std::make_pair(process_id, std::move(callbacks))).second;
293 DCHECK(result);
294 }
295
296 void SharedSampler::UnregisterCallbacks(base::ProcessId process_id) {
297 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
298 callbacks_map_.erase(process_id);
299 }
300
301 void SharedSampler::Refresh(base::ProcessId process_id, int64_t refresh_flags) {
302 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
303 DCHECK(callbacks_map_.find(process_id) != callbacks_map_.end());
304 DCHECK_NE(0, refresh_flags & SupportsFlags());
305
306 bool need_refresh = refresh_flags_.empty();
307
308 refresh_flags_.push_back(std::make_pair(process_id, refresh_flags));
309
310 if (need_refresh) {
311 base::PostTaskAndReplyWithResult(
312 blocking_pool_runner_.get(), FROM_HERE,
313 base::Bind(&SharedSampler::RefreshOnWorkerThread, this),
314 base::Bind(&SharedSampler::OnRefreshDone, this));
315 }
316 }
317
318 std::unique_ptr<SharedSampler::RefreshResultMap>
319 SharedSampler::RefreshOnWorkerThread() {
320 DCHECK(worker_pool_sequenced_checker_.CalledOnValidSequencedThread());
321
322 std::unique_ptr<ProcessDataSnapshot> snapshot = CaptureSnapshot();
323 DCHECK(snapshot);
324
325 std::unique_ptr<RefreshResultMap> results(new RefreshResultMap);
326
327 if (previous_snapshot_) {
328 MakeResultsFromTwoSnapshots(*previous_snapshot_, *snapshot, results.get());
329 } else {
330 MakeResultsFromSnapshot(*snapshot, results.get());
ncarter (slow) 2016/07/28 21:37:26 Is MakeResultsFromSnapshot different from just doi
stanisc 2016/07/28 22:38:44 It is different. MakeResultsFromTwoSnapshots would
331 }
332
333 previous_snapshot_ = std::move(snapshot);
ncarter (slow) 2016/07/28 21:37:26 If we close the TaskManager, or hide the idle wake
stanisc 2016/07/28 22:38:44 Hmm... Yes, that is something I didn't consider. I
stanisc 2016/08/01 22:34:26 OK, I've handled the "closing the TaskManager" cas
334
335 return results;
336 }
337
338 bool SharedSampler::IsChromeImageName(const wchar_t* image_name) const {
ncarter (slow) 2016/07/28 21:37:26 Using base:: string comparison function would resu
stanisc 2016/08/01 22:34:27 Done.
339 return _wcsnicmp(image_name, current_process_image_name_.value().c_str(),
340 current_process_image_name_.value().size()) == 0 ||
341 _wcsnicmp(image_name, kNacl64Exe, _countof(kNacl64Exe) - 1) == 0;
342 }
343
344 std::unique_ptr<ProcessDataSnapshot> SharedSampler::CaptureSnapshot() {
345 DCHECK(worker_pool_sequenced_checker_.CalledOnValidSequencedThread());
346
347 ULONG data_size;
348
349 // Preallocate the buffer with the size determined on the previous call to
350 // QuerySystemProcessInformation. This should be sufficient most of the time.
351 // QuerySystemProcessInformation will grow the buffer if necessary.
352 std::vector<BYTE> data_buffer(previous_buffer_size_);
353
354 if (!QuerySystemProcessInformation(&data_buffer, &data_size))
355 return std::unique_ptr<ProcessDataSnapshot>();
ncarter (slow) 2016/07/28 21:37:26 Would it be better to return an empty snapshot her
stanisc 2016/07/28 22:38:44 I realized the current implementation doesn't actu
stanisc 2016/08/01 22:34:26 I decided to keep this approach to indicate that s
356
357 previous_buffer_size_ = data_buffer.size();
358
359 std::unique_ptr<ProcessDataSnapshot> snapshot(new ProcessDataSnapshot);
360 snapshot->timestamp = base::TimeTicks::Now();
361
362 for (ULONG offset = 0; offset < data_size; ) {
363 auto pi = reinterpret_cast<const SYSTEM_PROCESS_INFORMATION*>(
364 &data_buffer[offset]);
365
366 // Validate that the offset is valid and all needed data is within
367 // the buffer boundary.
368 if (offset + sizeof(SYSTEM_PROCESS_INFORMATION) > data_size)
369 break;
370 if (offset + sizeof(SYSTEM_PROCESS_INFORMATION) +
371 (pi->NumberOfThreads - 1) * sizeof(SYSTEM_THREAD_INFORMATION) >
372 data_size)
373 break;
374
375 if (pi->ImageName.Buffer) {
376 // Validate that the image name is within the buffer boundary.
377 // ImageName.Length seems to be in bytes rather than characters.
378 ULONG image_name_offset =
379 reinterpret_cast<BYTE*>(pi->ImageName.Buffer) - data_buffer.data();
380 if (image_name_offset + pi->ImageName.Length > data_size)
381 break;
382
383 // Check if this is a chrome process. Ignore all other processes.
384 if (IsChromeImageName(pi->ImageName.Buffer)) {
385 // Collect enough data to be able to do a diff between two snapshots.
386 // Some threads might stop or new threads might be created between two
387 // snapshots. If a thread with a large number of context switches gets
388 // terminated the total number of context switches for the process might
389 // go down and the delta would be negative.
390 // To avoid that we need to compare thread IDs between two snapshots and
391 // not count context switches for threads that are missing in the most
392 // recent snapshot.
393 ProcessData process_data;
394
395 // Iterate over threads and store each thread's ID and number of context
396 // switches.
397 for (ULONG thread_index = 0; thread_index < pi->NumberOfThreads;
398 ++thread_index) {
399 const SYSTEM_THREAD_INFORMATION* ti = &pi->Threads[thread_index];
400 if (ti->ClientId.UniqueProcess != pi->ProcessId)
401 continue;
402
403 ThreadData thread_data;
404 thread_data.thread_id = static_cast<base::PlatformThreadId>(
405 reinterpret_cast<uintptr_t>(ti->ClientId.UniqueThread));
406 thread_data.context_switches = ti->ContextSwitchCount;
407 process_data.threads.push_back(thread_data);
408 }
409
410 // Order thread data by thread ID to help diff two snapshots.
411 std::sort(process_data.threads.begin(), process_data.threads.end(),
412 [](const ThreadData& l, const ThreadData r) {
413 return l.thread_id < r.thread_id;
414 });
415
416 base::ProcessId process_id = static_cast<base::ProcessId>(
417 reinterpret_cast<uintptr_t>(pi->ProcessId));
418 bool inserted = snapshot->processes.insert(
419 std::make_pair(process_id, std::move(process_data))).second;
420 DCHECK(inserted);
421 }
422 }
423
424 // Check for end of the list.
425 if (!pi->NextEntryOffset)
426 break;
427
428 // Jump to the next entry.
429 offset += pi->NextEntryOffset;
430 }
431
432 return snapshot;
433 }
434
435 void SharedSampler::MakeResultsFromTwoSnapshots(
436 const ProcessDataSnapshot& prev_snapshot,
437 const ProcessDataSnapshot& snapshot,
438 RefreshResultMap* results) {
439 // Time delta in seconds.
440 double time_delta = (snapshot.timestamp - prev_snapshot.timestamp)
441 .InMillisecondsF() / 1000;
442
443 // Iterate over processes in both snapshots in parallel. This algorithm relies
444 // on map entries being ordered by Process ID.
445 ProcessDataMap::const_iterator prev_iter = prev_snapshot.processes.begin();
446 ProcessDataMap::const_iterator iter = snapshot.processes.begin();
447 while (iter != snapshot.processes.end()) {
448 auto process_id = iter->first;
ncarter (slow) 2016/07/28 21:37:27 I would use base::ProcessId instead of auto for th
stanisc 2016/08/01 22:34:27 Done.
449
450 // Delta between the old snapshot and the new snapshot.
451 int idle_wakeups_delta;
452
453 if (prev_iter != prev_snapshot.processes.end()) {
454 auto prev_snapshot_process_id = prev_iter->first;
455 if (prev_snapshot_process_id < process_id) {
ncarter (slow) 2016/07/28 21:37:26 The two-iterator approach would be natural if we n
stanisc 2016/07/28 22:38:44 Good idea! I'll think about this.
stanisc 2016/08/01 22:34:27 Done.
456 // Process is missing in the last snapshot - skip it and continue.
457 prev_iter++;
458 continue;
459 } else if (prev_snapshot_process_id > process_id) {
460 // Process is missing in the previous snapshot.
461 // Use its entire number of context switches.
462 idle_wakeups_delta = CountContextSwitches(iter->second);
463 iter++;
464 } else {
465 // Processes match between two snapshots.
466 idle_wakeups_delta =
467 CountContextSwitchesDelta(prev_iter->second, iter->second);
468 prev_iter++;
469 iter++;
470 }
471 } else {
472 // Since prev_index is at the end of |prev_snapshot.processes|, this
473 // is a new process that is missing in the previous snapshot.
474 // Use its entire number of context switches.
475 idle_wakeups_delta = CountContextSwitches(iter->second);
476 iter++;
477 }
478
479 RefreshResult result;
480 result.idle_wakeups_per_second =
481 static_cast<int>(round(idle_wakeups_delta / time_delta));
482 bool inserted = results->insert(std::make_pair(process_id, result)).second;
483 DCHECK(inserted);
484 }
485 }
486
487 void SharedSampler::MakeResultsFromSnapshot(const ProcessDataSnapshot& snapshot,
488 RefreshResultMap* results) {
489 for (auto& pair : snapshot.processes) {
490 auto process_id = pair.first;
491 RefreshResult result;
492 // Use 0 for Idle Wakeups / sec in this case. This is consistent with
493 // ProcessMetrics::CalculateIdleWakeupsPerSecond implementation.
494 result.idle_wakeups_per_second = 0;
495 bool inserted = results->insert(std::make_pair(process_id, result)).second;
496 DCHECK(inserted);
497 }
498 }
499
500 void SharedSampler::OnRefreshDone(
501 std::unique_ptr<RefreshResultMap> refresh_results) {
502 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
503
504 for (auto& process_flags : refresh_flags_) {
ncarter (slow) 2016/07/28 21:37:27 const auto&, unless you intend to mutuate.
stanisc 2016/08/01 22:34:27 Done.
505 RefreshResultMap::iterator result_iter = refresh_results->find(
506 process_flags.first);
507 // TODO(stanisc): what to do if the result for this process is missing?
508 // Apparently when a new tab is created Refresh is called a few times with
509 // thread_id = 0 before the actual thread ID gets assigned.
ncarter (slow) 2016/07/28 21:37:26 The question is whether or not to invoke the callb
stanisc 2016/08/01 22:34:27 OK. I ended up rewriting this whole method so the
510 // Should callback still be called with a default value in that case?
511 if (result_iter == refresh_results->end())
512 continue;
513
514 auto& result = result_iter->second;
ncarter (slow) 2016/07/28 21:37:27 This declaration seems far from its first use.
stanisc 2016/08/01 22:34:27 Done.
515
516 CallbacksMap::iterator callback_iter = callbacks_map_.find(
517 process_flags.first);
518 if (callback_iter == callbacks_map_.end())
519 continue;
520
521 if (TaskManagerObserver::IsResourceRefreshEnabled(REFRESH_TYPE_IDLE_WAKEUPS,
522 process_flags.second)) {
523 callback_iter->second.on_idle_wakeups.Run(result.idle_wakeups_per_second);
524 }
525 }
526
527 // Clear refresh_flags_.
528 refresh_flags_.clear();
529 }
530
531 } // namespace task_management
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698