Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(60)

Side by Side Diff: components/browser_watcher/postmortem_report_collector.cc

Issue 2715903003: Bound the impact of system instability on chrome instability. (Closed)
Patch Set: "Address Siggi's comments" (created 3 years, 9 months ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/browser_watcher/postmortem_report_collector.h" 5 #include "components/browser_watcher/postmortem_report_collector.h"
6 6
7 #include <utility> 7 #include <utility>
8 8
9 #include "base/debug/activity_analyzer.h" 9 #include "base/debug/activity_analyzer.h"
10 #include "base/files/file_enumerator.h" 10 #include "base/files/file_enumerator.h"
(...skipping 18 matching lines...) Expand all
29 using base::debug::ActivityUserData; 29 using base::debug::ActivityUserData;
30 using base::debug::GlobalActivityAnalyzer; 30 using base::debug::GlobalActivityAnalyzer;
31 using base::debug::GlobalActivityTracker; 31 using base::debug::GlobalActivityTracker;
32 using base::debug::ThreadActivityAnalyzer; 32 using base::debug::ThreadActivityAnalyzer;
33 using crashpad::CrashReportDatabase; 33 using crashpad::CrashReportDatabase;
34 34
35 namespace { 35 namespace {
36 36
37 const char kFieldTrialKeyPrefix[] = "FieldTrial."; 37 const char kFieldTrialKeyPrefix[] = "FieldTrial.";
38 38
39 // DO NOT CHANGE VALUES. This is logged persistently in a histogram.
40 enum SystemSessionAnalysisStatus {
41 SYSTEM_SESSION_ANALYSIS_SUCCESS = 0,
42 SYSTEM_SESSION_ANALYSIS_NO_TIMESTAMP = 1,
43 SYSTEM_SESSION_ANALYSIS_NO_ANALYZER = 2,
44 SYSTEM_SESSION_ANALYSIS_FAILED = 3,
45 SYSTEM_SESSION_ANALYSIS_OUTSIDE_RANGE = 4,
46 SYSTEM_SESSION_ANALYSIS_STATUS_MAX = 5
47 };
48
39 // Collects stability user data from the recorded format to the collected 49 // Collects stability user data from the recorded format to the collected
40 // format. 50 // format.
41 void CollectUserData( 51 void CollectUserData(
42 const ActivityUserData::Snapshot& recorded_map, 52 const ActivityUserData::Snapshot& recorded_map,
43 google::protobuf::Map<std::string, TypedValue>* collected_map, 53 google::protobuf::Map<std::string, TypedValue>* collected_map,
44 StabilityReport* report) { 54 StabilityReport* report) {
45 DCHECK(collected_map); 55 DCHECK(collected_map);
46 56
47 for (const auto& name_and_value : recorded_map) { 57 for (const auto& name_and_value : recorded_map) {
48 const std::string& key = name_and_value.first; 58 const std::string& key = name_and_value.first;
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
108 break; 118 break;
109 case ActivityUserData::UNSIGNED_VALUE: 119 case ActivityUserData::UNSIGNED_VALUE:
110 collected_value.set_unsigned_value(recorded_value.GetUint()); 120 collected_value.set_unsigned_value(recorded_value.GetUint());
111 break; 121 break;
112 } 122 }
113 123
114 (*collected_map)[key].Swap(&collected_value); 124 (*collected_map)[key].Swap(&collected_value);
115 } 125 }
116 } 126 }
117 127
128 bool GetStartTimestamp(
129 const google::protobuf::Map<std::string, TypedValue>& global_data,
130 base::Time* time) {
131 DCHECK(time);
132
133 const auto& it = global_data.find(kStabilityStartTimestamp);
134 if (it == global_data.end())
135 return false;
136
137 const TypedValue& value = it->second;
138 if (value.value_case() != TypedValue::kSignedValue)
139 return false;
140
141 *time = base::Time::FromInternalValue(value.signed_value());
142 return true;
143 }
144
118 void CollectModuleInformation( 145 void CollectModuleInformation(
119 const std::vector<GlobalActivityTracker::ModuleInfo>& modules, 146 const std::vector<GlobalActivityTracker::ModuleInfo>& modules,
120 ProcessState* process_state) { 147 ProcessState* process_state) {
121 DCHECK(process_state); 148 DCHECK(process_state);
122 149
123 char code_identifier[17]; 150 char code_identifier[17];
124 char debug_identifier[41]; 151 char debug_identifier[41];
125 152
126 for (const GlobalActivityTracker::ModuleInfo& recorded : modules) { 153 for (const GlobalActivityTracker::ModuleInfo& recorded : modules) {
127 CodeModule* collected = process_state->add_modules(); 154 CodeModule* collected = process_state->add_modules();
(...skipping 18 matching lines...) Expand all
146 collected->set_debug_identifier(debug_identifier); 173 collected->set_debug_identifier(debug_identifier);
147 collected->set_is_unloaded(!recorded.is_loaded); 174 collected->set_is_unloaded(!recorded.is_loaded);
148 } 175 }
149 } 176 }
150 177
151 } // namespace 178 } // namespace
152 179
153 PostmortemReportCollector::PostmortemReportCollector( 180 PostmortemReportCollector::PostmortemReportCollector(
154 const std::string& product_name, 181 const std::string& product_name,
155 const std::string& version_number, 182 const std::string& version_number,
156 const std::string& channel_name) 183 const std::string& channel_name,
184 SystemSessionAnalyzer* analyzer)
157 : product_name_(product_name), 185 : product_name_(product_name),
158 version_number_(version_number), 186 version_number_(version_number),
159 channel_name_(channel_name) {} 187 channel_name_(channel_name),
188 system_session_analyzer_(analyzer) {}
189
190 PostmortemReportCollector::~PostmortemReportCollector() {}
160 191
161 int PostmortemReportCollector::CollectAndSubmitForUpload( 192 int PostmortemReportCollector::CollectAndSubmitForUpload(
Sigurður Ásgeirsson 2017/03/06 19:00:09 MaybeCollectAndSubmit or CollectAndSubmitAllPendin… [comment truncated]
manzagop (departed) 2017/03/06 21:14:41 Done.
162 const base::FilePath& debug_info_dir, 193 const base::FilePath& debug_info_dir,
163 const base::FilePath::StringType& debug_file_pattern, 194 const base::FilePath::StringType& debug_file_pattern,
164 const std::set<base::FilePath>& excluded_debug_files, 195 const std::set<base::FilePath>& excluded_debug_files,
165 crashpad::CrashReportDatabase* report_database) { 196 crashpad::CrashReportDatabase* report_database) {
166 DCHECK_NE(true, debug_info_dir.empty()); 197 DCHECK_NE(true, debug_info_dir.empty());
167 DCHECK_NE(true, debug_file_pattern.empty()); 198 DCHECK_NE(true, debug_file_pattern.empty());
168 DCHECK_NE(nullptr, report_database); 199 DCHECK_NE(nullptr, report_database);
169 200
170 // Collect the list of files to harvest. 201 // Collect the list of files to harvest.
171 std::vector<FilePath> debug_files = GetDebugStateFilePaths( 202 std::vector<FilePath> debug_files = GetDebugStateFilePaths(
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
208 debug_file_pattern); 239 debug_file_pattern);
209 FilePath path; 240 FilePath path;
210 for (path = enumerator.Next(); !path.empty(); path = enumerator.Next()) { 241 for (path = enumerator.Next(); !path.empty(); path = enumerator.Next()) {
211 if (excluded_debug_files.find(path) == excluded_debug_files.end()) 242 if (excluded_debug_files.find(path) == excluded_debug_files.end())
212 paths.push_back(path); 243 paths.push_back(path);
213 } 244 }
214 return paths; 245 return paths;
215 } 246 }
216 247
217 PostmortemReportCollector::CollectionStatus 248 PostmortemReportCollector::CollectionStatus
218 PostmortemReportCollector::CollectAndSubmit( 249 PostmortemReportCollector::CollectAndSubmit(
Sigurður Ásgeirsson 2017/03/06 19:00:09 Per below comment, this could be collectAndSubmitO… [comment truncated]
manzagop (departed) 2017/03/06 21:14:41 Done.
219 const crashpad::UUID& client_id, 250 const crashpad::UUID& client_id,
220 const FilePath& file, 251 const FilePath& file,
221 crashpad::CrashReportDatabase* report_database) { 252 crashpad::CrashReportDatabase* report_database) {
222 DCHECK_NE(nullptr, report_database); 253 DCHECK_NE(nullptr, report_database);
223 254
224 // Note: the code below involves two notions of report: chrome internal state 255 // Note: the code below involves two notions of report: chrome internal state
225 // reports and the crashpad reports they get wrapped into. 256 // reports and the crashpad reports they get wrapped into.
226 257
227 // Collect the data from the debug file to a proto. Note: a non-empty report 258 // Collect the data from the debug file to a proto. Note: a non-empty report
228 // is interpreted here as an unclean exit. 259 // is interpreted here as an unclean exit.
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
272 database_status = report_database->FinishedWritingCrashReport( 303 database_status = report_database->FinishedWritingCrashReport(
273 new_report, &unused_report_id); 304 new_report, &unused_report_id);
274 if (database_status != CrashReportDatabase::kNoError) { 305 if (database_status != CrashReportDatabase::kNoError) {
275 LOG(ERROR) << "FinishedWritingCrashReport failed"; 306 LOG(ERROR) << "FinishedWritingCrashReport failed";
276 return FINISHED_WRITING_CRASH_REPORT_FAILED; 307 return FINISHED_WRITING_CRASH_REPORT_FAILED;
277 } 308 }
278 309
279 return SUCCESS; 310 return SUCCESS;
280 } 311 }
281 312
282 PostmortemReportCollector::CollectionStatus PostmortemReportCollector::Collect( 313 PostmortemReportCollector::CollectionStatus PostmortemReportCollector::Collect(
Sigurður Ásgeirsson 2017/03/06 19:00:09 I'm having trouble keeping pluralities straight in… [comment truncated]
manzagop (departed) 2017/03/06 21:14:41 Done.
283 const base::FilePath& debug_state_file, 314 const base::FilePath& debug_state_file,
284 std::unique_ptr<StabilityReport>* report) { 315 std::unique_ptr<StabilityReport>* report) {
285 DCHECK_NE(nullptr, report); 316 DCHECK_NE(nullptr, report);
286 report->reset(); 317 report->reset();
287 318
288 // Create a global analyzer. 319 // Create a global analyzer.
289 std::unique_ptr<GlobalActivityAnalyzer> global_analyzer = 320 std::unique_ptr<GlobalActivityAnalyzer> global_analyzer =
290 GlobalActivityAnalyzer::CreateWithFile(debug_state_file); 321 GlobalActivityAnalyzer::CreateWithFile(debug_state_file);
291 if (!global_analyzer) 322 if (!global_analyzer)
292 return ANALYZER_CREATION_FAILED; 323 return ANALYZER_CREATION_FAILED;
(...skipping 10 matching lines...) Expand all
303 334
304 // Create the report, then flesh it out. 335 // Create the report, then flesh it out.
305 report->reset(new StabilityReport()); 336 report->reset(new StabilityReport());
306 337
307 // Collect log messages. 338 // Collect log messages.
308 for (const std::string& message : log_messages) { 339 for (const std::string& message : log_messages) {
309 (*report)->add_log_messages(message); 340 (*report)->add_log_messages(message);
310 } 341 }
311 342
312 // Collect global user data. 343 // Collect global user data.
313 google::protobuf::Map<std::string, TypedValue>& global_data = 344 CollectUserData(global_data_snapshot, (*report)->mutable_global_data(),
314 *(*report)->mutable_global_data(); 345 report->get());
315 CollectUserData(global_data_snapshot, &global_data, report->get()); 346 SetReporterDetails(report->get());
316 347 RecordSystemShutdownState(report->get());
317 // Add the reporting Chrome's details to the report.
318 global_data[kStabilityReporterChannel].set_string_value(channel_name());
319 #if defined(ARCH_CPU_X86)
320 global_data[kStabilityReporterPlatform].set_string_value(
321 std::string("Win32"));
322 #elif defined(ARCH_CPU_X86_64)
323 global_data[kStabilityReporterPlatform].set_string_value(
324 std::string("Win64"));
325 #endif
326 global_data[kStabilityReporterProduct].set_string_value(product_name());
327 global_data[kStabilityReporterVersion].set_string_value(version_number());
328 348
329 // Collect thread activity data. 349 // Collect thread activity data.
330 // Note: a single process is instrumented. 350 // Note: only the browser process records stability data for now.
331 ProcessState* process_state = (*report)->add_process_states(); 351 ProcessState* process_state = (*report)->add_process_states();
332 for (; thread_analyzer != nullptr; 352 for (; thread_analyzer != nullptr;
333 thread_analyzer = global_analyzer->GetNextAnalyzer()) { 353 thread_analyzer = global_analyzer->GetNextAnalyzer()) {
334 // Only valid analyzers are expected per contract of GetFirstAnalyzer / 354 // Only valid analyzers are expected per contract of GetFirstAnalyzer /
335 // GetNextAnalyzer. 355 // GetNextAnalyzer.
336 DCHECK(thread_analyzer->IsValid()); 356 DCHECK(thread_analyzer->IsValid());
337 357
338 if (!process_state->has_process_id()) { 358 if (!process_state->has_process_id()) {
339 process_state->set_process_id( 359 process_state->set_process_id(
340 thread_analyzer->activity_snapshot().process_id); 360 thread_analyzer->activity_snapshot().process_id);
341 } 361 }
342 DCHECK_EQ(thread_analyzer->activity_snapshot().process_id, 362 DCHECK_EQ(thread_analyzer->activity_snapshot().process_id,
343 process_state->process_id()); 363 process_state->process_id());
344 364
345 ThreadState* thread_state = process_state->add_threads(); 365 ThreadState* thread_state = process_state->add_threads();
346 CollectThread(thread_analyzer->activity_snapshot(), thread_state); 366 CollectThread(thread_analyzer->activity_snapshot(), thread_state);
347 } 367 }
348 368
349 // Collect module information. 369 // Collect module information.
350 CollectModuleInformation(global_analyzer->GetModules(), process_state); 370 CollectModuleInformation(global_analyzer->GetModules(), process_state);
351 371
352 return SUCCESS; 372 return SUCCESS;
353 } 373 }
354 374
375 void PostmortemReportCollector::SetReporterDetails(
376 StabilityReport* report) const {
377 DCHECK(report);
378
Sigurður Ásgeirsson 2017/03/06 19:00:09 a quick comment here that we do this because the r… [comment truncated]
manzagop (departed) 2017/03/06 21:14:41 Done.
379 google::protobuf::Map<std::string, TypedValue>& global_data =
380 *(report->mutable_global_data());
381
382 // Reporter version details.
383 global_data[kStabilityReporterChannel].set_string_value(channel_name());
384 #if defined(ARCH_CPU_X86)
385 global_data[kStabilityReporterPlatform].set_string_value(
386 std::string("Win32"));
387 #elif defined(ARCH_CPU_X86_64)
388 global_data[kStabilityReporterPlatform].set_string_value(
389 std::string("Win64"));
390 #endif
391 global_data[kStabilityReporterProduct].set_string_value(product_name());
392 global_data[kStabilityReporterVersion].set_string_value(version_number());
393 }
394
395 void PostmortemReportCollector::RecordSystemShutdownState(
396 StabilityReport* report) const {
397 DCHECK(report);
398
399 // A session state for the stability report: was the system session clean?
Sigurður Ásgeirsson 2017/03/06 19:00:09 rather than phrasing things as questions, maybe ha… [comment truncated]
manzagop (departed) 2017/03/06 21:14:41 Done.
400 SystemState::SessionState session_state = SystemState::UNKNOWN;
401 // An analysis status for metrics: did the analysis succeed?
402 SystemSessionAnalysisStatus status = SYSTEM_SESSION_ANALYSIS_SUCCESS;
403
404 base::Time time;
405 if (!GetStartTimestamp(report->global_data(), &time)) {
406 status = SYSTEM_SESSION_ANALYSIS_NO_TIMESTAMP;
407 } else if (!system_session_analyzer_) {
408 status = SYSTEM_SESSION_ANALYSIS_NO_ANALYZER;
409 } else {
410 SystemSessionAnalyzer::Status analyzer_status =
411 system_session_analyzer_->IsSessionUnclean(time);
412 switch (analyzer_status) {
413 case SystemSessionAnalyzer::FAILED:
414 status = SYSTEM_SESSION_ANALYSIS_FAILED;
415 break;
416 case SystemSessionAnalyzer::CLEAN:
417 session_state = SystemState::CLEAN;
418 break;
419 case SystemSessionAnalyzer::UNCLEAN:
420 session_state = SystemState::UNCLEAN;
421 break;
422 case SystemSessionAnalyzer::OUTSIDE_RANGE:
423 status = SYSTEM_SESSION_ANALYSIS_OUTSIDE_RANGE;
424 break;
425 }
426 }
427
428 report->mutable_system_state()->set_session_state(session_state);
429 UMA_HISTOGRAM_ENUMERATION(
430 "ActivityTracker.Collect.SystemSessionAnalysisStatus", status,
431 SYSTEM_SESSION_ANALYSIS_STATUS_MAX);
432 }
433
355 void PostmortemReportCollector::CollectThread( 434 void PostmortemReportCollector::CollectThread(
356 const base::debug::ThreadActivityAnalyzer::Snapshot& snapshot, 435 const base::debug::ThreadActivityAnalyzer::Snapshot& snapshot,
357 ThreadState* thread_state) { 436 ThreadState* thread_state) {
358 DCHECK(thread_state); 437 DCHECK(thread_state);
359 438
360 thread_state->set_thread_name(snapshot.thread_name); 439 thread_state->set_thread_name(snapshot.thread_name);
361 thread_state->set_thread_id(snapshot.thread_id); 440 thread_state->set_thread_id(snapshot.thread_id);
362 thread_state->set_activity_count(snapshot.activity_stack_depth); 441 thread_state->set_activity_count(snapshot.activity_stack_depth);
363 442
364 for (size_t i = 0; i < snapshot.activity_stack.size(); ++i) { 443 for (size_t i = 0; i < snapshot.activity_stack.size(); ++i) {
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
404 StabilityReport* report, 483 StabilityReport* report,
405 const crashpad::UUID& client_id, 484 const crashpad::UUID& client_id,
406 const crashpad::UUID& report_id, 485 const crashpad::UUID& report_id,
407 base::PlatformFile minidump_file) { 486 base::PlatformFile minidump_file) {
408 DCHECK(report); 487 DCHECK(report);
409 488
410 return WritePostmortemDump(minidump_file, client_id, report_id, report); 489 return WritePostmortemDump(minidump_file, client_id, report_id, report);
411 } 490 }
412 491
413 } // namespace browser_watcher 492 } // namespace browser_watcher
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698