Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(255)

Side by Side Diff: components/browser_watcher/postmortem_report_collector.cc

Issue 2883103002: Quantify instability according to the stability instrumentation (Closed)
Patch Set: Siggi round 3 Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/browser_watcher/postmortem_report_collector.h" 5 #include "components/browser_watcher/postmortem_report_collector.h"
6 6
7 #include <utility> 7 #include <utility>
8 8
9 #include "base/files/file_enumerator.h" 9 #include "base/files/file_enumerator.h"
10 #include "base/files/file_util.h" 10 #include "base/files/file_util.h"
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
46 return false; 46 return false;
47 47
48 const TypedValue& value = it->second; 48 const TypedValue& value = it->second;
49 if (value.value_case() != TypedValue::kSignedValue) 49 if (value.value_case() != TypedValue::kSignedValue)
50 return false; 50 return false;
51 51
52 *time = base::Time::FromInternalValue(value.signed_value()); 52 *time = base::Time::FromInternalValue(value.signed_value());
53 return true; 53 return true;
54 } 54 }
55 55
56 void LogCollectionStatus(CollectionStatus status) {
57 UMA_HISTOGRAM_ENUMERATION("ActivityTracker.Collect.Status", status,
58 COLLECTION_STATUS_MAX);
59 }
60
56 } // namespace 61 } // namespace
57 62
58 PostmortemReportCollector::PostmortemReportCollector( 63 PostmortemReportCollector::PostmortemReportCollector(
59 const std::string& product_name, 64 const std::string& product_name,
60 const std::string& version_number, 65 const std::string& version_number,
61 const std::string& channel_name, 66 const std::string& channel_name,
62 SystemSessionAnalyzer* analyzer) 67 SystemSessionAnalyzer* analyzer)
63 : product_name_(product_name), 68 : product_name_(product_name),
64 version_number_(version_number), 69 version_number_(version_number),
65 channel_name_(channel_name), 70 channel_name_(channel_name),
(...skipping 18 matching lines...) Expand all
84 89
85 // Determine the crashpad client id. 90 // Determine the crashpad client id.
86 crashpad::UUID client_id; 91 crashpad::UUID client_id;
87 crashpad::Settings* settings = report_database->GetSettings(); 92 crashpad::Settings* settings = report_database->GetSettings();
88 if (settings) { 93 if (settings) {
89 // If GetSettings() or GetClientID() fails client_id will be left at its 94 // If GetSettings() or GetClientID() fails client_id will be left at its
90 // default value, all zeroes, which is appropriate. 95 // default value, all zeroes, which is appropriate.
91 settings->GetClientID(&client_id); 96 settings->GetClientID(&client_id);
92 } 97 }
93 98
99 // Number of unclean shutdowns based on successful collection of stability
100 // reports.
101 int unclean_cnt = 0;
102 // Number of unclean shutdowns that may be attributable to system instability
103 // based on successful collection of stability reports. This number should be
104 // smaller or equal to |unclean_cnt|.
105 int unclean_system_cnt = 0;
106
94 // Process each stability file. 107 // Process each stability file.
95 int success_cnt = 0;
96 for (const FilePath& file : debug_files) { 108 for (const FilePath& file : debug_files) {
97 CollectionStatus status = 109 bool system_unclean = false;
98 CollectAndSubmitOneReport(client_id, file, report_database); 110 if (CollectAndSubmitOneReport(client_id, file, report_database,
99 // TODO(manzagop): consider making this a stability metric. 111 &system_unclean)) {
100 UMA_HISTOGRAM_ENUMERATION("ActivityTracker.Collect.Status", status, 112 ++unclean_cnt;
101 COLLECTION_STATUS_MAX); 113 }
102 if (status == SUCCESS) 114 if (system_unclean)
Sigurður Ásgeirsson 2017/05/18 16:38:18 nit: put this inside the block, as that way you're
manzagop (departed) 2017/05/18 16:57:03 Done.
103 ++success_cnt; 115 ++unclean_system_cnt;
104 } 116 }
105 117
106 return success_cnt; 118 UMA_STABILITY_HISTOGRAM_COUNTS_100(
119 "ActivityTracker.Collect.UncleanShutdownCount", unclean_cnt);
120 UMA_STABILITY_HISTOGRAM_COUNTS_100(
121 "ActivityTracker.Collect.UncleanSystemCount", unclean_system_cnt);
122
123 return unclean_cnt;
107 } 124 }
108 125
109 std::vector<FilePath> PostmortemReportCollector::GetDebugStateFilePaths( 126 std::vector<FilePath> PostmortemReportCollector::GetDebugStateFilePaths(
110 const FilePath& debug_info_dir, 127 const FilePath& debug_info_dir,
111 const FilePath::StringType& debug_file_pattern, 128 const FilePath::StringType& debug_file_pattern,
112 const std::set<FilePath>& excluded_debug_files) { 129 const std::set<FilePath>& excluded_debug_files) {
113 DCHECK_NE(true, debug_info_dir.empty()); 130 DCHECK_NE(true, debug_info_dir.empty());
114 DCHECK_NE(true, debug_file_pattern.empty()); 131 DCHECK_NE(true, debug_file_pattern.empty());
115 132
116 std::vector<FilePath> paths; 133 std::vector<FilePath> paths;
117 base::FileEnumerator enumerator(debug_info_dir, false /* recursive */, 134 base::FileEnumerator enumerator(debug_info_dir, false /* recursive */,
118 base::FileEnumerator::FILES, 135 base::FileEnumerator::FILES,
119 debug_file_pattern); 136 debug_file_pattern);
120 FilePath path; 137 FilePath path;
121 for (path = enumerator.Next(); !path.empty(); path = enumerator.Next()) { 138 for (path = enumerator.Next(); !path.empty(); path = enumerator.Next()) {
122 if (excluded_debug_files.find(path) == excluded_debug_files.end()) 139 if (excluded_debug_files.find(path) == excluded_debug_files.end())
123 paths.push_back(path); 140 paths.push_back(path);
124 } 141 }
125 return paths; 142 return paths;
126 } 143 }
127 144
128 CollectionStatus PostmortemReportCollector::CollectAndSubmitOneReport( 145 bool PostmortemReportCollector::CollectAndSubmitOneReport(
129 const crashpad::UUID& client_id, 146 const crashpad::UUID& client_id,
130 const FilePath& file, 147 const FilePath& file,
131 crashpad::CrashReportDatabase* report_database) { 148 crashpad::CrashReportDatabase* report_database,
149 bool* system_unclean) {
132 DCHECK_NE(nullptr, report_database); 150 DCHECK_NE(nullptr, report_database);
151 DCHECK_NE(nullptr, system_unclean);
152 *system_unclean = false;
133 153
134 // Note: the code below involves two notions of report: chrome internal state 154 // Note: the code below involves two notions of report: chrome internal state
135 // reports and the crashpad reports they get wrapped into. 155 // reports and the crashpad reports they get wrapped into.
136 156
137 // Collect the data from the debug file to a proto. Note: a non-empty report 157 // Collect the data from the debug file to a proto. Note: a non-empty report
138 // is interpreted here as an unclean exit. 158 // is interpreted here as an unclean exit.
139 StabilityReport report_proto; 159 StabilityReport report_proto;
140 CollectionStatus status = CollectOneReport(file, &report_proto); 160 CollectionStatus status = CollectOneReport(file, &report_proto);
141 if (status != SUCCESS) { 161 if (status != SUCCESS) {
142 // The file was empty, or there was an error collecting the data. Detailed 162 // The file was empty, or there was an error collecting the data. Detailed
143 // logging happens within the Collect function. 163 // logging happens within the Collect function.
144 if (!base::DeleteFile(file, false)) 164 if (!base::DeleteFile(file, false))
145 DLOG(ERROR) << "Failed to delete " << file.value(); 165 DLOG(ERROR) << "Failed to delete " << file.value();
146 return status; 166 LogCollectionStatus(status);
167 return false;
147 } 168 }
148 169
149 // Prepare a crashpad report. 170 // Prepare a crashpad report.
150 CrashReportDatabase::NewReport* new_report = nullptr; 171 CrashReportDatabase::NewReport* new_report = nullptr;
151 CrashReportDatabase::OperationStatus database_status = 172 CrashReportDatabase::OperationStatus database_status =
152 report_database->PrepareNewCrashReport(&new_report); 173 report_database->PrepareNewCrashReport(&new_report);
153 if (database_status != CrashReportDatabase::kNoError) { 174 if (database_status != CrashReportDatabase::kNoError) {
154 // Assume this is recoverable: not deleting the file. 175 // Assume this is recoverable: not deleting the file.
155 DLOG(ERROR) << "PrepareNewCrashReport failed"; 176 DLOG(ERROR) << "PrepareNewCrashReport failed";
156 return PREPARE_NEW_CRASH_REPORT_FAILED; 177 LogCollectionStatus(PREPARE_NEW_CRASH_REPORT_FAILED);
178 return false;
157 } 179 }
158 CrashReportDatabase::CallErrorWritingCrashReport 180 CrashReportDatabase::CallErrorWritingCrashReport
159 call_error_writing_crash_report(report_database, new_report); 181 call_error_writing_crash_report(report_database, new_report);
160 182
161 // Write the report to a minidump. 183 // Write the report to a minidump.
162 if (!WriteReportToMinidump(&report_proto, client_id, new_report->uuid, 184 if (!WriteReportToMinidump(&report_proto, client_id, new_report->uuid,
163 reinterpret_cast<FILE*>(new_report->handle))) { 185 reinterpret_cast<FILE*>(new_report->handle))) {
164 // Assume this is not recoverable and delete the file. 186 // Assume this is not recoverable and delete the file.
165 if (!base::DeleteFile(file, false)) 187 if (!base::DeleteFile(file, false))
166 DLOG(ERROR) << "Failed to delete " << file.value(); 188 DLOG(ERROR) << "Failed to delete " << file.value();
167 return WRITE_TO_MINIDUMP_FAILED; 189 LogCollectionStatus(WRITE_TO_MINIDUMP_FAILED);
190 return false;
168 } 191 }
169 192
170 // If the file cannot be deleted, do not report its contents. Note this can 193 // If the file cannot be deleted, do not report its contents. Note this can
171 // lead to under reporting and retries. However, under reporting is 194 // lead to under reporting and retries. However, under reporting is
172 // preferable to the over reporting that would happen with a file that 195 // preferable to the over reporting that would happen with a file that
173 // cannot be deleted. 196 // cannot be deleted.
174 // TODO(manzagop): metrics for the number of non-deletable files. 197 // TODO(manzagop): metrics for the number of non-deletable files.
175 if (!base::DeleteFile(file, false)) { 198 if (!base::DeleteFile(file, false)) {
176 DLOG(ERROR) << "Failed to delete " << file.value(); 199 DLOG(ERROR) << "Failed to delete " << file.value();
177 return DEBUG_FILE_DELETION_FAILED; 200 LogCollectionStatus(DEBUG_FILE_DELETION_FAILED);
201 return false;
178 } 202 }
179 203
180 // Finalize the report wrt the report database. Note that this doesn't trigger 204 // Finalize the report wrt the report database. Note that this doesn't trigger
181 // an immediate upload, but Crashpad will eventually upload the report (as of 205 // an immediate upload, but Crashpad will eventually upload the report (as of
182 // writing, the delay is on the order of up to 15 minutes). 206 // writing, the delay is on the order of up to 15 minutes).
183 call_error_writing_crash_report.Disarm(); 207 call_error_writing_crash_report.Disarm();
184 crashpad::UUID unused_report_id; 208 crashpad::UUID unused_report_id;
185 database_status = report_database->FinishedWritingCrashReport( 209 database_status = report_database->FinishedWritingCrashReport(
186 new_report, &unused_report_id); 210 new_report, &unused_report_id);
187 if (database_status != CrashReportDatabase::kNoError) { 211 if (database_status != CrashReportDatabase::kNoError) {
188 DLOG(ERROR) << "FinishedWritingCrashReport failed"; 212 DLOG(ERROR) << "FinishedWritingCrashReport failed";
189 return FINISHED_WRITING_CRASH_REPORT_FAILED; 213 LogCollectionStatus(FINISHED_WRITING_CRASH_REPORT_FAILED);
214 return false;
190 } 215 }
191 216
192 return SUCCESS; 217 // Report collection is successful. We may increment |unclean_system_cnt|.
218 if (report_proto.system_state().session_state() == SystemState::UNCLEAN)
219 *system_unclean = true;
220
221 return true;
193 } 222 }
194 223
195 CollectionStatus PostmortemReportCollector::CollectOneReport( 224 CollectionStatus PostmortemReportCollector::CollectOneReport(
196 const base::FilePath& file, 225 const base::FilePath& file,
197 StabilityReport* report) { 226 StabilityReport* report) {
198 DCHECK(report); 227 DCHECK(report);
199 228
200 CollectionStatus status = Extract(file, report); 229 CollectionStatus status = Extract(file, report);
201 if (status != SUCCESS) 230 if (status != SUCCESS)
202 return status; 231 return status;
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
273 StabilityReport* report, 302 StabilityReport* report,
274 const crashpad::UUID& client_id, 303 const crashpad::UUID& client_id,
275 const crashpad::UUID& report_id, 304 const crashpad::UUID& report_id,
276 base::PlatformFile minidump_file) { 305 base::PlatformFile minidump_file) {
277 DCHECK(report); 306 DCHECK(report);
278 307
279 return WritePostmortemDump(minidump_file, client_id, report_id, report); 308 return WritePostmortemDump(minidump_file, client_id, report_id, report);
280 } 309 }
281 310
282 } // namespace browser_watcher 311 } // namespace browser_watcher
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698