Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(49)

Side by Side Diff: components/browser_watcher/postmortem_report_collector.cc

Issue 2883103002: Quantify instability according to the stability instrumentation (Closed)
Patch Set: Siggi round 2 Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/browser_watcher/postmortem_report_collector.h" 5 #include "components/browser_watcher/postmortem_report_collector.h"
6 6
7 #include <utility> 7 #include <utility>
8 8
9 #include "base/files/file_enumerator.h" 9 #include "base/files/file_enumerator.h"
10 #include "base/files/file_util.h" 10 #include "base/files/file_util.h"
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
46 return false; 46 return false;
47 47
48 const TypedValue& value = it->second; 48 const TypedValue& value = it->second;
49 if (value.value_case() != TypedValue::kSignedValue) 49 if (value.value_case() != TypedValue::kSignedValue)
50 return false; 50 return false;
51 51
52 *time = base::Time::FromInternalValue(value.signed_value()); 52 *time = base::Time::FromInternalValue(value.signed_value());
53 return true; 53 return true;
54 } 54 }
55 55
56 void LogCollectionStatus(CollectionStatus status) {
57 UMA_HISTOGRAM_ENUMERATION("ActivityTracker.Collect.Status", status,
58 COLLECTION_STATUS_MAX);
59 }
60
56 } // namespace 61 } // namespace
57 62
58 PostmortemReportCollector::PostmortemReportCollector( 63 PostmortemReportCollector::PostmortemReportCollector(
59 const std::string& product_name, 64 const std::string& product_name,
60 const std::string& version_number, 65 const std::string& version_number,
61 const std::string& channel_name, 66 const std::string& channel_name,
62 SystemSessionAnalyzer* analyzer) 67 SystemSessionAnalyzer* analyzer)
63 : product_name_(product_name), 68 : product_name_(product_name),
64 version_number_(version_number), 69 version_number_(version_number),
65 channel_name_(channel_name), 70 channel_name_(channel_name),
(...skipping 18 matching lines...) Expand all
84 89
85 // Determine the crashpad client id. 90 // Determine the crashpad client id.
86 crashpad::UUID client_id; 91 crashpad::UUID client_id;
87 crashpad::Settings* settings = report_database->GetSettings(); 92 crashpad::Settings* settings = report_database->GetSettings();
88 if (settings) { 93 if (settings) {
89 // If GetSettings() or GetClientID() fails client_id will be left at its 94 // If GetSettings() or GetClientID() fails client_id will be left at its
90 // default value, all zeroes, which is appropriate. 95 // default value, all zeroes, which is appropriate.
91 settings->GetClientID(&client_id); 96 settings->GetClientID(&client_id);
92 } 97 }
93 98
99 // Number of unclean shutdowns based on successful collection of stability
100 // reports.
101 int unclean_cnt = 0;
102 // Number of unclean shutdowns that may be attributable to system instability
103 // based on successful collection of stability reports. This number should be
104 // smaller or equal to |unclean_cnt|.
105 int unclean_system_cnt = 0;
106
94 // Process each stability file. 107 // Process each stability file.
95 int success_cnt = 0;
96 for (const FilePath& file : debug_files) { 108 for (const FilePath& file : debug_files) {
97 CollectionStatus status = 109 if (CollectAndSubmitOneReport(client_id, file, report_database,
98 CollectAndSubmitOneReport(client_id, file, report_database); 110 &unclean_system_cnt)) {
99 // TODO(manzagop): consider making this a stability metric. 111 ++unclean_cnt;
100 UMA_HISTOGRAM_ENUMERATION("ActivityTracker.Collect.Status", status, 112 }
101 COLLECTION_STATUS_MAX);
102 if (status == SUCCESS)
103 ++success_cnt;
104 } 113 }
105 114
106 return success_cnt; 115 UMA_STABILITY_HISTOGRAM_COUNTS_100(
116 "ActivityTracker.Collect.UncleanShutdownCount", unclean_cnt);
117 UMA_STABILITY_HISTOGRAM_COUNTS_100(
118 "ActivityTracker.Collect.UncleanSystemCount", unclean_system_cnt);
119
120 return unclean_cnt;
107 } 121 }
108 122
109 std::vector<FilePath> PostmortemReportCollector::GetDebugStateFilePaths( 123 std::vector<FilePath> PostmortemReportCollector::GetDebugStateFilePaths(
110 const FilePath& debug_info_dir, 124 const FilePath& debug_info_dir,
111 const FilePath::StringType& debug_file_pattern, 125 const FilePath::StringType& debug_file_pattern,
112 const std::set<FilePath>& excluded_debug_files) { 126 const std::set<FilePath>& excluded_debug_files) {
113 DCHECK_NE(true, debug_info_dir.empty()); 127 DCHECK_NE(true, debug_info_dir.empty());
114 DCHECK_NE(true, debug_file_pattern.empty()); 128 DCHECK_NE(true, debug_file_pattern.empty());
115 129
116 std::vector<FilePath> paths; 130 std::vector<FilePath> paths;
117 base::FileEnumerator enumerator(debug_info_dir, false /* recursive */, 131 base::FileEnumerator enumerator(debug_info_dir, false /* recursive */,
118 base::FileEnumerator::FILES, 132 base::FileEnumerator::FILES,
119 debug_file_pattern); 133 debug_file_pattern);
120 FilePath path; 134 FilePath path;
121 for (path = enumerator.Next(); !path.empty(); path = enumerator.Next()) { 135 for (path = enumerator.Next(); !path.empty(); path = enumerator.Next()) {
122 if (excluded_debug_files.find(path) == excluded_debug_files.end()) 136 if (excluded_debug_files.find(path) == excluded_debug_files.end())
123 paths.push_back(path); 137 paths.push_back(path);
124 } 138 }
125 return paths; 139 return paths;
126 } 140 }
127 141
128 CollectionStatus PostmortemReportCollector::CollectAndSubmitOneReport( 142 bool PostmortemReportCollector::CollectAndSubmitOneReport(
129 const crashpad::UUID& client_id, 143 const crashpad::UUID& client_id,
130 const FilePath& file, 144 const FilePath& file,
131 crashpad::CrashReportDatabase* report_database) { 145 crashpad::CrashReportDatabase* report_database,
146 int* unclean_system_cnt) {
132 DCHECK_NE(nullptr, report_database); 147 DCHECK_NE(nullptr, report_database);
148 DCHECK_NE(nullptr, unclean_system_cnt);
133 149
134 // Note: the code below involves two notions of report: chrome internal state 150 // Note: the code below involves two notions of report: chrome internal state
135 // reports and the crashpad reports they get wrapped into. 151 // reports and the crashpad reports they get wrapped into.
136 152
137 // Collect the data from the debug file to a proto. Note: a non-empty report 153 // Collect the data from the debug file to a proto. Note: a non-empty report
138 // is interpreted here as an unclean exit. 154 // is interpreted here as an unclean exit.
139 StabilityReport report_proto; 155 StabilityReport report_proto;
140 CollectionStatus status = CollectOneReport(file, &report_proto); 156 CollectionStatus status = CollectOneReport(file, &report_proto);
141 if (status != SUCCESS) { 157 if (status != SUCCESS) {
142 // The file was empty, or there was an error collecting the data. Detailed 158 // The file was empty, or there was an error collecting the data. Detailed
143 // logging happens within the Collect function. 159 // logging happens within the Collect function.
144 if (!base::DeleteFile(file, false)) 160 if (!base::DeleteFile(file, false))
145 DLOG(ERROR) << "Failed to delete " << file.value(); 161 DLOG(ERROR) << "Failed to delete " << file.value();
146 return status; 162 LogCollectionStatus(status);
163 return false;
147 } 164 }
148 165
149 // Prepare a crashpad report. 166 // Prepare a crashpad report.
150 CrashReportDatabase::NewReport* new_report = nullptr; 167 CrashReportDatabase::NewReport* new_report = nullptr;
151 CrashReportDatabase::OperationStatus database_status = 168 CrashReportDatabase::OperationStatus database_status =
152 report_database->PrepareNewCrashReport(&new_report); 169 report_database->PrepareNewCrashReport(&new_report);
153 if (database_status != CrashReportDatabase::kNoError) { 170 if (database_status != CrashReportDatabase::kNoError) {
154 // Assume this is recoverable: not deleting the file. 171 // Assume this is recoverable: not deleting the file.
155 DLOG(ERROR) << "PrepareNewCrashReport failed"; 172 DLOG(ERROR) << "PrepareNewCrashReport failed";
156 return PREPARE_NEW_CRASH_REPORT_FAILED; 173 LogCollectionStatus(PREPARE_NEW_CRASH_REPORT_FAILED);
174 return false;
157 } 175 }
158 CrashReportDatabase::CallErrorWritingCrashReport 176 CrashReportDatabase::CallErrorWritingCrashReport
159 call_error_writing_crash_report(report_database, new_report); 177 call_error_writing_crash_report(report_database, new_report);
160 178
161 // Write the report to a minidump. 179 // Write the report to a minidump.
162 if (!WriteReportToMinidump(&report_proto, client_id, new_report->uuid, 180 if (!WriteReportToMinidump(&report_proto, client_id, new_report->uuid,
163 reinterpret_cast<FILE*>(new_report->handle))) { 181 reinterpret_cast<FILE*>(new_report->handle))) {
164 // Assume this is not recoverable and delete the file. 182 // Assume this is not recoverable and delete the file.
165 if (!base::DeleteFile(file, false)) 183 if (!base::DeleteFile(file, false))
166 DLOG(ERROR) << "Failed to delete " << file.value(); 184 DLOG(ERROR) << "Failed to delete " << file.value();
167 return WRITE_TO_MINIDUMP_FAILED; 185 LogCollectionStatus(WRITE_TO_MINIDUMP_FAILED);
186 return false;
168 } 187 }
169 188
170 // If the file cannot be deleted, do not report its contents. Note this can 189 // If the file cannot be deleted, do not report its contents. Note this can
171 // lead to under reporting and retries. However, under reporting is 190 // lead to under reporting and retries. However, under reporting is
172 // preferable to the over reporting that would happen with a file that 191 // preferable to the over reporting that would happen with a file that
173 // cannot be deleted. 192 // cannot be deleted.
174 // TODO(manzagop): metrics for the number of non-deletable files. 193 // TODO(manzagop): metrics for the number of non-deletable files.
175 if (!base::DeleteFile(file, false)) { 194 if (!base::DeleteFile(file, false)) {
176 DLOG(ERROR) << "Failed to delete " << file.value(); 195 DLOG(ERROR) << "Failed to delete " << file.value();
177 return DEBUG_FILE_DELETION_FAILED; 196 LogCollectionStatus(DEBUG_FILE_DELETION_FAILED);
197 return false;
178 } 198 }
179 199
180 // Finalize the report wrt the report database. Note that this doesn't trigger 200 // Finalize the report wrt the report database. Note that this doesn't trigger
181 // an immediate upload, but Crashpad will eventually upload the report (as of 201 // an immediate upload, but Crashpad will eventually upload the report (as of
182 // writing, the delay is on the order of up to 15 minutes). 202 // writing, the delay is on the order of up to 15 minutes).
183 call_error_writing_crash_report.Disarm(); 203 call_error_writing_crash_report.Disarm();
184 crashpad::UUID unused_report_id; 204 crashpad::UUID unused_report_id;
185 database_status = report_database->FinishedWritingCrashReport( 205 database_status = report_database->FinishedWritingCrashReport(
186 new_report, &unused_report_id); 206 new_report, &unused_report_id);
187 if (database_status != CrashReportDatabase::kNoError) { 207 if (database_status != CrashReportDatabase::kNoError) {
188 DLOG(ERROR) << "FinishedWritingCrashReport failed"; 208 DLOG(ERROR) << "FinishedWritingCrashReport failed";
189 return FINISHED_WRITING_CRASH_REPORT_FAILED; 209 LogCollectionStatus(FINISHED_WRITING_CRASH_REPORT_FAILED);
210 return false;
190 } 211 }
191 212
192 return SUCCESS; 213 // Report collection is successful. We may increment |unclean_system_cnt|.
214 if (report_proto.system_state().session_state() == SystemState::UNCLEAN)
215 (*unclean_system_cnt)++;
216
217 return true;
193 } 218 }
194 219
195 CollectionStatus PostmortemReportCollector::CollectOneReport( 220 CollectionStatus PostmortemReportCollector::CollectOneReport(
196 const base::FilePath& file, 221 const base::FilePath& file,
197 StabilityReport* report) { 222 StabilityReport* report) {
198 DCHECK(report); 223 DCHECK(report);
199 224
200 CollectionStatus status = Extract(file, report); 225 CollectionStatus status = Extract(file, report);
201 if (status != SUCCESS) 226 if (status != SUCCESS)
202 return status; 227 return status;
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
273 StabilityReport* report, 298 StabilityReport* report,
274 const crashpad::UUID& client_id, 299 const crashpad::UUID& client_id,
275 const crashpad::UUID& report_id, 300 const crashpad::UUID& report_id,
276 base::PlatformFile minidump_file) { 301 base::PlatformFile minidump_file) {
277 DCHECK(report); 302 DCHECK(report);
278 303
279 return WritePostmortemDump(minidump_file, client_id, report_id, report); 304 return WritePostmortemDump(minidump_file, client_id, report_id, report);
280 } 305 }
281 306
282 } // namespace browser_watcher 307 } // namespace browser_watcher
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698