Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(227)

Side by Side Diff: components/browser_watcher/postmortem_report_collector.cc

Issue 2923523002: Decouple stability instrumentation recording and collection (Closed)
Patch Set: Address Siggi's comments Created 3 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/browser_watcher/postmortem_report_collector.h" 5 #include "components/browser_watcher/postmortem_report_collector.h"
6 6
7 #include <utility> 7 #include <utility>
8 8
9 #include "base/files/file_enumerator.h"
10 #include "base/files/file_util.h" 9 #include "base/files/file_util.h"
11 #include "base/logging.h" 10 #include "base/logging.h"
12 #include "base/metrics/histogram_macros.h" 11 #include "base/metrics/histogram_macros.h"
13 #include "base/path_service.h" 12 #include "base/path_service.h"
14 #include "base/strings/string_piece.h" 13 #include "base/strings/string_piece.h"
15 #include "base/strings/string_util.h" 14 #include "base/strings/string_util.h"
16 #include "base/strings/utf_string_conversions.h" 15 #include "base/strings/utf_string_conversions.h"
17 #include "components/browser_watcher/postmortem_minidump_writer.h" 16 #include "components/browser_watcher/postmortem_minidump_writer.h"
18 #include "components/browser_watcher/stability_data_names.h" 17 #include "components/browser_watcher/stability_data_names.h"
19 #include "third_party/crashpad/crashpad/client/settings.h" 18 #include "third_party/crashpad/crashpad/client/settings.h"
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
53 return true; 52 return true;
54 } 53 }
55 54
56 void LogCollectionStatus(CollectionStatus status) { 55 void LogCollectionStatus(CollectionStatus status) {
57 UMA_HISTOGRAM_ENUMERATION("ActivityTracker.Collect.Status", status, 56 UMA_HISTOGRAM_ENUMERATION("ActivityTracker.Collect.Status", status,
58 COLLECTION_STATUS_MAX); 57 COLLECTION_STATUS_MAX);
59 } 58 }
60 59
61 } // namespace 60 } // namespace
62 61
62 void PostmortemDeleter::Process(
63 const std::vector<base::FilePath>& stability_files) {
64 for (const FilePath& file : stability_files) {
65 if (base::DeleteFile(file, false)) {
Sigurður Ásgeirsson 2017/06/05 18:09:00 ubernit: I believe our coding guidelines specify n
66 LogCollectionStatus(UNCLEAN_SHUTDOWN);
67 } else {
68 LogCollectionStatus(DEBUG_FILE_DELETION_FAILED);
69 }
70 }
71 }
72
63 PostmortemReportCollector::PostmortemReportCollector( 73 PostmortemReportCollector::PostmortemReportCollector(
64 const std::string& product_name, 74 const std::string& product_name,
65 const std::string& version_number, 75 const std::string& version_number,
66 const std::string& channel_name, 76 const std::string& channel_name,
77 crashpad::CrashReportDatabase* report_database,
67 SystemSessionAnalyzer* analyzer) 78 SystemSessionAnalyzer* analyzer)
68 : product_name_(product_name), 79 : product_name_(product_name),
69 version_number_(version_number), 80 version_number_(version_number),
70 channel_name_(channel_name), 81 channel_name_(channel_name),
71 system_session_analyzer_(analyzer) {} 82 report_database_(report_database),
83 system_session_analyzer_(analyzer) {
84 DCHECK_NE(nullptr, report_database);
85 }
72 86
73 PostmortemReportCollector::~PostmortemReportCollector() {} 87 PostmortemReportCollector::~PostmortemReportCollector() {}
74 88
75 int PostmortemReportCollector::CollectAndSubmitAllPendingReports( 89 void PostmortemReportCollector::Process(
76 const base::FilePath& debug_info_dir, 90 const std::vector<base::FilePath>& stability_files) {
77 const base::FilePath::StringType& debug_file_pattern,
78 const std::set<base::FilePath>& excluded_debug_files,
79 crashpad::CrashReportDatabase* report_database) {
80 DCHECK_NE(true, debug_info_dir.empty());
81 DCHECK_NE(true, debug_file_pattern.empty());
82 DCHECK_NE(nullptr, report_database);
83
84 // Collect the list of files to harvest.
85 std::vector<FilePath> debug_files = GetDebugStateFilePaths(
86 debug_info_dir, debug_file_pattern, excluded_debug_files);
87 UMA_HISTOGRAM_COUNTS_100("ActivityTracker.Collect.StabilityFileCount",
88 debug_files.size());
89
90 // Determine the crashpad client id. 91 // Determine the crashpad client id.
91 crashpad::UUID client_id; 92 crashpad::UUID client_id;
92 crashpad::Settings* settings = report_database->GetSettings(); 93 crashpad::Settings* settings = report_database_->GetSettings();
93 if (settings) { 94 if (settings) {
94 // If GetSettings() or GetClientID() fails client_id will be left at its 95 // If GetSettings() or GetClientID() fails client_id will be left at its
95 // default value, all zeroes, which is appropriate. 96 // default value, all zeroes, which is appropriate.
96 settings->GetClientID(&client_id); 97 settings->GetClientID(&client_id);
97 } 98 }
98 99
99 // Number of unclean shutdowns based on successful collection of stability 100 for (const FilePath& file : stability_files) {
100 // reports. 101 CollectAndSubmitOneReport(client_id, file);
101 int unclean_cnt = 0;
102 // Number of unclean shutdowns that may be attributable to system instability
103 // based on successful collection of stability reports. This number should be
104 // smaller or equal to |unclean_cnt|.
105 int unclean_system_cnt = 0;
106
107 // Process each stability file.
108 for (const FilePath& file : debug_files) {
109 bool system_unclean = false;
110 if (CollectAndSubmitOneReport(client_id, file, report_database,
111 &system_unclean)) {
112 ++unclean_cnt;
113 if (system_unclean)
114 ++unclean_system_cnt;
115 }
116 } 102 }
117
118 UMA_STABILITY_HISTOGRAM_COUNTS_100(
119 "ActivityTracker.Collect.UncleanShutdownCount", unclean_cnt);
120 UMA_STABILITY_HISTOGRAM_COUNTS_100(
121 "ActivityTracker.Collect.UncleanSystemCount", unclean_system_cnt);
122
123 return unclean_cnt;
124 } 103 }
125 104
126 std::vector<FilePath> PostmortemReportCollector::GetDebugStateFilePaths( 105 void PostmortemReportCollector::CollectAndSubmitOneReport(
127 const FilePath& debug_info_dir,
128 const FilePath::StringType& debug_file_pattern,
129 const std::set<FilePath>& excluded_debug_files) {
130 DCHECK_NE(true, debug_info_dir.empty());
131 DCHECK_NE(true, debug_file_pattern.empty());
132
133 std::vector<FilePath> paths;
134 base::FileEnumerator enumerator(debug_info_dir, false /* recursive */,
135 base::FileEnumerator::FILES,
136 debug_file_pattern);
137 FilePath path;
138 for (path = enumerator.Next(); !path.empty(); path = enumerator.Next()) {
139 if (excluded_debug_files.find(path) == excluded_debug_files.end())
140 paths.push_back(path);
141 }
142 return paths;
143 }
144
145 bool PostmortemReportCollector::CollectAndSubmitOneReport(
146 const crashpad::UUID& client_id, 106 const crashpad::UUID& client_id,
147 const FilePath& file, 107 const FilePath& file) {
148 crashpad::CrashReportDatabase* report_database, 108 DCHECK_NE(nullptr, report_database_);
149 bool* system_unclean) { 109 LogCollectionStatus(COLLECTION_ATTEMPT);
150 DCHECK_NE(nullptr, report_database);
151 DCHECK_NE(nullptr, system_unclean);
152 *system_unclean = false;
153 110
154 // Note: the code below involves two notions of report: chrome internal state 111 // Note: the code below involves two notions of report: chrome internal state
155 // reports and the crashpad reports they get wrapped into. 112 // reports and the crashpad reports they get wrapped into.
156 113
157 // Collect the data from the debug file to a proto. Note: a non-empty report 114 // Collect the data from the debug file to a proto.
158 // is interpreted here as an unclean exit.
159 StabilityReport report_proto; 115 StabilityReport report_proto;
160 CollectionStatus status = CollectOneReport(file, &report_proto); 116 CollectionStatus status = CollectOneReport(file, &report_proto);
161 if (status != SUCCESS) { 117 if (status != SUCCESS) {
162 // The file was empty, or there was an error collecting the data. Detailed 118 // The file was empty, or there was an error collecting the data. Detailed
163 // logging happens within the Collect function. 119 // logging happens within the Collect function.
164 if (!base::DeleteFile(file, false)) 120 if (!base::DeleteFile(file, false))
165 DLOG(ERROR) << "Failed to delete " << file.value(); 121 DLOG(ERROR) << "Failed to delete " << file.value();
166 LogCollectionStatus(status); 122 LogCollectionStatus(status);
167 return false; 123 return;
168 } 124 }
169 125
126 // Delete the stability file. If the file cannot be deleted, do not report its
127 // contents - it will be retried in a future processing run. Note that this
128 // approach can lead to under reporting and retries. However, under reporting
129 // is preferable to the over reporting that would happen with a file that
130 // cannot be deleted. Also note that the crash registration may still fail at
131 // this point: losing the report in such a case is deemed acceptable.
132 if (!base::DeleteFile(file, false)) {
133 DLOG(ERROR) << "Failed to delete " << file.value();
134 LogCollectionStatus(DEBUG_FILE_DELETION_FAILED);
135 return;
136 }
137
138 LogCollectionStatus(UNCLEAN_SHUTDOWN);
139 if (report_proto.system_state().session_state() == SystemState::UNCLEAN)
140 LogCollectionStatus(UNCLEAN_SESSION);
141
170 // Prepare a crashpad report. 142 // Prepare a crashpad report.
171 CrashReportDatabase::NewReport* new_report = nullptr; 143 CrashReportDatabase::NewReport* new_report = nullptr;
172 CrashReportDatabase::OperationStatus database_status = 144 CrashReportDatabase::OperationStatus database_status =
173 report_database->PrepareNewCrashReport(&new_report); 145 report_database_->PrepareNewCrashReport(&new_report);
174 if (database_status != CrashReportDatabase::kNoError) { 146 if (database_status != CrashReportDatabase::kNoError) {
175 // Assume this is recoverable: not deleting the file.
176 DLOG(ERROR) << "PrepareNewCrashReport failed";
177 LogCollectionStatus(PREPARE_NEW_CRASH_REPORT_FAILED); 147 LogCollectionStatus(PREPARE_NEW_CRASH_REPORT_FAILED);
178 return false; 148 return;
179 } 149 }
180 CrashReportDatabase::CallErrorWritingCrashReport 150 CrashReportDatabase::CallErrorWritingCrashReport
181 call_error_writing_crash_report(report_database, new_report); 151 call_error_writing_crash_report(report_database_, new_report);
182 152
183 // Write the report to a minidump. 153 // Write the report to a minidump.
184 if (!WriteReportToMinidump(&report_proto, client_id, new_report->uuid, 154 if (!WriteReportToMinidump(&report_proto, client_id, new_report->uuid,
185 reinterpret_cast<FILE*>(new_report->handle))) { 155 reinterpret_cast<FILE*>(new_report->handle))) {
186 // Assume this is not recoverable and delete the file.
187 if (!base::DeleteFile(file, false))
188 DLOG(ERROR) << "Failed to delete " << file.value();
189 LogCollectionStatus(WRITE_TO_MINIDUMP_FAILED); 156 LogCollectionStatus(WRITE_TO_MINIDUMP_FAILED);
190 return false; 157 return;
191 }
192
193 // If the file cannot be deleted, do not report its contents. Note this can
194 // lead to under reporting and retries. However, under reporting is
195 // preferable to the over reporting that would happen with a file that
196 // cannot be deleted.
197 // TODO(manzagop): metrics for the number of non-deletable files.
198 if (!base::DeleteFile(file, false)) {
199 DLOG(ERROR) << "Failed to delete " << file.value();
200 LogCollectionStatus(DEBUG_FILE_DELETION_FAILED);
201 return false;
202 } 158 }
203 159
204 // Finalize the report wrt the report database. Note that this doesn't trigger 160 // Finalize the report wrt the report database. Note that this doesn't trigger
205 // an immediate upload, but Crashpad will eventually upload the report (as of 161 // an immediate upload, but Crashpad will eventually upload the report (as of
206 // writing, the delay is on the order of up to 15 minutes). 162 // writing, the delay is on the order of up to 15 minutes).
207 call_error_writing_crash_report.Disarm(); 163 call_error_writing_crash_report.Disarm();
208 crashpad::UUID unused_report_id; 164 crashpad::UUID unused_report_id;
209 database_status = report_database->FinishedWritingCrashReport( 165 database_status = report_database_->FinishedWritingCrashReport(
210 new_report, &unused_report_id); 166 new_report, &unused_report_id);
211 if (database_status != CrashReportDatabase::kNoError) { 167 if (database_status != CrashReportDatabase::kNoError) {
212 DLOG(ERROR) << "FinishedWritingCrashReport failed";
213 LogCollectionStatus(FINISHED_WRITING_CRASH_REPORT_FAILED); 168 LogCollectionStatus(FINISHED_WRITING_CRASH_REPORT_FAILED);
214 return false; 169 return;
215 } 170 }
216 171
217 LogCollectionStatus(SUCCESS); 172 LogCollectionStatus(SUCCESS);
218 if (report_proto.system_state().session_state() == SystemState::UNCLEAN)
219 *system_unclean = true;
220 return true;
221 } 173 }
222 174
223 CollectionStatus PostmortemReportCollector::CollectOneReport( 175 CollectionStatus PostmortemReportCollector::CollectOneReport(
224 const base::FilePath& file, 176 const base::FilePath& file,
225 StabilityReport* report) { 177 StabilityReport* report) {
226 DCHECK(report); 178 DCHECK(report);
227 179
228 CollectionStatus status = Extract(file, report); 180 CollectionStatus status = Extract(file, report);
229 if (status != SUCCESS) 181 if (status != SUCCESS)
230 return status; 182 return status;
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
301 StabilityReport* report, 253 StabilityReport* report,
302 const crashpad::UUID& client_id, 254 const crashpad::UUID& client_id,
303 const crashpad::UUID& report_id, 255 const crashpad::UUID& report_id,
304 base::PlatformFile minidump_file) { 256 base::PlatformFile minidump_file) {
305 DCHECK(report); 257 DCHECK(report);
306 258
307 return WritePostmortemDump(minidump_file, client_id, report_id, report); 259 return WritePostmortemDump(minidump_file, client_id, report_id, report);
308 } 260 }
309 261
310 } // namespace browser_watcher 262 } // namespace browser_watcher
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698