Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1267)

Side by Side Diff: components/browser_watcher/postmortem_report_collector.cc

Issue 2923523002: Decouple stability instrumentation recording and collection (Closed)
Patch Set: PostmortemDeleter unittest Created 3 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/browser_watcher/postmortem_report_collector.h" 5 #include "components/browser_watcher/postmortem_report_collector.h"
6 6
7 #include <utility> 7 #include <utility>
8 8
9 #include "base/files/file_enumerator.h"
10 #include "base/files/file_util.h" 9 #include "base/files/file_util.h"
11 #include "base/logging.h" 10 #include "base/logging.h"
12 #include "base/metrics/histogram_macros.h" 11 #include "base/metrics/histogram_macros.h"
13 #include "base/path_service.h" 12 #include "base/path_service.h"
14 #include "base/strings/string_piece.h" 13 #include "base/strings/string_piece.h"
15 #include "base/strings/string_util.h" 14 #include "base/strings/string_util.h"
16 #include "base/strings/utf_string_conversions.h" 15 #include "base/strings/utf_string_conversions.h"
17 #include "components/browser_watcher/postmortem_minidump_writer.h" 16 #include "components/browser_watcher/postmortem_minidump_writer.h"
18 #include "components/browser_watcher/stability_data_names.h" 17 #include "components/browser_watcher/stability_data_names.h"
19 #include "third_party/crashpad/crashpad/client/settings.h" 18 #include "third_party/crashpad/crashpad/client/settings.h"
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
53 return true; 52 return true;
54 } 53 }
55 54
56 void LogCollectionStatus(CollectionStatus status) { 55 void LogCollectionStatus(CollectionStatus status) {
57 UMA_HISTOGRAM_ENUMERATION("ActivityTracker.Collect.Status", status, 56 UMA_HISTOGRAM_ENUMERATION("ActivityTracker.Collect.Status", status,
58 COLLECTION_STATUS_MAX); 57 COLLECTION_STATUS_MAX);
59 } 58 }
60 59
61 } // namespace 60 } // namespace
62 61
62 void PostmortemDeleter::Process(
63 const std::vector<base::FilePath>& stability_files) {
64 for (const FilePath& file : stability_files) {
65 base::DeleteFile(file, false)
Sigurður Ásgeirsson 2017/06/05 15:39:39 I'm not fond of using the trinary operator in this
manzagop (departed) 2017/06/05 18:02:16 Done.
66 ? LogCollectionStatus(UNCLEAN_SHUTDOWN)
67 : LogCollectionStatus(DEBUG_FILE_DELETION_FAILED);
68 }
69 }
70
63 PostmortemReportCollector::PostmortemReportCollector( 71 PostmortemReportCollector::PostmortemReportCollector(
64 const std::string& product_name, 72 const std::string& product_name,
65 const std::string& version_number, 73 const std::string& version_number,
66 const std::string& channel_name, 74 const std::string& channel_name,
75 crashpad::CrashReportDatabase* report_database,
67 SystemSessionAnalyzer* analyzer) 76 SystemSessionAnalyzer* analyzer)
68 : product_name_(product_name), 77 : product_name_(product_name),
69 version_number_(version_number), 78 version_number_(version_number),
70 channel_name_(channel_name), 79 channel_name_(channel_name),
71 system_session_analyzer_(analyzer) {} 80 report_database_(report_database),
81 system_session_analyzer_(analyzer) {
82 DCHECK_NE(nullptr, report_database);
83 }
72 84
73 PostmortemReportCollector::~PostmortemReportCollector() {} 85 PostmortemReportCollector::~PostmortemReportCollector() {}
74 86
75 int PostmortemReportCollector::CollectAndSubmitAllPendingReports( 87 void PostmortemReportCollector::Process(
76 const base::FilePath& debug_info_dir, 88 const std::vector<base::FilePath>& stability_files) {
77 const base::FilePath::StringType& debug_file_pattern,
78 const std::set<base::FilePath>& excluded_debug_files,
79 crashpad::CrashReportDatabase* report_database) {
80 DCHECK_NE(true, debug_info_dir.empty());
81 DCHECK_NE(true, debug_file_pattern.empty());
82 DCHECK_NE(nullptr, report_database);
83
84 // Collect the list of files to harvest.
85 std::vector<FilePath> debug_files = GetDebugStateFilePaths(
86 debug_info_dir, debug_file_pattern, excluded_debug_files);
87 UMA_HISTOGRAM_COUNTS_100("ActivityTracker.Collect.StabilityFileCount",
88 debug_files.size());
89
90 // Determine the crashpad client id. 89 // Determine the crashpad client id.
91 crashpad::UUID client_id; 90 crashpad::UUID client_id;
92 crashpad::Settings* settings = report_database->GetSettings(); 91 crashpad::Settings* settings = report_database_->GetSettings();
93 if (settings) { 92 if (settings) {
94 // If GetSettings() or GetClientID() fails client_id will be left at its 93 // If GetSettings() or GetClientID() fails client_id will be left at its
95 // default value, all zeroes, which is appropriate. 94 // default value, all zeroes, which is appropriate.
96 settings->GetClientID(&client_id); 95 settings->GetClientID(&client_id);
97 } 96 }
98 97
99 // Number of unclean shutdowns based on successful collection of stability 98 for (const FilePath& file : stability_files) {
100 // reports. 99 CollectAndSubmitOneReport(client_id, file);
101 int unclean_cnt = 0;
102 // Number of unclean shutdowns that may be attributable to system instability
103 // based on successful collection of stability reports. This number should be
104 // smaller or equal to |unclean_cnt|.
105 int unclean_system_cnt = 0;
106
107 // Process each stability file.
108 for (const FilePath& file : debug_files) {
109 bool system_unclean = false;
110 if (CollectAndSubmitOneReport(client_id, file, report_database,
111 &system_unclean)) {
112 ++unclean_cnt;
113 if (system_unclean)
114 ++unclean_system_cnt;
115 }
116 } 100 }
117
118 UMA_STABILITY_HISTOGRAM_COUNTS_100(
119 "ActivityTracker.Collect.UncleanShutdownCount", unclean_cnt);
120 UMA_STABILITY_HISTOGRAM_COUNTS_100(
121 "ActivityTracker.Collect.UncleanSystemCount", unclean_system_cnt);
122
123 return unclean_cnt;
124 } 101 }
125 102
126 std::vector<FilePath> PostmortemReportCollector::GetDebugStateFilePaths( 103 void PostmortemReportCollector::CollectAndSubmitOneReport(
127 const FilePath& debug_info_dir,
128 const FilePath::StringType& debug_file_pattern,
129 const std::set<FilePath>& excluded_debug_files) {
130 DCHECK_NE(true, debug_info_dir.empty());
131 DCHECK_NE(true, debug_file_pattern.empty());
132
133 std::vector<FilePath> paths;
134 base::FileEnumerator enumerator(debug_info_dir, false /* recursive */,
135 base::FileEnumerator::FILES,
136 debug_file_pattern);
137 FilePath path;
138 for (path = enumerator.Next(); !path.empty(); path = enumerator.Next()) {
139 if (excluded_debug_files.find(path) == excluded_debug_files.end())
140 paths.push_back(path);
141 }
142 return paths;
143 }
144
145 bool PostmortemReportCollector::CollectAndSubmitOneReport(
146 const crashpad::UUID& client_id, 104 const crashpad::UUID& client_id,
147 const FilePath& file, 105 const FilePath& file) {
148 crashpad::CrashReportDatabase* report_database, 106 DCHECK_NE(nullptr, report_database_);
149 bool* system_unclean) { 107 LogCollectionStatus(COLLECTION_ATTEMPT);
150 DCHECK_NE(nullptr, report_database);
151 DCHECK_NE(nullptr, system_unclean);
152 *system_unclean = false;
153 108
154 // Note: the code below involves two notions of report: chrome internal state 109 // Note: the code below involves two notions of report: chrome internal state
155 // reports and the crashpad reports they get wrapped into. 110 // reports and the crashpad reports they get wrapped into.
156 111
157 // Collect the data from the debug file to a proto. Note: a non-empty report 112 // Collect the data from the debug file to a proto.
158 // is interpreted here as an unclean exit.
159 StabilityReport report_proto; 113 StabilityReport report_proto;
160 CollectionStatus status = CollectOneReport(file, &report_proto); 114 CollectionStatus status = CollectOneReport(file, &report_proto);
161 if (status != SUCCESS) { 115 if (status != SUCCESS) {
162 // The file was empty, or there was an error collecting the data. Detailed 116 // The file was empty, or there was an error collecting the data. Detailed
163 // logging happens within the Collect function. 117 // logging happens within the Collect function.
164 if (!base::DeleteFile(file, false)) 118 if (!base::DeleteFile(file, false))
165 DLOG(ERROR) << "Failed to delete " << file.value(); 119 DLOG(ERROR) << "Failed to delete " << file.value();
166 LogCollectionStatus(status); 120 LogCollectionStatus(status);
167 return false; 121 return;
168 } 122 }
169 123
124 // Delete the stability file. If the file cannot be deleted, do not report its
125 // contents - it will be retried in a future processing run. Note that this
126 // approach can lead to under reporting and retries. However, under reporting
127 // is preferable to the over reporting that would happen with a file that
128 // cannot be deleted. Also note that the crash registration may still fail at
129 // this point: losing the report in such a case is deemed acceptable.
130 if (!base::DeleteFile(file, false)) {
131 DLOG(ERROR) << "Failed to delete " << file.value();
132 LogCollectionStatus(DEBUG_FILE_DELETION_FAILED);
Sigurður Ásgeirsson 2017/06/05 15:39:39 Out of curiosity - assuming a file is somehow perm
manzagop (departed) 2017/06/05 18:02:16 That is correct.
133 return;
134 }
135
136 LogCollectionStatus(UNCLEAN_SHUTDOWN);
137 if (report_proto.system_state().session_state() == SystemState::UNCLEAN)
138 LogCollectionStatus(UNCLEAN_SESSION);
139
170 // Prepare a crashpad report. 140 // Prepare a crashpad report.
171 CrashReportDatabase::NewReport* new_report = nullptr; 141 CrashReportDatabase::NewReport* new_report = nullptr;
172 CrashReportDatabase::OperationStatus database_status = 142 CrashReportDatabase::OperationStatus database_status =
173 report_database->PrepareNewCrashReport(&new_report); 143 report_database_->PrepareNewCrashReport(&new_report);
174 if (database_status != CrashReportDatabase::kNoError) { 144 if (database_status != CrashReportDatabase::kNoError) {
175 // Assume this is recoverable: not deleting the file.
176 DLOG(ERROR) << "PrepareNewCrashReport failed";
177 LogCollectionStatus(PREPARE_NEW_CRASH_REPORT_FAILED); 145 LogCollectionStatus(PREPARE_NEW_CRASH_REPORT_FAILED);
178 return false; 146 return;
179 } 147 }
180 CrashReportDatabase::CallErrorWritingCrashReport 148 CrashReportDatabase::CallErrorWritingCrashReport
181 call_error_writing_crash_report(report_database, new_report); 149 call_error_writing_crash_report(report_database_, new_report);
182 150
183 // Write the report to a minidump. 151 // Write the report to a minidump.
184 if (!WriteReportToMinidump(&report_proto, client_id, new_report->uuid, 152 if (!WriteReportToMinidump(&report_proto, client_id, new_report->uuid,
185 reinterpret_cast<FILE*>(new_report->handle))) { 153 reinterpret_cast<FILE*>(new_report->handle))) {
186 // Assume this is not recoverable and delete the file.
187 if (!base::DeleteFile(file, false))
188 DLOG(ERROR) << "Failed to delete " << file.value();
189 LogCollectionStatus(WRITE_TO_MINIDUMP_FAILED); 154 LogCollectionStatus(WRITE_TO_MINIDUMP_FAILED);
190 return false; 155 return;
191 }
192
193 // If the file cannot be deleted, do not report its contents. Note this can
194 // lead to under reporting and retries. However, under reporting is
195 // preferable to the over reporting that would happen with a file that
196 // cannot be deleted.
197 // TODO(manzagop): metrics for the number of non-deletable files.
198 if (!base::DeleteFile(file, false)) {
199 DLOG(ERROR) << "Failed to delete " << file.value();
200 LogCollectionStatus(DEBUG_FILE_DELETION_FAILED);
201 return false;
202 } 156 }
203 157
204 // Finalize the report wrt the report database. Note that this doesn't trigger 158 // Finalize the report wrt the report database. Note that this doesn't trigger
205 // an immediate upload, but Crashpad will eventually upload the report (as of 159 // an immediate upload, but Crashpad will eventually upload the report (as of
206 // writing, the delay is on the order of up to 15 minutes). 160 // writing, the delay is on the order of up to 15 minutes).
207 call_error_writing_crash_report.Disarm(); 161 call_error_writing_crash_report.Disarm();
208 crashpad::UUID unused_report_id; 162 crashpad::UUID unused_report_id;
209 database_status = report_database->FinishedWritingCrashReport( 163 database_status = report_database_->FinishedWritingCrashReport(
210 new_report, &unused_report_id); 164 new_report, &unused_report_id);
211 if (database_status != CrashReportDatabase::kNoError) { 165 if (database_status != CrashReportDatabase::kNoError) {
212 DLOG(ERROR) << "FinishedWritingCrashReport failed";
213 LogCollectionStatus(FINISHED_WRITING_CRASH_REPORT_FAILED); 166 LogCollectionStatus(FINISHED_WRITING_CRASH_REPORT_FAILED);
214 return false; 167 return;
215 } 168 }
216 169
217 LogCollectionStatus(SUCCESS); 170 LogCollectionStatus(SUCCESS);
218 if (report_proto.system_state().session_state() == SystemState::UNCLEAN)
219 *system_unclean = true;
220 return true;
221 } 171 }
222 172
223 CollectionStatus PostmortemReportCollector::CollectOneReport( 173 CollectionStatus PostmortemReportCollector::CollectOneReport(
224 const base::FilePath& file, 174 const base::FilePath& file,
225 StabilityReport* report) { 175 StabilityReport* report) {
226 DCHECK(report); 176 DCHECK(report);
227 177
228 CollectionStatus status = Extract(file, report); 178 CollectionStatus status = Extract(file, report);
229 if (status != SUCCESS) 179 if (status != SUCCESS)
230 return status; 180 return status;
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
301 StabilityReport* report, 251 StabilityReport* report,
302 const crashpad::UUID& client_id, 252 const crashpad::UUID& client_id,
303 const crashpad::UUID& report_id, 253 const crashpad::UUID& report_id,
304 base::PlatformFile minidump_file) { 254 base::PlatformFile minidump_file) {
305 DCHECK(report); 255 DCHECK(report);
306 256
307 return WritePostmortemDump(minidump_file, client_id, report_id, report); 257 return WritePostmortemDump(minidump_file, client_id, report_id, report);
308 } 258 }
309 259
310 } // namespace browser_watcher 260 } // namespace browser_watcher
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698