OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/history/core/browser/top_sites_database.h" | 5 #include "components/history/core/browser/top_sites_database.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 #include <stdint.h> | 8 #include <stdint.h> |
9 #include <utility> | 9 #include <utility> |
10 | 10 |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
50 | 50 |
51 // For this database, schema migrations are deprecated after two | 51 // For this database, schema migrations are deprecated after two |
52 // years. This means that the oldest non-deprecated version should be | 52 // years. This means that the oldest non-deprecated version should be |
53 // two years old or greater (thus the migrations to get there are | 53 // two years old or greater (thus the migrations to get there are |
54 // older). Databases containing deprecated versions will be cleared | 54 // older). Databases containing deprecated versions will be cleared |
55 // at startup. Since this database is a cache, losing old data is not | 55 // at startup. Since this database is a cache, losing old data is not |
56 // fatal (in fact, very old data may be expired immediately at startup | 56 // fatal (in fact, very old data may be expired immediately at startup |
57 // anyhow). | 57 // anyhow). |
58 | 58 |
59 // Version 3: b6d6a783/r231648 by beaudoin@chromium.org on 2013-10-29 | 59 // Version 3: b6d6a783/r231648 by beaudoin@chromium.org on 2013-10-29 |
60 // Version 2: eb0b24e6/r87284 by satorux@chromium.org on 2011-05-31 | 60 // Version 2: eb0b24e6/r87284 by satorux@chromium.org on 2011-05-31 (deprecated) |
61 // Version 1: 809cc4d8/r64072 by sky@chromium.org on 2010-10-27 (deprecated) | 61 // Version 1: 809cc4d8/r64072 by sky@chromium.org on 2010-10-27 (deprecated) |
62 | 62 |
63 // NOTE(shess): When changing the version, add a new golden file for | 63 // NOTE(shess): When changing the version, add a new golden file for |
64 // the new version and a test to verify that Init() works with it. | 64 // the new version and a test to verify that Init() works with it. |
65 // NOTE(shess): RecoverDatabaseOrRaze() depends on the specific | |
66 // version number. The code is subtle and in development, contact me | |
67 // if the necessary changes are not obvious. | |
68 static const int kVersionNumber = 3; | 65 static const int kVersionNumber = 3; |
69 static const int kDeprecatedVersionNumber = 1; // and earlier. | 66 static const int kDeprecatedVersionNumber = 2; // and earlier. |
70 | 67 |
71 bool InitTables(sql::Connection* db) { | 68 bool InitTables(sql::Connection* db) { |
72 const char kThumbnailsSql[] = | 69 const char kThumbnailsSql[] = |
73 "CREATE TABLE IF NOT EXISTS thumbnails (" | 70 "CREATE TABLE IF NOT EXISTS thumbnails (" |
74 "url LONGVARCHAR PRIMARY KEY," | 71 "url LONGVARCHAR PRIMARY KEY," |
75 "url_rank INTEGER," | 72 "url_rank INTEGER," |
76 "title LONGVARCHAR," | 73 "title LONGVARCHAR," |
77 "thumbnail BLOB," | 74 "thumbnail BLOB," |
78 "redirects LONGVARCHAR," | 75 "redirects LONGVARCHAR," |
79 "boring_score DOUBLE DEFAULT 1.0," | 76 "boring_score DOUBLE DEFAULT 1.0," |
(...skipping 20 matching lines...) Expand all Loading... |
100 base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY)) { | 97 base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY)) { |
101 GURL redirect_url(redirect); | 98 GURL redirect_url(redirect); |
102 if (redirect_url.is_valid()) | 99 if (redirect_url.is_valid()) |
103 url->redirects.push_back(redirect_url); | 100 url->redirects.push_back(redirect_url); |
104 } | 101 } |
105 } | 102 } |
106 | 103 |
107 // Track various failure (and success) cases in recovery code. | 104 // Track various failure (and success) cases in recovery code. |
108 // | 105 // |
109 // TODO(shess): The recovery code is complete, but by nature runs in challenging | 106 // TODO(shess): The recovery code is complete, but by nature runs in challenging |
110 // circumstances, so initially the default error response is to leave the | 107 // circumstances, so errors will happen. This histogram is intended to expose |
111 // existing database in place. This histogram is intended to expose the | 108 // the failures seen in the fleet. Frequent failure cases can be explored more |
112 // failures seen in the fleet. Frequent failure cases can be explored more | |
113 // deeply to see if the complexity to fix them is warranted. Infrequent failure | 109 // deeply to see if the complexity to fix them is warranted. Infrequent failure |
114 // cases can be resolved by marking the database unrecoverable (which will | 110 // cases can be resolved by marking the database unrecoverable (which will |
115 // delete the data). | 111 // delete the data). |
116 // | 112 // |
117 // Based on the thumbnail_database.cc recovery code, FAILED_SCOPER should | 113 // Based on the thumbnail_database.cc recovery code, FAILED_SCOPER should |
118 // dominate, followed distantly by FAILED_META, with few or no other failures. | 114 // dominate, followed distantly by FAILED_META, with few or no other failures. |
119 enum RecoveryEventType { | 115 enum RecoveryEventType { |
120 // Database successfully recovered. | 116 // Database successfully recovered. |
121 RECOVERY_EVENT_RECOVERED = 0, | 117 RECOVERY_EVENT_RECOVERED = 0, |
122 | 118 |
123 // Database successfully deprecated. | 119 // Database successfully deprecated. |
124 RECOVERY_EVENT_DEPRECATED, | 120 RECOVERY_EVENT_DEPRECATED, |
125 | 121 |
126 // Sqlite.RecoveryEvent can usually be used to get more detail about the | 122 // Sqlite.RecoveryEvent can usually be used to get more detail about the |
127 // specific failure (see sql/recovery.cc). | 123 // specific failure (see sql/recovery.cc). |
128 RECOVERY_EVENT_FAILED_SCOPER, | 124 OBSOLETE_RECOVERY_EVENT_FAILED_SCOPER, |
129 RECOVERY_EVENT_FAILED_META_VERSION, | 125 RECOVERY_EVENT_FAILED_META_VERSION, |
130 RECOVERY_EVENT_FAILED_META_WRONG_VERSION, | 126 RECOVERY_EVENT_FAILED_META_WRONG_VERSION, |
131 RECOVERY_EVENT_FAILED_META_INIT, | 127 OBSOLETE_RECOVERY_EVENT_FAILED_META_INIT, |
132 RECOVERY_EVENT_FAILED_SCHEMA_INIT, | 128 OBSOLETE_RECOVERY_EVENT_FAILED_SCHEMA_INIT, |
133 RECOVERY_EVENT_FAILED_AUTORECOVER_THUMBNAILS, | 129 OBSOLETE_RECOVERY_EVENT_FAILED_AUTORECOVER_THUMBNAILS, |
134 RECOVERY_EVENT_FAILED_COMMIT, | 130 RECOVERY_EVENT_FAILED_COMMIT, |
135 | 131 |
136 // Track invariants resolved by FixThumbnailsTable(). | 132 // Track invariants resolved by FixThumbnailsTable(). |
137 RECOVERY_EVENT_INVARIANT_RANK, | 133 RECOVERY_EVENT_INVARIANT_RANK, |
138 RECOVERY_EVENT_INVARIANT_REDIRECT, | 134 RECOVERY_EVENT_INVARIANT_REDIRECT, |
139 RECOVERY_EVENT_INVARIANT_CONTIGUOUS, | 135 RECOVERY_EVENT_INVARIANT_CONTIGUOUS, |
140 | 136 |
| 137 // Track automated full-database recovery. |
| 138 RECOVERY_EVENT_FAILED_AUTORECOVER, |
| 139 |
141 // Always keep this at the end. | 140 // Always keep this at the end. |
142 RECOVERY_EVENT_MAX, | 141 RECOVERY_EVENT_MAX, |
143 }; | 142 }; |
144 | 143 |
145 void RecordRecoveryEvent(RecoveryEventType recovery_event) { | 144 void RecordRecoveryEvent(RecoveryEventType recovery_event) { |
146 UMA_HISTOGRAM_ENUMERATION("History.TopSitesRecovery", recovery_event, | 145 UMA_HISTOGRAM_ENUMERATION("History.TopSitesRecovery", recovery_event, |
147 RECOVERY_EVENT_MAX); | 146 RECOVERY_EVENT_MAX); |
148 } | 147 } |
149 | 148 |
150 // Most corruption comes down to atomic updates between pages being broken | 149 // Most corruption comes down to atomic updates between pages being broken |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
195 update_statement.BindInt(0, next_rank); | 194 update_statement.BindInt(0, next_rank); |
196 update_statement.BindInt64(1, select_statement.ColumnInt64(1)); | 195 update_statement.BindInt64(1, select_statement.ColumnInt64(1)); |
197 update_statement.Run(); | 196 update_statement.Run(); |
198 } | 197 } |
199 ++next_rank; | 198 ++next_rank; |
200 } | 199 } |
201 if (adjusted) | 200 if (adjusted) |
202 RecordRecoveryEvent(RECOVERY_EVENT_INVARIANT_CONTIGUOUS); | 201 RecordRecoveryEvent(RECOVERY_EVENT_INVARIANT_CONTIGUOUS); |
203 } | 202 } |
204 | 203 |
205 // Recover the database to the extent possible, razing it if recovery is not | 204 // Recover the database to the extent possible, then fix up any broken |
206 // possible. | 205 // constraints. |
207 void RecoverDatabaseOrRaze(sql::Connection* db, const base::FilePath& db_path) { | 206 void RecoverAndFixup(sql::Connection* db, const base::FilePath& db_path) { |
208 // NOTE(shess): If the version changes, review this code. | 207 // NOTE(shess): If the version changes, review this code. |
209 DCHECK_EQ(3, kVersionNumber); | 208 DCHECK_EQ(3, kVersionNumber); |
210 | 209 |
211 // It is almost certain that some operation against |db| will fail, prevent | 210 std::unique_ptr<sql::Recovery> recovery = |
212 // reentry. | 211 sql::Recovery::BeginRecoverDatabase(db, db_path); |
213 db->reset_error_callback(); | |
214 | |
215 // For generating histogram stats. | |
216 size_t thumbnails_recovered = 0; | |
217 int64_t original_size = 0; | |
218 base::GetFileSize(db_path, &original_size); | |
219 | |
220 std::unique_ptr<sql::Recovery> recovery = sql::Recovery::Begin(db, db_path); | |
221 if (!recovery) { | 212 if (!recovery) { |
222 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_SCOPER); | 213 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_AUTORECOVER); |
223 return; | 214 return; |
224 } | 215 } |
225 | 216 |
226 // Setup the meta recovery table and fetch the version number from the corrupt | 217 // If the [meta] table does not exist, or the [version] key cannot be found, |
227 // database. | 218 // then the schema is indeterminate. The only plausible approach would be to |
| 219 // validate that the schema contains all of the tables and indices and columns |
| 220 // expected, but that complexity may not be warranted; this case has only been |
| 221 // seen for a few thousand database files. |
228 int version = 0; | 222 int version = 0; |
229 if (!recovery->SetupMeta() || !recovery->GetMetaVersionNumber(&version)) { | 223 if (!recovery->SetupMeta() || !recovery->GetMetaVersionNumber(&version)) { |
230 // TODO(shess): Prior histograms indicate all failures are in creating the | 224 sql::Recovery::Unrecoverable(std::move(recovery)); |
231 // recover virtual table for corrupt.meta. The table may not exist, or the | |
232 // database may be too far gone. Either way, unclear how to resolve. | |
233 sql::Recovery::Rollback(std::move(recovery)); | |
234 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_META_VERSION); | 225 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_META_VERSION); |
235 return; | 226 return; |
236 } | 227 } |
237 | 228 |
238 // This code runs in a context which may be able to read version information | 229 // In this case the next open will clear the database anyhow. |
239 // that the regular deprecation path cannot. The effect of this code will be | |
240 // to raze the database. | |
241 if (version <= kDeprecatedVersionNumber) { | 230 if (version <= kDeprecatedVersionNumber) { |
242 sql::Recovery::Unrecoverable(std::move(recovery)); | 231 sql::Recovery::Unrecoverable(std::move(recovery)); |
243 RecordRecoveryEvent(RECOVERY_EVENT_DEPRECATED); | 232 RecordRecoveryEvent(RECOVERY_EVENT_DEPRECATED); |
244 return; | 233 return; |
245 } | 234 } |
246 | 235 |
247 // TODO(shess): Earlier versions have been deprecated, later versions should | 236 // TODO(shess): Consider marking corrupt databases from the future |
248 // be impossible. Unrecoverable() seems like a feasible response if this is | 237 // Unrecoverable(), since this histogram value has never been seen. OTOH, |
249 // infrequent enough. | 238 // this may be too risky, because if future code was correlated with |
250 if (version != 2 && version != 3) { | 239 // corruption then rollback would be a sensible response. |
| 240 if (version > kVersionNumber) { |
251 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_META_WRONG_VERSION); | 241 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_META_WRONG_VERSION); |
252 sql::Recovery::Rollback(std::move(recovery)); | 242 sql::Recovery::Rollback(std::move(recovery)); |
253 return; | 243 return; |
254 } | 244 } |
255 | 245 |
256 // Both v2 and v3 recover to current schema version. | |
257 sql::MetaTable recover_meta_table; | |
258 if (!recover_meta_table.Init(recovery->db(), kVersionNumber, | |
259 kVersionNumber)) { | |
260 sql::Recovery::Rollback(std::move(recovery)); | |
261 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_META_INIT); | |
262 return; | |
263 } | |
264 | |
265 // Create a fresh version of the schema. The recovery code uses | |
266 // conflict-resolution to handle duplicates, so any indices are necessary. | |
267 if (!InitTables(recovery->db())) { | |
268 // TODO(shess): Unable to create the new schema in the new database. The | |
269 // new database should be a temporary file, so being unable to work with it | |
270 // is pretty unclear. | |
271 // | |
272 // What are the potential responses, even? The recovery database could be | |
273 // opened as in-memory. If the temp database had a filesystem problem and | |
274 // the temp filesystem differs from the main database, then that could fix | |
275 // it. | |
276 sql::Recovery::Rollback(std::move(recovery)); | |
277 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_SCHEMA_INIT); | |
278 return; | |
279 } | |
280 | |
281 // In the v2 case the missing column will get default values. | |
282 if (!recovery->AutoRecoverTable("thumbnails", &thumbnails_recovered)) { | |
283 sql::Recovery::Rollback(std::move(recovery)); | |
284 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_AUTORECOVER_THUMBNAILS); | |
285 return; | |
286 } | |
287 | |
288 // TODO(shess): Inline this? | 246 // TODO(shess): Inline this? |
289 FixThumbnailsTable(recovery->db()); | 247 FixThumbnailsTable(recovery->db()); |
290 | 248 |
291 if (!sql::Recovery::Recovered(std::move(recovery))) { | 249 if (!sql::Recovery::Recovered(std::move(recovery))) { |
292 // TODO(shess): Very unclear what this failure would actually mean, and what | 250 // TODO(shess): Very unclear what this failure would actually mean, and what |
293 // should be done. Add histograms to Recovered() implementation to get some | 251 // should be done. Add histograms to Recovered() implementation to get some |
294 // insight. | 252 // insight. |
295 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_COMMIT); | 253 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_COMMIT); |
296 return; | 254 return; |
297 } | 255 } |
298 | 256 |
299 // Track the size of the recovered database relative to the size of the input | |
300 // database. The size should almost always be smaller, unless the input | |
301 // database was empty to start with. If the percentage results are very low, | |
302 // something is awry. | |
303 int64_t final_size = 0; | |
304 if (original_size > 0 && base::GetFileSize(db_path, &final_size) && | |
305 final_size > 0) { | |
306 UMA_HISTOGRAM_PERCENTAGE("History.TopSitesRecoveredPercentage", | |
307 final_size * 100 / original_size); | |
308 } | |
309 | |
310 // Using 10,000 because these cases mostly care about "none recovered" and | |
311 // "lots recovered". More than 10,000 rows recovered probably means there's | |
312 // something wrong with the profile. | |
313 UMA_HISTOGRAM_COUNTS_10000("History.TopSitesRecoveredRowsThumbnails", | |
314 static_cast<int>(thumbnails_recovered)); | |
315 | |
316 RecordRecoveryEvent(RECOVERY_EVENT_RECOVERED); | 257 RecordRecoveryEvent(RECOVERY_EVENT_RECOVERED); |
317 } | 258 } |
318 | 259 |
319 void DatabaseErrorCallback(sql::Connection* db, | 260 void DatabaseErrorCallback(sql::Connection* db, |
320 const base::FilePath& db_path, | 261 const base::FilePath& db_path, |
321 int extended_error, | 262 int extended_error, |
322 sql::Statement* stmt) { | 263 sql::Statement* stmt) { |
323 // TODO(shess): Assert that this is running on a safe thread. AFAICT, should | 264 // TODO(shess): Assert that this is running on a safe thread. AFAICT, should |
324 // be the history thread, but at this level I can't see how to reach that. | 265 // be the history thread, but at this level I can't see how to reach that. |
325 | 266 |
326 // Attempt to recover corrupt databases. | 267 // Attempt to recover corrupt databases. |
327 int error = (extended_error & 0xFF); | 268 if (sql::Recovery::ShouldRecover(extended_error)) { |
328 if (error == SQLITE_CORRUPT || | 269 // Prevent reentrant calls. |
329 error == SQLITE_CANTOPEN || | 270 db->reset_error_callback(); |
330 error == SQLITE_NOTADB) { | 271 |
331 RecoverDatabaseOrRaze(db, db_path); | 272 // After this call, the |db| handle is poisoned so that future calls will |
| 273 // return errors until the handle is re-opened. |
| 274 RecoverAndFixup(db, db_path); |
| 275 |
| 276 // The DLOG(FATAL) below is intended to draw immediate attention to errors |
| 277 // in newly-written code. Database corruption is generally a result of OS |
| 278 // or hardware issues, not coding errors at the client level, so displaying |
| 279 // the error would probably lead to confusion. The ignored call signals the |
| 280 // test-expectation framework that the error was handled. |
| 281 ignore_result(sql::Connection::IsExpectedSqliteError(extended_error)); |
| 282 return; |
332 } | 283 } |
333 | 284 |
334 // TODO(shess): This database's error histograms look like: | 285 // TODO(shess): This database's error histograms look like: |
335 // 84% SQLITE_CORRUPT, SQLITE_CANTOPEN, SQLITE_NOTADB | 286 // 84% SQLITE_CORRUPT, SQLITE_CANTOPEN, SQLITE_NOTADB |
336 // 7% SQLITE_ERROR | 287 // 7% SQLITE_ERROR |
337 // 6% SQLITE_IOERR variants | 288 // 6% SQLITE_IOERR variants |
338 // 2% SQLITE_READONLY | 289 // 2% SQLITE_READONLY |
339 // .4% SQLITE_FULL | 290 // .4% SQLITE_FULL |
340 // nominal SQLITE_TOOBIG, SQLITE_AUTH, and SQLITE_BUSY. In the case of | 291 // nominal SQLITE_TOOBIG, SQLITE_AUTH, and SQLITE_BUSY. In the case of |
341 // thumbnail_database.cc, as soon as the recovery code landed, SQLITE_IOERR | 292 // thumbnail_database.cc, as soon as the recovery code landed, SQLITE_IOERR |
(...skipping 385 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
727 db->set_error_callback(base::Bind(&DatabaseErrorCallback, db.get(), db_name)); | 678 db->set_error_callback(base::Bind(&DatabaseErrorCallback, db.get(), db_name)); |
728 db->set_page_size(4096); | 679 db->set_page_size(4096); |
729 db->set_cache_size(32); | 680 db->set_cache_size(32); |
730 | 681 |
731 if (!db->Open(db_name)) | 682 if (!db->Open(db_name)) |
732 return NULL; | 683 return NULL; |
733 return db.release(); | 684 return db.release(); |
734 } | 685 } |
735 | 686 |
736 } // namespace history | 687 } // namespace history |
OLD | NEW |