Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/history/core/browser/top_sites_database.h" | 5 #include "components/history/core/browser/top_sites_database.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 #include <stdint.h> | 8 #include <stdint.h> |
| 9 #include <utility> | 9 #include <utility> |
| 10 | 10 |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 50 | 50 |
| 51 // For this database, schema migrations are deprecated after two | 51 // For this database, schema migrations are deprecated after two |
| 52 // years. This means that the oldest non-deprecated version should be | 52 // years. This means that the oldest non-deprecated version should be |
| 53 // two years old or greater (thus the migrations to get there are | 53 // two years old or greater (thus the migrations to get there are |
| 54 // older). Databases containing deprecated versions will be cleared | 54 // older). Databases containing deprecated versions will be cleared |
| 55 // at startup. Since this database is a cache, losing old data is not | 55 // at startup. Since this database is a cache, losing old data is not |
| 56 // fatal (in fact, very old data may be expired immediately at startup | 56 // fatal (in fact, very old data may be expired immediately at startup |
| 57 // anyhow). | 57 // anyhow). |
| 58 | 58 |
| 59 // Version 3: b6d6a783/r231648 by beaudoin@chromium.org on 2013-10-29 | 59 // Version 3: b6d6a783/r231648 by beaudoin@chromium.org on 2013-10-29 |
| 60 // Version 2: eb0b24e6/r87284 by satorux@chromium.org on 2011-05-31 | 60 // Version 2: eb0b24e6/r87284 by satorux@chromium.org on 2011-05-31 (deprecated) |
| 61 // Version 1: 809cc4d8/r64072 by sky@chromium.org on 2010-10-27 (deprecated) | 61 // Version 1: 809cc4d8/r64072 by sky@chromium.org on 2010-10-27 (deprecated) |
| 62 | 62 |
| 63 // NOTE(shess): When changing the version, add a new golden file for | 63 // NOTE(shess): When changing the version, add a new golden file for |
| 64 // the new version and a test to verify that Init() works with it. | 64 // the new version and a test to verify that Init() works with it. |
| 65 // NOTE(shess): RecoverDatabaseOrRaze() depends on the specific | |
| 66 // version number. The code is subtle and in development, contact me | |
| 67 // if the necessary changes are not obvious. | |
| 68 static const int kVersionNumber = 3; | 65 static const int kVersionNumber = 3; |
| 69 static const int kDeprecatedVersionNumber = 1; // and earlier. | 66 static const int kDeprecatedVersionNumber = 2; // and earlier. |
|
pwnall
2017/03/04 01:35:32
I think this breaks TopSitesDatabaseTest.Version2,
Scott Hess - ex-Googler
2017/03/07 01:34:31
Current-version tests are under Version/Recovery r
pwnall
2017/03/16 18:55:44
Fair enough! I think you'd get less churn if all t
| |
| 70 | 67 |
| 71 bool InitTables(sql::Connection* db) { | 68 bool InitTables(sql::Connection* db) { |
| 72 const char kThumbnailsSql[] = | 69 const char kThumbnailsSql[] = |
| 73 "CREATE TABLE IF NOT EXISTS thumbnails (" | 70 "CREATE TABLE IF NOT EXISTS thumbnails (" |
| 74 "url LONGVARCHAR PRIMARY KEY," | 71 "url LONGVARCHAR PRIMARY KEY," |
| 75 "url_rank INTEGER," | 72 "url_rank INTEGER," |
| 76 "title LONGVARCHAR," | 73 "title LONGVARCHAR," |
| 77 "thumbnail BLOB," | 74 "thumbnail BLOB," |
| 78 "redirects LONGVARCHAR," | 75 "redirects LONGVARCHAR," |
| 79 "boring_score DOUBLE DEFAULT 1.0," | 76 "boring_score DOUBLE DEFAULT 1.0," |
| (...skipping 20 matching lines...) Expand all Loading... | |
| 100 base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY)) { | 97 base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY)) { |
| 101 GURL redirect_url(redirect); | 98 GURL redirect_url(redirect); |
| 102 if (redirect_url.is_valid()) | 99 if (redirect_url.is_valid()) |
| 103 url->redirects.push_back(redirect_url); | 100 url->redirects.push_back(redirect_url); |
| 104 } | 101 } |
| 105 } | 102 } |
| 106 | 103 |
| 107 // Track various failure (and success) cases in recovery code. | 104 // Track various failure (and success) cases in recovery code. |
| 108 // | 105 // |
| 109 // TODO(shess): The recovery code is complete, but by nature runs in challenging | 106 // TODO(shess): The recovery code is complete, but by nature runs in challenging |
| 110 // circumstances, so initially the default error response is to leave the | 107 // circumstances, so errors will happen. This histogram is intended to expose |
| 111 // existing database in place. This histogram is intended to expose the | 108 // the failures seen in the fleet. Frequent failure cases can be explored more |
| 112 // failures seen in the fleet. Frequent failure cases can be explored more | |
| 113 // deeply to see if the complexity to fix them is warranted. Infrequent failure | 109 // deeply to see if the complexity to fix them is warranted. Infrequent failure |
| 114 // cases can be resolved by marking the database unrecoverable (which will | 110 // cases can be resolved by marking the database unrecoverable (which will |
| 115 // delete the data). | 111 // delete the data). |
| 116 // | 112 // |
| 117 // Based on the thumbnail_database.cc recovery code, FAILED_SCOPER should | 113 // Based on the thumbnail_database.cc recovery code, FAILED_SCOPER should |
| 118 // dominate, followed distantly by FAILED_META, with few or no other failures. | 114 // dominate, followed distantly by FAILED_META, with few or no other failures. |
| 119 enum RecoveryEventType { | 115 enum RecoveryEventType { |
| 120 // Database successfully recovered. | 116 // Database successfully recovered. |
| 121 RECOVERY_EVENT_RECOVERED = 0, | 117 RECOVERY_EVENT_RECOVERED = 0, |
| 122 | 118 |
| 123 // Database successfully deprecated. | 119 // Database successfully deprecated. |
| 124 RECOVERY_EVENT_DEPRECATED, | 120 RECOVERY_EVENT_DEPRECATED, |
| 125 | 121 |
| 126 // Sqlite.RecoveryEvent can usually be used to get more detail about the | 122 // Sqlite.RecoveryEvent can usually be used to get more detail about the |
| 127 // specific failure (see sql/recovery.cc). | 123 // specific failure (see sql/recovery.cc). |
| 128 RECOVERY_EVENT_FAILED_SCOPER, | 124 RECOVERY_EVENT_FAILED_SCOPER, // obsolete |
|
pwnall
2017/03/04 01:35:32
Would make sense to add OBSOLETE_ / DEPRECATED_ to
Scott Hess - ex-Googler
2017/03/07 01:34:31
Done.
| |
| 129 RECOVERY_EVENT_FAILED_META_VERSION, | 125 RECOVERY_EVENT_FAILED_META_VERSION, |
| 130 RECOVERY_EVENT_FAILED_META_WRONG_VERSION, | 126 RECOVERY_EVENT_FAILED_META_WRONG_VERSION, |
| 131 RECOVERY_EVENT_FAILED_META_INIT, | 127 RECOVERY_EVENT_FAILED_META_INIT, // obsolete |
| 132 RECOVERY_EVENT_FAILED_SCHEMA_INIT, | 128 RECOVERY_EVENT_FAILED_SCHEMA_INIT, // obsolete |
| 133 RECOVERY_EVENT_FAILED_AUTORECOVER_THUMBNAILS, | 129 RECOVERY_EVENT_FAILED_AUTORECOVER_THUMBNAILS, // obsolete |
| 134 RECOVERY_EVENT_FAILED_COMMIT, | 130 RECOVERY_EVENT_FAILED_COMMIT, |
| 135 | 131 |
| 136 // Track invariants resolved by FixThumbnailsTable(). | 132 // Track invariants resolved by FixThumbnailsTable(). |
| 137 RECOVERY_EVENT_INVARIANT_RANK, | 133 RECOVERY_EVENT_INVARIANT_RANK, |
| 138 RECOVERY_EVENT_INVARIANT_REDIRECT, | 134 RECOVERY_EVENT_INVARIANT_REDIRECT, |
| 139 RECOVERY_EVENT_INVARIANT_CONTIGUOUS, | 135 RECOVERY_EVENT_INVARIANT_CONTIGUOUS, |
| 140 | 136 |
| 137 // Track automated full-database recovery. | |
| 138 RECOVERY_EVENT_FAILED_AUTORECOVER, | |
| 139 | |
| 141 // Always keep this at the end. | 140 // Always keep this at the end. |
| 142 RECOVERY_EVENT_MAX, | 141 RECOVERY_EVENT_MAX, |
| 143 }; | 142 }; |
| 144 | 143 |
| 145 void RecordRecoveryEvent(RecoveryEventType recovery_event) { | 144 void RecordRecoveryEvent(RecoveryEventType recovery_event) { |
| 146 UMA_HISTOGRAM_ENUMERATION("History.TopSitesRecovery", recovery_event, | 145 UMA_HISTOGRAM_ENUMERATION("History.TopSitesRecovery", recovery_event, |
| 147 RECOVERY_EVENT_MAX); | 146 RECOVERY_EVENT_MAX); |
| 148 } | 147 } |
| 149 | 148 |
| 150 // Most corruption comes down to atomic updates between pages being broken | 149 // Most corruption comes down to atomic updates between pages being broken |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 195 update_statement.BindInt(0, next_rank); | 194 update_statement.BindInt(0, next_rank); |
| 196 update_statement.BindInt64(1, select_statement.ColumnInt64(1)); | 195 update_statement.BindInt64(1, select_statement.ColumnInt64(1)); |
| 197 update_statement.Run(); | 196 update_statement.Run(); |
| 198 } | 197 } |
| 199 ++next_rank; | 198 ++next_rank; |
| 200 } | 199 } |
| 201 if (adjusted) | 200 if (adjusted) |
| 202 RecordRecoveryEvent(RECOVERY_EVENT_INVARIANT_CONTIGUOUS); | 201 RecordRecoveryEvent(RECOVERY_EVENT_INVARIANT_CONTIGUOUS); |
| 203 } | 202 } |
| 204 | 203 |
| 205 // Recover the database to the extent possible, razing it if recovery is not | 204 // Recover the database to the extent possible, then fixup any broken |
| 206 // possible. | 205 // constraints. |
| 207 void RecoverDatabaseOrRaze(sql::Connection* db, const base::FilePath& db_path) { | 206 void RecoverAndFixup(sql::Connection* db, const base::FilePath& db_path) { |
| 208 // NOTE(shess): If the version changes, review this code. | 207 // NOTE(shess): If the version changes, review this code. |
| 209 DCHECK_EQ(3, kVersionNumber); | 208 DCHECK_EQ(3, kVersionNumber); |
| 210 | 209 |
| 211 // It is almost certain that some operation against |db| will fail, prevent | 210 std::unique_ptr<sql::Recovery> recovery = |
| 212 // reentry. | 211 sql::Recovery::BeginRecoverDatabase(db, db_path); |
| 213 db->reset_error_callback(); | |
| 214 | |
| 215 // For generating histogram stats. | |
| 216 size_t thumbnails_recovered = 0; | |
| 217 int64_t original_size = 0; | |
| 218 base::GetFileSize(db_path, &original_size); | |
| 219 | |
| 220 std::unique_ptr<sql::Recovery> recovery = sql::Recovery::Begin(db, db_path); | |
| 221 if (!recovery) { | 212 if (!recovery) { |
| 222 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_SCOPER); | 213 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_AUTORECOVER); |
| 223 return; | 214 return; |
| 224 } | 215 } |
| 225 | 216 |
| 226 // Setup the meta recovery table and fetch the version number from the corrupt | 217 // If the [meta] table does not exist, or the [version] key cannot be found, |
| 227 // database. | 218 // then the schema is indeterminate. The only plausible approach would be to |
| 219 // validate that the schema contains all of the tables and indices and columns | |
| 220 // expected, but that complexity may not be warranted, this case has only been | |
| 221 // seen for a few thousand database files. | |
|
pwnall
2017/03/04 01:35:32
A percentage might give the reader a better intuit
Scott Hess - ex-Googler
2017/03/07 01:34:31
The databases in this state were previously persis
pwnall
2017/03/16 18:55:44
Seems like we can use the 1bn+ users, which is a p
| |
| 228 int version = 0; | 222 int version = 0; |
| 229 if (!recovery->SetupMeta() || !recovery->GetMetaVersionNumber(&version)) { | 223 if (!recovery->SetupMeta() || !recovery->GetMetaVersionNumber(&version)) { |
| 230 // TODO(shess): Prior histograms indicate all failures are in creating the | 224 sql::Recovery::Unrecoverable(std::move(recovery)); |
| 231 // recover virtual table for corrupt.meta. The table may not exist, or the | |
| 232 // database may be too far gone. Either way, unclear how to resolve. | |
| 233 sql::Recovery::Rollback(std::move(recovery)); | |
| 234 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_META_VERSION); | 225 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_META_VERSION); |
| 235 return; | 226 return; |
| 236 } | 227 } |
| 237 | 228 |
| 238 // This code runs in a context which may be able to read version information | 229 // In this case the next open will clear the database anyhow. |
| 239 // that the regular deprecation path cannot. The effect of this code will be | |
| 240 // to raze the database. | |
| 241 if (version <= kDeprecatedVersionNumber) { | 230 if (version <= kDeprecatedVersionNumber) { |
| 242 sql::Recovery::Unrecoverable(std::move(recovery)); | 231 sql::Recovery::Unrecoverable(std::move(recovery)); |
| 243 RecordRecoveryEvent(RECOVERY_EVENT_DEPRECATED); | 232 RecordRecoveryEvent(RECOVERY_EVENT_DEPRECATED); |
| 244 return; | 233 return; |
| 245 } | 234 } |
| 246 | 235 |
| 247 // TODO(shess): Earlier versions have been deprecated, later versions should | 236 // TODO(shess): Consider marking corrupt databases from the future |
| 248 // be impossible. Unrecoverable() seems like a feasible response if this is | 237 // Unrecoverable(), since this histogram value has never been seen. OTOH, |
| 249 // infrequent enough. | 238 // this may be too risky, because if future code was correlated with |
| 250 if (version != 2 && version != 3) { | 239 // corruption then rollback would be a sensible response. |
| 240 if (version > kVersionNumber) { | |
| 251 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_META_WRONG_VERSION); | 241 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_META_WRONG_VERSION); |
| 252 sql::Recovery::Rollback(std::move(recovery)); | 242 sql::Recovery::Rollback(std::move(recovery)); |
| 253 return; | 243 return; |
| 254 } | 244 } |
| 255 | 245 |
| 256 // Both v2 and v3 recover to current schema version. | |
| 257 sql::MetaTable recover_meta_table; | |
| 258 if (!recover_meta_table.Init(recovery->db(), kVersionNumber, | |
| 259 kVersionNumber)) { | |
| 260 sql::Recovery::Rollback(std::move(recovery)); | |
| 261 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_META_INIT); | |
| 262 return; | |
| 263 } | |
| 264 | |
| 265 // Create a fresh version of the schema. The recovery code uses | |
| 266 // conflict-resolution to handle duplicates, so any indices are necessary. | |
| 267 if (!InitTables(recovery->db())) { | |
| 268 // TODO(shess): Unable to create the new schema in the new database. The | |
| 269 // new database should be a temporary file, so being unable to work with it | |
| 270 // is pretty unclear. | |
| 271 // | |
| 272 // What are the potential responses, even? The recovery database could be | |
| 273 // opened as in-memory. If the temp database had a filesystem problem and | |
| 274 // the temp filesystem differs from the main database, then that could fix | |
| 275 // it. | |
| 276 sql::Recovery::Rollback(std::move(recovery)); | |
| 277 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_SCHEMA_INIT); | |
| 278 return; | |
| 279 } | |
| 280 | |
| 281 // In the v2 case the missing column will get default values. | |
| 282 if (!recovery->AutoRecoverTable("thumbnails", &thumbnails_recovered)) { | |
| 283 sql::Recovery::Rollback(std::move(recovery)); | |
| 284 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_AUTORECOVER_THUMBNAILS); | |
| 285 return; | |
| 286 } | |
| 287 | |
| 288 // TODO(shess): Inline this? | 246 // TODO(shess): Inline this? |
| 289 FixThumbnailsTable(recovery->db()); | 247 FixThumbnailsTable(recovery->db()); |
| 290 | 248 |
| 291 if (!sql::Recovery::Recovered(std::move(recovery))) { | 249 if (!sql::Recovery::Recovered(std::move(recovery))) { |
| 292 // TODO(shess): Very unclear what this failure would actually mean, and what | 250 // TODO(shess): Very unclear what this failure would actually mean, and what |
| 293 // should be done. Add histograms to Recovered() implementation to get some | 251 // should be done. Add histograms to Recovered() implementation to get some |
| 294 // insight. | 252 // insight. |
| 295 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_COMMIT); | 253 RecordRecoveryEvent(RECOVERY_EVENT_FAILED_COMMIT); |
| 296 return; | 254 return; |
| 297 } | 255 } |
| 298 | 256 |
| 299 // Track the size of the recovered database relative to the size of the input | |
| 300 // database. The size should almost always be smaller, unless the input | |
| 301 // database was empty to start with. If the percentage results are very low, | |
| 302 // something is awry. | |
| 303 int64_t final_size = 0; | |
| 304 if (original_size > 0 && base::GetFileSize(db_path, &final_size) && | |
| 305 final_size > 0) { | |
| 306 UMA_HISTOGRAM_PERCENTAGE("History.TopSitesRecoveredPercentage", | |
|
pwnall
2017/03/04 01:35:32
Sorry for my bad memory, but does sql::Recovery::R
Scott Hess - ex-Googler
2017/03/07 01:34:31
The RecoverDatabase() code does not really have a
pwnall
2017/03/16 18:55:44
Yeah, I don't see anything obvious either. Let's t
| |
| 307 final_size * 100 / original_size); | |
| 308 } | |
| 309 | |
| 310 // Using 10,000 because these cases mostly care about "none recovered" and | |
| 311 // "lots recovered". More than 10,000 rows recovered probably means there's | |
| 312 // something wrong with the profile. | |
| 313 UMA_HISTOGRAM_COUNTS_10000("History.TopSitesRecoveredRowsThumbnails", | |
| 314 static_cast<int>(thumbnails_recovered)); | |
| 315 | |
| 316 RecordRecoveryEvent(RECOVERY_EVENT_RECOVERED); | 257 RecordRecoveryEvent(RECOVERY_EVENT_RECOVERED); |
| 317 } | 258 } |
| 318 | 259 |
| 319 void DatabaseErrorCallback(sql::Connection* db, | 260 void DatabaseErrorCallback(sql::Connection* db, |
| 320 const base::FilePath& db_path, | 261 const base::FilePath& db_path, |
| 321 int extended_error, | 262 int extended_error, |
| 322 sql::Statement* stmt) { | 263 sql::Statement* stmt) { |
| 323 // TODO(shess): Assert that this is running on a safe thread. AFAICT, should | 264 // TODO(shess): Assert that this is running on a safe thread. AFAICT, should |
| 324 // be the history thread, but at this level I can't see how to reach that. | 265 // be the history thread, but at this level I can't see how to reach that. |
| 325 | 266 |
| 326 // Attempt to recover corrupt databases. | 267 // Attempt to recover corrupt databases. |
| 327 int error = (extended_error & 0xFF); | 268 if (sql::Recovery::ShouldRecover(extended_error)) { |
| 328 if (error == SQLITE_CORRUPT || | 269 // Prevent reentrant calls. |
| 329 error == SQLITE_CANTOPEN || | 270 db->reset_error_callback(); |
| 330 error == SQLITE_NOTADB) { | 271 |
| 331 RecoverDatabaseOrRaze(db, db_path); | 272 // After this call, the |db| handle is poisoned so that future calls will |
| 273 // return errors until the handle is re-opened. | |
| 274 RecoverAndFixup(db, db_path); | |
| 275 | |
| 276 // The DLOG(FATAL) below is intended to draw immediate attention to errors | |
| 277 // in newly-written code. Database corruption is generally a result of OS | |
| 278 // or hardware issues, not coding errors at the client level, so displaying | |
| 279 // the error would probably lead to confusion. The ignored call signals the | |
| 280 // test-expectation framework that the error was handled. | |
| 281 ignore_result(sql::Connection::IsExpectedSqliteError(extended_error)); | |
| 282 return; | |
| 332 } | 283 } |
| 333 | 284 |
| 334 // TODO(shess): This database's error histograms look like: | 285 // TODO(shess): This database's error histograms look like: |
| 335 // 84% SQLITE_CORRUPT, SQLITE_CANTOPEN, SQLITE_NOTADB | 286 // 84% SQLITE_CORRUPT, SQLITE_CANTOPEN, SQLITE_NOTADB |
| 336 // 7% SQLITE_ERROR | 287 // 7% SQLITE_ERROR |
| 337 // 6% SQLITE_IOERR variants | 288 // 6% SQLITE_IOERR variants |
| 338 // 2% SQLITE_READONLY | 289 // 2% SQLITE_READONLY |
| 339 // .4% SQLITE_FULL | 290 // .4% SQLITE_FULL |
| 340 // nominal SQLITE_TOOBIG, SQLITE_AUTH, and SQLITE_BUSY. In the case of | 291 // nominal SQLITE_TOOBIG, SQLITE_AUTH, and SQLITE_BUSY. In the case of |
| 341 // thumbnail_database.cc, as soon as the recovery code landed, SQLITE_IOERR | 292 // thumbnail_database.cc, as soon as the recovery code landed, SQLITE_IOERR |
| (...skipping 385 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 727 db->set_error_callback(base::Bind(&DatabaseErrorCallback, db.get(), db_name)); | 678 db->set_error_callback(base::Bind(&DatabaseErrorCallback, db.get(), db_name)); |
| 728 db->set_page_size(4096); | 679 db->set_page_size(4096); |
| 729 db->set_cache_size(32); | 680 db->set_cache_size(32); |
| 730 | 681 |
| 731 if (!db->Open(db_name)) | 682 if (!db->Open(db_name)) |
| 732 return NULL; | 683 return NULL; |
| 733 return db.release(); | 684 return db.release(); |
| 734 } | 685 } |
| 735 | 686 |
| 736 } // namespace history | 687 } // namespace history |
| OLD | NEW |