Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/history/top_sites_database.h" | 5 #include "chrome/browser/history/top_sites_database.h" |
| 6 | 6 |
| 7 #include "base/files/file_util.h" | 7 #include "base/files/file_util.h" |
| 8 #include "base/memory/ref_counted.h" | 8 #include "base/memory/ref_counted.h" |
| 9 #include "base/metrics/histogram.h" | 9 #include "base/metrics/histogram.h" |
| 10 #include "base/strings/string_split.h" | 10 #include "base/strings/string_split.h" |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 51 | 51 |
| 52 // Version 3: b6d6a783/r231648 by beaudoin@chromium.org on 2013-10-29 | 52 // Version 3: b6d6a783/r231648 by beaudoin@chromium.org on 2013-10-29 |
| 53 // Version 2: eb0b24e6/r87284 by satorux@chromium.org on 2011-05-31 | 53 // Version 2: eb0b24e6/r87284 by satorux@chromium.org on 2011-05-31 |
| 54 // Version 1: 809cc4d8/r64072 by sky@chromium.org on 2010-10-27 (deprecated) | 54 // Version 1: 809cc4d8/r64072 by sky@chromium.org on 2010-10-27 (deprecated) |
| 55 | 55 |
| 56 // NOTE(shess): When changing the version, add a new golden file for | 56 // NOTE(shess): When changing the version, add a new golden file for |
| 57 // the new version and a test to verify that Init() works with it. | 57 // the new version and a test to verify that Init() works with it. |
| 58 // NOTE(shess): RecoverDatabaseOrRaze() depends on the specific | 58 // NOTE(shess): RecoverDatabaseOrRaze() depends on the specific |
| 59 // version number. The code is subtle and in development, contact me | 59 // version number. The code is subtle and in development, contact me |
| 60 // if the necessary changes are not obvious. | 60 // if the necessary changes are not obvious. |
| 61 static const int kVersionNumber = 3; | 61 static const int kVersionNumber = 3; |
|
beaudoin
2014/09/09 21:32:27
Aren't you changing the DB in a way that requires
huangs
2014/09/11 01:58:13
The code is backwards compatible for reading.
| |
| 62 static const int kDeprecatedVersionNumber = 1; // and earlier. | 62 static const int kDeprecatedVersionNumber = 1; // and earlier. |
| 63 | 63 |
| 64 bool InitTables(sql::Connection* db) { | 64 bool InitTables(sql::Connection* db) { |
| 65 const char kThumbnailsSql[] = | 65 const char kThumbnailsSql[] = |
| 66 "CREATE TABLE IF NOT EXISTS thumbnails (" | 66 "CREATE TABLE IF NOT EXISTS thumbnails (" |
| 67 "url LONGVARCHAR PRIMARY KEY," | 67 "url LONGVARCHAR PRIMARY KEY," |
| 68 "url_rank INTEGER," | 68 "url_rank INTEGER," |
| 69 "title LONGVARCHAR," | 69 "title LONGVARCHAR," |
| 70 "thumbnail BLOB," | 70 "thumbnail BLOB," |
| 71 "redirects LONGVARCHAR," | 71 "redirects LONGVARCHAR," |
| 72 "boring_score DOUBLE DEFAULT 1.0," | 72 "boring_score DOUBLE DEFAULT 1.0," |
| 73 "good_clipping INTEGER DEFAULT 0," | 73 "good_clipping INTEGER DEFAULT 0," |
| 74 "at_top INTEGER DEFAULT 0," | 74 "at_top INTEGER DEFAULT 0," |
| 75 "last_updated INTEGER DEFAULT 0," | 75 "last_updated INTEGER DEFAULT 0," |
| 76 "load_completed INTEGER DEFAULT 0," | 76 "load_completed INTEGER DEFAULT 0," |
| 77 "last_forced INTEGER DEFAULT 0)"; | 77 "last_forced INTEGER DEFAULT 0)"; |
| 78 return db->Execute(kThumbnailsSql); | 78 return db->Execute(kThumbnailsSql); |
| 79 } | 79 } |
| 80 | 80 |
| 81 // Encodes redirects into a string. | |
| 82 std::string GetRedirects(const history::MostVisitedURL& url) { | |
| 83 std::vector<std::string> redirects; | |
| 84 for (size_t i = 0; i < url.redirects.size(); i++) | |
| 85 redirects.push_back(url.redirects[i].spec()); | |
| 86 return JoinString(redirects, ' '); | |
| 87 } | |
| 88 | |
| 89 // Decodes redirects from a string and sets them for the url. | |
| 90 void SetRedirects(const std::string& redirects, history::MostVisitedURL* url) { | |
| 91 std::vector<std::string> redirects_vector; | |
| 92 base::SplitStringAlongWhitespace(redirects, &redirects_vector); | |
| 93 for (size_t i = 0; i < redirects_vector.size(); ++i) | |
| 94 url->redirects.push_back(GURL(redirects_vector[i])); | |
| 95 } | |
| 96 | |
| 97 // Track various failure (and success) cases in recovery code. | 81 // Track various failure (and success) cases in recovery code. |
| 98 // | 82 // |
| 99 // TODO(shess): The recovery code is complete, but by nature runs in challenging | 83 // TODO(shess): The recovery code is complete, but by nature runs in challenging |
| 100 // circumstances, so initially the default error response is to leave the | 84 // circumstances, so initially the default error response is to leave the |
| 101 // existing database in place. This histogram is intended to expose the | 85 // existing database in place. This histogram is intended to expose the |
| 102 // failures seen in the fleet. Frequent failure cases can be explored more | 86 // failures seen in the fleet. Frequent failure cases can be explored more |
| 103 // deeply to see if the complexity to fix them is warranted. Infrequent failure | 87 // deeply to see if the complexity to fix them is warranted. Infrequent failure |
| 104 // cases can be resolved by marking the database unrecoverable (which will | 88 // cases can be resolved by marking the database unrecoverable (which will |
| 105 // delete the data). | 89 // delete the data). |
| 106 // | 90 // |
| (...skipping 18 matching lines...) Expand all Loading... | |
| 125 | 109 |
| 126 // Track invariants resolved by FixThumbnailsTable(). | 110 // Track invariants resolved by FixThumbnailsTable(). |
| 127 RECOVERY_EVENT_INVARIANT_RANK, | 111 RECOVERY_EVENT_INVARIANT_RANK, |
| 128 RECOVERY_EVENT_INVARIANT_REDIRECT, | 112 RECOVERY_EVENT_INVARIANT_REDIRECT, |
| 129 RECOVERY_EVENT_INVARIANT_CONTIGUOUS, | 113 RECOVERY_EVENT_INVARIANT_CONTIGUOUS, |
| 130 | 114 |
| 131 // Always keep this at the end. | 115 // Always keep this at the end. |
| 132 RECOVERY_EVENT_MAX, | 116 RECOVERY_EVENT_MAX, |
| 133 }; | 117 }; |
| 134 | 118 |
| 119 const char kDataUrlPrefix[] = "data:"; | |
| 120 | |
| 135 void RecordRecoveryEvent(RecoveryEventType recovery_event) { | 121 void RecordRecoveryEvent(RecoveryEventType recovery_event) { |
| 136 UMA_HISTOGRAM_ENUMERATION("History.TopSitesRecovery", | 122 UMA_HISTOGRAM_ENUMERATION("History.TopSitesRecovery", |
| 137 recovery_event, RECOVERY_EVENT_MAX); | 123 recovery_event, RECOVERY_EVENT_MAX); |
| 138 } | 124 } |
| 139 | 125 |
| 140 // Most corruption comes down to atomic updates between pages being broken | 126 // Most corruption comes down to atomic updates between pages being broken |
| 141 // somehow. This can result in either missing data, or overlapping data, | 127 // somehow. This can result in either missing data, or overlapping data, |
| 142 // depending on the operation broken. This table has large rows, which will use | 128 // depending on the operation broken. This table has large rows, which will use |
| 143 // overflow pages, so it is possible (though unlikely) that a chain could fit | 129 // overflow pages, so it is possible (though unlikely) that a chain could fit |
| 144 // together and yield a row with errors. | 130 // together and yield a row with errors. |
| (...skipping 308 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 453 | 439 |
| 454 while (statement.Step()) { | 440 while (statement.Step()) { |
| 455 // Results are sorted by url_rank. For forced thumbnails with url_rank = -1, | 441 // Results are sorted by url_rank. For forced thumbnails with url_rank = -1, |
| 456 // thumbnails are sorted by last_forced. | 442 // thumbnails are sorted by last_forced. |
| 457 MostVisitedURL url; | 443 MostVisitedURL url; |
| 458 GURL gurl(statement.ColumnString(0)); | 444 GURL gurl(statement.ColumnString(0)); |
| 459 url.url = gurl; | 445 url.url = gurl; |
| 460 url.title = statement.ColumnString16(2); | 446 url.title = statement.ColumnString16(2); |
| 461 url.last_forced_time = | 447 url.last_forced_time = |
| 462 base::Time::FromInternalValue(statement.ColumnInt64(10)); | 448 base::Time::FromInternalValue(statement.ColumnInt64(10)); |
| 463 std::string redirects = statement.ColumnString(4); | 449 std::string encoded_redirects = statement.ColumnString(4); |
| 464 SetRedirects(redirects, &url); | 450 DecodeRedirects(encoded_redirects, &url.redirects); |
| 465 urls->push_back(url); | 451 urls->push_back(url); |
| 466 | 452 |
| 467 std::vector<unsigned char> data; | 453 std::vector<unsigned char> data; |
| 468 statement.ColumnBlobAsVector(3, &data); | 454 statement.ColumnBlobAsVector(3, &data); |
| 469 Images thumbnail; | 455 Images thumbnail; |
| 470 if (!data.empty()) | 456 if (!data.empty()) |
| 471 thumbnail.thumbnail = base::RefCountedBytes::TakeVector(&data); | 457 thumbnail.thumbnail = base::RefCountedBytes::TakeVector(&data); |
| 472 thumbnail.thumbnail_score.boring_score = statement.ColumnDouble(5); | 458 thumbnail.thumbnail_score.boring_score = statement.ColumnDouble(5); |
| 473 thumbnail.thumbnail_score.good_clipping = statement.ColumnBool(6); | 459 thumbnail.thumbnail_score.good_clipping = statement.ColumnBool(6); |
| 474 thumbnail.thumbnail_score.at_top = statement.ColumnBool(7); | 460 thumbnail.thumbnail_score.at_top = statement.ColumnBool(7); |
| (...skipping 28 matching lines...) Expand all Loading... | |
| 503 "UPDATE thumbnails SET " | 489 "UPDATE thumbnails SET " |
| 504 "title = ?, thumbnail = ?, redirects = ?, " | 490 "title = ?, thumbnail = ?, redirects = ?, " |
| 505 "boring_score = ?, good_clipping = ?, at_top = ?, last_updated = ?, " | 491 "boring_score = ?, good_clipping = ?, at_top = ?, last_updated = ?, " |
| 506 "load_completed = ?, last_forced = ?" | 492 "load_completed = ?, last_forced = ?" |
| 507 "WHERE url = ? ")); | 493 "WHERE url = ? ")); |
| 508 statement.BindString16(0, url.title); | 494 statement.BindString16(0, url.title); |
| 509 if (thumbnail.thumbnail.get() && thumbnail.thumbnail->front()) { | 495 if (thumbnail.thumbnail.get() && thumbnail.thumbnail->front()) { |
| 510 statement.BindBlob(1, thumbnail.thumbnail->front(), | 496 statement.BindBlob(1, thumbnail.thumbnail->front(), |
| 511 static_cast<int>(thumbnail.thumbnail->size())); | 497 static_cast<int>(thumbnail.thumbnail->size())); |
| 512 } | 498 } |
| 513 statement.BindString(2, GetRedirects(url)); | 499 statement.BindString(2, EncodeRedirects(url.redirects)); |
| 514 const ThumbnailScore& score = thumbnail.thumbnail_score; | 500 const ThumbnailScore& score = thumbnail.thumbnail_score; |
| 515 statement.BindDouble(3, score.boring_score); | 501 statement.BindDouble(3, score.boring_score); |
| 516 statement.BindBool(4, score.good_clipping); | 502 statement.BindBool(4, score.good_clipping); |
| 517 statement.BindBool(5, score.at_top); | 503 statement.BindBool(5, score.at_top); |
| 518 statement.BindInt64(6, score.time_at_snapshot.ToInternalValue()); | 504 statement.BindInt64(6, score.time_at_snapshot.ToInternalValue()); |
| 519 statement.BindBool(7, score.load_completed); | 505 statement.BindBool(7, score.load_completed); |
| 520 statement.BindInt64(8, url.last_forced_time.ToInternalValue()); | 506 statement.BindInt64(8, url.last_forced_time.ToInternalValue()); |
| 521 statement.BindString(9, url.url.spec()); | 507 statement.BindString(9, url.url.spec()); |
| 522 | 508 |
| 523 return statement.Run(); | 509 return statement.Run(); |
| 524 } | 510 } |
| 525 | 511 |
| 526 void TopSitesDatabase::AddPageThumbnail(const MostVisitedURL& url, | 512 void TopSitesDatabase::AddPageThumbnail(const MostVisitedURL& url, |
| 527 int new_rank, | 513 int new_rank, |
| 528 const Images& thumbnail) { | 514 const Images& thumbnail) { |
| 529 sql::Statement statement(db_->GetCachedStatement( | 515 sql::Statement statement(db_->GetCachedStatement( |
| 530 SQL_FROM_HERE, | 516 SQL_FROM_HERE, |
| 531 "INSERT OR REPLACE INTO thumbnails " | 517 "INSERT OR REPLACE INTO thumbnails " |
| 532 "(url, url_rank, title, thumbnail, redirects, " | 518 "(url, url_rank, title, thumbnail, redirects, " |
| 533 "boring_score, good_clipping, at_top, last_updated, load_completed, " | 519 "boring_score, good_clipping, at_top, last_updated, load_completed, " |
| 534 "last_forced) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")); | 520 "last_forced) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")); |
| 535 statement.BindString(0, url.url.spec()); | 521 statement.BindString(0, url.url.spec()); |
| 536 statement.BindInt(1, kRankOfForcedURL); // First make it a forced thumbnail. | 522 statement.BindInt(1, kRankOfForcedURL); // First make it a forced thumbnail. |
| 537 statement.BindString16(2, url.title); | 523 statement.BindString16(2, url.title); |
| 538 if (thumbnail.thumbnail.get() && thumbnail.thumbnail->front()) { | 524 if (thumbnail.thumbnail.get() && thumbnail.thumbnail->front()) { |
| 539 statement.BindBlob(3, thumbnail.thumbnail->front(), | 525 statement.BindBlob(3, thumbnail.thumbnail->front(), |
| 540 static_cast<int>(thumbnail.thumbnail->size())); | 526 static_cast<int>(thumbnail.thumbnail->size())); |
| 541 } | 527 } |
| 542 statement.BindString(4, GetRedirects(url)); | 528 statement.BindString(4, EncodeRedirects(url.redirects)); |
| 543 const ThumbnailScore& score = thumbnail.thumbnail_score; | 529 const ThumbnailScore& score = thumbnail.thumbnail_score; |
| 544 statement.BindDouble(5, score.boring_score); | 530 statement.BindDouble(5, score.boring_score); |
| 545 statement.BindBool(6, score.good_clipping); | 531 statement.BindBool(6, score.good_clipping); |
| 546 statement.BindBool(7, score.at_top); | 532 statement.BindBool(7, score.at_top); |
| 547 statement.BindInt64(8, score.time_at_snapshot.ToInternalValue()); | 533 statement.BindInt64(8, score.time_at_snapshot.ToInternalValue()); |
| 548 statement.BindBool(9, score.load_completed); | 534 statement.BindBool(9, score.load_completed); |
| 549 int64 last_forced = url.last_forced_time.ToInternalValue(); | 535 int64 last_forced = url.last_forced_time.ToInternalValue(); |
| 550 DCHECK((last_forced == 0) == (new_rank != kRankOfForcedURL)) | 536 DCHECK((last_forced == 0) == (new_rank != kRankOfForcedURL)) |
| 551 << "Thumbnail without a forced time stamp has a forced rank, or the " | 537 << "Thumbnail without a forced time stamp has a forced rank, or the " |
| 552 << "opposite."; | 538 << "opposite."; |
| 553 statement.BindInt64(10, last_forced); | 539 statement.BindInt64(10, last_forced); |
| 554 if (!statement.Run()) | 540 if (!statement.Run()) |
| 555 return; | 541 return; |
| 556 | 542 |
| 557 // Update rank if this is not a forced thumbnail. | 543 // Update rank if this is not a forced thumbnail. |
| 558 if (new_rank != kRankOfForcedURL) | 544 if (new_rank != kRankOfForcedURL) |
| 559 UpdatePageRankNoTransaction(url, new_rank); | 545 UpdatePageRankNoTransaction(url, new_rank); |
| 560 } | 546 } |
| 561 | 547 |
| 548 // static | |
| 549 std::string TopSitesDatabase::EncodeCSVString( | |
| 550 const std::vector<std::string> str_list) { | |
| 551 std::string csv; | |
| 552 for (std::vector<std::string>::const_iterator it = str_list.begin(); | |
| 553 it != str_list.end(); ++it) { | |
| 554 const std::string& str = *it; | |
| 555 if (it != str_list.begin()) | |
| 556 csv += ','; | |
| 557 csv += '"'; | |
| 558 for (std::string::const_iterator jt = str.begin(); jt != str.end(); ++jt) { | |
| 559 if (*jt == '"') | |
| 560 csv += '"'; | |
|
beaudoin
2014/09/09 21:32:27
Looks like you escape quotes as two quotes. Your c
huangs
2014/09/11 01:58:13
Comment in .h file updated.
| |
| 561 csv += *jt; | |
| 562 } | |
| 563 csv += '"'; | |
| 564 } | |
| 565 return csv; | |
| 566 } | |
| 567 | |
| 568 // static | |
| 569 bool TopSitesDatabase::DecodeCSVString(const std::string csv, | |
| 570 std::vector<std::string>* str_list) { | |
| 571 if (csv.empty()) { | |
| 572 str_list->clear(); | |
| 573 return true; | |
| 574 } | |
| 575 | |
| 576 enum { | |
| 577 SEEK_QUOTE, | |
| 578 READ_STRING, | |
| 579 SAW_ONE_QUOTE | |
| 580 } state = SEEK_QUOTE; | |
| 581 std::vector<std::string> out_list; | |
| 582 std::string str; | |
| 583 for (std::string::const_iterator it = csv.begin(); it != csv.end(); ++it) { | |
| 584 const char ch = *it; | |
| 585 if (state == SEEK_QUOTE) { | |
| 586 if (ch != '"') | |
| 587 return false; | |
| 588 state = READ_STRING; | |
| 589 } else if (state == READ_STRING) { | |
| 590 if (ch == '"') | |
| 591 state = SAW_ONE_QUOTE; | |
| 592 else | |
| 593 str += ch; | |
| 594 } else if (state == SAW_ONE_QUOTE) { | |
| 595 if (ch == '"') { | |
| 596 str += ch; | |
| 597 state = READ_STRING; | |
| 598 } else if (ch == ',') { | |
| 599 out_list.push_back(str); | |
| 600 str.clear(); | |
| 601 state = SEEK_QUOTE; | |
| 602 } else { | |
| 603 return false; | |
|
beaudoin
2014/09/09 21:32:27
You're strict about the fact that the comma must i
huangs
2014/09/11 01:58:13
Done.
| |
| 604 } | |
| 605 } else { | |
| 606 NOTREACHED(); | |
| 607 return false; | |
| 608 } | |
| 609 } | |
| 610 if (state != SAW_ONE_QUOTE) | |
| 611 return false; | |
| 612 out_list.push_back(str); | |
| 613 str_list->swap(out_list); | |
| 614 return true; | |
| 615 } | |
| 616 | |
| 617 // static | |
| 618 std::string TopSitesDatabase::EncodeRedirects(const RedirectList& redirects) { | |
| 619 std::vector<std::string> valid_urls; | |
| 620 for (size_t i = 0; i < redirects.size(); i++) { | |
| 621 // Example of invalid URL that may end up here: | |
| 622 // "data:text/plain,this string contains space". | |
| 623 if (redirects[i].is_valid()) | |
| 624 valid_urls.push_back(redirects[i].spec()); | |
| 625 } | |
| 626 return EncodeCSVString(valid_urls); | |
| 627 } | |
| 628 | |
| 629 // static | |
| 630 void TopSitesDatabase::DecodeRedirects(const std::string& encoded_redirects, | |
| 631 RedirectList* redirects) { | |
| 632 std::vector<std::string> redirects_vector; | |
| 633 if (!DecodeCSVString(encoded_redirects, &redirects_vector)) { | |
| 634 // Fall back to space-delimited list for backward compatibility. | |
| 635 base::SplitStringAlongWhitespace(encoded_redirects, &redirects_vector); | |
|
beaudoin
2014/09/09 21:32:27
I see, so you support DB in the old format too. It
huangs
2014/09/11 01:58:13
Changing version, but have to maintain backward co
| |
| 636 } | |
| 637 for (size_t i = 0; i < redirects_vector.size(); ++i) { | |
| 638 GURL redirect_url(redirects_vector[i]); | |
| 639 if (redirect_url.is_valid()) | |
| 640 redirects->push_back(redirect_url); | |
| 641 } | |
| 642 } | |
| 643 | |
| 562 void TopSitesDatabase::UpdatePageRank(const MostVisitedURL& url, | 644 void TopSitesDatabase::UpdatePageRank(const MostVisitedURL& url, |
| 563 int new_rank) { | 645 int new_rank) { |
| 564 DCHECK((url.last_forced_time.ToInternalValue() == 0) == | 646 DCHECK((url.last_forced_time.ToInternalValue() == 0) == |
| 565 (new_rank != kRankOfForcedURL)) | 647 (new_rank != kRankOfForcedURL)) |
| 566 << "Thumbnail without a forced time stamp has a forced rank, or the " | 648 << "Thumbnail without a forced time stamp has a forced rank, or the " |
| 567 << "opposite."; | 649 << "opposite."; |
| 568 sql::Transaction transaction(db_.get()); | 650 sql::Transaction transaction(db_.get()); |
| 569 transaction.Begin(); | 651 transaction.Begin(); |
| 570 UpdatePageRankNoTransaction(url, new_rank); | 652 UpdatePageRankNoTransaction(url, new_rank); |
| 571 transaction.Commit(); | 653 transaction.Commit(); |
| (...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 724 db.get(), db_name)); | 806 db.get(), db_name)); |
| 725 db->set_page_size(4096); | 807 db->set_page_size(4096); |
| 726 db->set_cache_size(32); | 808 db->set_cache_size(32); |
| 727 | 809 |
| 728 if (!db->Open(db_name)) | 810 if (!db->Open(db_name)) |
| 729 return NULL; | 811 return NULL; |
| 730 return db.release(); | 812 return db.release(); |
| 731 } | 813 } |
| 732 | 814 |
| 733 } // namespace history | 815 } // namespace history |
| OLD | NEW |