OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/top_sites_database.h" | 5 #include "chrome/browser/history/top_sites_database.h" |
6 | 6 |
7 #include "base/files/file_util.h" | 7 #include "base/files/file_util.h" |
8 #include "base/memory/ref_counted.h" | 8 #include "base/memory/ref_counted.h" |
9 #include "base/metrics/histogram.h" | 9 #include "base/metrics/histogram.h" |
10 #include "base/strings/string_split.h" | 10 #include "base/strings/string_split.h" |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
51 | 51 |
52 // Version 3: b6d6a783/r231648 by beaudoin@chromium.org on 2013-10-29 | 52 // Version 3: b6d6a783/r231648 by beaudoin@chromium.org on 2013-10-29 |
53 // Version 2: eb0b24e6/r87284 by satorux@chromium.org on 2011-05-31 | 53 // Version 2: eb0b24e6/r87284 by satorux@chromium.org on 2011-05-31 |
54 // Version 1: 809cc4d8/r64072 by sky@chromium.org on 2010-10-27 (deprecated) | 54 // Version 1: 809cc4d8/r64072 by sky@chromium.org on 2010-10-27 (deprecated) |
55 | 55 |
56 // NOTE(shess): When changing the version, add a new golden file for | 56 // NOTE(shess): When changing the version, add a new golden file for |
57 // the new version and a test to verify that Init() works with it. | 57 // the new version and a test to verify that Init() works with it. |
58 // NOTE(shess): RecoverDatabaseOrRaze() depends on the specific | 58 // NOTE(shess): RecoverDatabaseOrRaze() depends on the specific |
59 // version number. The code is subtle and in development, contact me | 59 // version number. The code is subtle and in development, contact me |
60 // if the necessary changes are not obvious. | 60 // if the necessary changes are not obvious. |
61 static const int kVersionNumber = 3; | 61 static const int kVersionNumber = 3; |
beaudoin
2014/09/09 21:32:27
Aren't you changing the DB in a way that requires
huangs
2014/09/11 01:58:13
The code is backwards compatible for reading.
| |
62 static const int kDeprecatedVersionNumber = 1; // and earlier. | 62 static const int kDeprecatedVersionNumber = 1; // and earlier. |
63 | 63 |
64 bool InitTables(sql::Connection* db) { | 64 bool InitTables(sql::Connection* db) { |
65 const char kThumbnailsSql[] = | 65 const char kThumbnailsSql[] = |
66 "CREATE TABLE IF NOT EXISTS thumbnails (" | 66 "CREATE TABLE IF NOT EXISTS thumbnails (" |
67 "url LONGVARCHAR PRIMARY KEY," | 67 "url LONGVARCHAR PRIMARY KEY," |
68 "url_rank INTEGER," | 68 "url_rank INTEGER," |
69 "title LONGVARCHAR," | 69 "title LONGVARCHAR," |
70 "thumbnail BLOB," | 70 "thumbnail BLOB," |
71 "redirects LONGVARCHAR," | 71 "redirects LONGVARCHAR," |
72 "boring_score DOUBLE DEFAULT 1.0," | 72 "boring_score DOUBLE DEFAULT 1.0," |
73 "good_clipping INTEGER DEFAULT 0," | 73 "good_clipping INTEGER DEFAULT 0," |
74 "at_top INTEGER DEFAULT 0," | 74 "at_top INTEGER DEFAULT 0," |
75 "last_updated INTEGER DEFAULT 0," | 75 "last_updated INTEGER DEFAULT 0," |
76 "load_completed INTEGER DEFAULT 0," | 76 "load_completed INTEGER DEFAULT 0," |
77 "last_forced INTEGER DEFAULT 0)"; | 77 "last_forced INTEGER DEFAULT 0)"; |
78 return db->Execute(kThumbnailsSql); | 78 return db->Execute(kThumbnailsSql); |
79 } | 79 } |
80 | 80 |
81 // Encodes redirects into a string. | |
82 std::string GetRedirects(const history::MostVisitedURL& url) { | |
83 std::vector<std::string> redirects; | |
84 for (size_t i = 0; i < url.redirects.size(); i++) | |
85 redirects.push_back(url.redirects[i].spec()); | |
86 return JoinString(redirects, ' '); | |
87 } | |
88 | |
89 // Decodes redirects from a string and sets them for the url. | |
90 void SetRedirects(const std::string& redirects, history::MostVisitedURL* url) { | |
91 std::vector<std::string> redirects_vector; | |
92 base::SplitStringAlongWhitespace(redirects, &redirects_vector); | |
93 for (size_t i = 0; i < redirects_vector.size(); ++i) | |
94 url->redirects.push_back(GURL(redirects_vector[i])); | |
95 } | |
96 | |
97 // Track various failure (and success) cases in recovery code. | 81 // Track various failure (and success) cases in recovery code. |
98 // | 82 // |
99 // TODO(shess): The recovery code is complete, but by nature runs in challenging | 83 // TODO(shess): The recovery code is complete, but by nature runs in challenging |
100 // circumstances, so initially the default error response is to leave the | 84 // circumstances, so initially the default error response is to leave the |
101 // existing database in place. This histogram is intended to expose the | 85 // existing database in place. This histogram is intended to expose the |
102 // failures seen in the fleet. Frequent failure cases can be explored more | 86 // failures seen in the fleet. Frequent failure cases can be explored more |
103 // deeply to see if the complexity to fix them is warranted. Infrequent failure | 87 // deeply to see if the complexity to fix them is warranted. Infrequent failure |
104 // cases can be resolved by marking the database unrecoverable (which will | 88 // cases can be resolved by marking the database unrecoverable (which will |
105 // delete the data). | 89 // delete the data). |
106 // | 90 // |
(...skipping 18 matching lines...) Expand all Loading... | |
125 | 109 |
126 // Track invariants resolved by FixThumbnailsTable(). | 110 // Track invariants resolved by FixThumbnailsTable(). |
127 RECOVERY_EVENT_INVARIANT_RANK, | 111 RECOVERY_EVENT_INVARIANT_RANK, |
128 RECOVERY_EVENT_INVARIANT_REDIRECT, | 112 RECOVERY_EVENT_INVARIANT_REDIRECT, |
129 RECOVERY_EVENT_INVARIANT_CONTIGUOUS, | 113 RECOVERY_EVENT_INVARIANT_CONTIGUOUS, |
130 | 114 |
131 // Always keep this at the end. | 115 // Always keep this at the end. |
132 RECOVERY_EVENT_MAX, | 116 RECOVERY_EVENT_MAX, |
133 }; | 117 }; |
134 | 118 |
119 const char kDataUrlPrefix[] = "data:"; | |
120 | |
135 void RecordRecoveryEvent(RecoveryEventType recovery_event) { | 121 void RecordRecoveryEvent(RecoveryEventType recovery_event) { |
136 UMA_HISTOGRAM_ENUMERATION("History.TopSitesRecovery", | 122 UMA_HISTOGRAM_ENUMERATION("History.TopSitesRecovery", |
137 recovery_event, RECOVERY_EVENT_MAX); | 123 recovery_event, RECOVERY_EVENT_MAX); |
138 } | 124 } |
139 | 125 |
140 // Most corruption comes down to atomic updates between pages being broken | 126 // Most corruption comes down to atomic updates between pages being broken |
141 // somehow. This can result in either missing data, or overlapping data, | 127 // somehow. This can result in either missing data, or overlapping data, |
142 // depending on the operation broken. This table has large rows, which will use | 128 // depending on the operation broken. This table has large rows, which will use |
143 // overflow pages, so it is possible (though unlikely) that a chain could fit | 129 // overflow pages, so it is possible (though unlikely) that a chain could fit |
144 // together and yield a row with errors. | 130 // together and yield a row with errors. |
(...skipping 308 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
453 | 439 |
454 while (statement.Step()) { | 440 while (statement.Step()) { |
455 // Results are sorted by url_rank. For forced thumbnails with url_rank = -1, | 441 // Results are sorted by url_rank. For forced thumbnails with url_rank = -1, |
456 // thumbnails are sorted by last_forced. | 442 // thumbnails are sorted by last_forced. |
457 MostVisitedURL url; | 443 MostVisitedURL url; |
458 GURL gurl(statement.ColumnString(0)); | 444 GURL gurl(statement.ColumnString(0)); |
459 url.url = gurl; | 445 url.url = gurl; |
460 url.title = statement.ColumnString16(2); | 446 url.title = statement.ColumnString16(2); |
461 url.last_forced_time = | 447 url.last_forced_time = |
462 base::Time::FromInternalValue(statement.ColumnInt64(10)); | 448 base::Time::FromInternalValue(statement.ColumnInt64(10)); |
463 std::string redirects = statement.ColumnString(4); | 449 std::string encoded_redirects = statement.ColumnString(4); |
464 SetRedirects(redirects, &url); | 450 DecodeRedirects(encoded_redirects, &url.redirects); |
465 urls->push_back(url); | 451 urls->push_back(url); |
466 | 452 |
467 std::vector<unsigned char> data; | 453 std::vector<unsigned char> data; |
468 statement.ColumnBlobAsVector(3, &data); | 454 statement.ColumnBlobAsVector(3, &data); |
469 Images thumbnail; | 455 Images thumbnail; |
470 if (!data.empty()) | 456 if (!data.empty()) |
471 thumbnail.thumbnail = base::RefCountedBytes::TakeVector(&data); | 457 thumbnail.thumbnail = base::RefCountedBytes::TakeVector(&data); |
472 thumbnail.thumbnail_score.boring_score = statement.ColumnDouble(5); | 458 thumbnail.thumbnail_score.boring_score = statement.ColumnDouble(5); |
473 thumbnail.thumbnail_score.good_clipping = statement.ColumnBool(6); | 459 thumbnail.thumbnail_score.good_clipping = statement.ColumnBool(6); |
474 thumbnail.thumbnail_score.at_top = statement.ColumnBool(7); | 460 thumbnail.thumbnail_score.at_top = statement.ColumnBool(7); |
(...skipping 28 matching lines...) Expand all Loading... | |
503 "UPDATE thumbnails SET " | 489 "UPDATE thumbnails SET " |
504 "title = ?, thumbnail = ?, redirects = ?, " | 490 "title = ?, thumbnail = ?, redirects = ?, " |
505 "boring_score = ?, good_clipping = ?, at_top = ?, last_updated = ?, " | 491 "boring_score = ?, good_clipping = ?, at_top = ?, last_updated = ?, " |
506 "load_completed = ?, last_forced = ?" | 492 "load_completed = ?, last_forced = ?" |
507 "WHERE url = ? ")); | 493 "WHERE url = ? ")); |
508 statement.BindString16(0, url.title); | 494 statement.BindString16(0, url.title); |
509 if (thumbnail.thumbnail.get() && thumbnail.thumbnail->front()) { | 495 if (thumbnail.thumbnail.get() && thumbnail.thumbnail->front()) { |
510 statement.BindBlob(1, thumbnail.thumbnail->front(), | 496 statement.BindBlob(1, thumbnail.thumbnail->front(), |
511 static_cast<int>(thumbnail.thumbnail->size())); | 497 static_cast<int>(thumbnail.thumbnail->size())); |
512 } | 498 } |
513 statement.BindString(2, GetRedirects(url)); | 499 statement.BindString(2, EncodeRedirects(url.redirects)); |
514 const ThumbnailScore& score = thumbnail.thumbnail_score; | 500 const ThumbnailScore& score = thumbnail.thumbnail_score; |
515 statement.BindDouble(3, score.boring_score); | 501 statement.BindDouble(3, score.boring_score); |
516 statement.BindBool(4, score.good_clipping); | 502 statement.BindBool(4, score.good_clipping); |
517 statement.BindBool(5, score.at_top); | 503 statement.BindBool(5, score.at_top); |
518 statement.BindInt64(6, score.time_at_snapshot.ToInternalValue()); | 504 statement.BindInt64(6, score.time_at_snapshot.ToInternalValue()); |
519 statement.BindBool(7, score.load_completed); | 505 statement.BindBool(7, score.load_completed); |
520 statement.BindInt64(8, url.last_forced_time.ToInternalValue()); | 506 statement.BindInt64(8, url.last_forced_time.ToInternalValue()); |
521 statement.BindString(9, url.url.spec()); | 507 statement.BindString(9, url.url.spec()); |
522 | 508 |
523 return statement.Run(); | 509 return statement.Run(); |
524 } | 510 } |
525 | 511 |
526 void TopSitesDatabase::AddPageThumbnail(const MostVisitedURL& url, | 512 void TopSitesDatabase::AddPageThumbnail(const MostVisitedURL& url, |
527 int new_rank, | 513 int new_rank, |
528 const Images& thumbnail) { | 514 const Images& thumbnail) { |
529 sql::Statement statement(db_->GetCachedStatement( | 515 sql::Statement statement(db_->GetCachedStatement( |
530 SQL_FROM_HERE, | 516 SQL_FROM_HERE, |
531 "INSERT OR REPLACE INTO thumbnails " | 517 "INSERT OR REPLACE INTO thumbnails " |
532 "(url, url_rank, title, thumbnail, redirects, " | 518 "(url, url_rank, title, thumbnail, redirects, " |
533 "boring_score, good_clipping, at_top, last_updated, load_completed, " | 519 "boring_score, good_clipping, at_top, last_updated, load_completed, " |
534 "last_forced) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")); | 520 "last_forced) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")); |
535 statement.BindString(0, url.url.spec()); | 521 statement.BindString(0, url.url.spec()); |
536 statement.BindInt(1, kRankOfForcedURL); // First make it a forced thumbnail. | 522 statement.BindInt(1, kRankOfForcedURL); // First make it a forced thumbnail. |
537 statement.BindString16(2, url.title); | 523 statement.BindString16(2, url.title); |
538 if (thumbnail.thumbnail.get() && thumbnail.thumbnail->front()) { | 524 if (thumbnail.thumbnail.get() && thumbnail.thumbnail->front()) { |
539 statement.BindBlob(3, thumbnail.thumbnail->front(), | 525 statement.BindBlob(3, thumbnail.thumbnail->front(), |
540 static_cast<int>(thumbnail.thumbnail->size())); | 526 static_cast<int>(thumbnail.thumbnail->size())); |
541 } | 527 } |
542 statement.BindString(4, GetRedirects(url)); | 528 statement.BindString(4, EncodeRedirects(url.redirects)); |
543 const ThumbnailScore& score = thumbnail.thumbnail_score; | 529 const ThumbnailScore& score = thumbnail.thumbnail_score; |
544 statement.BindDouble(5, score.boring_score); | 530 statement.BindDouble(5, score.boring_score); |
545 statement.BindBool(6, score.good_clipping); | 531 statement.BindBool(6, score.good_clipping); |
546 statement.BindBool(7, score.at_top); | 532 statement.BindBool(7, score.at_top); |
547 statement.BindInt64(8, score.time_at_snapshot.ToInternalValue()); | 533 statement.BindInt64(8, score.time_at_snapshot.ToInternalValue()); |
548 statement.BindBool(9, score.load_completed); | 534 statement.BindBool(9, score.load_completed); |
549 int64 last_forced = url.last_forced_time.ToInternalValue(); | 535 int64 last_forced = url.last_forced_time.ToInternalValue(); |
550 DCHECK((last_forced == 0) == (new_rank != kRankOfForcedURL)) | 536 DCHECK((last_forced == 0) == (new_rank != kRankOfForcedURL)) |
551 << "Thumbnail without a forced time stamp has a forced rank, or the " | 537 << "Thumbnail without a forced time stamp has a forced rank, or the " |
552 << "opposite."; | 538 << "opposite."; |
553 statement.BindInt64(10, last_forced); | 539 statement.BindInt64(10, last_forced); |
554 if (!statement.Run()) | 540 if (!statement.Run()) |
555 return; | 541 return; |
556 | 542 |
557 // Update rank if this is not a forced thumbnail. | 543 // Update rank if this is not a forced thumbnail. |
558 if (new_rank != kRankOfForcedURL) | 544 if (new_rank != kRankOfForcedURL) |
559 UpdatePageRankNoTransaction(url, new_rank); | 545 UpdatePageRankNoTransaction(url, new_rank); |
560 } | 546 } |
561 | 547 |
548 // static | |
549 std::string TopSitesDatabase::EncodeCSVString( | |
550 const std::vector<std::string> str_list) { | |
551 std::string csv; | |
552 for (std::vector<std::string>::const_iterator it = str_list.begin(); | |
553 it != str_list.end(); ++it) { | |
554 const std::string& str = *it; | |
555 if (it != str_list.begin()) | |
556 csv += ','; | |
557 csv += '"'; | |
558 for (std::string::const_iterator jt = str.begin(); jt != str.end(); ++jt) { | |
559 if (*jt == '"') | |
560 csv += '"'; | |
beaudoin
2014/09/09 21:32:27
Looks like you escape quotes as two quotes. Your c
huangs
2014/09/11 01:58:13
Comment in .h file updated.
| |
561 csv += *jt; | |
562 } | |
563 csv += '"'; | |
564 } | |
565 return csv; | |
566 } | |
567 | |
568 // static | |
569 bool TopSitesDatabase::DecodeCSVString(const std::string csv, | |
570 std::vector<std::string>* str_list) { | |
571 if (csv.empty()) { | |
572 str_list->clear(); | |
573 return true; | |
574 } | |
575 | |
576 enum { | |
577 SEEK_QUOTE, | |
578 READ_STRING, | |
579 SAW_ONE_QUOTE | |
580 } state = SEEK_QUOTE; | |
581 std::vector<std::string> out_list; | |
582 std::string str; | |
583 for (std::string::const_iterator it = csv.begin(); it != csv.end(); ++it) { | |
584 const char ch = *it; | |
585 if (state == SEEK_QUOTE) { | |
586 if (ch != '"') | |
587 return false; | |
588 state = READ_STRING; | |
589 } else if (state == READ_STRING) { | |
590 if (ch == '"') | |
591 state = SAW_ONE_QUOTE; | |
592 else | |
593 str += ch; | |
594 } else if (state == SAW_ONE_QUOTE) { | |
595 if (ch == '"') { | |
596 str += ch; | |
597 state = READ_STRING; | |
598 } else if (ch == ',') { | |
599 out_list.push_back(str); | |
600 str.clear(); | |
601 state = SEEK_QUOTE; | |
602 } else { | |
603 return false; | |
beaudoin
2014/09/09 21:32:27
You're strict about the fact that the comma must i
huangs
2014/09/11 01:58:13
Done.
| |
604 } | |
605 } else { | |
606 NOTREACHED(); | |
607 return false; | |
608 } | |
609 } | |
610 if (state != SAW_ONE_QUOTE) | |
611 return false; | |
612 out_list.push_back(str); | |
613 str_list->swap(out_list); | |
614 return true; | |
615 } | |
616 | |
617 // static | |
618 std::string TopSitesDatabase::EncodeRedirects(const RedirectList& redirects) { | |
619 std::vector<std::string> valid_urls; | |
620 for (size_t i = 0; i < redirects.size(); i++) { | |
621 // Example of invalid URL that may end up here: | |
622 // "data:text/plain,this string contains space". | |
623 if (redirects[i].is_valid()) | |
624 valid_urls.push_back(redirects[i].spec()); | |
625 } | |
626 return EncodeCSVString(valid_urls); | |
627 } | |
628 | |
629 // static | |
630 void TopSitesDatabase::DecodeRedirects(const std::string& encoded_redirects, | |
631 RedirectList* redirects) { | |
632 std::vector<std::string> redirects_vector; | |
633 if (!DecodeCSVString(encoded_redirects, &redirects_vector)) { | |
634 // Fall back to space-delimited list for backward compatibility. | |
635 base::SplitStringAlongWhitespace(encoded_redirects, &redirects_vector); | |
beaudoin
2014/09/09 21:32:27
I see, so you support DB in the old format too. It
huangs
2014/09/11 01:58:13
Changing version, but have to maintain backward co
| |
636 } | |
637 for (size_t i = 0; i < redirects_vector.size(); ++i) { | |
638 GURL redirect_url(redirects_vector[i]); | |
639 if (redirect_url.is_valid()) | |
640 redirects->push_back(redirect_url); | |
641 } | |
642 } | |
643 | |
562 void TopSitesDatabase::UpdatePageRank(const MostVisitedURL& url, | 644 void TopSitesDatabase::UpdatePageRank(const MostVisitedURL& url, |
563 int new_rank) { | 645 int new_rank) { |
564 DCHECK((url.last_forced_time.ToInternalValue() == 0) == | 646 DCHECK((url.last_forced_time.ToInternalValue() == 0) == |
565 (new_rank != kRankOfForcedURL)) | 647 (new_rank != kRankOfForcedURL)) |
566 << "Thumbnail without a forced time stamp has a forced rank, or the " | 648 << "Thumbnail without a forced time stamp has a forced rank, or the " |
567 << "opposite."; | 649 << "opposite."; |
568 sql::Transaction transaction(db_.get()); | 650 sql::Transaction transaction(db_.get()); |
569 transaction.Begin(); | 651 transaction.Begin(); |
570 UpdatePageRankNoTransaction(url, new_rank); | 652 UpdatePageRankNoTransaction(url, new_rank); |
571 transaction.Commit(); | 653 transaction.Commit(); |
(...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
724 db.get(), db_name)); | 806 db.get(), db_name)); |
725 db->set_page_size(4096); | 807 db->set_page_size(4096); |
726 db->set_cache_size(32); | 808 db->set_cache_size(32); |
727 | 809 |
728 if (!db->Open(db_name)) | 810 if (!db->Open(db_name)) |
729 return NULL; | 811 return NULL; |
730 return db.release(); | 812 return db.release(); |
731 } | 813 } |
732 | 814 |
733 } // namespace history | 815 } // namespace history |
OLD | NEW |