Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(97)

Side by Side Diff: chrome/browser/history/top_sites_database.cc

Issue 560543002: [Top Sites] Encoding redirects field in TopSitesDatabase, and adding validations (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/top_sites_database.h" 5 #include "chrome/browser/history/top_sites_database.h"
6 6
7 #include "base/files/file_util.h" 7 #include "base/files/file_util.h"
8 #include "base/memory/ref_counted.h" 8 #include "base/memory/ref_counted.h"
9 #include "base/metrics/histogram.h" 9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_split.h" 10 #include "base/strings/string_split.h"
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
51 51
52 // Version 3: b6d6a783/r231648 by beaudoin@chromium.org on 2013-10-29 52 // Version 3: b6d6a783/r231648 by beaudoin@chromium.org on 2013-10-29
53 // Version 2: eb0b24e6/r87284 by satorux@chromium.org on 2011-05-31 53 // Version 2: eb0b24e6/r87284 by satorux@chromium.org on 2011-05-31
54 // Version 1: 809cc4d8/r64072 by sky@chromium.org on 2010-10-27 (deprecated) 54 // Version 1: 809cc4d8/r64072 by sky@chromium.org on 2010-10-27 (deprecated)
55 55
56 // NOTE(shess): When changing the version, add a new golden file for 56 // NOTE(shess): When changing the version, add a new golden file for
57 // the new version and a test to verify that Init() works with it. 57 // the new version and a test to verify that Init() works with it.
58 // NOTE(shess): RecoverDatabaseOrRaze() depends on the specific 58 // NOTE(shess): RecoverDatabaseOrRaze() depends on the specific
59 // version number. The code is subtle and in development, contact me 59 // version number. The code is subtle and in development, contact me
60 // if the necessary changes are not obvious. 60 // if the necessary changes are not obvious.
61 static const int kVersionNumber = 3; 61 static const int kVersionNumber = 3;
beaudoin 2014/09/09 21:32:27 Aren't you changing the DB in a way that requires
huangs 2014/09/11 01:58:13 The code is backwards compatible for reading.
62 static const int kDeprecatedVersionNumber = 1; // and earlier. 62 static const int kDeprecatedVersionNumber = 1; // and earlier.
63 63
64 bool InitTables(sql::Connection* db) { 64 bool InitTables(sql::Connection* db) {
65 const char kThumbnailsSql[] = 65 const char kThumbnailsSql[] =
66 "CREATE TABLE IF NOT EXISTS thumbnails (" 66 "CREATE TABLE IF NOT EXISTS thumbnails ("
67 "url LONGVARCHAR PRIMARY KEY," 67 "url LONGVARCHAR PRIMARY KEY,"
68 "url_rank INTEGER," 68 "url_rank INTEGER,"
69 "title LONGVARCHAR," 69 "title LONGVARCHAR,"
70 "thumbnail BLOB," 70 "thumbnail BLOB,"
71 "redirects LONGVARCHAR," 71 "redirects LONGVARCHAR,"
72 "boring_score DOUBLE DEFAULT 1.0," 72 "boring_score DOUBLE DEFAULT 1.0,"
73 "good_clipping INTEGER DEFAULT 0," 73 "good_clipping INTEGER DEFAULT 0,"
74 "at_top INTEGER DEFAULT 0," 74 "at_top INTEGER DEFAULT 0,"
75 "last_updated INTEGER DEFAULT 0," 75 "last_updated INTEGER DEFAULT 0,"
76 "load_completed INTEGER DEFAULT 0," 76 "load_completed INTEGER DEFAULT 0,"
77 "last_forced INTEGER DEFAULT 0)"; 77 "last_forced INTEGER DEFAULT 0)";
78 return db->Execute(kThumbnailsSql); 78 return db->Execute(kThumbnailsSql);
79 } 79 }
80 80
81 // Encodes redirects into a string.
82 std::string GetRedirects(const history::MostVisitedURL& url) {
83 std::vector<std::string> redirects;
84 for (size_t i = 0; i < url.redirects.size(); i++)
85 redirects.push_back(url.redirects[i].spec());
86 return JoinString(redirects, ' ');
87 }
88
89 // Decodes redirects from a string and sets them for the url.
90 void SetRedirects(const std::string& redirects, history::MostVisitedURL* url) {
91 std::vector<std::string> redirects_vector;
92 base::SplitStringAlongWhitespace(redirects, &redirects_vector);
93 for (size_t i = 0; i < redirects_vector.size(); ++i)
94 url->redirects.push_back(GURL(redirects_vector[i]));
95 }
96
97 // Track various failure (and success) cases in recovery code. 81 // Track various failure (and success) cases in recovery code.
98 // 82 //
99 // TODO(shess): The recovery code is complete, but by nature runs in challenging 83 // TODO(shess): The recovery code is complete, but by nature runs in challenging
100 // circumstances, so initially the default error response is to leave the 84 // circumstances, so initially the default error response is to leave the
101 // existing database in place. This histogram is intended to expose the 85 // existing database in place. This histogram is intended to expose the
102 // failures seen in the fleet. Frequent failure cases can be explored more 86 // failures seen in the fleet. Frequent failure cases can be explored more
103 // deeply to see if the complexity to fix them is warranted. Infrequent failure 87 // deeply to see if the complexity to fix them is warranted. Infrequent failure
104 // cases can be resolved by marking the database unrecoverable (which will 88 // cases can be resolved by marking the database unrecoverable (which will
105 // delete the data). 89 // delete the data).
106 // 90 //
(...skipping 18 matching lines...) Expand all
125 109
126 // Track invariants resolved by FixThumbnailsTable(). 110 // Track invariants resolved by FixThumbnailsTable().
127 RECOVERY_EVENT_INVARIANT_RANK, 111 RECOVERY_EVENT_INVARIANT_RANK,
128 RECOVERY_EVENT_INVARIANT_REDIRECT, 112 RECOVERY_EVENT_INVARIANT_REDIRECT,
129 RECOVERY_EVENT_INVARIANT_CONTIGUOUS, 113 RECOVERY_EVENT_INVARIANT_CONTIGUOUS,
130 114
131 // Always keep this at the end. 115 // Always keep this at the end.
132 RECOVERY_EVENT_MAX, 116 RECOVERY_EVENT_MAX,
133 }; 117 };
134 118
119 const char kDataUrlPrefix[] = "data:";
120
135 void RecordRecoveryEvent(RecoveryEventType recovery_event) { 121 void RecordRecoveryEvent(RecoveryEventType recovery_event) {
136 UMA_HISTOGRAM_ENUMERATION("History.TopSitesRecovery", 122 UMA_HISTOGRAM_ENUMERATION("History.TopSitesRecovery",
137 recovery_event, RECOVERY_EVENT_MAX); 123 recovery_event, RECOVERY_EVENT_MAX);
138 } 124 }
139 125
140 // Most corruption comes down to atomic updates between pages being broken 126 // Most corruption comes down to atomic updates between pages being broken
141 // somehow. This can result in either missing data, or overlapping data, 127 // somehow. This can result in either missing data, or overlapping data,
142 // depending on the operation broken. This table has large rows, which will use 128 // depending on the operation broken. This table has large rows, which will use
143 // overflow pages, so it is possible (though unlikely) that a chain could fit 129 // overflow pages, so it is possible (though unlikely) that a chain could fit
144 // together and yield a row with errors. 130 // together and yield a row with errors.
(...skipping 308 matching lines...) Expand 10 before | Expand all | Expand 10 after
453 439
454 while (statement.Step()) { 440 while (statement.Step()) {
455 // Results are sorted by url_rank. For forced thumbnails with url_rank = -1, 441 // Results are sorted by url_rank. For forced thumbnails with url_rank = -1,
456 // thumbnails are sorted by last_forced. 442 // thumbnails are sorted by last_forced.
457 MostVisitedURL url; 443 MostVisitedURL url;
458 GURL gurl(statement.ColumnString(0)); 444 GURL gurl(statement.ColumnString(0));
459 url.url = gurl; 445 url.url = gurl;
460 url.title = statement.ColumnString16(2); 446 url.title = statement.ColumnString16(2);
461 url.last_forced_time = 447 url.last_forced_time =
462 base::Time::FromInternalValue(statement.ColumnInt64(10)); 448 base::Time::FromInternalValue(statement.ColumnInt64(10));
463 std::string redirects = statement.ColumnString(4); 449 std::string encoded_redirects = statement.ColumnString(4);
464 SetRedirects(redirects, &url); 450 DecodeRedirects(encoded_redirects, &url.redirects);
465 urls->push_back(url); 451 urls->push_back(url);
466 452
467 std::vector<unsigned char> data; 453 std::vector<unsigned char> data;
468 statement.ColumnBlobAsVector(3, &data); 454 statement.ColumnBlobAsVector(3, &data);
469 Images thumbnail; 455 Images thumbnail;
470 if (!data.empty()) 456 if (!data.empty())
471 thumbnail.thumbnail = base::RefCountedBytes::TakeVector(&data); 457 thumbnail.thumbnail = base::RefCountedBytes::TakeVector(&data);
472 thumbnail.thumbnail_score.boring_score = statement.ColumnDouble(5); 458 thumbnail.thumbnail_score.boring_score = statement.ColumnDouble(5);
473 thumbnail.thumbnail_score.good_clipping = statement.ColumnBool(6); 459 thumbnail.thumbnail_score.good_clipping = statement.ColumnBool(6);
474 thumbnail.thumbnail_score.at_top = statement.ColumnBool(7); 460 thumbnail.thumbnail_score.at_top = statement.ColumnBool(7);
(...skipping 28 matching lines...) Expand all
503 "UPDATE thumbnails SET " 489 "UPDATE thumbnails SET "
504 "title = ?, thumbnail = ?, redirects = ?, " 490 "title = ?, thumbnail = ?, redirects = ?, "
505 "boring_score = ?, good_clipping = ?, at_top = ?, last_updated = ?, " 491 "boring_score = ?, good_clipping = ?, at_top = ?, last_updated = ?, "
506 "load_completed = ?, last_forced = ?" 492 "load_completed = ?, last_forced = ?"
507 "WHERE url = ? ")); 493 "WHERE url = ? "));
508 statement.BindString16(0, url.title); 494 statement.BindString16(0, url.title);
509 if (thumbnail.thumbnail.get() && thumbnail.thumbnail->front()) { 495 if (thumbnail.thumbnail.get() && thumbnail.thumbnail->front()) {
510 statement.BindBlob(1, thumbnail.thumbnail->front(), 496 statement.BindBlob(1, thumbnail.thumbnail->front(),
511 static_cast<int>(thumbnail.thumbnail->size())); 497 static_cast<int>(thumbnail.thumbnail->size()));
512 } 498 }
513 statement.BindString(2, GetRedirects(url)); 499 statement.BindString(2, EncodeRedirects(url.redirects));
514 const ThumbnailScore& score = thumbnail.thumbnail_score; 500 const ThumbnailScore& score = thumbnail.thumbnail_score;
515 statement.BindDouble(3, score.boring_score); 501 statement.BindDouble(3, score.boring_score);
516 statement.BindBool(4, score.good_clipping); 502 statement.BindBool(4, score.good_clipping);
517 statement.BindBool(5, score.at_top); 503 statement.BindBool(5, score.at_top);
518 statement.BindInt64(6, score.time_at_snapshot.ToInternalValue()); 504 statement.BindInt64(6, score.time_at_snapshot.ToInternalValue());
519 statement.BindBool(7, score.load_completed); 505 statement.BindBool(7, score.load_completed);
520 statement.BindInt64(8, url.last_forced_time.ToInternalValue()); 506 statement.BindInt64(8, url.last_forced_time.ToInternalValue());
521 statement.BindString(9, url.url.spec()); 507 statement.BindString(9, url.url.spec());
522 508
523 return statement.Run(); 509 return statement.Run();
524 } 510 }
525 511
526 void TopSitesDatabase::AddPageThumbnail(const MostVisitedURL& url, 512 void TopSitesDatabase::AddPageThumbnail(const MostVisitedURL& url,
527 int new_rank, 513 int new_rank,
528 const Images& thumbnail) { 514 const Images& thumbnail) {
529 sql::Statement statement(db_->GetCachedStatement( 515 sql::Statement statement(db_->GetCachedStatement(
530 SQL_FROM_HERE, 516 SQL_FROM_HERE,
531 "INSERT OR REPLACE INTO thumbnails " 517 "INSERT OR REPLACE INTO thumbnails "
532 "(url, url_rank, title, thumbnail, redirects, " 518 "(url, url_rank, title, thumbnail, redirects, "
533 "boring_score, good_clipping, at_top, last_updated, load_completed, " 519 "boring_score, good_clipping, at_top, last_updated, load_completed, "
534 "last_forced) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")); 520 "last_forced) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"));
535 statement.BindString(0, url.url.spec()); 521 statement.BindString(0, url.url.spec());
536 statement.BindInt(1, kRankOfForcedURL); // First make it a forced thumbnail. 522 statement.BindInt(1, kRankOfForcedURL); // First make it a forced thumbnail.
537 statement.BindString16(2, url.title); 523 statement.BindString16(2, url.title);
538 if (thumbnail.thumbnail.get() && thumbnail.thumbnail->front()) { 524 if (thumbnail.thumbnail.get() && thumbnail.thumbnail->front()) {
539 statement.BindBlob(3, thumbnail.thumbnail->front(), 525 statement.BindBlob(3, thumbnail.thumbnail->front(),
540 static_cast<int>(thumbnail.thumbnail->size())); 526 static_cast<int>(thumbnail.thumbnail->size()));
541 } 527 }
542 statement.BindString(4, GetRedirects(url)); 528 statement.BindString(4, EncodeRedirects(url.redirects));
543 const ThumbnailScore& score = thumbnail.thumbnail_score; 529 const ThumbnailScore& score = thumbnail.thumbnail_score;
544 statement.BindDouble(5, score.boring_score); 530 statement.BindDouble(5, score.boring_score);
545 statement.BindBool(6, score.good_clipping); 531 statement.BindBool(6, score.good_clipping);
546 statement.BindBool(7, score.at_top); 532 statement.BindBool(7, score.at_top);
547 statement.BindInt64(8, score.time_at_snapshot.ToInternalValue()); 533 statement.BindInt64(8, score.time_at_snapshot.ToInternalValue());
548 statement.BindBool(9, score.load_completed); 534 statement.BindBool(9, score.load_completed);
549 int64 last_forced = url.last_forced_time.ToInternalValue(); 535 int64 last_forced = url.last_forced_time.ToInternalValue();
550 DCHECK((last_forced == 0) == (new_rank != kRankOfForcedURL)) 536 DCHECK((last_forced == 0) == (new_rank != kRankOfForcedURL))
551 << "Thumbnail without a forced time stamp has a forced rank, or the " 537 << "Thumbnail without a forced time stamp has a forced rank, or the "
552 << "opposite."; 538 << "opposite.";
553 statement.BindInt64(10, last_forced); 539 statement.BindInt64(10, last_forced);
554 if (!statement.Run()) 540 if (!statement.Run())
555 return; 541 return;
556 542
557 // Update rank if this is not a forced thumbnail. 543 // Update rank if this is not a forced thumbnail.
558 if (new_rank != kRankOfForcedURL) 544 if (new_rank != kRankOfForcedURL)
559 UpdatePageRankNoTransaction(url, new_rank); 545 UpdatePageRankNoTransaction(url, new_rank);
560 } 546 }
561 547
548 // static
549 std::string TopSitesDatabase::EncodeCSVString(
550 const std::vector<std::string> str_list) {
551 std::string csv;
552 for (std::vector<std::string>::const_iterator it = str_list.begin();
553 it != str_list.end(); ++it) {
554 const std::string& str = *it;
555 if (it != str_list.begin())
556 csv += ',';
557 csv += '"';
558 for (std::string::const_iterator jt = str.begin(); jt != str.end(); ++jt) {
559 if (*jt == '"')
560 csv += '"';
beaudoin 2014/09/09 21:32:27 Looks like you escape quotes as two quotes. Your c
huangs 2014/09/11 01:58:13 Comment in .h file updated.
561 csv += *jt;
562 }
563 csv += '"';
564 }
565 return csv;
566 }
567
568 // static
569 bool TopSitesDatabase::DecodeCSVString(const std::string csv,
570 std::vector<std::string>* str_list) {
571 if (csv.empty()) {
572 str_list->clear();
573 return true;
574 }
575
576 enum {
577 SEEK_QUOTE,
578 READ_STRING,
579 SAW_ONE_QUOTE
580 } state = SEEK_QUOTE;
581 std::vector<std::string> out_list;
582 std::string str;
583 for (std::string::const_iterator it = csv.begin(); it != csv.end(); ++it) {
584 const char ch = *it;
585 if (state == SEEK_QUOTE) {
586 if (ch != '"')
587 return false;
588 state = READ_STRING;
589 } else if (state == READ_STRING) {
590 if (ch == '"')
591 state = SAW_ONE_QUOTE;
592 else
593 str += ch;
594 } else if (state == SAW_ONE_QUOTE) {
595 if (ch == '"') {
596 str += ch;
597 state = READ_STRING;
598 } else if (ch == ',') {
599 out_list.push_back(str);
600 str.clear();
601 state = SEEK_QUOTE;
602 } else {
603 return false;
beaudoin 2014/09/09 21:32:27 You're strict about the fact that the comma must i
huangs 2014/09/11 01:58:13 Done.
604 }
605 } else {
606 NOTREACHED();
607 return false;
608 }
609 }
610 if (state != SAW_ONE_QUOTE)
611 return false;
612 out_list.push_back(str);
613 str_list->swap(out_list);
614 return true;
615 }
616
617 // static
618 std::string TopSitesDatabase::EncodeRedirects(const RedirectList& redirects) {
619 std::vector<std::string> valid_urls;
620 for (size_t i = 0; i < redirects.size(); i++) {
621 // Example of invalid URL that may end up here:
622 // "data:text/plain,this string contains space".
623 if (redirects[i].is_valid())
624 valid_urls.push_back(redirects[i].spec());
625 }
626 return EncodeCSVString(valid_urls);
627 }
628
629 // static
630 void TopSitesDatabase::DecodeRedirects(const std::string& encoded_redirects,
631 RedirectList* redirects) {
632 std::vector<std::string> redirects_vector;
633 if (!DecodeCSVString(encoded_redirects, &redirects_vector)) {
634 // Fall back to space-delimited list for backward compatibility.
635 base::SplitStringAlongWhitespace(encoded_redirects, &redirects_vector);
beaudoin 2014/09/09 21:32:27 I see, so you support DB in the old format too. It
huangs 2014/09/11 01:58:13 Changing version, but have to maintain backward co
636 }
637 for (size_t i = 0; i < redirects_vector.size(); ++i) {
638 GURL redirect_url(redirects_vector[i]);
639 if (redirect_url.is_valid())
640 redirects->push_back(redirect_url);
641 }
642 }
643
562 void TopSitesDatabase::UpdatePageRank(const MostVisitedURL& url, 644 void TopSitesDatabase::UpdatePageRank(const MostVisitedURL& url,
563 int new_rank) { 645 int new_rank) {
564 DCHECK((url.last_forced_time.ToInternalValue() == 0) == 646 DCHECK((url.last_forced_time.ToInternalValue() == 0) ==
565 (new_rank != kRankOfForcedURL)) 647 (new_rank != kRankOfForcedURL))
566 << "Thumbnail without a forced time stamp has a forced rank, or the " 648 << "Thumbnail without a forced time stamp has a forced rank, or the "
567 << "opposite."; 649 << "opposite.";
568 sql::Transaction transaction(db_.get()); 650 sql::Transaction transaction(db_.get());
569 transaction.Begin(); 651 transaction.Begin();
570 UpdatePageRankNoTransaction(url, new_rank); 652 UpdatePageRankNoTransaction(url, new_rank);
571 transaction.Commit(); 653 transaction.Commit();
(...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after
724 db.get(), db_name)); 806 db.get(), db_name));
725 db->set_page_size(4096); 807 db->set_page_size(4096);
726 db->set_cache_size(32); 808 db->set_cache_size(32);
727 809
728 if (!db->Open(db_name)) 810 if (!db->Open(db_name))
729 return NULL; 811 return NULL;
730 return db.release(); 812 return db.release();
731 } 813 }
732 814
733 } // namespace history 815 } // namespace history
OLD NEW

Powered by Google App Engine
This is Rietveld 408576698