| Index: chrome/browser/history/url_index_private_data.cc
|
| ===================================================================
|
| --- chrome/browser/history/url_index_private_data.cc (revision 126922)
|
| +++ chrome/browser/history/url_index_private_data.cc (working copy)
|
| @@ -14,11 +14,14 @@
|
| #include "base/i18n/case_conversion.h"
|
| #include "base/metrics/histogram.h"
|
| #include "base/string_util.h"
|
| -#include "base/threading/thread_restrictions.h"
|
| +#include "base/time.h"
|
| #include "base/utf_string_conversions.h"
|
| #include "chrome/browser/autocomplete/autocomplete.h"
|
| #include "chrome/browser/history/history_database.h"
|
| -#include "chrome/common/url_constants.h"
|
| +#include "chrome/browser/history/in_memory_url_index.h"
|
| +#include "content/public/browser/notification_details.h"
|
| +#include "content/public/browser/notification_service.h"
|
| +#include "content/public/browser/notification_source.h"
|
| #include "net/base/net_util.h"
|
| #include "third_party/protobuf/src/google/protobuf/repeated_field.h"
|
|
|
| @@ -123,7 +126,6 @@
|
| pre_filter_item_count_(0),
|
| post_filter_item_count_(0),
|
| post_scoring_item_count_(0) {
|
| - URLIndexPrivateData::InitializeSchemeWhitelist(&scheme_whitelist_);
|
| }
|
|
|
| URLIndexPrivateData::~URLIndexPrivateData() {}
|
| @@ -139,18 +141,42 @@
|
| word_starts_map_.clear();
|
| }
|
|
|
| +bool URLIndexPrivateData::Empty() const {
|
| + return history_info_map_.size() == 0;  // True when no rows are held.
|
| +}
|
| +
|
| +scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::Duplicate() const {
|
| + // Returns a new instance holding copies of all index containers. This
|
| + // includes word_starts_map_, which IndexRow() and UpdateURL() populate
|
| + // alongside the other maps. Not copied: search_term_cache_,
|
| + // pre_filter_item_count_, post_filter_item_count_, post_scoring_item_count_.
|
| + scoped_refptr<URLIndexPrivateData> data_copy = new URLIndexPrivateData;
|
| + data_copy->word_list_ = word_list_;
|
| + data_copy->available_words_ = available_words_;
|
| + data_copy->word_map_ = word_map_;
|
| + data_copy->char_word_map_ = char_word_map_;
|
| + data_copy->word_id_history_map_ = word_id_history_map_;
|
| + data_copy->history_id_word_map_ = history_id_word_map_;
|
| + data_copy->history_info_map_ = history_info_map_;
|
| + data_copy->word_starts_map_ = word_starts_map_;
|
| + return data_copy;
|
| +}
|
| +
|
| // Cache Updating --------------------------------------------------------------
|
|
|
| -bool URLIndexPrivateData::IndexRow(const URLRow& row) {
|
| +bool URLIndexPrivateData::IndexRow(
|
| + const URLRow& row,
|
| + const std::string& languages,
|
| + const std::set<std::string>& scheme_whitelist) {
|
| const GURL& gurl(row.url());
|
|
|
| // Index only URLs with a whitelisted scheme.
|
| - if (!URLIndexPrivateData::URLSchemeIsWhitelisted(gurl))
|
| + if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist))
|
| return false;
|
|
|
| URLID row_id = row.id();
|
| // Strip out username and password before saving and indexing.
|
| - string16 url(net::FormatUrl(gurl, languages_,
|
| + string16 url(net::FormatUrl(gurl, languages,
|
| net::kFormatUrlOmitUsernamePassword,
|
| net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
|
| NULL, NULL, NULL));
|
| @@ -168,17 +194,18 @@
|
|
|
| // Index the words contained in the URL and title of the row.
|
| RowWordStarts word_starts;
|
| - AddRowWordsToIndex(new_row, &word_starts);
|
| + AddRowWordsToIndex(new_row, &word_starts, languages);
|
| word_starts_map_[history_id] = word_starts;
|
| return true;
|
| }
|
|
|
| void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row,
|
| - RowWordStarts* word_starts) {
|
| + RowWordStarts* word_starts,
|
| + const std::string& languages) {
|
| HistoryID history_id = static_cast<HistoryID>(row.id());
|
| // Split URL into individual, unique words then add in the title words.
|
| const GURL& gurl(row.url());
|
| - string16 url(net::FormatUrl(gurl, languages_,
|
| + string16 url(net::FormatUrl(gurl, languages,
|
| net::kFormatUrlOmitUsernamePassword,
|
| net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
|
| NULL, NULL, NULL));
|
| @@ -307,7 +334,10 @@
|
| }
|
| }
|
|
|
| -bool URLIndexPrivateData::UpdateURL(const URLRow& row) {
|
| +bool URLIndexPrivateData::UpdateURL(
|
| + const URLRow& row,
|
| + const std::string& languages,
|
| + const std::set<std::string>& scheme_whitelist) {
|
| // The row may or may not already be in our index. If it is not already
|
| // indexed and it qualifies then it gets indexed. If it is already
|
| // indexed and still qualifies then it gets updated, otherwise it
|
| @@ -319,8 +349,8 @@
|
| // This new row should be indexed if it qualifies.
|
| URLRow new_row(row);
|
| new_row.set_id(row_id);
|
| - row_was_updated =
|
| - RowQualifiesAsSignificant(new_row, base::Time()) && IndexRow(new_row);
|
| + row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&
|
| + IndexRow(new_row, languages, scheme_whitelist);
|
| } else if (RowQualifiesAsSignificant(row, base::Time())) {
|
| // This indexed row still qualifies and will be re-indexed.
|
| // The url won't have changed but the title, visit count, etc.
|
| @@ -341,7 +371,7 @@
|
| RemoveRowWordsFromIndex(row_to_update);
|
| row_to_update.set_title(row.title());
|
| RowWordStarts word_starts;
|
| - AddRowWordsToIndex(row_to_update, &word_starts);
|
| + AddRowWordsToIndex(row_to_update, &word_starts, languages);
|
| word_starts_map_[row_id] = word_starts;
|
| }
|
| row_was_updated = true;
|
| @@ -383,10 +413,6 @@
|
| return true;
|
| }
|
|
|
| -bool URLIndexPrivateData::URLSchemeIsWhitelisted(const GURL& gurl) const {
|
| - return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end();
|
| -}
|
| -
|
| // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------
|
|
|
| URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(
|
| @@ -905,24 +931,19 @@
|
| return word_id_set;
|
| }
|
|
|
| +// Cache Saving ----------------------------------------------------------------
|
| +
|
| // static
|
| -void URLIndexPrivateData::InitializeSchemeWhitelist(
|
| - std::set<std::string>* whitelist) {
|
| - DCHECK(whitelist);
|
| - whitelist->insert(std::string(chrome::kAboutScheme));
|
| - whitelist->insert(std::string(chrome::kChromeUIScheme));
|
| - whitelist->insert(std::string(chrome::kFileScheme));
|
| - whitelist->insert(std::string(chrome::kFtpScheme));
|
| - whitelist->insert(std::string(chrome::kHttpScheme));
|
| - whitelist->insert(std::string(chrome::kHttpsScheme));
|
| - whitelist->insert(std::string(chrome::kMailToScheme));
|
| +void URLIndexPrivateData::WritePrivateDataToCacheFileTask(
|
| + scoped_refptr<URLIndexPrivateData> private_data,
|
| + const FilePath& file_path,
|
| + scoped_refptr<RefCountedBool> succeeded) {  // Receives the save result.
|
| + DCHECK(private_data.get());  // Needs an instance to call SaveToFile() on.
|
| + DCHECK(!file_path.empty());
|
| + succeeded->set_value(private_data->SaveToFile(file_path));
|
| }
|
|
|
| -// Cache Saving ----------------------------------------------------------------
|
| -
|
| bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) {
|
| - // TODO(mrossetti): Move File IO to another thread.
|
| - base::ThreadRestrictions::ScopedAllowIO allow_io;
|
| base::TimeTicks beginning_time = base::TimeTicks::Now();
|
| InMemoryURLIndexCacheItem index_cache;
|
| SavePrivateData(&index_cache);
|
| @@ -1030,8 +1051,7 @@
|
| map_entry->set_history_id(iter->first);
|
| const URLRow& url_row(iter->second);
|
| // Note: We only save information that contributes to the index so there
|
| - // is no need to save search_term_cache_ (not persistent),
|
| - // languages_, etc.
|
| + // is no need to save search_term_cache_ (not persistent).
|
| map_entry->set_visit_count(url_row.visit_count());
|
| map_entry->set_typed_count(url_row.typed_count());
|
| map_entry->set_last_visit(url_row.last_visit().ToInternalValue());
|
| @@ -1070,60 +1090,68 @@
|
|
|
| // Cache Restoring -------------------------------------------------------------
|
|
|
| -bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) {
|
| - // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.
|
| - // That is: ensure that the database has not been modified since the cache
|
| - // was last saved. DB file modification date is inadequate. There are no
|
| - // SQLite table checksums automatically stored.
|
| - Clear(); // Start with a clean slate.
|
| +// static. NOTE(review): |private_data| is a by-value copy; result is dropped.
|
| +void URLIndexPrivateData::RestoreFromFileTask(
|
| + const FilePath& file_path,
|
| + scoped_refptr<URLIndexPrivateData> private_data,
|
| + std::string languages) {
|
| + private_data = URLIndexPrivateData::RestoreFromFile(file_path, languages);
|
| +}
|
|
|
| - // FIXME(mrossetti): Move File IO to another thread.
|
| - base::ThreadRestrictions::ScopedAllowIO allow_io;
|
| +// static
|
| +scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile(
|
| + const FilePath& file_path,
|
| + const std::string& languages) {
|
| base::TimeTicks beginning_time = base::TimeTicks::Now();
|
| if (!file_util::PathExists(file_path))
|
| - return false;
|
| + return NULL;
|
| std::string data;
|
| // If there is no cache file then simply give up. This will cause us to
|
| // attempt to rebuild from the history database.
|
| if (!file_util::ReadFileToString(file_path, &data))
|
| - return false;
|
| + return NULL;
|
|
|
| + scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData);
|
| InMemoryURLIndexCacheItem index_cache;
|
| if (!index_cache.ParseFromArray(data.c_str(), data.size())) {
|
| - LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from "
|
| + LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from "
|
| << file_path.value();
|
| - return false;
|
| + return NULL;  // An unparseable cache counts as a failed restore.
|
| }
|
|
|
| - if (!RestorePrivateData(index_cache)) {
|
| - Clear(); // Back to square one -- must build from scratch.
|
| - return false;
|
| - }
|
| + if (!restored_data->RestorePrivateData(index_cache, languages))
|
| + return NULL;
|
|
|
| UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",
|
| base::TimeTicks::Now() - beginning_time);
|
| UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
|
| - history_id_word_map_.size());
|
| + restored_data->history_id_word_map_.size());
|
| UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());
|
| - UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size());
|
| - UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size());
|
| - return true;
|
| + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
|
| + restored_data->word_map_.size());
|
| + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
|
| + restored_data->char_word_map_.size());
|
| + if (restored_data->Empty())
|
| + return NULL; // 'No data' is the same as a failed reload.
|
| + return restored_data;
|
| +}
|
|
|
| // static
|
| -URLIndexPrivateData* URLIndexPrivateData::RebuildFromHistory(
|
| - HistoryDatabase* history_db) {
|
| +scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(
|
| + HistoryDatabase* history_db,
|
| + const std::string& languages,
|
| + const std::set<std::string>& scheme_whitelist) {
|
| if (!history_db)
|
| return NULL;
|
|
|
| base::TimeTicks beginning_time = base::TimeTicks::Now();
|
|
|
| - scoped_ptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData);
|
| + scoped_refptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData);
|
| URLDatabase::URLEnumerator history_enum;
|
| if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
|
| return NULL;
|
| for (URLRow row; history_enum.GetNextURL(&row); )
|
| - rebuilt_data->IndexRow(row);
|
| + rebuilt_data->IndexRow(row, languages, scheme_whitelist);
|
|
|
| UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
|
| base::TimeTicks::Now() - beginning_time);
|
| @@ -1133,16 +1161,17 @@
|
| rebuilt_data->word_map_.size());
|
| UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
|
| rebuilt_data->char_word_map_.size());
|
| - return rebuilt_data.release();
|
| + return rebuilt_data;
|
| }
|
|
|
| bool URLIndexPrivateData::RestorePrivateData(
|
| - const InMemoryURLIndexCacheItem& cache) {
|
| + const InMemoryURLIndexCacheItem& cache,
|
| + const std::string& languages) {  // Passed on to RestoreWordStartsMap().
|
| if (cache.has_version())
|
| restored_cache_version_ = cache.version();
|
| return RestoreWordList(cache) && RestoreWordMap(cache) &&
|
| RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&
|
| - RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache);
|
| + RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache, languages);
|
| }
|
|
|
| bool URLIndexPrivateData::RestoreWordList(
|
| @@ -1264,7 +1293,8 @@
|
| }
|
|
|
| bool URLIndexPrivateData::RestoreWordStartsMap(
|
| - const InMemoryURLIndexCacheItem& cache) {
|
| + const InMemoryURLIndexCacheItem& cache,
|
| + const std::string& languages) {
|
| // Note that this function must be called after RestoreHistoryInfoMap() has
|
| // been run as the word starts may have to be recalculated from the urls and
|
| // page titles.
|
| @@ -1299,7 +1329,7 @@
|
| iter != history_info_map_.end(); ++iter) {
|
| RowWordStarts word_starts;
|
| const URLRow& row(iter->second);
|
| - string16 url(net::FormatUrl(row.url(), languages_,
|
| + string16 url(net::FormatUrl(row.url(), languages,
|
| net::kFormatUrlOmitUsernamePassword,
|
| net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
|
| NULL, NULL, NULL));
|
| @@ -1313,4 +1343,11 @@
|
| return true;
|
| }
|
|
|
| +// static
|
| +bool URLIndexPrivateData::URLSchemeIsWhitelisted(
|
| + const GURL& gurl,
|
| + const std::set<std::string>& whitelist) {
|
| + return whitelist.count(gurl.scheme()) != 0;  // Set membership test.
|
| +}
|
| +
|
| } // namespace history
|
|
|