Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5430)

Unified Diff: chrome/browser/history/url_index_private_data.cc

Issue 9030031: Move InMemoryURLIndex Caching Operations to FILE Thread (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Syncing with hopes of pleasing trybot update Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « chrome/browser/history/url_index_private_data.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: chrome/browser/history/url_index_private_data.cc
===================================================================
--- chrome/browser/history/url_index_private_data.cc (revision 126922)
+++ chrome/browser/history/url_index_private_data.cc (working copy)
@@ -14,11 +14,14 @@
#include "base/i18n/case_conversion.h"
#include "base/metrics/histogram.h"
#include "base/string_util.h"
-#include "base/threading/thread_restrictions.h"
+#include "base/time.h"
#include "base/utf_string_conversions.h"
#include "chrome/browser/autocomplete/autocomplete.h"
#include "chrome/browser/history/history_database.h"
-#include "chrome/common/url_constants.h"
+#include "chrome/browser/history/in_memory_url_index.h"
+#include "content/public/browser/notification_details.h"
+#include "content/public/browser/notification_service.h"
+#include "content/public/browser/notification_source.h"
#include "net/base/net_util.h"
#include "third_party/protobuf/src/google/protobuf/repeated_field.h"
@@ -123,7 +126,6 @@
pre_filter_item_count_(0),
post_filter_item_count_(0),
post_scoring_item_count_(0) {
- URLIndexPrivateData::InitializeSchemeWhitelist(&scheme_whitelist_);
}
URLIndexPrivateData::~URLIndexPrivateData() {}
@@ -139,18 +141,42 @@
word_starts_map_.clear();
}
+bool URLIndexPrivateData::Empty() const {  // True iff history_info_map_ has no entries.
+ return history_info_map_.empty();
+}
+
+// Returns a new URLIndexPrivateData holding copies of this object's index
+// containers. Not copied: search_term_cache_, pre_filter_item_count_,
+// post_filter_item_count_ and post_scoring_item_count_.
+// NOTE(review): word_starts_map_ is also left empty in the copy -- confirm
+// that this omission is intentional.
+scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::Duplicate() const {
+ scoped_refptr<URLIndexPrivateData> data_copy = new URLIndexPrivateData;
+ data_copy->word_list_ = word_list_;
+ data_copy->available_words_ = available_words_;
+ data_copy->word_map_ = word_map_;
+ data_copy->char_word_map_ = char_word_map_;
+ data_copy->word_id_history_map_ = word_id_history_map_;
+ data_copy->history_id_word_map_ = history_id_word_map_;
+ data_copy->history_info_map_ = history_info_map_;
+ return data_copy;
+}
+
// Cache Updating --------------------------------------------------------------
-bool URLIndexPrivateData::IndexRow(const URLRow& row) {
+bool URLIndexPrivateData::IndexRow(
+ const URLRow& row,
+ const std::string& languages,
+ const std::set<std::string>& scheme_whitelist) {
const GURL& gurl(row.url());
// Index only URLs with a whitelisted scheme.
- if (!URLIndexPrivateData::URLSchemeIsWhitelisted(gurl))
+ if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist))
return false;
URLID row_id = row.id();
// Strip out username and password before saving and indexing.
- string16 url(net::FormatUrl(gurl, languages_,
+ string16 url(net::FormatUrl(gurl, languages,
net::kFormatUrlOmitUsernamePassword,
net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
NULL, NULL, NULL));
@@ -168,17 +194,18 @@
// Index the words contained in the URL and title of the row.
RowWordStarts word_starts;
- AddRowWordsToIndex(new_row, &word_starts);
+ AddRowWordsToIndex(new_row, &word_starts, languages);
word_starts_map_[history_id] = word_starts;
return true;
}
void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row,
- RowWordStarts* word_starts) {
+ RowWordStarts* word_starts,
+ const std::string& languages) {
HistoryID history_id = static_cast<HistoryID>(row.id());
// Split URL into individual, unique words then add in the title words.
const GURL& gurl(row.url());
- string16 url(net::FormatUrl(gurl, languages_,
+ string16 url(net::FormatUrl(gurl, languages,
net::kFormatUrlOmitUsernamePassword,
net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
NULL, NULL, NULL));
@@ -307,7 +334,10 @@
}
}
-bool URLIndexPrivateData::UpdateURL(const URLRow& row) {
+bool URLIndexPrivateData::UpdateURL(
+ const URLRow& row,
+ const std::string& languages,
+ const std::set<std::string>& scheme_whitelist) {
// The row may or may not already be in our index. If it is not already
// indexed and it qualifies then it gets indexed. If it is already
// indexed and still qualifies then it gets updated, otherwise it
@@ -319,8 +349,8 @@
// This new row should be indexed if it qualifies.
URLRow new_row(row);
new_row.set_id(row_id);
- row_was_updated =
- RowQualifiesAsSignificant(new_row, base::Time()) && IndexRow(new_row);
+ row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&
+ IndexRow(new_row, languages, scheme_whitelist);
} else if (RowQualifiesAsSignificant(row, base::Time())) {
// This indexed row still qualifies and will be re-indexed.
// The url won't have changed but the title, visit count, etc.
@@ -341,7 +371,7 @@
RemoveRowWordsFromIndex(row_to_update);
row_to_update.set_title(row.title());
RowWordStarts word_starts;
- AddRowWordsToIndex(row_to_update, &word_starts);
+ AddRowWordsToIndex(row_to_update, &word_starts, languages);
word_starts_map_[row_id] = word_starts;
}
row_was_updated = true;
@@ -383,10 +413,6 @@
return true;
}
-bool URLIndexPrivateData::URLSchemeIsWhitelisted(const GURL& gurl) const {
- return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end();
-}
-
// URLIndexPrivateData::HistoryItemFactorGreater -------------------------------
URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(
@@ -905,24 +931,19 @@
return word_id_set;
}
+// Cache Saving ----------------------------------------------------------------
+
// static
-void URLIndexPrivateData::InitializeSchemeWhitelist(
- std::set<std::string>* whitelist) {
- DCHECK(whitelist);
- whitelist->insert(std::string(chrome::kAboutScheme));
- whitelist->insert(std::string(chrome::kChromeUIScheme));
- whitelist->insert(std::string(chrome::kFileScheme));
- whitelist->insert(std::string(chrome::kFtpScheme));
- whitelist->insert(std::string(chrome::kHttpScheme));
- whitelist->insert(std::string(chrome::kHttpsScheme));
- whitelist->insert(std::string(chrome::kMailToScheme));
+void URLIndexPrivateData::WritePrivateDataToCacheFileTask(
+ scoped_refptr<URLIndexPrivateData> private_data,
+ const FilePath& file_path,
+ scoped_refptr<RefCountedBool> succeeded) {
+ DCHECK(private_data.get());
+ DCHECK(!file_path.empty());
+ succeeded->set_value(private_data->SaveToFile(file_path));  // NOTE(review): |succeeded| is dereferenced without a DCHECK, unlike the other arguments.
}
-// Cache Saving ----------------------------------------------------------------
-
bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) {
- // TODO(mrossetti): Move File IO to another thread.
- base::ThreadRestrictions::ScopedAllowIO allow_io;
base::TimeTicks beginning_time = base::TimeTicks::Now();
InMemoryURLIndexCacheItem index_cache;
SavePrivateData(&index_cache);
@@ -1030,8 +1051,7 @@
map_entry->set_history_id(iter->first);
const URLRow& url_row(iter->second);
// Note: We only save information that contributes to the index so there
- // is no need to save search_term_cache_ (not persistent),
- // languages_, etc.
+ // is no need to save search_term_cache_ (not persistent).
map_entry->set_visit_count(url_row.visit_count());
map_entry->set_typed_count(url_row.typed_count());
map_entry->set_last_visit(url_row.last_visit().ToInternalValue());
@@ -1070,60 +1090,68 @@
// Cache Restoring -------------------------------------------------------------
-bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) {
- // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.
- // That is: ensure that the database has not been modified since the cache
- // was last saved. DB file modification date is inadequate. There are no
- // SQLite table checksums automatically stored.
- Clear(); // Start with a clean slate.
+// static. NOTE(review): the result is assigned to the by-value |private_data| parameter and nothing is returned, so the restored data appears to be lost to the caller -- confirm.
+void URLIndexPrivateData::RestoreFromFileTask(
+ const FilePath& file_path,
+ scoped_refptr<URLIndexPrivateData> private_data,
+ std::string languages) {
+ private_data = URLIndexPrivateData::RestoreFromFile(file_path, languages);
+}
- // FIXME(mrossetti): Move File IO to another thread.
- base::ThreadRestrictions::ScopedAllowIO allow_io;
+// static. Reads and parses the cache file at |file_path| into a new URLIndexPrivateData; returns NULL if the file is missing or unreadable, if restoring fails, or if the result is empty.
+scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile(
+ const FilePath& file_path,
+ const std::string& languages) {
base::TimeTicks beginning_time = base::TimeTicks::Now();
if (!file_util::PathExists(file_path))
- return false;
+ return NULL;
std::string data;
// If there is no cache file then simply give up. This will cause us to
// attempt to rebuild from the history database.
if (!file_util::ReadFileToString(file_path, &data))
- return false;
+ return NULL;
+ scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData);
InMemoryURLIndexCacheItem index_cache;
if (!index_cache.ParseFromArray(data.c_str(), data.size())) {
- LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from "
+ LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from "
<< file_path.value();
- return false;
+ return restored_data;  // NOTE(review): returns a non-NULL, unpopulated object here, whereas every other failure path returns NULL -- confirm callers handle this.
}
- if (!RestorePrivateData(index_cache)) {
- Clear(); // Back to square one -- must build from scratch.
- return false;
- }
+ if (!restored_data->RestorePrivateData(index_cache, languages))
+ return NULL;
UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",
base::TimeTicks::Now() - beginning_time);
UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
- history_id_word_map_.size());
+ restored_data->history_id_word_map_.size());
UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());
- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size());
- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size());
- return true;
+ UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
+ restored_data->word_map_.size());
+ UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
+ restored_data->char_word_map_.size());
+ if (restored_data->Empty())
+ return NULL; // 'No data' is the same as a failed reload.
+ return restored_data;
}
// static
-URLIndexPrivateData* URLIndexPrivateData::RebuildFromHistory(
- HistoryDatabase* history_db) {
+scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(
+ HistoryDatabase* history_db,
+ const std::string& languages,
+ const std::set<std::string>& scheme_whitelist) {
if (!history_db)
return NULL;
base::TimeTicks beginning_time = base::TimeTicks::Now();
- scoped_ptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData);
+ scoped_refptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData);
URLDatabase::URLEnumerator history_enum;
if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
return NULL;
for (URLRow row; history_enum.GetNextURL(&row); )
- rebuilt_data->IndexRow(row);
+ rebuilt_data->IndexRow(row, languages, scheme_whitelist);
UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
base::TimeTicks::Now() - beginning_time);
@@ -1133,16 +1161,17 @@
rebuilt_data->word_map_.size());
UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
rebuilt_data->char_word_map_.size());
- return rebuilt_data.release();
+ return rebuilt_data;
}
bool URLIndexPrivateData::RestorePrivateData(
- const InMemoryURLIndexCacheItem& cache) {
+ const InMemoryURLIndexCacheItem& cache,
+ const std::string& languages) {
if (cache.has_version())
restored_cache_version_ = cache.version();
return RestoreWordList(cache) && RestoreWordMap(cache) &&
RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&
- RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache);
+ RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache, languages);
}
bool URLIndexPrivateData::RestoreWordList(
@@ -1264,7 +1293,8 @@
}
bool URLIndexPrivateData::RestoreWordStartsMap(
- const InMemoryURLIndexCacheItem& cache) {
+ const InMemoryURLIndexCacheItem& cache,
+ const std::string& languages) {
// Note that this function must be called after RestoreHistoryInfoMap() has
// been run as the word starts may have to be recalculated from the urls and
// page titles.
@@ -1299,7 +1329,7 @@
iter != history_info_map_.end(); ++iter) {
RowWordStarts word_starts;
const URLRow& row(iter->second);
- string16 url(net::FormatUrl(row.url(), languages_,
+ string16 url(net::FormatUrl(row.url(), languages,
net::kFormatUrlOmitUsernamePassword,
net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
NULL, NULL, NULL));
@@ -1313,4 +1343,11 @@
return true;
}
+// static. Returns true when the scheme of |gurl| is a member of |whitelist|.
+bool URLIndexPrivateData::URLSchemeIsWhitelisted(
+ const GURL& gurl,
+ const std::set<std::string>& whitelist) {
+ return whitelist.find(gurl.scheme()) != whitelist.end();
+}
+
} // namespace history
« no previous file with comments | « chrome/browser/history/url_index_private_data.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698