chrome/browser/history/url_index_private_data.cc - Issue 9030031: Move InMemoryURLIndex Caching Operations to FILE Thread

Unified Diff: chrome/browser/history/url_index_private_data.cc

Issue 9030031: Move InMemoryURLIndex Caching Operations to FILE Thread (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Missed a private data swap. Try to fix update phase failure. Created 8 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« chrome/browser/history/url_index_private_data.h ('K') | « chrome/browser/history/url_index_private_data.h ('k') | chrome/browser/sync/profile_sync_service_typed_url_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/browser/history/url_index_private_data.cc

===================================================================

--- chrome/browser/history/url_index_private_data.cc (revision 117518)

+++ chrome/browser/history/url_index_private_data.cc (working copy)

@@ -10,17 +10,21 @@

#include <limits>

#include <numeric>

+#include "base/bind.h"

#include "base/file_util.h"

+#include "base/i18n/break_iterator.h"

#include "base/i18n/case_conversion.h"

+#include "base/memory/scoped_ptr.h"

#include "base/metrics/histogram.h"

#include "base/string_util.h"

-#include "base/threading/thread_restrictions.h"

+#include "base/time.h"

#include "base/utf_string_conversions.h"

#include "chrome/browser/autocomplete/autocomplete.h"

#include "chrome/browser/history/url_database.h"

#include "chrome/common/url_constants.h"

#include "net/base/net_util.h"

#include "third_party/protobuf/src/google/protobuf/repeated_field.h"

+#include "ui/base/l10n/l10n_util.h"

using google::protobuf::RepeatedField;

using google::protobuf::RepeatedPtrField;

@@ -76,11 +80,13 @@

}

// Converts a raw value for some particular scoring factor into a score

-// component for that factor. The conversion function is piecewise linear, with

-// input values provided in |value_ranks| and resulting output scores from

-// |kScoreRank| (mathematically, f(value_rank[i]) = kScoreRank[i]). A score

-// cannot be higher than kScoreRank[0], and drops directly to 0 if lower than

-// kScoreRank[3].

+// component for that factor. The conversion function is linear within the

+// range of the |value_ranks| array. |value| is mapped to a range within

+// |value_ranks| with the resulting range projecting into the |kScoreRank|

+// array. A score cannot be higher than kScoreRank[0], and drops directly to 0

+// if lower than kScoreRank[3] (eliminating the item being scored from further

+// consideration since such a score is insignificant and unlikely to be

+// presented to the user).

Peter Kasting 2012/01/14 00:12:49 Nit: Honestly, the old comment seemed a lot cleare

mrossetti 2012/03/03 05:05:56 Done.

// For example, take |value| == 70 and |value_ranks| == { 100, 50, 30, 10 }.

// Because 70 falls between ranks 0 (100) and 1 (50), the score is given by the

@@ -290,11 +296,12 @@

}

-void URLIndexPrivateData::UpdateURL(URLID row_id, const URLRow& row) {

+void URLIndexPrivateData::UpdateURL(const URLRow& row) {

// The row may or may not already be in our index. If it is not already

// indexed and it qualifies then it gets indexed. If it is already

// indexed and still qualifies then it gets updated, otherwise it

// is deleted from the index.

+ URLID row_id = row.id();

HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);

if (row_pos == history_info_map_.end()) {

// This new row should be indexed if it qualifies.

@@ -322,21 +329,34 @@

} else {

// This indexed row no longer qualifies and will be de-indexed by

// clearing all words associated with this row.

- URLRow& removed_row = row_pos->second;

- RemoveRowFromIndex(removed_row);

+ RemoveRowFromIndex(row);

}

- // This invalidates the cache.

- search_term_cache_.clear();

+ search_term_cache_.clear(); // This invalidates the cache.

}

-void URLIndexPrivateData::DeleteURL(URLID row_id) {

- // Note that this does not remove any reference to this row from the

- // word_id_history_map_. That map will continue to contain (and return)

- // hits against this row until that map is rebuilt, but since the

- // history_info_map_ no longer references the row no erroneous results

- // will propagate to the user.

- history_info_map_.erase(row_id);

- search_term_cache_.clear(); // This invalidates the word cache.

+// Helper functor for DeleteURL.

+class HistoryInfoMapItemHasURL {

+ public:

+ explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}

+ bool operator()(const std::pair<const HistoryID, URLRow> item) {

Peter Kasting 2012/01/14 00:12:49 Nit: Missing &?

mrossetti 2012/03/03 05:05:56 Done.

+ return item.second.url() == url_;

+ }

+ private:

+ const GURL& url_;

+};

+void URLIndexPrivateData::DeleteURL(const GURL& url) {

+ // Find the matching entry in the history_info_map_.

+ HistoryInfoMap::iterator pos = std::find_if(

+ history_info_map_.begin(),

+ history_info_map_.end(),

+ HistoryInfoMapItemHasURL(url));

+ if (pos != history_info_map_.end()) {

+ RemoveRowFromIndex(pos->second);

+ search_term_cache_.clear(); // This invalidates the cache.

+ }

}

bool URLIndexPrivateData::URLSchemeIsWhitelisted(const GURL& gurl) const {

@@ -837,8 +857,6 @@

// Cache Saving ----------------------------------------------------------------

bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) {

- // TODO(mrossetti): Move File IO to another thread.

- base::ThreadRestrictions::ScopedAllowIO allow_io;

base::TimeTicks beginning_time = base::TimeTicks::Now();

InMemoryURLIndexCacheItem index_cache;

SavePrivateData(&index_cache);

@@ -956,60 +974,70 @@

// Cache Restoring -------------------------------------------------------------

-bool URLIndexPrivateData::ReloadFromHistory(history::URLDatabase* history_db) {

- Clear();

- if (!history_db)

- return false;

+// static

+URLIndexPrivateData* URLIndexPrivateData::RestoreFromFile(

+ const FilePath& file_path) {

base::TimeTicks beginning_time = base::TimeTicks::Now();

- URLDatabase::URLEnumerator history_enum;

- if (!history_db->InitURLEnumeratorForSignificant(&history_enum))

- return false;

- URLRow row;

- while (history_enum.GetNextURL(&row))

- IndexRow(row);

- UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",

- base::TimeTicks::Now() - beginning_time);

- return true;

-bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) {

- // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.

- // That is: ensure that the database has not been modified since the cache

- // was last saved. DB file modification date is inadequate. There are no

- // SQLite table checksums automatically stored.

- // FIXME(mrossetti): Move File IO to another thread.

- base::ThreadRestrictions::ScopedAllowIO allow_io;

- base::TimeTicks beginning_time = base::TimeTicks::Now();

+ if (!file_util::PathExists(file_path))

+ return NULL;

std::string data;

// If there is no cache file then simply give up. This will cause us to

// attempt to rebuild from the history database.

if (!file_util::ReadFileToString(file_path, &data))

- return false;

+ return NULL;

+ scoped_ptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData);

InMemoryURLIndexCacheItem index_cache;

if (!index_cache.ParseFromArray(data.c_str(), data.size())) {

- LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from "

+ LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from "

<< file_path.value();

- return false;

+ return restored_data.release();

}

- if (!RestorePrivateData(index_cache)) {

- Clear(); // Back to square one -- must build from scratch.

- return false;

+ if (!restored_data->RestorePrivateData(index_cache)) {

+ restored_data.reset(); // Back to square one -- must build from history DB.

+ return NULL;

}

UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",

base::TimeTicks::Now() - beginning_time);

UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",

- history_id_word_map_.size());

+ restored_data->history_id_word_map_.size());

UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());

- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size());

- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size());

- return true;

+ UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

+ restored_data->word_map_.size());

+ UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

+ restored_data->char_word_map_.size());

+ return restored_data.release();

}

+// static

+URLIndexPrivateData* URLIndexPrivateData::RebuildFromHistory(

+ URLDatabase* history_db) {

+ if (!history_db)

+ return NULL;

+ base::TimeTicks beginning_time = base::TimeTicks::Now();

+ scoped_ptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData);

+ URLDatabase::URLEnumerator history_enum;

+ if (!history_db->InitURLEnumeratorForSignificant(&history_enum))

+ return NULL;

+ URLRow row;

Peter Kasting 2012/01/14 00:12:49 Nit: You can condense these three lines to two and

mrossetti 2012/03/03 05:05:56 w00t! My ancient mind just doesn't work that way!

+ while (history_enum.GetNextURL(&row))

+ rebuilt_data->IndexRow(row);

+ UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",

+ base::TimeTicks::Now() - beginning_time);

+ UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",

+ rebuilt_data->history_id_word_map_.size());

+ UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

+ rebuilt_data->word_map_.size());

+ UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

+ rebuilt_data->char_word_map_.size());

+ return rebuilt_data.release();

bool URLIndexPrivateData::RestorePrivateData(

const InMemoryURLIndexCacheItem& cache) {

return RestoreWordList(cache) && RestoreWordMap(cache) &&