Chromium Code Reviews| Index: chrome/browser/history/url_index_private_data.cc |
| =================================================================== |
| --- chrome/browser/history/url_index_private_data.cc (revision 117518) |
| +++ chrome/browser/history/url_index_private_data.cc (working copy) |
| @@ -10,17 +10,21 @@ |
| #include <limits> |
| #include <numeric> |
| +#include "base/bind.h" |
| #include "base/file_util.h" |
| +#include "base/i18n/break_iterator.h" |
| #include "base/i18n/case_conversion.h" |
| +#include "base/memory/scoped_ptr.h" |
| #include "base/metrics/histogram.h" |
| #include "base/string_util.h" |
| -#include "base/threading/thread_restrictions.h" |
| +#include "base/time.h" |
| #include "base/utf_string_conversions.h" |
| #include "chrome/browser/autocomplete/autocomplete.h" |
| #include "chrome/browser/history/url_database.h" |
| #include "chrome/common/url_constants.h" |
| #include "net/base/net_util.h" |
| #include "third_party/protobuf/src/google/protobuf/repeated_field.h" |
| +#include "ui/base/l10n/l10n_util.h" |
| using google::protobuf::RepeatedField; |
| using google::protobuf::RepeatedPtrField; |
| @@ -76,11 +80,13 @@ |
| } |
| // Converts a raw value for some particular scoring factor into a score |
| -// component for that factor. The conversion function is piecewise linear, with |
| -// input values provided in |value_ranks| and resulting output scores from |
| -// |kScoreRank| (mathematically, f(value_rank[i]) = kScoreRank[i]). A score |
| -// cannot be higher than kScoreRank[0], and drops directly to 0 if lower than |
| -// kScoreRank[3]. |
| +// component for that factor. The conversion function is linear within the |
| +// range of the |value_ranks| array. |value| is mapped to a range within |
| +// |value_ranks| with the resulting range projecting into the |kScoreRank| |
| +// array. A score cannot be higher than kScoreRank[0], and drops directly to 0 |
| +// if lower than kScoreRank[3] (eliminating the item being scored from further |
| +// consideration since such a score is insignificant and unlikely to be |
| +// presented to the user). |
|
Peter Kasting
2012/01/14 00:12:49
Nit: Honestly, the old comment seemed a lot cleare
mrossetti
2012/03/03 05:05:56
Done.
|
| // |
| // For example, take |value| == 70 and |value_ranks| == { 100, 50, 30, 10 }. |
| // Because 70 falls between ranks 0 (100) and 1 (50), the score is given by the |
| @@ -290,11 +296,12 @@ |
| } |
| } |
| -void URLIndexPrivateData::UpdateURL(URLID row_id, const URLRow& row) { |
| +void URLIndexPrivateData::UpdateURL(const URLRow& row) { |
| // The row may or may not already be in our index. If it is not already |
| // indexed and it qualifies then it gets indexed. If it is already |
| // indexed and still qualifies then it gets updated, otherwise it |
| // is deleted from the index. |
| + URLID row_id = row.id(); |
| HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); |
| if (row_pos == history_info_map_.end()) { |
| // This new row should be indexed if it qualifies. |
| @@ -322,21 +329,34 @@ |
| } else { |
| // This indexed row no longer qualifies and will be de-indexed by |
| // clearing all words associated with this row. |
| - URLRow& removed_row = row_pos->second; |
| - RemoveRowFromIndex(removed_row); |
| + RemoveRowFromIndex(row); |
| } |
| - // This invalidates the cache. |
| - search_term_cache_.clear(); |
| + search_term_cache_.clear(); // This invalidates the cache. |
| } |
| -void URLIndexPrivateData::DeleteURL(URLID row_id) { |
| - // Note that this does not remove any reference to this row from the |
| - // word_id_history_map_. That map will continue to contain (and return) |
| - // hits against this row until that map is rebuilt, but since the |
| - // history_info_map_ no longer references the row no erroneous results |
| - // will propagate to the user. |
| - history_info_map_.erase(row_id); |
| - search_term_cache_.clear(); // This invalidates the word cache. |
| +// Helper functor for DeleteURL. |
| +class HistoryInfoMapItemHasURL { |
| + public: |
| + explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {} |
| + |
| + bool operator()(const std::pair<const HistoryID, URLRow> item) { |
|
Peter Kasting
2012/01/14 00:12:49
Nit: Missing &?
mrossetti
2012/03/03 05:05:56
Done.
|
| + return item.second.url() == url_; |
| + } |
| + |
| + private: |
| + const GURL& url_; |
| +}; |
| + |
| +void URLIndexPrivateData::DeleteURL(const GURL& url) { |
| + // Find the matching entry in the history_info_map_. |
| + HistoryInfoMap::iterator pos = std::find_if( |
| + history_info_map_.begin(), |
| + history_info_map_.end(), |
| + HistoryInfoMapItemHasURL(url)); |
| + if (pos != history_info_map_.end()) { |
| + RemoveRowFromIndex(pos->second); |
| + search_term_cache_.clear(); // This invalidates the cache. |
| + } |
| } |
| bool URLIndexPrivateData::URLSchemeIsWhitelisted(const GURL& gurl) const { |
| @@ -837,8 +857,6 @@ |
| // Cache Saving ---------------------------------------------------------------- |
| bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) { |
| - // TODO(mrossetti): Move File IO to another thread. |
| - base::ThreadRestrictions::ScopedAllowIO allow_io; |
| base::TimeTicks beginning_time = base::TimeTicks::Now(); |
| InMemoryURLIndexCacheItem index_cache; |
| SavePrivateData(&index_cache); |
| @@ -956,60 +974,70 @@ |
| // Cache Restoring ------------------------------------------------------------- |
| -bool URLIndexPrivateData::ReloadFromHistory(history::URLDatabase* history_db) { |
| - Clear(); |
| - |
| - if (!history_db) |
| - return false; |
| - |
| +// static |
| +URLIndexPrivateData* URLIndexPrivateData::RestoreFromFile( |
| + const FilePath& file_path) { |
| base::TimeTicks beginning_time = base::TimeTicks::Now(); |
| - URLDatabase::URLEnumerator history_enum; |
| - if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) |
| - return false; |
| - URLRow row; |
| - while (history_enum.GetNextURL(&row)) |
| - IndexRow(row); |
| - UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", |
| - base::TimeTicks::Now() - beginning_time); |
| - return true; |
| -} |
| - |
| -bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) { |
| - // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. |
| - // That is: ensure that the database has not been modified since the cache |
| - // was last saved. DB file modification date is inadequate. There are no |
| - // SQLite table checksums automatically stored. |
| - // FIXME(mrossetti): Move File IO to another thread. |
| - base::ThreadRestrictions::ScopedAllowIO allow_io; |
| - base::TimeTicks beginning_time = base::TimeTicks::Now(); |
| + if (!file_util::PathExists(file_path)) |
| + return NULL; |
| std::string data; |
| // If there is no cache file then simply give up. This will cause us to |
| // attempt to rebuild from the history database. |
| if (!file_util::ReadFileToString(file_path, &data)) |
| - return false; |
| + return NULL; |
| + scoped_ptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData); |
| InMemoryURLIndexCacheItem index_cache; |
| if (!index_cache.ParseFromArray(data.c_str(), data.size())) { |
| - LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from " |
| + LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from " |
| << file_path.value(); |
| - return false; |
| + return restored_data.release(); |
| } |
| - if (!RestorePrivateData(index_cache)) { |
| - Clear(); // Back to square one -- must build from scratch. |
| - return false; |
| + if (!restored_data->RestorePrivateData(index_cache)) { |
| + restored_data.reset(); // Back to square one -- must build from history DB. |
| + return NULL; |
| } |
| UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", |
| base::TimeTicks::Now() - beginning_time); |
| UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", |
| - history_id_word_map_.size()); |
| + restored_data->history_id_word_map_.size()); |
| UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); |
| - UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); |
| - UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); |
| - return true; |
| + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", |
| + restored_data->word_map_.size()); |
| + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", |
| + restored_data->char_word_map_.size()); |
| + return restored_data.release(); |
| } |
| +// static |
| +URLIndexPrivateData* URLIndexPrivateData::RebuildFromHistory( |
| + URLDatabase* history_db) { |
| + if (!history_db) |
| + return NULL; |
| + |
| + base::TimeTicks beginning_time = base::TimeTicks::Now(); |
| + |
| + scoped_ptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData); |
| + URLDatabase::URLEnumerator history_enum; |
| + if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) |
| + return NULL; |
| + URLRow row; |
|
Peter Kasting
2012/01/14 00:12:49
Nit: You can condense these three lines to two and
mrossetti
2012/03/03 05:05:56
w00t! My ancient mind just doesn't work that way!
|
| + while (history_enum.GetNextURL(&row)) |
| + rebuilt_data->IndexRow(row); |
| + |
| + UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", |
| + base::TimeTicks::Now() - beginning_time); |
| + UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", |
| + rebuilt_data->history_id_word_map_.size()); |
| + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", |
| + rebuilt_data->word_map_.size()); |
| + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", |
| + rebuilt_data->char_word_map_.size()); |
| + return rebuilt_data.release(); |
| +} |
| + |
| bool URLIndexPrivateData::RestorePrivateData( |
| const InMemoryURLIndexCacheItem& cache) { |
| return RestoreWordList(cache) && RestoreWordMap(cache) && |