Index: chrome/browser/history/url_index_private_data.cc |
=================================================================== |
--- chrome/browser/history/url_index_private_data.cc (revision 117518) |
+++ chrome/browser/history/url_index_private_data.cc (working copy) |
@@ -10,17 +10,21 @@ |
#include <limits> |
#include <numeric> |
+#include "base/bind.h" |
#include "base/file_util.h" |
+#include "base/i18n/break_iterator.h" |
#include "base/i18n/case_conversion.h" |
+#include "base/memory/scoped_ptr.h" |
#include "base/metrics/histogram.h" |
#include "base/string_util.h" |
-#include "base/threading/thread_restrictions.h" |
+#include "base/time.h" |
#include "base/utf_string_conversions.h" |
#include "chrome/browser/autocomplete/autocomplete.h" |
#include "chrome/browser/history/url_database.h" |
#include "chrome/common/url_constants.h" |
#include "net/base/net_util.h" |
#include "third_party/protobuf/src/google/protobuf/repeated_field.h" |
+#include "ui/base/l10n/l10n_util.h" |
using google::protobuf::RepeatedField; |
using google::protobuf::RepeatedPtrField; |
@@ -76,11 +80,13 @@ |
} |
// Converts a raw value for some particular scoring factor into a score |
-// component for that factor. The conversion function is piecewise linear, with |
-// input values provided in |value_ranks| and resulting output scores from |
-// |kScoreRank| (mathematically, f(value_rank[i]) = kScoreRank[i]). A score |
-// cannot be higher than kScoreRank[0], and drops directly to 0 if lower than |
-// kScoreRank[3]. |
+// component for that factor. The conversion function is piecewise linear:
+// |value| is located between two adjacent entries of |value_ranks| and the
+// score is interpolated linearly between the corresponding |kScoreRank|
+// entries. A score cannot be higher than kScoreRank[0], and drops directly
+// to 0 if lower than kScoreRank[3] (eliminating the item being scored from
+// further consideration since such a score is insignificant and unlikely to
+// be presented to the user). |
Peter Kasting
2012/01/14 00:12:49
Nit: Honestly, the old comment seemed a lot clearer...
mrossetti
2012/03/03 05:05:56
Done.
|
// |
// For example, take |value| == 70 and |value_ranks| == { 100, 50, 30, 10 }. |
// Because 70 falls between ranks 0 (100) and 1 (50), the score is given by the |
@@ -290,11 +296,12 @@ |
} |
} |
-void URLIndexPrivateData::UpdateURL(URLID row_id, const URLRow& row) { |
+void URLIndexPrivateData::UpdateURL(const URLRow& row) { |
// The row may or may not already be in our index. If it is not already |
// indexed and it qualifies then it gets indexed. If it is already |
// indexed and still qualifies then it gets updated, otherwise it |
// is deleted from the index. |
+ URLID row_id = row.id(); |
HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); |
if (row_pos == history_info_map_.end()) { |
// This new row should be indexed if it qualifies. |
@@ -322,21 +329,34 @@ |
} else { |
// This indexed row no longer qualifies and will be de-indexed by |
// clearing all words associated with this row. |
- URLRow& removed_row = row_pos->second; |
- RemoveRowFromIndex(removed_row); |
+ RemoveRowFromIndex(row); |
} |
- // This invalidates the cache. |
- search_term_cache_.clear(); |
+ search_term_cache_.clear(); // This invalidates the cache. |
} |
-void URLIndexPrivateData::DeleteURL(URLID row_id) { |
- // Note that this does not remove any reference to this row from the |
- // word_id_history_map_. That map will continue to contain (and return) |
- // hits against this row until that map is rebuilt, but since the |
- // history_info_map_ no longer references the row no erroneous results |
- // will propagate to the user. |
- history_info_map_.erase(row_id); |
- search_term_cache_.clear(); // This invalidates the word cache. |
+// Helper functor for DeleteURL. |
+class HistoryInfoMapItemHasURL { |
+ public: |
+ explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {} |
+ |
+ bool operator()(const std::pair<const HistoryID, URLRow> item) { |
Peter Kasting
2012/01/14 00:12:49
Nit: Missing &?
mrossetti
2012/03/03 05:05:56
Done.
|
+ return item.second.url() == url_; |
+ } |
+ |
+ private: |
+ const GURL& url_; |
+}; |
+ |
+void URLIndexPrivateData::DeleteURL(const GURL& url) { |
+ // Find the matching entry in the history_info_map_. |
+ HistoryInfoMap::iterator pos = std::find_if( |
+ history_info_map_.begin(), |
+ history_info_map_.end(), |
+ HistoryInfoMapItemHasURL(url)); |
+ if (pos != history_info_map_.end()) { |
+ RemoveRowFromIndex(pos->second); |
+ search_term_cache_.clear(); // This invalidates the cache. |
+ } |
} |
bool URLIndexPrivateData::URLSchemeIsWhitelisted(const GURL& gurl) const { |
@@ -837,8 +857,6 @@ |
// Cache Saving ---------------------------------------------------------------- |
bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) { |
- // TODO(mrossetti): Move File IO to another thread. |
- base::ThreadRestrictions::ScopedAllowIO allow_io; |
base::TimeTicks beginning_time = base::TimeTicks::Now(); |
InMemoryURLIndexCacheItem index_cache; |
SavePrivateData(&index_cache); |
@@ -956,60 +974,70 @@ |
// Cache Restoring ------------------------------------------------------------- |
-bool URLIndexPrivateData::ReloadFromHistory(history::URLDatabase* history_db) { |
- Clear(); |
- |
- if (!history_db) |
- return false; |
- |
+// static |
+URLIndexPrivateData* URLIndexPrivateData::RestoreFromFile( |
+ const FilePath& file_path) { |
base::TimeTicks beginning_time = base::TimeTicks::Now(); |
- URLDatabase::URLEnumerator history_enum; |
- if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) |
- return false; |
- URLRow row; |
- while (history_enum.GetNextURL(&row)) |
- IndexRow(row); |
- UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", |
- base::TimeTicks::Now() - beginning_time); |
- return true; |
-} |
- |
-bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) { |
- // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. |
- // That is: ensure that the database has not been modified since the cache |
- // was last saved. DB file modification date is inadequate. There are no |
- // SQLite table checksums automatically stored. |
- // FIXME(mrossetti): Move File IO to another thread. |
- base::ThreadRestrictions::ScopedAllowIO allow_io; |
- base::TimeTicks beginning_time = base::TimeTicks::Now(); |
+ if (!file_util::PathExists(file_path)) |
+ return NULL; |
std::string data; |
// If there is no cache file then simply give up. This will cause us to |
// attempt to rebuild from the history database. |
if (!file_util::ReadFileToString(file_path, &data)) |
- return false; |
+ return NULL; |
+ scoped_ptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData); |
InMemoryURLIndexCacheItem index_cache; |
if (!index_cache.ParseFromArray(data.c_str(), data.size())) { |
- LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from " |
+ LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from " |
<< file_path.value(); |
- return false; |
+ return restored_data.release(); |
} |
- if (!RestorePrivateData(index_cache)) { |
- Clear(); // Back to square one -- must build from scratch. |
- return false; |
+ if (!restored_data->RestorePrivateData(index_cache)) { |
+ restored_data.reset(); // Back to square one -- must build from history DB. |
+ return NULL; |
} |
UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", |
base::TimeTicks::Now() - beginning_time); |
UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", |
- history_id_word_map_.size()); |
+ restored_data->history_id_word_map_.size()); |
UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); |
- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); |
- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); |
- return true; |
+ UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", |
+ restored_data->word_map_.size()); |
+ UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", |
+ restored_data->char_word_map_.size()); |
+ return restored_data.release(); |
} |
+// static |
+URLIndexPrivateData* URLIndexPrivateData::RebuildFromHistory( |
+ URLDatabase* history_db) { |
+ if (!history_db) |
+ return NULL; |
+ |
+ base::TimeTicks beginning_time = base::TimeTicks::Now(); |
+ |
+ scoped_ptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData); |
+ URLDatabase::URLEnumerator history_enum; |
+ if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) |
+ return NULL; |
+ URLRow row; |
Peter Kasting
2012/01/14 00:12:49
Nit: You can condense these three lines to two and
mrossetti
2012/03/03 05:05:56
w00t! My ancient mind just doesn't work that way!
|
+ while (history_enum.GetNextURL(&row)) |
+ rebuilt_data->IndexRow(row); |
+ |
+ UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", |
+ base::TimeTicks::Now() - beginning_time); |
+ UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", |
+ rebuilt_data->history_id_word_map_.size()); |
+ UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", |
+ rebuilt_data->word_map_.size()); |
+ UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", |
+ rebuilt_data->char_word_map_.size()); |
+ return rebuilt_data.release(); |
+} |
+ |
bool URLIndexPrivateData::RestorePrivateData( |
const InMemoryURLIndexCacheItem& cache) { |
return RestoreWordList(cache) && RestoreWordMap(cache) && |