Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
| 6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
| 7 #pragma once | 7 #pragma once |
| 8 | 8 |
| 9 #include "base/file_path.h" | 9 #include "base/file_path.h" |
| 10 #include "base/gtest_prod_util.h" | 10 #include "base/gtest_prod_util.h" |
| 11 #include "chrome/browser/history/in_memory_url_index_types.h" | 11 #include "chrome/browser/history/in_memory_url_index_types.h" |
| 12 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" | 12 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" |
| 13 | 13 |
| 14 class InMemoryURLIndexTest; | |
| 15 | |
| 14 namespace in_memory_url_index { | 16 namespace in_memory_url_index { |
| 15 class InMemoryURLIndexCacheItem; | 17 class InMemoryURLIndexCacheItem; |
| 16 } | 18 } |
| 17 | 19 |
| 18 namespace history { | 20 namespace history { |
| 19 | 21 |
| 20 namespace imui = in_memory_url_index; | 22 namespace imui = in_memory_url_index; |
| 21 | 23 |
| 22 // A structure describing the InMemoryURLIndex's internal data and providing for | 24 // A structure describing the InMemoryURLIndex's internal data and providing for |
| 23 // restoring, rebuilding and updating that internal data. | 25 // restoring, rebuilding and updating that internal data. |
| 24 class URLIndexPrivateData { | 26 class URLIndexPrivateData { |
| 25 public: | 27 public: |
| 26 URLIndexPrivateData(); | 28 URLIndexPrivateData(); |
| 27 ~URLIndexPrivateData(); | 29 ~URLIndexPrivateData(); |
| 28 | 30 |
| 29 private: | 31 private: |
| 32 friend class AddHistoryMatch; | |
| 33 friend class HistoryQuickProviderTest; | |
| 30 friend class InMemoryURLIndex; | 34 friend class InMemoryURLIndex; |
| 31 friend class AddHistoryMatch; | |
| 32 friend class InMemoryURLIndexTest; | 35 friend class InMemoryURLIndexTest; |
| 33 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); | 36 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); |
| 34 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); | 37 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); |
| 35 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); | 38 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); |
| 36 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); | 39 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); |
| 37 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); | 40 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); |
| 38 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); | 41 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); |
| 39 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); | 42 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); |
| 40 | 43 |
| 44 #if 1 | |
| 45 void DUMP() const; | |
| 46 #endif | |
|
Peter Kasting
2012/01/14 00:12:49
This seems like it should have disappeared before
mrossetti
2012/03/03 05:05:56
Done.
| |
| 47 | |
| 41 // Support caching of term results so that we can optimize searches which | 48 // Support caching of term results so that we can optimize searches which |
| 42 // build upon a previous search. Each entry in this map represents one | 49 // build upon a previous search. Each entry in this map represents one |
| 43 // search term from the most recent search. For example, if the user had | 50 // search term from the most recent search. For example, if the user had |
| 44 // typed "google blog trans" and then typed an additional 'l' (at the end, | 51 // typed "google blog trans" and then typed an additional 'l' (at the end, |
| 45 // of course) then there would be four items in the cache: 'blog', 'google', | 52 // of course) then there would be four items in the cache: 'blog', 'google', |
| 46 // 'trans', and 'transl'. All would be marked as being in use except for the | 53 // 'trans', and 'transl'. All would be marked as being in use except for the |
| 47 // 'trans' item; its cached data would have been used when optimizing the | 54 // 'trans' item; its cached data would have been used when optimizing the |
| 48 // construction of the search results candidates for 'transl' but then would | 55 // construction of the search results candidates for 'transl' but then would |
| 49 // no longer be needed. | 56 // no longer be needed. |
| 50 // | 57 // |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 117 // descending score. The full results set (i.e. beyond the | 124 // descending score. The full results set (i.e. beyond the |
| 118 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls | 125 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls |
| 119 // to this function. | 126 // to this function. |
| 120 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string); | 127 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string); |
| 121 | 128 |
| 122 // Sets the |languages| to a list of language encodings with which the history | 129 // Sets the |languages| to a list of language encodings with which the history |
| 123 // URLs and omnibox searches are interpreted, i.e. how each is broken | 130 // URLs and omnibox searches are interpreted, i.e. how each is broken |
| 124 // down into words and each word is broken down into characters. | 131 // down into words and each word is broken down into characters. |
| 125 void set_languages(const std::string& languages) { languages_ = languages; } | 132 void set_languages(const std::string& languages) { languages_ = languages; } |
| 126 | 133 |
| 127 // Restores the index's private data from the cache file stored in the | 134 // Constructs a new object by restoring its contents from the file at |path|. |
| 128 // profile directory and returns true if successful. | 135 // Returns the new URLIndexPrivateData which on success will contain the |
| 129 bool RestoreFromFile(const FilePath& file_path); | 136 // restored data but upon failure will be empty. |
| 137 static URLIndexPrivateData* RestoreFromFile(const FilePath& path); | |
| 138 | |
| 139 // Constructs a new object by rebuilding its contents from the history | |
| 140 // database in |history_db|. Returns the new URLIndexPrivateData which on success | |
| 141 // will contain the rebuilt data but upon failure will be empty. | |
| 142 static URLIndexPrivateData* RebuildFromHistory(URLDatabase* history_db); | |
| 130 | 143 |
| 131 // Caches the index private data and writes the cache file to the profile | 144 // Caches the index private data and writes the cache file to the profile |
| 132 // directory. | 145 // directory. |
| 133 bool SaveToFile(const FilePath& file_path); | 146 bool SaveToFile(const FilePath& file_path); |
| 134 | 147 |
| 135 // Reloads the history index from |history_db|. | |
| 136 bool ReloadFromHistory(URLDatabase* history_db); | |
| 137 | |
| 138 // Initializes all index data members in preparation for restoring the index | 148 // Initializes all index data members in preparation for restoring the index |
| 139 // from the cache or a complete rebuild from the history database. | 149 // from the cache or a complete rebuild from the history database. |
| 140 void Clear(); | 150 void Clear(); |
| 141 | 151 |
| 142 // Adds |word_id| to |history_id|'s entry in the history/word map, | 152 // Adds |word_id| to |history_id|'s entry in the history/word map, |
| 143 // creating a new entry if one does not already exist. | 153 // creating a new entry if one does not already exist. |
| 144 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); | 154 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); |
| 145 | 155 |
| 146 // Given a set of Char16s, finds words containing those characters. | 156 // Given a set of Char16s, finds words containing those characters. |
| 147 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); | 157 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); |
| 148 | 158 |
| 149 // Initializes the whitelist of URL schemes. | 159 // Initializes the whitelist of URL schemes. |
| 150 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); | 160 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); |
| 151 | 161 |
| 152 // URL History indexing support functions. | 162 // URL History indexing support functions. |
| 153 | 163 |
| 154 // Indexes one URL history item. | 164 // Indexes one URL history item. |
| 155 void IndexRow(const URLRow& row); | 165 void IndexRow(const URLRow& row); |
| 156 | 166 |
| 157 // Updates or adds an history item to the index if it meets the minimum | 167 // Updates or adds the history item in |row| to the index if it meets the |
| 158 // 'quick' criteria. | 168 // minimum 'quick' criteria. |
| 159 void UpdateURL(URLID row_id, const URLRow& row); | 169 void UpdateURL(const URLRow& row); |
| 160 | 170 |
| 161 // Deletes indexing data for an history item. The item may not have actually | 171 // Deletes indexing data for the history item with the URL given in |url|. |
| 162 // been indexed (which is the case if it did not previously meet minimum | 172 // The item may not have actually been indexed, which is the case if it did |
| 163 // 'quick' criteria). | 173 // not previously meet minimum 'quick' criteria. |
| 164 void DeleteURL(URLID row_id); | 174 void DeleteURL(const GURL& url); |
| 165 | 175 |
| 166 // Parses and indexes the words in the URL and page title of |row|. | 176 // Parses and indexes the words in the URL and page title of |row|. |
| 167 void AddRowWordsToIndex(const URLRow& row); | 177 void AddRowWordsToIndex(const URLRow& row); |
| 168 | 178 |
| 169 // Removes |row| and all associated words and characters from the index. | 179 // Removes |row| and all associated words and characters from the index. |
| 170 void RemoveRowFromIndex(const URLRow& row); | 180 void RemoveRowFromIndex(const URLRow& row); |
| 171 | 181 |
| 172 // Removes all words and characters associated with |row| from the index. | 182 // Removes all words and characters associated with |row| from the index. |
| 173 void RemoveRowWordsFromIndex(const URLRow& row); | 183 void RemoveRowWordsFromIndex(const URLRow& row); |
| 174 | 184 |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 208 static ScoredHistoryMatch ScoredMatchForURL( | 218 static ScoredHistoryMatch ScoredMatchForURL( |
| 209 const URLRow& row, | 219 const URLRow& row, |
| 210 const String16Vector& terms_vector); | 220 const String16Vector& terms_vector); |
| 211 | 221 |
| 212 // Calculates a component score based on position, ordering and total | 222 // Calculates a component score based on position, ordering and total |
| 213 // substring match size using metrics recorded in |matches|. |max_length| | 223 // substring match size using metrics recorded in |matches|. |max_length| |
| 214 // is the length of the string against which the terms are being searched. | 224 // is the length of the string against which the terms are being searched. |
| 215 static int ScoreComponentForMatches(const TermMatches& matches, | 225 static int ScoreComponentForMatches(const TermMatches& matches, |
| 216 size_t max_length); | 226 size_t max_length); |
| 217 | 227 |
| 218 // Determines if |gurl| has a whitelisted scheme and returns true if so. | |
| 219 bool URLSchemeIsWhitelisted(const GURL& gurl) const; | |
| 220 | |
| 221 // Encode a data structure into the protobuf |cache|. | 228 // Encode a data structure into the protobuf |cache|. |
| 222 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; | 229 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; |
| 223 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; | 230 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; |
| 224 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 231 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 225 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 232 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 226 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; | 233 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 227 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; | 234 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 228 | 235 |
| 229 // Decode a data structure from the protobuf |cache|. Return false if there | 236 // Decode a data structure from the protobuf |cache|. Return false if there |
| 230 // is any kind of failure. | 237 // is any kind of failure. |
| 231 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); | 238 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); |
| 232 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); | 239 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); |
| 233 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 240 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 234 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 241 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 235 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); | 242 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 236 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); | 243 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 237 | 244 |
| 245 // Determines if |gurl| has a whitelisted scheme and returns true if so. | |
| 246 bool URLSchemeIsWhitelisted(const GURL& gurl) const; | |
| 247 | |
| 238 // Cache of search terms. | 248 // Cache of search terms. |
| 239 SearchTermCacheMap search_term_cache_; | 249 SearchTermCacheMap search_term_cache_; |
| 240 | 250 |
| 241 // Languages used during the word-breaking process during indexing. | 251 // Languages used during the word-breaking process during indexing. |
| 242 std::string languages_; | 252 std::string languages_; |
| 243 | 253 |
| 244 // Only URLs with a whitelisted scheme are indexed. | 254 // Only URLs with a whitelisted scheme are indexed. |
| 245 std::set<std::string> scheme_whitelist_; | 255 std::set<std::string> scheme_whitelist_; |
| 246 | 256 |
| 247 // Start of data members that are cached ------------------------------------- | 257 // Start of data members that are cached ------------------------------------- |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 287 // Used for unit testing only. Records the number of candidate history items | 297 // Used for unit testing only. Records the number of candidate history items |
| 288 // at three stages in the index searching process. | 298 // at three stages in the index searching process. |
| 289 size_t pre_filter_item_count_; // After word index is queried. | 299 size_t pre_filter_item_count_; // After word index is queried. |
| 290 size_t post_filter_item_count_; // After trimming large result set. | 300 size_t post_filter_item_count_; // After trimming large result set. |
| 291 size_t post_scoring_item_count_; // After performing final filter/scoring. | 301 size_t post_scoring_item_count_; // After performing final filter/scoring. |
| 292 }; | 302 }; |
| 293 | 303 |
| 294 } // namespace history | 304 } // namespace history |
| 295 | 305 |
| 296 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 306 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
| OLD | NEW |