| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
| 6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
| 7 #pragma once | 7 #pragma once |
| 8 | 8 |
| 9 #include "base/file_path.h" | 9 #include "base/file_path.h" |
| 10 #include "base/gtest_prod_util.h" | 10 #include "base/gtest_prod_util.h" |
| 11 #include "base/memory/ref_counted.h" |
| 11 #include "chrome/browser/history/in_memory_url_index_types.h" | 12 #include "chrome/browser/history/in_memory_url_index_types.h" |
| 12 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" | 13 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" |
| 14 #include "content/public/browser/notification_details.h" |
| 13 | 15 |
| 14 class HistoryQuickProviderTest; | 16 class HistoryQuickProviderTest; |
| 15 | 17 |
| 16 namespace in_memory_url_index { | 18 namespace in_memory_url_index { |
| 17 class InMemoryURLIndexCacheItem; | 19 class InMemoryURLIndexCacheItem; |
| 18 } | 20 } |
| 19 | 21 |
| 20 namespace history { | 22 namespace history { |
| 21 | 23 |
| 22 namespace imui = in_memory_url_index; | 24 namespace imui = in_memory_url_index; |
| 23 | 25 |
| 24 class HistoryDatabase; | 26 class HistoryDatabase; |
| 27 class InMemoryURLIndex; |
| 28 class RefCountedBool; |
| 25 | 29 |
| 26 // A structure describing the InMemoryURLIndex's internal data and providing for | 30 // A structure describing the InMemoryURLIndex's internal data and providing for |
| 27 // restoring, rebuilding and updating that internal data. | 31 // restoring, rebuilding and updating that internal data. |
| 28 class URLIndexPrivateData { | 32 class URLIndexPrivateData |
| 33 : public base::RefCountedThreadSafe<URLIndexPrivateData> { |
| 29 public: | 34 public: |
| 30 URLIndexPrivateData(); | 35 URLIndexPrivateData(); |
| 36 |
| 37 private: |
| 38 friend class base::RefCountedThreadSafe<URLIndexPrivateData>; |
| 31 ~URLIndexPrivateData(); | 39 ~URLIndexPrivateData(); |
| 32 | 40 |
| 33 private: | |
| 34 friend class AddHistoryMatch; | 41 friend class AddHistoryMatch; |
| 35 friend class ::HistoryQuickProviderTest; | 42 friend class ::HistoryQuickProviderTest; |
| 36 friend class InMemoryURLIndex; | 43 friend class InMemoryURLIndex; |
| 37 friend class InMemoryURLIndexTest; | 44 friend class InMemoryURLIndexTest; |
| 38 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); | 45 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); |
| 39 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); | 46 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); |
| 40 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); | 47 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); |
| 41 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); | 48 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); |
| 42 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); | 49 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); |
| 43 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); | 50 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); |
| (...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 119 // or page title. Scores are then calculated on no more than | 126 // or page title. Scores are then calculated on no more than |
| 120 // |kItemsToScoreLimit| candidates, as the scoring of such a large number of | 127 // |kItemsToScoreLimit| candidates, as the scoring of such a large number of |
| 121 // candidates may cause perceptible typing response delays in the omnibox. | 128 // candidates may cause perceptible typing response delays in the omnibox. |
| 122 // This is likely to occur for short omnibox terms such as 'h' and 'w' which | 129 // This is likely to occur for short omnibox terms such as 'h' and 'w' which |
| 123 // will be found in nearly all history candidates. Results are sorted by | 130 // will be found in nearly all history candidates. Results are sorted by |
| 124 // descending score. The full results set (i.e. beyond the | 131 // descending score. The full results set (i.e. beyond the |
| 125 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls | 132 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls |
| 126 // to this function. | 133 // to this function. |
| 127 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string); | 134 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string); |
| 128 | 135 |
| 129 // Sets the |languages| to a list of language encodings with which the history | 136 // Creates a new URLIndexPrivateData object, populates it from the contents |
| 130 // URLs and omnibox searches are interpreted, i.e. how each is broken | 137 // of the cache file stored in |file_path|, and assigns it to |
| 131 // down into words and each word is broken down into characters. | 138 // |private_data_ptr|. |
| 132 void set_languages(const std::string& languages) { languages_ = languages; } | 139 static void RestoreFromFileTask( |
| 140 const FilePath& file_path, |
| 141 scoped_refptr<URLIndexPrivateData> private_data_ptr); |
| 133 | 142 |
| 134 // Restores the index's private data from the cache file stored in the | 143 // Constructs a new object by restoring its contents from the file at |path|. |
| 135 // profile directory and returns true if successful. | 144 // Returns the new URLIndexPrivateData which on success will contain the |
| 136 bool RestoreFromFile(const FilePath& file_path); | 145 // restored data but upon failure will be empty. |
| 146 static scoped_refptr<URLIndexPrivateData> RestoreFromFile( |
| 147 const FilePath& path); |
| 137 | 148 |
| 138 // Constructs a new object by rebuilding its contents from the history | 149 // Constructs a new object by rebuilding its contents from the history |
| 139 // database in |history_db|. Returns the new URLIndexPrivateData which on | 150 // database in |history_db|. Returns the new URLIndexPrivateData which on |
| 140 // success will contain the rebuilt data but upon failure will be empty. | 151 // success will contain the rebuilt data but upon failure will be empty. |
| 141 static URLIndexPrivateData* RebuildFromHistory(HistoryDatabase* history_db); | 152 // |languages| gives a list of language encodings by which the URLs and page |
| 153 // titles are broken down into words and characters. |
| 154 static scoped_refptr<URLIndexPrivateData> RebuildFromHistory( |
| 155 HistoryDatabase* history_db, |
| 156 const std::string& languages, |
| 157 const std::set<std::string>& scheme_whitelist); |
| 158 |
| 159 // Writes |private_data| as a cache file to |file_path| and returns success |
| 160 // via |succeeded|. |
| 161 static void WritePrivateDataToCacheFileTask( |
| 162 scoped_refptr<URLIndexPrivateData> private_data, |
| 163 const FilePath& file_path, |
| 164 scoped_refptr<RefCountedBool> succeeded); |
| 142 | 165 |
| 143 // Caches the index private data and writes the cache file to the profile | 166 // Caches the index private data and writes the cache file to the profile |
| 144 // directory. | 167 // directory. Called by WritePrivateDataToCacheFileTask. |
| 145 bool SaveToFile(const FilePath& file_path); | 168 bool SaveToFile(const FilePath& file_path); |
| 146 | 169 |
| 147 // Initializes all index data members in preparation for restoring the index | 170 // Initializes all index data members in preparation for restoring the index |
| 148 // from the cache or a complete rebuild from the history database. | 171 // from the cache or a complete rebuild from the history database. |
| 149 void Clear(); | 172 void Clear(); |
| 150 | 173 |
| 174 // Returns true if there is no data in the index. |
| 175 bool Empty() const; |
| 176 |
| 177 // Creates a copy of this object. |
| 178 URLIndexPrivateData* Duplicate() const; |
| 179 |
| 151 // Adds |word_id| to |history_id|'s entry in the history/word map, | 180 // Adds |word_id| to |history_id|'s entry in the history/word map, |
| 152 // creating a new entry if one does not already exist. | 181 // creating a new entry if one does not already exist. |
| 153 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); | 182 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); |
| 154 | 183 |
| 155 // Given a set of Char16s, finds words containing those characters. | 184 // Given a set of Char16s, finds words containing those characters. |
| 156 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); | 185 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); |
| 157 | 186 |
| 158 // Initializes the whitelist of URL schemes. | |
| 159 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); | |
| 160 | |
| 161 // URL History indexing support functions. | 187 // URL History indexing support functions. |
| 162 | 188 |
| 163 // Indexes one URL history item as described by |row|. Returns true if the | 189 // Indexes one URL history item as described by |row|. Returns true if the |
| 164 // row was actually indexed. | 190 // row was actually indexed. |languages| gives a list of language encodings by |
| 165 bool IndexRow(const URLRow& row); | 191 // which the URLs and page titles are broken down into words and characters. |
| 192 // |scheme_whitelist| is used to filter non-qualifying schemes. |
| 193 bool IndexRow(const URLRow& row, |
| 194 const std::string& languages, |
| 195 const std::set<std::string>& scheme_whitelist); |
| 166 | 196 |
| 167 // Adds the history item in |row| to the index if it does not already | 197 // Adds the history item in |row| to the index if it does not already |
| 168 // exist and it meets the minimum 'quick' criteria. If the row already exists | 198 // exist and it meets the minimum 'quick' criteria. If the row already exists |
| 169 // in the index then the index will be updated if the row still meets the | 199 // in the index then the index will be updated if the row still meets the |
| 170 // criteria, otherwise the row will be removed from the index. Returns true | 200 // criteria, otherwise the row will be removed from the index. Returns true |
| 171 // if the index was actually updated. | 201 // if the index was actually updated. |languages| gives a list of language |
| 172 bool UpdateURL(const URLRow& row); | 202 // encodings by which the URLs and page titles are broken down into words and |
| 203 // characters. |scheme_whitelist| is used to filter non-qualifying schemes. |
| 204 bool UpdateURL(const URLRow& row, |
| 205 const std::string& languages, |
| 206 const std::set<std::string>& scheme_whitelist); |
| 173 | 207 |
| 174 // Deletes indexing data for the history item with the URL given in |url|. | 208 // Deletes indexing data for the history item with the URL given in |url|. |
| 175 // The item may not have actually been indexed, which is the case if it did | 209 // The item may not have actually been indexed, which is the case if it did |
| 176 // not previously meet minimum 'quick' criteria. Returns true if the index | 210 // not previously meet minimum 'quick' criteria. Returns true if the index |
| 177 // was actually updated. | 211 // was actually updated. |
| 178 bool DeleteURL(const GURL& url); | 212 bool DeleteURL(const GURL& url); |
| 179 | 213 |
| 180 // Parses and indexes the words in the URL and page title of |row|. | 214 // Parses and indexes the words in the URL and page title of |row|. |
| 181 void AddRowWordsToIndex(const URLRow& row); | 215 // |languages| gives a list of language encodings by which the URLs and page |
| 216 // titles are broken down into words and characters. |
| 217 void AddRowWordsToIndex(const URLRow& row, |
| 218 const std::string& languages); |
| 182 | 219 |
| 183 // Removes |row| and all associated words and characters from the index. | 220 // Removes |row| and all associated words and characters from the index. |
| 184 void RemoveRowFromIndex(const URLRow& row); | 221 void RemoveRowFromIndex(const URLRow& row); |
| 185 | 222 |
| 186 // Removes all words and characters associated with |row| from the index. | 223 // Removes all words and characters associated with |row| from the index. |
| 187 void RemoveRowWordsFromIndex(const URLRow& row); | 224 void RemoveRowWordsFromIndex(const URLRow& row); |
| 188 | 225 |
| 189 // Given a single word in |uni_word|, adds a reference for the containing | 226 // Given a single word in |uni_word|, adds a reference for the containing |
| 190 // history item identified by |history_id| to the index. | 227 // history item identified by |history_id| to the index. |
| 191 void AddWordToIndex(const string16& uni_word, HistoryID history_id); | 228 void AddWordToIndex(const string16& uni_word, HistoryID history_id); |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 223 const URLRow& row, | 260 const URLRow& row, |
| 224 const string16& lower_string, | 261 const string16& lower_string, |
| 225 const String16Vector& terms_vector); | 262 const String16Vector& terms_vector); |
| 226 | 263 |
| 227 // Calculates a component score based on position, ordering and total | 264 // Calculates a component score based on position, ordering and total |
| 228 // substring match size using metrics recorded in |matches|. |max_length| | 265 // substring match size using metrics recorded in |matches|. |max_length| |
| 229 // is the length of the string against which the terms are being searched. | 266 // is the length of the string against which the terms are being searched. |
| 230 static int ScoreComponentForMatches(const TermMatches& matches, | 267 static int ScoreComponentForMatches(const TermMatches& matches, |
| 231 size_t max_length); | 268 size_t max_length); |
| 232 | 269 |
| 233 // Determines if |gurl| has a whitelisted scheme and returns true if so. | |
| 234 bool URLSchemeIsWhitelisted(const GURL& gurl) const; | |
| 235 | |
| 236 // Encode a data structure into the protobuf |cache|. | 270 // Encode a data structure into the protobuf |cache|. |
| 237 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; | 271 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; |
| 238 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; | 272 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; |
| 239 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 273 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 240 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 274 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 241 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; | 275 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 242 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; | 276 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 243 | 277 |
| 244 // Decode a data structure from the protobuf |cache|. Return false if there | 278 // Decode a data structure from the protobuf |cache|. Return false if there |
| 245 // is any kind of failure. | 279 // is any kind of failure. |
| 246 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); | 280 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); |
| 247 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); | 281 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); |
| 248 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 282 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 249 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 283 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 250 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); | 284 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 251 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); | 285 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 252 | 286 |
| 287 // Determines if |gurl| has a whitelisted scheme and returns true if so. |
| 288 static bool URLSchemeIsWhitelisted(const GURL& gurl, |
| 289 const std::set<std::string>& whitelist); |
| 290 |
| 253 // Cache of search terms. | 291 // Cache of search terms. |
| 254 SearchTermCacheMap search_term_cache_; | 292 SearchTermCacheMap search_term_cache_; |
| 255 | 293 |
| 256 // Languages used during the word-breaking process during indexing. | |
| 257 std::string languages_; | |
| 258 | |
| 259 // Only URLs with a whitelisted scheme are indexed. | |
| 260 std::set<std::string> scheme_whitelist_; | |
| 261 | |
| 262 // Start of data members that are cached ------------------------------------- | 294 // Start of data members that are cached ------------------------------------- |
| 263 | 295 |
| 264 // A list of all indexed words. The index of a word in this list is the | 296 // A list of all indexed words. The index of a word in this list is the |
| 265 // ID of the word in the word_map_. It reduces the memory overhead by | 297 // ID of the word in the word_map_. It reduces the memory overhead by |
| 266 // replacing a potentially long and repeated string with a simple index. | 298 // replacing a potentially long and repeated string with a simple index. |
| 267 String16Vector word_list_; | 299 String16Vector word_list_; |
| 268 | 300 |
| 269 // A list of available word slots in |word_list_|. An available word slot | 301 // A list of available word slots in |word_list_|. An available word slot |
| 270 // is the index of an unused word in the |word_list_| vector, also known as | 302 // is the index of an unused word in the |word_list_| vector, also known as |
| 271 // a WordID. As URL visits are added or modified new words may be added to | 303 // a WordID. As URL visits are added or modified new words may be added to |
| (...skipping 30 matching lines...) Expand all Loading... |
| 302 // Used for unit testing only. Records the number of candidate history items | 334 // Used for unit testing only. Records the number of candidate history items |
| 303 // at three stages in the index searching process. | 335 // at three stages in the index searching process. |
| 304 size_t pre_filter_item_count_; // After word index is queried. | 336 size_t pre_filter_item_count_; // After word index is queried. |
| 305 size_t post_filter_item_count_; // After trimming large result set. | 337 size_t post_filter_item_count_; // After trimming large result set. |
| 306 size_t post_scoring_item_count_; // After performing final filter/scoring. | 338 size_t post_scoring_item_count_; // After performing final filter/scoring. |
| 307 }; | 339 }; |
| 308 | 340 |
| 309 } // namespace history | 341 } // namespace history |
| 310 | 342 |
| 311 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 343 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
| OLD | NEW |