| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
| 6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
| 7 #pragma once | 7 #pragma once |
| 8 | 8 |
| 9 #include "base/file_path.h" | 9 #include "base/file_path.h" |
| 10 #include "base/gtest_prod_util.h" | 10 #include "base/gtest_prod_util.h" |
| 11 #include "chrome/browser/history/in_memory_url_index_types.h" | 11 #include "chrome/browser/history/in_memory_url_index_types.h" |
| 12 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" | 12 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" |
| 13 | 13 |
| 14 class HistoryQuickProviderTest; | 14 class HistoryQuickProviderTest; |
| 15 | 15 |
| 16 namespace in_memory_url_index { | 16 namespace in_memory_url_index { |
| 17 class InMemoryURLIndexCacheItem; | 17 class InMemoryURLIndexCacheItem; |
| 18 } | 18 } |
| 19 | 19 |
| 20 namespace history { | 20 namespace history { |
| 21 | 21 |
| 22 namespace imui = in_memory_url_index; | 22 namespace imui = in_memory_url_index; |
| 23 | 23 |
| 24 class HistoryDatabase; | 24 class HistoryDatabase; |
| 25 | 25 |
| 26 // Current version of the cache file. |
| 27 static const int kCurrentCacheFileVersion = 1; |
| 28 |
| 26 // A structure describing the InMemoryURLIndex's internal data and providing for | 29 // A structure describing the InMemoryURLIndex's internal data and providing for |
| 27 // restoring, rebuilding and updating that internal data. | 30 // restoring, rebuilding and updating that internal data. |
| 28 class URLIndexPrivateData { | 31 class URLIndexPrivateData { |
| 29 public: | 32 public: |
| 30 URLIndexPrivateData(); | 33 URLIndexPrivateData(); |
| 31 ~URLIndexPrivateData(); | 34 ~URLIndexPrivateData(); |
| 32 | 35 |
| 33 private: | 36 private: |
| 34 friend class AddHistoryMatch; | 37 friend class AddHistoryMatch; |
| 35 friend class ::HistoryQuickProviderTest; | 38 friend class ::HistoryQuickProviderTest; |
| (...skipping 134 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 170 // criteria, otherwise the row will be removed from the index. Returns true | 173 // criteria, otherwise the row will be removed from the index. Returns true |
| 171 // if the index was actually updated. | 174 // if the index was actually updated. |
| 172 bool UpdateURL(const URLRow& row); | 175 bool UpdateURL(const URLRow& row); |
| 173 | 176 |
| 174 // Deletes indexing data for the history item with the URL given in |url|. | 177 // Deletes indexing data for the history item with the URL given in |url|. |
| 175 // The item may not have actually been indexed, which is the case if it did | 178 // The item may not have actually been indexed, which is the case if it did |
| 176 // not previously meet minimum 'quick' criteria. Returns true if the index | 179 // not previously meet minimum 'quick' criteria. Returns true if the index |
| 177 // was actually updated. | 180 // was actually updated. |
| 178 bool DeleteURL(const GURL& url); | 181 bool DeleteURL(const GURL& url); |
| 179 | 182 |
| 180 // Parses and indexes the words in the URL and page title of |row|. | 183 // Parses and indexes the words in the URL and page title of |row| and |
| 181 void AddRowWordsToIndex(const URLRow& row); | 184 // calculates the word starts in each, saving the starts in |word_starts|. |
| 185 void AddRowWordsToIndex(const URLRow& row, WordStarts* word_starts); |
| 182 | 186 |
| 183 // Removes |row| and all associated words and characters from the index. | 187 // Removes |row| and all associated words and characters from the index. |
| 184 void RemoveRowFromIndex(const URLRow& row); | 188 void RemoveRowFromIndex(const URLRow& row); |
| 185 | 189 |
| 186 // Removes all words and characters associated with |row| from the index. | 190 // Removes all words and characters associated with |row| from the index. |
| 187 void RemoveRowWordsFromIndex(const URLRow& row); | 191 void RemoveRowWordsFromIndex(const URLRow& row); |
| 188 | 192 |
| 189 // Given a single word in |uni_word|, adds a reference for the containing | 193 // Given a single word in |uni_word|, adds a reference for the containing |
| 190 // history item identified by |history_id| to the index. | 194 // history item identified by |history_id| to the index. |
| 191 void AddWordToIndex(const string16& uni_word, HistoryID history_id); | 195 void AddWordToIndex(const string16& uni_word, HistoryID history_id); |
| (...skipping 23 matching lines...) Expand all Loading... |
| 215 // in the user input, 2) completeness of each term's match, 3) ordering | 219 // in the user input, 2) completeness of each term's match, 3) ordering |
| 216 // of the occurrence of each term (i.e. they appear in order), 4) last | 220 // of the occurrence of each term (i.e. they appear in order), 4) last |
| 217 // visit time, and 5) number of visits. | 221 // visit time, and 5) number of visits. |
| 218 // This raw score allows the results to be ordered and can be used | 222 // This raw score allows the results to be ordered and can be used |
| 219 // to influence the final score calculated by the client of this | 223 // to influence the final score calculated by the client of this |
| 220 // index. Returns a ScoredHistoryMatch structure with the raw score and | 224 // index. Returns a ScoredHistoryMatch structure with the raw score and |
| 221 // substring matching metrics. | 225 // substring matching metrics. |
| 222 static ScoredHistoryMatch ScoredMatchForURL( | 226 static ScoredHistoryMatch ScoredMatchForURL( |
| 223 const URLRow& row, | 227 const URLRow& row, |
| 224 const string16& lower_string, | 228 const string16& lower_string, |
| 225 const String16Vector& terms_vector); | 229 const String16Vector& terms_vector, |
| 230 const WordStarts& word_starts); |
| 226 | 231 |
| 227 // Calculates a component score based on position, ordering and total | 232 // Calculates a component score based on position, ordering and total |
| 228 // substring match size using metrics recorded in |matches|. |max_length| | 233 // substring match size using metrics recorded in |matches|. |max_length| |
| 229 // is the length of the string against which the terms are being searched. | 234 // is the length of the string against which the terms are being searched. |
| 230 static int ScoreComponentForMatches(const TermMatches& matches, | 235 static int ScoreComponentForMatches(const TermMatches& matches, |
| 231 size_t max_length); | 236 size_t max_length); |
| 232 | 237 |
| 233 // Determines if |gurl| has a whitelisted scheme and returns true if so. | 238 // Determines if |gurl| has a whitelisted scheme and returns true if so. |
| 234 bool URLSchemeIsWhitelisted(const GURL& gurl) const; | 239 bool URLSchemeIsWhitelisted(const GURL& gurl) const; |
| 235 | 240 |
| 241 // Sets the version of the cache file that will be saved when calling |
| 242 // SavePrivateData(). For unit testing only. |
| 243 void set_saved_cache_version(int version) { saved_cache_version_ = version; } |
| 244 |
| 236 // Encode a data structure into the protobuf |cache|. | 245 // Encode a data structure into the protobuf |cache|. |
| 237 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; | 246 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; |
| 238 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; | 247 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; |
| 239 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 248 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 240 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 249 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 241 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; | 250 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 242 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; | 251 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 252 void SaveWordStartsMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 243 | 253 |
| 244 // Decode a data structure from the protobuf |cache|. Return false if there | 254 // Decode a data structure from the protobuf |cache|. Return false if there |
| 245 // is any kind of failure. | 255 // is any kind of failure. |
| 246 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); | 256 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); |
| 247 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); | 257 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); |
| 248 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 258 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 249 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 259 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 250 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); | 260 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 251 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); | 261 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 262 bool RestoreWordStartsMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 252 | 263 |
| 253 // Cache of search terms. | 264 // Cache of search terms. |
| 254 SearchTermCacheMap search_term_cache_; | 265 SearchTermCacheMap search_term_cache_; |
| 255 | 266 |
| 256 // Languages used during the word-breaking process during indexing. | 267 // Languages used during the word-breaking process during indexing. |
| 257 std::string languages_; | 268 std::string languages_; |
| 258 | 269 |
| 259 // Only URLs with a whitelisted scheme are indexed. | 270 // Only URLs with a whitelisted scheme are indexed. |
| 260 std::set<std::string> scheme_whitelist_; | 271 std::set<std::string> scheme_whitelist_; |
| 261 | 272 |
| 262 // Start of data members that are cached ------------------------------------- | 273 // Start of data members that are cached ------------------------------------- |
| 263 | 274 |
| 275 // The version of the cache file most recently used to restore this instance |
| 276 // of the private data. If the private data was rebuilt from the history |
| 277 // database this will be 0. |
| 278 int restored_cache_version_; |
| 279 |
| 264 // A list of all indexed words. The index of a word in this list is the | 280 // A list of all indexed words. The index of a word in this list is the |
| 265 // ID of the word in the word_map_. It reduces the memory overhead by | 281 // ID of the word in the word_map_. It reduces the memory overhead by |
| 266 // replacing a potentially long and repeated string with a simple index. | 282 // replacing a potentially long and repeated string with a simple index. |
| 267 String16Vector word_list_; | 283 String16Vector word_list_; |
| 268 | 284 |
| 269 // A list of available word slots in |word_list_|. An available word slot | 285 // A list of available word slots in |word_list_|. An available word slot |
| 270 // is the index of an unused word in the word_list_ vector, also referred to as | 286 // is the index of an unused word in the word_list_ vector, also referred to as |
| 271 // a WordID. As URL visits are added or modified new words may be added to | 287 // a WordID. As URL visits are added or modified new words may be added to |
| 272 // the index, in which case any available words are used, if any, and then | 288 // the index, in which case any available words are used, if any, and then |
| 273 // words are added to the end of the word_list_. When URL visits are | 289 // words are added to the end of the word_list_. When URL visits are |
| (...skipping 16 matching lines...) Expand all Loading... |
| 290 | 306 |
| 291 // A one-to-many mapping from a HistoryID to all WordIDs of words that occur | 307 // A one-to-many mapping from a HistoryID to all WordIDs of words that occur |
| 292 // in the URL and/or page title of the history item referenced by that | 308 // in the URL and/or page title of the history item referenced by that |
| 293 // HistoryID. | 309 // HistoryID. |
| 294 HistoryIDWordMap history_id_word_map_; | 310 HistoryIDWordMap history_id_word_map_; |
| 295 | 311 |
| 296 // A one-to-one mapping from HistoryID to the history item data governing | 312 // A one-to-one mapping from HistoryID to the history item data governing |
| 297 // index inclusion and relevance scoring. | 313 // index inclusion and relevance scoring. |
| 298 HistoryInfoMap history_info_map_; | 314 HistoryInfoMap history_info_map_; |
| 299 | 315 |
| 316 // A one-to-one mapping from HistoryID to the word starts detected in each |
| 317 // item's URL and page title. |
| 318 WordStartsMap word_starts_map_; |
| 319 |
| 300 // End of data members that are cached --------------------------------------- | 320 // End of data members that are cached --------------------------------------- |
| 301 | 321 |
| 322 // For unit testing only. Specifies the version of the cache file to be saved. |
| 323 // Used only for testing upgrading of an older version of the cache upon |
| 324 // restore. |
| 325 int saved_cache_version_; |
| 326 |
| 302 // Used for unit testing only. Records the number of candidate history items | 327 // Used for unit testing only. Records the number of candidate history items |
| 303 // at three stages in the index searching process. | 328 // at three stages in the index searching process. |
| 304 size_t pre_filter_item_count_; // After word index is queried. | 329 size_t pre_filter_item_count_; // After word index is queried. |
| 305 size_t post_filter_item_count_; // After trimming large result set. | 330 size_t post_filter_item_count_; // After trimming large result set. |
| 306 size_t post_scoring_item_count_; // After performing final filter/scoring. | 331 size_t post_scoring_item_count_; // After performing final filter/scoring. |
| 307 }; | 332 }; |
| 308 | 333 |
| 309 } // namespace history | 334 } // namespace history |
| 310 | 335 |
| 311 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 336 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
| OLD | NEW |